/php/pear/PHP/CodeSniffer/Tokenizers/PHP.php

https://gitlab.com/trang1104/portable_project · PHP · 612 lines · 431 code · 57 blank · 124 comment · 64 complexity · f85f47c7ee7ac9f43c82ecb80fe14c07 MD5 · raw file

  1. <?php
  2. /**
  3. * Tokenizes PHP code.
  4. *
  5. * PHP version 5
  6. *
  7. * @category PHP
  8. * @package PHP_CodeSniffer
  9. * @author Greg Sherwood <gsherwood@squiz.net>
  10. * @copyright 2006-2011 Squiz Pty Ltd (ABN 77 084 670 600)
  11. * @license http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
  12. * @link http://pear.php.net/package/PHP_CodeSniffer
  13. */
  14. /**
  15. * Tokenizes PHP code.
  16. *
  17. * @category PHP
  18. * @package PHP_CodeSniffer
  19. * @author Greg Sherwood <gsherwood@squiz.net>
  20. * @copyright 2006-2011 Squiz Pty Ltd (ABN 77 084 670 600)
  21. * @license http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
  22. * @version Release: 1.3.3
  23. * @link http://pear.php.net/package/PHP_CodeSniffer
  24. */
  25. class PHP_CodeSniffer_Tokenizers_PHP
  26. {
  27. /**
  28. * A list of tokens that are allowed to open a scope.
  29. *
  30. * This array also contains information about what kind of token the scope
  31. * opener uses to open and close the scope, if the token strictly requires
  32. * an opener, if the token can share a scope closer, and who it can be shared
  33. * with. An example of a token that shares a scope closer is a CASE scope.
  34. *
  35. * @var array
  36. */
  37. public $scopeOpeners = array(
  38. T_IF => array(
  39. 'start' => array(T_OPEN_CURLY_BRACKET),
  40. 'end' => array(T_CLOSE_CURLY_BRACKET),
  41. 'strict' => false,
  42. 'shared' => false,
  43. 'with' => array(),
  44. ),
  45. T_TRY => array(
  46. 'start' => array(T_OPEN_CURLY_BRACKET),
  47. 'end' => array(T_CLOSE_CURLY_BRACKET),
  48. 'strict' => true,
  49. 'shared' => false,
  50. 'with' => array(),
  51. ),
  52. T_CATCH => array(
  53. 'start' => array(T_OPEN_CURLY_BRACKET),
  54. 'end' => array(T_CLOSE_CURLY_BRACKET),
  55. 'strict' => true,
  56. 'shared' => false,
  57. 'with' => array(),
  58. ),
  59. T_ELSE => array(
  60. 'start' => array(T_OPEN_CURLY_BRACKET),
  61. 'end' => array(T_CLOSE_CURLY_BRACKET),
  62. 'strict' => false,
  63. 'shared' => false,
  64. 'with' => array(),
  65. ),
  66. T_ELSEIF => array(
  67. 'start' => array(T_OPEN_CURLY_BRACKET),
  68. 'end' => array(T_CLOSE_CURLY_BRACKET),
  69. 'strict' => false,
  70. 'shared' => false,
  71. 'with' => array(),
  72. ),
  73. T_FOR => array(
  74. 'start' => array(T_OPEN_CURLY_BRACKET),
  75. 'end' => array(T_CLOSE_CURLY_BRACKET),
  76. 'strict' => false,
  77. 'shared' => false,
  78. 'with' => array(),
  79. ),
  80. T_FOREACH => array(
  81. 'start' => array(T_OPEN_CURLY_BRACKET),
  82. 'end' => array(T_CLOSE_CURLY_BRACKET),
  83. 'strict' => false,
  84. 'shared' => false,
  85. 'with' => array(),
  86. ),
  87. T_INTERFACE => array(
  88. 'start' => array(T_OPEN_CURLY_BRACKET),
  89. 'end' => array(T_CLOSE_CURLY_BRACKET),
  90. 'strict' => true,
  91. 'shared' => false,
  92. 'with' => array(),
  93. ),
  94. T_FUNCTION => array(
  95. 'start' => array(T_OPEN_CURLY_BRACKET),
  96. 'end' => array(T_CLOSE_CURLY_BRACKET),
  97. 'strict' => true,
  98. 'shared' => false,
  99. 'with' => array(),
  100. ),
  101. T_CLASS => array(
  102. 'start' => array(T_OPEN_CURLY_BRACKET),
  103. 'end' => array(T_CLOSE_CURLY_BRACKET),
  104. 'strict' => true,
  105. 'shared' => false,
  106. 'with' => array(),
  107. ),
  108. T_NAMESPACE => array(
  109. 'start' => array(T_OPEN_CURLY_BRACKET),
  110. 'end' => array(T_CLOSE_CURLY_BRACKET),
  111. 'strict' => false,
  112. 'shared' => false,
  113. 'with' => array(),
  114. ),
  115. T_WHILE => array(
  116. 'start' => array(T_OPEN_CURLY_BRACKET),
  117. 'end' => array(T_CLOSE_CURLY_BRACKET),
  118. 'strict' => false,
  119. 'shared' => false,
  120. 'with' => array(),
  121. ),
  122. T_DO => array(
  123. 'start' => array(T_OPEN_CURLY_BRACKET),
  124. 'end' => array(T_CLOSE_CURLY_BRACKET),
  125. 'strict' => true,
  126. 'shared' => false,
  127. 'with' => array(),
  128. ),
  129. T_SWITCH => array(
  130. 'start' => array(T_OPEN_CURLY_BRACKET),
  131. 'end' => array(T_CLOSE_CURLY_BRACKET),
  132. 'strict' => true,
  133. 'shared' => false,
  134. 'with' => array(),
  135. ),
  136. T_CASE => array(
  137. 'start' => array(
  138. T_COLON,
  139. T_SEMICOLON,
  140. ),
  141. 'end' => array(T_BREAK),
  142. 'strict' => true,
  143. 'shared' => true,
  144. 'with' => array(
  145. T_DEFAULT,
  146. T_CASE,
  147. T_SWITCH,
  148. ),
  149. ),
  150. T_DEFAULT => array(
  151. 'start' => array(T_COLON),
  152. 'end' => array(T_BREAK),
  153. 'strict' => true,
  154. 'shared' => true,
  155. 'with' => array(
  156. T_CASE,
  157. T_SWITCH,
  158. ),
  159. ),
  160. T_START_HEREDOC => array(
  161. 'start' => array(T_START_HEREDOC),
  162. 'end' => array(T_END_HEREDOC),
  163. 'strict' => true,
  164. 'shared' => false,
  165. 'with' => array(),
  166. ),
  167. );
  168. /**
  169. * A list of tokens that end the scope.
  170. *
  171. * This array is just a unique collection of the end tokens
  172. * from the _scopeOpeners array. The data is duplicated here to
  173. * save time during parsing of the file.
  174. *
  175. * @var array
  176. */
  177. public $endScopeTokens = array(
  178. T_CLOSE_CURLY_BRACKET,
  179. T_BREAK,
  180. T_END_HEREDOC,
  181. );
  182. /**
  183. * Creates an array of tokens when given some PHP code.
  184. *
  185. * Starts by using token_get_all() but does a lot of extra processing
  186. * to insert information about the context of the token.
  187. *
  188. * @param string $string The string to tokenize.
  189. * @param string $eolChar The EOL character to use for splitting strings.
  190. *
  191. * @return array
  192. */
  193. public function tokenizeString($string, $eolChar='\n')
  194. {
  195. $tokens = @token_get_all($string);
  196. $finalTokens = array();
  197. $newStackPtr = 0;
  198. $numTokens = count($tokens);
  199. for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
  200. $token = $tokens[$stackPtr];
  201. $tokenIsArray = is_array($token);
  202. /*
  203. If we are using \r\n newline characters, the \r and \n are sometimes
  204. split over two tokens. This normally occurs after comments. We need
  205. to merge these two characters together so that our line endings are
  206. consistent for all lines.
  207. */
  208. if ($tokenIsArray === true && substr($token[1], -1) === "\r") {
  209. if (isset($tokens[($stackPtr + 1)]) === true
  210. && is_array($tokens[($stackPtr + 1)]) === true
  211. && $tokens[($stackPtr + 1)][1][0] === "\n"
  212. ) {
  213. $token[1] .= "\n";
  214. if ($tokens[($stackPtr + 1)][1] === "\n") {
  215. // The next token's content has been merged into this token,
  216. // so we can skip it.
  217. $stackPtr++;
  218. } else {
  219. $tokens[($stackPtr + 1)][1]
  220. = substr($tokens[($stackPtr + 1)][1], 1);
  221. }
  222. }
  223. }//end if
  224. /*
  225. If this is a double quoted string, PHP will tokenise the whole
  226. thing which causes problems with the scope map when braces are
  227. within the string. So we need to merge the tokens together to
  228. provide a single string.
  229. */
  230. if ($tokenIsArray === false && $token === '"') {
  231. $tokenContent = '"';
  232. $nestedVars = array();
  233. for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
  234. $subTokenIsArray = is_array($tokens[$i]);
  235. if ($subTokenIsArray === true) {
  236. $tokenContent .= $tokens[$i][1];
  237. if ($tokens[$i][1] === '{'
  238. && $tokens[$i][0] !== T_ENCAPSED_AND_WHITESPACE
  239. ) {
  240. $nestedVars[] = $i;
  241. }
  242. } else {
  243. $tokenContent .= $tokens[$i];
  244. if ($tokens[$i] === '}') {
  245. array_pop($nestedVars);
  246. }
  247. }
  248. if ($subTokenIsArray === false
  249. && $tokens[$i] === '"'
  250. && empty($nestedVars) === true
  251. ) {
  252. // We found the other end of the double quoted string.
  253. break;
  254. }
  255. }
  256. $stackPtr = $i;
  257. // Convert each line within the double quoted string to a
  258. // new token, so it conforms with other multiple line tokens.
  259. $tokenLines = explode($eolChar, $tokenContent);
  260. $numLines = count($tokenLines);
  261. $newToken = array();
  262. for ($j = 0; $j < $numLines; $j++) {
  263. $newToken['content'] = $tokenLines[$j];
  264. if ($j === ($numLines - 1)) {
  265. if ($tokenLines[$j] === '') {
  266. break;
  267. }
  268. } else {
  269. $newToken['content'] .= $eolChar;
  270. }
  271. $newToken['code'] = T_DOUBLE_QUOTED_STRING;
  272. $newToken['type'] = 'T_DOUBLE_QUOTED_STRING';
  273. $finalTokens[$newStackPtr] = $newToken;
  274. $newStackPtr++;
  275. }
  276. // Continue, as we're done with this token.
  277. continue;
  278. }//end if
  279. /*
  280. If this is a heredoc, PHP will tokenise the whole
  281. thing which causes problems when heredocs don't
  282. contain real PHP code, which is almost never.
  283. We want to leave the start and end heredoc tokens
  284. alone though.
  285. */
  286. if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
  287. // Add the start heredoc token to the final array.
  288. $finalTokens[$newStackPtr]
  289. = PHP_CodeSniffer::standardiseToken($token);
  290. // Check if this is actually a nowdoc and use a different token
  291. // to help the sniffs.
  292. $nowdoc = false;
  293. if ($token[1][3] === "'") {
  294. $finalTokens[$newStackPtr]['code'] = T_START_NOWDOC;
  295. $finalTokens[$newStackPtr]['type'] = 'T_START_NOWDOC';
  296. $nowdoc = true;
  297. }
  298. $newStackPtr++;
  299. $tokenContent = '';
  300. for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
  301. $subTokenIsArray = is_array($tokens[$i]);
  302. if ($subTokenIsArray === true
  303. && $tokens[$i][0] === T_END_HEREDOC
  304. ) {
  305. // We found the other end of the heredoc.
  306. break;
  307. }
  308. if ($subTokenIsArray === true) {
  309. $tokenContent .= $tokens[$i][1];
  310. } else {
  311. $tokenContent .= $tokens[$i];
  312. }
  313. }
  314. $stackPtr = $i;
  315. // Convert each line within the heredoc to a
  316. // new token, so it conforms with other multiple line tokens.
  317. $tokenLines = explode($eolChar, $tokenContent);
  318. $numLines = count($tokenLines);
  319. $newToken = array();
  320. for ($j = 0; $j < $numLines; $j++) {
  321. $newToken['content'] = $tokenLines[$j];
  322. if ($j === ($numLines - 1)) {
  323. if ($tokenLines[$j] === '') {
  324. break;
  325. }
  326. } else {
  327. $newToken['content'] .= $eolChar;
  328. }
  329. if ($nowdoc === true) {
  330. $newToken['code'] = T_NOWDOC;
  331. $newToken['type'] = 'T_NOWDOC';
  332. } else {
  333. $newToken['code'] = T_HEREDOC;
  334. $newToken['type'] = 'T_HEREDOC';
  335. }
  336. $finalTokens[$newStackPtr] = $newToken;
  337. $newStackPtr++;
  338. }
  339. // Add the end heredoc token to the final array.
  340. $finalTokens[$newStackPtr]
  341. = PHP_CodeSniffer::standardiseToken($tokens[$stackPtr]);
  342. if ($nowdoc === true) {
  343. $finalTokens[$newStackPtr]['code'] = T_END_NOWDOC;
  344. $finalTokens[$newStackPtr]['type'] = 'T_END_NOWDOC';
  345. $nowdoc = true;
  346. }
  347. $newStackPtr++;
  348. // Continue, as we're done with this token.
  349. continue;
  350. }//end if
  351. /*
  352. If this token has newlines in its content, split each line up
  353. and create a new token for each line. We do this so it's easier
  354. to asertain where errors occur on a line.
  355. Note that $token[1] is the token's content.
  356. */
  357. if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
  358. $tokenLines = explode($eolChar, $token[1]);
  359. $numLines = count($tokenLines);
  360. $tokenName = token_name($token[0]);
  361. for ($i = 0; $i < $numLines; $i++) {
  362. $newToken['content'] = $tokenLines[$i];
  363. if ($i === ($numLines - 1)) {
  364. if ($tokenLines[$i] === '') {
  365. break;
  366. }
  367. } else {
  368. $newToken['content'] .= $eolChar;
  369. }
  370. $newToken['type'] = $tokenName;
  371. $newToken['code'] = $token[0];
  372. $finalTokens[$newStackPtr] = $newToken;
  373. $newStackPtr++;
  374. }
  375. } else {
  376. $newToken = PHP_CodeSniffer::standardiseToken($token);
  377. // This is a special condition for T_ARRAY tokens use to
  378. // type hint function arguments as being arrays. We want to keep
  379. // the parenthsis map clean, so let's tag these tokens as
  380. // T_ARRAY_HINT.
  381. if ($newToken['code'] === T_ARRAY) {
  382. // Recalculate number of tokens.
  383. $numTokens = count($tokens);
  384. for ($i = $stackPtr; $i < $numTokens; $i++) {
  385. if (is_array($tokens[$i]) === false) {
  386. if ($tokens[$i] === '(') {
  387. break;
  388. }
  389. } else if ($tokens[$i][0] === T_VARIABLE) {
  390. $newToken['code'] = T_ARRAY_HINT;
  391. $newToken['type'] = 'T_ARRAY_HINT';
  392. break;
  393. }
  394. }
  395. }
  396. $finalTokens[$newStackPtr] = $newToken;
  397. $newStackPtr++;
  398. }//end if
  399. }//end for
  400. return $finalTokens;
  401. }//end tokenizeString()
  402. /**
  403. * Performs additional processing after main tokenizing.
  404. *
  405. * This additional processing checks for CASE statements
  406. * that are using curly braces for scope openers and closers. It
  407. * also turn some T_FUNCTION tokens into T_CLOSURE when they
  408. * are not standard function definitions.
  409. *
  410. * @param array &$tokens The array of tokens to process.
  411. * @param string $eolChar The EOL character to use for splitting strings.
  412. *
  413. * @return void
  414. */
  415. public function processAdditional(&$tokens, $eolChar)
  416. {
  417. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  418. echo "\t*** START ADDITIONAL PHP PROCESSING ***".PHP_EOL;
  419. }
  420. $numTokens = count($tokens);
  421. for ($i = ($numTokens - 1); $i >= 0; $i--) {
  422. // Looking for functions that are actually closures.
  423. if ($tokens[$i]['code'] === T_FUNCTION && isset($tokens[$i]['scope_opener']) === true) {
  424. for ($x = ($i + 1); $x < $numTokens; $x++) {
  425. if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
  426. break;
  427. }
  428. }
  429. if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
  430. $tokens[$i]['code'] = T_CLOSURE;
  431. $tokens[$i]['type'] = 'T_CLOSURE';
  432. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  433. $line = $tokens[$i]['line'];
  434. echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
  435. }
  436. for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
  437. if (isset($tokens[$x]['conditions'][$i]) === false) {
  438. continue;
  439. }
  440. $tokens[$x]['conditions'][$i] = T_CLOSURE;
  441. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  442. $type = $tokens[$x]['type'];
  443. echo "\t\t* cleaned $x ($type) *".PHP_EOL;
  444. }
  445. }
  446. }
  447. continue;
  448. }//end if
  449. if (($tokens[$i]['code'] !== T_CASE
  450. && $tokens[$i]['code'] !== T_DEFAULT)
  451. || isset($tokens[$i]['scope_opener']) === false
  452. ) {
  453. // Only interested in CASE and DEFAULT statements
  454. // from here on in.
  455. continue;
  456. }
  457. $scopeOpener = $tokens[$i]['scope_opener'];
  458. $scopeCloser = $tokens[$i]['scope_closer'];
  459. // If the first char after the opener is a curly brace
  460. // and that brace has been ignored, it is actually
  461. // opening this case statement and the opener and closer are
  462. // probably set incorrectly.
  463. for ($x = ($scopeOpener + 1); $x < $numTokens; $x++) {
  464. if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
  465. // Non-whitespace content.
  466. break;
  467. }
  468. }
  469. if ($tokens[$x]['code'] === T_CASE) {
  470. // Special case for multiple CASE statements that
  471. // share the same closer. Because we are going
  472. // backwards through the file, this next CASE
  473. // statement is already fixed, so just use its
  474. // closer and don't worry about fixing anything.
  475. $newCloser = $tokens[$x]['scope_closer'];
  476. $tokens[$i]['scope_closer'] = $newCloser;
  477. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  478. $oldType = $tokens[$scopeCloser]['type'];
  479. $newType = $tokens[$newCloser]['type'];
  480. $line = $tokens[$i]['line'];
  481. echo "\t* token $i (T_CASE) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
  482. }
  483. continue;
  484. }
  485. if ($tokens[$x]['code'] !== T_OPEN_CURLY_BRACKET
  486. || isset($tokens[$x]['scope_condition']) === true
  487. ) {
  488. // Not a CASE with a curly brace opener.
  489. continue;
  490. }
  491. // The closer for this CASE/DEFAULT should be the closing
  492. // curly brace and not whatever it already is. The opener needs
  493. // to be the opening curly brace so everything matches up.
  494. $newCloser = $tokens[$x]['bracket_closer'];
  495. $tokens[$i]['scope_closer'] = $newCloser;
  496. $tokens[$x]['scope_closer'] = $newCloser;
  497. $tokens[$i]['scope_opener'] = $x;
  498. $tokens[$x]['scope_condition'] = $i;
  499. $tokens[$newCloser]['scope_condition'] = $i;
  500. $tokens[$newCloser]['scope_opener'] = $x;
  501. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  502. $line = $tokens[$i]['line'];
  503. $tokenType = $tokens[$i]['type'];
  504. $oldType = $tokens[$scopeOpener]['type'];
  505. $newType = $tokens[$x]['type'];
  506. echo "\t* token $i ($tokenType) on line $line opener changed from $scopeOpener ($oldType) to $x ($newType)".PHP_EOL;
  507. $oldType = $tokens[$scopeCloser]['type'];
  508. $newType = $tokens[$newCloser]['type'];
  509. echo "\t* token $i ($tokenType) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
  510. }
  511. // Now fix up all the tokens that think they are
  512. // inside the CASE/DEFAULT statement when they are really outside.
  513. for ($x = $newCloser; $x < $scopeCloser; $x++) {
  514. foreach ($tokens[$x]['conditions'] as $num => $oldCond) {
  515. if ($oldCond === $tokens[$i]['code']) {
  516. $oldConditions = $tokens[$x]['conditions'];
  517. unset($tokens[$x]['conditions'][$num]);
  518. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  519. $type = $tokens[$x]['type'];
  520. $oldConds = '';
  521. foreach ($oldConditions as $condition) {
  522. $oldConds .= token_name($condition).',';
  523. }
  524. $oldConds = rtrim($oldConds, ',');
  525. $newConds = '';
  526. foreach ($tokens[$x]['conditions'] as $condition) {
  527. $newConds .= token_name($condition).',';
  528. }
  529. $newConds = rtrim($newConds, ',');
  530. echo "\t\t* cleaned $x ($type) *".PHP_EOL;
  531. echo "\t\t\t=> conditions changed from $oldConds to $newConds".PHP_EOL;
  532. }
  533. break;
  534. }
  535. }
  536. }
  537. }//end for
  538. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  539. echo "\t*** END ADDITIONAL PHP PROCESSING ***".PHP_EOL;
  540. }
  541. }//end processAdditional()
  542. }//end class
  543. ?>