/lib/pear/PHP/CodeSniffer/Tokenizers/PHP.php

https://github.com/viggof/moodle · PHP · 398 lines · 263 code · 39 blank · 96 comment · 46 complexity · f1122296bd1fe55e89a0914d16b9db71 MD5 · raw file

  1. <?php
  2. /**
  3. * Tokenizes PHP code.
  4. *
  5. * PHP version 5
  6. *
  7. * @category PHP
  8. * @package PHP_CodeSniffer
  9. * @author Greg Sherwood <gsherwood@squiz.net>
  10. * @copyright 2006 Squiz Pty Ltd (ABN 77 084 670 600)
  11. * @license http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
  12. * @version CVS: $Id$
  13. * @link http://pear.php.net/package/PHP_CodeSniffer
  14. */
  15. /**
  16. * Tokenizes PHP code.
  17. *
  18. * @category PHP
  19. * @package PHP_CodeSniffer
  20. * @author Greg Sherwood <gsherwood@squiz.net>
  21. * @copyright 2006 Squiz Pty Ltd (ABN 77 084 670 600)
  22. * @license http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
  23. * @version Release: 1.1.0
  24. * @link http://pear.php.net/package/PHP_CodeSniffer
  25. */
  class PHP_CodeSniffer_Tokenizers_PHP
  {

      /**
       * A list of tokens that are allowed to open a scope.
       *
       * Each entry is keyed by the token that owns the scope and describes:
       *  - 'start'  the token that opens the scope,
       *  - 'end'    the token that closes the scope,
       *  - 'strict' whether the token strictly requires an opener,
       *  - 'shared' whether the token can share a scope closer,
       *  - 'with'   the tokens the closer can be shared with.
       *
       * An example of a token that shares a scope closer is a CASE scope.
       *
       * @var array
       */
      public $scopeOpeners = array(
          T_IF => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => false,
              'shared' => false,
              'with'   => array(),
          ),
          T_TRY => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => true,
              'shared' => false,
              'with'   => array(),
          ),
          T_CATCH => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => true,
              'shared' => false,
              'with'   => array(),
          ),
          T_ELSE => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => false,
              'shared' => false,
              'with'   => array(),
          ),
          T_ELSEIF => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => false,
              'shared' => false,
              'with'   => array(),
          ),
          T_FOR => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => false,
              'shared' => false,
              'with'   => array(),
          ),
          T_FOREACH => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => false,
              'shared' => false,
              'with'   => array(),
          ),
          T_INTERFACE => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => true,
              'shared' => false,
              'with'   => array(),
          ),
          T_FUNCTION => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => false,
              'shared' => false,
              'with'   => array(),
          ),
          T_CLASS => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => true,
              'shared' => false,
              'with'   => array(),
          ),
          T_WHILE => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => false,
              'shared' => false,
              'with'   => array(),
          ),
          T_DO => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => true,
              'shared' => false,
              'with'   => array(),
          ),
          T_SWITCH => array(
              'start'  => T_OPEN_CURLY_BRACKET,
              'end'    => T_CLOSE_CURLY_BRACKET,
              'strict' => true,
              'shared' => false,
              'with'   => array(),
          ),
          T_CASE => array(
              'start'  => T_COLON,
              'end'    => T_BREAK,
              'strict' => true,
              'shared' => true,
              'with'   => array(
                  T_DEFAULT,
                  T_CASE,
              ),
          ),
          T_DEFAULT => array(
              'start'  => T_COLON,
              'end'    => T_BREAK,
              'strict' => true,
              'shared' => true,
              'with'   => array(T_CASE),
          ),
          T_START_HEREDOC => array(
              'start'  => T_START_HEREDOC,
              'end'    => T_END_HEREDOC,
              'strict' => true,
              'shared' => false,
              'with'   => array(),
          ),
      );

      /**
       * A list of tokens that end the scope.
       *
       * This array is just a unique collection of the 'end' tokens
       * from the $scopeOpeners array. The data is duplicated here to
       * save time during parsing of the file.
       *
       * @var array
       */
      public $endScopeTokens = array(
          T_CLOSE_CURLY_BRACKET,
          T_BREAK,
          T_END_HEREDOC,
      );


      /**
       * Creates an array of tokens when given some PHP code.
       *
       * Starts by using token_get_all() but does a lot of extra processing
       * to insert information about the context of the token:
       *  - \r\n line endings split over two tokens are merged back together,
       *  - double quoted strings are collapsed into T_DOUBLE_QUOTED_STRING tokens,
       *  - heredoc bodies are collapsed into T_HEREDOC tokens,
       *  - every token that spans several lines is split into one token per line,
       *  - T_ARRAY tokens followed by a variable (before any opening
       *    parenthesis) are re-tagged as T_ARRAY_HINT.
       *
       * @param string $string  The string to tokenize.
       * @param string $eolChar The EOL character to use for splitting strings.
       *                        Defaults to a real newline character.
       *
       * @return array The processed tokens, indexed sequentially from 0. Tokens
       *               built here have 'content', 'code' and 'type' keys; all
       *               other tokens are whatever
       *               PHP_CodeSniffer::standardiseToken() returns.
       */
      public function tokenizeString($string, $eolChar="\n")
      {
          // The @ silences anything token_get_all() raises for the input;
          // presumably so that unparsable code can still be tokenized.
          $tokens      = @token_get_all($string);
          $finalTokens = array();
          $newStackPtr = 0;
          $numTokens   = count($tokens);

          for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
              $token        = $tokens[$stackPtr];
              $tokenIsArray = is_array($token);

              /*
                  If we are using \r\n newline characters, the \r and \n are sometimes
                  split over two tokens. This normally occurs after comments. We need
                  to merge these two characters together so that our line endings are
                  consistent for all lines.
              */

              if ($tokenIsArray === true && substr($token[1], -1) === "\r") {
                  if (isset($tokens[($stackPtr + 1)]) === true
                      && is_array($tokens[($stackPtr + 1)]) === true
                      && $tokens[($stackPtr + 1)][1][0] === "\n"
                  ) {
                      $token[1] .= "\n";

                      if ($tokens[($stackPtr + 1)][1] === "\n") {
                          // The next token's content has been merged into this token,
                          // so we can skip it.
                          $stackPtr++;
                      } else {
                          // Only the leading \n moved; keep the rest of the next token.
                          $tokens[($stackPtr + 1)][1] = substr($tokens[($stackPtr + 1)][1], 1);
                      }
                  }
              }//end if

              /*
                  If this is a double quoted string, PHP will tokenise the whole
                  thing which causes problems with the scope map when braces are
                  within the string. So we need to merge the tokens together to
                  provide a single string.
              */

              if ($tokenIsArray === false && $token === '"') {
                  $tokenContent = '"';
                  $nestedVars   = array();

                  for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                      $subTokenIsArray = is_array($tokens[$i]);

                      if ($subTokenIsArray === true) {
                          $tokenContent .= $tokens[$i][1];
                          if ($tokens[$i][1] === '{') {
                              // Opening brace of a complex variable expression.
                              $nestedVars[] = $i;
                          }
                      } else {
                          $tokenContent .= $tokens[$i];
                          if ($tokens[$i] === '}') {
                              array_pop($nestedVars);
                          }
                      }

                      if ($subTokenIsArray === false
                          && $tokens[$i] === '"'
                          && empty($nestedVars) === true
                      ) {
                          // We found the other end of the double quoted string.
                          break;
                      }
                  }//end for

                  $stackPtr = $i;

                  // Convert each line within the double quoted string to a
                  // new token, so it conforms with other multiple line tokens.
                  $lineTokens = self::_createLineTokens(
                      $tokenContent,
                      $eolChar,
                      T_DOUBLE_QUOTED_STRING,
                      'T_DOUBLE_QUOTED_STRING'
                  );

                  foreach ($lineTokens as $lineToken) {
                      $finalTokens[$newStackPtr] = $lineToken;
                      $newStackPtr++;
                  }

                  // Continue, as we're done with this token.
                  continue;
              }//end if

              /*
                  If this is a heredoc, PHP will tokenise the whole thing, which
                  causes problems because heredocs almost never contain real PHP
                  code. We want to leave the start and end heredoc tokens alone
                  though.
              */

              if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
                  // Add the start heredoc token to the final array.
                  $finalTokens[$newStackPtr] = PHP_CodeSniffer::standardiseToken($token);
                  $newStackPtr++;

                  $tokenContent = '';
                  for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                      $subTokenIsArray = is_array($tokens[$i]);

                      if ($subTokenIsArray === true && $tokens[$i][0] === T_END_HEREDOC) {
                          // We found the other end of the heredoc.
                          break;
                      }

                      if ($subTokenIsArray === true) {
                          $tokenContent .= $tokens[$i][1];
                      } else {
                          $tokenContent .= $tokens[$i];
                      }
                  }

                  $stackPtr = $i;

                  // Convert each line within the heredoc to a
                  // new token, so it conforms with other multiple line tokens.
                  $lineTokens = self::_createLineTokens(
                      $tokenContent,
                      $eolChar,
                      T_HEREDOC,
                      'T_HEREDOC'
                  );

                  foreach ($lineTokens as $lineToken) {
                      $finalTokens[$newStackPtr] = $lineToken;
                      $newStackPtr++;
                  }

                  // Add the end heredoc token to the final array. An unterminated
                  // heredoc has no end token: the search above ran off the end of
                  // the token list, so there is nothing to add.
                  if (isset($tokens[$stackPtr]) === true) {
                      $finalTokens[$newStackPtr] = PHP_CodeSniffer::standardiseToken($tokens[$stackPtr]);
                      $newStackPtr++;
                  }

                  // Continue, as we're done with this token.
                  continue;
              }//end if

              /*
                  If this token has newlines in its content, split each line up
                  and create a new token for each line. We do this so it's easier
                  to ascertain where errors occur on a line.
                  Note that $token[1] is the token's content.
              */

              if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
                  $lineTokens = self::_createLineTokens(
                      $token[1],
                      $eolChar,
                      $token[0],
                      token_name($token[0])
                  );

                  foreach ($lineTokens as $lineToken) {
                      $finalTokens[$newStackPtr] = $lineToken;
                      $newStackPtr++;
                  }
              } else {
                  $newToken = PHP_CodeSniffer::standardiseToken($token);

                  // This is a special condition for T_ARRAY tokens used to
                  // type hint function arguments as being arrays. We want to keep
                  // the parenthesis map clean, so let's tag these tokens as
                  // T_ARRAY_HINT.
                  if ($newToken['code'] === T_ARRAY) {
                      // Look ahead: a variable seen before any opening parenthesis
                      // means this is a type hint rather than an array definition.
                      for ($i = $stackPtr; $i < $numTokens; $i++) {
                          if (is_array($tokens[$i]) === false) {
                              if ($tokens[$i] === '(') {
                                  break;
                              }
                          } else if ($tokens[$i][0] === T_VARIABLE) {
                              $newToken['code'] = T_ARRAY_HINT;
                              $newToken['type'] = 'T_ARRAY_HINT';
                              break;
                          }
                      }
                  }

                  $finalTokens[$newStackPtr] = $newToken;
                  $newStackPtr++;
              }//end if
          }//end for

          return $finalTokens;

      }//end tokenizeString()


      /**
       * Splits token content into one token per line.
       *
       * Every line except the last has the EOL character re-appended so no
       * content is lost. If the content ends with the EOL character, the empty
       * trailing line does not produce a token.
       *
       * @param string $content The (possibly multi-line) token content.
       * @param string $eolChar The EOL character to split the content on.
       * @param mixed  $code    The value to store under each token's 'code' key.
       * @param string $type    The value to store under each token's 'type' key.
       *
       * @return array A list of tokens, each with 'content', 'code' and 'type' keys.
       */
      private static function _createLineTokens($content, $eolChar, $code, $type)
      {
          $lineTokens = array();
          $lines      = explode($eolChar, $content);
          $lastLine   = (count($lines) - 1);

          foreach ($lines as $lineNum => $line) {
              if ($lineNum === $lastLine) {
                  if ($line === '') {
                      // The content ended with an EOL character.
                      break;
                  }
              } else {
                  $line .= $eolChar;
              }

              $lineTokens[] = array(
                  'content' => $line,
                  'code'    => $code,
                  'type'    => $type,
              );
          }

          return $lineTokens;

      }//end _createLineTokens()


  }//end class
  359. ?>