PageRenderTime 67ms CodeModel.GetById 13ms app.highlight 48ms RepoModel.GetById 1ms app.codeStats 0ms

/php/pear/PHP/CodeSniffer/Tokenizers/PHP.php

https://gitlab.com/trang1104/portable_project
PHP | 612 lines | 431 code | 57 blank | 124 comment | 64 complexity | f85f47c7ee7ac9f43c82ecb80fe14c07 MD5 | raw file
<?php
/**
 * Tokenizes PHP code.
 *
 * PHP version 5
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood <gsherwood@squiz.net>
 * @copyright 2006-2011 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */
 14
 15/**
 16 * Tokenizes PHP code.
 17 *
 18 * @category  PHP
 19 * @package   PHP_CodeSniffer
 20 * @author    Greg Sherwood <gsherwood@squiz.net>
 21 * @copyright 2006-2011 Squiz Pty Ltd (ABN 77 084 670 600)
 22 * @license   http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
 23 * @version   Release: 1.3.3
 24 * @link      http://pear.php.net/package/PHP_CodeSniffer
 25 */
 26class PHP_CodeSniffer_Tokenizers_PHP
 27{
 28
 29    /**
 30     * A list of tokens that are allowed to open a scope.
 31     *
 32     * This array also contains information about what kind of token the scope
 33     * opener uses to open and close the scope, if the token strictly requires
 34     * an opener, if the token can share a scope closer, and who it can be shared
 35     * with. An example of a token that shares a scope closer is a CASE scope.
 36     *
 37     * @var array
 38     */
 39    public $scopeOpeners = array(
 40                            T_IF            => array(
 41                                                'start'  => array(T_OPEN_CURLY_BRACKET),
 42                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
 43                                                'strict' => false,
 44                                                'shared' => false,
 45                                                'with'   => array(),
 46                                               ),
 47                            T_TRY           => array(
 48                                                'start'  => array(T_OPEN_CURLY_BRACKET),
 49                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
 50                                                'strict' => true,
 51                                                'shared' => false,
 52                                                'with'   => array(),
 53                                               ),
 54                            T_CATCH         => array(
 55                                                'start'  => array(T_OPEN_CURLY_BRACKET),
 56                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
 57                                                'strict' => true,
 58                                                'shared' => false,
 59                                                'with'   => array(),
 60                                               ),
 61                            T_ELSE          => array(
 62                                                'start'  => array(T_OPEN_CURLY_BRACKET),
 63                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
 64                                                'strict' => false,
 65                                                'shared' => false,
 66                                                'with'   => array(),
 67                                               ),
 68                            T_ELSEIF        => array(
 69                                                'start'  => array(T_OPEN_CURLY_BRACKET),
 70                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
 71                                                'strict' => false,
 72                                                'shared' => false,
 73                                                'with'   => array(),
 74                                               ),
 75                            T_FOR           => array(
 76                                                'start'  => array(T_OPEN_CURLY_BRACKET),
 77                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
 78                                                'strict' => false,
 79                                                'shared' => false,
 80                                                'with'   => array(),
 81                                               ),
 82                            T_FOREACH       => array(
 83                                                'start'  => array(T_OPEN_CURLY_BRACKET),
 84                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
 85                                                'strict' => false,
 86                                                'shared' => false,
 87                                                'with'   => array(),
 88                                               ),
 89                            T_INTERFACE     => array(
 90                                                'start'  => array(T_OPEN_CURLY_BRACKET),
 91                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
 92                                                'strict' => true,
 93                                                'shared' => false,
 94                                                'with'   => array(),
 95                                               ),
 96                            T_FUNCTION      => array(
 97                                                'start'  => array(T_OPEN_CURLY_BRACKET),
 98                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
 99                                                'strict' => true,
100                                                'shared' => false,
101                                                'with'   => array(),
102                                               ),
103                            T_CLASS         => array(
104                                                'start'  => array(T_OPEN_CURLY_BRACKET),
105                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
106                                                'strict' => true,
107                                                'shared' => false,
108                                                'with'   => array(),
109                                               ),
110                            T_NAMESPACE     => array(
111                                                'start'  => array(T_OPEN_CURLY_BRACKET),
112                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
113                                                'strict' => false,
114                                                'shared' => false,
115                                                'with'   => array(),
116                                               ),
117                            T_WHILE         => array(
118                                                'start'  => array(T_OPEN_CURLY_BRACKET),
119                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
120                                                'strict' => false,
121                                                'shared' => false,
122                                                'with'   => array(),
123                                               ),
124                            T_DO            => array(
125                                                'start'  => array(T_OPEN_CURLY_BRACKET),
126                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
127                                                'strict' => true,
128                                                'shared' => false,
129                                                'with'   => array(),
130                                               ),
131                            T_SWITCH        => array(
132                                                'start'  => array(T_OPEN_CURLY_BRACKET),
133                                                'end'    => array(T_CLOSE_CURLY_BRACKET),
134                                                'strict' => true,
135                                                'shared' => false,
136                                                'with'   => array(),
137                                               ),
138                            T_CASE          => array(
139                                                'start'  => array(
140                                                             T_COLON,
141                                                             T_SEMICOLON,
142                                                            ),
143                                                'end'    => array(T_BREAK),
144                                                'strict' => true,
145                                                'shared' => true,
146                                                'with'   => array(
147                                                             T_DEFAULT,
148                                                             T_CASE,
149                                                             T_SWITCH,
150                                                            ),
151                                               ),
152                            T_DEFAULT       => array(
153                                                'start'  => array(T_COLON),
154                                                'end'    => array(T_BREAK),
155                                                'strict' => true,
156                                                'shared' => true,
157                                                'with'   => array(
158                                                             T_CASE,
159                                                             T_SWITCH,
160                                                            ),
161                                               ),
162                            T_START_HEREDOC => array(
163                                                'start'  => array(T_START_HEREDOC),
164                                                'end'    => array(T_END_HEREDOC),
165                                                'strict' => true,
166                                                'shared' => false,
167                                                'with'   => array(),
168                                               ),
169                           );
170
171    /**
172     * A list of tokens that end the scope.
173     *
174     * This array is just a unique collection of the end tokens
175     * from the _scopeOpeners array. The data is duplicated here to
176     * save time during parsing of the file.
177     *
178     * @var array
179     */
180    public $endScopeTokens = array(
181                              T_CLOSE_CURLY_BRACKET,
182                              T_BREAK,
183                              T_END_HEREDOC,
184                             );
185
186
187    /**
188     * Creates an array of tokens when given some PHP code.
189     *
190     * Starts by using token_get_all() but does a lot of extra processing
191     * to insert information about the context of the token.
192     *
193     * @param string $string  The string to tokenize.
194     * @param string $eolChar The EOL character to use for splitting strings.
195     *
196     * @return array
197     */
198    public function tokenizeString($string, $eolChar='\n')
199    {
200        $tokens      = @token_get_all($string);
201        $finalTokens = array();
202
203        $newStackPtr = 0;
204        $numTokens   = count($tokens);
205        for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
206            $token        = $tokens[$stackPtr];
207            $tokenIsArray = is_array($token);
208
209            /*
210                If we are using \r\n newline characters, the \r and \n are sometimes
211                split over two tokens. This normally occurs after comments. We need
212                to merge these two characters together so that our line endings are
213                consistent for all lines.
214            */
215
216            if ($tokenIsArray === true && substr($token[1], -1) === "\r") {
217                if (isset($tokens[($stackPtr + 1)]) === true
218                    && is_array($tokens[($stackPtr + 1)]) === true
219                    && $tokens[($stackPtr + 1)][1][0] === "\n"
220                ) {
221                    $token[1] .= "\n";
222
223                    if ($tokens[($stackPtr + 1)][1] === "\n") {
224                        // The next token's content has been merged into this token,
225                        // so we can skip it.
226                        $stackPtr++;
227                    } else {
228                        $tokens[($stackPtr + 1)][1]
229                            = substr($tokens[($stackPtr + 1)][1], 1);
230                    }
231                }
232            }//end if
233
234            /*
235                If this is a double quoted string, PHP will tokenise the whole
236                thing which causes problems with the scope map when braces are
237                within the string. So we need to merge the tokens together to
238                provide a single string.
239            */
240
241            if ($tokenIsArray === false && $token === '"') {
242                $tokenContent = '"';
243                $nestedVars   = array();
244                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
245                    $subTokenIsArray = is_array($tokens[$i]);
246
247                    if ($subTokenIsArray === true) {
248                        $tokenContent .= $tokens[$i][1];
249                        if ($tokens[$i][1] === '{'
250                            && $tokens[$i][0] !== T_ENCAPSED_AND_WHITESPACE
251                        ) {
252                            $nestedVars[] = $i;
253                        }
254                    } else {
255                        $tokenContent .= $tokens[$i];
256                        if ($tokens[$i] === '}') {
257                            array_pop($nestedVars);
258                        }
259                    }
260
261                    if ($subTokenIsArray === false
262                        && $tokens[$i] === '"'
263                        && empty($nestedVars) === true
264                    ) {
265                        // We found the other end of the double quoted string.
266                        break;
267                    }
268                }
269
270                $stackPtr = $i;
271
272                // Convert each line within the double quoted string to a
273                // new token, so it conforms with other multiple line tokens.
274                $tokenLines = explode($eolChar, $tokenContent);
275                $numLines   = count($tokenLines);
276                $newToken   = array();
277
278                for ($j = 0; $j < $numLines; $j++) {
279                    $newToken['content'] = $tokenLines[$j];
280                    if ($j === ($numLines - 1)) {
281                        if ($tokenLines[$j] === '') {
282                            break;
283                        }
284                    } else {
285                        $newToken['content'] .= $eolChar;
286                    }
287
288                    $newToken['code']          = T_DOUBLE_QUOTED_STRING;
289                    $newToken['type']          = 'T_DOUBLE_QUOTED_STRING';
290                    $finalTokens[$newStackPtr] = $newToken;
291                    $newStackPtr++;
292                }
293
294                // Continue, as we're done with this token.
295                continue;
296            }//end if
297
298            /*
299                If this is a heredoc, PHP will tokenise the whole
300                thing which causes problems when heredocs don't
301                contain real PHP code, which is almost never.
302                We want to leave the start and end heredoc tokens
303                alone though.
304            */
305
306            if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
307                // Add the start heredoc token to the final array.
308                $finalTokens[$newStackPtr]
309                    = PHP_CodeSniffer::standardiseToken($token);
310
311                // Check if this is actually a nowdoc and use a different token
312                // to help the sniffs.
313                $nowdoc = false;
314                if ($token[1][3] === "'") {
315                    $finalTokens[$newStackPtr]['code'] = T_START_NOWDOC;
316                    $finalTokens[$newStackPtr]['type'] = 'T_START_NOWDOC';
317                    $nowdoc = true;
318                }
319
320                $newStackPtr++;
321
322                $tokenContent = '';
323                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
324                    $subTokenIsArray = is_array($tokens[$i]);
325                    if ($subTokenIsArray === true
326                        && $tokens[$i][0] === T_END_HEREDOC
327                    ) {
328                        // We found the other end of the heredoc.
329                        break;
330                    }
331
332                    if ($subTokenIsArray === true) {
333                        $tokenContent .= $tokens[$i][1];
334                    } else {
335                        $tokenContent .= $tokens[$i];
336                    }
337                }
338
339                $stackPtr = $i;
340
341                // Convert each line within the heredoc to a
342                // new token, so it conforms with other multiple line tokens.
343                $tokenLines = explode($eolChar, $tokenContent);
344                $numLines   = count($tokenLines);
345                $newToken   = array();
346
347                for ($j = 0; $j < $numLines; $j++) {
348                    $newToken['content'] = $tokenLines[$j];
349                    if ($j === ($numLines - 1)) {
350                        if ($tokenLines[$j] === '') {
351                            break;
352                        }
353                    } else {
354                        $newToken['content'] .= $eolChar;
355                    }
356
357                    if ($nowdoc === true) {
358                        $newToken['code'] = T_NOWDOC;
359                        $newToken['type'] = 'T_NOWDOC';
360                    } else {
361                        $newToken['code'] = T_HEREDOC;
362                        $newToken['type'] = 'T_HEREDOC';
363                    }
364
365                    $finalTokens[$newStackPtr] = $newToken;
366                    $newStackPtr++;
367                }
368
369                // Add the end heredoc token to the final array.
370                $finalTokens[$newStackPtr]
371                    = PHP_CodeSniffer::standardiseToken($tokens[$stackPtr]);
372
373                if ($nowdoc === true) {
374                    $finalTokens[$newStackPtr]['code'] = T_END_NOWDOC;
375                    $finalTokens[$newStackPtr]['type'] = 'T_END_NOWDOC';
376                    $nowdoc = true;
377                }
378
379                $newStackPtr++;
380
381                // Continue, as we're done with this token.
382                continue;
383            }//end if
384
385            /*
386                If this token has newlines in its content, split each line up
387                and create a new token for each line. We do this so it's easier
388                to asertain where errors occur on a line.
389                Note that $token[1] is the token's content.
390            */
391
392            if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
393                $tokenLines = explode($eolChar, $token[1]);
394                $numLines   = count($tokenLines);
395                $tokenName  = token_name($token[0]);
396
397                for ($i = 0; $i < $numLines; $i++) {
398                    $newToken['content'] = $tokenLines[$i];
399                    if ($i === ($numLines - 1)) {
400                        if ($tokenLines[$i] === '') {
401                            break;
402                        }
403                    } else {
404                        $newToken['content'] .= $eolChar;
405                    }
406
407                    $newToken['type']          = $tokenName;
408                    $newToken['code']          = $token[0];
409                    $finalTokens[$newStackPtr] = $newToken;
410                    $newStackPtr++;
411                }
412            } else {
413                $newToken = PHP_CodeSniffer::standardiseToken($token);
414
415                // This is a special condition for T_ARRAY tokens use to
416                // type hint function arguments as being arrays. We want to keep
417                // the parenthsis map clean, so let's tag these tokens as
418                // T_ARRAY_HINT.
419                if ($newToken['code'] === T_ARRAY) {
420                    // Recalculate number of tokens.
421                    $numTokens = count($tokens);
422                    for ($i = $stackPtr; $i < $numTokens; $i++) {
423                        if (is_array($tokens[$i]) === false) {
424                            if ($tokens[$i] === '(') {
425                                break;
426                            }
427                        } else if ($tokens[$i][0] === T_VARIABLE) {
428                            $newToken['code'] = T_ARRAY_HINT;
429                            $newToken['type'] = 'T_ARRAY_HINT';
430                            break;
431                        }
432                    }
433                }
434
435                $finalTokens[$newStackPtr] = $newToken;
436                $newStackPtr++;
437            }//end if
438        }//end for
439
440        return $finalTokens;
441
442    }//end tokenizeString()
443
444
445    /**
446     * Performs additional processing after main tokenizing.
447     *
448     * This additional processing checks for CASE statements
449     * that are using curly braces for scope openers and closers. It
450     * also turn some T_FUNCTION tokens into T_CLOSURE when they
451     * are not standard function definitions.
452     *
453     * @param array  &$tokens The array of tokens to process.
454     * @param string $eolChar The EOL character to use for splitting strings.
455     *
456     * @return void
457     */
458    public function processAdditional(&$tokens, $eolChar)
459    {
460        if (PHP_CODESNIFFER_VERBOSITY > 1) {
461            echo "\t*** START ADDITIONAL PHP PROCESSING ***".PHP_EOL;
462        }
463
464        $numTokens = count($tokens);
465        for ($i = ($numTokens - 1); $i >= 0; $i--) {
466            // Looking for functions that are actually closures.
467            if ($tokens[$i]['code'] === T_FUNCTION && isset($tokens[$i]['scope_opener']) === true) {
468                for ($x = ($i + 1); $x < $numTokens; $x++) {
469                    if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
470                        break;
471                    }
472                }
473
474                if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
475                    $tokens[$i]['code'] = T_CLOSURE;
476                    $tokens[$i]['type'] = 'T_CLOSURE';
477                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
478                        $line = $tokens[$i]['line'];
479                        echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
480                    }
481
482                    for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
483                        if (isset($tokens[$x]['conditions'][$i]) === false) {
484                            continue;
485                        }
486
487                        $tokens[$x]['conditions'][$i] = T_CLOSURE;
488                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
489                            $type = $tokens[$x]['type'];
490                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
491                        }
492                    }
493                }
494
495                continue;
496            }//end if
497
498            if (($tokens[$i]['code'] !== T_CASE
499                && $tokens[$i]['code'] !== T_DEFAULT)
500                || isset($tokens[$i]['scope_opener']) === false
501            ) {
502                // Only interested in CASE and DEFAULT statements
503                // from here on in.
504                continue;
505            }
506
507            $scopeOpener = $tokens[$i]['scope_opener'];
508            $scopeCloser = $tokens[$i]['scope_closer'];
509
510            // If the first char after the opener is a curly brace
511            // and that brace has been ignored, it is actually
512            // opening this case statement and the opener and closer are
513            // probably set incorrectly.
514            for ($x = ($scopeOpener + 1); $x < $numTokens; $x++) {
515                if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
516                    // Non-whitespace content.
517                    break;
518                }
519            }
520
521            if ($tokens[$x]['code'] === T_CASE) {
522                // Special case for multiple CASE statements that
523                // share the same closer. Because we are going
524                // backwards through the file, this next CASE
525                // statement is already fixed, so just use its
526                // closer and don't worry about fixing anything.
527                $newCloser = $tokens[$x]['scope_closer'];
528                $tokens[$i]['scope_closer'] = $newCloser;
529                if (PHP_CODESNIFFER_VERBOSITY > 1) {
530                    $oldType = $tokens[$scopeCloser]['type'];
531                    $newType = $tokens[$newCloser]['type'];
532                    $line    = $tokens[$i]['line'];
533                    echo "\t* token $i (T_CASE) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
534                }
535
536                continue;
537            }
538
539            if ($tokens[$x]['code'] !== T_OPEN_CURLY_BRACKET
540                || isset($tokens[$x]['scope_condition']) === true
541            ) {
542                // Not a CASE with a curly brace opener.
543                continue;
544            }
545
546            // The closer for this CASE/DEFAULT should be the closing
547            // curly brace and not whatever it already is. The opener needs
548            // to be the opening curly brace so everything matches up.
549            $newCloser = $tokens[$x]['bracket_closer'];
550            $tokens[$i]['scope_closer'] = $newCloser;
551            $tokens[$x]['scope_closer'] = $newCloser;
552            $tokens[$i]['scope_opener'] = $x;
553            $tokens[$x]['scope_condition'] = $i;
554            $tokens[$newCloser]['scope_condition'] = $i;
555            $tokens[$newCloser]['scope_opener']    = $x;
556            if (PHP_CODESNIFFER_VERBOSITY > 1) {
557                $line      = $tokens[$i]['line'];
558                $tokenType = $tokens[$i]['type'];
559
560                $oldType = $tokens[$scopeOpener]['type'];
561                $newType = $tokens[$x]['type'];
562                echo "\t* token $i ($tokenType) on line $line opener changed from $scopeOpener ($oldType) to $x ($newType)".PHP_EOL;
563
564                $oldType = $tokens[$scopeCloser]['type'];
565                $newType = $tokens[$newCloser]['type'];
566                echo "\t* token $i ($tokenType) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
567            }
568
569            // Now fix up all the tokens that think they are
570            // inside the CASE/DEFAULT statement when they are really outside.
571            for ($x = $newCloser; $x < $scopeCloser; $x++) {
572                foreach ($tokens[$x]['conditions'] as $num => $oldCond) {
573                    if ($oldCond === $tokens[$i]['code']) {
574                        $oldConditions = $tokens[$x]['conditions'];
575                        unset($tokens[$x]['conditions'][$num]);
576
577                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
578                            $type     = $tokens[$x]['type'];
579                            $oldConds = '';
580                            foreach ($oldConditions as $condition) {
581                                $oldConds .= token_name($condition).',';
582                            }
583
584                            $oldConds = rtrim($oldConds, ',');
585
586                            $newConds = '';
587                            foreach ($tokens[$x]['conditions'] as $condition) {
588                                $newConds .= token_name($condition).',';
589                            }
590
591                            $newConds = rtrim($newConds, ',');
592
593                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
594                            echo "\t\t\t=> conditions changed from $oldConds to $newConds".PHP_EOL;
595                        }
596
597                        break;
598                    }
599                }
600            }
601        }//end for
602
603        if (PHP_CODESNIFFER_VERBOSITY > 1) {
604            echo "\t*** END ADDITIONAL PHP PROCESSING ***".PHP_EOL;
605        }
606
607    }//end processAdditional()
608
609
610}//end class
611
612?>