/highlight_php.inc.php
PHP | 528 lines | 232 code | 71 blank | 225 comment | 104 complexity | 698b2b0f1caa27bd477bba23687b69a8 MD5 | raw file
Possible License(s): CC-BY-SA-3.0
- <?php
- /**
- * PHPLighter
- *
- * A PHP syntax highlighting engine that utilizes token_get_all() to produce a
- * level of syntax highlighting that rivals any modern editor. No more barebone
- * generic syntax highlighting!
- *
- * NOTICE OF LICENSE
- *
- * Licensed under the Open Software License version 3.0
- *
- * This source file is subject to the Open Software License (OSL 3.0) that is
- * bundled with this package in the file LICENSE. It is also available through
- * the world wide web at this URL:
- *
- * http://opensource.org/licenses/OSL-3.0
- *
- * @package phplighter
- * @author Brandon Wamboldt <brandon.wamboldt@gmail.com>
- * @license http://opensource.org/licenses/OSL-3.0 Open Software License (OSL 3.0)
- * @link http://brandonwamboldt.ca/phplighter
- * @version 1.0.12
- */
-
/**
 * The main PHPLighter class
 *
 * Tokenizes PHP source code with token_get_all() and renders every token as
 * HTML. Each token is wrapped in a <span> whose CSS class is the token name
 * (T_*); unless BASIC_HIGHLIGHTING is set, additional C_* classes are added
 * for constructs the tokenizer does not distinguish (method calls, class
 * names, builtin functions, ...).
 *
 * @see PHPLighter::highlight()
 *
 * @author Brandon Wamboldt <brandon.wamboldt@gmail.com>
 * @since 1.0.0
 */
class PHPLighter
{
    /**
     * Only basic classes will be added, for builtin tokens (No C_* classes)
     *
     * @since 1.0.8
     * @var int
     */
    const BASIC_HIGHLIGHTING = 1;

    /**
     * PHPDoc tags such as @since, @access and @var will not be given tags
     *
     * @since 1.0.8
     * @var int
     */
    const NO_TOKENIZE_DOC_TAGS = 2;

    /**
     * Links/URLs in comments will not be converted into HTML links
     *
     * @since 1.0.8
     * @var int
     */
    const NO_LINKIFY_LINKS = 4;

    /**
     * E-mail addresses in comments will not be converted to mailto: links
     *
     * @since 1.0.8
     * @var int
     */
    const NO_LINKIFY_EMAILS = 8;

    /**
     * An array of builtin functions (Functions included in a PHP extension)
     *
     * NULL until the first instance is constructed, then shared by all
     * instances.
     *
     * @access protected
     * @since 1.0.0
     * @static
     * @var array
     */
    protected static $builtin_functions = NULL;

    /**
     * Lookup table (function name => TRUE) built lazily from
     * $builtin_functions so that membership tests do not scan the whole list
     * for every T_STRING token
     *
     * @access protected
     * @static
     * @var array
     */
    protected static $builtin_function_map = NULL;

    /**
     * Contains a list of PHPDoc tags separated by a | for use in a regex
     *
     * PHPDocumentor tags are placed in docblock comments (Comments that start
     * with /**), and are prefixed with an @ symbol.
     *
     * @link http://www.phpdoc.org/docs/latest/for-users/list-of-tags.html
     *
     * @access protected
     * @since 1.0.0
     * @var string
     */
    protected static $phpdoc_tags = 'abstract|access|author|category|copyright|deprecated|example|final|filesource|global|ignore|internal|license|link|method|name|package|param|property|return|see|since|static|staticvar|subpackage|throws|todo|tutorial|uses|var|version';

    /**
     * Contains an array of PHP Magic Methods
     *
     * @link http://php.net/manual/en/language.oop5.magic.php
     *
     * @access protected
     * @since 1.0.0
     * @var array
     */
    protected static $magic_methods = array(
        '__sleep',
        '__wakeup',
        '__construct',
        '__toString',
        '__destruct',
        '__invoke',
        '__set_state',
        '__call',
        '__callStatic',
        '__get',
        '__set',
        '__isset',
        '__unset',
        '__clone'
    );

    /**
     * Class options (a bitmask of the class constants)
     *
     * @access protected
     * @since 1.0.0
     * @var integer
     */
    protected $options = 0;

    /**
     * The original PHP source code to highlight
     *
     * @access protected
     * @since 1.0.0
     * @var string
     */
    protected $source = '';

    /**
     * An array of tokens, generated by token_get_all() using the original PHP
     * source code passed to us
     *
     * @access protected
     * @since 1.0.0
     * @var array
     */
    protected $tokens = array();

    /**
     * Sets the source code to highlight and the options to apply
     *
     * The source is tokenized immediately, and the shared list of builtin
     * functions is loaded on first use.
     *
     * @param string $source The PHP code to be highlighted. This should include the opening tag
     * @param int $options optional The options to apply when highlighting the source code
     *
     * @access public
     * @since 1.0.0
     */
    public function __construct( $source, $options = 0 )
    {
        $this->options = $options;
        $this->source  = $source;

        $this->get_builtin_functions();
        $this->tokenize();
    }

    /**
     * Highlights the given PHP and returns it as a string with HTML tags
     *
     * When BASIC_HIGHLIGHTING is set the result of basic_parse() is returned
     * as is; otherwise the markup is wrapped in <pre class="pretty-php">.
     *
     * @return string
     *
     * @access public
     * @since 1.0.0
     */
    public function parse()
    {
        // Basic parsing mode?
        if ( $this->options & self::BASIC_HIGHLIGHTING ) {
            return $this->basic_parse();
        }

        $in_namespace = FALSE;
        $in_string    = FALSE;
        $output       = '';

        // Go through each token and make it into a tag
        foreach ( $this->tokens as $i => $token ) {

            // Certain tokens such as ; are returned by themselves, everything else is returned as
            // an array with three elements
            if ( is_array( $token ) ) {

                // So we don't have to call these functions all over
                $identifier = $token[0];
                $token_name = token_name( $identifier );
                $text       = htmlspecialchars( $token[1] );

                // Don't enclose whitespace in a <span> tag, it gets excessive
                if ( $identifier === T_WHITESPACE ) {
                    $output .= $text;
                }

                // Special handling for docblocks to deal with docblock tags, links and email links
                else if ( $identifier === T_DOC_COMMENT ) {
                    if ( ! ( $this->options & self::NO_TOKENIZE_DOC_TAGS ) ) {
                        $text = preg_replace( '/(\@(' . self::$phpdoc_tags . '))(\s)/i', '<span class="C_DOCBLOCK_TAG">\1</span>\3', $text );
                    }

                    $output .= '<span class="' . $token_name . '">' . $this->linkify( $text, 'C_DOCBLOCK_LINK' ) . '</span>';
                }

                // Special handling for comments to deal with links and email links
                else if ( $identifier === T_COMMENT ) {
                    $output .= '<span class="' . $token_name . '">' . $this->linkify( $text, 'C_COMMENT_LINK' ) . '</span>';
                }

                // T_STRING covers many different constructs, try to tell them apart
                else if ( $identifier === T_STRING ) {
                    $class = $this->string_class( $i, $text );

                    if ( $class === 'C_NAMESPACE' ) {

                        // Never nest namespace wrappers, close a stale one first
                        if ( $in_namespace ) {
                            $output .= '</span>';
                        }

                        // The wrapper stays open until the terminating ; or {
                        $output .= '<span class="C_NAMESPACE"><span class="T_STRING">' . $text . '</span>';
                        $in_namespace = TRUE;
                    } else if ( $class !== '' ) {
                        $output .= '<span class="T_STRING ' . $class . '">' . $text . '</span>';
                    } else {
                        $output .= '<span class="T_STRING">' . $text . '</span>';
                    }
                }

                // Special string handling, highlights escape sequences. The text is already HTML
                // escaped, so a backslash may be followed by a whole entity (\" is \&quot;) which
                // must be wrapped as one unit
                else if ( $identifier === T_CONSTANT_ENCAPSED_STRING || $identifier === T_ENCAPSED_AND_WHITESPACE ) {
                    $output .= '<span class="' . $token_name . '">' . preg_replace( '`(\\\\(?:&[a-z0-9#]+;|[^ ]))`', '<span class="C_DOUBLE_BACKSLASH">\1</span>', $text ) . '</span>';
                }

                // Start heredocs, the arrows are escaped in $text so strip and emit the escaped form
                else if ( $identifier === T_START_HEREDOC ) {
                    $output .= '<span class="T_START_HEREDOC"><span class="C_HEREDOC_ARROWS">&lt;&lt;&lt;</span>' . str_replace( '&lt;&lt;&lt;', '', $text ) . '</span>';
                }

                // All other token
                else {
                    $output .= '<span class="' . $token_name . '">' . $text . '</span>';
                }
            } else {

                // Give semicolons a name, a semicolon also ends an open namespace string
                if ( $token === ';' ) {
                    if ( $in_namespace ) {
                        $output .= '</span>';
                        $in_namespace = FALSE;
                    }

                    $output .= '<span class="C_SEMICOLON">;</span>';
                }

                // A block (namespace Foo { ... }) ends the namespace string too. Group use
                // declarations (use Foo\{A, B};) keep the brace inside the namespace string
                else if ( $token === '{' && $in_namespace && $this->prev_token( $i ) !== T_NS_SEPARATOR ) {
                    $output .= '</span>{';
                    $in_namespace = FALSE;
                }

                // Give assignment operators a name
                else if ( $token === '=' ) {
                    $output .= '<span class="C_ASSIGNMENT">=</span>';
                }

                // Special handling for double quoted strings which may contain variables
                else if ( $token === '"' ) {
                    if ( $in_string ) {
                        $output .= '"</span>';
                        $in_string = FALSE;
                    } else {
                        $output .= '<span class="C_VARSTRING">"';
                        $in_string = TRUE;
                    }
                }

                // All other single character tokens, escaped because < > and & are among them
                else {
                    $output .= htmlspecialchars( $token );
                }
            }
        }

        // Truncated or invalid source may leave a wrapper open, keep the markup balanced
        if ( $in_string ) {
            $output .= '</span>';
        }

        if ( $in_namespace ) {
            $output .= '</span>';
        }

        return '<pre class="pretty-php">' . $output . '</pre>';
    }

    /**
     * Just like parse() but it only uses builtin tokens
     *
     * Unlike parse(), the result is not wrapped in a <pre> element.
     *
     * @return string
     *
     * @access protected
     * @since 1.0.0
     */
    protected function basic_parse()
    {
        $output = '';

        // Go through each token and make it into a tag
        foreach ( $this->tokens as $token ) {

            // Single character tokens such as ; < and & are plain strings, escape them for HTML
            if ( ! is_array( $token ) ) {
                $output .= htmlspecialchars( $token );
            }

            // Whitespace is passed through without a <span>
            else if ( $token[0] === T_WHITESPACE ) {
                $output .= $token[1];
            }

            // Everything else gets a <span> named after the token
            else {
                $output .= '<span class="' . token_name( $token[0] ) . '">' . htmlspecialchars( $token[1] ) . '</span>';
            }
        }

        return $output;
    }

    /**
     * Works out the additional C_* class for a T_STRING token
     *
     * The checks run in a fixed order and the first match wins. The value
     * 'C_NAMESPACE' tells the caller to open a namespace wrapper instead of
     * adding the class to the token's own <span>.
     *
     * @param int $i The position of the token in the token array
     * @param string $text The (HTML escaped) text of the token
     * @return string The C_* class name, or an empty string if none applies
     *
     * @access protected
     */
    protected function string_class( $i, $text )
    {
        // 'self', 'null', 'true' and 'false' are tokenized as plain strings
        $keywords = array(
            'self'  => 'C_SELF',
            'null'  => 'C_NULL',
            'true'  => 'C_TRUE',
            'false' => 'C_FALSE'
        );

        $lower = strtolower( $text );

        if ( isset( $keywords[$lower] ) ) {
            return $keywords[$lower];
        }

        // The surrounding significant tokens are needed by most checks, look them up once
        $prev = $this->prev_token( $i );
        $next = $this->next_token( $i );

        // Identify class method calls, like $this->myMethod(), not 100% successful
        if ( $prev === T_OBJECT_OPERATOR && $next === '(' ) {
            return 'C_METHOD_CALL';
        }

        // Identify classnames
        if ( $prev === T_CLASS ) {
            return 'C_CLASSNAME';
        }

        if ( $prev === T_EXTENDS ) {
            return 'C_EXTENDS_CLASS';
        }

        if ( $prev === T_IMPLEMENTS ) {
            return 'C_IMPLEMENTS_CLASS';
        }

        if ( $prev === T_NEW ) {
            return 'C_CLASSNAME_REF';
        }

        if ( $next === T_VARIABLE ) {
            return 'C_PARAMETER_TYPEHINT';
        }

        // Class properties ($var->property), method calls were handled above
        if ( $prev === T_OBJECT_OPERATOR ) {
            return 'C_OBJECT_PROPERTY';
        }

        // PHP Magic Method definitions
        if ( in_array( $text, self::$magic_methods, TRUE ) ) {
            return 'C_MAGIC_METHOD';
        }

        // Namespace declarations
        if ( $prev === T_NAMESPACE || $prev === T_USE ) {
            return 'C_NAMESPACE';
        }

        // Function names
        if ( $prev === T_FUNCTION ) {
            return 'C_FUNCTION_NAME';
        }

        // Builtin Functions
        if ( $this->is_builtin_function( $text ) ) {
            return 'C_BUILTIN_FUNCTION';
        }

        return '';
    }

    /**
     * Converts URLs and e-mail addresses in HTML escaped comment text to links
     *
     * Honours the NO_LINKIFY_LINKS and NO_LINKIFY_EMAILS options. The text
     * must already have been passed through htmlspecialchars(): an ampersand
     * inside a URL is therefore matched as &amp; and e-mail addresses are
     * recognized between &lt; and &gt;.
     *
     * @param string $html The HTML escaped comment text
     * @param string $css_class The CSS class to give to generated <a> tags
     * @return string
     *
     * @access protected
     */
    protected function linkify( $html, $css_class )
    {
        if ( ! ( $this->options & self::NO_LINKIFY_LINKS ) ) {
            // &amp; is matched as a unit so the URL neither stops at its ; nor swallows a
            // following entity such as &gt;
            $html = preg_replace(
                '/(https?:\/\/(?:[A-Za-z0-9.\/\-_~#?=!%]|&amp;)*)/i',
                '<a href="\1" class="' . $css_class . '">\1</a>',
                $html
            );
        }

        if ( ! ( $this->options & self::NO_LINKIFY_EMAILS ) ) {
            // Whitespace, entities and markup are excluded so a match cannot span unrelated text
            $html = preg_replace(
                '/&lt;([A-Za-z0-9][^\s@&<>"]*@[^\s@&<>"]+)&gt;/',
                '&lt;<a href="mailto:\1" class="' . $css_class . '">\1</a>&gt;',
                $html
            );
        }

        return $html;
    }

    /**
     * If we haven't already got a list of builtin functions, get them and
     * store them as a static variable
     *
     * @access protected
     * @since 1.0.0
     */
    protected function get_builtin_functions()
    {
        if ( self::$builtin_functions !== NULL ) {
            return;
        }

        $lists = array();

        foreach ( get_loaded_extensions() as $ext ) {
            $functions = get_extension_funcs( $ext );

            // get_extension_funcs() returns FALSE rather than an array on failure
            if ( is_array( $functions ) && $functions ) {
                $lists[] = $functions;
            }
        }

        // Merge once instead of re-copying the growing list for every extension
        self::$builtin_functions = $lists ? call_user_func_array( 'array_merge', $lists ) : array();
    }

    /**
     * Checks whether a name is in the list of builtin functions
     *
     * The name must match the stored function name exactly.
     *
     * @param string $name The function name to look up
     * @return bool
     *
     * @access protected
     */
    protected function is_builtin_function( $name )
    {
        if ( self::$builtin_function_map === NULL ) {
            self::$builtin_function_map = array();

            foreach ( (array) self::$builtin_functions as $function ) {
                if ( is_string( $function ) ) {
                    self::$builtin_function_map[$function] = TRUE;
                }
            }
        }

        return isset( self::$builtin_function_map[$name] );
    }

    /**
     * Split given source into PHP tokens (PHP 4 >= 4.2.0, PHP 5)
     *
     * token_get_all() returns an array of tokens. Some array elements will be
     * an array themselves, containing the token type, the PHP source for that
     * token, and the line number that the token occurred on. Other elements
     * will simply be the token itself, such as (, ), [, and ]
     *
     * @link http://ca.php.net/manual/en/tokens.php
     * @link http://ca.php.net/token_get_all
     * @link http://ca.php.net/manual/en/function.token-name.php
     *
     * @access protected
     * @since 1.0.0
     */
    protected function tokenize()
    {
        $this->tokens = token_get_all( $this->source );
    }

    /**
     * Retrieve the next token from the array of tokens
     *
     * @param int $position The position in the token array
     * @param int $modifier The token to get (Defaults to 1, for the next token)
     * @param bool $significant optional Whether or not to retrieve only significant tokens (Not T_WHITESPACE)
     * @return int|string The token identifier, the character itself for single character tokens, or 0 if there is no such token
     *
     * @access protected
     * @since 1.0.0
     */
    protected function next_token( $position, $modifier = 1, $significant = TRUE )
    {
        return $this->adjacent_token( $position, 1, $modifier, $significant );
    }

    /**
     * Retrieve the previous token from the array of tokens
     *
     * @param int $position The position in the token array
     * @param int $modifier The token to get (Defaults to 1, for the previous token)
     * @param bool $significant optional Whether or not to retrieve only significant tokens (Not T_WHITESPACE)
     * @return int|string The token identifier, the character itself for single character tokens, or 0 if there is no such token
     *
     * @access protected
     * @since 1.0.0
     */
    protected function prev_token( $position, $modifier = 1, $significant = TRUE )
    {
        return $this->adjacent_token( $position, -1, $modifier, $significant );
    }

    /**
     * Shared implementation of next_token() and prev_token()
     *
     * Starts $modifier tokens away from $position and, when $significant is
     * TRUE, keeps moving in the same direction past T_WHITESPACE tokens.
     *
     * @param int $position The position in the token array
     * @param int $direction 1 to look forwards, -1 to look backwards
     * @param int $modifier How many tokens away from $position to start looking
     * @param bool $significant Whether or not to skip T_WHITESPACE tokens
     * @return int|string The token identifier, the character itself for single character tokens, or 0 if there is no such token
     *
     * @access protected
     */
    protected function adjacent_token( $position, $direction, $modifier, $significant )
    {
        $index = $position + ( $direction * $modifier );

        while ( isset( $this->tokens[$index] ) ) {
            $token = $this->tokens[$index];

            // Single character tokens are never whitespace, return them as is
            if ( ! is_array( $token ) ) {
                return $token;
            }

            if ( $significant !== TRUE || $token[0] !== T_WHITESPACE ) {
                return $token[0];
            }

            $index += $direction;
        }

        // No tokens left
        return 0;
    }

    /**
     * Outputs or returns a syntax highlighted version of the given PHP code using
     * the colors defined in the included stylesheet
     *
     * @param string $source The PHP code to be highlighted. This should include the opening tag
     * @param bool $return optional Set this parameter to TRUE to make this function return the highlighted code
     * @param int $options optional A bitmask of the class constants, see PHPLighter::__construct()
     * @return string|void The highlighted code if $return is TRUE, otherwise nothing (the code is echoed)
     *
     * @access public
     * @since 1.0.0
     * @static
     */
    public static function highlight( $source, $return = FALSE, $options = 0 )
    {
        $p   = new PHPLighter( $source, $options );
        $str = $p->parse();

        if ( $return ) {
            return $str;
        }

        echo $str;
    }
}