PageRenderTime 25ms CodeModel.GetById 2ms app.highlight 17ms RepoModel.GetById 2ms app.codeStats 0ms

/highlight_php.inc.php

http://github.com/brandonwamboldt/PHPLighter
PHP | 528 lines | 232 code | 71 blank | 225 comment | 104 complexity | 698b2b0f1caa27bd477bba23687b69a8 MD5 | raw file
  1<?php
  2/**
  3 * PHPLighter
  4 *
  5 * A PHP syntax highlighting engine that utilizes token_get_all() to produce a
  6 * level of syntax highlighting that rivals any modern editor. No more barebone
  7 * generic syntax highlighting!
  8 *
  9 * NOTICE OF LICENSE
 10 *
 11 * Licensed under the Open Software License version 3.0
 12 *
 13 * This source file is subject to the Open Software License (OSL 3.0) that is
 14 * bundled with this package in the file LICENSE. It is also available through
 15 * the world wide web at this URL:
 16 *
 17 * http://opensource.org/licenses/OSL-3.0
 18 *
 19 * @package phplighter
 20 * @author Brandon Wamboldt <brandon.wamboldt@gmail.com>
 21 * @license http://opensource.org/licenses/OSL-3.0 Open Software License (OSL 3.0)
 22 * @link http://brandonwamboldt.ca/phplighter
 23 * @version 1.0.12
 24 */
 25
 26/**
 27 * The main PHPLighter class
 28 *
 29 * @see PHPLighter::highlight()
 30 *
 31 * @author Brandon Wamboldt <brandon.wamboldt@gmail.com>
 32 * @since 1.0.0
 33 */
 34class PHPLighter
 35{
 36	/**
 37	 * Only basic classes will be added, for builtin tokens (No C_* classes)
 38	 *
 39	 * @since 1.0.8
 40	 * @var int
 41	 */
 42	const BASIC_HIGHLIGHTING = 1;
 43
 44	/**
 45	 * PHPDoc tags such as @since, @access and @var will not be given tags
 46	 *
 47	 * @since 1.0.8
 48	 * @var int
 49	 */
 50	const NO_TOKENIZE_DOC_TAGS = 2;
 51
 52	/**
 53	 * Links/URLs in comments will not be converted into HTML links
 54	 *
 55	 * @since 1.0.8
 56	 * @var int
 57	 */
 58	const NO_LINKIFY_LINKS = 4;
 59
 60	/**
 61	 * E-mail addresses in comments will not be converted to mailto: links
 62	 *
 63	 * @since 1.0.8
 64	 * @var int
 65	 */
 66	const NO_LINKIFY_EMAILS = 8;
 67
 68	/**
 69	 * An array of builtin functions (Functions included in a PHP extension)
 70	 *
 71	 * @access protected
 72	 * @since 1.0.0
 73	 * @static
 74	 * @var array
 75	 */
 76	protected static $builtin_functions = NULL;
 77
 78	/**
 79	 * Contains a list of PHPDoc tags separated by a | for use in a regex
 80	 *
 81	 * PHPDocumentor tags are placed in docblock comments (Comments that start
 82	 * with /**), and are prefixed with an @ symbol.
 83	 *
 84	 * @link http://www.phpdoc.org/docs/latest/for-users/list-of-tags.html
 85	 *
 86	 * @access protected
 87	 * @since 1.0.0
 88	 * @var string
 89	 */
 90	protected static $phpdoc_tags = 'abstract|access|author|category|copyright|deprecated|example|final|filesource|global|ignore|internal|license|link|method|name|package|param|property|return|see|since|static|staticvar|subpacakage|throws|todo|tutorial|uses|var|version';
 91
 92	/**
 93	 * Contains an array of PHP Magic Methods
 94	 *
 95	 * @link http://php.net/manual/en/language.oop5.magic.php
 96	 *
 97	 * @access protected
 98	 * @since 1.0.0
 99	 * @var array
100	 */
101	protected static $magic_methods = array(
102		'__sleep',
103		'__wakeup',
104		'__construct',
105		'__toString',
106		'__destruct',
107		'__invoke',
108		'__set_state',
109		'__call',
110		'__callStatic',
111		'__get',
112		'__set',
113		'__isset',
114		'__unset',
115		'__clone'
116	);
117
118	/**
119	 * Class options
120	 *
121	 * @access protected
122	 * @since 1.0.0
123	 * @var integer
124	 */
125	protected $options = 0;
126
127	/**
128	 * The original PHP source code to highlight
129	 *
130	 * @access protected
131	 * @since 1.0.0
132	 * @var string
133	 */
134	protected $source = '';
135
136	/**
137	 * An array of tokens, generated by token_get_all() usign the original PHP
138	 * source code passed to us
139	 *
140	 * @access protected
141	 * @since 1.0.0
142	 * @var array
143	 */
144	protected $tokens = array();
145
146	/**
147	 * Sets the source code to highlight and the options to apply
148	 *
149	 * @param string $source The PHP code to be highlighted. This should include the opening tag
150	 * @param int $options optional The options to apply when highlighting the source code
151	 *
152	 * @access public
153	 * @since 1.0.0
154	 */
155	public function __construct( $source, $options = 0 )
156	{
157		$this->options = $options;
158		$this->source = $source;
159
160		$this->get_builtin_functions();
161		$this->tokenize();
162	}
163
164	/**
165	 * Highlights the given PHP and returns it as a string with HTML tags
166	 *
167	 * @return string
168	 *
169	 * @access public
170	 * @since 1.0.0
171	 */
172	public function parse()
173	{
174		// Basic parsing mode?
175		if ( $this->options & self::BASIC_HIGHLIGHTING ) {
176			return $this->basic_parse();
177		}
178
179		$in_namespace = FALSE;
180		$in_class     = FALSE;
181		$in_string    = FALSE;
182		$output       = '';
183
184		// Go through each token and make it into a tag
185		foreach ( $this->tokens as $i => $token ) {
186
187			// Certain tokens such as ; are returned by themselves, everything else is returned as
188			// an array with three elements
189			if ( is_array( $token ) ) {
190
191				// So we don't have to call these functions all over
192				$identifier = $token[0];
193				$token_name = token_name( $token[0] );
194				$token      = htmlspecialchars( $token[1] );
195
196				// Don't enclose whitespace in a <span> tag, it gets excessive
197				if ( $identifier === T_WHITESPACE ) {
198					$output .= $token;
199				}
200
201				// Special handling for docblocks to deal with docblock tags, links and email links
202				else if ( $identifier === T_DOC_COMMENT ) {
203
204					if ( ! ( $this->options & self::NO_TOKENIZE_DOC_TAGS ) ) {
205						$token = preg_replace( '/(\@(' . self::$phpdoc_tags . '))(\s)/i', '<span class="C_DOCBLOCK_TAG">\1</span>\3', $token );
206					}
207
208					if ( ! ( $this->options & self::NO_LINKIFY_LINKS ) ) {
209						$token = preg_replace( '/(http\:\/\/[A-Za-z0-9\.\/\-\_\~\#\?\=\&\!\%]*)/i', '<a href="\1" class="C_DOCBLOCK_LINK">\1</a>', $token );
210					}
211
212					if ( ! ( $this->options & self::NO_LINKIFY_EMAILS ) ) {
213						$token = preg_replace( '/\&lt\;([A-Za-z0-9].*?@.*?)\&gt\;/i', '&lt;<a href="mailto:\1" class="C_DOCBLOCK_LINK">\1</a>&gt;', $token );
214					}
215
216					$output .= '<span class="' . $token_name . '">' . $token . '</span>';
217				}
218
219				// Special handling for comments to deal with links and email links
220				else if ( $identifier === T_COMMENT ) {
221
222					if ( ! ( $this->options & self::NO_LINKIFY_LINKS ) ) {
223						$token = preg_replace( '/(http\:\/\/[A-Za-z0-9\.\/\-\_\~\#\?\=\&\!\%]*)/i', '<a href="\1" class="C_COMMENT_LINK">\1</a>', $token );
224					}
225
226					if ( ! ( $this->options & self::NO_LINKIFY_EMAILS ) ) {
227						$token = preg_replace( '/\&lt\;([A-Za-z0-9].*?@.*?)\&gt\;/i', '&lt;<a href="mailto:\1" class="C_COMMENT_LINK">\1</a>&gt;', $token );
228					}
229
230					$output .= '<span class="' . $token_name . '">' . $token . '</span>';
231				}
232
233				// The 'self' keyword is just tokenized as a string, we give it a special identifier
234				else if ( $identifier === T_STRING && strtolower( $token ) == 'self' ) {
235					$output .= '<span class="T_STRING C_SELF">' . $token . '</span>';
236				}
237
238				// The 'null' keyword is just tokenized as a string, we give it a special identifier
239				else if ( $identifier === T_STRING && strtolower( $token ) == 'null' ) {
240					$output .= '<span class="T_STRING C_NULL">' . $token . '</span>';
241				}
242
243				// The 'true' keyword is just tokenized as a string, we give it a special identifier
244				else if ( $identifier === T_STRING && strtolower( $token ) == 'true' ) {
245					$output .= '<span class="T_STRING C_TRUE">' . $token . '</span>';
246				}
247
248				// The 'false' keyword is just tokenized as a string, we give it a special identifier
249				else if ( $identifier === T_STRING && strtolower( $token ) == 'false' ) {
250					$output .= '<span class="T_STRING C_FALSE">' . $token . '</span>';
251				}
252
253				// Identify class method calls, like $this->myMethod(), not 100% successful
254				else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_OBJECT_OPERATOR && $this->next_token( $i ) === '(' ) {
255					$output .= '<span class="T_STRING C_METHOD_CALL">' . $token . '</span>';
256				}
257
258				// Idenfy classnames
259				else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_CLASS ) {
260					$output .= '<span class="T_STRING C_CLASSNAME">' . $token . '</span>';
261				}
262
263				else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_EXTENDS ) {
264					$output .= '<span class="T_STRING C_EXTENDS_CLASS">' . $token . '</span>';
265				}
266
267				else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_IMPLEMENTS ) {
268					$output .= '<span class="T_STRING C_IMPLEMENTS_CLASS">' . $token . '</span>';
269				}
270
271				else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_NEW ) {
272					$output .= '<span class="T_STRING C_CLASSNAME_REF">' . $token . '</span>';
273				}
274
275				else if ( $identifier === T_STRING && $this->next_token( $i ) === T_VARIABLE ) {
276					$output .= '<span class="T_STRING C_PARAMETER_TYPEHINT">' . $token . '</span>';
277				}
278
279				// Class properties ($var->property)
280				else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_OBJECT_OPERATOR && $this->next_token( $i ) !== '(' ) {
281					$output .= '<span class="T_STRING C_OBJECT_PROPERTY">' . $token . '</span>';
282				}
283
284				// PHP Magic Method definitions
285				else if ( $identifier === T_STRING && in_array( $token, self::$magic_methods ) ) {
286					$output .= '<span class="T_STRING C_MAGIC_METHOD">' . $token . '</span>';
287				}
288
289				// Namespace declarations
290				else if ( $identifier === T_STRING && ( $this->prev_token( $i ) === T_NAMESPACE || $this->prev_token( $i ) === T_USE ) ) {
291					$output .= '<span class="C_NAMESPACE"><span class="T_STRING">' . $token . '</span>';
292					$in_namespace = TRUE;
293				}
294
295				// Function names
296				else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_FUNCTION ) {
297					$output .= '<span class="T_STRING C_FUNCTION_NAME">' . $token . '</span>';
298				}
299
300				// Builtin Functions
301				else if ( $identifier === T_STRING && in_array( $token, PHPLighter::$builtin_functions ) && $this->prev_token( $i ) !== T_FUNCTION ) {
302					$output .= '<span class="T_STRING C_BUILTIN_FUNCTION">' . $token . '</span>';
303				}
304
305				// Special string handling
306				else if ( $identifier === T_CONSTANT_ENCAPSED_STRING || $identifier === T_ENCAPSED_AND_WHITESPACE ) {
307					$output .= '<span class="' . $token_name . '">' . preg_replace( '`(\\\[^ ])`', '<span class="C_DOUBLE_BACKSLASH">\1</span>', $token ) . '</span>';
308				}
309
310				// Start heredocs
311				else if ( $identifier === T_START_HEREDOC ) {
312					$output .= '<span class="T_START_HEREDOC"><span class="C_HEREDOC_ARROWS">&lt;&lt;&lt;</span>' .  str_replace( '&lt;&lt;&lt;', '', $token )  . '</span>';
313				}
314
315				// All other token
316				else {
317					$output .= '<span class="' . $token_name . '">' . $token . '</span>';
318				}
319			} else {
320
321				// If we are in a namespace string and encounter a semicolon, end the namespace string
322				if ( $token == ';' && $in_namespace ) {
323					$output .= '</span><span class="C_SEMICOLON">;</span>';
324					$in_namespace = FALSE;
325				}
326
327				// Give semicolons a name
328				else if ( $token == ';' ) {
329					$output .= '<span class="C_SEMICOLON">;</span>';
330				}
331
332				// Give assignment operators a name
333				else if ( $token == '=' ) {
334					$output .= '<span class="C_ASSIGNMENT">=</span>';
335				}
336
337				// Special handling for double quoted strings which may contain variables
338				else if ( $token == '"' ) {
339					if ( $in_string ) {
340						$output .= '"</span>';
341						$in_string = FALSE;
342					} else {
343						$output .= '<span class="C_VARSTRING">"';
344						$in_string = TRUE;
345					}
346				}
347
348				// All other single character tokens
349				else {
350					$output .= $token;
351				}
352			}
353		}
354
355		return '<pre class="pretty-php">' . $output . '</pre>';
356	}
357
358	/**
359	 * Just like parse() but it only uses builtin tokens
360	 *
361	 * @return string
362	 *
363	 * @access protected
364	 * @since 1.0.0
365	 */
366	protected function basic_parse()
367	{
368		$output = '';
369
370		// Go through each token and make it into a tag
371		foreach ( $this->tokens as $i => $token ) {
372			// Certain tokens such as ; are returned by themselves, everything else is returned as
373			// an array with three elements
374			if ( is_array( $token ) ) {
375				if ( $token[0] !== T_WHITESPACE ) {
376					$output .= '<span class="' . token_name( $token[0] ) . '">' . htmlspecialchars( $token[1] ) . '</span>';
377				} else {
378					$output .= $token[1];
379				}
380			} else {
381				$output .= $token;
382			}
383		}
384
385		return $output;
386	}
387
388	/**
389	 * If we haven't already got a list of builtin functions, get them and
390	 * store them as a static variable
391	 *
392	 * @access protected
393	 * @since 1.0.0
394	 */
395	protected function get_builtin_functions()
396	{
397		if ( PHPLighter::$builtin_functions === NULL ) {
398			PHPLighter::$builtin_functions = array();
399			$loaded_ext = get_loaded_extensions();
400
401			foreach ( $loaded_ext as $ext ) {
402				PHPLighter::$builtin_functions = array_merge( PHPLighter::$builtin_functions, (array) get_extension_funcs( $ext ) );
403			}
404		}
405	}
406
407	/**
408	 * Split given source into PHP tokens (PHP 4 >= 4.2.0, PHP 5)
409	 *
410	 * token_get_all() returns an array of tokens. Some array elements will be
411	 * an array themselves, containing the token type, the PHP source for that
412	 * token, and the line number that the token occured on. Other elements
413	 * will simply be the token itself, such as (, ), [, and ]
414	 *
415	 * @link http://ca.php.net/manual/en/tokens.php
416	 * @link http://ca.php.net/token_get_all
417	 * @link http://ca.php.net/manual/en/function.token-name.php
418	 *
419	 * @access protected
420	 * @since 1.0.0
421	 */
422	protected function tokenize()
423	{
424		$this->tokens = token_get_all( $this->source );
425	}
426
427	/**
428	 * Retrieve the next token from the array of tokens
429	 *
430	 * @param int $position The position in the token array
431	 * @param int $modifier The token to get (Defaults to 1, for the next token)
432	 * @param bool $significant optional Whether or not to retrieve only significant tokens (Not T_WHITESPACE)
433	 * @return int
434	 *
435	 * @access protected
436	 * @since 1.0.0
437	 */
438	protected function next_token( $position, $modifier = 1, $significant = TRUE )
439	{
440		// No tokens left
441		if ( ! isset( $this->tokens[$position + $modifier] ) ) {
442			return 0;
443		}
444
445		$token = $this->tokens[$position + $modifier];
446
447		// Return the next significant (Non T_WHITESPACE) token?
448		if ( $significant === TRUE ) {
449
450			// If the requested token is a T_WHITESPACE token, call this function again but add 1 to the modifer
451			if ( $this->tokens[$position + $modifier][0] === T_WHITESPACE ) {
452				$token = $this->next_token( $position, $modifier + 1 );
453			} else {
454				$token = $this->tokens[$position + $modifier];
455			}
456		}
457
458		// Return the next token
459		if ( is_array( $token ) ) {
460			return $token[0];
461		}
462
463		return $token;
464	}
465
466	/**
467	 * Retrieve the previous token from the array of tokens
468	 *
469	 * @param int $position The position in the token array
470	 * @param int $modifier The token to get (Defaults to 1, for the previous token)
471	 * @param bool $significant optional Whether or not to retrieve only significant tokens (Not T_WHITESPACE)
472	 * @return int
473	 *
474	 * @access protected
475	 * @since 1.0.0
476	 */
477	protected function prev_token( $position, $modifier = 1, $significant = TRUE )
478	{
479		// No tokens left
480		if ( ! isset( $this->tokens[$position - $modifier] ) ) {
481			return 0;
482		}
483
484		$token = $this->tokens[$position - $modifier];
485
486		// Return the next significant (Non T_WHITESPACE) token?
487		if ( $significant === TRUE ) {
488
489			// If the requested token is a T_WHITESPACE token, call this function again but add 1 to the modifer
490			if ( $this->tokens[$position - $modifier][0] === T_WHITESPACE ) {
491				$token = $this->prev_token( $position, $modifier + 1 );
492			} else {
493				$token = $this->tokens[$position - $modifier];
494			}
495		}
496
497		// Return the next token
498		if ( is_array( $token ) ) {
499			return $token[0];
500		} else {
501			return $token;
502		}
503	}
504
505	/**
506	 * Outputs or returns a syntax highlighted version of the given PHP code using
507	 * the colors defined in the included stylesheet
508	 *
509	 * @param string $source The PHP code to be highlighted. This should include the opening tag
510	 * @param bool $return optional Set this parameter to TRUE to make this function return the highlighted code
511	 * @return string|void
512	 *
513	 * @access public
514	 * @since 1.0.0
515	 * @static
516	 */
517	public static function highlight( $source, $return = FALSE, $options = 0 )
518	{
519		$p = new PHPLighter( $source, $options );
520		$str = $p->parse();
521
522		if ( $return ) {
523			return $str;
524		}
525
526		echo $str;
527	}
528}