PageRenderTime 45ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/highlight_php.inc.php

http://github.com/brandonwamboldt/PHPLighter
PHP | 528 lines | 232 code | 71 blank | 225 comment | 104 complexity | 698b2b0f1caa27bd477bba23687b69a8 MD5 | raw file
Possible License(s): CC-BY-SA-3.0
  1. <?php
  2. /**
  3. * PHPLighter
  4. *
  5. * A PHP syntax highlighting engine that utilizes token_get_all() to produce a
  6. * level of syntax highlighting that rivals any modern editor. No more barebone
  7. * generic syntax highlighting!
  8. *
  9. * NOTICE OF LICENSE
  10. *
  11. * Licensed under the Open Software License version 3.0
  12. *
  13. * This source file is subject to the Open Software License (OSL 3.0) that is
  14. * bundled with this package in the file LICENSE. It is also available through
  15. * the world wide web at this URL:
  16. *
  17. * http://opensource.org/licenses/OSL-3.0
  18. *
  19. * @package phplighter
  20. * @author Brandon Wamboldt <brandon.wamboldt@gmail.com>
  21. * @license http://opensource.org/licenses/OSL-3.0 Open Software License (OSL 3.0)
  22. * @link http://brandonwamboldt.ca/phplighter
  23. * @version 1.0.12
  24. */
  25. /**
  26. * The main PHPLighter class
  27. *
  28. * @see PHPLighter::highlight()
  29. *
  30. * @author Brandon Wamboldt <brandon.wamboldt@gmail.com>
  31. * @since 1.0.0
  32. */
  33. class PHPLighter
  34. {
  35. /**
  36. * Only basic classes will be added, for builtin tokens (No C_* classes)
  37. *
  38. * @since 1.0.8
  39. * @var int
  40. */
  41. const BASIC_HIGHLIGHTING = 1;
  42. /**
  43. * PHPDoc tags such as @since, @access and @var will not be given tags
  44. *
  45. * @since 1.0.8
  46. * @var int
  47. */
  48. const NO_TOKENIZE_DOC_TAGS = 2;
  49. /**
  50. * Links/URLs in comments will not be converted into HTML links
  51. *
  52. * @since 1.0.8
  53. * @var int
  54. */
  55. const NO_LINKIFY_LINKS = 4;
  56. /**
  57. * E-mail addresses in comments will not be converted to mailto: links
  58. *
  59. * @since 1.0.8
  60. * @var int
  61. */
  62. const NO_LINKIFY_EMAILS = 8;
  63. /**
  64. * An array of builtin functions (Functions included in a PHP extension)
  65. *
  66. * @access protected
  67. * @since 1.0.0
  68. * @static
  69. * @var array
  70. */
  71. protected static $builtin_functions = NULL;
  72. /**
  73. * Contains a list of PHPDoc tags separated by a | for use in a regex
  74. *
  75. * PHPDocumentor tags are placed in docblock comments (Comments that start
  76. * with /**), and are prefixed with an @ symbol.
  77. *
  78. * @link http://www.phpdoc.org/docs/latest/for-users/list-of-tags.html
  79. *
  80. * @access protected
  81. * @since 1.0.0
  82. * @var string
  83. */
  84. protected static $phpdoc_tags = 'abstract|access|author|category|copyright|deprecated|example|final|filesource|global|ignore|internal|license|link|method|name|package|param|property|return|see|since|static|staticvar|subpacakage|throws|todo|tutorial|uses|var|version';
  85. /**
  86. * Contains an array of PHP Magic Methods
  87. *
  88. * @link http://php.net/manual/en/language.oop5.magic.php
  89. *
  90. * @access protected
  91. * @since 1.0.0
  92. * @var array
  93. */
  94. protected static $magic_methods = array(
  95. '__sleep',
  96. '__wakeup',
  97. '__construct',
  98. '__toString',
  99. '__destruct',
  100. '__invoke',
  101. '__set_state',
  102. '__call',
  103. '__callStatic',
  104. '__get',
  105. '__set',
  106. '__isset',
  107. '__unset',
  108. '__clone'
  109. );
  110. /**
  111. * Class options
  112. *
  113. * @access protected
  114. * @since 1.0.0
  115. * @var integer
  116. */
  117. protected $options = 0;
  118. /**
  119. * The original PHP source code to highlight
  120. *
  121. * @access protected
  122. * @since 1.0.0
  123. * @var string
  124. */
  125. protected $source = '';
  126. /**
  127. * An array of tokens, generated by token_get_all() usign the original PHP
  128. * source code passed to us
  129. *
  130. * @access protected
  131. * @since 1.0.0
  132. * @var array
  133. */
  134. protected $tokens = array();
  135. /**
  136. * Sets the source code to highlight and the options to apply
  137. *
  138. * @param string $source The PHP code to be highlighted. This should include the opening tag
  139. * @param int $options optional The options to apply when highlighting the source code
  140. *
  141. * @access public
  142. * @since 1.0.0
  143. */
  144. public function __construct( $source, $options = 0 )
  145. {
  146. $this->options = $options;
  147. $this->source = $source;
  148. $this->get_builtin_functions();
  149. $this->tokenize();
  150. }
  151. /**
  152. * Highlights the given PHP and returns it as a string with HTML tags
  153. *
  154. * @return string
  155. *
  156. * @access public
  157. * @since 1.0.0
  158. */
  159. public function parse()
  160. {
  161. // Basic parsing mode?
  162. if ( $this->options & self::BASIC_HIGHLIGHTING ) {
  163. return $this->basic_parse();
  164. }
  165. $in_namespace = FALSE;
  166. $in_class = FALSE;
  167. $in_string = FALSE;
  168. $output = '';
  169. // Go through each token and make it into a tag
  170. foreach ( $this->tokens as $i => $token ) {
  171. // Certain tokens such as ; are returned by themselves, everything else is returned as
  172. // an array with three elements
  173. if ( is_array( $token ) ) {
  174. // So we don't have to call these functions all over
  175. $identifier = $token[0];
  176. $token_name = token_name( $token[0] );
  177. $token = htmlspecialchars( $token[1] );
  178. // Don't enclose whitespace in a <span> tag, it gets excessive
  179. if ( $identifier === T_WHITESPACE ) {
  180. $output .= $token;
  181. }
  182. // Special handling for docblocks to deal with docblock tags, links and email links
  183. else if ( $identifier === T_DOC_COMMENT ) {
  184. if ( ! ( $this->options & self::NO_TOKENIZE_DOC_TAGS ) ) {
  185. $token = preg_replace( '/(\@(' . self::$phpdoc_tags . '))(\s)/i', '<span class="C_DOCBLOCK_TAG">\1</span>\3', $token );
  186. }
  187. if ( ! ( $this->options & self::NO_LINKIFY_LINKS ) ) {
  188. $token = preg_replace( '/(http\:\/\/[A-Za-z0-9\.\/\-\_\~\#\?\=\&\!\%]*)/i', '<a href="\1" class="C_DOCBLOCK_LINK">\1</a>', $token );
  189. }
  190. if ( ! ( $this->options & self::NO_LINKIFY_EMAILS ) ) {
  191. $token = preg_replace( '/\&lt\;([A-Za-z0-9].*?@.*?)\&gt\;/i', '&lt;<a href="mailto:\1" class="C_DOCBLOCK_LINK">\1</a>&gt;', $token );
  192. }
  193. $output .= '<span class="' . $token_name . '">' . $token . '</span>';
  194. }
  195. // Special handling for comments to deal with links and email links
  196. else if ( $identifier === T_COMMENT ) {
  197. if ( ! ( $this->options & self::NO_LINKIFY_LINKS ) ) {
  198. $token = preg_replace( '/(http\:\/\/[A-Za-z0-9\.\/\-\_\~\#\?\=\&\!\%]*)/i', '<a href="\1" class="C_COMMENT_LINK">\1</a>', $token );
  199. }
  200. if ( ! ( $this->options & self::NO_LINKIFY_EMAILS ) ) {
  201. $token = preg_replace( '/\&lt\;([A-Za-z0-9].*?@.*?)\&gt\;/i', '&lt;<a href="mailto:\1" class="C_COMMENT_LINK">\1</a>&gt;', $token );
  202. }
  203. $output .= '<span class="' . $token_name . '">' . $token . '</span>';
  204. }
  205. // The 'self' keyword is just tokenized as a string, we give it a special identifier
  206. else if ( $identifier === T_STRING && strtolower( $token ) == 'self' ) {
  207. $output .= '<span class="T_STRING C_SELF">' . $token . '</span>';
  208. }
  209. // The 'null' keyword is just tokenized as a string, we give it a special identifier
  210. else if ( $identifier === T_STRING && strtolower( $token ) == 'null' ) {
  211. $output .= '<span class="T_STRING C_NULL">' . $token . '</span>';
  212. }
  213. // The 'true' keyword is just tokenized as a string, we give it a special identifier
  214. else if ( $identifier === T_STRING && strtolower( $token ) == 'true' ) {
  215. $output .= '<span class="T_STRING C_TRUE">' . $token . '</span>';
  216. }
  217. // The 'false' keyword is just tokenized as a string, we give it a special identifier
  218. else if ( $identifier === T_STRING && strtolower( $token ) == 'false' ) {
  219. $output .= '<span class="T_STRING C_FALSE">' . $token . '</span>';
  220. }
  221. // Identify class method calls, like $this->myMethod(), not 100% successful
  222. else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_OBJECT_OPERATOR && $this->next_token( $i ) === '(' ) {
  223. $output .= '<span class="T_STRING C_METHOD_CALL">' . $token . '</span>';
  224. }
  225. // Idenfy classnames
  226. else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_CLASS ) {
  227. $output .= '<span class="T_STRING C_CLASSNAME">' . $token . '</span>';
  228. }
  229. else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_EXTENDS ) {
  230. $output .= '<span class="T_STRING C_EXTENDS_CLASS">' . $token . '</span>';
  231. }
  232. else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_IMPLEMENTS ) {
  233. $output .= '<span class="T_STRING C_IMPLEMENTS_CLASS">' . $token . '</span>';
  234. }
  235. else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_NEW ) {
  236. $output .= '<span class="T_STRING C_CLASSNAME_REF">' . $token . '</span>';
  237. }
  238. else if ( $identifier === T_STRING && $this->next_token( $i ) === T_VARIABLE ) {
  239. $output .= '<span class="T_STRING C_PARAMETER_TYPEHINT">' . $token . '</span>';
  240. }
  241. // Class properties ($var->property)
  242. else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_OBJECT_OPERATOR && $this->next_token( $i ) !== '(' ) {
  243. $output .= '<span class="T_STRING C_OBJECT_PROPERTY">' . $token . '</span>';
  244. }
  245. // PHP Magic Method definitions
  246. else if ( $identifier === T_STRING && in_array( $token, self::$magic_methods ) ) {
  247. $output .= '<span class="T_STRING C_MAGIC_METHOD">' . $token . '</span>';
  248. }
  249. // Namespace declarations
  250. else if ( $identifier === T_STRING && ( $this->prev_token( $i ) === T_NAMESPACE || $this->prev_token( $i ) === T_USE ) ) {
  251. $output .= '<span class="C_NAMESPACE"><span class="T_STRING">' . $token . '</span>';
  252. $in_namespace = TRUE;
  253. }
  254. // Function names
  255. else if ( $identifier === T_STRING && $this->prev_token( $i ) === T_FUNCTION ) {
  256. $output .= '<span class="T_STRING C_FUNCTION_NAME">' . $token . '</span>';
  257. }
  258. // Builtin Functions
  259. else if ( $identifier === T_STRING && in_array( $token, PHPLighter::$builtin_functions ) && $this->prev_token( $i ) !== T_FUNCTION ) {
  260. $output .= '<span class="T_STRING C_BUILTIN_FUNCTION">' . $token . '</span>';
  261. }
  262. // Special string handling
  263. else if ( $identifier === T_CONSTANT_ENCAPSED_STRING || $identifier === T_ENCAPSED_AND_WHITESPACE ) {
  264. $output .= '<span class="' . $token_name . '">' . preg_replace( '`(\\\[^ ])`', '<span class="C_DOUBLE_BACKSLASH">\1</span>', $token ) . '</span>';
  265. }
  266. // Start heredocs
  267. else if ( $identifier === T_START_HEREDOC ) {
  268. $output .= '<span class="T_START_HEREDOC"><span class="C_HEREDOC_ARROWS">&lt;&lt;&lt;</span>' . str_replace( '&lt;&lt;&lt;', '', $token ) . '</span>';
  269. }
  270. // All other token
  271. else {
  272. $output .= '<span class="' . $token_name . '">' . $token . '</span>';
  273. }
  274. } else {
  275. // If we are in a namespace string and encounter a semicolon, end the namespace string
  276. if ( $token == ';' && $in_namespace ) {
  277. $output .= '</span><span class="C_SEMICOLON">;</span>';
  278. $in_namespace = FALSE;
  279. }
  280. // Give semicolons a name
  281. else if ( $token == ';' ) {
  282. $output .= '<span class="C_SEMICOLON">;</span>';
  283. }
  284. // Give assignment operators a name
  285. else if ( $token == '=' ) {
  286. $output .= '<span class="C_ASSIGNMENT">=</span>';
  287. }
  288. // Special handling for double quoted strings which may contain variables
  289. else if ( $token == '"' ) {
  290. if ( $in_string ) {
  291. $output .= '"</span>';
  292. $in_string = FALSE;
  293. } else {
  294. $output .= '<span class="C_VARSTRING">"';
  295. $in_string = TRUE;
  296. }
  297. }
  298. // All other single character tokens
  299. else {
  300. $output .= $token;
  301. }
  302. }
  303. }
  304. return '<pre class="pretty-php">' . $output . '</pre>';
  305. }
  306. /**
  307. * Just like parse() but it only uses builtin tokens
  308. *
  309. * @return string
  310. *
  311. * @access protected
  312. * @since 1.0.0
  313. */
  314. protected function basic_parse()
  315. {
  316. $output = '';
  317. // Go through each token and make it into a tag
  318. foreach ( $this->tokens as $i => $token ) {
  319. // Certain tokens such as ; are returned by themselves, everything else is returned as
  320. // an array with three elements
  321. if ( is_array( $token ) ) {
  322. if ( $token[0] !== T_WHITESPACE ) {
  323. $output .= '<span class="' . token_name( $token[0] ) . '">' . htmlspecialchars( $token[1] ) . '</span>';
  324. } else {
  325. $output .= $token[1];
  326. }
  327. } else {
  328. $output .= $token;
  329. }
  330. }
  331. return $output;
  332. }
  333. /**
  334. * If we haven't already got a list of builtin functions, get them and
  335. * store them as a static variable
  336. *
  337. * @access protected
  338. * @since 1.0.0
  339. */
  340. protected function get_builtin_functions()
  341. {
  342. if ( PHPLighter::$builtin_functions === NULL ) {
  343. PHPLighter::$builtin_functions = array();
  344. $loaded_ext = get_loaded_extensions();
  345. foreach ( $loaded_ext as $ext ) {
  346. PHPLighter::$builtin_functions = array_merge( PHPLighter::$builtin_functions, (array) get_extension_funcs( $ext ) );
  347. }
  348. }
  349. }
  350. /**
  351. * Split given source into PHP tokens (PHP 4 >= 4.2.0, PHP 5)
  352. *
  353. * token_get_all() returns an array of tokens. Some array elements will be
  354. * an array themselves, containing the token type, the PHP source for that
  355. * token, and the line number that the token occured on. Other elements
  356. * will simply be the token itself, such as (, ), [, and ]
  357. *
  358. * @link http://ca.php.net/manual/en/tokens.php
  359. * @link http://ca.php.net/token_get_all
  360. * @link http://ca.php.net/manual/en/function.token-name.php
  361. *
  362. * @access protected
  363. * @since 1.0.0
  364. */
  365. protected function tokenize()
  366. {
  367. $this->tokens = token_get_all( $this->source );
  368. }
  369. /**
  370. * Retrieve the next token from the array of tokens
  371. *
  372. * @param int $position The position in the token array
  373. * @param int $modifier The token to get (Defaults to 1, for the next token)
  374. * @param bool $significant optional Whether or not to retrieve only significant tokens (Not T_WHITESPACE)
  375. * @return int
  376. *
  377. * @access protected
  378. * @since 1.0.0
  379. */
  380. protected function next_token( $position, $modifier = 1, $significant = TRUE )
  381. {
  382. // No tokens left
  383. if ( ! isset( $this->tokens[$position + $modifier] ) ) {
  384. return 0;
  385. }
  386. $token = $this->tokens[$position + $modifier];
  387. // Return the next significant (Non T_WHITESPACE) token?
  388. if ( $significant === TRUE ) {
  389. // If the requested token is a T_WHITESPACE token, call this function again but add 1 to the modifer
  390. if ( $this->tokens[$position + $modifier][0] === T_WHITESPACE ) {
  391. $token = $this->next_token( $position, $modifier + 1 );
  392. } else {
  393. $token = $this->tokens[$position + $modifier];
  394. }
  395. }
  396. // Return the next token
  397. if ( is_array( $token ) ) {
  398. return $token[0];
  399. }
  400. return $token;
  401. }
  402. /**
  403. * Retrieve the previous token from the array of tokens
  404. *
  405. * @param int $position The position in the token array
  406. * @param int $modifier The token to get (Defaults to 1, for the previous token)
  407. * @param bool $significant optional Whether or not to retrieve only significant tokens (Not T_WHITESPACE)
  408. * @return int
  409. *
  410. * @access protected
  411. * @since 1.0.0
  412. */
  413. protected function prev_token( $position, $modifier = 1, $significant = TRUE )
  414. {
  415. // No tokens left
  416. if ( ! isset( $this->tokens[$position - $modifier] ) ) {
  417. return 0;
  418. }
  419. $token = $this->tokens[$position - $modifier];
  420. // Return the next significant (Non T_WHITESPACE) token?
  421. if ( $significant === TRUE ) {
  422. // If the requested token is a T_WHITESPACE token, call this function again but add 1 to the modifer
  423. if ( $this->tokens[$position - $modifier][0] === T_WHITESPACE ) {
  424. $token = $this->prev_token( $position, $modifier + 1 );
  425. } else {
  426. $token = $this->tokens[$position - $modifier];
  427. }
  428. }
  429. // Return the next token
  430. if ( is_array( $token ) ) {
  431. return $token[0];
  432. } else {
  433. return $token;
  434. }
  435. }
  436. /**
  437. * Outputs or returns a syntax highlighted version of the given PHP code using
  438. * the colors defined in the included stylesheet
  439. *
  440. * @param string $source The PHP code to be highlighted. This should include the opening tag
  441. * @param bool $return optional Set this parameter to TRUE to make this function return the highlighted code
  442. * @return string|void
  443. *
  444. * @access public
  445. * @since 1.0.0
  446. * @static
  447. */
  448. public static function highlight( $source, $return = FALSE, $options = 0 )
  449. {
  450. $p = new PHPLighter( $source, $options );
  451. $str = $p->parse();
  452. if ( $return ) {
  453. return $str;
  454. }
  455. echo $str;
  456. }
  457. }