PageRenderTime 72ms CodeModel.GetById 35ms RepoModel.GetById 1ms app.codeStats 0ms

/inc/hyperlight/hyperlight.php

https://bitbucket.org/yoander/mtrack
PHP | 1033 lines | 622 code | 133 blank | 278 comment | 89 complexity | 9b34829f1fda8b42d08dcbe54bd1ecac MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. <?php
  2. /*
  3. * Copyright 2008 Konrad Rudolph
  4. * All rights reserved.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. * THE SOFTWARE.
  23. */
  24. /*
  25. * TODO list
  26. * =========
  27. *
  28. * - FIXME Nested syntax elements create redundant nested tags under certain
  29. * circumstances. This can be reproduced by the following PHP snippet:
  30. *
  31. * <pre class="<?php echo; ? >">
  32. *
  33. * (Remove space between `?` and `>`).
  34. * This no longer occurs, but only because it is worked around by checking for
  35. * `$token === ''` in the `emit*` methods. Empty tokens should never be emitted
  36. * anyway; the cause is probably the zero-width lookahead in the PHP syntax definition.
  37. *
  38. * - `hyperlight_calculate_fold_marks`: refactor, write proper handler
  39. *
  40. * - Line numbers (on client-side?)
  41. *
  42. */
  43. /**
  44. * Hyperlight source code highlighter for PHP.
  45. * @package hyperlight
  46. */
  47. /** @ignore */
  48. require_once dirname(__FILE__) . '/preg_helper.php';
  if (!function_exists('array_peek')) {
      /**
       * @internal
       * Returns the element stored at index <var>count - 1</var> of a list
       * without removing it, or <var>NULL</var> when the array is empty.
       *
       * @param array $array The stack to look at; it is only read, never changed.
       * @return mixed The top-most element, or <var>NULL</var>.
       */
      function array_peek(array &$array) {
          $size = count($array);
          if ($size === 0)
              return null;

          return $array[$size - 1];
      }
  }
  /**
   * @internal
   * Debugging aid: prints an optional heading followed by the HTML-escaped
   * <var>var_dump</var> output of a value, wrapped in a <var>pre</var> element.
   *
   * @param mixed $obj The value to dump.
   * @param string $descr Optional heading; echoed as-is inside an <var>h3</var>.
   * @return bool Always <var>TRUE</var>.
   */
  function dump($obj, $descr = null) {
      if ($descr !== null) {
          echo "<h3>$descr</h3>";
      }

      // var_dump() prints directly, so buffer its output to be able to escape it.
      ob_start();
      var_dump($obj);
      $captured = ob_get_clean();

      echo '<pre>', htmlspecialchars($captured), '</pre>';

      return true;
  }
  /**
   * Raised when the grammar offers a rule that has not been defined.
   *
   * The message names the innermost parser state and shows an excerpt of the
   * code around the failing position, with a caret on the line below it.
   */
  class NoMatchingRuleException extends Exception {
      /**
       * @internal
       * @param array $states The parser's state stack; its last element is reported.
       * @param int $position Offset into <var>$code</var> at which matching failed.
       * @param string $code The code that was being highlighted.
       */
      public function __construct($states, $position, $code) {
          $state = array_pop($states);
          parent::__construct(
              "State '$state' has no matching rule at position $position:\n" .
              $this->errorSurrounding($code, $position)
          );
      }

      /**
       * Extracts up to ten characters on either side of the error position
       * and appends a second line with a caret marking that position.
       * Only used for a comprehensive display.
       *
       * @param string $code The complete code.
       * @param int $pos Offset of the error inside <var>$code</var>.
       * @return string The excerpt, a line break, and the caret line.
       */
      private function errorSurrounding($code, $pos) {
          $size = 10;
          $begin = max(0, $pos - $size);
          $end = min(strlen($code), $pos + $size);
          // Number of excerpt characters that precede the error position.
          $offs = $pos - $begin;
          // The caret has to be preceded by exactly $offs blanks so that it
          // sits under the character at $pos. Right-aligning it in a field
          // of width $offs (as `sprintf("%{$offs}s", '^')` does) puts it one
          // column too far to the left.
          return substr($code, $begin, $end - $begin) . "\n" . str_repeat(' ', $offs) . '^';
      }
  }
  /**
   * Represents a nesting rule in the grammar of a language definition.
   *
   * Individual rules can either be represented by raw strings ("simple" rules) or
   * by a nesting rule. Nesting rules specify where they can start and end. Inside
   * a nesting rule, other rules may be applied (both simple and nesting).
   * For example, a nesting rule may define a string literal. Inside that string,
   * other rules may be applied that recognize escape sequences.
   *
   * To use a nesting rule, supply how it may start and end, e.g.:
   * <code>
   * $string_rule = array('string' => new Rule('/"/', '/"/'));
   * </code>
   * You also need to specify nested states:
   * <code>
   * $string_states = array('string' => 'escaped');
   * </code>
   * Now you can add another rule for <var>escaped</var>:
   * <code>
   * $escaped_rule = array('escaped' => '/\\(x\d{1,4}|.)/');
   * </code>
   */
  class Rule {
      /**
       * Common rules.
       */
      const ALL_WHITESPACE = '/(\s|\r|\n)+/';
      const C_IDENTIFIER = '/[a-z_][a-z0-9_]*/i';
      const C_COMMENT = '#//.*?\n|/\*.*?\*/#s';
      const C_MULTILINECOMMENT = '#/\*.*?\*/#s';
      const DOUBLEQUOTESTRING = '/"(?:\\\\"|.)*?"/s';
      const SINGLEQUOTESTRING = "/'(?:\\\\'|.)*?'/s";
      const C_DOUBLEQUOTESTRING = '/L?"(?:\\\\"|.)*?"/s';
      const C_SINGLEQUOTESTRING = "/L?'(?:\\\\'|.)*?'/s";
      const STRING = '/"(?:\\\\"|.)*?"|\'(?:\\\\\'|.)*?\'/s';
      /**
       * C-style numeric literal (extended syntax, case-insensitive).
       *
       * The sign of the exponent is optional in both branches, so that
       * <var>1e5</var> is matched as a whole just like <var>.5e5</var>.
       */
      const C_NUMBER = '/
          (?: # Integer followed by optional fractional part.
              (?:
                  0(?:
                      x[0-9a-f]+
                      |
                      [0-7]*
                  )
                  |
                  \d+
              )
              (?:\.\d*)?
              (?:e[+-]?\d+)?
          )
          |
          (?: # Just the fractional part.
              (?:\.\d+)
              (?:e[+-]?\d+)?
          )
          /ix';

      private $_start;
      private $_end;

      /**
       * @ignore
       * @param string $start Pattern with which the rule starts.
       * @param string $end Pattern with which the rule may end, or
       * <var>NULL</var> if no end pattern is given.
       */
      public function __construct($start, $end = null) {
          $this->_start = $start;
          $this->_end = $end;
      }

      /**
       * Returns the pattern with which this rule starts.
       * @return string
       */
      public function start() {
          return $this->_start;
      }

      /**
       * Returns the pattern with which this rule may end.
       * @return string The end pattern, or <var>NULL</var> if none was given.
       */
      public function end() {
          return $this->_end;
      }
  }
  /**
   * Abstract base class of all Hyperlight language definitions.
   *
   * In order to define a new language definition, this class is inherited.
   * The only function that needs to be overridden is the constructor. Helper
   * functions from the base class can then be called to construct the grammar
   * and store additional information.
   * The name of the subclass must be of the schema <var>{Lang}Language</var>,
   * where <var>{Lang}</var> is a short, unique name for the language starting
   * with a capital letter and continuing in lower case. For example,
   * <var>PhpLanguage</var> is a valid name. The language definition must
   * reside in a file located at <var>languages/{lang}.php</var>. Here,
   * <var>{lang}</var> is the all-lowercase spelling of the name, e.g.
   * <var>languages/php.php</var>.
   *
   */
  abstract class HyperLanguage {
      private $_states = array();
      private $_rules = array();
      private $_mappings = array();
      private $_info = array();
      private $_extensions = array();
      private $_caseInsensitive = false;
      private $_postProcessors = array();

      /** Language definition instances created by fromName(), keyed by name. */
      private static $_languageCache = array();
      /** Compiled languages created by compile(), keyed by language id. */
      private static $_compiledLanguageCache = array();
      /** Extension => language name map, loaded on first use by nameFromExt(). */
      private static $_filetypes;

      /**
       * Indices for information.
       */
      const NAME = 1;
      const VERSION = 2;
      const AUTHOR = 10;
      const WEBSITE = 5;
      const EMAIL = 6;

      /**
       * Retrieves a language definition name based on a file extension.
       *
       * Uses the contents of the <var>languages/filetypes</var> file to
       * guess the language definition name from a file name extension.
       * This file has to be generated using the
       * <var>collect-filetypes.php</var> script every time the language
       * definitions have been changed.
       *
       * Each line of that file is read as <var>name:ext1,ext2,...</var>.
       * Lines without a colon (e.g. blank lines) are skipped, and an
       * unreadable file is treated as an empty one.
       *
       * @param string $ext the file name extension.
       * @return string The language definition name or <var>NULL</var>.
       */
      public static function nameFromExt($ext) {
          if (self::$_filetypes === null) {
              // Start from an empty map so that an empty or unreadable file
              // still leaves a valid array behind.
              $ft_data = array();
              $ft_content = file('languages/filetypes', FILE_USE_INCLUDE_PATH);
              if ($ft_content === false)
                  $ft_content = array();

              foreach ($ft_content as $line) {
                  $parts = explode(':', trim($line));
                  if (count($parts) < 2)
                      continue;
                  list ($name, $extensions) = $parts;
                  // Inverse lookup.
                  foreach (explode(',', $extensions) as $extension)
                      $ft_data[$extension] = $name;
              }

              self::$_filetypes = $ft_data;
          }

          $ext = strtolower($ext);
          return
              array_key_exists($ext, self::$_filetypes) ?
              self::$_filetypes[$ext] : null;
      }

      /**
       * Returns the compiled form of a language definition, compiling it
       * only the first time a given language id is seen.
       *
       * @param HyperLanguage $lang The language definition.
       * @return HyperlightCompiledLanguage
       */
      public static function compile(HyperLanguage $lang) {
          $id = $lang->id();
          if (!isset(self::$_compiledLanguageCache[$id]))
              self::$_compiledLanguageCache[$id] = $lang->makeCompiledLanguage();
          return self::$_compiledLanguageCache[$id];
      }

      /**
       * Loads the language definition with the given name and compiles it.
       *
       * @param string $lang The language name, e.g. <var>php</var>.
       * @return HyperlightCompiledLanguage
       */
      public static function compileFromName($lang) {
          return self::compile(self::fromName($lang));
      }

      /**
       * Tells whether a language is already cached or has a definition file.
       *
       * NOTE(review): this looks for <var>languages/$lang.php</var> relative
       * to the working directory, while fromName() loads
       * <var>$lang.php</var> from this file's directory -- confirm which
       * location is intended.
       *
       * @param string $lang The language name.
       * @return bool
       */
      protected static function exists($lang) {
          return isset(self::$_languageCache[$lang]) or
              file_exists("languages/$lang.php");
      }

      /**
       * Returns the (cached) language definition instance for a name.
       *
       * The definition file <var>$lang.php</var> is loaded from the directory
       * of this file and the class <var>{Lang}Language</var> is instantiated.
       *
       * NOTE(review): <var>$lang</var> becomes part of a file path; callers
       * should not pass unvalidated user input.
       *
       * @param string $lang The language name.
       * @return HyperLanguage
       */
      protected static function fromName($lang) {
          if (!isset(self::$_languageCache[$lang])) {
              require_once dirname(__FILE__) . "/$lang.php";
              $klass = ucfirst("{$lang}Language");
              self::$_languageCache[$lang] = new $klass();
          }
          return self::$_languageCache[$lang];
      }

      /**
       * Returns the language id: the lower-cased class name without its
       * trailing <var>Language</var>.
       *
       * @return string
       */
      public function id() {
          $klass = get_class($this);
          return strtolower(substr($klass, 0, strlen($klass) - strlen('Language')));
      }

      protected function setCaseInsensitive($value) {
          $this->_caseInsensitive = $value;
      }

      protected function addStates(array $states) {
          $this->_states = self::mergeProperties($this->_states, $states);
      }

      protected function getState($key) {
          return $this->_states[$key];
      }

      protected function removeState($key) {
          unset($this->_states[$key]);
      }

      protected function addRules(array $rules) {
          $this->_rules = self::mergeProperties($this->_rules, $rules);
      }

      protected function getRule($key) {
          return $this->_rules[$key];
      }

      protected function removeRule($key) {
          unset($this->_rules[$key]);
      }

      protected function addMappings(array $mappings) {
          // TODO Implement nested mappings.
          $this->_mappings = array_merge($this->_mappings, $mappings);
      }

      protected function getMapping($key) {
          return $this->_mappings[$key];
      }

      protected function removeMapping($key) {
          unset($this->_mappings[$key]);
      }

      protected function setInfo(array $info) {
          $this->_info = $info;
      }

      protected function setExtensions(array $extensions) {
          $this->_extensions = $extensions;
      }

      /**
       * Registers another language whose compiled form is handed to the
       * compiled language as post-processor for the given rule.
       *
       * @param string $rule The rule name.
       * @param HyperLanguage $language The language to post-process with.
       */
      protected function addPostprocessing($rule, HyperLanguage $language) {
          $this->_postProcessors[$rule] = $language;
      }

  //    protected function addNestedLanguage(HyperLanguage $language, $hoistBackRules) {
  //        $prefix = get_class($language);
  //        if (!is_array($hoistBackRules))
  //            $hoistBackRules = array($hoistBackRules);
  //
  //        $states = array(); // Step 1: states
  //
  //        foreach ($language->_states as $stateName => $state) {
  //            $prefixedRules = array();
  //
  //            if (strstr($stateName, ' ')) {
  //                $parts = explode(' ', $stateName);
  //                $prefixed = array();
  //                foreach ($parts as $part)
  //                    $prefixed[] = "$prefix$part";
  //                $stateName = implode(' ', $prefixed);
  //            }
  //            else
  //                $stateName = "$prefix$stateName";
  //
  //            foreach ($state as $key => $rule) {
  //                if (is_string($key) and is_array($rule)) {
  //                    $nestedRules = array();
  //                    foreach ($rule as $nestedRule)
  //                        $nestedRules[] = ($nestedRule === '') ? '' :
  //                                         "$prefix$nestedRule";
  //
  //                    $prefixedRules["$prefix$key"] = $nestedRules;
  //                }
  //                else
  //                    $prefixedRules[] = "$prefix$rule";
  //            }
  //
  //            if ($stateName === 'init')
  //                $prefixedRules = array_merge($hoistBackRules, $prefixedRules);
  //
  //            $states[$stateName] = $prefixedRules;
  //        }
  //
  //        $rules = array(); // Step 2: rules
  //        // Mappings need to set up already!
  //        $mappings = array();
  //
  //        foreach ($language->_rules as $ruleName => $rule) {
  //            if (is_array($rule)) {
  //                $nestedRules = array();
  //                foreach ($rule as $nestedName => $nestedRule) {
  //                    if (is_string($nestedName)) {
  //                        $nestedRules["$prefix$nestedName"] = $nestedRule;
  //                        $mappings["$prefix$nestedName"] = $nestedName;
  //                    }
  //                    else
  //                        $nestedRules[] = $nestedRule;
  //                }
  //                $rules["$prefix$ruleName"] = $nestedRules;
  //            }
  //            else {
  //                $rules["$prefix$ruleName"] = $rule;
  //                $mappings["$prefix$ruleName"] = $ruleName;
  //            }
  //        }
  //
  //        // Step 3: mappings.
  //
  //        foreach ($language->_mappings as $ruleName => $cssClass) {
  //            if (strstr($ruleName, ' ')) {
  //                $parts = explode(' ', $ruleName);
  //                $prefixed = array();
  //                foreach ($parts as $part)
  //                    $prefixed[] = "$prefix$part";
  //                $mappings[implode(' ', $prefixed)] = $cssClass;
  //            }
  //            else
  //                $mappings["$prefix$ruleName"] = $cssClass;
  //        }
  //
  //        $this->addStates($states);
  //        $this->addRules($rules);
  //        $this->addMappings($mappings);
  //
  //        return $prefix . 'init';
  //    }

      /**
       * Builds the compiled representation from everything that has been
       * registered on this definition so far.
       *
       * @return HyperlightCompiledLanguage
       */
      private function makeCompiledLanguage() {
          return new HyperlightCompiledLanguage(
              $this->id(),
              $this->_info,
              $this->_extensions,
              $this->_states,
              $this->_rules,
              $this->_mappings,
              $this->_caseInsensitive,
              $this->_postProcessors
          );
      }

      /**
       * Merges newly added states or rules into the existing ones.
       *
       * Entries with an integer key are appended. For string keys, two
       * arrays stored under the same key are merged; in every other case
       * the new value replaces the old one.
       *
       * @param array $old The existing entries.
       * @param array $new The entries to add.
       * @return array The merged entries.
       */
      private static function mergeProperties(array $old, array $new) {
          foreach ($new as $key => $value) {
              if (!is_string($key)) {
                  $old[] = $value;
                  continue;
              }

              // Merge the value stored under this key -- not the complete
              // $new array, which would nest every new entry into $old[$key].
              if (isset($old[$key]) and is_array($old[$key]) and is_array($value))
                  $old[$key] = array_merge($old[$key], $value);
              else
                  $old[$key] = $value;
          }
          return $old;
      }
  }
  /**
   * The compiled form of a language definition, as used by the highlighter.
   *
   * On construction, state lists are flattened into plain lists of rule names
   * and the rules of every state are turned into one merged regular
   * expression plus the state's nesting rules.
   */
  class HyperlightCompiledLanguage {
      private $_id;
      private $_info;
      private $_extensions;
      private $_states;
      private $_rules;
      private $_mappings;
      private $_caseInsensitive;
      private $_postProcessors = array();

      public function __construct($id, $info, $extensions, $states, $rules, $mappings, $caseInsensitive, $postProcessors) {
          $this->_id = $id;
          $this->_info = $info;
          $this->_extensions = $extensions;
          $this->_mappings = $mappings;

          // compileRules() reads both the case flag and the compiled states,
          // so these three assignments have to stay in this order.
          $this->_caseInsensitive = $caseInsensitive;
          $this->_states = $this->compileStates($states);
          $this->_rules = $this->compileRules($rules);

          foreach ($postProcessors as $ruleName => $language) {
              $this->_postProcessors[$ruleName] = HyperLanguage::compile($language);
          }
      }

      public function id() {
          return $this->_id;
      }

      public function name() {
          return $this->_info[HyperLanguage::NAME];
      }

      /**
       * Returns the author entry itself if it is a string, otherwise its
       * name field, or <var>NULL</var> if either is missing.
       */
      public function authorName() {
          if (!array_key_exists(HyperLanguage::AUTHOR, $this->_info)) {
              return null;
          }

          $author = $this->_info[HyperLanguage::AUTHOR];
          if (is_string($author)) {
              return $author;
          }

          return array_key_exists(HyperLanguage::NAME, $author)
              ? $author[HyperLanguage::NAME]
              : null;
      }

      public function authorWebsite() {
          return $this->authorField(HyperLanguage::WEBSITE);
      }

      public function authorEmail() {
          return $this->authorField(HyperLanguage::EMAIL);
      }

      /**
       * Returns the author's e-mail address if there is one, otherwise the
       * author's website (which may be <var>NULL</var> as well).
       */
      public function authorContact() {
          $email = $this->authorEmail();
          if ($email !== null) {
              return $email;
          }

          return $this->authorWebsite();
      }

      public function extensions() {
          return $this->_extensions;
      }

      public function state($stateName) {
          return $this->_states[$stateName];
      }

      public function rule($ruleName) {
          return $this->_rules[$ruleName];
      }

      /**
       * Translates a state name into the CSS class name(s) mapped to it.
       *
       * A name with a direct mapping yields that mapping. Otherwise every
       * space-separated part is mapped on its own; unmapped parts are kept.
       */
      public function className($state) {
          if (array_key_exists($state, $this->_mappings)) {
              return $this->_mappings[$state];
          }

          // No mapping for state.
          if (strstr($state, ' ') === false) {
              return $state;
          }

          // Try mapping parts of nested state name.
          $classes = array();
          foreach (explode(' ', $state) as $part) {
              $classes[] = array_key_exists($part, $this->_mappings)
                  ? $this->_mappings[$part]
                  : $part;
          }

          return implode(' ', $classes);
      }

      public function postProcessors() {
          return $this->_postProcessors;
      }

      /**
       * Looks up one field of an array-valued author entry.
       *
       * @return mixed The field, or <var>NULL</var> if there is no author
       * entry, it is not an array, or it lacks the field.
       */
      private function authorField($field) {
          if (!array_key_exists(HyperLanguage::AUTHOR, $this->_info)) {
              return null;
          }

          $author = $this->_info[HyperLanguage::AUTHOR];
          if (!is_array($author) or !array_key_exists($field, $author)) {
              return null;
          }

          return $author[$field];
      }

      /**
       * Flattens every state into a plain list of rule names. An entry with
       * a string key contributes "<key> <element>" for each of its elements,
       * or just "<key>" for the empty string; <var>NULL</var> entries are
       * dropped.
       */
      private function compileStates($states) {
          $compiled = array();

          foreach ($states as $name => $state) {
              $entries = is_array($state) ? $state : array($state);
              $flat = array();

              foreach ($entries as $key => $elem) {
                  if ($elem === null) {
                      continue;
                  }

                  if (!is_string($key)) {
                      $flat[] = $elem;
                      continue;
                  }

                  $nested = is_array($elem) ? $elem : array($elem);
                  foreach ($nested as $sub) {
                      $flat[] = ($sub === '') ? $key : "$key $sub";
                  }
              }

              $compiled[$name] = $flat;
          }

          return $compiled;
      }

      /**
       * Turns keyword lists into regular expressions, flattens nested rule
       * lists, and then builds, for every compiled state, an array holding
       * the merged expression of its plain rules followed by its nesting
       * rules (keyed by rule name).
       */
      private function compileRules($rules) {
          // Preprocess keyword lists and flatten nested lists.
          $flat = array();

          foreach ($rules as $name => $rule) {
              if (!is_array($rule)) {
                  $flat[$name] = $rule;
                  continue;
              }

              if (!self::isAssocArray($rule)) {
                  // Array represents a list of keywords.
                  $flat[$name] = $this->keywordPattern($rule);
                  continue;
              }

              // Array is a nested list of rules.
              foreach ($rule as $key => $value) {
                  if (is_array($value)) {
                      // Array represents a list of keywords.
                      $value = $this->keywordPattern($value);
                  }

                  $target = (is_string($key) and strlen($key) !== 0) ? "$name $key" : $name;
                  $flat[$target] = $value;
              }
          }

          $compiled = array();

          foreach ($this->_states as $name => $state) {
              $patterns = array();
              $patternNames = array();
              $nesting = array();

              foreach ($state as $ruleName) {
                  $rule = $flat[$ruleName];

                  if ($rule instanceof Rule) {
                      $nesting[$ruleName] = $rule;
                      continue;
                  }

                  $patterns[] = $rule;
                  $patternNames[] = $ruleName;
              }

              $compiled[$name] = array_merge(
                  array(preg_merge('|', $patterns, $patternNames)),
                  $nesting
              );
          }

          return $compiled;
      }

      /**
       * Builds the word-bounded alternation matching a list of keywords,
       * case-insensitive if the language is.
       */
      private function keywordPattern(array $keywords) {
          // End of regular expression matching keywords.
          $suffix = $this->_caseInsensitive ? ')\b/i' : ')\b/';

          return '/\b(?:' . implode('|', $keywords) . $suffix;
      }

      /** Tells whether at least one key of the array is a string. */
      private static function isAssocArray(array $array) {
          return count(array_filter(array_keys($array), 'is_string')) > 0;
      }
  }
  /**
   * Highlights source code using a compiled language definition.
   *
   * An instance keeps its stack of parser states between calls to
   * {@link render()}; call {@link reset()} to start over from the
   * <var>init</var> state.
   */
  class Hyperlight {
      private $_lang;
      private $_result;
      private $_states;
      private $_omitSpans;
      private $_postProcessors = array();
      /**
       * The code of the current render() call, with normalized line breaks.
       * Declared explicitly so that render() does not create a dynamic
       * property.
       */
      private $_code;

      /**
       * @param mixed $lang Either a language name, a
       * <var>HyperlightCompiledLanguage</var> or a <var>HyperLanguage</var>.
       * Any other type triggers an <var>E_USER_ERROR</var>.
       */
      public function __construct($lang) {
          if (is_string($lang))
              $this->_lang = HyperLanguage::compileFromName(strtolower($lang));
          else if ($lang instanceof HyperlightCompiledLanguage)
              $this->_lang = $lang;
          else if ($lang instanceof HyperLanguage)
              $this->_lang = HyperLanguage::compile($lang);
          else
              trigger_error(
                  'Invalid argument type for $lang to Hyperlight::__construct',
                  E_USER_ERROR
              );

          // Every post-processing language gets a highlighter of its own.
          foreach ($this->_lang->postProcessors() as $ppkey => $ppvalue)
              $this->_postProcessors[$ppkey] = new Hyperlight($ppvalue);

          $this->reset();
      }

      /**
       * Returns the compiled language this highlighter works with.
       */
      public function language() {
          return $this->_lang;
      }

      /**
       * Puts the parser back into its initial state.
       */
      public function reset() {
          $this->_states = array('init');
          $this->_omitSpans = array();
      }

      /**
       * Highlights a piece of code and applies fold marks to the result.
       *
       * @param string $code The code to highlight.
       * @return string The generated markup.
       */
      public function render($code) {
          // Normalize line breaks.
          $this->_code = preg_replace('/\r\n?/', "\n", $code);
          $fm = hyperlight_calculate_fold_marks($this->_code, $this->language()->id());
          return hyperlight_apply_fold_marks($this->renderCode(), $fm);
      }

      /**
       * Same as {@link render()}, but <var>echo</var>s the result.
       *
       * @param string $code The code to highlight.
       */
      public function renderAndPrint($code) {
          echo $this->render($code);
      }

      /**
       * Walks over the current code and emits it token by token.
       *
       * In every round the closest match of the current state's rules (or
       * of the end pattern of an enclosing nesting rule) is taken; the text
       * in front of it is emitted plainly and the match either opens a
       * nested state, closes one or more states, or is emitted with its
       * class.
       *
       * @return string The generated markup.
       * @throws NoMatchingRuleException
       */
      private function renderCode() {
          $code = $this->_code;
          $pos = 0;
          $len = strlen($code);
          $this->_result = '';
          $state = array_peek($this->_states);

          // If there are open states (reentrant parsing), open the corresponding
          // tags first:
          for ($i = 1; $i < count($this->_states); ++$i)
              if (!$this->_omitSpans[$i - 1]) {
                  $class = $this->_lang->className($this->_states[$i]);
                  $this->write("<span class=\"$class\">");
              }

          // Emergency break to catch faulty rules.
          $prev_pos = -1;

          while ($pos < $len) {
              // The token next to the current position, after the inner loop completes.
              // i.e. $closest_hit = array($matched_text, $position)
              $closest_hit = array('', $len);
              // The rule that found this token.
              $closest_rule = null;
              $rules = $this->_lang->rule($state);

              foreach ($rules as $name => $rule) {
                  if ($rule instanceof Rule)
                      $this->matchIfCloser(
                          $rule->start(), $name, $pos, $closest_hit, $closest_rule
                      );
                  else if (preg_match($rule, $code, $matches, PREG_OFFSET_CAPTURE, $pos) == 1) {
                      // Search which of the sub-patterns matched.
                      foreach ($matches as $group => $match) {
                          if (!is_string($group))
                              continue;
                          if ($match[1] !== -1) {
                              $closest_hit = $match;
                              // Group names carry `_` where the rule name has a blank.
                              $closest_rule = str_replace('_', ' ', $group);
                              break;
                          }
                      }
                  }
              } // foreach ($rules)

              // If we're currently inside a rule, check whether we've come to the
              // end of it, or the end of any other rule we're nested in.
              if (count($this->_states) > 1) {
                  $n = count($this->_states) - 1;
                  do {
                      $rule = $this->_lang->rule($this->_states[$n - 1]);
                      $rule = $rule[$this->_states[$n]];
                      --$n;
                      if ($n < 0)
                          throw new NoMatchingRuleException($this->_states, $pos, $code);
                  } while ($rule->end() === null);

                  // An integer as "rule" marks the hit as the end of the
                  // state at that stack index.
                  $this->matchIfCloser($rule->end(), $n + 1, $pos, $closest_hit, $closest_rule);
              }

              // We take the closest hit:
              if ($closest_hit[1] > $pos)
                  $this->emit(substr($code, $pos, $closest_hit[1] - $pos));

              $prev_pos = $pos;
              $pos = $closest_hit[1] + strlen($closest_hit[0]);

              // No progress was made (zero-width match): enter the nesting rule
              // right away.
              if ($prev_pos === $pos and is_string($closest_rule))
                  if (array_key_exists($closest_rule, $this->_lang->rule($state))) {
                      array_push($this->_states, $closest_rule);
                      $state = $closest_rule;
                      $this->emitPartial('', $closest_rule);
                  }

              if ($closest_hit[1] === $len)
                  break;
              else if (!is_string($closest_rule)) {
                  // Pop state.
                  if (count($this->_states) <= $closest_rule)
                      throw new NoMatchingRuleException($this->_states, $pos, $code);

                  while (count($this->_states) > $closest_rule + 1) {
                      $lastState = array_pop($this->_states);
                      $this->emitPop('', $lastState);
                  }
                  $lastState = array_pop($this->_states);
                  $state = array_peek($this->_states);
                  $this->emitPop($closest_hit[0], $lastState);
              }
              else if (array_key_exists($closest_rule, $this->_lang->rule($state))) {
                  // Push state.
                  array_push($this->_states, $closest_rule);
                  $state = $closest_rule;
                  $this->emitPartial($closest_hit[0], $closest_rule);
              }
              else
                  $this->emit($closest_hit[0], $closest_rule);
          } // while ($pos < $len)

          // Close any tags that are still open (can happen in incomplete code
          // fragments that don't necessarily signify an error (consider PHP
          // embedded in HTML, or a C++ preprocessor code not ending on newline).
          // The span bookkeeping is restored afterwards because the states
          // themselves stay on the stack.
          $omitSpansBackup = $this->_omitSpans;
          for ($i = count($this->_states); $i > 1; --$i)
              $this->emitPop();
          $this->_omitSpans = $omitSpansBackup;

          return $this->_result;
      }

      /**
       * Matches an expression against the code, starting at <var>$pos</var>,
       * and records the hit if it beats the closest hit so far.
       *
       * @param string $expr The regular expression to try.
       * @param mixed $next The value to store in <var>$closest_rule</var> on success.
       * @param int $pos The offset to start matching at.
       * @param array $closest_hit The best hit so far (text, offset); updated in place.
       * @param mixed $closest_rule The rule of the best hit so far; updated in place.
       */
      private function matchIfCloser($expr, $next, $pos, &$closest_hit, &$closest_rule) {
          $matches = array();
          if (preg_match($expr, $this->_code, $matches, PREG_OFFSET_CAPTURE, $pos) == 1) {
              if (
                  (
                      // Two hits at same position -- compare length
                      // For equal lengths: first come, first serve.
                      $matches[0][1] == $closest_hit[1] and
                      strlen($matches[0][0]) > strlen($closest_hit[0])
                  ) or
                  $matches[0][1] < $closest_hit[1]
              ) {
                  $closest_hit = $matches[0];
                  $closest_rule = $next;
              }
          }
      }

      /**
       * Converts a token to markup: it is rendered by the post-processor
       * registered for the current state if there is one, and HTML-escaped
       * otherwise. The empty token stays empty.
       */
      private function processToken($token) {
          if ($token === '')
              return '';
          $nest_lang = array_peek($this->_states);
          if (array_key_exists($nest_lang, $this->_postProcessors))
              return $this->_postProcessors[$nest_lang]->render($token);
          else
              #return self::htmlentities($token);
              return htmlspecialchars($token, ENT_NOQUOTES);
      }

      /**
       * Writes a complete token, wrapped in a span unless its class is empty.
       */
      private function emit($token, $class = '') {
          $token = $this->processToken($token);
          if ($token === '')
              return;
          $class = $this->_lang->className($class);
          if ($class === '')
              $this->write($token);
          else
              $this->write("<span class=\"$class\">$token</span>");
      }

      /**
       * Writes the token that opens a nested state and leaves its span
       * open; remembers whether a span has been written at all.
       */
      private function emitPartial($token, $class) {
          $token = $this->processToken($token);
          $class = $this->_lang->className($class);
          if ($class === '') {
              if ($token !== '')
                  $this->write($token);
              array_push($this->_omitSpans, true);
          }
          else {
              $this->write("<span class=\"$class\">$token");
              array_push($this->_omitSpans, false);
          }
      }

      /**
       * Writes the token that closes a nested state, followed by the
       * closing tag if a span has been opened for that state.
       */
      private function emitPop($token = '', $class = '') {
          $token = $this->processToken($token);
          if (array_pop($this->_omitSpans))
              $this->write($token);
          else
              $this->write("$token</span>");
      }

      private function write($text) {
          $this->_result .= $text;
      }

  //    // DAMN! What did I need them for? Something to do with encoding …
  //    // but why not use the `$charset` argument on `htmlspecialchars`?
  //    private static function htmlentitiesCallback($match) {
  //        switch ($match[0]) {
  //            case '<': return '&lt;';
  //            case '>': return '&gt;';
  //            case '&': return '&amp;';
  //        }
  //    }
  //
  //    private static function htmlentities($text) {
  //        return htmlspecialchars($text, ENT_NOQUOTES);
  //        return preg_replace_callback(
  //            '/[<>&]/', array('Hyperlight', 'htmlentitiesCallback'), $text
  //        );
  //    }
  } // class Hyperlight
  /**
   * Highlights a piece of code and <var>echo</var>s it inside a wrapping tag.
   *
   * For example, the following
   * <code>
   * hyperlight('<?php echo \'Hello, world\'; ?>', 'php');
   * </code>
   * prints:
   * <code>
   * <pre class="source-code php">...</pre>
   * </code>
   *
   * The script is terminated via <var>exit</var> if the code or the language
   * is empty, or if arrays are passed for both <var>$tag</var> and
   * <var>$attributes</var>.
   *
   * @param string $code The code; it is trimmed before highlighting.
   * @param string $lang The language of the code.
   * @param string $tag The wrapping tag; defaults to <var>pre</var>. An array
   * given here is taken as the attribute list instead.
   * @param array $attributes Attributes for the wrapping tag as a hash of
   * key value pairs. A <var>class</var> entry is appended to the generated
   * class list.
   */
  function hyperlight($code, $lang, $tag = 'pre', array $attributes = array()) {
      if ($code == '')
          exit("`hyperlight` needs a code to work on!");
      if ($lang == '')
          exit("`hyperlight` needs to know the code's language!");

      $tagIsAttributeList = is_array($tag);
      if ($tagIsAttributeList and !empty($attributes))
          exit("Can't pass array arguments for \$tag *and* \$attributes to `hyperlight`!");

      if ($tag == '')
          $tag = 'pre';
      elseif ($tagIsAttributeList) {
          // The attributes have been passed in place of the tag.
          $attributes = $tag;
          $tag = 'pre';
      }

      $lang = htmlspecialchars(strtolower($lang));

      $classes = "source-code $lang";
      $extraAttributes = '';
      foreach ($attributes as $name => $value) {
          if ($name == 'class') {
              $classes .= ' ' . htmlspecialchars($value);
              continue;
          }
          $extraAttributes .= ' ' . htmlspecialchars($name) . '="' .
              htmlspecialchars($value) . '"';
      }

      // Create the highlighter before anything is printed.
      $highlighter = new Hyperlight($lang);

      echo '<' . $tag . ' class="' . $classes . '"' . $extraAttributes . '>';
      $highlighter->renderAndPrint(trim($code));
      echo '</' . $tag . '>';
  }
  /**
   * Highlights the contents of a file. Is the same as:
   * <code>
   * hyperlight(file_get_contents($filename), $lang, $tag, $attributes);
   * </code>
   * except that an empty <var>$lang</var> is looked up from the part of the
   * file name behind its last dot, if there is one.
   * @see hyperlight()
   */
  function hyperlight_file($filename, $lang = null, $tag = 'pre', array $attributes = array()) {
      // Try to guess the language from the file extension.
      if ($lang == '' and ($dot = strrpos($filename, '.')) !== false)
          $lang = HyperLanguage::nameFromExt(substr($filename, $dot + 1));

      $contents = file_get_contents($filename);
      hyperlight($contents, $lang, $tag, $attributes);
  }
  // Short aliases, only defined if the constant HYPERLIGHT_SHORTCUT exists.
  if (defined('HYPERLIGHT_SHORTCUT')) {
      /**
       * Forwards all of its arguments to hyperlight().
       * @see hyperlight()
       */
      function hy() {
          call_user_func_array('hyperlight', func_get_args());
      }

      /**
       * Forwards all of its arguments to hyperlight_file().
       * @see hyperlight_file()
       */
      function hyf() {
          call_user_func_array('hyperlight_file', func_get_args());
      }
  }
  /**
   * Finds the foldable regions (<var>#region</var> / <var>#endregion</var>,
   * <var>#Region</var> / <var>#End Region</var>) of a piece of code.
   *
   * Regions are only looked for in the languages <var>csharp</var> and
   * <var>vb</var>. The n-th begin mark found is paired with the n-th end
   * mark found; if the numbers of begin and end marks differ, no fold marks
   * are returned at all.
   *
   * @param string $code The code to search.
   * @param string $lang The id of the code's language.
   * @return array Zero-based line index of each begin mark => zero-based
   * line index of the matching end mark.
   */
  function hyperlight_calculate_fold_marks($code, $lang) {
      $supporting_languages = array('csharp', 'vb');

      if (!in_array($lang, $supporting_languages, true))
          return array();

      // All marks have to be the first thing on their line; an unanchored
      // end mark would also match e.g. inside a trailing comment.
      $fold_begin_marks = array('/^\s*#Region/', '/^\s*#region/');
      $fold_end_marks = array('/^\s*#End Region/', '/^\s*#endregion/');

      // `\r\n` has to be tried first, otherwise a Windows line break is
      // split twice and every line index behind it is off.
      $lines = preg_split('/\r\n|\r|\n/', $code);

      // preg_grep() preserves the line indices as keys, `+` unites them.
      $fold_begin = array();
      foreach ($fold_begin_marks as $fbm)
          $fold_begin = $fold_begin + preg_grep($fbm, $lines);

      $fold_end = array();
      foreach ($fold_end_marks as $fem)
          $fold_end = $fold_end + preg_grep($fem, $lines);

      if (count($fold_begin) !== count($fold_end) or count($fold_begin) === 0)
          return array();

      $fb = array_keys($fold_begin);
      $fe = array_keys($fold_end);

      $ret = array();
      for ($i = 0; $i < count($fb); $i++)
          $ret[$fb[$i]] = $fe[$i];

      return $ret;
  }
  /**
   * Wraps the foldable regions of highlighted code into spans.
   *
   * The begin line of each region is wrapped in a <var>fold-header</var>
   * span, and the lines behind it, up to and including the end line, are
   * enclosed in a <var>fold</var> span.
   *
   * @param string $code The highlighted code, lines separated by "\n".
   * @param array $fold_marks Zero-based begin line => zero-based end line.
   * @return string The code with the fold markup added.
   */
  function hyperlight_apply_fold_marks($code, array $fold_marks) {
      if (count($fold_marks) === 0)
          return $code;

      $lines = explode("\n", $code);

      foreach ($fold_marks as $begin => $end) {
          // Ignore marks that point outside of the code.
          if (!isset($lines[$begin], $lines[$begin + 1], $lines[$end]))
              continue;

          $lines[$begin] = '<span class="fold-header">' . $lines[$begin] . '<span class="dots"> </span></span>';
          $lines[$begin + 1] = '<span class="fold">' . $lines[$begin + 1];

          if (isset($lines[$end + 1]))
              $lines[$end + 1] = '</span>' . $lines[$end + 1];
          else
              // The region ends on the last line: there is no following
              // line to carry the closing tag, so close behind the end line.
              $lines[$end] .= '</span>';
      }

      return implode("\n", $lines);
  }
  900. ?>