PageRenderTime 88ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/samples/scalate-sample-sitegen/src/highlight/highlight.js

http://github.com/scalate/scalate
JavaScript | 603 lines | 532 code | 54 blank | 17 comment | 145 complexity | e226fbbe9a15eb37f2b794d27eeaf86c MD5 | raw file
  1. /*
  2. Syntax highlighting with language autodetection.
  3. http://softwaremaniacs.org/soft/highlight/
  4. */
  5. var hljs = new function() {
  6. var LANGUAGES = {}
  // selected_languages is used to support the legacy mode of selecting the
  // languages available for highlighting by passing them as arguments into the
  // initHighlighting function. Currently the whole library is expected to
  // contain only those language definitions that actually get used.
  11. var selected_languages = {};
  12. /* Utility functions */
  /**
   * Escapes the characters that are special in HTML text.
   *
   * Every '&' becomes '&amp;', every '<' becomes '&lt;' and every '>'
   * becomes '&gt;'. Quotes are left untouched.
   *
   * @param {string} value Raw text.
   * @returns {string} Text with the three characters replaced by entities.
   */
  function escape(value) {
    var entities = {'&': '&amp;', '<': '&lt;', '>': '&gt;'};
    return value.replace(/[&<>]/gm, function(character) {
      return entities[character];
    });
  }
  /**
   * Tells whether `item` occurs in `array`, compared with loose equality (==).
   *
   * @param {Array} array List to search; a falsy value counts as empty.
   * @param {*} item Value to look for.
   * @returns {boolean} true when some element == item, false otherwise.
   */
  function contains(array, item) {
    if (array) {
      for (var position = 0; position < array.length; position++) {
        if (array[position] == item) {
          return true;
        }
      }
    }
    return false;
  }
  /**
   * Builds a RegExp for a language definition.
   *
   * The expression is always multiline; it is case-insensitive when
   * language.case_insensitive is truthy and global when `global` is truthy.
   *
   * @param {Object} language Language definition (only case_insensitive is read).
   * @param {string} value Regular expression source.
   * @param {boolean} [global] Whether to add the 'g' flag.
   * @returns {RegExp} The compiled expression.
   */
  function langRe(language, value, global) {
    var flags = 'm';
    if (language.case_insensitive) {
      flags += 'i';
    }
    if (global) {
      flags += 'g';
    }
    return new RegExp(value, flags);
  }
  /**
   * Finds the CODE element that leads the content of a PRE element.
   *
   * Children are scanned in order. Text nodes whose value contains
   * whitespace are skipped; the first other child decides the outcome.
   *
   * @param {Node} pre Element whose childNodes are inspected.
   * @returns {Node|null|undefined} The CODE child, null when another kind of
   *     node (or a text node without whitespace) comes first, undefined when
   *     the children run out.
   */
  function findCode(pre) {
    for (var i = 0; i < pre.childNodes.length; i++) {
      // Declared locally: the bare assignment used before created an implicit
      // global and is a ReferenceError in strict mode.
      var node = pre.childNodes[i];
      if (node.nodeName == 'CODE')
        return node;
      if (!(node.nodeType == 3 && node.nodeValue.match(/\s+/)))
        return null;
    }
  }
  /**
   * Collects the plain text of a DOM subtree.
   *
   * Text nodes contribute their value, BR elements contribute a newline and
   * every other child is flattened recursively.
   *
   * @param {Node} block Root of the subtree.
   * @returns {string} Concatenated text in document order.
   */
  function blockText(block) {
    var text = '';
    var children = block.childNodes;
    for (var k = 0; k < children.length; k++) {
      var child = children[k];
      if (child.nodeType == 3) {
        text += child.nodeValue;
        continue;
      }
      text += (child.nodeName == 'BR') ? '\n' : blockText(child);
    }
    return text;
  }
  /**
   * Determines the language of a code block from CSS class names.
   *
   * The classes of the block are checked first, then those of its parent.
   * A leading "language-" prefix is stripped from each class before lookup.
   *
   * @param {Node} block Element with className and a parentNode.
   * @returns {string|undefined} The first class that names a registered
   *     language, or undefined when none does.
   * @throws {string} 'No highlight' when the class "no-highlight" is met
   *     before a registered language.
   */
  function blockLanguage(block) {
    var ownClasses = block.className.split(/\s+/);
    var parentClasses = block.parentNode.className.split(/\s+/);
    var candidates = ownClasses.concat(parentClasses);
    for (var k = 0; k < candidates.length; k++) {
      var name = candidates[k].replace(/^language-/, '');
      if (name == 'no-highlight')
        throw 'No highlight';
      if (LANGUAGES[name])
        return name;
    }
  }
  61. /* Stream merging */
  /**
   * Flattens the element structure below `node` into a list of events.
   *
   * Offsets count characters of the text as blockText would produce it:
   * a text node advances the offset by its length and a BR advances it by
   * one. Every other child yields a 'start' event before its content and a
   * 'stop' event after it.
   *
   * @param {Node} node Root whose descendants are walked.
   * @returns {Array} Events of the form {event, offset, node} in document order.
   */
  function nodeStream(node) {
    var result = [];
    // A named helper replaces arguments.callee, which throws a TypeError in
    // strict mode.
    function walk(parent, offset) {
      for (var i = 0; i < parent.childNodes.length; i++) {
        var child = parent.childNodes[i];
        if (child.nodeType == 3)
          offset += child.nodeValue.length;
        else if (child.nodeName == 'BR')
          offset += 1;
        else {
          result.push({
            event: 'start',
            offset: offset,
            node: child
          });
          offset = walk(child, offset);
          result.push({
            event: 'stop',
            offset: offset,
            node: child
          });
        }
      }
      return offset;
    }
    walk(node, 0);
    return result;
  }
  /**
   * Merges two node-event streams over the same text into one HTML string.
   *
   * Events are taken in offset order. Text between events is HTML-escaped.
   * When a 'stop' event arrives for a node that is not innermost, the nodes
   * opened after it are closed and re-opened so the markup stays nested.
   * Both stream arrays are emptied while merging.
   *
   * @param {Array} stream1 Events as produced by nodeStream.
   * @param {Array} stream2 Events as produced by nodeStream.
   * @param {string} value The plain text both streams refer to.
   * @returns {string} The merged markup.
   */
  function mergeStreams(stream1, stream2, value) {
    var processed = 0;
    var result = '';
    var nodeStack = [];

    // Picks the stream whose next event comes first; on equal offsets a
    // 'stop' from stream2 wins over a 'start' from stream1.
    function selectStream() {
      if (stream1.length && stream2.length) {
        if (stream1[0].offset != stream2[0].offset)
          return (stream1[0].offset < stream2[0].offset) ? stream1 : stream2;
        else
          return (stream1[0].event == 'start' && stream2[0].event == 'stop') ? stream2 : stream1;
      } else {
        return stream1.length ? stream1 : stream2;
      }
    }

    // Serializes the opening tag of a node together with its attributes.
    function open(node) {
      var tag = '<' + node.nodeName.toLowerCase();
      for (var i = 0; i < node.attributes.length; i++) {
        var attribute = node.attributes[i];
        tag += ' ' + attribute.nodeName.toLowerCase();
        if (attribute.nodeValue != undefined) {
          // The value sits inside double quotes, so a literal quote has to
          // be escaped as well or it would end the attribute early.
          tag += '="' + escape(attribute.nodeValue).replace(/"/g, '&quot;') + '"';
        }
      }
      return tag + '>';
    }

    // Serializes the closing tag of a node.
    function close(node) {
      return '</' + node.nodeName.toLowerCase() + '>';
    }

    while (stream1.length || stream2.length) {
      var current = selectStream().shift();
      result += escape(value.substr(processed, current.offset - processed));
      processed = current.offset;
      if (current.event == 'start') {
        result += open(current.node);
        nodeStack.push(current.node);
      } else if (current.event == 'stop') {
        var i = nodeStack.length;
        var node;
        // Close everything opened after (and including) the stopping node.
        do {
          i--;
          node = nodeStack[i];
          result += close(node);
        } while (node != current.node);
        nodeStack.splice(i, 1);
        // Re-open the nodes that are still active.
        while (i < nodeStack.length) {
          result += open(nodeStack[i]);
          i++;
        }
      }
    }
    // The tail after the last event is text like any other slice and must be
    // escaped too; it used to be appended raw.
    return result + escape(value.substr(processed));
  }
  140. /* Core highlighting function */
  141. function highlight(language_name, value) {
  /**
   * Resolves the class names listed in mode.contains into mode objects.
   *
   * The result is stored in mode.sub_modes, ordered first by position in
   * mode.contains and then by position in language.modes.
   *
   * @param {Object} mode Mode with a `contains` array of class names.
   * @param {Object} language Language definition providing `modes`.
   */
  function compileSubModes(mode, language) {
    var resolved = mode.sub_modes = [];
    for (var c = 0; c < mode.contains.length; c++) {
      for (var m = 0; m < language.modes.length; m++) {
        var candidate = language.modes[m];
        if (candidate.className == mode.contains[c]) {
          resolved.push(candidate);
        }
      }
    }
  }
  /**
   * Finds the sub-mode of `mode` that begins with `lexem`.
   *
   * The sub-mode list is compiled on first use and cached on the mode.
   *
   * @param {string} lexem Text to test against each sub-mode's beginRe.
   * @param {Object} mode Currently active mode.
   * @returns {Object|null} The first matching sub-mode, or null.
   */
  function subMode(lexem, mode) {
    if (mode.contains) {
      if (!mode.sub_modes) {
        compileSubModes(mode, language);
      }
      for (var k = 0; k < mode.sub_modes.length; k++) {
        var candidate = mode.sub_modes[k];
        if (candidate.beginRe.test(lexem)) {
          return candidate;
        }
      }
    }
    return null;
  }
  /**
   * Counts how many modes on the stack are ended by `lexem`.
   *
   * @param {number} mode_index Index into the mode stack to start from.
   * @param {string} lexem Text to test against the modes' endRe.
   * @returns {number} 1 when the mode at mode_index ends here; one more for
   *     each endsWithParent mode above the one that ends; 0 when nothing ends.
   */
  function endOfMode(mode_index, lexem) {
    var mode = modes[mode_index];
    if (mode.end && mode.endRe.test(lexem)) {
      return 1;
    }
    if (!mode.endsWithParent) {
      return 0;
    }
    var parentLevel = endOfMode(mode_index - 1, lexem);
    return parentLevel ? parentLevel + 1 : 0;
  }
  /**
   * Tests `lexem` against the illegal pattern of `mode`.
   *
   * @param {string} lexem Text to test.
   * @param {Object} mode Mode whose illegalRe is consulted.
   * @returns {boolean|undefined} The test result, or the falsy illegalRe
   *     itself when the mode has no illegal pattern.
   */
  function isIllegal(lexem, mode) {
    var pattern = mode.illegalRe;
    return pattern && pattern.test(lexem);
  }
  /**
   * Builds the expression that finds the next point of interest in a mode.
   *
   * The alternatives are, without duplicates: the begin patterns of the
   * language modes named in mode.contains, the end pattern of the mode on
   * top of the stack and of each parent reached through endsWithParent, and
   * the mode's illegal pattern.
   *
   * @param {Object} mode Mode to compile terminators for.
   * @param {Object} language Language definition providing `modes`.
   * @returns {RegExp} Expression matching any terminator; it never matches
   *     when the mode has no terminators at all.
   */
  function compileTerminators(mode, language) {
    var terminators = [];

    function addTerminator(re) {
      if (!contains(terminators, re)) {
        terminators[terminators.length] = re;
      }
    }

    if (mode.contains)
      for (var i = 0; i < language.modes.length; i++) {
        if (contains(mode.contains, language.modes[i].className)) {
          addTerminator(language.modes[i].begin);
        }
      }

    var index = modes.length - 1;
    do {
      if (modes[index].end) {
        addTerminator(modes[index].end);
      }
      index--;
    } while (modes[index + 1].endsWithParent);

    if (mode.illegal) {
      addTerminator(mode.illegal);
    }

    // Without terminators the pattern used to come out as '(undefined)',
    // which matches the literal word "undefined" in the highlighted text.
    // An empty negative lookahead never matches, so the rest of the input is
    // treated as one plain chunk.
    if (!terminators.length) {
      return langRe(language, '(?!)');
    }

    // Start at 1: the first terminator already opens the group. Starting at
    // 0 listed it twice.
    var terminator_re = '(' + terminators[0];
    for (var j = 1; j < terminators.length; j++)
      terminator_re += '|' + terminators[j];
    terminator_re += ')';
    return langRe(language, terminator_re);
  }
  /**
   * Splits off the next chunk of input for the mode on top of the stack.
   *
   * The mode's terminator expression is compiled on first use and cached.
   *
   * @param {string} value Complete input text.
   * @param {number} index Position to continue from.
   * @returns {Array} [text before the terminator, the terminator, flag];
   *     the flag is true and the terminator is '' when none was found.
   */
  function eatModeChunk(value, index) {
    var current = modes[modes.length - 1];
    if (!current.terminators) {
      current.terminators = compileTerminators(current, language);
    }
    var rest = value.substr(index);
    var found = current.terminators.exec(rest);
    if (!found) {
      return [rest, '', true];
    }
    var before = (found.index == 0) ? '' : rest.substr(0, found.index);
    return [before, found[0], false];
  }
  /**
   * Looks a matched lexem up in the keyword groups of a mode.
   *
   * The lexem is lower-cased first when the language is case-insensitive.
   *
   * @param {Object} mode Mode whose keywordGroups are searched.
   * @param {Array} match RegExp match; element 0 is the lexem.
   * @returns {Array|boolean} [group name, true] for the first group that
   *     owns the lexem, false when no group does.
   */
  function keywordMatch(mode, match) {
    var word = match[0];
    if (language.case_insensitive) {
      word = word.toLowerCase();
    }
    var groups = mode.keywordGroups;
    for (var group in groups) {
      if (!groups.hasOwnProperty(group)) {
        continue;
      }
      var found = groups[group].hasOwnProperty(word);
      if (found) {
        return [group, found];
      }
    }
    return false;
  }
  /**
   * Escapes a buffer and wraps the keywords found in it in spans.
   *
   * Modes without keywords or lexems get the buffer escaped as is. The
   * lexem expression is compiled on first use and cached on the mode. Each
   * keyword hit increases the enclosing keyword_count.
   *
   * @param {string} buffer Text belonging to `mode`.
   * @param {Object} mode Mode providing keywords and lexems.
   * @returns {string} Markup for the buffer.
   */
  function processKeywords(buffer, mode) {
    if (!(mode.keywords && mode.lexems)) {
      return escape(buffer);
    }
    if (!mode.lexemsRe) {
      var pattern = '(' + mode.lexems[0];
      for (var k = 1; k < mode.lexems.length; k++) {
        pattern += '|' + mode.lexems[k];
      }
      mode.lexemsRe = langRe(language, pattern + ')', true);
    }
    var lexemsRe = mode.lexemsRe;
    var markup = '';
    var consumed = 0;
    // The expression is global and shared, so start scanning from the top.
    lexemsRe.lastIndex = 0;
    for (var match = lexemsRe.exec(buffer); match; match = lexemsRe.exec(buffer)) {
      markup += escape(buffer.substr(consumed, match.index - consumed));
      var hit = keywordMatch(mode, match);
      if (hit) {
        keyword_count += hit[1];
        markup += '<span class="'+ hit[0] +'">' + escape(match[0]) + '</span>';
      } else {
        markup += escape(match[0]);
      }
      consumed = lexemsRe.lastIndex;
    }
    return markup + escape(buffer.substr(consumed, buffer.length - consumed));
  }
  /**
   * Turns the text collected for a mode into markup.
   *
   * A mode with a selected subLanguage is highlighted by a nested highlight
   * run whose keyword count and relevance are added to the enclosing totals;
   * any other mode goes through keyword processing.
   *
   * @param {string} buffer Text belonging to `mode`.
   * @param {Object} mode Mode the text was collected in.
   * @returns {string} Markup for the buffer.
   */
  function processBuffer(buffer, mode) {
    if (!(mode.subLanguage && selected_languages[mode.subLanguage])) {
      return processKeywords(buffer, mode);
    }
    var nested = highlight(mode.subLanguage, buffer);
    keyword_count += nested.keyword_count;
    relevance += nested.relevance;
    return nested.value;
  }
  /**
   * Enters a mode: emits its opening markup and pushes it on the stack.
   *
   * With returnBegin the lexem is neither emitted nor buffered; with
   * excludeBegin it is emitted, escaped, before the opening span; otherwise
   * it becomes the start of the mode's buffer.
   *
   * @param {Object} mode Mode being entered.
   * @param {string} lexem Text that triggered the mode.
   */
  function startNewMode(mode, lexem) {
    var opening = '';
    if (!mode.noMarkup) {
      opening = '<span class="' + mode.displayClassName + '">';
    }
    var emitLexemFirst = !mode.returnBegin && mode.excludeBegin;
    var bufferLexem = !mode.returnBegin && !mode.excludeBegin;
    if (emitLexemFirst) {
      result += escape(lexem);
    }
    result += opening;
    mode.buffer = bufferLexem ? lexem : '';
    modes.push(mode);
  }
  /**
   * Processes one chunk returned by eatModeChunk and updates the parser
   * state (output `result`, mode stack `modes`, `relevance`).
   *
   * @param {string} buffer Text preceding the terminator.
   * @param {string} lexem The terminator that was matched.
   * @param {boolean} end True when no terminator was found (rest of input).
   * @returns {*} A truthy value tells the caller not to advance past the
   *     lexem, so it is examined again by the next mode: returnBegin of the
   *     mode just entered, or returnEnd of the mode just left. false at the
   *     end of input; undefined when the lexem was none of the above.
   * @throws {string} 'Illegal' when the lexem is illegal in the current mode.
   */
  function processModeInfo(buffer, lexem, end) {
    var current_mode = modes[modes.length - 1];
    // End of input: flush what the current mode has collected.
    if (end) {
      result += processBuffer(current_mode.buffer + buffer, current_mode);
      return false;
    }
    // The lexem begins a sub-mode: flush the current buffer and enter it.
    var new_mode = subMode(lexem, current_mode);
    if (new_mode) {
      result += processBuffer(current_mode.buffer + buffer, current_mode);
      startNewMode(new_mode, lexem);
      relevance += new_mode.relevance;
      return new_mode.returnBegin;
    }
    // The lexem ends the current mode and possibly some of its parents.
    var end_level = endOfMode(modes.length - 1, lexem);
    if (end_level) {
      var markup = current_mode.noMarkup?'':'</span>';
      if (current_mode.returnEnd) {
        // The lexem is left for the parent mode.
        result += processBuffer(current_mode.buffer + buffer, current_mode) + markup;
      } else if (current_mode.excludeEnd) {
        // The lexem is emitted after the closing markup.
        result += processBuffer(current_mode.buffer + buffer, current_mode) + markup + escape(lexem);
      } else {
        // The lexem is part of the mode's own text.
        result += processBuffer(current_mode.buffer + buffer + lexem, current_mode) + markup;
      }
      // Close and pop each additional parent that ends together with this mode.
      while (end_level > 1) {
        markup = modes[modes.length - 2].noMarkup?'':'</span>';
        result += markup;
        end_level--;
        modes.length--;
      }
      modes.length--;
      modes[modes.length - 1].buffer = '';
      // A mode may name a follow-up mode (by class name) that starts right
      // after it ends.
      if (current_mode.starts) {
        for (var i = 0; i < language.modes.length; i++) {
          if (language.modes[i].className == current_mode.starts) {
            startNewMode(language.modes[i], '');
            break;
          }
        }
      }
      return current_mode.returnEnd;
    }
    if (isIllegal(lexem, current_mode))
      throw 'Illegal';
  }
  329. var language = LANGUAGES[language_name];
  330. var modes = [language.defaultMode];
  331. var relevance = 0;
  332. var keyword_count = 0;
  333. var result = '';
  334. try {
  335. var index = 0;
  336. language.defaultMode.buffer = '';
  337. do {
  338. var mode_info = eatModeChunk(value, index);
  339. var return_lexem = processModeInfo(mode_info[0], mode_info[1], mode_info[2]);
  340. index += mode_info[0].length;
  341. if (!return_lexem) {
  342. index += mode_info[1].length;
  343. }
  344. } while (!mode_info[2]);
  345. if(modes.length > 1)
  346. throw 'Illegal';
  347. return {
  348. relevance: relevance,
  349. keyword_count: keyword_count,
  350. value: result
  351. }
  352. } catch (e) {
  353. if (e == 'Illegal') {
  354. return {
  355. relevance: 0,
  356. keyword_count: 0,
  357. value: escape(value)
  358. }
  359. } else {
  360. throw e;
  361. }
  362. }
  363. }
  364. /* Initialization */
  /**
   * Pre-compiles the regular expressions of every registered language and
   * fills in mode defaults.
   *
   * For each mode: beginRe, endRe and illegalRe are built (anchored at the
   * start) from begin, end and illegal when present; relevance defaults to
   * 1 and displayClassName defaults to className. The default mode of each
   * language gets its illegalRe as well.
   */
  function compileModes() {
    for (var i in LANGUAGES) {
      if (!LANGUAGES.hasOwnProperty(i))
        continue;
      var language = LANGUAGES[i];
      // Compiled once per language and only when a pattern exists. It used
      // to be rebuilt for every sub-mode, skipped when there were no
      // sub-modes, and built from the text "undefined" when the default
      // mode had no illegal pattern.
      if (language.defaultMode.illegal)
        language.defaultMode.illegalRe = langRe(language, '^(?:' + language.defaultMode.illegal + ')');
      for (var j = 0; j < language.modes.length; j++) {
        var mode = language.modes[j];
        if (mode.begin)
          mode.beginRe = langRe(language, '^' + mode.begin);
        if (mode.end)
          mode.endRe = langRe(language, '^' + mode.end);
        if (mode.illegal)
          mode.illegalRe = langRe(language, '^(?:' + mode.illegal + ')');
        if (mode.relevance == undefined) {
          mode.relevance = 1;
        }
        if (!mode.displayClassName) {
          mode.displayClassName = mode.className;
        }
      }
    }
  }
  /**
   * Normalizes the keyword definitions of every registered language.
   *
   * Each default mode and each mode gets a keywordGroups object unless it
   * already has one or defines no keywords.
   */
  function compileKeywords() {

    // The first own entry of mode.keywords decides the layout: an object
    // value means the keywords are already grouped, anything else means a
    // flat list that is wrapped in a single 'keyword' group.
    function compileModeKeywords(mode) {
      if (mode.keywordGroups) {
        return;
      }
      for (var key in mode.keywords) {
        if (mode.keywords.hasOwnProperty(key)) {
          var alreadyGrouped = mode.keywords[key] instanceof Object;
          mode.keywordGroups = alreadyGrouped ? mode.keywords : {'keyword': mode.keywords};
          return;
        }
      }
    }

    for (var name in LANGUAGES) {
      if (LANGUAGES.hasOwnProperty(name)) {
        var language = LANGUAGES[name];
        compileModeKeywords(language.defaultMode);
        for (var m = 0; m < language.modes.length; m++) {
          compileModeKeywords(language.modes[m]);
        }
      }
    }
  }
  /**
   * One-time setup: compiles modes and keywords and selects all registered
   * languages. Later calls do nothing.
   */
  function initialize() {
    if (!initialize.called) {
      initialize.called = true;
      compileModes();
      compileKeywords();
      selected_languages = LANGUAGES;
    }
  }
  420. /* Public library functions */
  /**
   * Highlights one code element in place.
   *
   * The language is taken from the class names of the block or its parent;
   * without one, every selected language is tried and the one with the
   * highest keyword_count + relevance wins. Nothing changes when the block
   * is marked "no-highlight" or when no markup was produced. Otherwise the
   * block's parent is replaced, inside its own parent, by a fresh
   * <pre><code> pair carrying the highlighted markup.
   *
   * @param {Node} block Code element; its parentNode and that node's
   *     parentNode must exist for the replacement.
   * @param {string} [tabReplace] Replacement for tab characters at the
   *     start of lines.
   */
  function highlightBlock(block, tabReplace) {
    initialize();

    var text;
    var language;
    try {
      text = blockText(block);
      language = blockLanguage(block);
    } catch (e) {
      if (e == 'No highlight')
        return;
      // Anything else is a real failure. It used to be swallowed here, only
      // to resurface later as a less helpful error on undefined state.
      throw e;
    }

    var result;
    if (language) {
      result = highlight(language, text).value;
    } else {
      var max_relevance = 0;
      for (var key in selected_languages) {
        if (!selected_languages.hasOwnProperty(key))
          continue;
        var lang_result = highlight(key, text);
        var relevance = lang_result.keyword_count + lang_result.relevance;
        if (relevance > max_relevance) {
          max_relevance = relevance;
          result = lang_result.value;
          language = key;
        }
      }
    }

    if (result) {
      var class_name = block.className;
      // Plain substring test: the language name is not a regular expression.
      if (class_name.indexOf(language) == -1) {
        class_name += ' ' + language;
      }
      // Preserve markup that was already inside the block by merging it
      // with the highlighting markup.
      var original = nodeStream(block);
      if (original.length) {
        var pre = document.createElement('pre');
        pre.innerHTML = result;
        result = mergeStreams(original, nodeStream(pre), text);
      }
      if (tabReplace) {
        // Only tabs at the start of a line (possibly behind tags) are replaced.
        result = result.replace(/^((<[^>]+>|\t)+)/gm, function(match, p1) {
          return p1.replace(/\t/g, tabReplace);
        });
      }
      // See these 4 lines? This is IE's notion of "block.innerHTML = result". Love this browser :-/
      var container = document.createElement('div');
      container.innerHTML = '<pre><code class="' + class_name + '">' + result + '</code></pre>';
      var environment = block.parentNode.parentNode;
      environment.replaceChild(container.firstChild, block.parentNode);
    }
  }
  /**
   * Highlights every <pre> in the document whose content starts with a
   * <code> element. Runs only once; later calls do nothing.
   *
   * Arguments, if any, are language names; each one that is registered is
   * copied into selected_languages (legacy way of selecting languages).
   */
  function initHighlighting() {
    if (initHighlighting.called)
      return;
    initHighlighting.called = true;
    initialize();
    for (var a = 0; a < arguments.length; a++) {
      var name = arguments[a];
      if (LANGUAGES[name]) {
        selected_languages[name] = LANGUAGES[name];
      }
    }
    var pres = document.getElementsByTagName('pre');
    for (var p = 0; p < pres.length; p++) {
      var code = findCode(pres[p]);
      if (code) {
        highlightBlock(code, hljs.tabReplace);
      }
    }
  }
  /**
   * Schedules initHighlighting to run when the page has loaded.
   *
   * The arguments given here are passed on to initHighlighting. With
   * addEventListener the handler is attached to both 'DOMContentLoaded'
   * and 'load'; otherwise attachEvent('onload') is used, and window.onload
   * as the last resort.
   */
  function initHighlightingOnLoad() {
    var languages = arguments;
    var handler = function() {
      initHighlighting.apply(null, languages);
    };
    if (window.addEventListener) {
      window.addEventListener('DOMContentLoaded', handler, false);
      window.addEventListener('load', handler, false);
      return;
    }
    if (window.attachEvent) {
      window.attachEvent('onload', handler);
      return;
    }
    window.onload = handler;
  }
  /* Interface definition */
  // Public members of the hljs object. LANGUAGES is the registry that the
  // compile and highlight functions above read language definitions from.
  this.LANGUAGES = LANGUAGES;
  this.initHighlightingOnLoad = initHighlightingOnLoad;
  this.highlightBlock = highlightBlock;
  this.initHighlighting = initHighlighting;
  // Common regexps
  // These are regular expression sources (strings), not RegExp objects.
  // Identifier starting with a letter.
  this.IDENT_RE = '[a-zA-Z][a-zA-Z0-9_]*';
  // Identifier that may also start with an underscore.
  this.UNDERSCORE_IDENT_RE = '[a-zA-Z_][a-zA-Z0-9_]*';
  // Digits with an optional fractional part, at a word boundary.
  this.NUMBER_RE = '\\b\\d+(\\.\\d+)?';
  // As NUMBER_RE, plus "0x" followed by letters or digits.
  this.C_NUMBER_RE = '\\b(0x[A-Za-z0-9]+|\\d+(\\.\\d+)?)';
  // Alternation of operators and punctuation; presumably the tokens after
  // which a regular expression literal may start -- confirm against the
  // language definitions that use it.
  this.RE_STARTERS_RE = '!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|\\.|-|-=|/|/=|:|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~';
  // Common modes
  // Entries in `contains` are class names; they are resolved against the
  // modes of the language that uses the mode.
  // Single-quoted string; a newline inside it is illegal.
  this.APOS_STRING_MODE = {
    className: 'string',
    begin: '\'', end: '\'',
    illegal: '\\n',
    contains: ['escape'],
    relevance: 0
  };
  // Double-quoted string; a newline inside it is illegal.
  this.QUOTE_STRING_MODE = {
    className: 'string',
    begin: '"', end: '"',
    illegal: '\\n',
    contains: ['escape'],
    relevance: 0
  };
  // A backslash followed by any character. noMarkup suppresses the span
  // that would otherwise wrap the mode.
  this.BACKSLASH_ESCAPE = {
    className: 'escape',
    begin: '\\\\.', end: '^', noMarkup: true,
    relevance: 0
  };
  // Comment from "//" to the end of the line.
  this.C_LINE_COMMENT_MODE = {
    className: 'comment',
    begin: '//', end: '$',
    relevance: 0
  };
  // Comment from "/*" to "*/". No relevance is given here, so compileModes
  // sets it to 1.
  this.C_BLOCK_COMMENT_MODE = {
    className: 'comment',
    begin: '/\\*', end: '\\*/'
  };
  // Comment from "#" to the end of the line.
  this.HASH_COMMENT_MODE = {
    className: 'comment',
    begin: '#', end: '$'
  };
  // Number as described by C_NUMBER_RE.
  this.C_NUMBER_MODE = {
    className: 'number',
    begin: this.C_NUMBER_RE, end: '^',
    relevance: 0
  };
  548. }();
// Global alias so pages can call initHighlightingOnLoad() without the hljs prefix.
var initHighlightingOnLoad = hljs.initHighlightingOnLoad;