PageRenderTime 54ms CodeModel.GetById 12ms app.highlight 36ms RepoModel.GetById 1ms app.codeStats 1ms

/samples/scalate-sample-sitegen/src/highlight/highlight.js

http://github.com/scalate/scalate
JavaScript | 603 lines | 532 code | 54 blank | 17 comment | 145 complexity | e226fbbe9a15eb37f2b794d27eeaf86c MD5 | raw file
  1/*
  2Syntax highlighting with language autodetection.
  3http://softwaremaniacs.org/soft/highlight/
  4*/
  5
  6var hljs = new function() {
  // Registry of language definitions, keyed by language name.
  var LANGUAGES = {};
  // selected_languages supports the legacy way of choosing which languages
  // are available for highlighting: passing their names as arguments to
  // initHighlighting. Currently the whole library is expected to contain
  // only those language definitions that actually get used.
  var selected_languages = {};
 13
 14  /* Utility functions */
 15
 16  function escape(value) {
 17    return value.replace(/&/gm, '&amp;').replace(/</gm, '&lt;').replace(/>/gm, '&gt;');
 18  }
 19
 20  function contains(array, item) {
 21    if (!array)
 22      return false;
 23    for (var i = 0; i < array.length; i++)
 24      if (array[i] == item)
 25        return true;
 26    return false;
 27  }
 28
 29  function langRe(language, value, global) {
 30    var mode =  'm' + (language.case_insensitive ? 'i' : '') + (global ? 'g' : '');
 31    return new RegExp(value, mode);
 32  }
 33
 34  function findCode(pre) {
 35    for (var i = 0; i < pre.childNodes.length; i++) {
 36      node = pre.childNodes[i];
 37      if (node.nodeName == 'CODE')
 38        return node;
 39      if (!(node.nodeType == 3 && node.nodeValue.match(/\s+/)))
 40        return null;
 41    }
 42  }
 43
 44  function blockText(block) {
 45    var result = '';
 46    for (var i = 0; i < block.childNodes.length; i++)
 47      if (block.childNodes[i].nodeType == 3)
 48        result += block.childNodes[i].nodeValue;
 49      else if (block.childNodes[i].nodeName == 'BR')
 50        result += '\n';
 51      else
 52        result += blockText(block.childNodes[i]);
 53    return result;
 54  }
 55
 56  function blockLanguage(block) {
 57    var classes = block.className.split(/\s+/)
 58    classes = classes.concat(block.parentNode.className.split(/\s+/));
 59    for (var i = 0; i < classes.length; i++) {
 60      var class_ = classes[i].replace(/^language-/, '');
 61      if (class_ == 'no-highlight') {
 62        throw 'No highlight'
 63      }
 64      if (LANGUAGES[class_]) {
 65        return class_;
 66      }
 67    }
 68  }
 69
 70  /* Stream merging */
 71
 72  function nodeStream(node) {
 73    var result = [];
 74    (function (node, offset) {
 75      for (var i = 0; i < node.childNodes.length; i++) {
 76        if (node.childNodes[i].nodeType == 3)
 77          offset += node.childNodes[i].nodeValue.length;
 78        else if (node.childNodes[i].nodeName == 'BR')
 79          offset += 1
 80        else {
 81          result.push({
 82            event: 'start',
 83            offset: offset,
 84            node: node.childNodes[i]
 85          });
 86          offset = arguments.callee(node.childNodes[i], offset)
 87          result.push({
 88            event: 'stop',
 89            offset: offset,
 90            node: node.childNodes[i]
 91          });
 92        }
 93      }
 94      return offset;
 95    })(node, 0);
 96    return result;
 97  }
 98
 99  function mergeStreams(stream1, stream2, value) {
100    var processed = 0;
101    var result = '';
102    var nodeStack = [];
103
104    function selectStream() {
105      if (stream1.length && stream2.length) {
106        if (stream1[0].offset != stream2[0].offset)
107          return (stream1[0].offset < stream2[0].offset) ? stream1 : stream2;
108        else
109          return (stream1[0].event == 'start' && stream2[0].event == 'stop') ? stream2 : stream1;
110      } else {
111        return stream1.length ? stream1 : stream2;
112      }
113    }
114
115    function open(node) {
116      var result = '<' + node.nodeName.toLowerCase();
117      for (var i = 0; i < node.attributes.length; i++) {
118        var attribute = node.attributes[i];
119        result += ' ' + attribute.nodeName.toLowerCase();
120        if (attribute.nodeValue != undefined) {
121          result += '="' + escape(attribute.nodeValue) + '"';
122        }
123      }
124      return result + '>';
125    }
126
127    function close(node) {
128      return '</' + node.nodeName.toLowerCase() + '>';
129    }
130
131    while (stream1.length || stream2.length) {
132      var current = selectStream().splice(0, 1)[0];
133      result += escape(value.substr(processed, current.offset - processed));
134      processed = current.offset;
135      if ( current.event == 'start') {
136        result += open(current.node);
137        nodeStack.push(current.node);
138      } else if (current.event == 'stop') {
139        var i = nodeStack.length;
140        do {
141          i--;
142          var node = nodeStack[i];
143          result += close(node);
144        } while (node != current.node);
145        nodeStack.splice(i, 1);
146        while (i < nodeStack.length) {
147          result += open(nodeStack[i]);
148          i++;
149        }
150      }
151    }
152    result += value.substr(processed);
153    return result;
154  }
155
156  /* Core highlighting function */
157
158  function highlight(language_name, value) {
159    function compileSubModes(mode, language) {
160      mode.sub_modes = [];
161      for (var i = 0; i < mode.contains.length; i++) {
162        for (var j = 0; j < language.modes.length; j++) {
163          if (language.modes[j].className == mode.contains[i]) {
164            mode.sub_modes[mode.sub_modes.length] = language.modes[j];
165          }
166        }
167      }
168    }
169
170    function subMode(lexem, mode) {
171      if (!mode.contains) {
172        return null;
173      }
174      if (!mode.sub_modes) {
175        compileSubModes(mode, language);
176      }
177      for (var i = 0; i < mode.sub_modes.length; i++) {
178        if (mode.sub_modes[i].beginRe.test(lexem)) {
179          return mode.sub_modes[i];
180        }
181      }
182      return null;
183    }
184
185    function endOfMode(mode_index, lexem) {
186      if (modes[mode_index].end && modes[mode_index].endRe.test(lexem))
187        return 1;
188      if (modes[mode_index].endsWithParent) {
189        var level = endOfMode(mode_index - 1, lexem);
190        return level ? level + 1 : 0;
191      }
192      return 0;
193    }
194
195    function isIllegal(lexem, mode) {
196      return mode.illegalRe && mode.illegalRe.test(lexem);
197    }
198
199    function compileTerminators(mode, language) {
200      var terminators = [];
201
202      function addTerminator(re) {
203        if (!contains(terminators, re)) {
204          terminators[terminators.length] = re;
205        }
206      }
207
208      if (mode.contains)
209        for (var i = 0; i < language.modes.length; i++) {
210          if (contains(mode.contains, language.modes[i].className)) {
211            addTerminator(language.modes[i].begin);
212          }
213        }
214
215      var index = modes.length - 1;
216      do {
217        if (modes[index].end) {
218          addTerminator(modes[index].end);
219        }
220        index--;
221      } while (modes[index + 1].endsWithParent);
222
223      if (mode.illegal) {
224        addTerminator(mode.illegal);
225      }
226
227      var terminator_re = '(' + terminators[0];
228      for (var i = 0; i < terminators.length; i++)
229        terminator_re += '|' + terminators[i];
230      terminator_re += ')';
231      return langRe(language, terminator_re);
232    }
233
234    function eatModeChunk(value, index) {
235      var mode = modes[modes.length - 1];
236      if (!mode.terminators) {
237        mode.terminators = compileTerminators(mode, language);
238      }
239      value = value.substr(index);
240      var match = mode.terminators.exec(value);
241      if (!match)
242        return [value, '', true];
243      if (match.index == 0)
244        return ['', match[0], false];
245      else
246        return [value.substr(0, match.index), match[0], false];
247    }
248
249    function keywordMatch(mode, match) {
250      var match_str = language.case_insensitive ? match[0].toLowerCase() : match[0]
251      for (var className in mode.keywordGroups) {
252        if (!mode.keywordGroups.hasOwnProperty(className))
253          continue;
254        var value = mode.keywordGroups[className].hasOwnProperty(match_str);
255        if (value)
256          return [className, value];
257      }
258      return false;
259    }
260
261    function processKeywords(buffer, mode) {
262      if (!mode.keywords || !mode.lexems)
263        return escape(buffer);
264      if (!mode.lexemsRe) {
265        var lexems_re = '(' + mode.lexems[0];
266        for (var i = 1; i < mode.lexems.length; i++)
267          lexems_re += '|' + mode.lexems[i];
268        lexems_re += ')';
269        mode.lexemsRe = langRe(language, lexems_re, true);
270      }
271      var result = '';
272      var last_index = 0;
273      mode.lexemsRe.lastIndex = 0;
274      var match = mode.lexemsRe.exec(buffer);
275      while (match) {
276        result += escape(buffer.substr(last_index, match.index - last_index));
277        var keyword_match = keywordMatch(mode, match);
278        if (keyword_match) {
279          keyword_count += keyword_match[1];
280          result += '<span class="'+ keyword_match[0] +'">' + escape(match[0]) + '</span>';
281        } else {
282          result += escape(match[0]);
283        }
284        last_index = mode.lexemsRe.lastIndex;
285        match = mode.lexemsRe.exec(buffer);
286      }
287      result += escape(buffer.substr(last_index, buffer.length - last_index));
288      return result;
289    }
290
291    function processBuffer(buffer, mode) {
292      if (mode.subLanguage && selected_languages[mode.subLanguage]) {
293        var result = highlight(mode.subLanguage, buffer);
294        keyword_count += result.keyword_count;
295        relevance += result.relevance;
296        return result.value;
297      } else {
298        return processKeywords(buffer, mode);
299      }
300    }
301
302    function startNewMode(mode, lexem) {
303      var markup = mode.noMarkup?'':'<span class="' + mode.displayClassName + '">';
304      if (mode.returnBegin) {
305        result += markup;
306        mode.buffer = '';
307      } else if (mode.excludeBegin) {
308        result += escape(lexem) + markup;
309        mode.buffer = '';
310      } else {
311        result += markup;
312        mode.buffer = lexem;
313      }
314      modes[modes.length] = mode;
315    }
316
317    function processModeInfo(buffer, lexem, end) {
318      var current_mode = modes[modes.length - 1];
319      if (end) {
320        result += processBuffer(current_mode.buffer + buffer, current_mode);
321        return false;
322      }
323
324      var new_mode = subMode(lexem, current_mode);
325      if (new_mode) {
326        result += processBuffer(current_mode.buffer + buffer, current_mode);
327        startNewMode(new_mode, lexem);
328        relevance += new_mode.relevance;
329        return new_mode.returnBegin;
330      }
331
332      var end_level = endOfMode(modes.length - 1, lexem);
333      if (end_level) {
334        var markup = current_mode.noMarkup?'':'</span>';
335        if (current_mode.returnEnd) {
336          result += processBuffer(current_mode.buffer + buffer, current_mode) + markup;
337        } else if (current_mode.excludeEnd) {
338          result += processBuffer(current_mode.buffer + buffer, current_mode) + markup + escape(lexem);
339        } else {
340          result += processBuffer(current_mode.buffer + buffer + lexem, current_mode) + markup;
341        }
342        while (end_level > 1) {
343          markup = modes[modes.length - 2].noMarkup?'':'</span>';
344          result += markup;
345          end_level--;
346          modes.length--;
347        }
348        modes.length--;
349        modes[modes.length - 1].buffer = '';
350        if (current_mode.starts) {
351          for (var i = 0; i < language.modes.length; i++) {
352            if (language.modes[i].className == current_mode.starts) {
353              startNewMode(language.modes[i], '');
354              break;
355            }
356          }
357        }
358        return current_mode.returnEnd;
359      }
360
361      if (isIllegal(lexem, current_mode))
362        throw 'Illegal';
363    }
364
365    var language = LANGUAGES[language_name];
366    var modes = [language.defaultMode];
367    var relevance = 0;
368    var keyword_count = 0;
369    var result = '';
370    try {
371      var index = 0;
372      language.defaultMode.buffer = '';
373      do {
374        var mode_info = eatModeChunk(value, index);
375        var return_lexem = processModeInfo(mode_info[0], mode_info[1], mode_info[2]);
376        index += mode_info[0].length;
377        if (!return_lexem) {
378          index += mode_info[1].length;
379        }
380      } while (!mode_info[2]);
381      if(modes.length > 1)
382        throw 'Illegal';
383      return {
384        relevance: relevance,
385        keyword_count: keyword_count,
386        value: result
387      }
388    } catch (e) {
389      if (e == 'Illegal') {
390        return {
391          relevance: 0,
392          keyword_count: 0,
393          value: escape(value)
394        }
395      } else {
396        throw e;
397      }
398    }
399  }
400
401  /* Initialization */
402
403  function compileModes() {
404    for (var i in LANGUAGES) {
405      if (!LANGUAGES.hasOwnProperty(i))
406        continue;
407      var language = LANGUAGES[i];
408      for (var j = 0; j < language.modes.length; j++) {
409        var mode = language.modes[j];
410        if (mode.begin)
411          mode.beginRe = langRe(language, '^' + mode.begin);
412        if (mode.end)
413          mode.endRe = langRe(language, '^' + mode.end);
414        if (mode.illegal)
415          mode.illegalRe = langRe(language, '^(?:' + mode.illegal + ')');
416        language.defaultMode.illegalRe = langRe(language, '^(?:' + language.defaultMode.illegal + ')');
417        if (mode.relevance == undefined) {
418          mode.relevance = 1;
419        }
420        if (!mode.displayClassName) {
421          mode.displayClassName = mode.className;
422        }
423      }
424    }
425  }
426
427  function compileKeywords() {
428
429    function compileModeKeywords(mode) {
430      if (!mode.keywordGroups) {
431        for (var key in mode.keywords) {
432          if (!mode.keywords.hasOwnProperty(key))
433            continue;
434          if (mode.keywords[key] instanceof Object)
435            mode.keywordGroups = mode.keywords;
436          else
437            mode.keywordGroups = {'keyword': mode.keywords};
438          break;
439        }
440      }
441    }
442
443    for (var i in LANGUAGES) {
444      if (!LANGUAGES.hasOwnProperty(i))
445        continue;
446      var language = LANGUAGES[i];
447      compileModeKeywords(language.defaultMode);
448      for (var j = 0; j < language.modes.length; j++) {
449        compileModeKeywords(language.modes[j]);
450      }
451    }
452  }
453
454  function initialize() {
455    if (initialize.called)
456        return;
457    initialize.called = true;
458    compileModes();
459    compileKeywords();
460    selected_languages = LANGUAGES;
461  }
462
463  /* Public library functions */
464
465  function highlightBlock(block, tabReplace) {
466    initialize();
467
468    try {
469      var text = blockText(block);
470      var language = blockLanguage(block);
471    } catch (e) {
472      if (e == 'No highlight')
473        return;
474    }
475
476    if (language) {
477      var result = highlight(language, text).value;
478    } else {
479      var max_relevance = 0;
480      for (var key in selected_languages) {
481        if (!selected_languages.hasOwnProperty(key))
482          continue;
483        var lang_result = highlight(key, text);
484        var relevance = lang_result.keyword_count + lang_result.relevance;
485        if (relevance > max_relevance) {
486          max_relevance = relevance;
487          var result = lang_result.value;
488          language = key;
489        }
490      }
491    }
492
493    if (result) {
494      var class_name = block.className;
495      if (!class_name.match(language)) {
496        class_name += ' ' + language;
497      }
498      var original = nodeStream(block);
499      if (original.length) {
500        var pre = document.createElement('pre');
501        pre.innerHTML = result;
502        result = mergeStreams(original, nodeStream(pre), text);
503      }
504      if (tabReplace) {
505        result = result.replace(/^((<[^>]+>|\t)+)/gm, function(match, p1, offset, s) {
506          return p1.replace(/\t/g, tabReplace);
507        })
508      }
509      // See these 4 lines? This is IE's notion of "block.innerHTML = result". Love this browser :-/
510      var container = document.createElement('div');
511      container.innerHTML = '<pre><code class="' + class_name + '">' + result + '</code></pre>';
512      var environment = block.parentNode.parentNode;
513      environment.replaceChild(container.firstChild, block.parentNode);
514    }
515  }
516
517  function initHighlighting() {
518    if (initHighlighting.called)
519      return;
520    initHighlighting.called = true;
521    initialize();
522    if (arguments.length) {
523      for (var i = 0; i < arguments.length; i++) {
524        if (LANGUAGES[arguments[i]]) {
525          selected_languages[arguments[i]] = LANGUAGES[arguments[i]];
526        }
527      }
528    }
529    var pres = document.getElementsByTagName('pre');
530    for (var i = 0; i < pres.length; i++) {
531      var code = findCode(pres[i]);
532      if (code)
533        highlightBlock(code, hljs.tabReplace);
534    }
535  }
536
537  function initHighlightingOnLoad() {
538    var original_arguments = arguments;
539    var handler = function(){initHighlighting.apply(null, original_arguments)};
540    if (window.addEventListener) {
541      window.addEventListener('DOMContentLoaded', handler, false);
542      window.addEventListener('load', handler, false);
543    } else if (window.attachEvent)
544      window.attachEvent('onload', handler);
545    else
546      window.onload = handler;
547  }
548
549  /* Interface definition */
550
551  this.LANGUAGES = LANGUAGES;
552  this.initHighlightingOnLoad = initHighlightingOnLoad;
553  this.highlightBlock = highlightBlock;
554  this.initHighlighting = initHighlighting;
555
556  // Common regexps
557  this.IDENT_RE = '[a-zA-Z][a-zA-Z0-9_]*';
558  this.UNDERSCORE_IDENT_RE = '[a-zA-Z_][a-zA-Z0-9_]*';
559  this.NUMBER_RE = '\\b\\d+(\\.\\d+)?';
560  this.C_NUMBER_RE = '\\b(0x[A-Za-z0-9]+|\\d+(\\.\\d+)?)';
561  this.RE_STARTERS_RE = '!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|\\.|-|-=|/|/=|:|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~';
562
563  // Common modes
564  this.APOS_STRING_MODE = {
565    className: 'string',
566    begin: '\'', end: '\'',
567    illegal: '\\n',
568    contains: ['escape'],
569    relevance: 0
570  };
571  this.QUOTE_STRING_MODE = {
572    className: 'string',
573    begin: '"', end: '"',
574    illegal: '\\n',
575    contains: ['escape'],
576    relevance: 0
577  };
578  this.BACKSLASH_ESCAPE = {
579    className: 'escape',
580    begin: '\\\\.', end: '^', noMarkup: true,
581    relevance: 0
582  };
583  this.C_LINE_COMMENT_MODE = {
584    className: 'comment',
585    begin: '//', end: '$',
586    relevance: 0
587  };
588  this.C_BLOCK_COMMENT_MODE = {
589    className: 'comment',
590    begin: '/\\*', end: '\\*/'
591  };
592  this.HASH_COMMENT_MODE = {
593    className: 'comment',
594    begin: '#', end: '$'
595  };
596  this.C_NUMBER_MODE = {
597    className: 'number',
598    begin: this.C_NUMBER_RE, end: '^',
599    relevance: 0
600  };
601}();
602
603var initHighlightingOnLoad = hljs.initHighlightingOnLoad;