PageRenderTime 108ms CodeModel.GetById 52ms app.highlight 50ms RepoModel.GetById 1ms app.codeStats 1ms

/bin/otlParser.rb

https://bitbucket.org/Mr_Shark/rc-files
Ruby | 606 lines | 518 code | 40 blank | 48 comment | 13 complexity | 1eba6b26493d98ad9eb909de3a257f39 MD5 | raw file
  1#!/usr/bin/env ruby
  2# Module for parsing/formatting TVO outlines.
  3# $Id: otlParser.rb 118 2006-04-17 21:12:45Z ned $
  4
  5require 'stringio'
  6require 'getoptlong'
  7
  8module TVO
  9
 10  RETodo1      = /\b(TODO|XXX|NOTE)\b/
 11  REStandout   = /\*\*\s*\b(.+?)\b\s*\*\*/
 12
 13  RETagDef     = /<id=([^>]+)>|\[\[([^\[\]]+)\]\]/i
 14  REExternTagRef     = /<url:\s*([^>]+)\s*>|\[([a-z]+:[^\]]+)\]/i
 15  RETagRef     = /\[([^\[\]:]+)\]/
 16  REVimTagRef  = /\[(:[^\[\]:]+)\]/
 17  REHTMLOnly   = /(.*?)\s*<html:\s*([^>]+|<[^>]+>)\s*>\s*(.*)/i
 18
 19  REItalic    = /I<(.+?)>/
 20  REBold      = /B<(.+?)>/
 21  RECode      = /C<(.+?)>/
 22  REUnderline = /U<(.+?)>/
 23
 24  RETextLeader = /^\t*\|\s*/
 25  REText       = /^\t*\|\s*(.*)/
 26
 27  # outlineItem := head text? outlineItem*
 28  # 
 29  # from Vim syntax definition:
 30  # text
 31  #   contains=vikiHyperLinks,RETodo,RETagDef,RETagRef,RETextLeader nextgroup=REText 
 32  # RETabs : /^\t\{0-9}[^\t|].*/
 33  #   contains=vikiHyperLinks,RETodo,RETagDef,RETagRef nextgroup=RETabs,REText 
 34  # vikiHyperLinks = vikiLink,vikiExtendedLink,vikiURL,vikiInexistentLink 
 35  # 
 36  class Item
 37  public
 38
 39    attr_accessor :parent, :keepHead
 40    attr_reader :head, :children, :level
 41
 42    def initialize(level, head='', text=nil, parent=nil, children=[])
 43      @level = level 
 44      @head = head
 45      @text = text ? text.to_a.join("\n").split("\n") : []
 46      @children = children
 47      @parent = parent
 48      @keepHead = false
 49      self
 50    end
 51
 52    def head=(headText)
 53      if headText.nil?
 54        @head = headText
 55      elsif headText[0..0] == '+'
 56        @keepHead = true
 57        @head = headText[1..-1]
 58      else
 59        @head = headText
 60      end
 61    end
 62
 63    def addText(text)
 64      @text.push(text)
 65    end
 66
 67    def addChild(child)
 68      @children.push(child)
 69      child.parent = self
 70    end
 71
 72    def children=(_children)
 73      _children.each { |c| addChild(c) }
 74    end
 75
 76    def text
 77      @text
 78    end
 79
 80    def text=(_text)
 81      @text = _text.split("\n")
 82    end
 83
 84    def each_text_line(&blk)
 85      text.each(&blk)
 86    end
 87
 88    # returns array of arrays [marker, para]
 89    # marker is '' or '-' or '*'
 90    # para is array of paragraph lines 
 91    def textParagraphs
 92      paras = []
 93      thisPara = []
 94      markerLength = 0
 95      marker = ''
 96      text.each do |textline|
 97        case textline
 98        when /^(\s*([-*])\s*)(.*)/ 
 99          paras.push([ marker, thisPara ])
100          marker = $2
101          markerLength = $1.length
102          thisPara = [ $3 ]
103        when /^(\s*)(.+)/
104          if $1.length == markerLength
105            thisPara.push($2)
106          else
107            paras.push([ marker, thisPara ])
108            markerLength = $1.length
109            thisPara = [ $2 ]
110            marker = ''
111          end
112        when /^\s*$/
113          paras.push([ marker, thisPara ])
114          thisPara = []
115          marker = ''
116        end
117      end
118      paras.push([ marker, thisPara ])
119      return paras.reject { |p| p[1].length == 0 }
120    end
121
122    # calls given block with:
123    #   array of related lines
124    #   marker ('' or '-' or '*')
125    def relatedTextParagraphsDo
126      lastMarker = ''
127      related = []
128      paras = textParagraphs
129      paras.push([nil, []])  # to flush last one
130      paras.each do |p|
131        marker = p[0]
132        textLines = p[1]
133        if marker == lastMarker
134          related.push(textLines)
135        else
136          # process related paragraphs if any
137          if related.length > 0
138            yield related, lastMarker
139          end
140          lastMarker = marker
141          related = [ textLines ]
142        end
143      end
144    end
145
146  end
147
148  class Formatter
149  protected
150
151    # default output is just flattened.
152    def printHead?(item)
153      return (!@textOnly || item.keepHead)
154    end
155
156    def visitHead(item,seq=0)
157      return unless printHead?(item)
158      file().puts(embellish(item.head), "")
159    end
160
161    def visitText(item,seq=0)
162      item.text.each { |textLine| file.puts(embellish(textLine)) }
163      file.puts("") if item.text.length > 0
164    end
165
166    def visitItem(item,seq=0)
167      if item.level >= 0
168        visitHead(item,seq)
169        visitText(item,seq)
170      end
171      item.children.each_with_index { |ch,n| visitItem(ch,n) }
172      nil
173    end
174
175    # format individual spans
176
177    def italic(text) ; text; end
178    def bold(text) ; text; end
179    def code(text) ; text; end
180    def underline(text) ; text; end
181    def standout(text) ; text; end
182    def tagDef(text) ; text; end
183    def tagRef(text) ; text; end
184    def vimTagRef(text) ; text; end
185    def htmlOnly(text); end
186
187    def embellish(text)
188      text.
189        gsub(REItalic)    { |s| italic($1) }.
190        gsub(REBold)      { |s| bold($1) }.
191        gsub(RECode)      { |s| code($1) }.
192        gsub(REUnderline) { |s| underline($1) }.
193        gsub(REStandout)  { |s| standout($1) }.
194        gsub(RETagDef)    { |s| tagDef($1||$2) }.
195        gsub(RETagRef)    { |s| tagRef($1) }.
196        gsub(REVimTagRef) { |s| vimTagRef($1) }.
197        gsub(REHTMLOnly)  { |s| htmlOnly($1) }
198    end
199
200  public
201
202    def self.formatterNames
203      TVO.constants.
204        select { |c|
205          cl = TVO.const_get(c) rescue ''
206          cl.kind_of?(Class) && cl <= self
207         }.collect { |cn| cn.sub(/Formatter$/, '') }.
208         sort
209    end
210
211    attr_accessor :file, :textOnly
212
213    def initialize(_file=$stdout)
214      @file = _file
215      @textOnly = false
216    end
217
218    def format(outlineRoot)
219      visitItem(outlineRoot)
220    end
221  end
222
223  # Output TVO again (for building OTL files programmatically)
224  class OutlineFormatter < Formatter
225  protected
226    Prefixes = (0..9).to_a.collect { |n| ("\t" * n) }
227
228    def prefixForLevel(level)
229      Prefixes[level] || ((level < 0) ? "" : ("\t" * level))
230    end
231
232    def visitHead(item,seq=0)
233      return unless printHead?(item)
234      file.print(prefixForLevel(item.level), item.head, "\n")
235    end
236
237    def visitText(item,seq=0)
238      prefix = prefixForLevel(item.level) + '| '
239      item.text.each { |tline| file.print(prefix, tline, "\n") }
240    end
241
242  end
243
244  # Format outline as h1-h6/ul
245  # Classes used are:
246  # <a href="">
247  #   otlExternTagRef
248  #   otlTagRef
249  # <a name="">
250  #   otlTagDef
251  # <span>
252  #   otlHTMLOnly
253  #   otlTodo
254  #   otlStandout
255  #   otlVimTagRef
256  #   otlUnderline
257  # <hr>
258  #   h1 .. h<#>
259  # <h1> .. <h5>
260  #   h1 .. h6
261  # <h6>
262  #   h6 .. h<#>
263  # <ul>,<li>
264  #   t<#>pd  (if marker was '-')
265  #   t<#>pa  (if marker was '*')
266  # <div>,<p>
267  #   t<#>p
268  #
269  #
270  class HTMLFormatter < Formatter
271
272  def self.quoted(text)
273    text.gsub(/&/, '&amp;').  gsub(/</, '&lt;').  gsub(/>/, '&gt;')
274  end
275
276  def self.requoted(re)
277    Regexp.new(re.source.gsub(/\\\\/, '\\'). gsub(/</, '&lt;').  gsub(/>/, '&gt;'))
278  end
279
280  RETagDef     = /&lt;id=([^&]+)&gt;|\[\[([^\[\]]+)\]\]/i
281  REExternTagRef     = /&lt;url:\s*([^>]+)\s*&gt;|\[([a-z]+:[^\]]+)\]/i
282  RETagRef     = /\[([^\[\]:&]+)\]/
283  REVimTagRef  = /\[(:[^\[\]:&]+)\]/
284  REHTMLOnly   = /(.*?)\s*&lt;html:\s*(.+?|.*&lt;.+?&gt;)\s*&gt;\s*(.*)/i
285
286  REItalic    = /I&lt;(.+?)&gt;/
287  REBold      = /B&lt;(.+?)&gt;/
288  RECode      = /C&lt;(.+?)&gt;/
289  REUnderline = /U&lt;(.+?)&gt;/
290  REGtLt      = /&&([gl]t;)/
291
292  protected
293    # notice in-text markings
294    # Would be run after quoted
295    def decorated(textLine)
296      if textLine.match(REHTMLOnly)
297        return textLine.
298          gsub(REHTMLOnly) do |s|
299            "#{decorated($1)} <span class=\"otlHTMLOnly\">#{$2}</span> #{decorated($3)}"
300          end
301      else
302        return textLine.
303          gsub(REGtLt)  {|s| "&#{$1}" }.
304          gsub(REItalic)  {|s| "<i>#{$1}</i>" }.
305          gsub(REBold)  {|s| "<strong>#{$1}</strong>" }.
306          gsub(RECode)  {|s| "<tt>#{$1}</tt>" }.
307          gsub(REUnderline)  {|s| "<span class=\"otlUnderline\">#{$1}</span>" }.
308          gsub(RETodo1)   {|s| "<span class=\"otlTodo\">#{s}</span>" }.
309          gsub(REStandout)   {|s| "<span class=\"otlStandout\">#{$1}</span>" }.
310          gsub(RETagDef)  {|s| "<a class=\"otlTagDef\" name=\"#{urlEncoded($1||$2)}\"></a>" }.
311        gsub(REExternTagRef){|s|
312          "<a class=\"otlExternTagRef\" href=\"#{urlEncoded($1||$2)}\">#{$1||$2}</a>" }.
313        gsub(RETagRef)  do |s|
314          url=dest=$1
315          if dest =~ /^--\s*(.+)\s*--$/
316            url = dest = $1
317          end
318          if dest =~ /^([^#]+)#([^#]+)$/
319            url=dest
320            dest=$1
321          end
322          if File.readable?(dest)
323            "<a class=\"otlExternTagRef\" href=\"#{urlEncoded(url)}\">#{url}</a>"
324          else
325            "<a class=\"otlTagRef\" href=\"##{urlEncoded(url)}\">#{url}</a>"
326          end
327        end.
328        gsub(REVimTagRef) { "<span class=\"otlVimTagRef\">#{$&}</span>" }
329      end
330    end
331
332    def quoted(text)
333      self.class.quoted(text).gsub(/\n/, "\n" + (" " * @nest))
334    end
335
336    def urlEncoded(text)
337      text.gsub(/[^#.A-Za-z0-9]/) { |c| sprintf("%%%02X", c[0]) }
338    end
339
340    def htmlTag(tagname, attribs={})
341      file.print("\n", " " * @nest)
342      file.print('<', tagname)
343      attribs.each_pair { |k,v| file.print(" #{k}=\"#{quoted(v)}\"") }
344      if block_given?
345        file.print('>')
346        @nest += 1
347        text = yield
348        @nest -= 1
349        file.print(decorated(quoted(text))) if text
350        file.print('</', tagname, '>')
351      else
352        file.print(' />')
353      end
354      nil
355    end
356
357    def tagAndClassForHead(itemLevel)
358      hLevel = "h#{itemLevel}"
359      tag = (itemLevel.between?(1,6) ? hLevel : 'h6')
360      return *[tag, hLevel]
361    end
362
363    def visitHead(item,seq=0)
364      return unless printHead?(item)
365      itemLevel = item.level + 1
366      (tag, hLevel) = tagAndClassForHead(itemLevel)
367      if itemLevel == 1 && seq > 0
368        htmlTag('hr', { :class => hLevel })
369      end
370      htmlTag(tag, { :class => hLevel } ) { item.head }
371    end
372
373    def tagsAndClassForTextPara(itemLevel,marker)
374      case marker
375      when '-'
376        return *['ul','li',"t#{itemLevel}pd"]
377      when '*'
378        return *['ul','li',"t#{itemLevel}pa"]
379      else
380        return *['div','p',"t#{itemLevel}p"]
381      end
382    end
383
384    def formatTextParagraph(para, itemTag, itemClass)
385      htmlTag(itemTag, {:class => itemClass }) { para.join("\n") }
386    end
387
388    def visitText(item,seq=0)
389      item.relatedTextParagraphsDo do |related, marker|
390        (groupTag,itemTag,itemClass) = tagsAndClassForTextPara(item.level,marker)
391        htmlTag(groupTag, {:class => itemClass }) do
392          related.each { |p| formatTextParagraph(p, itemTag, itemClass) }
393              nil
394          end
395        end
396    end
397
398  public
399    attr_accessor :stylesheet
400
401    def self.defaultStylesheet
402      "http://www.w3.org/StyleSheets/Core/Modernist"
403    end
404
405    def initialize(_file=$stdout)
406      super
407      @nest = 0
408      @stylesheet = self.class.defaultStylesheet
409    end
410
411    def format(outlineRoot)
412      file.print('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">')
413      htmlTag('html') do
414        htmlTag('head') do
415          htmlTag('title') { outlineRoot.children[0].head }
416          htmlTag('link', { :rel => 'stylesheet', :type => 'text/css', :href => stylesheet() })
417        end
418        htmlTag('body') { visitItem(outlineRoot) }
419      end
420    end
421
422  end
423
424  # Format entire outline as nested series of ul/li/ul
425  class HTMLListFormatter < HTMLFormatter
426
427    def visitHead(item,seq=0)
428      return unless printHead?(item)
429      hLevel = "h#{item.level + 1}"
430      htmlTag('li', { :class => hLevel } ) { item.head }
431    end
432
433    def visitItem(item, seq=0)
434      hLevel = "h#{item.level + 1}"
435      if item.level >= 0
436        if item.level == 0 && seq > 0
437          htmlTag('hr', { :class => hLevel })
438        end
439        visitHead(item,seq)
440        visitText(item,seq)
441      end
442      if item.children.length > 0
443        htmlTag('ul', { :class => hLevel }) do
444          item.children.each_with_index { |ch,n| visitItem(ch,n) }
445          nil
446        end
447      end
448    end
449  end
450
451  # Construct an OutlineItem (the root item) from input text.
452  class Parser
453  protected
454    @@debug = false
455
456    def gets(sepString = $/)
457      retval = @getback.gets(sepString) || @file.gets(sepString)
458      if retval.nil?
459        $stderr.puts("(EOF)") if @@debug
460        throw(:eof, nil) 
461      end
462      return retval.chomp(sepString)
463    end
464
465    def puts(line)
466      @pushback.puts(line)
467      @pushback.sync
468    end
469
470    def head(level)
471      $stderr.print("Looking for head(#{level})") if @@debug
472      line = gets
473      if m = line.match(@tabREs[level]) and m[2][0..0] != '|'
474        $stderr.print("... got #{m[2]}\n") if @@debug
475        return m[2]
476      else
477        puts(line)
478        $stderr.print("... rej #{line.inspect}\n") if @@debug
479        return nil
480      end
481    end
482
483    def text(level)
484      $stderr.print("Looking for text(#{level})") if @@debug
485      line = gets
486      if m = line.match(@tabREs[level]) and m[2].match(/\|\s?(.*)/)
487        $stderr.print("... got #{m[2][2..-1]}\n") if @@debug
488        return $1
489      else
490        puts(line)
491        $stderr.print("... rej #{line.inspect}\n") if @@debug
492        return nil
493      end
494    end
495
496    # get next outline item that starts with (at least) "level" tabs.
497    # return single item.
498    def item(level)
499      throw(:toodeep, nil) if level > 9
500      catch(:eof) do
501        catch(:toodeep) do
502          retval = Item.new(level)
503          retval.head = head(level)
504          return nil unless retval.head
505          catch(:eof) do
506            while t = text(level)
507              retval.addText(t)
508            end
509          end
510          retval.children = items(level+1)
511          $stderr.puts("Returning item [level=#{level}] [head=\"#{retval.head}\"] [text=#{retval.text.length}lns]") if @@debug
512          retval
513        end
514      end
515    end
516
517    # return array of items at the given level.
518    def items(level)
519      retval = []
520      while nextItem = item(level)
521        retval.push(nextItem)
522      end
523      return retval
524    end
525
526  public
527
528    def initialize(file=$stdin)
529      @pbString = ""
530      @pushback = StringIO.new(@pbString)
531      @getback = StringIO.new(@pbString)
532      @file=file
533      @tabREs = (0..9).to_a.collect { |n| Regexp.new("^(\\t{#{n}})(\\S.*)") } 
534    end
535
536    def outline
537      return Item.new(-1, '', nil, nil, items(0))
538    end
539
540    def Parser.debugMode=(bool)
541      @@debug = bool
542    end
543
544  end # class Parser
545
546  # read options from ARGV
547  def parseAndFormat
548
549    # parse arguments
550    formatType = ''
551    outputFileName = nil
552    textOnly = false
553
554    parser = GetoptLong.new
555    parser.set_options(
556      [ '--format', '-f', GetoptLong::REQUIRED_ARGUMENT],
557      [ '--help', '-h', GetoptLong::NO_ARGUMENT],
558      [ '--output', '-o', GetoptLong::REQUIRED_ARGUMENT],
559      [ '--debug', '-d', GetoptLong::NO_ARGUMENT],
560      [ '--stylesheet', '-s', GetoptLong::REQUIRED_ARGUMENT],
561      [ '--include', '-i', GetoptLong::REQUIRED_ARGUMENT],
562      [ '--textonly', '-t', GetoptLong::NO_ARGUMENT])
563    parser.each_option do |name, arg|
564      case name
565      when '--format'
566        formatType = arg
567      when '--help'
568        $stderr.print <<-EOF
569          Usage: #{$0} [opt] [file [...]]
570          opt is one or more of:
571            --format, -f #{ "<'" + Formatter.formatterNames().join("'|'") + "'>" }  set output format type
572            --help, -h                            display this help
573            --output, -o <filename>               output to file named filename instead of stdout
574            --debug, -d                           turn on parser debugging to stderr
575            --textonly, -t                        omit heads except those starting with '+'
576            --stylesheet, -s <filename>           link to stylesheet named filename (default=#{HTMLFormatter.defaultStylesheet})
577            --include, -i <filename>              include Ruby module filename
578EOF
579        exit(0)
580      when '--output'
581        outputFileName = arg
582      when '--debug'
583        Parser.debugMode = true
584      when '--textonly'
585        textOnly = true
586      when '--include'
587        require arg
588      end
589    end
590
591    outputFile = outputFileName.nil? ? $stdout : File.open(outputFileName, 'w')
592    outline = Parser.new(ARGF).outline
593    formatterClass = TVO.const_get("#{formatType}Formatter")
594    formatter = formatterClass.new(outputFile)
595
596    formatter.textOnly = textOnly
597    formatter.format(outline)
598
599  end
600
601end # module TVO
602
# Run the command-line front end only when this file is executed directly,
# not when it is loaded as a library.
if __FILE__ == $0
  include TVO
  parseAndFormat
end