PageRenderTime 41ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/vimfiles/otlParser.rb

https://bitbucket.org/halostatue/vim-config
Ruby | 606 lines | 518 code | 40 blank | 48 comment | 13 complexity | bdcdd8e75a99c9f42acbf46a6dba23ea MD5 | raw file
Possible License(s): GPL-2.0
  1. #!/usr/bin/env ruby
  2. # Module for parsing/formatting TVO outlines.
  3. # $Id: otlParser.rb 118 2006-04-17 21:12:45Z ned $
  4. require 'stringio'
  5. require 'getoptlong'
  6. module TVO
  7. RETodo1 = /\b(TODO|XXX|NOTE)\b/
  8. REStandout = /\*\*\s*\b(.+?)\b\s*\*\*/
  9. RETagDef = /<id=([^>]+)>|\[\[([^\[\]]+)\]\]/i
  10. REExternTagRef = /<url:\s*([^>]+)\s*>|\[([a-z]+:[^\]]+)\]/i
  11. RETagRef = /\[([^\[\]:]+)\]/
  12. REVimTagRef = /\[(:[^\[\]:]+)\]/
  13. REHTMLOnly = /(.*?)\s*<html:\s*([^>]+|<[^>]+>)\s*>\s*(.*)/i
  14. REItalic = /I<(.+?)>/
  15. REBold = /B<(.+?)>/
  16. RECode = /C<(.+?)>/
  17. REUnderline = /U<(.+?)>/
  18. RETextLeader = /^\t*\|\s*/
  19. REText = /^\t*\|\s*(.*)/
  # outlineItem := head text? outlineItem*
  #
  # from Vim syntax definition:
  # text
  #   contains=vikiHyperLinks,RETodo,RETagDef,RETagRef,RETextLeader nextgroup=REText
  # RETabs : /^\t\{0-9}[^\t|].*/
  #   contains=vikiHyperLinks,RETodo,RETagDef,RETagRef nextgroup=RETabs,REText
  # vikiHyperLinks = vikiLink,vikiExtendedLink,vikiURL,vikiInexistentLink
  #
  # One node of an outline: a head line, a list of body text lines and a list
  # of child items.
  class Item
    attr_accessor :parent, :keepHead
    attr_reader :head, :children, :level, :text

    # level    - nesting depth stored on the item
    # head     - head line (stored as given; no '+' handling here)
    # text     - nil, a String (split on newlines) or an Array of Strings
    # parent   - enclosing item, or nil
    # children - array stored as the child list (parents are not updated)
    def initialize(level, head = '', text = nil, parent = nil, children = [])
      @level = level
      @head = head
      # Array() accepts both a String and an Array; String#to_a no longer
      # exists on Ruby >= 1.9.
      @text = text ? Array(text).join("\n").split("\n") : []
      @children = children
      @parent = parent
      @keepHead = false
    end

    # Sets the head line. A leading '+' is stripped and sets keepHead.
    def head=(headText)
      if !headText.nil? && headText[0..0] == '+'
        @keepHead = true
        @head = headText[1..-1]
      else
        @head = headText
      end
    end

    # Appends one line of body text.
    def addText(text)
      @text.push(text)
    end

    # Appends child and makes this item its parent.
    def addChild(child)
      @children.push(child)
      child.parent = self
    end

    # Appends every given child (existing children are kept).
    def children=(_children)
      _children.each { |c| addChild(c) }
    end

    # Replaces the body text with the lines of the given String.
    def text=(_text)
      @text = _text.split("\n")
    end

    # Yields each body text line to the block.
    def each_text_line(&blk)
      text.each(&blk)
    end

    # returns array of arrays [marker, para]
    # marker is '' or '-' or '*'
    # para is array of paragraph lines
    def textParagraphs
      paras = []
      thisPara = []
      markerLength = 0
      marker = ''
      text.each do |textline|
        case textline
        when /^(\s*([-*])\s*)(.*)/
          # A list marker always starts a new paragraph.
          paras.push([marker, thisPara])
          marker = $2
          markerLength = $1.length
          thisPara = [$3]
        when /^(\s*)(\S.*)/
          # \S keeps whitespace-only lines out of this branch so that they
          # reach the paragraph-break branch below.
          if $1.length == markerLength
            thisPara.push($2)
          else
            paras.push([marker, thisPara])
            markerLength = $1.length
            thisPara = [$2]
            marker = ''
          end
        when /^\s*$/
          paras.push([marker, thisPara])
          thisPara = []
          marker = ''
        end
      end
      paras.push([marker, thisPara])
      paras.reject { |p| p[1].empty? }
    end

    # calls given block with:
    # array of related paragraphs (each an array of lines)
    # marker ('' or '-' or '*')
    def relatedTextParagraphsDo
      lastMarker = ''
      related = []
      # The [nil, []] sentinel never equals a real marker, so it flushes the
      # final group.
      (textParagraphs + [[nil, []]]).each do |marker, textLines|
        if marker == lastMarker
          related.push(textLines)
        else
          yield(related, lastMarker) unless related.empty?
          lastMarker = marker
          related = [textLines]
        end
      end
    end
  end
  # Base formatter: writes every head and text line of an outline to `file`
  # as flat text. Subclasses override the visit* and span methods.
  class Formatter
    attr_accessor :file, :textOnly

    # Names accepted by --format: every class in TVO that is Formatter or a
    # subclass, with the "Formatter" suffix removed (the base class itself
    # therefore yields ''), sorted.
    def self.formatterNames
      formatters = TVO.constants.select do |const|
        klass = begin
          TVO.const_get(const)
        rescue StandardError
          nil
        end
        klass.kind_of?(Class) && klass <= self
      end
      # Module#constants returns Symbols on Ruby >= 1.9, which have no #sub.
      formatters.collect { |const| const.to_s.sub(/Formatter$/, '') }.sort
    end

    # _file - IO-like object that receives the output.
    def initialize(_file = $stdout)
      @file = _file
      @textOnly = false
    end

    # Writes the whole outline rooted at outlineRoot to file.
    def format(outlineRoot)
      visitItem(outlineRoot)
    end

    protected

    # default output is just flattened.
    # In textOnly mode only heads flagged keepHead are printed.
    def printHead?(item)
      !@textOnly || item.keepHead
    end

    # Prints the embellished head followed by an empty line.
    def visitHead(item, seq = 0)
      return unless printHead?(item)
      file.puts(embellish(item.head), "")
    end

    # Prints each embellished text line, then an empty line if there was any.
    def visitText(item, seq = 0)
      item.text.each { |textLine| file.puts(embellish(textLine)) }
      file.puts("") if item.text.length > 0
    end

    # Visits item (skipping head and text for negative levels, i.e. the root)
    # and then its children; seq is the index among its siblings.
    def visitItem(item, seq = 0)
      if item.level >= 0
        visitHead(item, seq)
        visitText(item, seq)
      end
      item.children.each_with_index { |ch, n| visitItem(ch, n) }
      nil
    end

    # format individual spans
    def italic(text) ; text; end
    def bold(text) ; text; end
    def code(text) ; text; end
    def underline(text) ; text; end
    def standout(text) ; text; end
    def tagDef(text) ; text; end
    def tagRef(text) ; text; end
    def vimTagRef(text) ; text; end
    # Returns nil, so a line matching REHTMLOnly is replaced by ''.
    def htmlOnly(text); end

    # Replaces each inline markup span in text with the result of the
    # matching span method above.
    def embellish(text)
      text.
        gsub(REItalic) { |s| italic($1) }.
        gsub(REBold) { |s| bold($1) }.
        gsub(RECode) { |s| code($1) }.
        gsub(REUnderline) { |s| underline($1) }.
        gsub(REStandout) { |s| standout($1) }.
        gsub(RETagDef) { |s| tagDef($1 || $2) }.
        gsub(RETagRef) { |s| tagRef($1) }.
        gsub(REVimTagRef) { |s| vimTagRef($1) }.
        gsub(REHTMLOnly) { |s| htmlOnly($1) }
    end
  end
  # Output TVO again (for building OTL files programmatically)
  class OutlineFormatter < Formatter
    protected

    # Precomputed tab prefixes for levels 0..9.
    Prefixes = (0..9).to_a.collect { |n| ("\t" * n) }

    # Returns the tab indentation for level; negative levels get ''.
    def prefixForLevel(level)
      # Check the sign first: a negative index would otherwise count from the
      # end of Prefixes (Prefixes[-1] is nine tabs).
      return "" if level < 0
      Prefixes[level] || ("\t" * level)
    end

    # Prints the head on its own line, indented with one tab per level.
    def visitHead(item, seq = 0)
      return unless printHead?(item)
      file.print(prefixForLevel(item.level), item.head, "\n")
    end

    # Prints each text line as "<tabs>| <line>".
    def visitText(item, seq = 0)
      prefix = prefixForLevel(item.level) + '| '
      item.text.each { |tline| file.print(prefix, tline, "\n") }
    end
  end
  # Format outline as h1-h6/ul
  # Classes used are:
  #   <a href="">
  #     otlExternTagRef
  #     otlTagRef
  #   <a name="">
  #     otlTagDef
  #   <span>
  #     otlHTMLOnly
  #     otlTodo
  #     otlStandout
  #     otlVimTagRef
  #     otlUnderline
  #   <hr>
  #     h1 (emitted before every top-level head except the first)
  #   <h1> .. <h6>
  #     h1 .. h6
  #   <h6> (also used for levels deeper than 6)
  #     h<#>
  #   <ul>,<li>
  #     t<#>pd (if marker was '-')
  #     t<#>pa (if marker was '*')
  #   <div>,<p>
  #     t<#>p
  #
  class HTMLFormatter < Formatter
    # Escapes &, < and > for use in HTML text.
    def self.quoted(text)
      text.gsub(/&/, '&amp;'). gsub(/</, '&lt;'). gsub(/>/, '&gt;')
    end

    # Builds a Regexp from re's source with < and > replaced by their
    # entities. Not called anywhere in this file.
    def self.requoted(re)
      Regexp.new(re.source.gsub(/\\\\/, '\\'). gsub(/</, '&lt;'). gsub(/>/, '&gt;'))
    end

    # Variants of the TVO patterns that match text already passed through
    # quoted (so "<" and ">" appear as "&lt;" and "&gt;").
    RETagDef = /&lt;id=([^&]+)&gt;|\[\[([^\[\]]+)\]\]/i
    REExternTagRef = /&lt;url:\s*([^>]+)\s*&gt;|\[([a-z]+:[^\]]+)\]/i
    RETagRef = /\[([^\[\]:&]+)\]/
    REVimTagRef = /\[(:[^\[\]:&]+)\]/
    REHTMLOnly = /(.*?)\s*&lt;html:\s*(.+?|.*&lt;.+?&gt;)\s*&gt;\s*(.*)/i
    REItalic = /I&lt;(.+?)&gt;/
    REBold = /B&lt;(.+?)&gt;/
    RECode = /C&lt;(.+?)&gt;/
    REUnderline = /U&lt;(.+?)&gt;/
    # "&&gt;" / "&&lt;" are turned back into a single entity by decorated.
    REGtLt = /&&([gl]t;)/

    attr_accessor :stylesheet

    # Stylesheet linked when none is set explicitly.
    def self.defaultStylesheet
      "tvo.css"
    end

    def initialize(_file = $stdout)
      super
      @nest = 0
      @stylesheet = self.class.defaultStylesheet
    end

    # Writes a complete HTML document for the outline. The title is the head
    # of the first top-level item, or empty when the outline has no items.
    def format(outlineRoot)
      file.print('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">')
      firstItem = outlineRoot.children.first
      htmlTag('html') do
        htmlTag('head') do
          htmlTag('title') { firstItem && firstItem.head }
          htmlTag('link', { :rel => 'stylesheet', :type => 'text/css', :href => stylesheet() })
        end
        htmlTag('body') { visitItem(outlineRoot) }
      end
    end

    protected

    # notice in-text markings
    # Would be run after quoted
    def decorated(textLine)
      if textLine.match(REHTMLOnly)
        # Text around an html-only span is decorated recursively; the span
        # content itself is passed through untouched.
        textLine.gsub(REHTMLOnly) { |s|
          "#{decorated($1)} <span class=\"otlHTMLOnly\">#{$2}</span> #{decorated($3)}"
        }
      else
        textLine.
          gsub(REGtLt) { |s| "&#{$1}" }.
          gsub(REItalic) { |s| "<i>#{$1}</i>" }.
          gsub(REBold) { |s| "<strong>#{$1}</strong>" }.
          gsub(RECode) { |s| "<tt>#{$1}</tt>" }.
          gsub(REUnderline) { |s| "<span class=\"otlUnderline\">#{$1}</span>" }.
          gsub(RETodo1) { |s| "<span class=\"otlTodo\">#{s}</span>" }.
          gsub(REStandout) { |s| "<span class=\"otlStandout\">#{$1}</span>" }.
          gsub(RETagDef) { |s| "<a class=\"otlTagDef\" name=\"#{urlEncoded($1 || $2)}\"></a>" }.
          gsub(REExternTagRef) { |s|
            "<a class=\"otlExternTagRef\" href=\"#{urlEncoded($1 || $2)}\">#{$1 || $2}</a>" }.
          gsub(RETagRef) { |s| tagRefLink($1) }.
          gsub(REVimTagRef) { "<span class=\"otlVimTagRef\">#{$&}</span>" }
      end
    end

    # Builds the link for a "[ref]" reference. "-- name --" decoration is
    # stripped; for "file#anchor" only the file part is probed. A readable
    # file yields an external link, anything else an in-page "#" link.
    def tagRefLink(ref)
      url = dest = ref
      if dest =~ /^--\s*(.+)\s*--$/
        url = dest = $1
      end
      if dest =~ /^([^#]+)#([^#]+)$/
        url = dest
        dest = $1
      end
      if File.readable?(dest)
        "<a class=\"otlExternTagRef\" href=\"#{urlEncoded(url)}\">#{url}</a>"
      else
        "<a class=\"otlTagRef\" href=\"##{urlEncoded(url)}\">#{url}</a>"
      end
    end

    # HTML-escapes text and indents continuation lines to the current nesting.
    def quoted(text)
      self.class.quoted(text).gsub(/\n/, "\n" + (" " * @nest))
    end

    # Percent-encodes every byte of every character other than '#', '.' and
    # ASCII letters and digits.
    # NOTE(review): this also encodes ':' and '/' in external URLs - confirm
    # that is intended.
    def urlEncoded(text)
      # Encode per byte: on Ruby >= 1.9 c[0] is a String, which %X rejects.
      text.gsub(/[^#.A-Za-z0-9]/) { |c| c.bytes.map { |b| sprintf("%%%02X", b) }.join }
    end

    # Prints <tagname attribs...> on a new, indented line. With a block the
    # block runs one nesting level deeper and its return value, if any, is
    # escaped, decorated and printed before the closing tag; without a block
    # the tag is self-closed. Returns nil.
    def htmlTag(tagname, attribs = {})
      file.print("\n", " " * @nest)
      file.print('<', tagname)
      attribs.each_pair do |k, v|
        # Also escape double quotes so a value cannot end the attribute.
        file.print(" #{k}=\"#{quoted(v.to_s).gsub('"', '&quot;')}\"")
      end
      if block_given?
        file.print('>')
        @nest += 1
        text = yield
        @nest -= 1
        file.print(decorated(quoted(text))) if text
        file.print('</', tagname, '>')
      else
        file.print(' />')
      end
      nil
    end

    # Returns [tag, cssClass] for a 1-based head level; levels outside 1..6
    # use the h6 tag but keep "h<level>" as class.
    def tagAndClassForHead(itemLevel)
      hLevel = "h#{itemLevel}"
      tag = (itemLevel.between?(1, 6) ? hLevel : 'h6')
      [tag, hLevel]
    end

    # Prints the head as a heading, preceded by <hr> for every top-level
    # item after the first.
    def visitHead(item, seq = 0)
      return unless printHead?(item)
      itemLevel = item.level + 1
      (tag, hLevel) = tagAndClassForHead(itemLevel)
      if itemLevel == 1 && seq > 0
        htmlTag('hr', { :class => hLevel })
      end
      htmlTag(tag, { :class => hLevel }) { item.head }
    end

    # Returns [groupTag, itemTag, cssClass] for a paragraph group with the
    # given marker.
    def tagsAndClassForTextPara(itemLevel, marker)
      case marker
      when '-'
        ['ul', 'li', "t#{itemLevel}pd"]
      when '*'
        ['ul', 'li', "t#{itemLevel}pa"]
      else
        ['div', 'p', "t#{itemLevel}p"]
      end
    end

    # Prints one paragraph (array of lines) inside itemTag.
    def formatTextParagraph(para, itemTag, itemClass)
      htmlTag(itemTag, { :class => itemClass }) { para.join("\n") }
    end

    # Prints the item's text, one group element per run of paragraphs that
    # share a marker.
    def visitText(item, seq = 0)
      item.relatedTextParagraphsDo do |related, marker|
        (groupTag, itemTag, itemClass) = tagsAndClassForTextPara(item.level, marker)
        htmlTag(groupTag, { :class => itemClass }) do
          related.each { |p| formatTextParagraph(p, itemTag, itemClass) }
          nil
        end
      end
    end
  end
  # Format entire outline as nested series of ul/li/ul
  class HTMLListFormatter < HTMLFormatter
    # Prints the head as an <li> whose class is "h<level + 1>".
    def visitHead(item, seq = 0)
      return unless printHead?(item)
      htmlTag('li', { :class => "h#{item.level + 1}" }) { item.head }
    end

    # Prints the item (an <hr> first for every top-level item after the
    # first), then wraps its children, if any, in a <ul>.
    def visitItem(item, seq = 0)
      levelClass = "h#{item.level + 1}"
      unless item.level < 0
        htmlTag('hr', { :class => levelClass }) if item.level == 0 && seq > 0
        visitHead(item, seq)
        visitText(item, seq)
      end
      return nil if item.children.empty?
      htmlTag('ul', { :class => levelClass }) do
        item.children.each_with_index { |child, index| visitItem(child, index) }
        nil
      end
    end
  end
  # Construct an OutlineItem (the root item) from input text.
  # The parser is recursive descent with one-line pushback: a line that does
  # not fit the production being tried is pushed back and re-read by the
  # caller.
  class Parser
    @@debug = false

    # file - object responding to gets (defaults to $stdin).
    def initialize(file = $stdin)
      # Lines handed back by head/text, re-read before the file is consulted.
      @pending = []
      @file = file
      # One pattern per supported level 0..9: exactly n tabs, then a
      # non-blank character; capture 2 is the line content.
      @tabREs = (0..9).to_a.collect { |n| Regexp.new("^(\\t{#{n}})(\\S.*)") }
    end

    # Parses the input and returns the root Item (level -1) holding the
    # top-level items.
    def outline
      Item.new(-1, '', nil, nil, items(0))
    end

    # Turns debug tracing to $stderr on or off for all parsers.
    def Parser.debugMode=(bool)
      @@debug = bool
    end

    protected

    # Returns the next line without its separator, preferring pushed-back
    # lines. Throws :eof when the input is exhausted.
    def gets(sepString = $/)
      pushedBack = @pending.shift
      return pushedBack unless pushedBack.nil?
      retval = @file.gets(sepString)
      if retval.nil?
        $stderr.puts("(EOF)") if @@debug
        throw(:eof, nil)
      end
      retval.chomp(sepString)
    end

    # Pushes line back so that the next gets returns it.
    def puts(line)
      @pending.push(line)
    end

    # Returns the head text if the next line is a head at exactly `level`
    # tabs; otherwise pushes the line back and returns nil.
    def head(level)
      $stderr.print("Looking for head(#{level})") if @@debug
      line = gets
      if m = line.match(@tabREs[level]) and m[2][0..0] != '|'
        $stderr.print("... got #{m[2]}\n") if @@debug
        return m[2]
      else
        puts(line)
        $stderr.print("... rej #{line.inspect}\n") if @@debug
        return nil
      end
    end

    # Returns the body text if the next line is a "| text" line at exactly
    # `level` tabs; otherwise pushes the line back and returns nil.
    def text(level)
      $stderr.print("Looking for text(#{level})") if @@debug
      line = gets
      # Anchored: only a line that starts with '|' is text, so a head that
      # merely contains '|' is left for head().
      if m = line.match(@tabREs[level]) and body = m[2].match(/\A\|\s?(.*)/)
        $stderr.print("... got #{m[2][2..-1]}\n") if @@debug
        return body[1]
      else
        puts(line)
        $stderr.print("... rej #{line.inspect}\n") if @@debug
        return nil
      end
    end

    # get next outline item that starts with (at least) "level" tabs.
    # return single item, or nil when the next line is not a head at this
    # level, the input is exhausted, or level is deeper than supported.
    def item(level)
      # Returning nil (instead of throwing to the parent) keeps the parent
      # item, which has already been parsed, from being discarded.
      return nil if level >= @tabREs.length
      catch(:eof) do
        retval = Item.new(level)
        retval.head = head(level)
        return nil unless retval.head
        # EOF while reading text still leaves a complete item.
        catch(:eof) do
          while t = text(level)
            retval.addText(t)
          end
        end
        retval.children = items(level + 1)
        $stderr.puts("Returning item [level=#{level}] [head=\"#{retval.head}\"] [text=#{retval.text.length}lns]") if @@debug
        retval
      end
    end

    # return array of items at the given level.
    def items(level)
      retval = []
      while nextItem = item(level)
        retval.push(nextItem)
      end
      return retval
    end
  end # class Parser
  # read options from ARGV, parse the outline from ARGF (remaining file
  # arguments or stdin) and write it with the selected formatter.
  def parseAndFormat
    # parse arguments
    formatType = ''
    outputFileName = nil
    stylesheet = nil
    textOnly = false
    parser = GetoptLong.new
    parser.set_options(
      [ '--format', '-f', GetoptLong::REQUIRED_ARGUMENT],
      [ '--help', '-h', GetoptLong::NO_ARGUMENT],
      [ '--output', '-o', GetoptLong::REQUIRED_ARGUMENT],
      [ '--debug', '-d', GetoptLong::NO_ARGUMENT],
      [ '--stylesheet', '-s', GetoptLong::REQUIRED_ARGUMENT],
      [ '--include', '-i', GetoptLong::REQUIRED_ARGUMENT],
      [ '--textonly', '-t', GetoptLong::NO_ARGUMENT])
    parser.each_option do |name, arg|
      case name
      when '--format'
        formatType = arg
      when '--help'
        $stderr.print <<-EOF
Usage: #{$0} [opt] [file [...]]
opt is one or more of:
--format, -f #{ "<'" + Formatter.formatterNames().join("'|'") + "'>" } set output format type
--help, -h display this help
--output, -o <filename> output to file named filename instead of stdout
--debug, -d turn on parser debugging to stderr
--textonly, -t omit heads except those starting with '+'
--stylesheet, -s <filename> link to stylesheet named filename (default=#{HTMLFormatter.defaultStylesheet})
--include, -i <filename> include Ruby module filename
        EOF
        exit(0)
      when '--output'
        outputFileName = arg
      when '--debug'
        Parser.debugMode = true
      when '--textonly'
        textOnly = true
      when '--stylesheet'
        # Applied below, once the formatter exists.
        stylesheet = arg
      when '--include'
        # Deliberately loaded at option time so an included file can define
        # additional formatters.
        require arg
      end
    end
    # Read the input and resolve the formatter before opening the output
    # file, so a bad format name or input error does not truncate it.
    outline = Parser.new(ARGF).outline
    formatterClass = TVO.const_get("#{formatType}Formatter")
    outputFile = outputFileName.nil? ? $stdout : File.open(outputFileName, 'w')
    begin
      formatter = formatterClass.new(outputFile)
      formatter.textOnly = textOnly
      # Only formatters that expose a stylesheet setter take the option.
      formatter.stylesheet = stylesheet if stylesheet && formatter.respond_to?(:stylesheet=)
      formatter.format(outline)
    ensure
      outputFile.close unless outputFile.equal?($stdout)
    end
  end
  523. end # module TVO
  # Act as a command-line tool only when this file is the program being run,
  # not when it is loaded as a library.
  if __FILE__ == $0
    include TVO
    parseAndFormat
  end