PageRenderTime 125ms CodeModel.GetById 22ms RepoModel.GetById 3ms app.codeStats 1ms

/lib/redcloth/docbook.rb

https://code.google.com/p/rextile/
Ruby | 1006 lines | 808 code | 135 blank | 63 comment | 115 complexity | a356106024dc70e026e48f336d960060 MD5 | raw file
Possible License(s): BSD-3-Clause
  require 'digest/md5'
  begin
    require 'md5' # legacy Ruby 1.8 library; no longer shipped with Ruby 1.9+
  rescue LoadError
    # Digest::MD5 (required above) provides the hashing this file needs.
  end
  unless defined? RedCloth
    $:.unshift(File.dirname(__FILE__))
    require 'base'
  end
  6. class RedCloth < String
  7. DEFAULT_RULES << :docbook
  8. # == Docbook Rules
  9. #
  10. # The following docbook rules can be set individually. Or add the complete
  11. # set of rules with the single :docbook rule, which supplies the rule set in
  12. # the following precedence:
  13. #
  14. # refs_docbook:: Docbook references (i.e. [hobix]http://hobix.com/)
  15. # block_docbook_table:: Docbook table block structures
  16. # block_docbook_lists:: Docbook list structures
  17. # block_docbook_prefix:: Docbook blocks with prefixes (i.e. bq., h2., etc.)
  18. # inline_docbook_image:: Docbook inline images
  19. # inline_docbook_link:: Docbook inline links
  20. # inline_docbook_wiki_words:: Docbook inline referring links
  21. # inline_docbook_wiki_links:: Docbook inline referring links
  22. # inline_docbook_span:: Docbook inline spans
  23. # inline_docbook_glyphs:: Docbook entities (such as em-dashes and smart quotes)
  24. # Elements to handle
  # Ordered substitution table consumed by docbook_pgl. Every entry is a
  # [pattern, replacement] pair and the entries are applied top to bottom, so
  # the closing-quote rules must stay ahead of the catch-all opening-quote
  # rules that follow them.
  DOCBOOK_GLYPHS =
    [ # --- quotes ---
      [ /([^\s\[{(>])\'/, '\1&#8217;' ],         # single closing
      [ /\'(?=\s|s\b|[#{PUNCT}])/, '&#8217;' ],  # single closing
      [ /\'/, '&#8216;' ],                       # single opening
      # [ /([^\s\[{(])?"(\s|:|$)/, '\1&#8221;\2' ], # double closing (disabled)
      [ /([^\s\[{(>])"/, '\1&#8221;' ],          # double closing
      [ /"(?=\s|[#{PUNCT}])/, '&#8221;' ],       # double closing
      [ /"/, '&#8220;' ]                         # double opening
    ] +
    [ # --- punctuation ---
      [ /\b( )?\.{3}/, '\1&#8230;' ],            # ellipsis
      [ /(\.\s)?\s?--\s?/, '\1&#8212;' ],        # em dash
      [ /\s->\s/, ' &rarr; ' ],                  # right arrow
      [ /\s-\s/, ' &#8211; ' ],                  # en dash
      [ /(\d+) ?x ?(\d+)/, '\1&#215;\2' ]        # dimension sign
    ] +
    [ # --- symbols ---
      [ /\b ?[(\[]TM[\])]/i, '&#8482;' ],        # trademark
      [ /\b ?[(\[]R[\])]/i, '&#174;' ],          # registered
      [ /\b ?[(\[]C[\])]/i, '&#169;' ]           # copyright
    ]
  42. #
  43. # Generates Docbook XML from the Textile contents.
  44. #
  45. # r = RedCloth.new( "And then? She *fell*!" )
  46. # r.to_docbook
  47. # #=>"And then? She <emphasis role=\"strong\">fell</emphasis>!"
  48. #
  def to_docbook( *rules )
    # Per-conversion bookkeeping: open section stack, ids that were emitted,
    # ids that were referenced, and generated section ids.
    @stack, @ids, @references, @automatic_content_ids = [], [], [], []
    rules = DEFAULT_RULES if rules.empty?

    # Work on a copy so the receiver itself is never modified.
    text = dup
    @urlrefs = {}
    @shelf = []
    # Expand the :docbook umbrella rule into the individual rules it stands for.
    @rules = rules.map { |rule| :docbook == rule ? DOCBOOK_RULES : rule }.flatten

    # standard clean up
    incoming_entities text
    clean_white_space text

    # processing pipeline (the order of these calls matters)
    @pre_list = []
    pre_process_docbook text
    no_docbook text
    docbook_rip_offtags text
    docbook_hard_break text
    refs text
    docbook_blocks text
    inline text
    smooth_offtags text
    retrieve text
    post_process_docbook text
    clean_html text if filter_html

    text.strip!
    text << "\n"
    # Close every section that is still open, innermost (highest number) first.
    @stack.size.downto( 1 ) { |level| text << "</sect#{level}>\n" }
    text << "</chapter>" if @chapter

    # References that never resolved to an emitted id get a placeholder
    # section in a trailing "To Come" chapter.
    unresolved = @references - @ids
    unless unresolved.empty?
      text << %{<chapter label="86" id="chapter-86"><title>To Come</title>}
      unresolved.each do |name|
        heading = name.split('-').map { |t| t.capitalize }.join(' ')
        text << %!<sect1 id="#{name}"><title>#{heading}</title><remark>TK</remark></sect1>\n!
      end
      text << "</chapter>"
    end
    text
  end
  94. #######
  95. private
  96. #######
  97. # Elements to handle
  98. # GLYPHS << [ /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/, '<acronym title="\2">\1</acronym>' ] # 3+ uppercase acronym
  99. # GLYPHS << [ /(^|[^"][>\s])([A-Z][A-Z0-9 ]{2,})([^<a-z0-9]|$)/, '\1<span class="caps">\2</span>\3', :no_span_caps ] # 3+ uppercase caps
  # Element names that docbook_blk treats as "simple": a block that starts
  # with one of these tags is still run through the block rules, whereas a
  # block starting with any other tag is passed through untouched.
  SIMPLE_DOCBOOK_TAGS = %w[
    para title remark blockquote itemizedlist orderedlist variablelist programlisting screen
    literallayout figure example abbrev accel acronym action application citation
    citetitle classname classref command computeroutput email emphasis envar filename
    firstterm foreignphrase footnoteref graphic function guibutton guimenu guimenuitem keycap
    keysym lineannotation literal option optional parameter prompt quote replaceable
    returnvalue sgmltag structfield structname subscript superscript symbol systemitem
    type userinput wordasword xref
  ]
  # Inline span markers and the Docbook element each one produces. Entries
  # flagged :limit only match when delimited by non-word characters.
  DOCBOOK_TAGS = [
    ['**', 'emphasis role="strong"'],
    ['__', 'emphasis'],
    ['*',  'emphasis role="strong"', :limit],
    ['_',  'emphasis',               :limit],
    ['??', 'citation',               :limit],
    ['^',  'superscript',            :limit],
    ['~',  'subscript',              :limit],
    ['%',  'para',                   :limit],
    ['@',  'literal',                :limit],
  ]

  # Compile every entry in place into
  #   [marker, element, span_regex, type, escaped_span_regex]
  # where the escaped variant additionally requires the escape keyword
  # around the marker.
  DOCBOOK_TAGS.map! do |marker, element, kind|
    quoted = Regexp.quote( marker )
    if kind == :limit
      span_re = /(\W)
        (#{quoted})
        (#{C})
        (?::(\S+?))?
        (\S.*?\S|\S)
        #{quoted}
        (?=\W)/x
      escaped_span_re = /(\W)
        (#{@@escape_keyword}#{quoted})
        (#{C})
        (?::(\S+?))?
        (\S.*?\S|\S)
        #{quoted}#{@@escape_keyword}
        (?=\W)/x
    else
      span_re = /(#{quoted})
        (#{C})
        (?::(\S+))?
        (\S.*?\S|\S)
        #{quoted}/xm
      escaped_span_re = /(#{@@escape_keyword}#{quoted})
        (#{C})
        (?::(\S+))?
        (\S.*?\S|\S)
        #{quoted}#{@@escape_keyword}/xm
    end
    [marker, element, span_re, kind, escaped_span_re]
  end
  # Normalises the raw Textile source in place before block processing:
  # definition-list shorthands, the "w. ... .w" style admonition shorthands,
  # a few HTML tags, and backtick / <pre> / <code> code sections.
  #
  # text:: the working copy of the document (mutated in place)
  #
  # Unlike the earlier version this tolerates empty or marker-only input
  # (String#split returns [] there, which used to raise NoMethodError) and it
  # only rewrites real <br>/<em> tags, not every tag whose name merely starts
  # with "br" or "em" (e.g. <email>, <emphasis>).
  def pre_process_docbook(text)
    # Prepare dt and dd the way they should be
    text.gsub!( /div\((d[dt])\)\.(.*?)div\(\1\)\./m ) do |m|
      kind, body = $~[1..2]
      "p(#{kind}). #{body.gsub("\n", LB)}"
    end
    text.gsub!( /p\(dt\)\.(.*?)p\(dd\)\.(.*?)$/m ) do |m|
      dt, dd = $~[1..2]
      "- #{dt.gsub(LB,"\n").strip} := #{dd.gsub(LB,"\n").strip} =:"
    end

    # Prepare superscripts and subscripts
    text.gsub!( /(\w)(\^[0-9,]+\^)/, '\1 \2' )
    text.gsub!( /(\w)(\~[0-9,]+\~)/, '\1 \2' )

    # "w. ... .w" style shorthands become div(<class>). placeholders.
    {'w' => 'warning', 'n' => 'note', 'c' => 'comment', 'pro' => 'production', 'dt' => 'dt', 'dd' => 'dd'}.each do |char, word|
      head, *rest = text.split(/^\s*#{char}\./)
      head ||= '' # split yields [] for empty / marker-only text
      rewritten = rest.map do |part|
        if part =~ /\.#{char}\s*$/
          "div(#{word}).\n" + part.sub(/\.#{char}\s*$/, "\ndiv(#{word}). \n") + "\n"
        else
          # no closing marker: put the opening marker back untouched
          "#{char}.#{part}" + "\n"
        end
      end
      text.replace(head + "\n" + rewritten.join("\n"))

      # Make sure a block handler exists for the shorthand prefix itself.
      self.class.class_eval %!
        def docbook_#{char}(tag, atts, cite, content)
          docbook_p('p', #{word.inspect}, cite, content)
        end
      !
    end

    # "bq. ... .bq" becomes a blockquote div wrapping a single paragraph.
    head, *rest = text.split(/^\s*bq\./)
    head ||= ''
    rewritten = rest.map do |part|
      if part =~ /\.bq\s*$/
        "div(blockquote).\n\n<para>" + part.sub(/\.bq\s*$/, "</para>\n\ndiv(blockquote). ")
      else
        "bq.#{part}"
      end
    end
    text.replace(head + "\n" + rewritten.join("\n"))

    # \b keeps these from matching longer tag names such as <email>.
    text.gsub!(/<br\b.*?>/i, "&#x00A;")
    text.gsub!(/<\/?em\b.*?>/i, "__")

    text.gsub!( BACKTICK_CODE_RE ) do |m|
      before,lang,code,after = $~[1..4]
      docbook_rip_offtags( "#{ before }<programlisting>#{ code.gsub(/\\\`\`\`/,'```') }</programlisting>#{ after }" )
    end
    text.gsub! %r{<pre>\s*(<code>)?}i, '<para><programlisting>'
    text.gsub! %r{(</code>)?\s*</pre>}i, '</programlisting></para>'
    text.gsub! %r{<(/?)code>}i, '<\1programlisting>'
  end
  # Final clean-up pass over the generated Docbook (mutates +text+ in place):
  # restores line-break placeholders, collapses redundant <para> nesting,
  # rewrites numeric <ulink>s that point at known ids into <link>s, inlines
  # numbered footnotes and expands the escaped inline span markers.
  #
  # Fixes relative to the earlier version:
  # * the id list is collected before rewriting; calling gsub! on the string
  #   that String#scan is still iterating raises "string modified", and the
  #   scan block argument is an Array which must not be interpolated as-is;
  # * the element chosen for one span no longer leaks into the following
  #   spans of the same marker.
  def post_process_docbook( text )
    # docbook_ch always emits a leading </chapter>; drop the first one.
    text.sub!( "</chapter>\n\n", "" )
    text.gsub!( LB, "\n" )
    text.gsub!( NB, "" )
    text << "</#{@div_atts}>" if @div_atts

    # <tip><para>a</para><para>b</para></tip>  =>  <tip>a</tip><tip>b</tip>
    text.gsub!(%r{<(#{DOCBOOK_PARAS.join("|")})([^>]*)>\s*<para>(.*?)</para>\s*</\1>}mi) do |m|
      t, c, inner = $~[1..3]
      "<#{t}#{c}>" << inner.gsub(/<para>/, "<#{t}#{c}>").gsub(/<\/para>/, "</#{t}>") << "</#{t}>"
    end
    text.gsub! %r{<para[^>]*>\s*<para([^>]*)>}i,'<para\1>' # clean multiple paragraphs in a row just in case
    text.gsub! %r{</para>\s*</para>}i,'</para>' # clean multiple paragraphs in a row just in case
    text.gsub! %r{<para[^>]*>\s*</para>\s*}i, '' # clean empty paras
    text.gsub! %r{<(/?)sup>}i, '<\1superscript>'
    text.gsub! %r{<(/?)sub>}i, '<\1subscript>'
    text.gsub! %r{</?nodocbook>}, ''
    text.gsub! %r{x%x%}, '&#38;'

    # A ulink whose url is the number of an emitted id="idN" is really an
    # internal link. Snapshot the numbers first, then rewrite.
    text.scan( /id="id([0-9]+)"/i ).flatten.uniq.each do |num|
      text.gsub!( /<ulink url="#{num}">(.*?)<\/ulink>/, %{<link linkend="id#{num}">\\1</link>} )
    end

    text.gsub!( %r{<programlisting>\n}, "<programlisting>" )
    text.gsub!( %r{\n</programlisting>}, "</programlisting>\n" )

    # Replace [1], [2], ... (in ascending order) by the matching footnote body.
    i = 1
    text.gsub!(/\[\d+\]/) do |ref|
      id = ref[/\d+/].to_i
      if id == i
        i += 1
        if text =~ /<footnote id="fn#{id}">(.*?)<\/footnote>/
          "<footnote id=\"footnote#{id}\">#{$1}</footnote>"
        else
          ref
        end
      else
        ref
      end
    end
    # The original footnote definitions are no longer needed.
    text.gsub!(/<footnote id="fn\d+">(.*?)<\/footnote>/, '')

    # Expand spans that were written with the escape keyword.
    DOCBOOK_TAGS.each do |qtag_rc, ht, qtag_re, rtype, escaped_re|
      text.gsub!( escaped_re ) do |m|
        if rtype == :limit
          sta, qtag, atts, cite, content = $~[1..5]
        else
          qtag, atts, cite, content = $~[1..4]
          sta = ''
        end
        # Use a per-match local so the default element (+ht+) stays intact
        # for the next match.
        tag, atts = docbook_sanitize_para atts, content, ht
        atts = docbook_pba( atts )
        wrap = ['note', 'blockquote'].include? tag
        "#{ sta }<#{ tag }#{ atts }>#{ '<para>' if wrap }#{ cite }#{ content }#{ '</para>' if wrap }</#{ tag.gsub(/^([^\s]+).*/,'\1') }>"
      end
    end
  end
  257. # Parses a Docbook table block, building XML from the result.
  # Replaces every Textile table in +text+ (in place) with a Docbook
  # <table> / <informaltable>. A table with a caption becomes <table> with a
  # <title>; rows whose cells start with "_" are wrapped in <thead>, all other
  # rows share one <tbody>.
  #
  # The <tbody> is now closed once after the last row. The earlier version
  # compared the current row string with the last raw row to detect the end,
  # which failed (leaving <tbody> unclosed) whenever the last row carried row
  # attributes, because the row string is replaced while parsing them.
  def block_docbook_table( text )
    text.gsub!( TABLE_RE ) do |matches|
      caption, id, tatts, fullrow = $~[1..4]
      tatts = docbook_pba( tatts, caption ? 'table' : 'informaltable' )
      tatts = shelve( tatts ) if tatts
      rows = []
      body_open = false
      cols = 0
      raw_rows = fullrow.split( /\|$/m ).delete_if { |row| row.empty? }
      raw_rows.each do |row|
        ratts, row = docbook_pba( $1, 'row' ), $2 if row =~ /^(#{A}#{C}\. )(.*)/m
        # The pad keeps a trailing empty cell from being dropped by split;
        # it is trimmed from the last cell again below.
        row << " "
        cells = []
        head = 'tbody'
        pieces = row.split( '|' )
        cols = pieces.size - 1
        pieces.each_with_index do |cell, i|
          next if i == 0 # text before the first "|"
          head = 'thead' if cell =~ /^_/
          catts = ''
          catts, cell = docbook_pba( $1, 'entry' ), $2 if cell =~ /^(_?#{S}#{A}#{C}\. ?)(.*)/
          catts = shelve( catts ) if catts
          content =
            if cell.strip.empty?
              "&nbsp;"
            elsif cols != i
              cell
            else
              cell[0...cell.length-1] # strip the pad added above
            end
          cells << "<entry#{ catts }>#{ content }</entry>"
        end
        ratts = shelve( ratts ) if ratts
        if head == 'tbody'
          unless body_open
            body_open = true
            rows << "<tbody>"
          end
          rows << "<row#{ ratts }>\n#{ cells.join( "\n" ) }\n</row>"
        else
          rows << "<thead>"
          rows << "<row#{ ratts }>\n#{ cells.join( "\n" ) }\n</row>"
          rows << "</thead>"
        end
      end
      rows << "</tbody>" if body_open

      title = "<title>#{ caption }</title>\n" if caption
      if id
        @ids << "id#{id}"
        id = " id=\"#{ "id#{id}" }\""
      end
      element = caption ? 'table' : 'informaltable'
      %{<#{ element }#{ id }#{ tatts }>\n#{title}<tgroup cols="#{cols}">\n#{ rows.join( "\n" ) }\n</tgroup>\n</#{ element }>\n\n}
    end
  end
  302. # Parses Docbook lists and generates Docbook XML
  # Converts Textile bullet (*) and numbered (#) lists in +text+ (in place)
  # into <itemizedlist> / <orderedlist> structures.
  #
  # Returns true when the text was changed, false otherwise.
  #
  # Fixes relative to the earlier version:
  # * lists still open at the end are closed innermost-first; they used to be
  #   closed outermost-first, which produced mismatched end tags when the
  #   nested lists were of different types;
  # * dedenting below the level the list started at no longer raises
  #   NoMethodError on an emptied depth stack.
  def block_docbook_lists( text )
    orig_text = text.dup
    text.gsub!( LISTS_RE ) do |match|
      lines = match.split( /\n/ )
      depth = [] # markers ("*", "##", ...) of the lists currently open
      lines.each_with_index do |line, line_id|
        if line =~ LISTS_CONTENT_RE
          tl,continuation,atts,content = $~[1..4]
          if depth.last
            if depth.last.length > tl.length
              # dedent: close deeper lists until we are back at this level
              (depth.length - 1).downto(0) do |i|
                break if depth[i].length == tl.length
                lines[line_id - 1] << "</para></listitem>\n</#{ lD( depth[i] ) }>\n"
                depth.pop
              end
            end
            if depth.last && depth.last.length == tl.length
              lines[line_id - 1] << "</para></listitem>"
            end
          end
          escaped = content.gsub("<","&lt;").gsub(">","&gt;")
          if depth.last == tl
            lines[line_id] = "<listitem><para>#{ escaped }"
          else
            depth << tl
            atts = docbook_pba( atts )
            atts = shelve( atts ) if atts
            lines[line_id] = "<#{ lD( tl ) }#{ atts }>\n<listitem><para>#{ escaped }"
          end
        end
        if line_id == lines.length - 1
          # end of the list block: close whatever is still open, deepest first
          depth.reverse_each do |v|
            lines[line_id] << "</para></listitem>\n</#{ lD( v ) }>"
          end
          depth.clear
        end
      end
      lines.join( "\n" )
    end
    text != orig_text
  end
  348. # Parses Docbook simple lists and generates Docbook XML
  # Rewrites underscore-prefixed list lines ("_ item", "__ nested item")
  # found inside LISTS_RE matches into <simplelist>/<member> markup, in place.
  # Returns true when +text+ was changed, false otherwise.
  def block_docbook_simple_lists( text )
    before = text.dup
    text.gsub!( LISTS_RE ) do |list_block|
      rows = list_block.split( /\n/ )
      previous = -1
      open_markers = []
      rows.each_with_index do |row, idx|
        if row =~ /^([_]+)(#{A}#{C}) (.*)$/m
          marker, attrs, body = $~[1..3]
          if open_markers.last
            if open_markers.last.length > marker.length
              # dedent: close deeper lists until the levels line up
              (open_markers.length - 1).downto(0) do |level|
                break if open_markers[level].length == marker.length
                rows[idx - 1] << "</member>\n</simplelist>\n"
                open_markers.pop
              end
            end
            rows[idx - 1] << "</member>" if open_markers.last.length == marker.length
          end
          escaped = body.gsub("<","&lt;").gsub(">","&gt;")
          if open_markers.last == marker
            rows[idx] = "<member>#{ escaped }"
          else
            open_markers << marker
            attrs = docbook_pba( attrs )
            attrs = shelve( attrs ) if attrs
            rows[idx] = "<simplelist#{ attrs }>\n<member>#{ escaped }"
          end
        end
        previous = idx
        if idx - previous > 1 or idx == rows.length - 1
          # close every list that is still open
          open_markers.each { |_| rows[previous] << "</member>\n</simplelist>" }
          open_markers.clear
        end
      end
      rows.join( "\n" )
    end
    text != before
  end
  393. # Parses docbook definition lists and generates Docbook XML
  # Converts "- term := definition =:" entries in +text+ (in place) into a
  # <variablelist>. Each non-blank line of a definition becomes its own
  # <para>.
  #
  # The earlier version also ran the HTML attribute parser (+pba+) on an
  # undefined value and shelved the result without ever using it; that dead
  # code is removed.
  def block_docbook_defs( text )
    # Step 1: fold multi-line definitions into one line of <para> elements.
    text.gsub!(/^-\s+(.*?):=(.*?)=:\s*$/m) do |m|
      term, body = $~[1..2]
      paras = body.split(/\n/).map { |w| w.strip }.delete_if { |w| w.empty? }
      "- #{term.strip} := <para>" + paras.join("</para><para>") + "</para>"
    end
    # Step 2: turn each run of definition lines into a variablelist.
    text.gsub!( DEFS_RE ) do |match|
      lines = match.split( /\n/ )
      lines.each_with_index do |line, line_id|
        if line =~ DEFS_CONTENT_RE
          dl,continuation,dt,dd = $~[1..4]
          lines[line_id] = line_id == 0 ? "<variablelist>" : ""
          lines[line_id] << "\n\t<varlistentry><term>#{ dt.strip }</term>\n\t<listitem><para>#{ dd.strip }</para></listitem></varlistentry>"
        end
        if line_id == lines.length - 1
          lines[-1] << "\n</variablelist>"
        end
      end
      lines.join( "\n" )
    end
  end
  # Turns inline code spans (CODE_RE matches) into <literal> elements. The
  # code itself is HTML-escaped and shelved so later rules leave it alone.
  def inline_docbook_code( text )
    text.gsub!( CODE_RE ) do |m|
      lead, _lang, snippet, trail = $~[1..4]
      # un-escape "\@" / "\@@" inside the snippet
      snippet = snippet.gsub( /\\@@?/, '@' )
      htmlesc( snippet, :NoQuotes )
      docbook_rip_offtags( "#{ lead }<literal>#{ shelve snippet }</literal>#{ trail }" )
    end
  end
  # Maps a list marker to its Docbook list element: markers ending in "#"
  # are ordered lists, everything else is an itemized list.
  def lD( text )
    if text =~ /\#$/
      'orderedlist'
    else
      'itemizedlist'
    end
  end
  # When hard breaks are enabled, replaces single newlines inside running
  # text with <sbr />. Newlines followed by a list marker, a table bar,
  # whitespace or the end of a line are left alone.
  def docbook_hard_break( text )
    return unless hard_breaks
    text.gsub!( /(.)\n(?! *[#*\s|]|$)/, "\\1<sbr />" )
  end
  # Block handler for "bq.": wraps +content+ in <blockquote><para>. A cite
  # reference, once resolved through check_refs, is emitted as a citetitle
  # attribute. +atts+ is shelved but not written to the output.
  def docbook_bq( tag, atts, cite, content )
    resolved, _title = check_refs( cite )
    cite_attr = resolved ? " citetitle=\"#{ resolved }\"" : nil
    shelve( atts ) if atts
    "<blockquote#{ cite_attr }>\n<para>#{ content }</para>\n</blockquote>"
  end
  # Container elements whose content docbook_p wraps in an inner <para>.
  DOCBOOK_DIVS = %w[note blockquote warning]
  # Block handler for plain paragraphs. The attribute string selects the
  # element (see docbook_sanitize_para); elements listed in DOCBOOK_DIVS get
  # their content wrapped in an extra <para>.
  def docbook_p( tag, atts, cite, content )
    element, raw_atts = docbook_sanitize_para( atts, content )
    attributes = docbook_pba( raw_atts )
    attributes << " citetitle=\"#{ cite }\"" if cite
    attributes = shelve( attributes ) if attributes
    if DOCBOOK_DIVS.include?( element )
      open_para, close_para = '<para>', '</para>'
    else
      open_para, close_para = nil, nil
    end
    # the closing tag is the element name without any attributes it carries
    closing = element.gsub( /^([^\s]+).*/, '\1' )
    "<#{ element }#{ attributes }>#{ open_para }#{ content }#{ close_para }</#{ closing }>"
  end
  # Renders a div-style block. The element comes from docbook_sanitize_para;
  # the content is wrapped in an inner <para> when +extra_para+ is set or the
  # element itself is "para". +atts+ is written out verbatim.
  def docbook_div( tag, atts, cite, content, extra_para = true )
    ht, atts = docbook_sanitize_para( atts, content )
    if extra_para || ht == 'para'
      para, end_para = "\n<para>", "</para>\n"
    else
      para, end_para = "", ""
    end
    closing = ht.gsub( /^([^\s]+).*/, '\1' )
    "<#{ ht }#{ atts }>#{ para }#{ content }#{ end_para }</#{ closing }>\n"
  end
  # Generates a section id that is unique within this conversion: "S"
  # followed by the MD5 hex digest of the current section path (the entries
  # of @stack, with a leading "{{...}}" prefix reduced to its contents,
  # joined by "-"). On a collision a counter (1, 2, ...) is appended to the
  # hashed string until an unused id is found.
  #
  # Uses Digest::MD5 rather than the MD5 class of the old "md5" library,
  # which no longer exists in Ruby 1.9 and later.
  #
  # Returns the new id after recording it in @automatic_content_ids.
  def automatic_content_id
    path = @stack.map { |title| title.sub(/^\s*\{\{(.+)\}\}.+/, '\1').strip }.join('-')
    counter = nil
    loop do
      candidate = "S" + Digest::MD5.hexdigest( "#{path}#{counter}" )
      unless @automatic_content_ids.include?( candidate )
        @automatic_content_ids.push( candidate )
        return candidate
      end
      counter = counter.to_i + 1
    end
  end
  458. # def docbook_h1, def docbook_h2, def docbook_h3, def docbook_h4
  # Generates the heading handlers docbook_h1 .. docbook_h4. A level-N
  # heading first closes every open section at level N or deeper, then opens
  # <sectN> with either the explicit id taken from the heading text or an
  # automatically generated one, plus an optional role, and emits the title.
  # The section is closed later, by the next heading or at the end of the
  # conversion.
  (1..4).each do |depth|
    class_eval <<-RUBY
      def docbook_h#{depth}( tag, atts, cite, content )
        content_id, role = sanitize_content(content)
        atts = shelve( atts ) if atts
        markup = ''
        while @stack.size >= #{depth}
          markup << '</sect' << @stack.size.to_s << ">\\n"
          @stack.pop
        end
        @stack.push sanitized_id_for(content)
        section_id = content_id.nil? ? automatic_content_id : sanitized_id_for(content_id)
        markup << '<sect#{depth} id="' << section_id << '"'
        markup << ' role="' << role << '"' if role
        markup << '><title>' << content.sub(/^\\s*\\{\\{.+\\}\\}(.+)/,'\\1').strip << '</title>'
      end
    RUBY
  end
  490. # Handle things like:
  491. # ch. 1. Some Title id. 123
  # Block handler for chapter headings ("ch. 1. Some Title id. 123").
  #
  # Closes all open sections and the previous chapter, then opens a new
  # <chapter> carrying the label (the text before the first period) and the
  # title (everything after it).
  #
  # Fixes relative to the earlier version:
  # * only the first period separates label and title, so a title such as
  #   "Dr. Who" is no longer cut off at its own period;
  # * a missing label or title yields an empty string instead of a TypeError.
  #
  # NOTE(review): without an explicit "id." marker the raw title is used as
  # the id attribute; confirm whether it should go through sanitized_id_for.
  def docbook_ch( tag, atts, cite, content )
    content_id, role = sanitize_content(content)
    label, title = content.split('.', 2).map { |c| c.strip }
    string = ""
    # Close off the open sections in order to end the chapter cleanly
    @stack.each_with_index { |level, index| string << "</sect#{@stack.size-index}>" }
    @stack = []
    # Always emitted; the spurious first one is removed in post_process_docbook.
    string << "</chapter>\n\n"
    @chapter = true # let the instance know that a chapter has started
    string << '<chapter label="'
    string << label.to_s
    string << '" id="'
    string << (content_id.nil? ? title.to_s : sanitized_id_for(content_id))
    string << '"><title>'
    string << title.to_s
    string << '</title>'
    string
  end
  # Block handler for footnote definitions ("fn1. text"): emits a <footnote>
  # whose id is "fn<num>". post_process_docbook later moves the body to the
  # place where the footnote is referenced.
  def docbook_fn_( tag, num, atts, cite, content )
    atts << %{ id="fn#{ num }"}
    atts = shelve( atts ) if atts
    ["<footnote", atts, "><para>", content, "</para></footnote>"].join
  end
  # Dispatches a prefixed block ("p.", "bq.", "h2.", "fn1.", ...) to its
  # handler: docbook_<tag> if it exists, otherwise docbook_<tagpre>_ for
  # numbered prefixes. The matched source is replaced in place by the
  # handler's output.
  #
  # Returns a true value when a handler was applied, nil otherwise.
  #
  # The replacement now goes through the block form of gsub!, so backslash
  # sequences in the handler output (e.g. "\1" inside code) are inserted
  # literally instead of being read as back-references.
  def block_docbook_prefix( text )
    return unless text =~ BLOCK_RE
    matched = $&
    tag, tagpre, num, atts, cite, content = $~[1..6]
    atts = docbook_pba( atts )
    # pass to prefix handler (private handlers are included in the lookup)
    replacement =
      if respond_to?( "docbook_#{ tag }", true )
        send( "docbook_#{ tag }", tag, atts, cite, content )
      elsif respond_to?( "docbook_#{ tagpre }_", true )
        send( "docbook_#{ tagpre }_", tagpre, num, atts, cite, content )
      end
    return unless replacement
    text.gsub!( matched ) { replacement }
  end
  # Expands inline span markers (*strong*, _emphasis_, @literal@, ...) into
  # their Docbook elements, in place. The attribute string of a span may
  # select a different element (see docbook_sanitize_para); "note" and
  # "blockquote" get their content wrapped in an inner <para>.
  #
  # The element picked for one span used to be written back into the
  # marker's default, so e.g. "*(note)a* ... *b*" rendered the second span as
  # a note as well. The choice is now kept in a per-match local.
  def inline_docbook_span( text )
    DOCBOOK_TAGS.each do |qtag_rc, ht, qtag_re, rtype, escaped_re|
      text.gsub!( qtag_re ) do |m|
        if rtype == :limit
          sta, qtag, atts, cite, content = $~[1..5]
        else
          qtag, atts, cite, content = $~[1..4]
          sta = ''
        end
        tag, atts = docbook_sanitize_para( atts, content, ht )
        atts = docbook_pba( atts )
        atts << " citetitle=\"#{ cite }\"" if cite
        atts = shelve( atts ) if atts
        wrap = ['note', 'blockquote'].include?( tag )
        # closing tag: element name without the attributes it may carry
        "#{ sta }<#{ tag }#{ atts }>#{ '<para>' if wrap }#{ content }#{ '</para>' if wrap }</#{ tag.gsub(/^([^\s]+).*/,'\1') }>"
      end
    end
  end
  # Returns the position of +name+ in the flattened list built from the
  # second element of every BOOK entry, or nil when it is not listed. The
  # flattened list is built once and cached in @book.
  def docbook_lookup_hack(name)
    @book ||= BOOK.inject([]) { |names, chapter| names + chapter[1] }
    @book.index( name )
  end
  # Converts Textile links ("label":url) into <ulink> elements, in place.
  # The url is resolved through check_refs first; double quotes in the final
  # url are escaped as &quot;.
  def inline_docbook_link( text )
    text.gsub!( LINK_RE ) do |m|
      pre, atts, label, title, url, slash, post = $~[1..7]
      url, url_title = check_refs( url )
      title ||= url_title
      atts = shelve( atts ) if atts
      target = "#{ url }#{ slash }".gsub( '"', '&quot;' )
      "#{ pre }<ulink url=\"#{ target }\">#{ label }</ulink>#{ post }"
    end
  end
  # A reference definition at the start of a line: "[name]" immediately
  # followed by a hyperlink.
  DOCBOOK_REFS_RE = /(^ *)\[([^\[\n]+?)\](#{HYPERLINK})(?=\s|$)/

  # Collects reference definitions into @urlrefs (keyed by the lower-cased
  # name) and removes them from +text+.
  def refs_docbook( text )
    text.gsub!( DOCBOOK_REFS_RE ) do |m|
      name, target = $~[2], $~[3]
      @urlrefs[name.downcase] = [target, nil]
      nil # the definition itself produces no output
    end
  end
  # Converts Textile images (!url(title)!) into <graphic> elements, wrapped
  # in a titled <figure> when a non-empty title is given. Works in place.
  def inline_docbook_image( text )
    text.gsub!( IMAGE_RE ) do |m|
      stln, algn, atts, url, title, href, href_a1, href_a2 = $~[1..8]
      atts = docbook_pba( atts )
      atts = " fileref=\"#{ url }\"#{ atts }"
      href, alt_title = check_refs( href ) if href
      url, url_title = check_refs( url )
      captioned = title && !title.empty?
      out = stln
      out << "<figure><title>#{title}</title>\n" if captioned
      out << "<graphic#{ shelve( atts ) } />\n"
      out << "</figure>" if captioned
      out
    end
  end
  584. # Turns all urls into clickable links.
  585. # Taken from ActionPack's ActionView
  # Wraps bare URLs (AUTO_LINK_RE matches) in <ulink>, in place. A URL that
  # starts with "www." gets an "http://" scheme in the url attribute; URLs
  # that already sit inside an <a ...> tag are left untouched.
  def inline_docbook_autolink_urls(text)
    text.gsub!(AUTO_LINK_RE) do
      whole, lead, scheme, rest, trail = $&, $1, $2, $3, $5
      if lead =~ /<a\s/i # don't replace URLs that are already linked
        whole
      else
        href_scheme = scheme == "www." ? "http://www." : scheme
        %(#{lead}<ulink url="#{href_scheme}#{rest}">#{scheme}#{rest}</ulink>#{trail})
      end
    end
  end
  596. # Turns all email addresses into clickable links.
  # Wraps every e-mail address found in +text+ in an <email> element, in
  # place. Returns +text+, or nil when no address was found.
  def inline_docbook_autolink_emails(text)
    address = /([\w\.!#\$%\-+.]+@[A-Za-z0-9\-]+(\.[A-Za-z0-9\-]+)+)/
    text.gsub!( address, '<email>\1</email>' )
  end
  # Marks "==raw==" sections so that no Docbook processing is applied to
  # them: the content is wrapped in <nodocbook> tags. A section written with
  # escaped delimiters ("\==raw\==") is emitted as a literal "==raw==".
  def no_docbook( text )
    # inline form: delimited by whitespace or line boundaries
    text.gsub!( /(^|\s)(\\?)==([^=]+.*?)\2==(\s|$)?/ ) do |m|
      lead, escape, body, trail = $~[1..4]
      if escape.empty?
        "#{lead}<nodocbook>#{body}</nodocbook>#{trail}"
      else
        "#{lead}==#{body}==#{trail}"
      end
    end
    # block form: starts at the beginning of a line and may span lines
    text.gsub!( /^ *(\\?)==([^=]+.*?)\1==/m ) do |m|
      escape, body = $~[1..2]
      if escape.empty?
        "<nodocbook>#{body}</nodocbook>"
      else
        "==#{body}=="
      end
    end
  end
  # Applies the glyph substitutions (smart quotes, dashes, ...) to +text+
  # while leaving markup alone. Text without tags goes straight to
  # docbook_pgl; otherwise the text is walked piece by piece, tags are
  # skipped, and text between "off" tags (code-like sections) is only
  # HTML-escaped.
  def inline_docbook_glyphs( text, level = 0 )
    return docbook_pgl( text ) if text !~ HASTAG_MATCH

    codepre = 0 # nesting depth of currently open off-tags
    text.gsub!( ALLTAG_MATCH ) do |line|
      if $1
        # a tag: only track whether it opens or closes an off-tag
        if line =~ OFFTAG_OPEN
          codepre += 1
        elsif line =~ OFFTAG_CLOSE
          codepre = [codepre - 1, 0].max
        end
      elsif codepre.zero?
        inline_docbook_glyphs( line, level + 1 )
      else
        htmlesc( line, :NoQuotes )
      end
      line
    end
  end
  # Elements whose content must not be processed as Textile.
  DOCBOOK_OFFTAGS = /(nodocbook|programlisting)/i
  # One off-tag (closing tag in group 1, opening tag in group 3 with its name
  # in group 4) plus the text following it (group 5), up to the next off-tag
  # or the end of the input.
  DOCBOOK_OFFTAG_MATCH = %r{(?:(</#{ DOCBOOK_OFFTAGS }>)|(<#{ DOCBOOK_OFFTAGS }[^>]*>))(.*?)(?=</?#{ DOCBOOK_OFFTAGS }|\Z)}mi
  # Start of an opening off-tag.
  DOCBOOK_OFFTAG_OPEN = %r{<#{ DOCBOOK_OFFTAGS }}
  # Start of an opening or closing off-tag.
  DOCBOOK_OFFTAG_CLOSE = %r{</?#{ DOCBOOK_OFFTAGS }}
  # Cuts off-tag sections (<nodocbook>, <programlisting>) out of +text+ and
  # parks them in @pre_list, leaving a "<redpre#N>" placeholder behind so
  # later rules cannot touch their content. The content is HTML-escaped
  # unless a <nodocbook> section is involved. Returns +text+.
  def docbook_rip_offtags( text )
    if text =~ /<.*>/
      ## strip and encode <pre> content
      depth = 0       # number of off-tags currently open
      seen = {}       # off-tag names opened since depth was last zero
      text.gsub!( DOCBOOK_OFFTAG_MATCH ) do |line|
        closing, opening, offtag, aftertag = $1, $3, $4, $5
        if opening
          depth += 1
          seen[offtag] = true
          if depth - seen.length > 0
            # nested inside a section that is already parked: append to it
            htmlesc( line, :NoQuotes ) unless seen['nodocbook']
            @pre_list.last << line
            line = ""
          else
            # a new section: park it and leave a placeholder
            htmlesc( aftertag, :NoQuotes ) if aftertag and not seen['nodocbook']
            line = "<redpre##{ @pre_list.length }>"
            @pre_list << "#{ opening }#{ aftertag }"
          end
        elsif closing and depth > 0
          if depth - seen.length > 0
            htmlesc( line, :NoQuotes ) unless seen['nodocbook']
            @pre_list.last << line
            line = ""
          end
          depth -= 1 unless depth.zero?
          seen = {} if depth.zero?
        end
        line
      end
    end
    text
  end
  # Character tables used to recognise wiki words in non-ASCII scripts.
  668. # In order of appearance: Latin, Greek, Cyrillic, Armenian
  # NOTE(review): the literals below contain many "?" placeholders and look
  # mis-encoded in this copy of the file -- restore them from a known-good
  # UTF-8 original before relying on them.
  669. I18N_HIGHER_CASE_LETTERS =
  670. "?Ä???Ç?É?Ń?փăуÇ?Ü?áƒÜƒĺƒŕƒäƒéƒ??ŕ?â?ä?ăƒíƒňƒöƒîƒńƒúƒűƒ†ƒ˘ƒ§ƒś?ĺ???é??ƒ™ƒŽƒ¨ƒĆƒ?ƒ?ƒĽƒ???ƒ?ƒ?ƒŞƒř?ë?É?á?Ö?ä?í?ě?î?ď?ń?ň?ĺ???é?í?î?ň?ń?ö?†?ű?úťň?§?˘?śťö?ô?ö?ő?ú?™?Ć???¨?Ž???Ľ???????????Ş" +
  671. "ŒëŒíŒěŒîŒďŒńŒóŒňŒôŒöŒőŒúŒ?ŒűŒüŒ†Œ°ŒŁŒ§Œ•ŒśŒßŒŽŒŠ" +
  672. "ŒÜŒŕŒâŒäŒĺŒéŒ?—†—˘—§—ś—Ž—™—¨—Ć—?—?—Ľ—?—?—?—ş—ć“Ä“ä“ĺ“é“?“í“î“ń“ň“ö“ú“ű“†“˘“§“ś“Ž“™“¨“Ć“?“?“Ľ“?“?“?“ş“ć”?”ɔ֔á”â”ă”?”?”í”î”ń”ň”ö”ú”ű”†”˘”§”ś”Ž”™”¨”Ć”?”?”Ľ”?–ń" +
  673. "‘ą‘?‘?‘Ľ‘ľ‘?‘?‘?‘?‘?‘Ş‘ş‘?‘ć‘ř’Ä’?’Ç’É’Ń’Ö’Ü’á’ŕ’â’ä’ă’ĺ’?’?’?’ë’í’ě’î’ď’ń"
  674. I18N_LOWER_CASE_LETTERS =
  675. "?†?°?˘?Ł?§?•ƒ?ƒÖƒÉ?ś?߃áƒ?ƒâƒăƒ?ƒë?Ž?Š?™?´ƒěƒôƒőƒďƒó?íƒ?ƒüƒ°ƒŁƒ•ƒß?¨???Ć?؃´ƒŠƒ?ƒŘƒąƒ?ƒľƒ?ƒ??ǃćƒ?ƒş?Ä?ą?Ń?ŕ?Ü?â?ă?????Ľ?ľ???????ë???ě?ď?ô?ó?ő?°?ü??ťô?•?Ł?ßťő?????Ş?ş?´?Ř?ą???Š???ľ???ř???ć?ş???ű?ć?ü?ř????" +
  676. "Œ¨Œ?ŒĆŒŘŒ?ŒąŒ?Œ?ŒĽŒľŒ?Œ?Œ?Œ?Œ?ŒŞŒşŒ?ŒćŒřœÄœ?œÇœÉœŃœÖœÜœáœŕœâœäœăœĺœ?œéŒ?" +
  677. "–?–ą–?–?–Ľ–ľ–?–?–?–?–?–Ş–ş–?–ć–ř—Ä—?—Ç—É—Ń—Ö—Ü—á—ŕ—â—ä—ă—ĺ—?—é—?—?—ë—í—ě—î—ď—ń—ó—ň—ô—ő—ú—?—ű—ü—°—Ł—•—ß—Š—´—?—Ř—ą—?—ľ—?—?—Ş—?—ř“?“ă“?“?“ë“ě“ď“ó“ô“ő“?“ü“°“Ł“•“ß“Š“´“?“Ř“ą“?“ľ“?“?“Ş“?“ř”Ĕǔєܔŕ”ä”ĺ”é”ë”ě”ď”ó”ô”ő”?”ü”°”Ł”•”ß”Š”´”?”Ř”ą”?”ľ”?" +
  678. "’°’˘’Ł’§’•’ś’ß’Ž’Š’™’´’¨’?’Ć’Ř’?’ą’?’?’Ľ’ľ’?’?’?’?’?’Ş’ş’?’ć’ř÷Ä÷?÷Ç÷É÷Ń÷Ö÷Ü÷á"
  # Regex source for a CamelCase wiki word: an upper-case letter, one or more
  # lower-case letters, another upper-case letter, then word characters.
  679. WIKI_WORD_PATTERN = '[A-Z' + I18N_HIGHER_CASE_LETTERS + '][a-z' + I18N_LOWER_CASE_LETTERS + ']+[A-Z' + I18N_HIGHER_CASE_LETTERS + ']\w+'
  # Matches a lower-case letter directly followed by an upper-case letter;
  # sanitized_id_for and sanitized_reference_for insert a space at that point.
  680. CAMEL_CASED_WORD_BORDER = /([a-z#{I18N_LOWER_CASE_LETTERS}])([A-Z#{I18N_HIGHER_CASE_LETTERS}])/u
  # Groups: 1 = optional '":' prefix (the word is the target of a Textile
  # link), 2 = optional backslash escape, 3 = the wiki word itself.
  # NOTE(review): the third argument to Regexp.new is an old-style encoding
  # option -- confirm how the targeted Ruby version treats it.
  681. WIKI_WORD = Regexp.new('(":)?(\\\\)?(' + WIKI_WORD_PATTERN + ')\b', 0, "utf-8")
  # Explicit wiki link "[[target]]", with the same optional '":' prefix in group 1.
  682. WIKI_LINK = /(":)?\[\[([^\]]+)\]\]/
  # Replaces CamelCase wiki words in +text+ (in place) with an <xref> to the
  # sanitized form of the word. A word escaped with a leading backslash is
  # emitted as plain text (without the backslash).
  #
  # The condition used to read "!=~", which Ruby parses as "!= (~ regex)",
  # i.e. a comparison with the result of matching the regex against $_. As a
  # result unprefixed wiki words were never linked. It is now the intended
  # "does not match" operator.
  def inline_docbook_wiki_words( text )
    text.gsub!( WIKI_WORD ) do |m|
      textile_link_suffix, escape, page_name = $~[1..3]
      if escape.nil? && textile_link_suffix !~ /https?:\/\/[^\s]+$/
        "#{textile_link_suffix}<xref linkend=\"#{ sanitized_reference_for page_name }\"></xref>"
      else
        "#{textile_link_suffix}#{page_name}"
      end
    end
  end
  # Replaces explicit wiki links ("[[123]]") in +text+ (in place) with an
  # <xref> whose linkend is the sanitized reference for "id123". An optional
  # '":' prefix in front of the link is kept.
  def inline_docbook_wiki_links( text )
    text.gsub!( WIKI_LINK ) do |m|
      prefix, target = $~[1], $~[2]
      linkend = sanitized_reference_for( "id#{target}" )
      "#{prefix}<xref linkend=\"#{ linkend }\"></xref>"
    end
  end
  699. # Search and replace for glyphs (quotes, dashes, other symbols)
  # Applies every DOCBOOK_GLYPHS substitution to +text+ in place, in table
  # order. An entry may name a toggle method as its third element; the entry
  # is skipped when that method returns a true value.
  def docbook_pgl( text )
    DOCBOOK_GLYPHS.each do |pattern, replacement, toggle|
      disabled = toggle && method( toggle ).call
      text.gsub!( pattern, replacement ) unless disabled
    end
  end
  706. # Parses attribute lists and builds a Docbook XML attribute string
  # Turns a Textile attribute string such as "(class#id)" into an attribute
  # string (" role=... id=..."). Returns '' for nil input. For the 'td'
  # element, "\N" and "/N" additionally yield colspan / rowspan attributes.
  # Style ({...}) and language ([...]) parts are stripped and not emitted.
  def docbook_pba( text_in, element = "" )
    return '' unless text_in
    text = text_in.dup
    colspan = rowspan = nil
    if element == 'td'
      colspan = $1 if text =~ /\\(\d+)/
      rowspan = $1 if text =~ /\/(\d+)/
    end
    # strip the parts that have no Docbook counterpart
    text.sub!( /\{([^}]*)\}/, '' ) unless filter_styles
    text.sub!( /\[([^)]+?)\]/, '' )
    cls = text.sub!( /\(([^()]+?)\)/, '' ) ? $1 : nil
    # "(class#id)" carries both a class and an id
    cls, id = $1, $2 if cls =~ /^(.*?)#(.*)$/
    pairs = []
    pairs << ['role', cls] unless cls.to_s.empty?
    pairs << ['id', id] if id
    pairs << ['colspan', colspan] if colspan
    pairs << ['rowspan', rowspan] if rowspan
    pairs.map { |name, value| " #{ name }=\"#{ value }\"" }.join
  end
  # Extracts the optional " role. <word>" and " id. <digits>" markers from a
  # heading and truncates +text+ (in place) to what precedes them.
  #
  # Returns [content_id, role], where content_id is "id<digits>" or nil and
  # role is the role word or nil.
  #
  # The role marker is handled first and everything after it is discarded, so
  # an id marker is only seen when it comes before the role marker.
  def sanitize_content( text="" )
    role = nil
    if text =~ /(.*?) role\. (\w+)/
      role = $2
      text.replace $1
    end
    content_id = nil
    if text =~ /(.*?) id\. ([0-9]+)/
      content_id = "id#{$2}"
      text.replace $1
    end
    return content_id, role
  end
  # Derives an id from +text+: CamelCase borders become spaces, the result is
  # lower-cased, whitespace becomes "-", and everything except letters,
  # digits, "-", "{" and "}" is dropped. The id is recorded in @ids (once)
  # and returned.
  def sanitized_id_for( text )
    word = text.gsub( CAMEL_CASED_WORD_BORDER, '\1 \2' ).downcase
    word = word.gsub( /\s/, '-' ).gsub( /[^A-Za-z0-9\-\{\}]/, '' )
    word = word.sub( /^[^\w\{]*/, '' )
    @ids << word unless @ids.include? word
    word
  end
  # Derives a link target from +text+ with the same normalisation that
  # sanitized_id_for applies to ids, records it in @references (once) and
  # returns it. to_docbook later compares @references with @ids to find
  # unresolved references.
  def sanitized_reference_for( text )
    word = text.gsub( CAMEL_CASED_WORD_BORDER, '\1 \2' ).downcase
    word = word.gsub( /\s/, '-' ).gsub( /[^A-Za-z0-9\-\{\}]/, '' )
    word = word.sub( /^[^\w\{]*/, '' )
    @references << word unless @references.include? word
    word
  end
  # Paragraph-like elements: a nested <para> inside one of these is folded
  # into the element itself (see docbook_blocks and post_process_docbook).
  DOCBOOK_PARAS = %w[para remark tip important]
  # Splits +text+ into div(class). sections and block groups and renders each
  # of them, replacing +text+ with the result.
  #
  # Consecutive div markers with the same class continue the current section;
  # a marker with a new class opens a new wrapper element via docbook_div.
  def docbook_blocks( text, deep_code = false )
    @current_class ||= nil

    # Renders one "div(klass). body" section, prefixed with the splitter.
    render_div = lambda do |klass, body|
      block_class = (@current_class == klass) ? nil : %{ role=#{klass.inspect}}
      @current_class = klass
      if body.strip.empty? || block_class.nil?
        BLOCK_GROUP_SPLITTER + body
      else
        BLOCK_GROUP_SPLITTER + docbook_div( 'div', block_class, nil, "\n\n#{body.strip}\n\n", false )
      end
    end

    # Find all occurrences of div(class). and process them as blocks
    text.gsub!( /^div\((.*?)\)\.\s*(.*?)(?=div\([^\)]+\)\.\s*)/m ) { |blk| render_div.call( $1, $2 ) }
    # Take care of the very last div
    text.sub!( /div\((.*?)\)\.\s*(.*)/m ) { |blk| render_div.call( $1, $2 ) }

    # Handle the text now that the placeholders for divs are set, splitting
    # at BLOCK_GROUP_SPLITTER
    wrapper_names = (DOCBOOK_PARAS + DOCBOOK_DIVS).join( "|" )
    wrapper_re = %r{(<(#{wrapper_names}).*?>)\s*(<para[^>]*>)?\s*(.*?)\s*(</para>)?\s*(</\2>)}m
    rendered = text.strip.split( BLOCK_GROUP_SPLITTER.strip ).map do |chunk|
      tag, tag_name, para, body, end_para, end_tag = $~[1..6] if chunk.strip =~ wrapper_re
      if tag && chunk.strip.split[0][/<.*?>/] == tag
        # the chunk is one wrapper element: process only its body
        if DOCBOOK_PARAS.include? tag_name
          tag = "#{para}#{tag}"
          end_tag = "#{end_para}#{end_tag}"
        end
        body = docbook_block_groups( body, deep_code )
        body = "\n" + body.strip + "\n" unless DOCBOOK_PARAS.include? tag_name
        tag + body + end_tag + "\n"
      else
        docbook_block_groups( chunk, deep_code )
      end
    end
    text.replace( rendered.join )
  end
  # Splits +text+ at BLOCKS_GROUP_RE, renders every piece with docbook_blk
  # and replaces +text+ with the pieces joined by newlines.
  def docbook_block_groups( text, deep_code = false )
    pieces = text.split( BLOCKS_GROUP_RE ).map { |blk| docbook_blk( blk, deep_code ) }
    text.replace( pieces.join( "\n" ) )
  end
  # Renders a single block. Blocks containing a shelf placeholder ("<N>") and
  # blocks that start with a tag outside SIMPLE_DOCBOOK_TAGS are returned
  # untouched. Otherwise indented sub-blocks are processed recursively, every
  # block_* rule is tried, and a block that no rule claimed becomes a <para>
  # (or a <programlisting> when +deep_code+ is set).
  def docbook_blk( text, deep_code = false )
    return text if text =~ /<[0-9]+>/
    plain = text !~ /\A[#*> ]/
    # skip blocks that are complex HTML
    return text if text =~ /^<\/?(\w+).*>/ && !SIMPLE_DOCBOOK_TAGS.include?( $1 )

    text.strip!
    return text if text.empty?

    # search for indentation levels
    code_blk = nil
    text.gsub!( /((?:\n(?:\n^ +[^\n]*)+)+)/m ) do |iblk|
      flush_left iblk
      docbook_blocks iblk, plain
      if plain
        # indented text below a plain block is moved behind the block
        code_blk = iblk
        ""
      else
        iblk
      end
    end

    # every block rule gets its turn; remember which ones applied
    applied = @rules.select do |rule_name|
      rule_name.to_s.match( /^block_/ ) && method( rule_name ).call( text )
    end
    if applied.empty?
      text =
        if deep_code
          "<para><programlisting>#{ text }</programlisting></para>"
        else
          "<para>#{text}</para>\n"
        end
    end
    text << "\n#{ code_blk }"
    text
  end
  # Chooses the Docbook element for a block or span from keywords in its
  # attribute string.
  #
  # Returns [element, atts]. For the keyword cases +atts+ is replaced (nil
  # for most, a fixed role for synopsis / author); without a keyword the
  # default element +ht+ and the original +atts+ are returned. The xref case
  # also rewrites +content+ in place. Keywords are checked in the order
  # listed below and the first hit wins.
  def docbook_sanitize_para(atts, content, ht = "para")
    # keyword => element; these cases simply drop the attribute string
    plain_elements = [
      [/comment/,    "remark"],
      [/preface/,    "preface"],
      [/blockquote/, "blockquote"],
      [/warning/,    "warning"],
      [/note/,       "note"],
      [/tip/,        "tip"],
      [/important/,  "important"],
      [/filename/,   "filename"],
      [/production/, "remark"]
    ]
    hit = plain_elements.find { |pattern, _element| pattern === atts }
    return hit[1], nil if hit

    case atts
    when /xref/
      if content =~ /^(.*)\[Hack \#(.*)\]$/
        # "text [Hack #name]" links the text to the named hack
        name = $2
        ht = %Q{link linkend="#{sanitized_reference_for name}"}
        content.gsub!( /^(.*)\s\[Hack \#(.*)\]$/, '\1' )
      else
        # otherwise the content itself is the target and is not displayed
        ht = %Q{xref linkend="#{sanitized_reference_for content}"}
        content.replace ''
      end
      atts = nil
    when /synopsis/
      ht = "para"
      atts = %{ role="hack synopsis"}
    when /author/
      ht = "para"
      atts = %{ role="hacks-contributor"}
    when /technical/
      ht = "command"
      atts = nil
    end
    return ht, atts
  end
  871. end