PageRenderTime 50ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/gollum/markup.rb

https://bitbucket.org/destructuring/gollum
Ruby | 664 lines | 397 code | 76 blank | 191 comment | 66 complexity | 1716c45bc88fc73c660c3b85ce0dea13 MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, CC-BY-SA-3.0, MIT
  1. # ~*~ encoding: utf-8 ~*~
  2. require 'digest/sha1'
  3. require 'cgi'
  4. require 'pygments'
  5. require 'base64'
  6. require File.expand_path '../frontend/helpers', __FILE__
  7. require File.expand_path '../gitcode', __FILE__
  8. # initialize Pygments
  9. Pygments.start
  10. module Gollum
  11. class Markup
  12. include Precious::Helpers
  13. attr_accessor :toc
  14. attr_reader :metadata
  # Build a Markup renderer for a single wiki page.
  #
  # page - The Gollum::Page whose text is going to be rendered.
  #
  # Returns a new Gollum::Markup object, ready for rendering.
  def initialize(page)
    # State copied from the page.
    @wiki        = page.wiki
    @name        = page.filename
    @data        = page.text_data
    # The version id is only recorded when the page has a version.
    @version     = page.version.id if page.version
    @format      = page.format
    @sub_page    = page.sub_page
    @parent_page = page.parent_page
    @dir         = ::File.dirname(page.path)

    # Placeholder maps filled by the extract_* passes.
    @tagmap  = {}
    @codemap = {}
    @wsdmap  = {}
    @premap  = {}

    @toc      = nil
    @metadata = nil

    # FORMAT has the value 1, so XOR-ing with 1 removes it from the
    # default XHTML save options.
    save_options = Nokogiri::XML::Node::SaveOptions::DEFAULT_XHTML ^ 1
    @to_xml = { :save_with => save_options, :indent => 0, :encoding => 'UTF-8' }
  end
  # Render the content with Gollum wiki syntax on top of the file's own
  # markup language.
  #
  # no_follow - Boolean. When true the wiki's history sanitizer is used
  #             instead of the regular sanitizer (intended to add
  #             rel="nofollow" to all <a> tags).
  # encoding  - Encoding Constant or String, passed on to process_code.
  #
  # Yields the parsed Nokogiri fragment just before it is serialized, if a
  # block is given.
  #
  # Returns the formatted String content.
  def render(no_follow = false, encoding = nil)
    sanitize = no_follow ? @wiki.history_sanitizer : @wiki.sanitizer

    # Replace everything Gollum-specific with placeholders before the
    # underlying markup renderer sees the text.
    data = @data.dup
    data = extract_metadata(data)
    data = extract_gitcode(data)
    data = extract_code(data)
    data = extract_wsd(data)
    data = extract_tags(data)

    begin
      data = GitHub::Markup.render(@name, data)
      raise "There was an error converting #{@name} to HTML." if data.nil?
    rescue StandardError, ScriptError => e
      # Render failures (including a renderer that fails to load) become an
      # inline error. Signals and SystemExit are deliberately not rescued.
      # The message is escaped because it can contain the file name.
      data = %{<p class="gollum-error">#{CGI.escapeHTML(e.message.to_s)}</p>}
    end

    data = process_tags(data)
    data = process_code(data, encoding)

    doc = Nokogiri::HTML::DocumentFragment.parse(data)
    doc = sanitize.clean_node!(doc) if sanitize
    doc, toc = process_headers(doc)
    @toc = @sub_page ? (@parent_page ? @parent_page.toc_data : "[[_TOC_]]") : toc
    yield doc if block_given?

    # nokogiri's save options are ored together. FORMAT has a value of 1 so ^ 1 removes it.
    # formatting will create extra spaces in pre tags.
    # https://github.com/sparklemotion/nokogiri/issues/782
    # DEFAULT_HTML encodes unicode so XHTML is used for proper unicode support in href.
    data = doc.to_xml(@to_xml)

    data = process_toc_tags(data)
    data = process_wsd(data)
    # Drop empty paragraphs left behind by the placeholder substitutions.
    data.gsub!('<p></p>', '')
    data
  end
  # Inserts header anchors and creates TOC
  #
  # doc - Nokogiri parsed document
  #
  # Every h1..h6 gets an empty <a class="anchor"> child whose id is derived
  # from the header text. A nested <ul>/<li> table of contents is built
  # alongside, one list level per header level.
  #
  # Returns an Array of [doc, toc] where toc is the serialized TOC String,
  # or nil when the document has no headers.
  def process_headers(doc)
    toc = nil
    # The insertion point and its depth must be declared outside the block.
    # A variable first assigned inside the block is reset on every iteration,
    # which would restart the list nesting at the TOC root for each header.
    tail = nil
    tail_level = 0

    doc.css('h1,h2,h3,h4,h5,h6').each do |h|
      # must escape "
      h_name = h.content.gsub(' ', '-').gsub('"', '%22')
      level  = h.name.gsub(/[hH]/, '').to_i

      # Add anchors
      h.add_child(%Q{<a class="anchor" id="#{h_name}" href="##{h_name}"></a>})

      # Build TOC
      toc  ||= Nokogiri::XML::DocumentFragment.parse('<div class="toc"><div class="toc-title">Table of Contents</div></div>')
      tail ||= toc.child

      # Descend: open one <ul> per missing level.
      while tail_level < level
        tail = tail.add_child(Nokogiri::XML::Node.new('ul', doc))
        tail_level += 1
      end

      # Ascend: climb back to the list that owns this level.
      while tail_level > level
        tail = tail.parent
        tail_level -= 1
      end

      entry = Nokogiri::XML::Node.new('li', doc)
      # h.content is decoded text; re-escape it so it is added as text and
      # not parsed as markup.
      entry.add_child(%Q{<a href="##{h_name}">#{CGI.escapeHTML(h.content)}</a>})
      tail.add_child(entry)
    end

    toc = toc.to_xml(@to_xml) if toc
    [doc, toc]
  end
  116. #########################################################################
  117. #
  118. # Tags
  119. #
  120. #########################################################################
  # Swap every [[tag]] for a SHA1 placeholder and remember the tag body in
  # the tagmap.
  #
  # data - The raw String data. It is modified in place.
  #
  # A tag preceded by a single quote (and not followed by one) is emitted
  # literally without the quote. Two-part links of the form
  # [[file:target][label]] are stored as "label|target" with the first
  # ".org" removed from the target; other two-part links are left untouched.
  # Nothing is extracted when the page format is :asciidoc.
  #
  # Returns the placeholder'd String data.
  def extract_tags(data)
    return data if @format == :asciidoc

    data.gsub!(/(.?)\[\[(.+?)\]\]([^\[]?)/m) do
      # Copy the match data first; later regexp calls would overwrite it.
      whole = $&
      lead, body, trail = $1, $2, $3

      if lead == "'" && trail != "'"
        "[[#{body}]]#{trail}"
      elsif !body.include?('][')
        digest = Digest::SHA1.hexdigest(body)
        @tagmap[digest] = body
        "#{lead}#{digest}#{trail}"
      elsif body[0..4] == 'file:'
        target, label = body.split('][')
        target = target[5..-1] # strip the "file:" prefix
        link = "#{label}|#{target.sub(/\.org/, '')}"
        digest = Digest::SHA1.hexdigest(link)
        @tagmap[digest] = link
        "#{lead}#{digest}#{trail}"
      else
        whole
      end
    end
    data
  end
  # Substitute every tag placeholder with its final markup.
  #
  # data - The String data (with placeholders). It is modified in place.
  #
  # A placeholder found inside preformatted markup is turned back into the
  # literal [[tag]]. Any other placeholder becomes the processed tag, with
  # "%2F" converted back to "/".
  #
  # Returns the marked up String data.
  def process_tags(data)
    @tagmap.each do |id, tag|
      literal = is_preformatted?(data, id)
      data.gsub!(id) do
        literal ? "[[#{tag}]]" : process_tag(tag).gsub('%2F', '/')
      end
    end
    data
  end
  # Names of the tags whose content is treated as preformatted.
  PREFORMATTED_TAGS = %w(code tt)

  # Locate `id` within `data` and report whether the first node containing
  # it is a preformatted tag or sits inside one.
  #
  # data - The String data (with placeholders).
  # id   - The String SHA1 hash.
  #
  # Returns true or false, or nil when no node contains the id.
  def is_preformatted?(data, id)
    fragment = Nokogiri::HTML::DocumentFragment.parse(data)
    holder = fragment.search("[text()*='#{id}']").first
    return nil unless holder
    return true if PREFORMATTED_TAGS.include?(holder.name)

    holder.ancestors.any? { |ancestor| PREFORMATTED_TAGS.include?(ancestor.name) }
  end
  # Turn one tag into its final HTML form.
  #
  # tag - The String tag contents (the stuff inside the double
  #       brackets).
  #
  # The _TOC_ tag is passed through as [[_TOC_]]. Otherwise the tag is tried
  # as an image, then as a file link, and finally rendered as a page link.
  #
  # Returns the String HTML version of the tag.
  def process_tag(tag)
    return %{[[#{tag}]]} if tag =~ /^_TOC_$/

    process_image_tag(tag) ||
      process_file_link_tag(tag) ||
      process_page_link_tag(tag)
  end
  # Attempt to process the tag as an image tag.
  #
  # tag - The String tag contents (the stuff inside the double brackets).
  #
  # The first "|"-separated part is the image name. It is an image when it
  # resolves through find_file, or when it is an http(s) URL ending in
  # jpg, png, gif, svg or bmp. The remaining parts are options read through
  # parse_image_tag_options: align, float, frame, width, height and alt.
  #
  # Returns the String HTML if the tag is a valid image tag or nil
  # if it is not.
  def process_image_tag(tag)
    parts = tag.split('|')
    return if parts.size.zero?

    name = parts[0].strip
    path = if file = find_file(name)
      ::File.join @wiki.base_path, file.path
    elsif name =~ /\Ahttps?:\/\/.+(jpg|png|gif|svg|bmp)\z/i
      name
    end
    return unless path

    opts = parse_image_tag_options(tag)

    containered = false
    classes = [] # applied to whatever the outermost container is
    attrs   = [] # applied to the image

    align = opts['align']
    if opts['float']
      containered = true
      align ||= 'left'
      classes << "float-#{align}" if %w{left right}.include?(align)
    elsif %w{top texttop middle absmiddle bottom absbottom baseline}.include?(align)
      attrs << %{align="#{align}"}
    elsif %w{left center right}.include?(align)
      containered = true
      classes << "align-#{align}"
    end

    # A bare option such as "|width" is parsed as `true`; only a String can
    # be a dimension. \A and \z anchor the whole value so a multi-line value
    # cannot pass validation.
    %w{width height}.each do |dimension|
      value = opts[dimension]
      if value.is_a?(String) && value =~ /\A\d+(\.\d+)?(em|px)\z/
        attrs << %{#{dimension}="#{value}"}
      end
    end

    # alt comes from the wiki text and lands in an attribute and in the
    # caption, so escape it once here.
    alt = opts['alt']
    alt = CGI.escapeHTML(alt.to_s) if alt
    attrs << %{alt="#{alt}"} if alt

    attr_string = attrs.empty? ? '' : attrs.join(' ') + ' '
    image = %{<img src="#{CGI.escapeHTML(path)}" #{attr_string}/>}
    return image unless opts['frame'] || containered

    classes << 'frame' if opts['frame']
    caption = alt ? %{<span>#{alt}</span>} : ''
    %{<span class="#{classes.join(' ')}"><span>#{image}#{caption}</span></span>}
  end
  # Parse any options present on the image tag and extract them into a
  # Hash of option names and values.
  #
  # tag - The String tag contents (the stuff inside the double brackets).
  #       Everything after the first "|" is treated as "|"-separated options.
  #
  # Each option is split on its first "=" only, so a value may itself contain
  # "=". Blank option segments are skipped.
  #
  # Returns the options Hash:
  #   key - The String option name.
  #   val - The String option value or true if it is a binary option.
  def parse_image_tag_options(tag)
    # The slice is nil when the tag has no parts at all.
    option_parts = tag.split('|')[1..-1] || []

    option_parts.inject({}) do |memo, attr|
      key, value = attr.split('=', 2)
      next memo if key.nil? || key.strip.empty?

      # "name" and "name=" are both binary options.
      memo[key.strip] = (value.nil? || value.empty?) ? true : value.strip
      memo
    end
  end
  # Try to render the tag as a link to a file.
  #
  # tag - The String tag contents (the stuff inside the double
  #       brackets), in the form "label|target".
  #
  # The target is either a file resolvable through find_file, which links
  # below the wiki base path, or an http(s) URL, which is linked as-is.
  #
  # Returns the String HTML if the tag is a valid file link tag or nil
  # if it is not.
  def process_file_link_tag(tag)
    parts = tag.split('|')
    return if parts.size.zero?

    label  = parts[0].strip
    target = parts[1] && parts[1].strip

    href = nil
    if target && (file = find_file(target))
      href = ::File.join(@wiki.base_path, file.path)
    elsif target =~ %r{^https?://}
      href = target
    end

    href ? %{<a href="#{href}">#{label}</a>} : nil
  end
  # Render the tag as a link to a wiki page, or to an external URL.
  #
  # tag - The String tag contents (the stuff inside the double
  #       brackets): "name" or "name|page name". The two parts are
  #       swapped when the page format is :mediawiki.
  #
  # A lone http(s) URL becomes a plain external link. Anything else becomes
  # an internal link flagged "present" or "absent" depending on whether
  # find_page_from_name finds the page.
  #
  # Returns the String HTML of the link.
  def process_page_link_tag(tag)
    pieces = tag.split('|')
    pieces.reverse! if @format == :mediawiki

    name, page_name = *pieces.compact.map(&:strip)
    cname = @wiki.page_class.cname(page_name || name)

    if page_name.nil? && name =~ %r{^https?://}
      return %{<a href="#{name}">#{name}</a>}
    end

    page, extra = find_page_from_name(cname)
    if page
      presence = "present"
      target   = page.escaped_url_path
    else
      presence = "absent"
      target   = CGI.escape(cname)
    end

    # //page is invalid
    # strip all duplicate forward slashes using helpers.rb trim_leading_slash
    # //page => /page
    link = trim_leading_slash(::File.join(@wiki.base_path, target))

    %{<a class="internal #{presence}" href="#{link}#{extra}">#{name}</a>}
  end
  # Replace every literal [[_TOC_]] with the table of contents held in @toc.
  #
  # data - The String data (with placeholders). It is modified in place.
  #
  # The tag is replaced with an empty string when @toc is nil.
  #
  # Returns the marked up String data.
  def process_toc_tags(data)
    toc_html = @toc.nil? ? '' : @toc
    # Block form: the replacement is inserted verbatim.
    data.gsub!("[[_TOC_]]") { toc_html }
    data
  end
  # Look a file up in the repo.
  #
  # name    - The String absolute or relative path of the file. A name with
  #           a leading "/" is looked up without its first character; any
  #           other name is resolved against the page's directory.
  # version - The version passed through to the wiki lookup
  #           (default: the page's version id).
  #
  # Returns whatever @wiki.file returns: the Gollum::File or nil if none
  # was found.
  def find_file(name, version=@version)
    repo_path =
      if name =~ /^\//
        name[1..-1]
      elsif @dir == '.'
        name
      else
        ::File.join(@dir, name)
      end

    @wiki.file(repo_path, version)
  end
  # Find a page from a given cname. If nothing matches and the cname has an
  # anchor (#), the lookup is retried without the anchor.
  #
  # cname - The String canonical page name including path.
  #
  # Returns the page when the full cname matches. Otherwise, if the cname
  # contains "#", returns an Array of [page or nil, String anchor including
  # the "#"]. Returns nil when nothing matches and there is no anchor.
  def find_page_from_name(cname)
    last_slash = cname.rindex('/')

    page =
      if last_slash
        # Split into directory (keeping the trailing slash) and page name.
        @wiki.paged(cname[(last_slash + 1)..-1], cname[0..last_slash])
      else
        @wiki.paged(cname, '/') || @wiki.page(cname)
      end
    return page if page

    anchor_at = cname.index('#')
    return nil unless anchor_at

    [@wiki.page(cname[0...anchor_at]), cname[anchor_at..-1]]
  end
  #########################################################################
  #
  # Gitcode - pull code in from a file and replace the reference with a
  # regular fenced code block, which is picked up by the next parse step.
  # Acceptable formats:
  #   ```language:local-file.ext```
  #   ```language:/abs/other-file.ext```
  #   ```language:github/gollum/master/somefile.txt```
  #
  #########################################################################
  # data - The raw String data.
  #
  # A reference starting with "github/" is fetched through Gollum::Gitcode.
  # Any other reference is looked up with find_file at the wiki's ref; when
  # it is missing, the reference is replaced by a "File not found" message.
  #
  # Returns a new String with the references expanded.
  def extract_gitcode(data)
    data.gsub(/^[ \t]*``` ?([^:\n\r]+):([^`\n\r]+)```/) do
      language = $1
      # Use empty string if $2 is nil.
      uri = $2 || ''

      contents =
        if uri[0..6] == 'github/'
          Gollum::Gitcode.new(uri).contents
        else
          # Detect local file.
          file = find_file(uri, @wiki.ref)
          # How do we communicate a render error?
          next "File not found: #{Rack::Utils::escape_html(uri)}" unless file
          file.raw_data
        end

      "```#{language}\n#{contents}\n```\n"
    end
  end
  415. #########################################################################
  416. #
  417. # Code
  418. #
  419. #########################################################################
  # Extract all code blocks into the codemap and replace with placeholders.
  #
  # data - The raw String data. It is modified in place.
  #
  # Two fence styles are recognized: tilde fences (~~~, optionally followed
  # by pandoc-style attributes such as { .ruby }) and backtick fences
  # (```lang). Each block is replaced by the SHA1 of "lang.code", indented
  # like the opening fence. Its language, code and indent (or the cached
  # output, when check_cache returns one) are stored in @codemap under
  # that id.
  #
  # Returns the placeholder'd String data.
  def extract_code(data)
    data.gsub!(/^([ \t]*)(~~~+) ?([^\r\n]+)?\r?\n(.+?)\r?\n\1(~~~+)[ \t\r]*$/m) do
      m_indent = $1
      m_start  = $2 # ~~~
      m_lang   = $3
      m_code   = $4
      m_end    = $5 # ~~~

      # start and finish tilde fence must be the same length.
      # This has to be `next`: a `return` here would leave extract_code
      # itself and hand '' back as the whole document.
      next '' if m_start.length != m_end.length

      lang = m_lang ? m_lang.strip : nil
      id = Digest::SHA1.hexdigest("#{lang}.#{m_code}")
      cached = check_cache(:code, id)

      # extract lang from { .ruby } or { #stuff .ruby .indent }
      # see http://johnmacfarlane.net/pandoc/README.html#delimited-code-blocks
      if lang
        lang = lang.match(/\.([^}\s]+)/)
        lang = lang[1] unless lang.nil?
      end

      @codemap[id] = cached ?
        { :output => cached } :
        { :lang => lang, :code => m_code, :indent => m_indent }

      "#{m_indent}#{id}" # print the SHA1 ID with the proper indentation
    end

    data.gsub!(/^([ \t]*)``` ?([^\r\n]+)?\r?\n(.+?)\r?\n\1```[ \t]*\r?$/m) do
      indent, raw_lang, code = $1, $2, $3

      lang = raw_lang ? raw_lang.strip : nil
      id = Digest::SHA1.hexdigest("#{lang}.#{code}")
      cached = check_cache(:code, id)

      @codemap[id] = cached ?
        { :output => cached } :
        { :lang => lang, :code => code, :indent => indent }

      "#{indent}#{id}" # print the SHA1 ID with the proper indentation
    end

    data
  end
  # Strip leading whitespace from a code block, in place. Nothing is removed
  # unless every line is either blank or matches the regex.
  #
  # code  - The code block String to remove spaces from (mutated).
  # regex - A regex to match whitespace
  #
  # Returns the result of String#gsub! (nil when nothing was replaced), or
  # nil when the block does not qualify.
  def remove_leading_space(code, regex)
    strippable = code.lines.all? do |line|
      line =~ /\A\r?\n\Z/ || line =~ regex
    end
    return unless strippable

    code.gsub!(regex, '')
  end
  # Process all code from the codemap and replace the placeholders with the
  # final HTML.
  #
  # data     - The String data (with placeholders). It is modified in place.
  # encoding - Encoding Constant or String (default: 'utf-8').
  #
  # Cached blocks are inserted as stored. Every other block has its leading
  # indentation removed and is highlighted with Pygments. Non-empty
  # highlighter output is passed to update_cache. When the highlighter
  # raises or returns nothing, the block is emitted as HTML-escaped
  # <pre><code> and is not cached.
  #
  # Returns the marked up String data.
  def process_code(data, encoding = nil)
    return data if data.nil? || data.size.zero? || @codemap.size.zero?

    encoding = (encoding || 'utf-8').to_s

    @codemap.each do |id, spec|
      body = spec[:output] # cached

      unless body
        code = spec[:code]
        remove_leading_space(code, /^#{spec[:indent]}/m)
        remove_leading_space(code, /^( |\t)/m)

        highlighted =
          begin
            # must set startinline to true for php to be highlighted without <?
            # http://pygments.org/docs/lexers/
            Pygments.highlight(code, :lexer => spec[:lang], :options => { :encoding => encoding, :startinline => true }).to_s
          rescue StandardError
            # Never splice raw source into the HTML; use the escaped
            # fallback below instead.
            ''
          end

        if highlighted.size > 0
          update_cache(:code, id, highlighted)
          body = highlighted
        else
          body = "<pre><code>#{CGI.escapeHTML(code)}</code></pre>"
        end
      end

      data.gsub!(id) { body }
    end

    data
  end
  516. #########################################################################
  517. #
  518. # Sequence Diagrams
  519. #
  520. #########################################################################
  # Pull every sequence diagram block out of the text, record it in the
  # wsdmap and leave a placeholder behind.
  #
  # data - The raw String data.
  #
  # A block opens with six "{" followed by the style name, and closes with a
  # line of six "}". The placeholder is the SHA1 of the diagram code.
  #
  # Returns a new String with the placeholders in place.
  def extract_wsd(data)
    data.gsub(/^\{\{\{\{\{\{ ?(.+?)\r?\n(.+?)\r?\n\}\}\}\}\}\}\r?$/m) do
      style, code = $1, $2
      digest = Digest::SHA1.hexdigest(code)
      @wsdmap[digest] = { :style => style, :code => code }
      digest
    end
  end
  # Replace each diagram placeholder with the tag produced by
  # Gollum::WebSequenceDiagram for the stored code and style.
  #
  # data - The String data (with placeholders). It is modified in place.
  #
  # Returns the marked up String data.
  def process_wsd(data)
    @wsdmap.each_pair do |id, spec|
      data.gsub!(id) do
        Gollum::WebSequenceDiagram.new(spec[:code], spec[:style]).to_tag
      end
    end
    data
  end
  550. #########################################################################
  551. #
  552. # Metadata
  553. #
  554. #########################################################################
  # Metadata extraction step of the render pipeline.
  #
  # data - The raw String data.
  #
  # This implementation does not parse any metadata markers: it resets
  # @metadata to an empty Hash and hands the data back untouched.
  #
  # Returns the String data, unchanged.
  def extract_metadata(data)
    @metadata = Hash.new
    data
  end
  # Hook for looking up the formatted value of extracted tag data. This
  # default implementation has no cache and always misses.
  #
  # type - Symbol value identifying what type of data is being extracted.
  # id   - String SHA1 hash of original extracted tag data.
  #
  # Returns the String cached formatted data, or nil (always nil here).
  def check_cache(type, id)
    nil
  end
  # Hook for storing the formatted value of extracted tag data. This default
  # implementation stores nothing.
  #
  # type - Symbol value identifying what type of data is being extracted.
  # id   - String SHA1 hash of original extracted tag data.
  # data - The String formatted value to be cached.
  #
  # Returns nothing.
  def update_cache(type, id, data)
    nil
  end
  586. end
  587. MarkupGFM = Markup
  588. end