PageRenderTime 54ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 1ms

/transform-markup/src/main/resources/ruby/asciidoctor/lib/asciidoctor.rb

https://github.com/mkouba/rewrite
Ruby | 907 lines | 463 code | 108 blank | 336 comment | 30 complexity | 2961c1bc995eaa1e896b63f3bb3b7ade MD5 | raw file
Possible License(s): Apache-2.0
  1. require 'rubygems' unless RUBY_VERSION >= '1.9'
  2. require 'strscan'
  3. require 'set'
  4. $:.unshift(File.dirname(__FILE__))
  5. #$:.unshift(File.join(File.dirname(__FILE__), '..', 'vendor'))
  6. # Public: Methods for parsing Asciidoc input files and rendering documents
  7. # using eRuby templates.
  8. #
  9. # Asciidoc documents comprise a header followed by zero or more sections.
  10. # Sections are composed of blocks of content. For example:
  11. #
  12. # Doc Title
  13. # =========
  14. #
  15. # SECTION 1
  16. # ---------
  17. #
  18. # This is a paragraph block in the first section.
  19. #
  20. # SECTION 2
  21. #
  22. # This section has a paragraph block and an olist block.
  23. #
  24. # 1. Item 1
  25. # 2. Item 2
  26. #
  27. # Examples:
  28. #
  29. # Use built-in templates:
  30. #
  31. # lines = File.readlines("your_file.asc")
  32. # doc = Asciidoctor::Document.new(lines)
  33. # html = doc.render
  34. # File.open("your_file.html", "w+") do |file|
  35. # file.puts html
  36. # end
  37. #
  38. # Use custom (Tilt-supported) templates:
  39. #
  40. # lines = File.readlines("your_file.asc")
  41. # doc = Asciidoctor::Document.new(lines, :template_dir => 'templates')
  42. # html = doc.render
  43. # File.open("your_file.html", "w+") do |file|
  44. # file.puts html
  45. # end
  46. module Asciidoctor
  # Numeric security levels that control what a document is permitted to do
  # while it is processed. Each named level below builds on the restrictions
  # of the level that precedes it.
  module SafeMode

    # Turns off every security feature enforced by Asciidoctor (Ruby itself
    # is still subject to its own restrictions).
    UNSAFE = 0

    # Closely parallels safe mode in AsciiDoc: prevents access to files that
    # reside outside of the parent directory of the source file and disables
    # any macro other than the include::[] macro.
    SAFE = 1

    # Everything in SafeMode::SAFE, plus the document may not set attributes
    # that would affect how it is rendered (for instance, changing the
    # backend or the source-highlighter from within the source document).
    # This is the most fundamental level of security for server-side
    # deployments (hence the name).
    SERVER = 10

    # Everything in SafeMode::SERVER, plus the document may not read files
    # from the file system and include their contents. For instance, this
    # level disallows use of the include::[] macro and the embedding of
    # binary content (data uri), stylesheets and JavaScripts referenced by
    # the document. (Asciidoctor and trusted extensions may still be allowed
    # to embed trusted content into the document.)
    #
    # Since Asciidoctor is aiming for wide adoption, this level is the
    # default and is recommended for server-side deployments.
    SECURE = 20

    # A planned level that disallows the use of passthrough macros and
    # prevents the document from setting any known attributes, in addition
    # to all the security features of SafeMode::SECURE.
    #
    # Please note that this level is not currently implemented (and
    # therefore not enforced)!
    #PARANOID = 100
  end
  # The root path of the Asciidoctor gem: the directory one level above the
  # directory that contains this file.
  ROOT_PATH = File.expand_path('..', File.dirname(__FILE__))

  # The default document type.
  # Can influence the markup generated by the render templates.
  DEFAULT_DOCTYPE = 'article'

  # The backend determines the format of the rendered output; html5 is the
  # default.
  DEFAULT_BACKEND = 'html5'

  # Location of the stylesheet bundled with the gem.
  DEFAULT_STYLESHEET_PATH = File.join(ROOT_PATH, 'stylesheets', 'asciidoctor.css')

  # Values of the stylesheet attribute that select the bundled stylesheet.
  DEFAULT_STYLESHEET_KEYS = Set.new(['', 'DEFAULT'])

  # File name (no directory) of the bundled stylesheet.
  DEFAULT_STYLESHEET_NAME = File.basename(DEFAULT_STYLESHEET_PATH)

  # Pointers to the preferred version for a given backend.
  BACKEND_ALIASES = {
    'html'    => 'html5',
    'docbook' => 'docbook45'
  }

  # Default page widths for calculating absolute widths.
  DEFAULT_PAGE_WIDTHS = { 'docbook' => 425 }

  # Default file extensions for the respective base backends.
  DEFAULT_EXTENSIONS = {
    'html'     => '.html',
    'docbook'  => '.xml',
    'asciidoc' => '.ad',
    'markdown' => '.md'
  }

  # Maps a section title underline character to its section level.
  SECTION_LEVELS = {
    '=' => 0,
    '-' => 1,
    '~' => 2,
    '^' => 3,
    '+' => 4
  }
  # Labels recognized as admonition styles.
  ADMONITION_STYLES = Set.new(['NOTE', 'TIP', 'IMPORTANT', 'WARNING', 'CAUTION'])

  # Styles that may be applied to a paragraph.
  # NOTE: AsciiDoc doesn't support pass style for paragraph
  PARAGRAPH_STYLES = Set.new(['comment', 'example', 'literal', 'listing', 'normal', 'pass', 'quote', 'sidebar', 'source', 'verse'])

  # Styles whose content is treated as verbatim.
  VERBATIM_STYLES = Set.new(['literal', 'listing', 'source', 'verse'])

  # Maps a block delimiter to a pair: the block context and the Set of
  # style names listed for that delimiter.
  DELIMITED_BLOCKS = {
    # NOTE: AsciiDoc doesn't support pass style for open block
    '--'   => [:open, Set.new(['comment', 'example', 'literal', 'listing', 'pass', 'quote', 'sidebar', 'source', 'verse', 'admonition'])],
    '----' => [:listing, Set.new(['literal', 'source'])],
    '....' => [:literal, Set.new(['listing', 'source'])],
    '====' => [:example, Set.new(['admonition'])],
    '****' => [:sidebar, Set.new],
    '____' => [:quote, Set.new(['verse'])],
    '++++' => [:pass, Set.new],
    '|===' => [:table, Set.new],
    '!===' => [:table, Set.new],
    '////' => [:comment, Set.new],
    '```'  => [:fenced_code, Set.new],
    '~~~'  => [:fenced_code, Set.new]
  }

  # Maps the text of a break line to the kind of break it denotes.
  BREAK_LINES = {
    "'''" => :ruler,
    '<<<' => :page_break
  }

  # Contexts that identify a list, and the subset that may be nested.
  LIST_CONTEXTS = [:ulist, :olist, :dlist, :colist]
  NESTABLE_LIST_CONTEXTS = [:ulist, :olist, :dlist]

  # Numbering styles of an ordered list and the marker pattern for each.
  ORDERED_LIST_STYLES = [:arabic, :loweralpha, :lowerroman, :upperalpha, :upperroman]
  ORDERED_LIST_MARKER_PATTERNS = {
    :arabic     => /\d+[.>]/,
    :loweralpha => /[a-z]\./,
    :upperalpha => /[A-Z]\./,
    :lowerroman => /[ivx]+\)/,
    :upperroman => /[IVX]+\)/
  }

  LIST_CONTINUATION = '+'

  LINE_BREAK = ' +'

  # NOTE allows for empty space in line as it could be left by the template engine
  BLANK_LINES_PATTERN = /^\s*\n/

  LINE_FEED_ENTITY = '&#10;' # or &#x0A;
  # Flags to control compliance with the behavior of AsciiDoc. The
  # "Compliance value" noted for each flag is the setting that matches what
  # AsciiDoc itself does.
  COMPLIANCE = {

    # AsciiDoc terminates paragraphs adjacent to block content (a delimiter
    # or a block attribute list).
    # Compliance value: true
    # TODO what about literal paragraph?
    :block_terminates_paragraph => true,

    # AsciiDoc does not treat paragraphs labeled with a verbatim style
    # (literal, listing, source, verse) as verbatim; this flag overrides
    # that behavior.
    # Compliance value: false
    :strict_verbatim_paragraphs => true,

    # AsciiDoc allows the start and end delimiters around a block to be
    # different lengths; this flag requires that they be the same.
    # Compliance value: false
    :congruent_block_delimiters => true

  }
  170. # The following pattern, which appears frequently, captures the contents between square brackets,
  171. # ignoring escaped closing brackets (closing brackets prefixed with a backslash '\' character)
  172. #
  173. # Pattern:
  174. # (?:\[((?:\\\]|[^\]])*?)\])
  175. # Matches:
  176. # [enclosed text here] or [enclosed [text\] here]
  177. REGEXP = {
  178. # NOTE: this is an inline admonition note
  179. :admonition_inline => /^(#{ADMONITION_STYLES.to_a * '|'}):\s/,
  180. # [[Foo]]
  181. :anchor => /^\[\[([^\s\[\]]+)\]\]$/,
  182. # Foowhatevs [[Bar]]
  183. :anchor_embedded => /^(.*?)\s*\[\[([^\[\]]+)\]\]$/,
  184. # [[ref]] (anywhere inline)
  185. :anchor_macro => /\\?\[\[([\w":].*?)\]\]/,
  186. # matches any block delimiter:
  187. # open, listing, example, literal, comment, quote, sidebar, passthrough, table
  188. # NOTE position the most common blocks towards the front of the pattern
  189. :any_blk => %r{^(?:--|(?:-|\.|=|\*|_|\+|/){4,}|[\|!]={3,}|(?:`|~){3,}.*)$},
  190. # detect a list item of any sort
  191. # [[:graph:]] is a non-blank character
  192. :any_list => /^(?:
  193. <?\d+>[[:blank:]]+[[:graph:]]|
  194. [[:blank:]]*(?:(?:-|\*|\.){1,5}|\d+\.|[A-Za-z]\.|[IVXivx]+\))[[:blank:]]+[[:graph:]]|
  195. [[:blank:]]*.*?(?::{2,4}|;;)(?:[[:blank:]]+[[:graph:]]|$)
  196. )/x,
  197. # :foo: bar
  198. # :Author: Dan
  199. # :numbered!:
  200. :attr_entry => /^:(\w.*?):(?:[[:blank:]]+(.*))?$/,
  201. # {name?value}
  202. :attr_conditional => /^\s*\{([^\?]+)\?\s*([^\}]+)\s*\}/,
  203. # + Attribute values treat lines ending with ' +' as a continuation,
  204. # not a line-break as elsewhere in the document, where this is
  205. # a forced line break. This should be the same regexp as :line_break,
  206. # below, but it gets its own entry because readability ftw, even
  207. # though repeating regexps ftl.
  208. :attr_continue => /^[[:blank:]]*(.*)[[:blank:]]\+[[:blank:]]*$/,
  209. # :foo!:
  210. :attr_delete => /^:([^:]+)!:$/,
  211. # An attribute list above a block element
  212. #
  213. # Can be strictly positional:
  214. # [quote, Adam Smith, Wealth of Nations]
  215. # Or can have name/value pairs
  216. # [NOTE, caption="Good to know"]
  217. # Can be defined by an attribute
  218. # [{lead}]
  219. :blk_attr_list => /^\[(|[[:blank:]]*[\w\{,"'].*)\]$/,
  220. # block attribute list or block id (bulk query)
  221. :attr_line => /^\[(|[[:blank:]]*[\w\{,"'].*|\[[^\[\]]*\])\]$/,
  222. # attribute reference
  223. # {foo}
  224. # {counter:pcount:1}
  225. :attr_ref => /(\\?)\{(\w+(?:[\-:]\w+)*)(\\?)\}/,
  226. # The author info line that appears immediately following the document title
  227. # John Doe <john@anonymous.com>
  228. :author_info => /^(\w[\w\-'.]*)(?: +(\w[\w\-'.]*))?(?: +(\w[\w\-'.]*))?(?: +<([^>]+)>)?$/,
  229. # [[[Foo]]] (anywhere inline)
  230. :biblio_macro => /\\?\[\[\[([\w:][\w:.-]*?)\]\]\]/,
  231. # callout reference inside literal text
  232. # <1>
  233. # special characters will already be replaced, hence their use in the regex
  234. :callout_render => /\\?&lt;(\d+)&gt;/,
  235. # ...but not while scanning
  236. :callout_scan => /\\?<(\d+)>/,
  237. # <1> Foo
  238. :colist => /^<?(\d+)>[[:blank:]]+(.*)/,
  239. # ////
  240. # comment block
  241. # ////
  242. :comment_blk => %r{^/{4,}$},
  243. # // (and then whatever)
  244. :comment => %r{^//(?:[^/]|$)},
  245. # one,two
  246. # one, two
  247. # one , two
  248. :csv_delimiter => /[[:blank:]]*,[[:blank:]]*/,
  249. # one;two
  250. # one; two
  251. # one ; two
  252. :semicolon_delim => /[[:blank:]]*;[[:blank:]]*/,
  253. # one,two;three;four
  254. :scsv_csv_delim => /[[:blank:]]*[,;][[:blank:]]*/,
  255. # 29
  256. :digits => /^\d+$/,
  257. # foo:: || foo::: || foo:::: || foo;;
  258. # Should be followed by a definition, on the same line...
  259. # foo:: That which precedes 'bar' (see also, <<bar>>)
  260. # ...or on a separate line
  261. # foo::
  262. # That which precedes 'bar' (see also, <<bar>>)
  263. # The term may be an attribute reference
  264. # {term_foo}:: {def_foo}
  265. # REVIEW leading space has already been stripped, so may not need in regex
  266. :dlist => /^[[:blank:]]*(.*?)(:{2,4}|;;)(?:[[:blank:]]+(.*))?$/,
  267. :dlist_siblings => {
  268. # (?:.*?[^:])? - a non-capturing group which grabs longest sequence of characters that doesn't end w/ colon
  269. '::' => /^[[:blank:]]*((?:.*[^:])?)(::)(?:[[:blank:]]+(.*))?$/,
  270. ':::' => /^[[:blank:]]*((?:.*[^:])?)(:::)(?:[[:blank:]]+(.*))?$/,
  271. '::::' => /^[[:blank:]]*((?:.*[^:])?)(::::)(?:[[:blank:]]+(.*))?$/,
  272. ';;' => /^[[:blank:]]*(.*)(;;)(?:[[:blank:]]+(.*))?$/
  273. },
  274. # ====
  275. #:example => /^={4,}$/,
  276. # footnote:[text]
  277. # footnoteref:[id,text]
  278. # footnoteref:[id]
  279. :footnote_macro => /\\?(footnote|footnoteref):\[((?:\\\]|[^\]])*?)\]/,
  280. # image::filename.png[Caption]
  281. # video::http://youtube.com/12345[Cats vs Dogs]
  282. :media_blk_macro => /^(image|video|audio)::(\S+?)\[((?:\\\]|[^\]])*?)\]$/,
  283. # image:filename.png[Alt Text]
  284. # image:filename.png[More [Alt\] Text] (alt text becomes "More [Alt] Text")
  285. :image_macro => /\\?image:([^:\[]+)\[((?:\\\]|[^\]])*?)\]/,
  286. # indexterm:[Tigers,Big cats]
  287. # (((Tigers,Big cats)))
  288. :indexterm_macro => /\\?(?:indexterm:(?:\[((?:\\\]|[^\]])*?)\])|\(\(\((.*?)\)\)\)(?!\)))/m,
  289. # indexterm2:[Tigers]
  290. # ((Tigers))
  291. :indexterm2_macro => /\\?(?:indexterm2:(?:\[((?:\\\]|[^\]])*?)\])|\(\((.*?)\)\)(?!\)))/m,
  292. # whitespace at the beginning of the line
  293. :leading_blanks => /^([[:blank:]]*)/,
  294. # leading parent directory references in path
  295. :leading_parent_dirs => /^(?:\.\.\/)*/,
  296. # + From the Asciidoc User Guide: "A plus character preceded by at
  297. # least one space character at the end of a non-blank line forces
  298. # a line break. It generates a line break (br) tag for HTML outputs.
  299. #
  300. # + (would not match because there's no space before +)
  301. # + (would match and capture '')
  302. # Foo + (would match and capture 'Foo')
  303. :line_break => /^(.*)[[:blank:]]\+$/,
  304. # inline link and some inline link macro
  305. # FIXME revisit!
  306. :link_inline => %r{(^|link:|\s|>|&lt;|[\(\)\[\]])(\\?(?:https?|ftp)://[^\s\[<]*[^\s.,\[<])(?:\[((?:\\\]|[^\]])*?)\])?},
  307. # inline link macro
  308. # link:path[label]
  309. :link_macro => /\\?(?:link|mailto):([^\s\[]+)(?:\[((?:\\\]|[^\]])*?)\])/,
  310. # inline email address
  311. # doc.writer@asciidoc.org
  312. :email_inline => /[\\>:]?\w[\w.%+-]*@[[:alnum:]][[:alnum:].-]*\.[[:alpha:]]{2,4}\b/,
  313. # ----
  314. #:listing => /^\-{4,}$/,
  315. # ....
  316. #:literal => /^\.{4,}$/,
  317. # <TAB>Foo or one-or-more-spaces-or-tabs then whatever
  318. :lit_par => /^([[:blank:]]+.*)$/,
  319. # --
  320. #:open_blk => /^\-\-$/,
  321. # . Foo (up to 5 consecutive dots)
  322. # 1. Foo (arabic, default)
  323. # a. Foo (loweralpha)
  324. # A. Foo (upperalpha)
  325. # i. Foo (lowerroman)
  326. # I. Foo (upperroman)
  327. # REVIEW leading space has already been stripped, so may not need in regex
  328. :olist => /^[[:blank:]]*(\.{1,5}|\d+\.|[A-Za-z]\.|[IVXivx]+\))[[:blank:]]+(.*)$/,
  329. # ''' (ruler)
  330. # <<< (pagebreak)
  331. :break_line => /^('|<){3,}$/,
  332. # ++++
  333. #:pass => /^\+{4,}$/,
  334. # inline passthrough macros
  335. # +++text+++
  336. # $$text$$
  337. # pass:quotes[text]
  338. :pass_macro => /\\?(?:(\+{3}|\${2})(.*?)\1|pass:([a-z,]*)\[((?:\\\]|[^\]])*?)\])/m,
  339. # passthrough macro allowed in value of attribute assignment
  340. # pass:[text]
  341. :pass_macro_basic => /^pass:([a-z,]*)\[(.*)\]$/,
  342. # inline literal passthrough macro
  343. # `text`
  344. :pass_lit => /(^|[^`\w])(\\?`([^`\s]|[^`\s].*?\S)`)(?![`\w])/m,
  345. # placeholder for extracted passthrough text
  346. :pass_placeholder => /\x0(\d+)\x0/,
  347. # ____
  348. #:quote => /^_{4,}$/,
  349. # The document revision info line that appears immediately following the
  350. # document title author info line, if present
  351. # v1.0, 2013-01-01: Ring in the new year release
  352. :revision_info => /^(?:\D*(.*?),)?(?:\s*(?!:)(.*?))(?:\s*(?!^):\s*(.*))?$/,
  353. # '''
  354. #:ruler => /^'{3,}$/,
  355. # ****
  356. #:sidebar_blk => /^\*{4,}$/,
  357. # \' within a word
  358. :single_quote_esc => /(\w)\\'(\w)/,
  359. # an alternative if our backend generated single-quoted html/xml attributes
  360. #:single_quote_esc => /(\w|=)\\'(\w)/,
  361. # used for sanitizing attribute names
  362. :illegal_attr_name_chars => /[^\w\-]/,
  363. # |===
  364. # |table
  365. # |===
  366. #:table => /^\|={3,}$/,
  367. # !===
  368. # !table
  369. # !===
  370. #:table_nested => /^!={3,}$/,
  371. # 1*h,2*,^3e
  372. :table_colspec => /^(?:(\d+)\*)?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?(\d+)?([a-z])?$/,
  373. # 2.3+<.>m
  374. # TODO might want to use step-wise scan rather than this mega-regexp
  375. :table_cellspec => {
  376. :start => /^[[:blank:]]*(?:(\d+(?:\.\d*)?|(?:\d*\.)?\d+)([*+]))?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?([a-z])?\|/,
  377. :end => /[[:blank:]]+(?:(\d+(?:\.\d*)?|(?:\d*\.)?\d+)([*+]))?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?([a-z])?$/
  378. },
  379. # .Foo but not . Foo or ..Foo
  380. :blk_title => /^\.([^\s.].*)$/,
  381. # matches double quoted text, capturing quote char and text (single-line)
  382. :dbl_quoted => /^("|)(.*)\1$/,
  383. # matches double quoted text, capturing quote char and text (multi-line)
  384. :m_dbl_quoted => /^("|)(.*)\1$/m,
  385. # == Foo
  386. # ^ yields a level 2 title
  387. #
  388. # == Foo ==
  389. # ^ also yields a level 2 title
  390. #
  391. # both equivalent to this two-line version:
  392. # Foo
  393. # ~~~
  394. #
  395. # match[1] is the delimiter, whose length determines the level
  396. # match[2] is the title itself
  397. # match[3] is an inline anchor, which becomes the section id
  398. :section_title => /^(={1,5})\s+(\S.*?)(?:\s*\[\[([^\[]+)\]\])?(?:\s+\1)?$/,
  399. # does not begin with a dot and has at least one alphanumeric character
  400. :section_name => /^((?=.*\w+.*)[^.].*?)$/,
  401. # ====== || ------ || ~~~~~~ || ^^^^^^ || ++++++
  402. # TODO build from SECTION_LEVELS keys
  403. :section_underline => /^(?:=|-|~|\^|\+)+$/,
  404. # toc::[]
  405. # toc::[levels=2]
  406. :toc => /^toc::\[(.*?)\]$/,
  407. # * Foo (up to 5 consecutive asterisks)
  408. # - Foo
  409. # REVIEW leading space has already been stripped, so may not need in regex
  410. :ulist => /^[[:blank:]]*(-|\*{1,5})[[:blank:]]+(.*)$/,
  411. # inline xref macro
  412. # <<id,reftext>> (special characters have already been escaped, hence the entity references)
  413. # xref:id[reftext]
  414. :xref_macro => /\\?(?:&lt;&lt;([\w":].*?)&gt;&gt;|xref:([\w":].*?)\[(.*?)\])/m,
  415. # ifdef::basebackend-html[]
  416. # ifndef::theme[]
  417. # ifeval::["{asciidoctor-version}" >= "0.1.0"]
  418. # ifdef::asciidoctor[Asciidoctor!]
  419. # endif::theme[]
  420. # endif::basebackend-html[]
  421. # endif::[]
  422. :ifdef_macro => /^[\\]?(ifdef|ifndef|ifeval|endif)::(\S*?(?:([,\+])\S+?)?)\[(.+)?\]$/,
  423. # "{asciidoctor-version}" >= "0.1.0"
  424. :eval_expr => /^(\S.*?)[[:blank:]]*(==|!=|<=|>=|<|>)[[:blank:]]*(\S.*)$/,
  425. # ...or if we want to be more strict up front about what's on each side
  426. #:eval_expr => /^(true|false|("|'|)\{\w+(?:\-\w+)*\}\2|("|')[^\3]*\3|\-?\d+(?:\.\d+)*)[[:blank:]]*(==|!=|<=|>=|<|>)[[:blank:]]*(true|false|("|'|)\{\w+(?:\-\w+)*\}\6|("|')[^\7]*\7|\-?\d+(?:\.\d+)*)$/,
  427. # include::chapter1.ad[]
  428. # include::example.txt[lines=1;2;5..10]
  429. :include_macro => /^\\?include::([^\[]+)\[(.*?)\]$/,
  430. # http://domain
  431. # https://domain
  432. # data:info
  433. :uri_sniff => /^[[:alpha:]][[:alnum:].+-]*:/i,
  434. :uri_encode_chars => /[^\w\-.!~*';:@=+$,()\[\]]/
  435. }
  # Built-in attribute names mapped to their replacement text. Looking up a
  # name that is not listed writes a warning to STDERR and yields the name
  # wrapped in curly braces (the lookup does not store anything).
  INTRINSICS = Hash.new {|_table, name|
    STDERR.puts "Missing intrinsic: #{name.inspect}"
    "{#{name}}"
  }.update(
    'startsb'        => '[',
    'endsb'          => ']',
    'brvbar'         => '|',
    'caret'          => '^',
    'asterisk'       => '*',
    'tilde'          => '~',
    'plus'           => '&#43;',
    'apostrophe'     => '\'',
    'backslash'      => '\\',
    'backtick'       => '`',
    'empty'          => '',
    'sp'             => ' ',
    'space'          => ' ',
    'two-colons'     => '::',
    'two-semicolons' => ';;',
    'nbsp'           => '&#160;',
    'deg'            => '&#176;',
    'zwsp'           => '&#8203;',
    'quot'           => '&#34;',
    'apos'           => '&#39;',
    'lsquo'          => '&#8216;',
    'rsquo'          => '&#8217;',
    'ldquo'          => '&#8220;',
    'rdquo'          => '&#8221;',
    'wj'             => '&#8288;',
    'amp'            => '&',
    'lt'             => '<',
    'gt'             => '>'
  )

  # Characters that are replaced by an entity reference, keyed by the
  # character.
  SPECIAL_CHARS = {
    '<' => '&lt;',
    '>' => '&gt;',
    '&' => '&amp;'
  }

  # Character class built from the keys of SPECIAL_CHARS.
  SPECIAL_CHARS_PATTERN = /[#{SPECIAL_CHARS.keys.join}]/
  #SPECIAL_CHARS_PATTERN = /(?:<|>|&(?![[:alpha:]]{2,};|#[[:digit:]]{2,}+;|#x[[:alnum:]]{2,}+;))/
  475. # unconstrained quotes:: can appear anywhere
  476. # constrained quotes:: must be bordered by non-word characters
  477. # NOTE these substitutions are processed in the order they appear here and
  478. # the order in which they are replaced is important
  479. QUOTE_SUBS = [
  480. # **strong**
  481. [:strong, :unconstrained, /\\?(?:\[([^\]]+?)\])?\*\*(.+?)\*\*/m],
  482. # *strong*
  483. [:strong, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?\*(\S|\S.*?\S)\*(?=\W|$)/m],
  484. # ``double-quoted''
  485. [:double, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?``(\S|\S.*?\S)''(?=\W|$)/m],
  486. # 'emphasis'
  487. [:emphasis, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?'(\S|\S.*?\S)'(?=\W|$)/m],
  488. # `single-quoted'
  489. [:single, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?`(\S|\S.*?\S)'(?=\W|$)/m],
  490. # ++monospaced++
  491. [:monospaced, :unconstrained, /\\?(?:\[([^\]]+?)\])?\+\+(.+?)\+\+/m],
  492. # +monospaced+
  493. [:monospaced, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?\+(\S|\S.*?\S)\+(?=\W|$)/m],
  494. # __emphasis__
  495. [:emphasis, :unconstrained, /\\?(?:\[([^\]]+?)\])?\_\_(.+?)\_\_/m],
  496. # _emphasis_
  497. [:emphasis, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?_(\S|\S.*?\S)_(?=\W|$)/m],
  498. # ##unquoted##
  499. [:none, :unconstrained, /\\?(?:\[([^\]]+?)\])?##(.+?)##/m],
  500. # #unquoted#
  501. [:none, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?#(\S|\S.*?\S)#(?=\W|$)/m],
  502. # ^superscript^
  503. [:superscript, :unconstrained, /\\?(?:\[([^\]]+?)\])?\^(.+?)\^/m],
  504. # ~subscript~
  505. [:subscript, :unconstrained, /\\?(?:\[([^\]]+?)\])?\~(.+?)\~/m]
  506. ]
  507. # NOTE in Ruby 1.8.7, [^\\] does not match start of line,
  508. # so we need to match it explicitly
  509. # order is significant
  510. REPLACEMENTS = [
  511. # (C)
  512. [/\\?\(C\)/, '&#169;', :none],
  513. # (R)
  514. [/\\?\(R\)/, '&#174;', :none],
  515. # (TM)
  516. [/\\?\(TM\)/, '&#8482;', :none],
  517. # foo -- bar
  518. [/(^|\n| |\\)--( |\n|$)/, '&#8201;&#8212;&#8201;', :none],
  519. # foo--bar
  520. [/(\w)\\?--(?=\w)/, '&#8212;', :leading],
  521. # ellipsis
  522. [/\\?\.\.\./, '&#8230;', :leading],
  523. # single quotes
  524. [/(\w)\\?'(\w)/, '&#8217;', :bounding],
  525. # right arrow ->
  526. [/\\?-&gt;/, '&#8594;', :none],
  527. # right double arrow =>
  528. [/\\?=&gt;/, '&#8658;', :none],
  529. # left arrow <-
  530. [/\\?&lt;-/, '&#8592;', :none],
  531. # right left arrow <=
  532. [/\\?&lt;=/, '&#8656;', :none],
  533. # restore entities
  534. [/\\?(&)amp;((?:[[:alpha:]]+|#[[:digit:]]+|#x[[:alnum:]]+);)/, '', :bounding]
  535. ]
  # Public: Read the AsciiDoc source input and parse it into an
  # Asciidoctor::Document
  #
  # How the source lines are obtained depends on the type of the input:
  #
  # * File   - all lines are read and the attributes docfile, docdir, docname,
  #            docdate, doctime and docdatetime are stored in
  #            options[:attributes] (the Hash is created if it is absent)
  # * object that responds to readlines (e.g., IO, StringIO) - rewound when
  #            possible, then read in full
  # * String - split into lines
  # * Array  - duplicated (shallow copy)
  #
  # input   - the AsciiDoc source as a File, IO, StringIO, String or Array
  # options - a Hash of options to control processing (default: {})
  #           see Asciidoctor::Document#initialize for details. When the
  #           :monitor option holds a Hash, the elapsed times are recorded in
  #           it under the keys :read, :parse and :load.
  # block   - a callback block for handling include::[] directives
  #
  # Returns the Asciidoctor::Document
  # Raises RuntimeError if the type of the input is not supported
  def self.load(input, options = {}, &block)
    monitor = options.fetch(:monitor, false)
    timer = Time.now if monitor

    source_lines = nil
    case
    when input.is_a?(File)
      attributes = (options[:attributes] ||= {})
      source_lines = input.readlines
      modified_at = input.mtime
      absolute_path = File.expand_path(input.path)
      # hold off on setting infile and indir until we get a better sense of their purpose
      attributes['docfile'] = absolute_path
      attributes['docdir'] = File.dirname(absolute_path)
      attributes['docname'] = File.basename(absolute_path, File.extname(absolute_path))
      attributes['docdate'] = modified_at.strftime('%Y-%m-%d')
      attributes['doctime'] = modified_at.strftime('%H:%M:%S %Z')
      attributes['docdatetime'] = "#{attributes['docdate']} #{attributes['doctime']}"
    when input.respond_to?(:readlines)
      begin
        input.rewind
      rescue StandardError
        # best effort only; the input is read from its current position
        nil
      end
      source_lines = input.readlines
    when input.is_a?(String)
      source_lines = input.lines.entries
    when input.is_a?(Array)
      source_lines = input.dup
    else
      raise "Unsupported input type: #{input.class}"
    end

    if monitor
      read_time = Time.now - timer
      timer = Time.now
    end

    document = Document.new(source_lines, options, &block)

    if monitor
      parse_time = Time.now - timer
      monitor[:read] = read_time
      monitor[:parse] = parse_time
      monitor[:load] = read_time + parse_time
    end

    document
  end
  # Public: Parse the contents of the AsciiDoc source file into an
  # Asciidoctor::Document
  #
  # The file is opened for reading, handed to Asciidoctor.load as a File (so
  # information about the file is stored in attributes on the Document) and
  # closed again before this method returns, even if loading raises.
  #
  # filename - the String AsciiDoc source filename
  # options  - a Hash of options to control processing (default: {})
  #            see Asciidoctor::Document#initialize for details
  # block    - a callback block for handling include::[] directives
  #
  # Returns the Asciidoctor::Document
  def self.load_file(filename, options = {}, &block)
    # the block form of File.open guarantees the handle is released
    File.open(filename) do |file|
      Asciidoctor.load(file, options, &block)
    end
  end
  # Public: Parse the AsciiDoc source input into an Asciidoctor::Document and
  # render it to the specified backend format
  #
  # Accepts input as an IO, String or String Array object. If the input is a
  # File, information about the file is stored in attributes on the Document.
  #
  # Where the output goes is decided by these options (none of which are
  # passed on to the Document; the caller's options Hash is left untouched):
  #
  # :in_place - if true and the input is a File, the output is written to a
  #             file adjacent to the input file, named using the docname and
  #             outfilesuffix attributes of the Document. Cannot be combined
  #             with :to_file or :to_dir.
  # :to_file  - an object that responds to write (the output is streamed to
  #             it, followed by a trailing endline), or a path. The value
  #             '/dev/null' loads the document but skips rendering.
  # :to_dir   - the directory in which to write the output file. If :to_file
  #             is also given, it is resolved relative to this directory.
  # :mkdirs   - if true, a missing target directory is created.
  #
  # Relative targets are resolved against the :base_dir option, if given,
  # otherwise the current working directory. Path resolution is delegated to
  # Document#normalize_system_path with :recover => false.
  #
  # If the output is going to be written to a file, the header and footer are
  # rendered unless the :header_footer option says otherwise (writing to a
  # file implies creating a standalone document). Otherwise, the header and
  # footer are not rendered by default and the rendered output is returned.
  #
  # When the :monitor option holds a Hash, the elapsed :render, :load_render,
  # :write and :total times are recorded in it (the latter two only when the
  # output is written).
  #
  # input   - the AsciiDoc source as an IO, String or Array
  # options - a Hash of options to control processing (default: {})
  #           see Asciidoctor::Document#initialize for details
  # block   - a callback block for handling include::[] directives
  #
  # Returns the Document object if the rendered result String is written to a
  # file or stream, otherwise the rendered result String
  # Raises ArgumentError if :in_place is combined with :to_file or :to_dir
  # Raises IOError if the target directory does not exist and :mkdirs is not set
  def self.render(input, options = {}, &block)
    # operate on a shallow copy so the output-related keys removed below (and
    # the :header_footer default added) do not leak into the caller's Hash;
    # nested objects such as the :monitor Hash are still shared
    options = options.dup
    in_place = options.delete(:in_place) || false
    to_file = options.delete(:to_file)
    to_dir = options.delete(:to_dir)
    mkdirs = options.delete(:mkdirs) || false
    monitor = options.fetch(:monitor, false)

    write_in_place = in_place && input.is_a?(File)
    write_to_target = to_file || to_dir
    # anything that responds to write receives the output directly
    stream_output = !to_file.nil? && to_file.respond_to?(:write)

    if write_in_place && write_to_target
      raise ArgumentError, 'the option :in_place cannot be used with either the :to_dir or :to_file option'
    end

    if !options.has_key?(:header_footer) && (write_in_place || write_to_target)
      options[:header_footer] = true
    end

    doc = Asciidoctor.load(input, options, &block)

    if to_file == '/dev/null'
      return doc
    elsif write_in_place
      to_file = File.join(File.dirname(input.path), "#{doc.attributes['docname']}#{doc.attributes['outfilesuffix']}")
    elsif !stream_output && write_to_target
      # NOTE the original code read the undefined local `opts` here, raising a
      # NameError whenever :base_dir was supplied
      working_dir = options.has_key?(:base_dir) ? File.expand_path(options[:base_dir]) : File.expand_path(Dir.pwd)
      # QUESTION should the jail be the working_dir or doc.base_dir???
      jail = doc.safe >= SafeMode::SAFE ? working_dir : nil
      if to_dir
        to_dir = doc.normalize_system_path(to_dir, working_dir, jail, :target_name => 'to_dir', :recover => false)
        if to_file
          # NOTE(review): :target_name is 'to_dir' although the path is to_file - confirm intent
          to_file = doc.normalize_system_path(to_file, to_dir, nil, :target_name => 'to_dir', :recover => false)
          # reestablish to_dir as the final target directory (in the case to_file had directory segments)
          to_dir = File.dirname(to_file)
        else
          to_file = File.join(to_dir, "#{doc.attributes['docname']}#{doc.attributes['outfilesuffix']}")
        end
      elsif to_file
        # NOTE(review): :target_name is 'to_dir' although the path is to_file - confirm intent
        to_file = doc.normalize_system_path(to_file, working_dir, jail, :target_name => 'to_dir', :recover => false)
        # establish to_dir as the final target directory (in the case to_file had directory segments)
        to_dir = File.dirname(to_file)
      end

      if !File.directory? to_dir
        if mkdirs
          Helpers.require_library 'fileutils'
          FileUtils.mkdir_p to_dir
        else
          raise IOError, "target directory does not exist: #{to_dir}"
        end
      end
    end

    start = Time.now if monitor
    output = doc.render

    if monitor
      render_time = Time.now - start
      monitor[:render] = render_time
      monitor[:load_render] = monitor[:load] + render_time
    end

    if to_file
      start = Time.now if monitor
      if stream_output
        to_file.write output.rstrip
        # ensure there's a trailing endline
        to_file.write "\n"
      else
        File.open(to_file, 'w') {|file| file.write output }
        # these assignments primarily for testing, diagnostics or reporting
        doc.attributes['outfile'] = outfile = File.expand_path(to_file)
        doc.attributes['outdir'] = File.dirname(outfile)
      end

      if monitor
        write_time = Time.now - start
        monitor[:write] = write_time
        monitor[:total] = monitor[:load_render] + write_time
      end

      # NOTE document cannot control this behavior if safe >= SafeMode::SERVER
      if !stream_output && doc.attr?('copycss') &&
          doc.attr?('linkcss') && DEFAULT_STYLESHEET_KEYS.include?(doc.attr('stylesheet'))
        Helpers.require_library 'fileutils'
        outdir = doc.attr('outdir')
        stylesdir = doc.normalize_system_path(doc.attr('stylesdir'), outdir,
            doc.safe >= SafeMode::SAFE ? outdir : nil)
        FileUtils.mkdir_p stylesdir
        FileUtils.cp DEFAULT_STYLESHEET_PATH, stylesdir, :preserve => true
      end

      doc
    else
      output
    end
  end
  # Public: Parse the contents of the AsciiDoc source file into an
  # Asciidoctor::Document and render it to the specified backend format
  #
  # The file is opened for reading, handed to Asciidoctor.render as a File
  # and closed again before this method returns, even if rendering raises.
  #
  # filename - the String AsciiDoc source filename
  # options  - a Hash of options to control processing (default: {})
  #            see Asciidoctor::Document#initialize for details
  # block    - a callback block for handling include::[] directives
  #
  # Returns the Document object if the rendered result String is written to a
  # file, otherwise the rendered result String
  def self.render_file(filename, options = {}, &block)
    # the block form of File.open guarantees the handle is released
    File.open(filename) do |file|
      Asciidoctor.render(file, options, &block)
    end
  end
  732. # NOTE still contemplating this method
  733. #def self.parse_document_header(input, options = {})
  734. # document = Document.new [], options
  735. # reader = Reader.new input, document, true
  736. # Lexer.parse_document_header reader, document
  737. # document
  738. #end
  739. # modules
  740. require 'asciidoctor/debug'
  741. require 'asciidoctor/substituters'
  742. require 'asciidoctor/helpers'
  743. # abstract classes
  744. require 'asciidoctor/abstract_node'
  745. require 'asciidoctor/abstract_block'
  746. # concrete classes
  747. require 'asciidoctor/attribute_list'
  748. require 'asciidoctor/backends/base_template'
  749. require 'asciidoctor/block'
  750. require 'asciidoctor/callouts'
  751. require 'asciidoctor/document'
  752. require 'asciidoctor/inline'
  753. require 'asciidoctor/lexer'
  754. require 'asciidoctor/list_item'
  755. require 'asciidoctor/path_resolver'
  756. require 'asciidoctor/reader'
  757. require 'asciidoctor/renderer'
  758. require 'asciidoctor/section'
  759. require 'asciidoctor/table'
  760. # info
  761. require 'asciidoctor/version'
  762. end