PageRenderTime 57ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/asciidoctor.rb

https://github.com/aslakknutsen/asciidoctor
Ruby | 1514 lines | 551 code | 193 blank | 770 comment | 70 complexity | 06394a449536af760bc1a0feca32b703 MD5 | raw file
Possible License(s): MIT
  1. RUBY_ENGINE = 'unknown' unless defined? RUBY_ENGINE
  2. RUBY_ENGINE_OPAL = (RUBY_ENGINE == 'opal')
  3. RUBY_ENGINE_JRUBY = (RUBY_ENGINE == 'jruby')
  4. RUBY_MIN_VERSION_1_9 = (RUBY_VERSION >= '1.9')
  5. RUBY_MIN_VERSION_2 = (RUBY_VERSION >= '2')
  6. require 'set'
  7. # NOTE "RUBY_ENGINE == 'opal'" conditional blocks are filtered by the Opal preprocessor
  8. if RUBY_ENGINE == 'opal'
  9. require 'encoding' # needed for String.bytes method
  10. require 'strscan'
  11. require 'asciidoctor/opal_ext'
  12. else
  13. autoload :Base64, 'base64'
  14. autoload :FileUtils, 'fileutils'
  15. autoload :OpenURI, 'open-uri'
  16. autoload :StringScanner, 'strscan'
  17. end
  18. # ideally we should use require_relative instead of modifying the LOAD_PATH
  19. $:.unshift File.dirname __FILE__
  20. # Public: Methods for parsing AsciiDoc input files and converting documents
  21. # using eRuby templates.
  22. #
  23. # AsciiDoc documents comprise a header followed by zero or more sections.
  24. # Sections are composed of blocks of content. For example:
  25. #
  26. # = Doc Title
  27. #
  28. # == Section 1
  29. #
  30. # This is a paragraph block in the first section.
  31. #
  32. # == Section 2
  33. #
  34. # This section has a paragraph block and an olist block.
  35. #
  36. # . Item 1
  37. # . Item 2
  38. #
  39. # Examples:
  40. #
  41. # Use built-in converter:
  42. #
  43. # Asciidoctor.convert_file 'sample.adoc'
  44. #
  45. # Use custom (Tilt-supported) templates:
  46. #
  47. # Asciidoctor.convert_file 'sample.adoc', :template_dir => 'path/to/templates'
  48. #
  49. module Asciidoctor
  50. # alias the RUBY_ENGINE constant inside the Asciidoctor namespace
  51. RUBY_ENGINE = ::RUBY_ENGINE
  # The safe mode levels understood by Asciidoctor. Each level is identified
  # by an Integer; the levels documented below build on one another, with a
  # higher number adding restrictions to those of the level beneath it.
  module SafeMode
    # Turns off every security feature enforced by Asciidoctor. (Ruby itself
    # remains subject to its own restrictions.)
    UNSAFE = 0

    # Closely parallels safe mode in AsciiDoc. Files that reside outside of
    # the parent directory of the source file cannot be accessed, and every
    # macro other than the include::[] macro is disabled.
    SAFE = 1

    # Everything enforced by SafeMode::SAFE, plus the document is not allowed
    # to set attributes that would affect its own conversion. For instance,
    # the backend or the source-highlighter cannot be changed using an
    # attribute defined in the source document. This is the most fundamental
    # level of security for server-side deployments (hence the name).
    SERVER = 10

    # Everything enforced by SafeMode::SERVER, plus the document is not
    # allowed to read files from the file system and include their contents.
    # For instance, the include::[] macro cannot be used, nor can binary
    # content (data uri), stylesheets and JavaScripts referenced by the
    # document be embedded. (Asciidoctor and trusted extensions may still be
    # allowed to embed trusted content into the document.)
    #
    # Since Asciidoctor is aiming for wide adoption, this level is the default
    # and is recommended for server-side deployments.
    SECURE = 20

    # A planned level that adds to SafeMode::SECURE by disallowing the use of
    # passthrough macros and preventing the document from setting any known
    # attributes.
    #
    # NOTE this level is not currently implemented (and therefore not
    # enforced), which is why the constant remains commented out.
    # PARANOID = 100
  end
  86. # Flags to control compliance with the behavior of AsciiDoc
  # Switches that control how closely Asciidoctor follows the behavior of
  # AsciiDoc. Every switch registered through Compliance.define is exposed as
  # a reader/writer pair on this module (e.g., Compliance.markdown_syntax)
  # and its name is recorded in Compliance.keys.
  module Compliance
    @keys = ::Set.new

    class << self
      # The Set of names of all switches registered so far.
      attr_reader :keys
    end

    # Registers a new compliance switch and assigns its initial value.
    #
    # key   - the name of the switch (the switches below use Symbols)
    # value - the initial value of the switch
    #
    # Raises ArgumentError if key is :keys or if this module already responds
    # to a method named key (which includes any switch defined previously).
    #
    # Returns the Set of registered keys.
    def self.define key, value
      raise ::ArgumentError, %(Illegal key name: #{key}) if key == :keys || (respond_to? key)
      instance_variable_set %(@#{key}), value
      # grab the singleton class the long way (no reliance on singleton_class)
      metaclass = class << self; self; end
      metaclass.send :attr_accessor, key
      @keys << key
    end

    # AsciiDoc ends a paragraph when it is immediately followed by block
    # content (a delimiter or a block attribute list). This switch allows
    # that behavior to be modified.
    # TODO what about literal paragraph?
    # Compliance value: true
    define :block_terminates_paragraph, true

    # AsciiDoc does not treat a paragraph labeled with a verbatim style
    # (literal, listing, source, verse) as verbatim. This switch allows that
    # behavior to be modified.
    # Compliance value: false
    define :strict_verbatim_paragraphs, true

    # NOT CURRENTLY USED
    # AsciiDoc allows the start and end delimiters around a block to have
    # different lengths. Enabling this switch requires matching lengths.
    # Compliance value: false
    #define :congruent_block_delimiters, true

    # AsciiDoc supports both single-line and underlined section titles.
    # This switch disables the underlined variant.
    # Compliance value: true
    define :underline_style_section_titles, true

    # Asciidoctor will unwrap the content in a preamble if the document has
    # a title and no sections.
    # Compliance value: false
    define :unwrap_standalone_preamble, true

    # AsciiDoc drops lines that contain references to missing attributes,
    # a behavior that is not intuitive to most writers.
    # Compliance value: 'drop-line'
    define :attribute_missing, 'skip'

    # AsciiDoc drops lines that contain an attribute unassignment. This
    # behavior may need to be tuned depending on the circumstances.
    # Compliance value: 'drop-line'
    define :attribute_undefined, 'drop-line'

    # Asciidoctor will recognize commonly-used Markdown syntax to the degree
    # it does not interfere with existing AsciiDoc syntax and behavior.
    # Compliance value: false
    define :markdown_syntax, true
  end
  141. # The absolute root path of the Asciidoctor RubyGem
  142. ROOT_PATH = ::File.dirname ::File.dirname ::File.expand_path __FILE__
  143. # The absolute lib path of the Asciidoctor RubyGem
  144. LIB_PATH = ::File.join ROOT_PATH, 'lib'
  145. # The absolute data path of the Asciidoctor RubyGem
  146. DATA_PATH = ::File.join ROOT_PATH, 'data'
  147. # The user's home directory, as best we can determine it
  148. USER_HOME = ::Dir.home rescue ::ENV['HOME'] || ::Dir.pwd
  149. # Flag to indicate whether encoding can be coerced to UTF-8
  150. # _All_ input data must be force encoded to UTF-8 if Encoding.default_external is *not* UTF-8
  151. # Addresses failures performing string operations that are reported as "invalid byte sequence in US-ASCII"
  152. # Ruby 1.8 doesn't seem to experience this problem (perhaps because it isn't validating the encodings)
  153. COERCE_ENCODING = !::RUBY_ENGINE_OPAL && ::RUBY_MIN_VERSION_1_9
  154. # Flag to indicate whether encoding of external strings needs to be forced to UTF-8
  155. FORCE_ENCODING = COERCE_ENCODING && ::Encoding.default_external != ::Encoding::UTF_8
  156. # Byte arrays for UTF-* Byte Order Marks
  157. # hex escape sequence used for Ruby 1.8 compatibility
  158. BOM_BYTES_UTF_8 = "\xef\xbb\xbf".bytes.to_a
  159. BOM_BYTES_UTF_16LE = "\xff\xfe".bytes.to_a
  160. BOM_BYTES_UTF_16BE = "\xfe\xff".bytes.to_a
  161. # Flag to indicate that line length should be calculated using a unicode mode hint
  162. FORCE_UNICODE_LINE_LENGTH = !::RUBY_MIN_VERSION_1_9
  163. # Flag to indicate whether gsub can use a Hash to map matches to replacements
  164. SUPPORTS_GSUB_RESULT_HASH = ::RUBY_MIN_VERSION_1_9 && !::RUBY_ENGINE_OPAL
  165. # The endline character used for output; stored in constant table as an optimization
  166. EOL = "\n"
  167. # The null character to use for splitting attribute values
  168. NULL = "\0"
  169. # String for matching tab character
  170. TAB = "\t"
  171. # Regexp for replacing tab character
  172. TAB_PATTERN = /\t/
  173. # The default document type
  174. # Can influence markup generated by the converters
  175. DEFAULT_DOCTYPE = 'article'
  176. # The backend determines the format of the converted output, defaults to html5
  177. DEFAULT_BACKEND = 'html5'
  178. DEFAULT_STYLESHEET_KEYS = ['', 'DEFAULT'].to_set
  179. DEFAULT_STYLESHEET_NAME = 'asciidoctor.css'
  180. # Pointers to the preferred version for a given backend.
  181. BACKEND_ALIASES = {
  182. 'html' => 'html5',
  183. 'docbook' => 'docbook5'
  184. }
  185. # Default page widths for calculating absolute widths
  186. DEFAULT_PAGE_WIDTHS = {
  187. 'docbook' => 425
  188. }
  189. # Default extensions for the respective base backends
  190. DEFAULT_EXTENSIONS = {
  191. 'html' => '.html',
  192. 'docbook' => '.xml',
  193. 'pdf' => '.pdf',
  194. 'epub' => '.epub',
  195. 'asciidoc' => '.adoc'
  196. }
  197. # Set of file extensions recognized as AsciiDoc documents (stored as a truth hash)
  198. ASCIIDOC_EXTENSIONS = {
  199. '.asciidoc' => true,
  200. '.adoc' => true,
  201. '.ad' => true,
  202. '.asc' => true,
  203. # TODO .txt should be deprecated
  204. '.txt' => true
  205. }
  206. SECTION_LEVELS = {
  207. '=' => 0,
  208. '-' => 1,
  209. '~' => 2,
  210. '^' => 3,
  211. '+' => 4
  212. }
  213. ADMONITION_STYLES = ['NOTE', 'TIP', 'IMPORTANT', 'WARNING', 'CAUTION'].to_set
  214. PARAGRAPH_STYLES = ['comment', 'example', 'literal', 'listing', 'normal', 'pass', 'quote', 'sidebar', 'source', 'verse', 'abstract', 'partintro'].to_set
  215. VERBATIM_STYLES = ['literal', 'listing', 'source', 'verse'].to_set
  216. DELIMITED_BLOCKS = {
  217. '--' => [:open, ['comment', 'example', 'literal', 'listing', 'pass', 'quote', 'sidebar', 'source', 'verse', 'admonition', 'abstract', 'partintro'].to_set],
  218. '----' => [:listing, ['literal', 'source'].to_set],
  219. '....' => [:literal, ['listing', 'source'].to_set],
  220. '====' => [:example, ['admonition'].to_set],
  221. '****' => [:sidebar, ::Set.new],
  222. '____' => [:quote, ['verse'].to_set],
  223. '""' => [:quote, ['verse'].to_set],
  224. '++++' => [:pass, ['math', 'latexmath', 'asciimath'].to_set],
  225. '|===' => [:table, ::Set.new],
  226. ',===' => [:table, ::Set.new],
  227. ':===' => [:table, ::Set.new],
  228. '!===' => [:table, ::Set.new],
  229. '////' => [:comment, ::Set.new],
  230. '```' => [:fenced_code, ::Set.new],
  231. '~~~' => [:fenced_code, ::Set.new]
  232. }
  233. DELIMITED_BLOCK_LEADERS = DELIMITED_BLOCKS.keys.map {|key| key[0..1] }.to_set
  234. LAYOUT_BREAK_LINES = {
  235. '\'' => :thematic_break,
  236. '-' => :thematic_break,
  237. '*' => :thematic_break,
  238. '_' => :thematic_break,
  239. '<' => :page_break
  240. }
  241. #LIST_CONTEXTS = [:ulist, :olist, :dlist, :colist]
  242. NESTABLE_LIST_CONTEXTS = [:ulist, :olist, :dlist]
  243. # TODO validate use of explicit style name above ordered list (this list is for selecting an implicit style)
  244. ORDERED_LIST_STYLES = [:arabic, :loweralpha, :lowerroman, :upperalpha, :upperroman] #, :lowergreek]
  245. ORDERED_LIST_KEYWORDS = {
  246. 'loweralpha' => 'a',
  247. 'lowerroman' => 'i',
  248. 'upperalpha' => 'A',
  249. 'upperroman' => 'I'
  250. #'lowergreek' => 'a'
  251. #'arabic' => '1'
  252. #'decimal' => '1'
  253. }
  254. LIST_CONTINUATION = '+'
  255. # FIXME technically a preceding TAB is allowed too
  256. # alternatively, we can enforce everywhere it must be a space
  257. LINE_BREAK = ' +'
  258. BLOCK_MATH_DELIMITERS = {
  259. :asciimath => ['\\$', '\\$'],
  260. :latexmath => ['\\[', '\\]'],
  261. }
  262. INLINE_MATH_DELIMITERS = {
  263. :asciimath => ['\\$', '\\$'],
  264. :latexmath => ['\\(', '\\)'],
  265. }
  266. # attributes which can be changed within the content of the document (but not
  267. # the header) because they have semantic meaning; e.g., numbered
  268. FLEXIBLE_ATTRIBUTES = %w(numbered)
  269. # A collection of regular expressions used by the parser.
  270. #
  271. # NOTE: The following pattern, which appears frequently, captures the
  272. # contents between square brackets, ignoring escaped closing brackets
  273. # (closing brackets prefixed with a backslash '\' character)
  274. #
  275. # Pattern: (?:\[((?:\\\]|[^\]])*?)\])
  276. # Matches: [enclosed text here] or [enclosed [text\] here]
  277. #
  278. #(pseudo)module Rx
  279. ## Regular expression character classes (to ensure regexp compatibility between Ruby and JavaScript)
  280. ## CC stands for "character class", CG stands for "character class group"
  281. # NOTE \w matches only the ASCII word characters, whereas [[:word:]] or \p{Word} matches any character in the Unicode word category.
  282. # character classes for the Regexp engine(s) in JavaScript
  283. if RUBY_ENGINE == 'opal'
  284. CC_ALPHA = 'a-zA-Z'
  285. CG_ALPHA = '[a-zA-Z]'
  286. CC_ALNUM = 'a-zA-Z0-9'
  287. CG_ALNUM = '[a-zA-Z0-9]'
  288. CG_BLANK = '[ \\t]'
  289. CC_EOL = '(?=\\n|$)'
  290. CG_GRAPH = '[\\x21-\\x7E]' # non-blank character
  291. CC_WORD = 'a-zA-Z0-9_'
  292. CG_WORD = '[a-zA-Z0-9_]'
  293. # character classes for the Regexp engine in Ruby >= 2 (Ruby 1.9 supports \p{} but has problems w/ encoding)
  294. elsif ::RUBY_MIN_VERSION_2
  295. CC_ALPHA = CG_ALPHA = '\p{Alpha}'
  296. CC_ALNUM = CG_ALNUM = '\p{Alnum}'
  297. CG_BLANK = '\p{Blank}'
  298. CC_EOL = '$'
  299. CG_GRAPH = '\p{Graph}'
  300. CC_WORD = CG_WORD = '\p{Word}'
  301. # character classes for the Regexp engine in Ruby < 2
  302. else
  303. CC_ALPHA = '[:alpha:]'
  304. CG_ALPHA = '[[:alpha:]]'
  305. CC_ALNUM = '[:alnum:]'
  306. CG_ALNUM = '[[:alnum:]]'
  307. CG_BLANK = '[[:blank:]]'
  308. CC_EOL = '$'
  309. CG_GRAPH = '[[:graph:]]' # non-blank character
  310. if ::RUBY_MIN_VERSION_1_9
  311. CC_WORD = '[:word:]'
  312. CG_WORD = '[[:word:]]'
  313. else
  314. # NOTE Ruby 1.8 cannot match word characters beyond the ASCII range; if you need this feature, upgrade!
  315. CC_WORD = '[:alnum:]_'
  316. CG_WORD = '[[:alnum:]_]'
  317. end
  318. end
  319. ## Document header
  320. # Matches the author info line immediately following the document title.
  321. #
  322. # Examples
  323. #
  324. # Doc Writer <doc@example.com>
  325. # Mary_Sue Brontë
  326. #
  327. AuthorInfoLineRx = /^(#{CG_WORD}[#{CC_WORD}\-'.]*)(?: +(#{CG_WORD}[#{CC_WORD}\-'.]*))?(?: +(#{CG_WORD}[#{CC_WORD}\-'.]*))?(?: +<([^>]+)>)?$/
  328. # Matches the revision info line, which appears immediately following
  329. # the author info line beneath the document title.
  330. #
  331. # Examples
  332. #
  333. # v1.0, 2013-01-01: Ring in the new year release
  334. #
  335. RevisionInfoLineRx = /^(?:\D*(.*?),)?(?:\s*(?!:)(.*?))(?:\s*(?!^):\s*(.*))?$/
  336. # Matches the title and volnum in the manpage doctype.
  337. #
  338. # Examples
  339. #
  340. # = asciidoctor ( 1 )
  341. #
  342. ManpageTitleVolnumRx = /^(.*)\((.*)\)$/
  343. # Matches the name and purpose in the manpage doctype.
  344. #
  345. # Examples
  346. #
  347. # asciidoctor - converts AsciiDoc source files to HTML, DocBook and other formats
  348. #
  349. ManpageNamePurposeRx = /^(.*?)#{CG_BLANK}+-#{CG_BLANK}+(.*)$/
  350. ## Preprocessor directives
  351. # Matches a conditional preprocessor directive (e.g., ifdef, ifndef, ifeval and endif).
  352. #
  353. # Examples
  354. #
  355. # ifdef::basebackend-html[]
  356. # ifndef::theme[]
  357. # ifeval::["{asciidoctor-version}" >= "0.1.0"]
  358. # ifdef::asciidoctor[Asciidoctor!]
  359. # endif::theme[]
  360. # endif::basebackend-html[]
  361. # endif::[]
  362. #
  363. ConditionalDirectiveRx = /^\\?(ifdef|ifndef|ifeval|endif)::(\S*?(?:([,\+])\S+?)?)\[(.+)?\]$/
  364. # Matches a restricted (read as safe) eval expression.
  365. #
  366. # Examples
  367. #
  368. # "{asciidoctor-version}" >= "0.1.0"
  369. #
  370. EvalExpressionRx = /^(\S.*?)#{CG_BLANK}*(==|!=|<=|>=|<|>)#{CG_BLANK}*(\S.*)$/
  371. # Matches an include preprocessor directive.
  372. #
  373. # Examples
  374. #
  375. # include::chapter1.ad[]
  376. # include::example.txt[lines=1;2;5..10]
  377. #
  378. IncludeDirectiveRx = /^\\?include::([^\[]+)\[(.*?)\]$/
  379. ## Attribute entries and references
  380. # Matches a document attribute entry.
  381. #
  382. # Examples
  383. #
  384. # :foo: bar
  385. # :First Name: Dan
  386. # :numbered!:
  387. # :!toc:
  388. # :long-entry: Attribute value lines ending in ' +'
  389. # are joined together as a single value,
  390. # collapsing the line breaks and indentation to
  391. # a single space.
  392. #
  393. AttributeEntryRx = /^:(!?\w.*?):(?:#{CG_BLANK}+(.*))?$/
  394. # Matches invalid characters in an attribute name.
  395. InvalidAttributeNameCharsRx = /[^\w\-]/
  396. # Matches the pass inline macro allowed in value of attribute assignment.
  397. #
  398. # Examples
  399. #
  400. # pass:[text]
  401. #
  402. AttributeEntryPassMacroRx = /^pass:([a-z,]*)\[(.*)\]$/
  403. # Matches an inline attribute reference.
  404. #
  405. # Examples
  406. #
  407. # {foo}
  408. # {counter:pcount:1}
  409. # {set:foo:bar}
  410. # {set:name!}
  411. #
  412. AttributeReferenceRx = /(\\)?\{((set|counter2?):.+?|\w+(?:[\-]\w+)*)(\\)?\}/
  413. ## Paragraphs and delimited blocks
  414. # Matches an anchor (i.e., id + optional reference text) on a line above a block.
  415. #
  416. # Examples
  417. #
  418. # [[idname]]
  419. # [[idname,Reference Text]]
  420. #
  421. BlockAnchorRx = /^\[\[(?:|([#{CC_ALPHA}:_][#{CC_WORD}:.-]*)(?:,#{CG_BLANK}*(\S.*))?)\]\]$/
  422. # Matches an attribute list above a block element.
  423. #
  424. # Examples
  425. #
  426. # # strictly positional
  427. # [quote, Adam Smith, Wealth of Nations]
  428. #
  429. # # name/value pairs
  430. # [NOTE, caption="Good to know"]
  431. #
  432. # # as attribute reference
  433. # [{lead}]
  434. #
  435. BlockAttributeListRx = /^\[(|#{CG_BLANK}*[#{CC_WORD}\{,.#"'%].*)\]$/
  436. # A combined pattern that matches either a block anchor or a block attribute list.
  437. #
  438. # TODO this one gets hit a lot, should be optimized as much as possible
  439. BlockAttributeLineRx = /^\[(|#{CG_BLANK}*[#{CC_WORD}\{,.#"'%].*|\[(?:|[#{CC_ALPHA}:_][#{CC_WORD}:.-]*(?:,#{CG_BLANK}*\S.*)?)\])\]$/
  440. # Matches a title above a block.
  441. #
  442. # Examples
  443. #
  444. # .Title goes here
  445. #
  446. BlockTitleRx = /^\.([^\s.].*)$/
  447. # Matches an admonition label at the start of a paragraph.
  448. #
  449. # Examples
  450. #
  451. # NOTE: Just a little note.
  452. # TIP: Don't forget!
  453. #
  454. AdmonitionParagraphRx = /^(#{ADMONITION_STYLES.to_a * '|'}):#{CG_BLANK}/
  455. # Matches a literal paragraph, which is a line of text preceded by at least one blank character (space or tab).
  456. #
  457. # Examples
  458. #
  459. # <SPACE>Foo
  460. # <TAB>Foo
  461. LiteralParagraphRx = /^(#{CG_BLANK}+.*)$/
  462. # Matches a comment block.
  463. #
  464. # Examples
  465. #
  466. # ////
  467. # This is a block comment.
  468. # It can span one or more lines.
  469. # ////
  470. CommentBlockRx = %r{^/{4,}$}
  471. # Matches a comment line.
  472. #
  473. # Examples
  474. #
  475. # // and then whatever
  476. #
  477. CommentLineRx = %r{^//(?:[^/]|$)}
  478. ## Section titles
  479. # Matches a single-line (Atx-style) section title.
  480. #
  481. # Examples
  482. #
  483. # == Foo
  484. # # ^ a level 1 (h2) section title
  485. #
  486. # == Foo ==
  487. # # ^ also a level 1 (h2) section title
  488. #
  489. # match[1] is the delimiter, whose length determines the level
  490. # match[2] is the title itself
  491. # NOTE an inline anchor (which becomes the section id) is not captured by this regexp; see InlineSectionAnchorRx
  492. AtxSectionRx = /^((?:=|#){1,6})#{CG_BLANK}+(\S.*?)(?:#{CG_BLANK}+\1)?$/
  493. # Matches the restricted section name for a two-line (Setext-style) section title.
  494. # The name cannot begin with a dot and has at least one alphanumeric character.
  495. SetextSectionTitleRx = /^((?=.*#{CG_WORD}+.*)[^.].*?)$/
  496. # Matches the underline in a two-line (Setext-style) section title.
  497. #
  498. # Examples
  499. #
  500. # ====== || ------ || ~~~~~~ || ^^^^^^ || ++++++
  501. #
  502. SetextSectionLineRx = /^(?:=|-|~|\^|\+)+$/
  503. # Matches an anchor (i.e., id + optional reference text) inside a section title.
  504. #
  505. # Examples
  506. #
  507. # Section Title [[idname]]
  508. # Section Title [[idname,Reference Text]]
  509. #
  510. InlineSectionAnchorRx = /^(.*?)#{CG_BLANK}+(\\)?\[\[([#{CC_ALPHA}:_][#{CC_WORD}:.-]*)(?:,#{CG_BLANK}*(\S.*?))?\]\]$/
  511. # Matches invalid characters in a section id.
  512. InvalidSectionIdCharsRx = /&(?:[a-zA-Z]{2,}|#\d{2,5}|#x[a-fA-F0-9]{2,4});|[^#{CC_WORD}]+?/
  513. # Matches the block style used to designate a section title as a floating title.
  514. #
  515. # Examples
  516. #
  517. # [float]
  518. # = Floating Title
  519. #
  520. FloatingTitleStyleRx = /^(?:float|discrete)\b/
  521. ## Lists
  522. # Detects the start of any list item.
  523. AnyListRx = /^(?:<?\d+>#{CG_BLANK}+#{CG_GRAPH}|#{CG_BLANK}*(?:-|(?:\*|\.){1,5}|\d+\.|[a-zA-Z]\.|[IVXivx]+\))#{CG_BLANK}+#{CG_GRAPH}|#{CG_BLANK}*.*?(?::{2,4}|;;)(?:#{CG_BLANK}+#{CG_GRAPH}|$))/
  524. # Matches an unordered list item (one level for hyphens, up to 5 levels for asterisks).
  525. #
  526. # Examples
  527. #
  528. # * Foo
  529. # - Foo
  530. #
  531. UnorderedListRx = /^#{CG_BLANK}*(-|\*{1,5})#{CG_BLANK}+(.*)$/
  532. # Matches an ordered list item (explicit numbering or up to 5 consecutive dots).
  533. #
  534. # Examples
  535. #
  536. # . Foo
  537. # .. Foo
  538. # 1. Foo (arabic, default)
  539. # a. Foo (loweralpha)
  540. # A. Foo (upperalpha)
  541. # i. Foo (lowerroman)
  542. # I. Foo (upperroman)
  543. #
  544. # NOTE leading space match is not always necessary, but is used for list reader
  545. OrderedListRx = /^#{CG_BLANK}*(\.{1,5}|\d+\.|[a-zA-Z]\.|[IVXivx]+\))#{CG_BLANK}+(.*)$/
  546. # Matches the ordinals for each type of ordered list.
  547. OrderedListMarkerRxMap = {
  548. :arabic => /\d+[.>]/,
  549. :loweralpha => /[a-z]\./,
  550. :lowerroman => /[ivx]+\)/,
  551. :upperalpha => /[A-Z]\./,
  552. :upperroman => /[IVX]+\)/
  553. #:lowergreek => /[a-z]\]/
  554. }
  555. # Matches a definition list item.
  556. #
  557. # Examples
  558. #
  559. # foo::
  560. # foo:::
  561. # foo::::
  562. # foo;;
  563. #
  564. # # should be followed by a definition, on the same line...
  565. #
  566. # foo:: That which precedes 'bar' (see also, <<bar>>)
  567. #
  568. # # ...or on a separate line
  569. #
  570. # foo::
  571. # That which precedes 'bar' (see also, <<bar>>)
  572. #
  573. # # the term may be an attribute reference
  574. #
  575. # {foo_term}:: {foo_def}
  576. #
  577. # NOTE negative match for comment line is intentional since that isn't handled when looking for next list item
  578. # QUESTION should we check for line comment in regex or when scanning the lines?
  579. #
  580. DefinitionListRx = /^(?!\/\/)#{CG_BLANK}*(.*?)(:{2,4}|;;)(?:#{CG_BLANK}+(.*))?$/
  581. # Matches a sibling definition list item (which does not include the keyed type).
  582. DefinitionListSiblingRx = {
  583. # (?:.*[^:])? - a non-capturing group which grabs longest sequence of characters that doesn't end w/ colon
  584. '::' => /^(?!\/\/)#{CG_BLANK}*((?:.*[^:])?)(::)(?:#{CG_BLANK}+(.*))?$/,
  585. ':::' => /^(?!\/\/)#{CG_BLANK}*((?:.*[^:])?)(:::)(?:#{CG_BLANK}+(.*))?$/,
  586. '::::' => /^(?!\/\/)#{CG_BLANK}*((?:.*[^:])?)(::::)(?:#{CG_BLANK}+(.*))?$/,
  587. ';;' => /^(?!\/\/)#{CG_BLANK}*(.*)(;;)(?:#{CG_BLANK}+(.*))?$/
  588. }
  589. # Matches a callout list item.
  590. #
  591. # Examples
  592. #
  593. # <1> Foo
  594. #
  595. CalloutListRx = /^<?(\d+)>#{CG_BLANK}+(.*)/
  596. # Matches a callout reference inside literal text.
  597. #
  598. # Examples
  599. # <1> (optionally prefixed by //, # or ;; line comment chars)
  600. # <1> <2> (multiple callouts on one line)
  601. # <!--1--> (for XML-based languages)
  602. #
  603. # NOTE special characters have already been replaced at this point during conversion to an SGML format
  604. CalloutConvertRx = /(?:(?:\/\/|#|;;) ?)?(\\)?&lt;!?(--|)(\d+)\2&gt;(?=(?: ?\\?&lt;!?\2\d+\2&gt;)*#{CC_EOL})/
  605. # NOTE (con't) ...but not while scanning
  606. CalloutQuickScanRx = /\\?<!?(--|)(\d+)\1>(?=(?: ?\\?<!?\1\d+\1>)*#{CC_EOL})/
  607. CalloutScanRx = /(?:(?:\/\/|#|;;) ?)?(\\)?<!?(--|)(\d+)\2>(?=(?: ?\\?<!?\2\d+\2>)*#{CC_EOL})/
  608. # A Hash of regexps for lists used for dynamic access.
  609. ListRxMap = {
  610. :ulist => UnorderedListRx,
  611. :olist => OrderedListRx,
  612. :dlist => DefinitionListRx,
  613. :colist => CalloutListRx
  614. }
  615. ## Tables
  616. # Parses the column spec (i.e., colspec) for a table.
  617. #
  618. # Examples
  619. #
  620. # 1*h,2*,^3e
  621. #
  622. ColumnSpecRx = /^(?:(\d+)\*)?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?(\d+%?)?([a-z])?$/
  623. # Parses the start and end of a cell spec (i.e., cellspec) for a table.
  624. #
  625. # Examples
  626. #
  627. # 2.3+<.>m
  628. #
  629. # FIXME use step-wise scan (or treetop) rather than this mega-regexp
  630. CellSpecStartRx = /^#{CG_BLANK}*(?:(\d+(?:\.\d*)?|(?:\d*\.)?\d+)([*+]))?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?([a-z])?\|/
  631. CellSpecEndRx = /#{CG_BLANK}+(?:(\d+(?:\.\d*)?|(?:\d*\.)?\d+)([*+]))?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?([a-z])?$/
  632. # Block macros
  633. # Matches the general block macro pattern.
  634. #
  635. # Examples
  636. #
  637. # gist::123456[]
  638. #
  639. #--
  640. # NOTE we've relaxed the match for target to accommodate the short format (e.g., name::[attrlist])
  641. GenericBlockMacroRx = /^(#{CG_WORD}+)::(\S*?)\[((?:\\\]|[^\]])*?)\]$/
  642. # Matches an image, video or audio block macro.
  643. #
  644. # Examples
  645. #
  646. # image::filename.png[Caption]
  647. # video::http://youtube.com/12345[Cats vs Dogs]
  648. #
  649. MediaBlockMacroRx = /^(image|video|audio)::(\S+?)\[((?:\\\]|[^\]])*?)\]$/
  650. # Matches the TOC block macro.
  651. #
  652. # Examples
  653. #
  654. # toc::[]
  655. # toc::[levels=2]
  656. #
  657. TocBlockMacroRx = /^toc::\[(.*?)\]$/
  658. ## Inline macros
  659. # Matches an anchor (i.e., id + optional reference text) in the flow of text.
  660. #
  661. # Examples
  662. #
  663. # [[idname]]
  664. # [[idname,Reference Text]]
  665. # anchor:idname[]
  666. # anchor:idname[Reference Text]
  667. #
  668. InlineAnchorRx = /\\?(?:\[\[([#{CC_ALPHA}:_][#{CC_WORD}:.-]*)(?:,#{CG_BLANK}*(\S.*?))?\]\]|anchor:(\S+)\[(.*?[^\\])?\])/
  669. # Matches a bibliography anchor anywhere inline.
  670. #
  671. # Examples
  672. #
  673. # [[[Foo]]]
  674. #
  675. InlineBiblioAnchorRx = /\\?\[\[\[([#{CC_WORD}:][#{CC_WORD}:.-]*?)\]\]\]/
  676. # Matches an inline e-mail address.
  677. #
  678. # doc.writer@example.com
  679. #
  680. EmailInlineMacroRx = /([\\>:\/])?#{CG_WORD}[#{CC_WORD}.%+-]*@#{CG_ALNUM}[#{CC_ALNUM}.-]*\.#{CG_ALPHA}{2,4}\b/
  681. # Matches an inline footnote macro, which is allowed to span multiple lines.
  682. #
  683. # Examples
  684. # footnote:[text]
  685. # footnoteref:[id,text]
  686. # footnoteref:[id]
  687. #
  688. FootnoteInlineMacroRx = /\\?(footnote(?:ref)?):\[(.*?[^\\])\]/m
  689. # Matches an image or icon inline macro.
  690. #
  691. # Examples
  692. #
  693. # image:filename.png[Alt Text]
  694. # image:http://example.com/images/filename.png[Alt Text]
  695. # image:filename.png[More [Alt\] Text] (alt text becomes "More [Alt] Text")
  696. # icon:github[large]
  697. #
  698. ImageInlineMacroRx = /\\?(?:image|icon):([^:\[][^\[]*)\[((?:\\\]|[^\]])*?)\]/
  699. # Matches an indexterm inline macro, which may span multiple lines.
  700. #
  701. # Examples
  702. #
  703. # indexterm:[Tigers,Big cats]
  704. # (((Tigers,Big cats)))
  705. # indexterm2:[Tigers]
  706. # ((Tigers))
  707. #
  708. IndextermInlineMacroRx = /\\?(?:(indexterm2?):\[(.*?[^\\])\]|\(\((.+?)\)\)(?!\)))/m
  709. # Matches either the kbd or btn inline macro.
  710. #
  711. # Examples
  712. #
  713. # kbd:[F3]
  714. # kbd:[Ctrl+Shift+T]
  715. # kbd:[Ctrl+\]]
  716. # kbd:[Ctrl,T]
  717. # btn:[Save]
  718. #
  719. KbdBtnInlineMacroRx = /\\?(?:kbd|btn):\[((?:\\\]|[^\]])+?)\]/
  720. # Matches the delimiter used for kbd value.
  721. #
  722. # Examples
  723. #
  724. # Ctrl + Alt+T
  725. # Ctrl,T
  726. #
  727. KbdDelimiterRx = /(?:\+|,)(?=#{CG_BLANK}*[^\1])/
  728. # Matches an implicit link and some of the link inline macro.
  729. #
  730. # Examples
  731. #
  732. # http://github.com
  733. # http://github.com[GitHub]
  734. #
  735. # FIXME revisit! the main issue is we need different rules for implicit vs explicit
  736. LinkInlineRx = %r{(^|link:|&lt;|[\s>\(\)\[\];])(\\?(?:https?|file|ftp|irc)://[^\s\[\]<]*[^\s.,\[\]<])(?:\[((?:\\\]|[^\]])*?)\])?}
  737. # Matches a link or e-mail inline macro.
  738. #
  739. # Examples
  740. #
  741. # link:path[label]
  742. # mailto:doc.writer@example.com[]
  743. #
  744. LinkInlineMacroRx = /\\?(?:link|mailto):([^\s\[]+)(?:\[((?:\\\]|[^\]])*?)\])/
  745. # Matches a math inline macro, which may span multiple lines.
  746. #
  747. # Examples
  748. #
  749. # math:[x != 0]
  750. # asciimath:[x != 0]
  751. # latexmath:[\sqrt{4} = 2]
  752. #
  753. MathInlineMacroRx = /\\?((?:latex|ascii)?math):([a-z,]*)\[(.*?[^\\])\]/m
  754. # Matches a menu inline macro.
  755. #
  756. # Examples
  757. #
  758. # menu:File[New...]
  759. # menu:View[Page Style > No Style]
  760. # menu:View[Page Style, No Style]
  761. #
  762. MenuInlineMacroRx = /\\?menu:(#{CG_WORD}|#{CG_WORD}.*?\S)\[#{CG_BLANK}*(.+?)?\]/
  763. # Matches an implicit menu inline macro.
  764. #
  765. # Examples
  766. #
  767. # "File > New..."
  768. #
  769. MenuInlineRx = /\\?"(#{CG_WORD}[^"]*?#{CG_BLANK}*&gt;#{CG_BLANK}*[^" \t][^"]*)"/
  770. # Matches a passthrough literal value, which may span multiple lines.
  771. #
  772. # Examples
  773. #
  774. # `text`
  775. #
  776. PassInlineLiteralRx = /(^|[^`#{CC_WORD}])(?:\[([^\]]+?)\])?(\\?`([^`\s]|[^`\s].*?\S)`)(?![`#{CC_WORD}])/m
  777. # Matches several variants of the passthrough inline macro, which may span multiple lines.
  778. #
  779. # Examples
  780. #
  781. # +++text+++
  782. # $$text$$
  783. # pass:quotes[text]
  784. #
  785. PassInlineMacroRx = /\\?(?:(\+{3}|\${2})(.*?)\1|pass:([a-z,]*)\[(.*?[^\\])\])/m
  786. # Matches an xref (i.e., cross-reference) inline macro, which may span multiple lines.
  787. #
  788. # Examples
  789. #
  790. # <<id,reftext>>
  791. # xref:id[reftext]
  792. #
  793. # NOTE special characters have already been escaped, hence the entity references
  794. XrefInlineMacroRx = /\\?(?:&lt;&lt;([#{CC_WORD}":].*?)&gt;&gt;|xref:([#{CC_WORD}":].*?)\[(.*?)\])/m
  795. ## Layout
  796. # Matches a trailing + preceded by at least one space character,
  797. # which forces a hard line break (<br> tag in HTML outputs).
  798. #
  799. # Examples
  800. #
  801. # +
  802. # Foo +
  803. #
  804. # NOTE: JavaScript only treats ^ and $ as line boundaries in multiline regexp
  805. LineBreakRx = if RUBY_ENGINE == 'opal'
  806. /^(.*)[ \t]\+$/m
  807. else
  808. /^(.*)[[:blank:]]\+$/
  809. end
  # Matches an AsciiDoc horizontal rule or AsciiDoc page break.
  #
  # The line must consist of three or more repetitions of the *same*
  # character. A line that mixes the two characters (e.g., '<' or <<') is
  # not a layout break and is therefore rejected.
  #
  # Examples
  #
  #   ''' (horizontal rule)
  #   <<< (page break)
  #
  # match[1] is the break character (either ' or <)
  LayoutBreakLineRx = /^('|<)\1{2,}$/

  # Matches an AsciiDoc or Markdown horizontal rule or AsciiDoc page break.
  #
  # The AsciiDoc forms require three or more repetitions of the same
  # character (no mixing of ' and <). The Markdown forms consist of exactly
  # three identical markers (-, * or _), optionally indented by up to three
  # spaces and optionally separated by a uniform run of spaces.
  #
  # Examples
  #
  #   ''' (horizontal rule)
  #   --- or - - - (horizontal rule)
  #   *** or * * * (horizontal rule)
  #   ___ or _ _ _ (horizontal rule)
  #   <<< (page break)
  #
  # match[1] is the Markdown marker and match[2] the spacing between markers
  # (both nil when an AsciiDoc form matched)
  LayoutBreakLinePlusRx = /^(?:'{3,}|<{3,})$|^ {0,3}([-\*_])( *)\1\2\1$/
  ## General

  # Matches a blank line.
  #
  # NOTE allows for empty space in line as it could be left by the template engine
  BlankLineRx = /^#{CG_BLANK}*\n/

  # Matches a comma or semi-colon delimiter.
  #
  # Examples
  #
  #   one,two
  #   three;four
  #
  DataDelimiterRx = /,|;/

  # Matches one or more consecutive digits on a single line.
  #
  # Examples
  #
  #   29
  #
  DigitsRx = /^\d+$/

  # Matches a single line of text enclosed in double quotes, capturing the quote char and text.
  #
  # The quotes are optional: group 1 is either " or the empty String (in which
  # case unquoted text matches too) and group 2 is the text.
  #
  # Examples
  #
  #   "Who goes there?"
  #
  DoubleQuotedRx = /^("|)(.*)\1$/

  # Matches multiple lines of text enclosed in double quotes, capturing the quote char and text.
  #
  # Same pattern as DoubleQuotedRx with the m flag set, so the text in group 2
  # may contain newlines.
  #
  # Examples
  #
  #   "I am a run-on sentence and I like
  #   to take up multiple lines and I
  #   still want to be matched."
  #
  DoubleQuotedMultiRx = /^("|)(.*)\1$/m

  # Matches one or more consecutive digits at the end of a line.
  #
  # Examples
  #
  #   docbook45
  #   html5
  #
  TrailingDigitsRx = /\d+$/

  # Matches a space escaped by a backslash.
  #
  # Group 1 captures the blank character that follows the backslash.
  #
  # Examples
  #
  #   one\ two\ three
  #
  EscapedSpaceRx = /\\(#{CG_BLANK})/

  # Matches a space delimiter that's not escaped.
  #
  # Group 1 captures the character that precedes the run of blanks (any
  # character except a backslash), so a replacement can put it back.
  #
  # Examples
  #
  #   one two three four
  #
  SpaceDelimiterRx = /([^\\])#{CG_BLANK}+/

  # Matches any character with multibyte support explicitly enabled (length of multibyte char = 1)
  #
  # The value is nil when running on Opal or when FORCE_UNICODE_LINE_LENGTH is not set.
  #
  # NOTE If necessary to hide use of the language modifier (u) from JavaScript, use (Regexp.new '.', false, 'u')
  #
  UnicodeCharScanRx = unless RUBY_ENGINE == 'opal'
    FORCE_UNICODE_LINE_LENGTH ? /./u : nil
  end

  # Detects strings that resemble URIs.
  #
  # The scheme must start with a letter and the colon may be followed by zero,
  # one or two forward slashes.
  #
  # Examples
  #
  #   http://domain
  #   https://domain
  #   data:info
  #
  UriSniffRx = %r{^#{CG_ALPHA}[#{CC_ALNUM}.+-]*:/{0,2}}

  # Detects the end of an implicit URI in the text
  #
  # Matches a closing parenthesis, semi-colon or colon at the end of a line.
  # NOTE(review): the semi-colon presumably covers a trailing escaped entity
  # such as &lt; (see the second example); confirm against the caller.
  #
  # Examples
  #
  #   (http://google.com)
  #   &gt;http://google.com&lt;
  #   (See http://google.com):
  #
  UriTerminator = /[);:]$/

  # Detects XML tags
  #
  # Matches a < followed by one or more characters other than > and a closing >.
  XmlSanitizeRx = /<[^>]+>/
  912. # Unused
  913. # Detects any fenced block delimiter, including:
  914. # listing, literal, example, sidebar, quote, passthrough, table and fenced code
  915. # Does not match open blocks or air quotes
  916. # TIP position the most common blocks towards the front of the pattern
  917. #BlockDelimiterRx = %r{^(?:(?:-|\.|=|\*|_|\+|/){4,}|[\|,;!]={3,}|(?:`|~){3,}.*)$}
  918. # Matches an escaped single quote within a word
  919. #
  920. # Examples
  921. #
  922. # Here\'s Johnny!
  923. #
  924. #EscapedSingleQuoteRx = /(#{CG_WORD})\\'(#{CG_WORD})/
  925. # an alternative if our backend generates single-quoted html/xml attributes
  926. #EscapedSingleQuoteRx = /(#{CG_WORD}|=)\\'(#{CG_WORD})/
  927. # Matches whitespace at the beginning of the line
  928. #LeadingSpacesRx = /^(#{CG_BLANK}*)/
  929. # Matches parent directory references at the beginning of a path
  930. #LeadingParentDirsRx = /^(?:\.\.\/)*/
  931. #StripLineWise = /\A(?:\s*\n)?(.*?)\s*\z/m
  932. #end
  # Maps the name of each intrinsic attribute to its replacement text.
  #
  # Some values are literal characters (e.g. 'vbar' => '|'); others are numeric
  # character references (e.g. 'nbsp' => '&#160;') and are stored in that form.
  # 'sp' and 'space' both map to a single space.
  #
  # NOTE(review): presumably consulted when attribute references are resolved
  # in a document; the lookup itself is not part of this file section.
  INTRINSIC_ATTRIBUTES = {
    'startsb' => '[',
    'endsb' => ']',
    'vbar' => '|',
    'caret' => '^',
    'asterisk' => '*',
    'tilde' => '~',
    'plus' => '&#43;',
    'apostrophe' => '\'',
    'backslash' => '\\',
    'backtick' => '`',
    'empty' => '',
    'sp' => ' ',
    'space' => ' ',
    'two-colons' => '::',
    'two-semicolons' => ';;',
    'nbsp' => '&#160;',
    'deg' => '&#176;',
    'zwsp' => '&#8203;',
    'quot' => '&#34;',
    'apos' => '&#39;',
    'lsquo' => '&#8216;',
    'rsquo' => '&#8217;',
    'ldquo' => '&#8220;',
    'rdquo' => '&#8221;',
    'wj' => '&#8288;',
    'brvbar' => '&#166;',
    'amp' => '&',
    'lt' => '<',
    'gt' => '>'
  }
  # The inline formatting (quote) substitutions.
  #
  # Each entry is an Array of [type, scope, pattern]:
  #
  # type    - the Symbol naming the kind of formatting (e.g. :strong)
  # scope   - :constrained or :unconstrained
  # pattern - the Regexp that matches the formatted text (all use the m flag)
  #
  # unconstrained quotes:: can appear anywhere
  # constrained quotes:: must be bordered by non-word characters
  #
  # Captures of an unconstrained pattern (an optional leading backslash is
  # part of the match):
  #   1 - the optional attribute list between square brackets
  #   2 - the formatted text
  #
  # Captures of a constrained pattern:
  #   1 - the character preceding the opening mark (or the empty start of a line)
  #   2 - the optional attribute list between square brackets
  #   3 - the formatted text, which may not start or end with whitespace
  #
  # NOTE these substitutions are processed in the order they appear here and
  # the order in which they are replaced is important
  QUOTE_SUBS = [
    # **strong**
    [:strong, :unconstrained, /\\?(?:\[([^\]]+?)\])?\*\*(.+?)\*\*/m],
    # *strong*
    [:strong, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?\*(\S|\S.*?\S)\*(?!#{CG_WORD})/m],
    # ``double-quoted''
    [:double, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?``(\S|\S.*?\S)''(?!#{CG_WORD})/m],
    # 'emphasis'
    [:emphasis, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?'(\S|\S.*?\S)'(?!#{CG_WORD})/m],
    # `single-quoted'
    [:single, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?`(\S|\S.*?\S)'(?!#{CG_WORD})/m],
    # ++monospaced++
    [:monospaced, :unconstrained, /\\?(?:\[([^\]]+?)\])?\+\+(.+?)\+\+/m],
    # +monospaced+
    [:monospaced, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?\+(\S|\S.*?\S)\+(?!#{CG_WORD})/m],
    # __emphasis__
    [:emphasis, :unconstrained, /\\?(?:\[([^\]]+?)\])?__(.+?)__/m],
    # _emphasis_
    [:emphasis, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?_(\S|\S.*?\S)_(?!#{CG_WORD})/m],
    # ##unquoted##
    [:none, :unconstrained, /\\?(?:\[([^\]]+?)\])?##(.+?)##/m],
    # #unquoted#
    [:none, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?#(\S|\S.*?\S)#(?!#{CG_WORD})/m],
    # ^superscript^
    [:superscript, :unconstrained, /\\?(?:\[([^\]]+?)\])?\^(.+?)\^/m],
    # ~subscript~
    [:subscript, :unconstrained, /\\?(?:\[([^\]]+?)\])?~(.+?)~/m]
  ]
  # The textual replacements (symbols, dashes, arrows, entity restoration).
  #
  # Each entry is an Array of [pattern, replacement, restore]:
  #
  # pattern     - the Regexp to look for; most accept an optional leading backslash
  # replacement - the String (a numeric character reference) to put in its place
  # restore     - :none, :leading or :bounding
  #               NOTE(review): presumably tells the substitutor which captured
  #               text to put back around the replacement; confirm against the
  #               code that consumes this table.
  #
  # NOTE in Ruby 1.8.7, [^\\] does not match start of line,
  # so we need to match it explicitly
  # order is significant
  REPLACEMENTS = [
    # (C)
    [/\\?\(C\)/, '&#169;', :none],
    # (R)
    [/\\?\(R\)/, '&#174;', :none],
    # (TM)
    [/\\?\(TM\)/, '&#8482;', :none],
    # foo -- bar
    [/(^|\n| |\\)--( |\n|$)/, '&#8201;&#8212;&#8201;', :none],
    # foo--bar
    [/(#{CG_WORD})\\?--(?=#{CG_WORD})/, '&#8212;', :leading],
    # ellipsis
    [/\\?\.\.\./, '&#8230;', :leading],
    # apostrophe or a closing single quote (planned)
    [/(#{CG_ALPHA})\\?'(?!')/, '&#8217;', :leading],
    # an opening single quote (planned)
    #[/\B\\?'(?=#{CG_ALPHA})/, '&#8216;', :none],
    # right arrow ->
    [/\\?-&gt;/, '&#8594;', :none],
    # right double arrow =>
    [/\\?=&gt;/, '&#8658;', :none],
    # left arrow <-
    [/\\?&lt;-/, '&#8592;', :none],
    # left double arrow <=
    [/\\?&lt;=/, '&#8656;', :none],
    # restore entities
    # Accepts every well-formed character reference:
    # * named references may contain digits after the first letter (e.g. &frac12; &sup2;)
    # * decimal references may have up to 7 digits (1114111 is the last code point)
    # * hexadecimal references may have up to 6 digits (10FFFF is the last code point)
    [/\\?(&)amp;((?:[a-zA-Z][a-zA-Z0-9]*|#\d{2,7}|#x[a-fA-F0-9]{2,6});)/, '', :bounding]
  ]
  1027. class << self
  1028. # Public: Parse the AsciiDoc source input into a {Document}
  1029. #
  1030. # Accepts input as an IO (or StringIO), String or String Array object. If the
  1031. # input is a File, information about the file is stored in attributes on the
  1032. # Document object.
  1033. #
  # input - the AsciiDoc source as an IO, String or Array.
  1035. # options - a String, Array or Hash of options to control processing (default: {})
  1036. # String and Array values are converted into a Hash.
  1037. # See {Document#initialize} for details about these options.
  1038. #
  1039. # Returns the Document
  def load input, options = {}
    # work on a copy so the caller's options Hash is never modified
    options = options.dup
    if (timings = options[:timings])
      timings.start :read
    end

    # Normalize the :attributes option (nil, Hash, Array, String or Hash-like
    # object) into a fresh Hash so entries can be added to it below.
    attributes = options[:attributes] = if !(attrs = options[:attributes])
      {}
    elsif (attrs.is_a? ::Hash) || (::RUBY_ENGINE_JRUBY && (attrs.is_a? ::Java::JavaUtil::Map))
      attrs.dup
    elsif attrs.is_a? ::Array
      # each entry has the form name=value; a missing value becomes an empty String
      attrs.inject({}) do |accum, entry|
        k, v = entry.split '=', 2
        accum[k] = v || ''
        accum
      end
    elsif attrs.is_a? ::String
      # convert non-escaped spaces into null character, so we split on the
      # correct spaces chars, and restore escaped spaces
      capture_1 = ::RUBY_ENGINE_OPAL ? '$1' : '\1'
      attrs = attrs.gsub(SpaceDelimiterRx, %(#{capture_1}#{NULL})).gsub(EscapedSpaceRx, capture_1)
      attrs.split(NULL).inject({}) do |accum, entry|
        k, v = entry.split '=', 2
        accum[k] = v || ''
        accum
      end
    elsif (attrs.respond_to? :keys) && (attrs.respond_to? :[])
      # convert it to a Hash as we know it
      original_attrs = attrs
      attrs = {}
      original_attrs.keys.each do |key|
        attrs[key] = original_attrs[key]
      end
      attrs
    else
      raise ::ArgumentError, %(illegal type for attributes option: #{attrs.class.ancestors})
    end

    lines = nil
    if input.is_a? ::File
      lines = input.readlines
      input_mtime = input.mtime
      # resolve the absolute path directly from the given File; opening a second
      # File just to read its path would leave a handle that is never closed
      input_path = ::File.expand_path input.path
      # hold off on setting infile and indir until we get a better sense of their purpose
      attributes['docfile'] = input_path
      attributes['docdir'] = ::File.dirname input_path
      attributes['docname'] = ::File.basename input_path, (::File.extname input_path)
      attributes['docdate'] = docdate = input_mtime.strftime('%Y-%m-%d')
      attributes['doctime'] = doctime = input_mtime.strftime('%H:%M:%S %Z')
      attributes['docdatetime'] = %(#{docdate} #{doctime})
    elsif input.respond_to? :readlines
      # NOTE tty, pipes & sockets can't be rewound, but can't be sniffed easily either
      # just fail the rewind operation silently to handle all cases
      input.rewind rescue nil
      lines = input.readlines
    elsif input.is_a? ::String
      lines = input.lines.entries
    elsif input.is_a? ::Array
      # copy the Array so the caller's lines are not shared with the Document
      lines = input.dup
    else
      raise ::ArgumentError, %(Unsupported input type: #{input.class})
    end

    if timings
      timings.record :read
      timings.start :parse
    end

    # parse the document unless the caller explicitly set the :parse option
    options[:parse] = true unless options.key? :parse
    doc = Document.new lines, options
    timings.record :parse if timings
    doc
  end
  1110. # Public: Parse the contents of the AsciiDoc source file into an Asciidoctor::Document
  1111. #
  # The file is opened for reading and handed to Asciidoctor.load, which
  # stores information about the file in attributes on the Document.
  #
  # filename - the String AsciiDoc source filename
  1117. # options - a String, Array or Hash of options to control processing (default: {})
  1118. # String and Array values are converted into a Hash.
  1119. # See Asciidoctor::Document#initialize for details about options.
  1120. #
  1121. # Returns the Asciidoctor::Document
  def load_file filename, options = {}
    # use the block form of File.open so the handle is closed as soon as the
    # document has been loaded (a nil filename still raises, as it did before)
    ::File.open(filename || '') {|file| self.load file, options }
  end
  1125. # Public: Parse the AsciiDoc source input into an Asciidoctor::Document and
  1126. # convert it to the specified backend format.
  1127. #
  1128. # Accepts input as an IO, String or String Array object. If the
  1129. # input is a File, information about the file is stored in
  1130. # attributes on the Document.
  1131. #
  1132. # If the :in_place option is true, and the input is a File, the output is
  1133. # written to a file adjacent to the input file, having an extension that
  1134. # corresponds to the backend format. Otherwise, if the :to_file option is
  1135. # specified, the file is written to that file. If :to_file is not an absolute
  1136. # path, it is resolved relative to :to_dir, if given, otherwise the
  1137. # Document#base_dir. If the target directory does not exist, it will not be
  1138. # created unless the :mkdirs option is set to true. If the file cannot be
  1139. # written because the target directory does not exist, or because it falls
  1140. # outside of the Document#base_dir in safe mode, an IOError is raised.
  1141. #
  1142. # If the output is going to be written to a file, the header and footer are
  1143. # included unless specified otherwise (writing to a file implies creating a
  1144. # standalone document). Otherwise, the header and footer are not included by
  1145. # default and the converted result is returned.
  1146. #
  # input - the AsciiDoc source as an IO, String or Array.
  1148. # options - a String, Array or Hash of options to control processing (default: {})
  1149. # String and Array values are converted into a Hash.
  1150. # See Asciidoctor::Document#initialize for details about options.
  1151. #
  1152. # Returns the Document object if the converted String is written to a
  1153. # file, otherwise the converted String
  def convert input, options = {}
    # work on a copy; the output-related options are removed before loading
    options = options.dup
    to_file = options.delete(:to_file)
    to_dir = options.delete(:to_dir)
    mkdirs = options.delete(:mkdirs) || false
    timings = options[:timings]

    # Decide where the output goes from the kind of value given for :to_file.
    case to_file
    when true, nil
      # no explicit file: write next to the input File unless :to_dir is given
      write_to_same_dir = !to_dir && (input.is_a? ::File)
      stream_output = false
      write_to_target = to_dir
      to_file = nil
    when false
      # never write; the converted output is returned
      write_to_same_dir = false
      stream_output = false
      write_to_target = false
      to_file = nil
    else
      write_to_same_dir = false
      # anything that responds to write is treated as an output stream
      stream_output = to_file.respond_to? :write
      write_to_target = stream_output ? false : to_file
    end

    # writing to a file implies a standalone document unless stated otherwise
    if !options.key?(:header_footer) && (write_to_same_dir || write_to_target)
      options[:header_footer] = true
    end

    doc = self.load input, options

    if to_file == '/dev/null'
      return doc
    elsif write_to_same_dir
      infile = ::File.expand_path input.path
      outfile = ::File.join ::File.dirname(infile), %(#{doc.attributes['docname']}#{doc.attributes['outfilesuffix']})
      if outfile == infile
        raise ::IOError, 'Input file and output file are the same!'
      end
      outdir = ::File.dirname outfile
    elsif write_to_target
      working_dir = options.key?(:base_dir) ? ::File.expand_path(options[:base_dir]) : ::File.expand_path(::Dir.pwd)
      # QUESTION should the jail be the working_dir or doc.base_dir???
      jail = doc.safe >= SafeMode::SAFE ? working_dir : nil
      if to_dir
        outdir = doc.normalize_system_path(to_dir, working_dir, jail, :target_name => 'to_dir', :recover => false)
        if to_file
          outfile = doc.normalize_system_path(to_file, outdir, nil, :target_name => 'to_dir', :recover => false)
          # reestablish outdir as the final target directory (in the case to_file had directory segments)
          outdir = ::File.dirname outfile
        else
          outfile = ::File.join outdir, %(#{doc.attributes['docname']}#{doc.attributes['outfilesuffix']})
        end
      elsif to_file
        outfile = doc.normalize_system_path(to_file, working_dir, jail, :target_name => 'to_dir', :recover => false)
        # establish outdir as the final target directory (in the case to_file had directory segments)
        outdir = ::File.dirname outfile
      end

      unless ::File.directory? outdir
        if mkdirs
          ::FileUtils.mkdir_p outdir
        else
          # NOTE we intentionally refer to the directory as it was passed to the API;
          # when only to_file was given, report the directory part of to_file so the
          # message never ends without naming a directory
          raise ::IOError, %(target directory does not exist: #{to_dir || (::File.dirname to_file)})
        end
      end
    else
      # either an output stream or nothing to write to
      outfile = to_file
      outdir = nil
    end

    timings.start :convert if timings
    output = doc.convert
    timings.record :convert if timings

    if outfile
      timings.start :write if timings
      unless stream_output
        doc.attributes['outfile'] = outfile
        doc.attributes['outdir'] = outdir
      end
      doc.write output, outfile
      timings.record :write if timings

      # NOTE document cannot control this behavior if safe >= SafeMode::SERVER
      if !stream_output && doc.safe < SafeMode::SECURE && (doc.attr? 'basebackend-html') &&
          (doc.attr? 'linkcss') && (doc.attr? 'copycss')
        copy_asciidoctor_stylesheet = DEFAULT_STYLESHEET_KEYS.include?(stylesheet = (doc.attr 'stylesheet'))
        copy_user_stylesheet = !copy_asciidoctor_stylesheet && !stylesheet.nil_or_empty?
        copy_coderay_stylesheet = (doc.attr? 'source-highlighter', 'coderay') && (doc.attr 'coderay-css', 'class') == 'class'
        copy_pygments_stylesheet = (doc.attr? 'source-highlighter', 'pygments') && (doc.attr 'pygments-css', 'class') == 'class'
        if copy_asciidoctor_stylesheet || copy_user_stylesheet || copy_coderay_stylesheet || copy_pygments_stylesheet
          outdir = doc.attr('outdir')
          stylesoutdir = doc.normalize_system_path(doc.attr('stylesdir'), outdir,
              doc.safe >= SafeMode::SAFE ? outdir : nil)
          Helpers.mkdir_p stylesoutdir if mkdirs
          if copy_asciidoctor_stylesheet
            Stylesheets.instance.write_primary_stylesheet stylesoutdir
          # FIXME should Stylesheets also handle the user stylesheet?
          elsif copy_user_stylesheet
            # an empty copycss value means the stylesheet itself is the source
            if (stylesheet_src = (doc.attr 'copycss')).empty?
              stylesheet_src = doc.normalize_system_path stylesheet
            else
              stylesheet_src = doc.normalize_system_path stylesheet_src
            end
            stylesheet_dst = doc.normalize_system_path stylesheet, stylesoutdir, (doc.safe >= SafeMode::SAFE ? outdir : nil)
            # skip the copy when source and destination coincide or the source cannot be read
            unless stylesheet_src == stylesheet_dst || (stylesheet_content = doc.read_asset stylesheet_src).nil?
              ::File.open(stylesheet_dst, 'w') {|f|
                f.write stylesheet_content
              }
            end
          end

          if copy_coderay_stylesheet
            Stylesheets.instance.write_coderay_stylesheet stylesoutdir
          elsif copy_pygments_stylesheet
            Stylesheets.instance.write_pygments_stylesheet stylesoutdir, (doc.attr 'pygments-style')
          end
        end
      end
      # the output was written, so hand back the Document
      doc
    else
      output
    end
  end

  # Alias render to convert to maintain backwards compatibility
  alias :render :convert
  1272. # Public: Parse the contents of the AsciiDoc source file into an
  1273. # Asciidoctor::Document and convert it to the specified backend format.
  1274. #
  # filename - the String AsciiDoc source filename
  1276. # options - a String, Array or Hash of options to control processing (default: {})
  1277. # String and Array values are converted into a Hash.
  1278. # See Asciidoctor::Document#initialize for details about options.
  1279. #
  1280. # Returns the Document object if the converted String is written to a
  1281. # file, otherwise the converted String
  def convert_file filename, options = {}
    # use the block form of File.open so the handle is closed once the
    # conversion has finished (a nil filename still raises, as it did before)
    ::File.open(filename || '') {|file| self.convert file, options }
  end

  # Alias render_file to convert_file to maintain backwards compatibility
  alias :render_file :convert_file
  1287. end
  1288. if RUBY_ENGINE == 'opal'
  1289. require 'asciidoctor/debug'
  1290. require 'asciidoctor/version'
  1291. require 'asciidoctor/timings'
  1292. else
  1293. autoload :Debug, 'asciidoctor/debug'
  1294. autoload :VERSION, 'asciidoctor/version'
  1295. autoload :Timings, 'asciidoctor/timings'
  1296. end
  1297. end
  1298. # core extensions
  1299. require 'asciidoctor/core_ext'
  1300. # modules
  1301. require 'asciidoctor/helpers'
  1302. require 'asciidoctor/substitutors'
  1303. # abstract classes
  1304. require 'asciidoctor/abstract_node'
  1305. require 'asciidoctor/abstract_block'
  1306. # concrete classes
  1307. require 'asciidoctor/attribute_list'
  1308. require 'asciidoctor/block'
  1309. require 'asciidoctor/callouts'
  1310. require 'asciidoctor/converter'
  1311. require 'asciidoctor/converter/html5' if RUBY_ENGINE_OPAL
  1312. require 'asciidoctor/document'
  1313. require 'asciidoctor/inline'
  1314. require 'asciidoctor/list'
  1315. require 'asciidoctor/parser'
  1316. require 'asciidoctor/path_resolver'
  1317. require 'asciidoctor/reader'
  1318. require 'asciidoctor/section'
  1319. require 'asciidoctor/stylesheets'
  1320. require 'asciidoctor/table'