PageRenderTime 61ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/src/Text/Pandoc/Readers/Org.hs

https://github.com/huwung/pandoc
Haskell | 1383 lines | 1026 code | 231 blank | 126 comment | 25 complexity | 7a61b90346fbd19266d56cd42ea77ea4 MD5 | raw file
Possible License(s): GPL-2.0
  1. {-# LANGUAGE OverloadedStrings #-}
  2. {-# LANGUAGE GeneralizedNewtypeDeriving #-}
  3. {-
  4. Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  16. -}
  17. {- |
  18. Module : Text.Pandoc.Readers.Org
  19. Copyright : Copyright (C) 2014 Albert Krewinkel
  20. License : GNU GPL, version 2 or above
  21. Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de>
  22. Conversion of org-mode formatted plain text to 'Pandoc' document.
  23. -}
  24. module Text.Pandoc.Readers.Org ( readOrg ) where
  25. import qualified Text.Pandoc.Builder as B
  26. import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>)
  27. , trimInlines )
  28. import Text.Pandoc.Definition
  29. import Text.Pandoc.Options
  30. import qualified Text.Pandoc.Parsing as P
  31. import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
  32. , newline, orderedListMarker
  33. , parseFromString
  34. , updateLastStrPos )
  35. import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
  36. import Text.Pandoc.Shared (compactify', compactify'DL)
  37. import Text.Parsec.Pos (updatePosString)
  38. import Text.TeXMath (texMathToPandoc, DisplayType(..))
  39. import Control.Applicative ( Applicative, pure
  40. , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
  41. import Control.Arrow (first)
  42. import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
  43. import Control.Monad.Reader (Reader, runReader, ask, asks)
  44. import Data.Char (isAlphaNum, toLower)
  45. import Data.Default
  46. import Data.List (intersperse, isPrefixOf, isSuffixOf)
  47. import qualified Data.Map as M
  48. import Data.Maybe (fromMaybe, isJust)
  49. import Data.Monoid (Monoid, mconcat, mempty, mappend)
  50. import Network.HTTP (urlEncode)
  51. -- | Parse org-mode string and return a Pandoc document.
  52. readOrg :: ReaderOptions -- ^ Reader options
  53. -> String -- ^ String to parse (assuming @'\n'@ line endings)
  54. -> Pandoc
  55. readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
  56. type OrgParser = Parser [Char] OrgParserState
  57. parseOrg :: OrgParser Pandoc
  58. parseOrg = do
  59. blocks' <- parseBlocks
  60. st <- getState
  61. let meta = runF (orgStateMeta' st) st
  62. return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st)
  63. --
  64. -- Parser State for Org
  65. --
  66. type OrgNoteRecord = (String, F Blocks)
  67. type OrgNoteTable = [OrgNoteRecord]
  68. type OrgBlockAttributes = M.Map String String
  69. type OrgLinkFormatters = M.Map String (String -> String)
  70. -- | Org-mode parser state
  71. data OrgParserState = OrgParserState
  72. { orgStateOptions :: ReaderOptions
  73. , orgStateAnchorIds :: [String]
  74. , orgStateBlockAttributes :: OrgBlockAttributes
  75. , orgStateEmphasisCharStack :: [Char]
  76. , orgStateEmphasisNewlines :: Maybe Int
  77. , orgStateLastForbiddenCharPos :: Maybe SourcePos
  78. , orgStateLastPreCharPos :: Maybe SourcePos
  79. , orgStateLastStrPos :: Maybe SourcePos
  80. , orgStateLinkFormatters :: OrgLinkFormatters
  81. , orgStateMeta :: Meta
  82. , orgStateMeta' :: F Meta
  83. , orgStateNotes' :: OrgNoteTable
  84. }
  85. instance HasReaderOptions OrgParserState where
  86. extractReaderOptions = orgStateOptions
  87. instance HasMeta OrgParserState where
  88. setMeta field val st =
  89. st{ orgStateMeta = setMeta field val $ orgStateMeta st }
  90. deleteMeta field st =
  91. st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
  92. instance HasLastStrPosition OrgParserState where
  93. getLastStrPos = orgStateLastStrPos
  94. setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
  95. instance Default OrgParserState where
  96. def = defaultOrgParserState
  97. defaultOrgParserState :: OrgParserState
  98. defaultOrgParserState = OrgParserState
  99. { orgStateOptions = def
  100. , orgStateAnchorIds = []
  101. , orgStateBlockAttributes = M.empty
  102. , orgStateEmphasisCharStack = []
  103. , orgStateEmphasisNewlines = Nothing
  104. , orgStateLastForbiddenCharPos = Nothing
  105. , orgStateLastPreCharPos = Nothing
  106. , orgStateLastStrPos = Nothing
  107. , orgStateLinkFormatters = M.empty
  108. , orgStateMeta = nullMeta
  109. , orgStateMeta' = return nullMeta
  110. , orgStateNotes' = []
  111. }
  112. recordAnchorId :: String -> OrgParser ()
  113. recordAnchorId i = updateState $ \s ->
  114. s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
  115. addBlockAttribute :: String -> String -> OrgParser ()
  116. addBlockAttribute key val = updateState $ \s ->
  117. let attrs = orgStateBlockAttributes s
  118. in s{ orgStateBlockAttributes = M.insert key val attrs }
  119. lookupBlockAttribute :: String -> OrgParser (Maybe String)
  120. lookupBlockAttribute key =
  121. M.lookup key . orgStateBlockAttributes <$> getState
  122. resetBlockAttributes :: OrgParser ()
  123. resetBlockAttributes = updateState $ \s ->
  124. s{ orgStateBlockAttributes = orgStateBlockAttributes def }
  125. updateLastStrPos :: OrgParser ()
  126. updateLastStrPos = getPosition >>= \p ->
  127. updateState $ \s -> s{ orgStateLastStrPos = Just p }
  128. updateLastForbiddenCharPos :: OrgParser ()
  129. updateLastForbiddenCharPos = getPosition >>= \p ->
  130. updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
  131. updateLastPreCharPos :: OrgParser ()
  132. updateLastPreCharPos = getPosition >>= \p ->
  133. updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
  134. pushToInlineCharStack :: Char -> OrgParser ()
  135. pushToInlineCharStack c = updateState $ \s ->
  136. s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
  137. popInlineCharStack :: OrgParser ()
  138. popInlineCharStack = updateState $ \s ->
  139. s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
  140. surroundingEmphasisChar :: OrgParser [Char]
  141. surroundingEmphasisChar =
  142. take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
  143. startEmphasisNewlinesCounting :: Int -> OrgParser ()
  144. startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
  145. s{ orgStateEmphasisNewlines = Just maxNewlines }
  146. decEmphasisNewlinesCount :: OrgParser ()
  147. decEmphasisNewlinesCount = updateState $ \s ->
  148. s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
  149. newlinesCountWithinLimits :: OrgParser Bool
  150. newlinesCountWithinLimits = do
  151. st <- getState
  152. return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
  153. resetEmphasisNewlines :: OrgParser ()
  154. resetEmphasisNewlines = updateState $ \s ->
  155. s{ orgStateEmphasisNewlines = Nothing }
  156. addLinkFormat :: String
  157. -> (String -> String)
  158. -> OrgParser ()
  159. addLinkFormat key formatter = updateState $ \s ->
  160. let fs = orgStateLinkFormatters s
  161. in s{ orgStateLinkFormatters = M.insert key formatter fs }
  162. addToNotesTable :: OrgNoteRecord -> OrgParser ()
  163. addToNotesTable note = do
  164. oldnotes <- orgStateNotes' <$> getState
  165. updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
  166. -- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
  167. -- of the state saved and restored.
  168. parseFromString :: OrgParser a -> String -> OrgParser a
  169. parseFromString parser str' = do
  170. oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
  171. updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
  172. result <- P.parseFromString parser str'
  173. updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
  174. return result
  175. --
  176. -- Adaptions and specializations of parsing utilities
  177. --
  178. newtype F a = F { unF :: Reader OrgParserState a
  179. } deriving (Monad, Applicative, Functor)
  180. runF :: F a -> OrgParserState -> a
  181. runF = runReader . unF
  182. askF :: F OrgParserState
  183. askF = F ask
  184. asksF :: (OrgParserState -> a) -> F a
  185. asksF f = F $ asks f
  186. instance Monoid a => Monoid (F a) where
  187. mempty = return mempty
  188. mappend = liftM2 mappend
  189. mconcat = fmap mconcat . sequence
  190. trimInlinesF :: F Inlines -> F Inlines
  191. trimInlinesF = liftM trimInlines
  192. returnF :: a -> OrgParser (F a)
  193. returnF = return . return
  194. -- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
  195. newline :: OrgParser Char
  196. newline =
  197. P.newline
  198. <* updateLastPreCharPos
  199. <* updateLastForbiddenCharPos
  200. --
  201. -- parsing blocks
  202. --
  203. parseBlocks :: OrgParser (F Blocks)
  204. parseBlocks = mconcat <$> manyTill block eof
  205. block :: OrgParser (F Blocks)
  206. block = choice [ mempty <$ blanklines
  207. , optionalAttributes $ choice
  208. [ orgBlock
  209. , figure
  210. , table
  211. ]
  212. , example
  213. , drawer
  214. , specialLine
  215. , header
  216. , return <$> hline
  217. , list
  218. , latexFragment
  219. , noteBlock
  220. , paraOrPlain
  221. ] <?> "block"
  222. optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks)
  223. optionalAttributes parser = try $
  224. resetBlockAttributes *> parseBlockAttributes *> parser
  225. parseBlockAttributes :: OrgParser ()
  226. parseBlockAttributes = do
  227. attrs <- many attribute
  228. () <$ mapM (uncurry parseAndAddAttribute) attrs
  229. where
  230. attribute :: OrgParser (String, String)
  231. attribute = try $ do
  232. key <- metaLineStart *> many1Till nonspaceChar (char ':')
  233. val <- skipSpaces *> anyLine
  234. return (map toLower key, val)
  235. parseAndAddAttribute :: String -> String -> OrgParser ()
  236. parseAndAddAttribute key value = do
  237. let key' = map toLower key
  238. () <$ addBlockAttribute key' value
  239. lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines))
  240. lookupInlinesAttr attr = try $ do
  241. val <- lookupBlockAttribute attr
  242. maybe (return Nothing)
  243. (fmap Just . parseFromString parseInlines)
  244. val
  245. --
  246. -- Org Blocks (#+BEGIN_... / #+END_...)
  247. --
  248. type BlockProperties = (Int, String) -- (Indentation, Block-Type)
  249. orgBlock :: OrgParser (F Blocks)
  250. orgBlock = try $ do
  251. blockProp@(_, blkType) <- blockHeaderStart
  252. ($ blockProp) $
  253. case blkType of
  254. "comment" -> withRaw' (const mempty)
  255. "html" -> withRaw' (return . (B.rawBlock blkType))
  256. "latex" -> withRaw' (return . (B.rawBlock blkType))
  257. "ascii" -> withRaw' (return . (B.rawBlock blkType))
  258. "example" -> withRaw' (return . exampleCode)
  259. "quote" -> withParsed (fmap B.blockQuote)
  260. "verse" -> verseBlock
  261. "src" -> codeBlock
  262. _ -> withParsed (fmap $ divWithClass blkType)
  263. blockHeaderStart :: OrgParser (Int, String)
  264. blockHeaderStart = try $ (,) <$> indent <*> blockType
  265. where
  266. indent = length <$> many spaceChar
  267. blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
  268. withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
  269. withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
  270. withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
  271. withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
  272. ignHeaders :: OrgParser ()
  273. ignHeaders = (() <$ newline) <|> (() <$ anyLine)
  274. divWithClass :: String -> Blocks -> Blocks
  275. divWithClass cls = B.divWith ("", [cls], [])
  276. verseBlock :: BlockProperties -> OrgParser (F Blocks)
  277. verseBlock blkProp = try $ do
  278. ignHeaders
  279. content <- rawBlockContent blkProp
  280. fmap B.para . mconcat . intersperse (pure B.linebreak)
  281. <$> mapM (parseFromString parseInlines) (lines content)
  282. codeBlock :: BlockProperties -> OrgParser (F Blocks)
  283. codeBlock blkProp = do
  284. skipSpaces
  285. (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
  286. id' <- fromMaybe "" <$> lookupBlockAttribute "name"
  287. content <- rawBlockContent blkProp
  288. let codeBlck = B.codeBlockWith ( id', classes, kv ) content
  289. maybe (pure codeBlck) (labelDiv codeBlck) <$> lookupInlinesAttr "caption"
  290. where
  291. labelDiv blk value =
  292. B.divWith nullAttr <$> (mappend <$> labelledBlock value
  293. <*> pure blk)
  294. labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
  295. rawBlockContent :: BlockProperties -> OrgParser String
  296. rawBlockContent (indent, blockType) = try $
  297. unlines . map commaEscaped <$> manyTill indentedLine blockEnder
  298. where
  299. indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
  300. blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
  301. parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
  302. parsedBlockContent blkProps = try $ do
  303. raw <- rawBlockContent blkProps
  304. parseFromString parseBlocks (raw ++ "\n")
  305. -- indent by specified number of spaces (or equiv. tabs)
  306. indentWith :: Int -> OrgParser String
  307. indentWith num = do
  308. tabStop <- getOption readerTabStop
  309. if num < tabStop
  310. then count num (char ' ')
  311. else choice [ try (count num (char ' '))
  312. , try (char '\t' >> count (num - tabStop) (char ' ')) ]
  313. type SwitchOption = (Char, Maybe String)
  314. orgArgWord :: OrgParser String
  315. orgArgWord = many1 orgArgWordChar
  316. -- | Parse code block arguments
  317. -- TODO: We currently don't handle switches.
  318. codeHeaderArgs :: OrgParser ([String], [(String, String)])
  319. codeHeaderArgs = try $ do
  320. language <- skipSpaces *> orgArgWord
  321. _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
  322. parameters <- manyTill blockOption newline
  323. let pandocLang = translateLang language
  324. return $
  325. if hasRundocParameters parameters
  326. then ( [ pandocLang, rundocBlockClass ]
  327. , map toRundocAttrib (("language", language) : parameters)
  328. )
  329. else ([ pandocLang ], parameters)
  330. where hasRundocParameters = not . null
  331. switch :: OrgParser SwitchOption
  332. switch = try $ simpleSwitch <|> lineNumbersSwitch
  333. where
  334. simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
  335. lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
  336. (string "-l \"" *> many1Till nonspaceChar (char '"'))
  337. translateLang :: String -> String
  338. translateLang "C" = "c"
  339. translateLang "C++" = "cpp"
  340. translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
  341. translateLang "js" = "javascript"
  342. translateLang "lisp" = "commonlisp"
  343. translateLang "R" = "r"
  344. translateLang "sh" = "bash"
  345. translateLang "sqlite" = "sql"
  346. translateLang cs = cs
  347. -- | Prefix used for Rundoc classes and arguments.
  348. rundocPrefix :: String
  349. rundocPrefix = "rundoc-"
  350. -- | The class-name used to mark rundoc blocks.
  351. rundocBlockClass :: String
  352. rundocBlockClass = rundocPrefix ++ "block"
  353. blockOption :: OrgParser (String, String)
  354. blockOption = try $ (,) <$> orgArgKey <*> orgParamValue
  355. inlineBlockOption :: OrgParser (String, String)
  356. inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue
  357. orgArgKey :: OrgParser String
  358. orgArgKey = try $
  359. skipSpaces *> char ':'
  360. *> many1 orgArgWordChar
  361. orgParamValue :: OrgParser String
  362. orgParamValue = try $
  363. skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces
  364. orgInlineParamValue :: OrgParser String
  365. orgInlineParamValue = try $
  366. skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces
  367. orgArgWordChar :: OrgParser Char
  368. orgArgWordChar = alphaNum <|> oneOf "-_"
  369. toRundocAttrib :: (String, String) -> (String, String)
  370. toRundocAttrib = first ("rundoc-" ++)
  371. commaEscaped :: String -> String
  372. commaEscaped (',':cs@('*':_)) = cs
  373. commaEscaped (',':cs@('#':'+':_)) = cs
  374. commaEscaped cs = cs
  375. example :: OrgParser (F Blocks)
  376. example = try $ do
  377. return . return . exampleCode =<< unlines <$> many1 exampleLine
  378. exampleCode :: String -> Blocks
  379. exampleCode = B.codeBlockWith ("", ["example"], [])
  380. exampleLine :: OrgParser String
  381. exampleLine = try $ string ": " *> anyLine
  382. -- Drawers for properties or a logbook
  383. drawer :: OrgParser (F Blocks)
  384. drawer = try $ do
  385. drawerStart
  386. manyTill drawerLine (try drawerEnd)
  387. return mempty
  388. drawerStart :: OrgParser String
  389. drawerStart = try $
  390. skipSpaces *> drawerName <* skipSpaces <* P.newline
  391. where drawerName = try $ char ':' *> validDrawerName <* char ':'
  392. validDrawerName = stringAnyCase "PROPERTIES"
  393. <|> stringAnyCase "LOGBOOK"
  394. drawerLine :: OrgParser String
  395. drawerLine = try anyLine
  396. drawerEnd :: OrgParser String
  397. drawerEnd = try $
  398. skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* P.newline
  399. --
  400. -- Figures
  401. --
  402. -- Figures (Image on a line by itself, preceded by name and/or caption)
  403. figure :: OrgParser (F Blocks)
  404. figure = try $ do
  405. (cap, nam) <- nameAndCaption
  406. src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
  407. guard (isImageFilename src)
  408. return $ do
  409. cap' <- cap
  410. return $ B.para $ B.image src nam cap'
  411. where
  412. nameAndCaption =
  413. do
  414. maybeCap <- lookupInlinesAttr "caption"
  415. maybeNam <- lookupBlockAttribute "name"
  416. guard $ isJust maybeCap || isJust maybeNam
  417. return ( fromMaybe mempty maybeCap
  418. , maybe mempty withFigPrefix maybeNam )
  419. withFigPrefix cs =
  420. if "fig:" `isPrefixOf` cs
  421. then cs
  422. else "fig:" ++ cs
  423. --
  424. -- Comments, Options and Metadata
  425. specialLine :: OrgParser (F Blocks)
  426. specialLine = fmap return . try $ metaLine <|> commentLine
  427. metaLine :: OrgParser Blocks
  428. metaLine = try $ mempty
  429. <$ (metaLineStart *> (optionLine <|> declarationLine))
  430. commentLine :: OrgParser Blocks
  431. commentLine = try $ commentLineStart *> anyLine *> pure mempty
  432. -- The order, in which blocks are tried, makes sure that we're not looking at
  433. -- the beginning of a block, so we don't need to check for it
  434. metaLineStart :: OrgParser String
  435. metaLineStart = try $ mappend <$> many spaceChar <*> string "#+"
  436. commentLineStart :: OrgParser String
  437. commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
  438. declarationLine :: OrgParser ()
  439. declarationLine = try $ do
  440. key <- metaKey
  441. inlinesF <- metaInlines
  442. updateState $ \st ->
  443. let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
  444. in st { orgStateMeta' = orgStateMeta' st <> meta' }
  445. return ()
  446. metaInlines :: OrgParser (F MetaValue)
  447. metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
  448. metaKey :: OrgParser String
  449. metaKey = map toLower <$> many1 (noneOf ": \n\r")
  450. <* char ':'
  451. <* skipSpaces
  452. optionLine :: OrgParser ()
  453. optionLine = try $ do
  454. key <- metaKey
  455. case key of
  456. "link" -> parseLinkFormat >>= uncurry addLinkFormat
  457. _ -> mzero
  458. parseLinkFormat :: OrgParser ((String, String -> String))
  459. parseLinkFormat = try $ do
  460. linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
  461. linkSubst <- parseFormat
  462. return (linkType, linkSubst)
  463. -- | An ad-hoc, single-argument-only implementation of a printf-style format
  464. -- parser.
  465. parseFormat :: OrgParser (String -> String)
  466. parseFormat = try $ do
  467. replacePlain <|> replaceUrl <|> justAppend
  468. where
  469. -- inefficient, but who cares
  470. replacePlain = try $ (\x -> concat . flip intersperse x)
  471. <$> sequence [tillSpecifier 's', rest]
  472. replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
  473. <$> sequence [tillSpecifier 'h', rest]
  474. justAppend = try $ (++) <$> rest
  475. rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
  476. tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
  477. --
  478. -- Headers
  479. --
  480. -- | Headers
  481. header :: OrgParser (F Blocks)
  482. header = try $ do
  483. level <- headerStart
  484. title <- inlinesTillNewline
  485. return $ B.header level <$> title
  486. headerStart :: OrgParser Int
  487. headerStart = try $
  488. (length <$> many1 (char '*')) <* many1 (char ' ')
  489. -- Don't use (or need) the reader wrapper here, we want hline to be
  490. -- @show@able. Otherwise we can't use it with @notFollowedBy'@.
  491. -- | Horizontal Line (five -- dashes or more)
  492. hline :: OrgParser Blocks
  493. hline = try $ do
  494. skipSpaces
  495. string "-----"
  496. many (char '-')
  497. skipSpaces
  498. newline
  499. return B.horizontalRule
  500. --
  501. -- Tables
  502. --
  503. data OrgTableRow = OrgContentRow (F [Blocks])
  504. | OrgAlignRow [Alignment]
  505. | OrgHlineRow
  506. data OrgTable = OrgTable
  507. { orgTableColumns :: Int
  508. , orgTableAlignments :: [Alignment]
  509. , orgTableHeader :: [Blocks]
  510. , orgTableRows :: [[Blocks]]
  511. }
  512. table :: OrgParser (F Blocks)
  513. table = try $ do
  514. lookAhead tableStart
  515. do
  516. rows <- tableRows
  517. cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption"
  518. return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows
  519. orgToPandocTable :: OrgTable
  520. -> Inlines
  521. -> Blocks
  522. orgToPandocTable (OrgTable _ aligns heads lns) caption =
  523. B.table caption (zip aligns $ repeat 0) heads lns
  524. tableStart :: OrgParser Char
  525. tableStart = try $ skipSpaces *> char '|'
  526. tableRows :: OrgParser [OrgTableRow]
  527. tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
  528. tableContentRow :: OrgParser OrgTableRow
  529. tableContentRow = try $
  530. OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)
  531. tableContentCell :: OrgParser (F Blocks)
  532. tableContentCell = try $
  533. fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell
  534. endOfCell :: OrgParser Char
  535. endOfCell = try $ char '|' <|> lookAhead newline
  536. tableAlignRow :: OrgParser OrgTableRow
  537. tableAlignRow = try $
  538. OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline)
  539. tableAlignCell :: OrgParser Alignment
  540. tableAlignCell =
  541. choice [ try $ emptyCell *> return AlignDefault
  542. , try $ skipSpaces
  543. *> char '<'
  544. *> tableAlignFromChar
  545. <* many digit
  546. <* char '>'
  547. <* emptyCell
  548. ] <?> "alignment info"
  549. where emptyCell = try $ skipSpaces *> endOfCell
  550. tableAlignFromChar :: OrgParser Alignment
  551. tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft
  552. , char 'c' *> return AlignCenter
  553. , char 'r' *> return AlignRight
  554. ]
  555. tableHline :: OrgParser OrgTableRow
  556. tableHline = try $
  557. OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
  558. rowsToTable :: [OrgTableRow]
  559. -> F OrgTable
  560. rowsToTable = foldM (flip rowToContent) zeroTable
  561. where zeroTable = OrgTable 0 mempty mempty mempty
  562. normalizeTable :: OrgTable
  563. -> OrgTable
  564. normalizeTable (OrgTable cols aligns heads lns) =
  565. let aligns' = fillColumns aligns AlignDefault
  566. heads' = if heads == mempty
  567. then mempty
  568. else fillColumns heads (B.plain mempty)
  569. lns' = map (`fillColumns` B.plain mempty) lns
  570. fillColumns base padding = take cols $ base ++ repeat padding
  571. in OrgTable cols aligns' heads' lns'
  572. -- One or more horizontal rules after the first content line mark the previous
  573. -- line as a header. All other horizontal lines are discarded.
  574. rowToContent :: OrgTableRow
  575. -> OrgTable
  576. -> F OrgTable
  577. rowToContent OrgHlineRow t = maybeBodyToHeader t
  578. rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t
  579. rowToContent (OrgContentRow rf) t = do
  580. rs <- rf
  581. setLongestRow rs =<< appendToBody rs t
  582. setLongestRow :: [a]
  583. -> OrgTable
  584. -> F OrgTable
  585. setLongestRow rs t =
  586. return t{ orgTableColumns = max (length rs) (orgTableColumns t) }
  587. maybeBodyToHeader :: OrgTable
  588. -> F OrgTable
  589. maybeBodyToHeader t = case t of
  590. OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
  591. return t{ orgTableHeader = b , orgTableRows = [] }
  592. _ -> return t
  593. appendToBody :: [Blocks]
  594. -> OrgTable
  595. -> F OrgTable
  596. appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] }
  597. setAligns :: [Alignment]
  598. -> OrgTable
  599. -> F OrgTable
  600. setAligns aligns t = return $ t{ orgTableAlignments = aligns }
  601. --
  602. -- LaTeX fragments
  603. --
  604. latexFragment :: OrgParser (F Blocks)
  605. latexFragment = try $ do
  606. envName <- latexEnvStart
  607. content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
  608. return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
  609. where
  610. c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
  611. , c
  612. , "\\end{", e, "}\n"
  613. ]
  614. latexEnvStart :: OrgParser String
  615. latexEnvStart = try $ do
  616. skipSpaces *> string "\\begin{"
  617. *> latexEnvName
  618. <* string "}"
  619. <* blankline
  620. latexEnd :: String -> OrgParser ()
  621. latexEnd envName = try $
  622. () <$ skipSpaces
  623. <* string ("\\end{" ++ envName ++ "}")
  624. <* blankline
  625. -- | Parses a LaTeX environment name.
  626. latexEnvName :: OrgParser String
  627. latexEnvName = try $ do
  628. mappend <$> many1 alphaNum
  629. <*> option "" (string "*")
  630. --
  631. -- Footnote defintions
  632. --
  633. noteBlock :: OrgParser (F Blocks)
  634. noteBlock = try $ do
  635. ref <- noteMarker <* skipSpaces
  636. content <- mconcat <$> blocksTillHeaderOrNote
  637. addToNotesTable (ref, content)
  638. return mempty
  639. where
  640. blocksTillHeaderOrNote =
  641. many1Till block (eof <|> () <$ lookAhead noteMarker
  642. <|> () <$ lookAhead headerStart)
  643. -- Paragraphs or Plain text
  644. paraOrPlain :: OrgParser (F Blocks)
  645. paraOrPlain = try $
  646. parseInlines <**> (fmap <$> option B.plain (try $ newline *> pure B.para))
  647. inlinesTillNewline :: OrgParser (F Inlines)
  648. inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
  649. --
  650. -- list blocks
  651. --
  652. list :: OrgParser (F Blocks)
  653. list = choice [ definitionList, bulletList, orderedList ] <?> "list"
  654. definitionList :: OrgParser (F Blocks)
  655. definitionList = fmap B.definitionList . fmap compactify'DL . sequence
  656. <$> many1 (definitionListItem bulletListStart)
  657. bulletList :: OrgParser (F Blocks)
  658. bulletList = fmap B.bulletList . fmap compactify' . sequence
  659. <$> many1 (listItem bulletListStart)
  660. orderedList :: OrgParser (F Blocks)
  661. orderedList = fmap B.orderedList . fmap compactify' . sequence
  662. <$> many1 (listItem orderedListStart)
  663. genericListStart :: OrgParser String
  664. -> OrgParser Int
  665. genericListStart listMarker = try $
  666. (+) <$> (length <$> many spaceChar)
  667. <*> (length <$> listMarker <* many1 spaceChar)
  668. -- parses bullet list start and returns its length (excl. following whitespace)
  669. bulletListStart :: OrgParser Int
  670. bulletListStart = genericListStart bulletListMarker
  671. where bulletListMarker = pure <$> oneOf "*-+"
  672. orderedListStart :: OrgParser Int
  673. orderedListStart = genericListStart orderedListMarker
  674. -- Ordered list markers allowed in org-mode
  675. where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
  676. definitionListItem :: OrgParser Int
  677. -> OrgParser (F (Inlines, [Blocks]))
  678. definitionListItem parseMarkerGetLength = try $ do
  679. markerLength <- parseMarkerGetLength
  680. term <- manyTill (noneOf "\n\r") (try $ string "::")
  681. line1 <- anyLineNewline
  682. blank <- option "" ("\n" <$ blankline)
  683. cont <- concat <$> many (listContinuation markerLength)
  684. term' <- parseFromString inline term
  685. contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont
  686. return $ (,) <$> term' <*> fmap (:[]) contents'
  687. -- parse raw text for one list item, excluding start marker and continuations
  688. listItem :: OrgParser Int
  689. -> OrgParser (F Blocks)
  690. listItem start = try $ do
  691. markerLength <- try start
  692. firstLine <- anyLineNewline
  693. blank <- option "" ("\n" <$ blankline)
  694. rest <- concat <$> many (listContinuation markerLength)
  695. parseFromString parseBlocks $ firstLine ++ blank ++ rest
  696. -- continuation of a list item - indented and separated by blankline or endline.
  697. -- Note: nested lists are parsed as continuations.
  698. listContinuation :: Int
  699. -> OrgParser String
  700. listContinuation markerLength = try $
  701. notFollowedBy' blankline
  702. *> (mappend <$> (concat <$> many1 listLine)
  703. <*> many blankline)
  704. where listLine = try $ indentWith markerLength *> anyLineNewline
  705. anyLineNewline :: OrgParser String
  706. anyLineNewline = (++ "\n") <$> anyLine
  707. --
  708. -- inline
  709. --
  710. inline :: OrgParser (F Inlines)
  711. inline =
  712. choice [ whitespace
  713. , linebreak
  714. , cite
  715. , footnote
  716. , linkOrImage
  717. , anchor
  718. , inlineCodeBlock
  719. , str
  720. , endline
  721. , emph
  722. , strong
  723. , strikeout
  724. , underline
  725. , code
  726. , math
  727. , displayMath
  728. , verbatim
  729. , subscript
  730. , superscript
  731. , inlineLaTeX
  732. , symbol
  733. ] <* (guard =<< newlinesCountWithinLimits)
  734. <?> "inline"
  735. parseInlines :: OrgParser (F Inlines)
  736. parseInlines = trimInlinesF . mconcat <$> many1 inline
  737. -- treat these as potentially non-text when parsing inline:
  738. specialChars :: [Char]
  739. specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
  740. whitespace :: OrgParser (F Inlines)
  741. whitespace = pure B.space <$ skipMany1 spaceChar
  742. <* updateLastPreCharPos
  743. <* updateLastForbiddenCharPos
  744. <?> "whitespace"
  745. linebreak :: OrgParser (F Inlines)
  746. linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
  747. str :: OrgParser (F Inlines)
  748. str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
  749. <* updateLastStrPos
  750. -- | An endline character that can be treated as a space, not a structural
  751. -- break. This should reflect the values of the Emacs variable
  752. -- @org-element-pagaraph-separate@.
  753. endline :: OrgParser (F Inlines)
  754. endline = try $ do
  755. newline
  756. notFollowedBy blankline
  757. notFollowedBy' exampleLine
  758. notFollowedBy' hline
  759. notFollowedBy' noteMarker
  760. notFollowedBy' tableStart
  761. notFollowedBy' drawerStart
  762. notFollowedBy' headerStart
  763. notFollowedBy' metaLineStart
  764. notFollowedBy' latexEnvStart
  765. notFollowedBy' commentLineStart
  766. notFollowedBy' bulletListStart
  767. notFollowedBy' orderedListStart
  768. decEmphasisNewlinesCount
  769. guard =<< newlinesCountWithinLimits
  770. updateLastPreCharPos
  771. return . return $ B.space
  772. cite :: OrgParser (F Inlines)
  773. cite = try $ do
  774. guardEnabled Ext_citations
  775. (cs, raw) <- withRaw normalCite
  776. return $ (flip B.cite (B.text raw)) <$> cs
  777. normalCite :: OrgParser (F [Citation])
  778. normalCite = try $ char '['
  779. *> skipSpaces
  780. *> citeList
  781. <* skipSpaces
  782. <* char ']'
  783. citeList :: OrgParser (F [Citation])
  784. citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
  785. citation :: OrgParser (F Citation)
  786. citation = try $ do
  787. pref <- prefix
  788. (suppress_author, key) <- citeKey
  789. suff <- suffix
  790. return $ do
  791. x <- pref
  792. y <- suff
  793. return $ Citation{ citationId = key
  794. , citationPrefix = B.toList x
  795. , citationSuffix = B.toList y
  796. , citationMode = if suppress_author
  797. then SuppressAuthor
  798. else NormalCitation
  799. , citationNoteNum = 0
  800. , citationHash = 0
  801. }
  802. where
  803. prefix = trimInlinesF . mconcat <$>
  804. manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
  805. suffix = try $ do
  806. hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
  807. skipSpaces
  808. rest <- trimInlinesF . mconcat <$>
  809. many (notFollowedBy (oneOf ";]") *> inline)
  810. return $ if hasSpace
  811. then (B.space <>) <$> rest
  812. else rest
  813. footnote :: OrgParser (F Inlines)
  814. footnote = try $ inlineNote <|> referencedNote
  815. inlineNote :: OrgParser (F Inlines)
  816. inlineNote = try $ do
  817. string "[fn:"
  818. ref <- many alphaNum
  819. char ':'
  820. note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
  821. when (not $ null ref) $
  822. addToNotesTable ("fn:" ++ ref, note)
  823. return $ B.note <$> note
  824. referencedNote :: OrgParser (F Inlines)
  825. referencedNote = try $ do
  826. ref <- noteMarker
  827. return $ do
  828. notes <- asksF orgStateNotes'
  829. case lookup ref notes of
  830. Nothing -> return $ B.str $ "[" ++ ref ++ "]"
  831. Just contents -> do
  832. st <- askF
  833. let contents' = runF contents st{ orgStateNotes' = [] }
  834. return $ B.note contents'
  835. noteMarker :: OrgParser String
  836. noteMarker = try $ do
  837. char '['
  838. choice [ many1Till digit (char ']')
  839. , (++) <$> string "fn:"
  840. <*> many1Till (noneOf "\n\r\t ") (char ']')
  841. ]
  842. linkOrImage :: OrgParser (F Inlines)
  843. linkOrImage = explicitOrImageLink
  844. <|> selflinkOrImage
  845. <|> angleLink
  846. <|> plainLink
  847. <?> "link or image"
  848. explicitOrImageLink :: OrgParser (F Inlines)
  849. explicitOrImageLink = try $ do
  850. char '['
  851. srcF <- applyCustomLinkFormat =<< linkTarget
  852. title <- enclosedRaw (char '[') (char ']')
  853. title' <- parseFromString (mconcat <$> many inline) title
  854. char ']'
  855. return $ do
  856. src <- srcF
  857. if isImageFilename src && isImageFilename title
  858. then pure $ B.link src "" $ B.image title mempty mempty
  859. else linkToInlinesF src =<< title'
  860. selflinkOrImage :: OrgParser (F Inlines)
  861. selflinkOrImage = try $ do
  862. src <- char '[' *> linkTarget <* char ']'
  863. return $ linkToInlinesF src (B.str src)
  864. plainLink :: OrgParser (F Inlines)
  865. plainLink = try $ do
  866. (orig, src) <- uri
  867. returnF $ B.link src "" (B.str orig)
  868. angleLink :: OrgParser (F Inlines)
  869. angleLink = try $ do
  870. char '<'
  871. link <- plainLink
  872. char '>'
  873. return link
  874. selfTarget :: OrgParser String
  875. selfTarget = try $ char '[' *> linkTarget <* char ']'
  876. linkTarget :: OrgParser String
  877. linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
  878. applyCustomLinkFormat :: String -> OrgParser (F String)
  879. applyCustomLinkFormat link = do
  880. let (linkType, rest) = break (== ':') link
  881. return $ do
  882. formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
  883. return $ maybe link ($ drop 1 rest) formatter
  884. linkToInlinesF :: String -> Inlines -> F Inlines
  885. linkToInlinesF s@('#':_) = pure . B.link s ""
  886. linkToInlinesF s
  887. | isImageFilename s = const . pure $ B.image s "" ""
  888. | isUri s = pure . B.link s ""
  889. | isRelativeUrl s = pure . B.link s ""
  890. linkToInlinesF s = \title -> do
  891. anchorB <- (s `elem`) <$> asksF orgStateAnchorIds
  892. if anchorB
  893. then pure $ B.link ('#':s) "" title
  894. else pure $ B.emph title
  895. isRelativeUrl :: String -> Bool
  896. isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s)
  897. isUri :: String -> Bool
  898. isUri s = let (scheme, path) = break (== ':') s
  899. in all (\c -> isAlphaNum c || c `elem` ".-") scheme
  900. && not (null path)
  901. isImageFilename :: String -> Bool
  902. isImageFilename filename =
  903. any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
  904. (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
  905. ':' `notElem` filename)
  906. where
  907. imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
  908. protocols = [ "file", "http", "https" ]
  909. -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
  910. -- @anchor-id@ set as id. Legal anchors in org-mode are defined through
  911. -- @org-target-regexp@, which is fairly liberal. Since no link is created if
  912. -- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
  913. -- an anchor.
  914. anchor :: OrgParser (F Inlines)
  915. anchor = try $ do
  916. anchorId <- parseAnchor
  917. recordAnchorId anchorId
  918. returnF $ B.spanWith (solidify anchorId, [], []) mempty
  919. where
  920. parseAnchor = string "<<"
  921. *> many1 (noneOf "\t\n\r<>\"' ")
  922. <* string ">>"
  923. <* skipSpaces
  924. -- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors
  925. -- the org function @org-export-solidify-link-text@.
  926. solidify :: String -> String
  927. solidify = map replaceSpecialChar
  928. where replaceSpecialChar c
  929. | isAlphaNum c = c
  930. | c `elem` "_.-:" = c
  931. | otherwise = '-'
  932. -- | Parses an inline code block and marks it as an babel block.
  933. inlineCodeBlock :: OrgParser (F Inlines)
  934. inlineCodeBlock = try $ do
  935. string "src_"
  936. lang <- many1 orgArgWordChar
  937. opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
  938. inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
  939. let attrClasses = [translateLang lang, rundocBlockClass]
  940. let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
  941. returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
  942. enclosedByPair :: Char -- ^ opening char
  943. -> Char -- ^ closing char
  944. -> OrgParser a -- ^ parser
  945. -> OrgParser [a]
  946. enclosedByPair s e p = char s *> many1Till p (char e)
  947. emph :: OrgParser (F Inlines)
  948. emph = fmap B.emph <$> emphasisBetween '/'
  949. strong :: OrgParser (F Inlines)
  950. strong = fmap B.strong <$> emphasisBetween '*'
  951. strikeout :: OrgParser (F Inlines)
  952. strikeout = fmap B.strikeout <$> emphasisBetween '+'
  953. -- There is no underline, so we use strong instead.
  954. underline :: OrgParser (F Inlines)
  955. underline = fmap B.strong <$> emphasisBetween '_'
  956. code :: OrgParser (F Inlines)
  957. code = return . B.code <$> verbatimBetween '='
  958. verbatim :: OrgParser (F Inlines)
  959. verbatim = return . B.rawInline "" <$> verbatimBetween '~'
  960. subscript :: OrgParser (F Inlines)
  961. subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
  962. superscript :: OrgParser (F Inlines)
  963. superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
  964. math :: OrgParser (F Inlines)
  965. math = return . B.math <$> choice [ math1CharBetween '$'
  966. , mathStringBetween '$'
  967. , rawMathBetween "\\(" "\\)"
  968. ]
  969. displayMath :: OrgParser (F Inlines)
  970. displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
  971. , rawMathBetween "$$" "$$"
  972. ]
  973. symbol :: OrgParser (F Inlines)
  974. symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
  975. where updatePositions c
  976. | c `elem` emphasisPreChars = c <$ updateLastPreCharPos
  977. | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
  978. | otherwise = return c
  979. emphasisBetween :: Char
  980. -> OrgParser (F Inlines)
  981. emphasisBetween c = try $ do
  982. startEmphasisNewlinesCounting emphasisAllowedNewlines
  983. res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
  984. isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
  985. when isTopLevelEmphasis
  986. resetEmphasisNewlines
  987. return res
  988. verbatimBetween :: Char
  989. -> OrgParser String
  990. verbatimBetween c = try $
  991. emphasisStart c *>
  992. many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c)
  993. -- | Parses a raw string delimited by @c@ using Org's math rules
  994. mathStringBetween :: Char
  995. -> OrgParser String
  996. mathStringBetween c = try $ do
  997. mathStart c
  998. body <- many1TillNOrLessNewlines mathAllowedNewlines
  999. (noneOf (c:"\n\r"))
  1000. (lookAhead $ mathEnd c)
  1001. final <- mathEnd c
  1002. return $ body ++ [final]
  1003. -- | Parse a single character between @c@ using math rules
  1004. math1CharBetween :: Char
  1005. -> OrgParser String
  1006. math1CharBetween c = try $ do
  1007. char c
  1008. res <- noneOf $ c:mathForbiddenBorderChars
  1009. char c
  1010. eof <|> () <$ lookAhead (oneOf mathPostChars)
  1011. return [res]
  1012. rawMathBetween :: String
  1013. -> String
  1014. -> OrgParser String
  1015. rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
  1016. -- | Parses the start (opening character) of emphasis
  1017. emphasisStart :: Char -> OrgParser Char
  1018. emphasisStart c = try $ do
  1019. guard =<< afterEmphasisPreChar
  1020. guard =<< notAfterString
  1021. char c
  1022. lookAhead (noneOf emphasisForbiddenBorderChars)
  1023. pushToInlineCharStack c
  1024. return c
  1025. -- | Parses the closing character of emphasis
  1026. emphasisEnd :: Char -> OrgParser Char
  1027. emphasisEnd c = try $ do
  1028. guard =<< notAfterForbiddenBorderChar
  1029. char c
  1030. eof <|> () <$ lookAhead acceptablePostChars
  1031. updateLastStrPos
  1032. popInlineCharStack
  1033. return c
  1034. where acceptablePostChars =
  1035. surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
  1036. mathStart :: Char -> OrgParser Char
  1037. mathStart c = try $
  1038. char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
  1039. mathEnd :: Char -> OrgParser Char
  1040. mathEnd c = try $ do
  1041. res <- noneOf (c:mathForbiddenBorderChars)
  1042. char c
  1043. eof <|> () <$ lookAhead (oneOf mathPostChars)
  1044. return res
  1045. enclosedInlines :: OrgParser a
  1046. -> OrgParser b
  1047. -> OrgParser (F Inlines)
  1048. enclosedInlines start end = try $
  1049. trimInlinesF . mconcat <$> enclosed start end inline
  1050. enclosedRaw :: OrgParser a
  1051. -> OrgParser b
  1052. -> OrgParser String
  1053. enclosedRaw start end = try $
  1054. start *> (onSingleLine <|> spanningTwoLines)
  1055. where onSingleLine = try $ many1Till (noneOf "\n\r") end
  1056. spanningTwoLines = try $
  1057. anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
  1058. -- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume
  1059. -- newlines.
  1060. many1TillNOrLessNewlines :: Int
  1061. -> OrgParser Char
  1062. -> OrgParser a
  1063. -> OrgParser String
  1064. many1TillNOrLessNewlines n p end = try $
  1065. nMoreLines (Just n) mempty >>= oneOrMore
  1066. where
  1067. nMoreLines Nothing cs = return cs
  1068. nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
  1069. nMoreLines k cs = try $ (final k cs <|> rest k cs)
  1070. >>= uncurry nMoreLines
  1071. final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
  1072. rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
  1073. finalLine = try $ manyTill p end
  1074. minus1 k = k - 1
  1075. oneOrMore cs = guard (not $ null cs) *> return cs
  1076. -- Org allows customization of the way it reads emphasis. We use the defaults
  1077. -- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
  1078. -- for details).
  1079. -- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
  1080. emphasisPreChars :: [Char]
  1081. emphasisPreChars = "\t \"'({"
  1082. -- | Chars allowed at after emphasis
  1083. emphasisPostChars :: [Char]
  1084. emphasisPostChars = "\t\n !\"'),-.:;?\\}"
  1085. -- | Chars not allowed at the (inner) border of emphasis
  1086. emphasisForbiddenBorderChars :: [Char]
  1087. emphasisForbiddenBorderChars = "\t\n\r \"',"
  1088. -- | The maximum number of newlines within
  1089. emphasisAllowedNewlines :: Int
  1090. emphasisAllowedNewlines = 1
  1091. -- LaTeX-style math: see `org-latex-regexps` for details
  1092. -- | Chars allowed after an inline ($...$) math statement
  1093. mathPostChars :: [Char]
  1094. mathPostChars = "\t\n \"'),-.:;?"
  1095. -- | Chars not allowed at the (inner) border of math
  1096. mathForbiddenBorderChars :: [Char]
  1097. mathForbiddenBorderChars = "\t\n\r ,;.$"
  1098. -- | Maximum number of newlines in an inline math statement
  1099. mathAllowedNewlines :: Int
  1100. mathAllowedNewlines = 2
  1101. -- | Whether we are right behind a char allowed before emphasis
  1102. afterEmphasisPreChar :: OrgParser Bool
  1103. afterEmphasisPreChar = do
  1104. pos <- getPosition
  1105. lastPrePos <- orgStateLastPreCharPos <$> getState
  1106. return . fromMaybe True $ (== pos) <$> lastPrePos
  1107. -- | Whether the parser is right after a forbidden border char
  1108. notAfterForbiddenBorderChar :: OrgParser Bool
  1109. notAfterForbiddenBorderChar = do
  1110. pos <- getPosition
  1111. lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
  1112. return $ lastFBCPos /= Just pos
  1113. -- | Read a sub- or superscript expression
  1114. subOrSuperExpr :: OrgParser (F Inlines)
  1115. subOrSuperExpr = try $
  1116. choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
  1117. , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
  1118. , simpleSubOrSuperString
  1119. ] >>= parseFromString (mconcat <$> many inline)
  1120. where enclosing (left, right) s = left : s ++ [right]
  1121. simpleSubOrSuperString :: OrgParser String
  1122. simpleSubOrSuperString = try $
  1123. choice [ string "*"
  1124. , mappend <$> option [] ((:[]) <$> oneOf "+-")
  1125. <*> many1 alphaNum
  1126. ]
  1127. inlineLaTeX :: OrgParser (F Inlines)
  1128. inlineLaTeX = try $ do
  1129. cmd <- inlineLaTeXCommand
  1130. maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd
  1131. where
  1132. parseAsMath :: String -> Maybe Inlines
  1133. parseAsMath cs = maybeRight $ B.fromList <$> texMathToPandoc DisplayInline cs
  1134. parseAsInlineLaTeX :: String -> Maybe Inlines
  1135. parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
  1136. state :: ParserState
  1137. state = def{ stateOptions = def{ readerParseRaw = True }}
  1138. maybeRight :: Either a b -> Maybe b
  1139. maybeRight = either (const Nothing) Just
  1140. inlineLaTeXCommand :: OrgParser String
  1141. inlineLaTeXCommand = try $ do
  1142. rest <- getInput
  1143. pos <- getPosition
  1144. case runParser rawLaTeXInline def "source" rest of
  1145. Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest)
  1146. <* (setPosition $ updatePosString pos cs)
  1147. _ -> mzero