/src/Parsers.hs
http://github.com/Eelis/geordi · Haskell · 275 lines · 192 code · 55 blank · 28 comment · 24 complexity · c41cd18fea33052d1d9fb1f2db233692 MD5 · raw file
- {-# LANGUAGE UnicodeSyntax, FunctionalDependencies, MultiParamTypeClasses, FlexibleContexts, FlexibleInstances, UndecidableInstances, PatternGuards, RecordWildCards, NamedFieldPuns, ViewPatterns, TypeSynonymInstances #-}
- module Parsers where
- import qualified Data.List as List
- import qualified Text.ParserCombinators.Parsec as PS
- import qualified Editing.Commands
- import qualified Editing.Show ()
- import qualified Data.Char as Ch
- import Data.List.NonEmpty (NonEmpty((:|)))
- import Data.Foldable (toList)
- import Control.Monad (liftM, liftM2)
- import Control.Arrow (first)
- import Data.List ((\\))
- import Data.Maybe (fromMaybe)
- import Editing.Basics (positionIn)
- import Util ((.), Finite(..), commas_or, Option(..), (.∨.), isIdChar, (<<), NeList, MyMonadError(..))
- import Prelude hiding ((.))
- import Prelude.Unicode
- -- To let us write parsers that work with both Parsec and our own Parser monad, we introduce the ParserLike class:
- class (Functor m, Monad m) ⇒ ParserLike m t | m → t where
- anySymbol :: m t
- anySymbol = satisfy $ const True
- symbols :: (Eq t, Show t) ⇒ [t] → m [t]
- symbols = foldr (\ h → liftM2 (:) (satisfy (== h))) (return [])
- satisfy :: (t → Bool) → m t
- (<|>) :: m a → m a → m a
- (<?>) :: m a → String → m a
- lookAhead :: m a → m a
- pzero :: m a
- eof :: m ()
- try :: m a → m a
- getInput :: m [t]
- getInput = many anySymbol << eof
- infix 0 <?>
- infixr 1 <|>
- -- Parsec is clearly ParserLike:
- instance ParserLike (PS.GenParser Char st) Char where
- (<?>) = (PS.<?>)
- lookAhead = PS.lookAhead
- (<|>) = (PS.<|>)
- symbols = PS.string
- anySymbol = PS.anyChar
- satisfy = PS.satisfy
- pzero = PS.pzero
- eof = PS.eof
- try = PS.try
- -- Utility parser combinators:
- manyTill :: ParserLike m t ⇒ m a → m b → m ([a], b)
- manyTill p e = ((,) [] . e) <|> liftM2 (\x (y, z) → (x:y, z)) p (manyTill p e)
- many1Till :: ParserLike m t ⇒ m a → m b → m (NeList a, b)
- many1Till p e = p >>= \v → first (v :|) . manyTill p e
- optionMaybe :: ParserLike m t ⇒ m a → m (Maybe a)
- optionMaybe p = Just . p <|> return Nothing
- optional :: ParserLike m t ⇒ m a → m (Maybe a)
- optional p = Just . p <|> return Nothing
- option :: ParserLike m t ⇒ a → m a → m a
- option x p = fromMaybe x . optional p
- symbol :: (Show t, Eq t, ParserLike m t) ⇒ t → m t
- symbol x = satisfy (== x) <?> show x
- choice :: ParserLike m t ⇒ [m a] → m a
- choice = foldl (<|>) pzero
- many :: ParserLike m t ⇒ m a → m [a]
- many p = liftM2 (:) p (many p) <|> return []
- many1 :: ParserLike m t ⇒ m a → m (NeList a)
- many1 p = liftM2 (:|) p (many p)
- spaces :: ParserLike m Char ⇒ m String
- spaces = many $ symbol ' '
- noneOf :: (Eq t, ParserLike m t) ⇒ [t] → m t
- noneOf l = satisfy (∉ l)
- oneOf :: (Eq t, ParserLike m t) ⇒ [t] → m t
- oneOf l = satisfy (∈ l)
- sep :: ParserLike m t ⇒ m a → m b → m (a, [(b, a)])
- sep p p' = liftM2 (,) p (many (liftM2 (,) p' p))
- sepBy1' :: ParserLike m t ⇒ m a → m b → m (NeList a)
- sepBy1' x y = (\(h, t) → h :| map snd t) . sep x y
- sepBy1 :: ParserLike m t ⇒ m a → m b → m [a]
- sepBy1 p e = liftM2 (:) p ((e >> sepBy1 p e) <|> return [])
- kwd :: String → Parser Char String
- kwd s = try $ symbols s << notFollowedBy (satisfy isIdChar) << spaces
- kwds :: [String] → Parser Char String
- kwds = choice . (kwd .)
- char_unit :: (Eq a, Show a) ⇒ a → Parser a ()
- char_unit = (>> return ()) . char
- {-
- Parsec's parse error descriptions aren't very good for parsers that backtrack. Consider
- parse (try (char 'x' >> char 'y') <|> char 'a') "" "x3"
- This parse fails because (1) there is no 'y' at the second column, and (2) there is no 'a' at the first column. Intuitively, we feel that the first cause is the "real" cause, as it follows a longer partial match. Unfortunately, Parsec's errors do not reflect this:
- Left (line 1, column 1):
- unexpected "3"
- expecting "y" or "a"
- Here, column numbers start at 1. Thus, while it /does/ mention the 'y' expectation and the unexpected '3', it fails to give the column number where 'y' was expected, and also does not show that the 'y' expectation followed a longer partial match than the 'a' expectation.
- The parsing combinators in this module properly keep track of lengths of partial matches, to get intuitive error descriptions. For the parse above, they report only the 'y' expectation, at the second column.
- Parsec mixes ordinary parse errors with custom parse errors. One can "fail" at any point. The parsers below can only fail due to unexpected symbols. Additional errors can be added by using parsers returning (Either String a)'s.
- -}
- data Expectation = Expected { expectedAt :: Int, expectedWhat :: [String] }
- uninformativeExpectation :: Expectation
- uninformativeExpectation = Expected 0 []
- data ParseResult t a
- = ParseSuccess a [t] Int (Maybe Expectation)
- -- The Int is the length of the match. Hence, the [t] is strictly redundant, but there for efficiency. The Maybe is the furthest we've been able to parse successfully, with an indication of why we couldn't go even further.
- | ParseFailure Expectation Bool
- -- The Bool indicates whether the failure is terminal.
- instance Functor (ParseResult t) where
- fmap f (ParseSuccess x t i m) = ParseSuccess (f x) t i m; fmap _ (ParseFailure e b) = ParseFailure e b
- newtype Parser t a = Parser { run_parser :: [t] → ParseResult t a }
- peek :: Parser t [t]
- peek = Parser $ \s → ParseSuccess s s 0 Nothing
- drain :: Parser t [t]
- drain = Parser $ \s → ParseSuccess s [] (length s) Nothing
- parseSuccess :: a → Parser t a
- parseSuccess = return
- -- Parser is a Functor, a Monad, and ParserLike:
- instance Functor (Parser t) where fmap = liftM
- instance Monad (Parser t) where
- return x = Parser $ \s → ParseSuccess x s 0 Nothing
- Parser p >>= f = Parser $ \s → case p s of
- ParseSuccess r s' n m → case run_parser (f r) s' of
- ParseSuccess r' s'' n' m' → ParseSuccess r' s'' (n + n') (furthest m (offset n . m'))
- ParseFailure (offset n → m') b → ParseFailure (maybe m' (furthest' m') m) b
- ParseFailure e b → ParseFailure e b
- fail = const pzero
- instance ParserLike (Parser a) a where
- anySymbol = satisfy (const True) <?> "any symbol"
- satisfy p = Parser $ \s → case s of
- h:t | p h → ParseSuccess h t 1 Nothing
- _ → ParseFailure uninformativeExpectation False
- symbols t = Parser $ \s → case List.stripPrefix t s of
- Nothing → ParseFailure (Expected 0 [show t]) False
- Just s' → ParseSuccess t s' (length t) Nothing
- Parser p <|> Parser q = Parser $ \s → case p s of
- ParseFailure m False → case q s of
- ParseSuccess r u n' m' → ParseSuccess r u n' $ Just $ maybe m (furthest' m) m'
- ParseFailure m' b → ParseFailure (furthest' m m') b
- ps → ps
- Parser p <?> m = Parser $ \s → case p s of
- ParseFailure (Expected 0 _) b → ParseFailure (Expected 0 [m]) b
- b → b
- lookAhead (Parser p) = Parser $ \s → case p s of
- ParseSuccess r _ _ m → ParseSuccess r s 0 m
- e → e
- pzero = Parser $ const $ ParseFailure uninformativeExpectation False
- eof = Parser $ \s → if null s then ParseSuccess () [] 0 Nothing else ParseFailure (Expected 0 ["EOF"]) False
- try = id
- getInput = Parser $ \s → ParseSuccess s s 0 Nothing
- notFollowedBy :: Parser t a → Parser t ()
- notFollowedBy (Parser p) = Parser $ \s → case p s of
- ParseFailure{} → ParseSuccess () s 0 Nothing
- ParseSuccess{} → ParseFailure uninformativeExpectation False
- -- Some Parser-specific parsers:
- guarded :: (a → Bool) → Parser t a → Parser t a
- guarded f (Parser p) = Parser $ \s → case p s of
- ParseSuccess x _ _ _ | not (f x) → ParseFailure uninformativeExpectation False
- k → k
- char :: (Show t, Eq t) ⇒ t → Parser t t
- char t = satisfy (== t) <?> show t
- commit :: Parser t a → Parser t a
- commit (Parser p) = Parser $ \s → case p s of
- ParseFailure x _ → ParseFailure x True
- ps → ps
- silent :: Parser t a → Parser t a
- silent (Parser p) = Parser $ \s → case p s of
- ParseFailure _ b → ParseFailure uninformativeExpectation b
- ParseSuccess r t n _ → ParseSuccess r t n Nothing
- optParser :: (MyMonadError String m, Functor m, Finite o, Option o) ⇒ Parser Char (m [o])
- optParser = (<?> "option") $ (char '-' >>) $ do
- char '-'
- n ← (<?> "option name") $ toList . many1 (satisfy $ isIdChar .∨. (== '-'))
- spaces
- case List.find ((== n) . long) all_values of
- Nothing → return $ throwError $ "No such option: --" ++ n
- Just o → ((o:) .) . option (return []) optParser
- <|> do
- x ← many1 $ do
- d ← satisfy Ch.isAlpha <?> "option letter"
- return $ case List.find ((== Just d) . short) all_values of
- Nothing → throwError $ "No such option: -" ++ [d]
- Just o → return o
- spaces
- y ← option (return []) optParser
- return (liftM2 (++) (sequence $ toList x) y)
- -- Misc:
- showParseError :: String → String → Int → [String] → String
- showParseError subject_desc input column expectation =
- "Unexpected " ++ unexpectation ++ "." ++
- if null expectation' then "" else " Expected " ++ commas_or expectation' ++ "."
- where
- unexpectation
- | h:t ← drop column input =
- '`' : (if Ch.isAlphaNum h then h : takeWhile Ch.isAlphaNum t else [h]) ++ "` " ++
- show (Editing.Commands.describe_position_after (positionIn input column) input)
- | otherwise = "end of " ++ subject_desc
- expectation' = (List.nub expectation \\ ["EOF", "' '"]) ++
- ["end of " ++ subject_desc | "EOF" ∈ expectation]
- furthest' :: Expectation → Expectation → Expectation
- furthest' (Expected n s) (Expected n' s')
- | n < n' = Expected n' s'
- | n' < n = Expected n s
- | otherwise = Expected n (s ++ s')
- furthest :: Maybe Expectation → Maybe Expectation → Maybe Expectation
- furthest Nothing x = x
- furthest x Nothing = x
- furthest (Just x) (Just y) = Just $ furthest' x y
- offset :: Int → Expectation → Expectation
- offset n Expected{..} = Expected{expectedAt = expectedAt + n, ..}
- parseOrFailE :: Parser Char (Either String a) → String → String → Either String a
- parseOrFailE p input desc = case run_parser p input of
- ParseSuccess (Left e) _ _ _ → throwError e
- ParseSuccess (Right x) _ _ _ → return x
- ParseFailure (Expected x y) _ → throwError $ showParseError desc input x y
- parseOrFail :: Parser Char a → String → String → Either String a
- parseOrFail p input desc = case run_parser p input of
- ParseSuccess x _ _ _ → return x
- ParseFailure (Expected x y) _ → throwError $ showParseError desc input x y