/compiler/parser/Lexer.x
Alex | 1356 lines | 1174 code | 182 blank | 0 comment | 0 complexity | d4d61ffd76a3bafcc0cd15839d0ef0ad MD5 | raw file
- -----------------------------------------------------------------------------
- -- (c) The University of Glasgow, 2006
- --
- -- GHC's lexer.
- --
- -- This is a combination of an Alex-generated lexer from a regex
- -- definition, with some hand-coded bits.
- --
- -- Completely accurate information about token-spans within the source
- -- file is maintained. Every token has a start and end RealSrcLoc
- -- attached to it.
- --
- -----------------------------------------------------------------------------
- -- ToDo / known bugs:
- -- - parsing integers is a bit slow
- -- - readRational is a bit slow
- --
- -- Known bugs, that were also in the previous version:
- -- - M... should be 3 tokens, not 1.
- -- - pragma-end should be only valid in a pragma
- -- qualified operator NOTES.
- --
- -- - If M.(+) is a single lexeme, then..
- -- - Probably (+) should be a single lexeme too, for consistency.
- -- Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
- -- - But we have to rule out reserved operators, otherwise (..) becomes
- -- a different lexeme.
- -- - Should we therefore also rule out reserved operators in the qualified
- -- form? This is quite difficult to achieve. We don't do it for
- -- qualified varids.
- {
- -- XXX The above flags turn off warnings in the generated code:
- {-# LANGUAGE BangPatterns #-}
- {-# OPTIONS_GHC -fno-warn-unused-matches #-}
- {-# OPTIONS_GHC -fno-warn-unused-binds #-}
- {-# OPTIONS_GHC -fno-warn-unused-imports #-}
- {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
- -- But alex still generates some code that causes the "lazy unlifted bindings"
- -- warning, and old compilers don't know about it so we can't easily turn
- -- it off, so for now we use the sledge hammer:
- {-# OPTIONS_GHC -w #-}
- {-# OPTIONS_GHC -funbox-strict-fields #-}
- module Lexer (
- Token(..), lexer, pragState, mkPState, PState(..),
- P(..), ParseResult(..), getSrcLoc,
- getPState, getDynFlags, withThisPackage,
- failLocMsgP, failSpanMsgP, srcParseFail,
- getMessages,
- popContext, pushCurrentContext, setLastToken, setSrcLoc,
- activeContext, nextIsEOF,
- getLexState, popLexState, pushLexState,
- extension, bangPatEnabled, datatypeContextsEnabled,
- traditionalRecordSyntaxEnabled,
- typeLiteralsEnabled,
- explicitNamespacesEnabled, sccProfilingOn, hpcEnabled,
- addWarning,
- lexTokenStream
- ) where
- import Bag
- import ErrUtils
- import Outputable
- import StringBuffer
- import FastString
- import SrcLoc
- import UniqFM
- import DynFlags
- import Module
- import Ctype
- import BasicTypes ( InlineSpec(..), RuleMatchInfo(..), FractionalLit(..) )
- import Util ( readRational )
- import Control.Monad
- import Data.Bits
- import Data.Char
- import Data.List
- import Data.Maybe
- import Data.Map (Map)
- import qualified Data.Map as Map
- import Data.Ratio
- import Data.Word
- }
-- Alex character-set ($foo) and regular-expression (@foo) macros used by
-- the rules below.  Alex itself only understands Latin-1, so the byte
-- values \x01..\x06 are sentinel codes onto which alexGetChar maps whole
-- Unicode character classes (uppercase, lowercase, digit, symbol, space,
-- other-graphic).
$unispace = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
$whitechar = [\ \n\r\f\v $unispace]
$white_no_nl = $whitechar # \n
$tab = \t

$ascdigit = 0-9
$unidigit = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
$decdigit = $ascdigit -- for now, should really be $digit (ToDo)
$digit = [$ascdigit $unidigit]

$special = [\(\)\,\;\[\]\`\{\}]
$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
$unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']

$unilarge = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
$asclarge = [A-Z]
$large = [$asclarge $unilarge]

$unismall = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
$ascsmall = [a-z]
$small = [$ascsmall $unismall \_]

$unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
$graphic = [$small $large $symbol $digit $special $unigraphic \:\"\']

$octit = 0-7
$hexit = [$decdigit A-F a-f]
$symchar = [$symbol \:]
$nl = [\n\r]
$idchar = [$small $large $digit \']

$pragmachar = [$small $large $digit]

-- The four Haddock comment markers: -- |, -- ^, -- *, -- $
$docsym = [\| \^ \* \$]

@varid = $small $idchar*
@conid = $large $idchar*

@varsym = $symbol $symchar*
@consym = \: $symchar*

@decimal = $decdigit+
@octal = $octit+
@hexadecimal = $hexit+
@exponent = [eE] [\-\+]? @decimal

-- we support the hierarchical module name extension:
@qual = (@conid \.)+

@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent

-- normal signed numerical literals can only be explicitly negative,
-- not explicitly positive (contrast @exponent)
@negative = \-
@signed = @negative ?
haskell :-

-- everywhere: skip whitespace and comments
$white_no_nl+ ;
$tab+ { warn Opt_WarnTabs (text "Tab character") }

-- Everywhere: deal with nested comments. We explicitly rule out
-- pragmas, "{-#", so that we don't accidentally treat them as comments.
-- (this can happen even though pragmas will normally take precedence due to
-- longest-match, because pragmas aren't valid in every state, but comments
-- are). We also rule out nested Haddock comments, if the -haddock flag is
-- set.

"{-" / { isNormalComment } { nested_comment lexToken }

-- Single-line comments are a bit tricky. Haskell 98 says that two or
-- more dashes followed by a symbol should be parsed as a varsym, so we
-- have to exclude those.

-- Since Haddock comments aren't valid in every state, we need to rule them
-- out here.

-- The following two rules match comments that begin with two dashes, but
-- continue with a different character. The rules test that this character
-- is not a symbol (in which case we'd have a varsym), and that it's not a
-- space followed by a Haddock comment symbol (docsym) (in which case we'd
-- have a Haddock comment). The rules then munch the rest of the line.

"-- " ~[$docsym \#] .* { lineCommentToken }
"--" [^$symbol : \ ] .* { lineCommentToken }

-- Next, match Haddock comments if no -haddock flag

"-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }

-- Now, when we've matched comments that begin with 2 dashes and continue
-- with a different character, we need to match comments that begin with three
-- or more dashes (which clearly can't be Haddock comments). We only need to
-- make sure that the first non-dash character isn't a symbol, and munch the
-- rest of the line.

"---"\-* [^$symbol :] .* { lineCommentToken }

-- Since the previous rules all match dashes followed by at least one
-- character, we also need to match a whole line filled with just dashes.

"--"\-* / { atEOL } { lineCommentToken }

-- We need this rule since none of the other single line comment rules
-- actually match this case.

"-- " / { atEOL } { lineCommentToken }

-- 'bol' state: beginning of a line. Slurp up all the whitespace (including
-- blank lines) until we find a non-whitespace character, then do layout
-- processing.
--
-- One slight wibble here: what if the line begins with {-#? In
-- theory, we have to lex the pragma to see if it's one we recognise,
-- and if it is, then we backtrack and do_bol, otherwise we treat it
-- as a nested comment. We don't bother with this: if the line begins
-- with {-#, then we'll assume it's a pragma we know about and go for do_bol.

<bol> {
  \n ;
  ^\# (line)? { begin line_prag1 }
  ^\# pragma .* \n ; -- GCC 3.3 CPP generated, apparently
  ^\# \! .* \n ; -- #!, for scripts
  -- () matches the empty string: fall through to layout processing
  () { do_bol }
}
-- after a layout keyword (let, where, do, of), we begin a new layout
-- context if the curly brace is missing.
-- Careful! This stuff is quite delicate.

<layout, layout_do> {
  \{ / { notFollowedBy '-' } { hopefully_open_brace }
  -- we might encounter {-# here, but {- has been handled already
  \n ;
  ^\# (line)? { begin line_prag1 }
}

-- do is treated in a subtly different way, see new_layout_context
<layout> () { new_layout_context True }
<layout_do> () { new_layout_context False }

-- after a new layout context which was found to be to the left of the
-- previous context, we have generated a '{' token, and we now need to
-- generate a matching '}' token.
<layout_left> () { do_layout_left }

<0,option_prags> \n { begin bol }

"{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
  { dispatch_pragmas linePrags }

-- single-line line pragmas, of the form
-- # <line> "<file>" <extra-stuff> \n
<line_prag1> $decdigit+ { setLine line_prag1a }
<line_prag1a> \" [$graphic \ ]* \" { setFile line_prag1b }
<line_prag1b> .* { pop }

-- Haskell-style line pragmas, of the form
-- {-# LINE <line> "<file>" #-}
<line_prag2> $decdigit+ { setLine line_prag2a }
<line_prag2a> \" [$graphic \ ]* \" { setFile line_prag2b }
<line_prag2b> "#-}"|"-}" { pop }
-- NOTE: accept -} at the end of a LINE pragma, for compatibility
-- with older versions of GHC which generated these.

<0,option_prags> {
  "{-#" $whitechar* $pragmachar+
        $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
    { dispatch_pragmas twoWordPrags }

  "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
    { dispatch_pragmas oneWordPrags }

  -- We ignore all these pragmas, but don't generate a warning for them
  "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
    { dispatch_pragmas ignoredPrags }

  -- ToDo: should only be valid inside a pragma:
  "#-}" { endPrag }
}

<option_prags> {
  "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
    { dispatch_pragmas fileHeaderPrags }

  "-- #" { multiline_doc_comment }
}

<0> {
  -- In the "0" mode we ignore these pragmas
  "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
    { nested_comment lexToken }
}

<0> {
  "-- #" .* { lineCommentToken }
}

<0,option_prags> {
  "{-#" { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
                   (nested_comment lexToken) }
}

-- '0' state: ordinary lexemes

-- Haddock comments
<0,option_prags> {
  "-- " $docsym / { ifExtension haddockEnabled } { multiline_doc_comment }
  "{-" \ ? $docsym / { ifExtension haddockEnabled } { nested_doc_comment }
}

-- "special" symbols
<0> {
  "[:" / { ifExtension parrEnabled } { token ITopabrack }
  ":]" / { ifExtension parrEnabled } { token ITcpabrack }
}

<0> {
  "[|" / { ifExtension thEnabled } { token ITopenExpQuote }
  "[e|" / { ifExtension thEnabled } { token ITopenExpQuote }
  "[p|" / { ifExtension thEnabled } { token ITopenPatQuote }
  "[d|" / { ifExtension thEnabled } { layout_token ITopenDecQuote }
  "[t|" / { ifExtension thEnabled } { token ITopenTypQuote }
  "|]" / { ifExtension thEnabled } { token ITcloseQuote }
  \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
  "$(" / { ifExtension thEnabled } { token ITparenEscape }

  -- For backward compatibility, accept the old dollar syntax
  "[$" @varid "|" / { ifExtension qqEnabled }
    { lex_quasiquote_tok }

  "[" @varid "|" / { ifExtension qqEnabled }
    { lex_quasiquote_tok }

  -- qualified quasi-quote (#5555)
  "[" @qual @varid "|" / { ifExtension qqEnabled }
    { lex_qquasiquote_tok }
}

<0> {
  "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
    { special IToparenbar }
  "|)" / { ifExtension arrowsEnabled } { special ITcparenbar }
}

<0> {
  \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
}

<0> {
  "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol }
    { token IToubxparen }
  "#)" / { ifExtension unboxedTuplesEnabled }
    { token ITcubxparen }
}

<0,option_prags> {
  \( { special IToparen }
  \) { special ITcparen }
  \[ { special ITobrack }
  \] { special ITcbrack }
  \, { special ITcomma }
  \; { special ITsemi }
  \` { special ITbackquote }
  \{ { open_brace }
  \} { close_brace }
}

<0,option_prags> {
  @qual @varid { idtoken qvarid }
  @qual @conid { idtoken qconid }
  @varid { varid }
  @conid { idtoken conid }
}

<0> {
  @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid }
  @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid }
  @varid "#"+ / { ifExtension magicHashEnabled } { varid }
  @conid "#"+ / { ifExtension magicHashEnabled } { idtoken conid }
}

-- ToDo: - move `var` and (sym) into lexical syntax?
--       - remove backquote from $special?
<0> {
  @qual @varsym { idtoken qvarsym }
  @qual @consym { idtoken qconsym }
  @varsym { varsym }
  @consym { consym }
}

-- For the normal boxed literals we need to be careful
-- when trying to be close to Haskell98
<0> {
  -- Normal integral literals (:: Num a => a, from Integer)
  @decimal { tok_num positive 0 0 decimal }
  0[oO] @octal { tok_num positive 2 2 octal }
  0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }

  -- Normal rational literals (:: Fractional a => a, from Rational)
  @floating_point { strtoken tok_float }
}

<0> {
  -- Unboxed ints (:: Int#) and words (:: Word#)
  -- It's simpler (and faster?) to give separate cases to the negatives,
  -- especially considering octal/hexadecimal prefixes.
  @decimal \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
  0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
  0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
  @negative @decimal \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
  @negative 0[oO] @octal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
  @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
  @decimal \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
  0[oO] @octal \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
  0[xX] @hexadecimal \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }

  -- Unboxed floats and doubles (:: Float#, :: Double#)
  -- prim_{float,double} work with signed literals
  @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
  @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
}

-- Strings and chars are lexed by hand-written code. The reason is
-- that even if we recognise the string or char here in the regex
-- lexer, we would still have to parse the string afterward in order
-- to convert it to a String.
<0> {
  \' { lex_char_tok }
  \" { lex_string_tok }
}
- {
- -- -----------------------------------------------------------------------------
- -- The token type
-- | The tokens produced by the lexer and consumed by the parser.
-- Constructors are grouped by origin: Haskell keywords, GHC-extension
-- keywords, pragmas, reserved symbols, brackets/punctuation, identifiers,
-- literals, Template Haskell, arrow notation, and documentation comments.
data Token
  = ITas -- Haskell keywords
  | ITcase
  | ITclass
  | ITdata
  | ITdefault
  | ITderiving
  | ITdo
  | ITelse
  | IThiding
  | ITif
  | ITimport
  | ITin
  | ITinfix
  | ITinfixl
  | ITinfixr
  | ITinstance
  | ITlet
  | ITmodule
  | ITnewtype
  | ITof
  | ITqualified
  | ITthen
  | ITtype
  | ITwhere
  | ITscc -- ToDo: remove (we use {-# SCC "..." #-} now)

  | ITforall -- GHC extension keywords
  | ITforeign
  | ITexport
  | ITlabel
  | ITdynamic
  | ITsafe
  | ITinterruptible
  | ITunsafe
  | ITstdcallconv
  | ITccallconv
  | ITcapiconv
  | ITprimcallconv
  | ITmdo
  | ITfamily
  | ITgroup
  | ITby
  | ITusing

  -- Pragmas
  | ITinline_prag InlineSpec RuleMatchInfo
  | ITspec_prag -- SPECIALISE
  | ITspec_inline_prag Bool -- SPECIALISE INLINE (or NOINLINE)
  | ITsource_prag
  | ITrules_prag
  | ITwarning_prag
  | ITdeprecated_prag
  | ITline_prag
  | ITscc_prag
  | ITgenerated_prag
  | ITcore_prag -- hdaume: core annotations
  | ITunpack_prag
  | ITnounpack_prag
  | ITann_prag
  | ITclose_prag
  | IToptions_prag String
  | ITinclude_prag String
  | ITlanguage_prag
  | ITvect_prag
  | ITvect_scalar_prag
  | ITnovect_prag
  | ITctype

  | ITdotdot -- reserved symbols
  | ITcolon
  | ITdcolon
  | ITequal
  | ITlam
  | ITlcase
  | ITvbar
  | ITlarrow
  | ITrarrow
  | ITat
  | ITtilde
  | ITtildehsh
  | ITdarrow
  | ITminus
  | ITbang
  | ITstar
  | ITdot

  | ITbiglam -- GHC-extension symbols

  | ITocurly -- special symbols
  | ITccurly
  | ITvocurly -- virtual braces/semicolons inserted by layout processing
  | ITvccurly
  | ITobrack
  | ITopabrack -- [:, for parallel arrays with -XParallelArrays
  | ITcpabrack -- :], for parallel arrays with -XParallelArrays
  | ITcbrack
  | IToparen
  | ITcparen
  | IToubxparen
  | ITcubxparen
  | ITsemi
  | ITcomma
  | ITunderscore
  | ITbackquote
  | ITsimpleQuote -- '

  | ITvarid FastString -- identifiers
  | ITconid FastString
  | ITvarsym FastString
  | ITconsym FastString
  -- Qualified names carry (module name, base name)
  | ITqvarid (FastString,FastString)
  | ITqconid (FastString,FastString)
  | ITqvarsym (FastString,FastString)
  | ITqconsym (FastString,FastString)
  | ITprefixqvarsym (FastString,FastString)
  | ITprefixqconsym (FastString,FastString)

  | ITdupipvarid FastString -- GHC extension: implicit param: ?x

  | ITchar Char
  | ITstring FastString
  | ITinteger Integer
  | ITrational FractionalLit

  | ITprimchar Char
  | ITprimstring FastBytes
  | ITprimint Integer
  | ITprimword Integer
  | ITprimfloat FractionalLit
  | ITprimdouble FractionalLit

  -- Template Haskell extension tokens
  | ITopenExpQuote -- [| or [e|
  | ITopenPatQuote -- [p|
  | ITopenDecQuote -- [d|
  | ITopenTypQuote -- [t|
  | ITcloseQuote -- |]
  | ITidEscape FastString -- $x
  | ITparenEscape -- $(
  | ITtyQuote -- ''
  | ITquasiQuote (FastString,FastString,RealSrcSpan)
    -- ITquasiQuote(quoter, quote, loc)
    -- represents a quasi-quote of the form
    -- [quoter| quote |]
  | ITqQuasiQuote (FastString,FastString,FastString,RealSrcSpan)
    -- ITqQuasiQuote(Qual, quoter, quote, loc)
    -- represents a qualified quasi-quote of the form
    -- [Qual.quoter| quote |]

  -- Arrow notation extension
  | ITproc
  | ITrec
  | IToparenbar -- (|
  | ITcparenbar -- |)
  | ITlarrowtail -- -<
  | ITrarrowtail -- >-
  | ITLarrowtail -- -<<
  | ITRarrowtail -- >>-

  | ITunknown String -- Used when the lexer can't make sense of it
  | ITeof -- end of file token

  -- Documentation annotations
  | ITdocCommentNext String -- something beginning '-- |'
  | ITdocCommentPrev String -- something beginning '-- ^'
  | ITdocCommentNamed String -- something beginning '-- $'
  | ITdocSection Int String -- a section heading
  | ITdocOptions String -- doc options (prune, ignore-exports, etc)
  | ITdocOptionsOld String -- doc options declared "-- # ..."-style
  | ITlineComment String -- comment starting by "--"
  | ITblockComment String -- comment in {- -}
  deriving Show
-- the bitmap provided as the third component indicates whether the
-- corresponding extension keyword is valid under the extension options
-- provided to the compiler; if the extension corresponding to *any* of the
-- bits set in the bitmap is enabled, the keyword is valid (this setup
-- facilitates using a keyword in two different extensions that can be
-- activated independently)
--
-- A bitmap of 0 means the word is always a keyword ('varid' special-cases
-- that pattern).
reservedWordsFM :: UniqFM (Token, Int)
reservedWordsFM = listToUFM $
    map (\(x, y, z) -> (mkFastString x, (y, z)))
        [( "_",             ITunderscore,   0 ),
         ( "as",            ITas,           0 ),
         ( "case",          ITcase,         0 ),
         ( "class",         ITclass,        0 ),
         ( "data",          ITdata,         0 ),
         ( "default",       ITdefault,      0 ),
         ( "deriving",      ITderiving,     0 ),
         ( "do",            ITdo,           0 ),
         ( "else",          ITelse,         0 ),
         ( "hiding",        IThiding,       0 ),
         ( "if",            ITif,           0 ),
         ( "import",        ITimport,       0 ),
         ( "in",            ITin,           0 ),
         ( "infix",         ITinfix,        0 ),
         ( "infixl",        ITinfixl,       0 ),
         ( "infixr",        ITinfixr,       0 ),
         ( "instance",      ITinstance,     0 ),
         ( "let",           ITlet,          0 ),
         ( "module",        ITmodule,       0 ),
         ( "newtype",       ITnewtype,      0 ),
         ( "of",            ITof,           0 ),
         ( "qualified",     ITqualified,    0 ),
         ( "then",          ITthen,         0 ),
         ( "type",          ITtype,         0 ),
         ( "where",         ITwhere,        0 ),
         ( "_scc_",         ITscc,          0 ), -- ToDo: remove

         ( "forall",        ITforall,       bit explicitForallBit .|.
                                            bit inRulePragBit),
         ( "mdo",           ITmdo,          bit recursiveDoBit),
         ( "family",        ITfamily,       bit tyFamBit),
         ( "group",         ITgroup,        bit transformComprehensionsBit),
         ( "by",            ITby,           bit transformComprehensionsBit),
         ( "using",         ITusing,        bit transformComprehensionsBit),

         ( "foreign",       ITforeign,      bit ffiBit),
         ( "export",        ITexport,       bit ffiBit),
         ( "label",         ITlabel,        bit ffiBit),
         ( "dynamic",       ITdynamic,      bit ffiBit),
         ( "safe",          ITsafe,         bit ffiBit .|.
                                            bit safeHaskellBit),
         ( "interruptible", ITinterruptible, bit interruptibleFfiBit),
         ( "unsafe",        ITunsafe,       bit ffiBit),
         ( "stdcall",       ITstdcallconv,  bit ffiBit),
         ( "ccall",         ITccallconv,    bit ffiBit),
         ( "capi",          ITcapiconv,     bit cApiFfiBit),
         ( "prim",          ITprimcallconv, bit ffiBit),

         ( "rec",           ITrec,          bit arrowsBit .|.
                                            bit recursiveDoBit),
         ( "proc",          ITproc,         bit arrowsBit)
     ]
-- | Reserved operators.  Unlike 'reservedWordsFM' the third component is a
-- predicate over the extension bitmap, so a symbol can be reserved under an
-- arbitrary combination of extensions (e.g. Unicode arrows need both
-- UnicodeSyntax and Arrows).
reservedSymsFM :: UniqFM (Token, Int -> Bool)
reservedSymsFM = listToUFM $
    map (\ (x,y,z) -> (mkFastString x,(y,z)))
      [ ("..",  ITdotdot,   always)
        -- (:) is a reserved op, meaning only list cons
       ,(":",   ITcolon,    always)
       ,("::",  ITdcolon,   always)
       ,("=",   ITequal,    always)
       ,("\\",  ITlam,      always)
       ,("|",   ITvbar,     always)
       ,("<-",  ITlarrow,   always)
       ,("->",  ITrarrow,   always)
       ,("@",   ITat,       always)
       ,("~",   ITtilde,    always)
       ,("~#",  ITtildehsh, always)
       ,("=>",  ITdarrow,   always)
       ,("-",   ITminus,    always)
       ,("!",   ITbang,     always)

        -- For data T (a::*) = MkT
       ,("*", ITstar, always) -- \i -> kindSigsEnabled i || tyFamEnabled i)
        -- For 'forall a . t'
       ,(".", ITdot, always) -- \i -> explicitForallEnabled i || inRulePrag i)

       ,("-<",  ITlarrowtail, arrowsEnabled)
       ,(">-",  ITrarrowtail, arrowsEnabled)
       ,("-<<", ITLarrowtail, arrowsEnabled)
       ,(">>-", ITRarrowtail, arrowsEnabled)

       ,("∷",   ITdcolon, unicodeSyntaxEnabled)
       ,("⇒",   ITdarrow, unicodeSyntaxEnabled)
       ,("∀",   ITforall, \i -> unicodeSyntaxEnabled i &&
                                explicitForallEnabled i)
       ,("→",   ITrarrow, unicodeSyntaxEnabled)
       ,("←",   ITlarrow, unicodeSyntaxEnabled)

       ,("⤙",   ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
       ,("⤚",   ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
       ,("⤛",   ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
       ,("⤜",   ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)

       ,("★",   ITstar, unicodeSyntaxEnabled)

        -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
        -- form part of a large operator. This would let us have a better
        -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
       ]
- -- -----------------------------------------------------------------------------
- -- Lexer actions
-- | The type of a rule's semantic action: given the token's source span,
-- the buffer positioned at the start of the lexeme, and the lexeme length
-- in bytes, produce a located token in the parser monad.
type Action = RealSrcSpan -> StringBuffer -> Int -> P (RealLocated Token)

-- | Emit a fixed token at the given span; the lexeme itself is ignored.
special :: Token -> Action
special tok loc _ _ = return (L loc tok)

token, layout_token :: Token -> Action
-- | Emit a fixed token (same as 'special'; kept distinct for readability
-- in the rules).
token tok loc _ _ = return (L loc tok)
-- | Emit a fixed token and switch the lexer into the 'layout' state so the
-- next token opens an implicit layout context.
layout_token tok loc _ _ = do
  pushLexState layout
  return (L loc tok)
-- | Build a token from the raw lexeme bytes.  The token is forced before
-- being returned so we don't retain the whole input buffer in a thunk.
idtoken :: (StringBuffer -> Int -> Token) -> Action
idtoken mk loc buf len =
  let !tok = mk buf len
  in return (L loc tok)

-- | Like 'idtoken', but skips the first character of the lexeme (used for
-- things like \$x and ?x where the sigil is not part of the name).
skip_one_varid :: (FastString -> Token) -> Action
skip_one_varid mk loc buf len =
  let !tok = mk (lexemeToFastString (stepOn buf) (len - 1))
  in return (L loc tok)

-- | Build a token from the lexeme as a String, forcing both the String
-- and the token eagerly.
strtoken :: (String -> Token) -> Action
strtoken mk loc buf len =
  let !str = lexemeToString buf len
      !tok = mk str
  in return (L loc tok)

-- | Like 'strtoken', but drops the last @n@ character(s) of the lexeme
-- (used to strip trailing \# / \#\# from unboxed literals).
init_strtoken :: Int -> (String -> Token) -> Action
init_strtoken n mk loc buf len =
  let !str = lexemeToString buf (len - n)
      !tok = mk str
  in return (L loc tok)
-- | Action that pushes lexer state @code@ and immediately re-lexes, so no
-- token is produced for the current match.
begin :: Int -> Action
begin code _ _ _ = pushLexState code >> lexToken

-- | Action that pops the current lexer state and re-lexes.
pop :: Action
pop _ _ _ = popLexState >> lexToken
-- | Fired for an explicit '{' after a layout keyword.  With RelaxedLayout
-- the brace is always accepted; otherwise it must sit strictly to the right
-- of the enclosing layout context's offset, or we fail with "Missing block".
hopefully_open_brace :: Action
hopefully_open_brace span buf len
  = do relaxed <- extension relaxedLayout
       ctx <- getContext
       (AI l _) <- getInput
       let offset = srcLocCol l
           isOK = relaxed ||
                  case ctx of
                    Layout prev_off : _ -> prev_off < offset
                    _                   -> True
       if isOK then pop_and open_brace span buf len
               else failSpanMsgP (RealSrcSpan span) (text "Missing block")

-- | Pop one lexer state, then run the given action on the same lexeme.
pop_and :: Action -> Action
pop_and act span buf len = do _ <- popLexState
                              act span buf len
{-# INLINE nextCharIs #-}
-- | True iff the buffer is non-empty and its next character satisfies @p@.
nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
nextCharIs buf p
  | atEnd buf = False
  | otherwise = p (currentChar buf)

{-# INLINE nextCharIsNot #-}
-- | Negation of 'nextCharIs': holds at end-of-buffer too.
nextCharIsNot :: StringBuffer -> (Char -> Bool) -> Bool
nextCharIsNot buf = not . nextCharIs buf

-- | Alex predicate: succeeds unless the next input character is @c@.
notFollowedBy :: Char -> AlexAccPred Int
notFollowedBy c _ _ _ (AI _ buf) = nextCharIsNot buf (c ==)

-- | Alex predicate: succeeds unless the next input character is an ASCII
-- operator symbol.
notFollowedBySymbol :: AlexAccPred Int
notFollowedBySymbol _ _ _ (AI _ buf) =
  nextCharIsNot buf (`elem` "!#$%&*+./<=>?@\\^|-~")
-- We must reject doc comments as being ordinary comments everywhere.
-- In some cases the doc comment will be selected as the lexeme due to
-- maximal munch, but not always, because the nested comment rule is
-- valid in all states, but the doc-comment rules are only valid in
-- the non-layout states.
isNormalComment :: AlexAccPred Int
isNormalComment bits _ _ (AI _ buf)
  | haddockEnabled bits = notFollowedByDocOrPragma
  | otherwise           = nextCharIsNot buf (== '#')
  where
    -- After "{-", optionally one space, the next char must not be a
    -- Haddock marker (| ^ * $) or '#' (a pragma).
    notFollowedByDocOrPragma
      = afterOptionalSpace buf (\b -> nextCharIsNot b (`elem` "|^*$#"))

-- | Apply @p@ to the buffer, skipping at most one leading space first.
afterOptionalSpace :: StringBuffer -> (StringBuffer -> Bool) -> Bool
afterOptionalSpace buf p
  = if nextCharIs buf (== ' ')
    then p (snd (nextChar buf))
    else p buf

-- | Alex predicate: the input is at end-of-line (or end-of-file).
atEOL :: AlexAccPred Int
atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'

-- | Lift a predicate over the extension bitmap into an Alex predicate.
ifExtension :: (Int -> Bool) -> AlexAccPred Int
ifExtension pred bits _ _ _ = pred bits
-- | Lex a (possibly multi-line) Haddock comment.  A "-- |"-style comment
-- continues onto following lines as long as each line (after horizontal
-- space) starts with exactly two dashes; section headings ("-- *") are
-- single-line.  Accumulates characters in reverse, then hands off to
-- 'docCommentEnd' to fix up the span and parser state.
multiline_doc_comment :: Action
multiline_doc_comment span buf _len = withLexedDocType (worker "")
  where
    worker commentAcc input docType oneLine = case alexGetChar' input of
      Just ('\n', input')
        -- one-line doc comments (sections) end at the newline
        | oneLine -> docCommentEnd input commentAcc docType buf span
        -- otherwise, continue only if the next line is also a comment line
        | otherwise -> case checkIfCommentLine input' of
          Just input -> worker ('\n':commentAcc) input docType False
          Nothing -> docCommentEnd input commentAcc docType buf span
      Just (c, input) -> worker (c:commentAcc) input docType oneLine
      Nothing -> docCommentEnd input commentAcc docType buf span

    -- Returns Just the input positioned after "--" if the next line is a
    -- continuation line: optional non-newline space, then exactly two
    -- dashes not followed by a third.
    checkIfCommentLine input = check (dropNonNewlineSpace input)
      where
        check input = case alexGetChar' input of
          Just ('-', input) -> case alexGetChar' input of
            Just ('-', input) -> case alexGetChar' input of
              Just (c, _) | c /= '-' -> Just input
              _ -> Nothing
            _ -> Nothing
          _ -> Nothing

        dropNonNewlineSpace input = case alexGetChar' input of
          Just (c, input')
            | isSpace c && c /= '\n' -> dropNonNewlineSpace input'
            | otherwise -> input
          Nothing -> input
-- | Action for "--" comments: emit an ITlineComment token when the raw
-- token stream is requested (e.g. by ghc -ddump-tokens / the GHC API),
-- otherwise discard the comment and lex the next token.
lineCommentToken :: Action
lineCommentToken span buf len = do
  rawTokens <- extension rawTokenStreamEnabled
  if rawTokens
    then strtoken ITlineComment span buf len
    else lexToken
{-
  nested comments require traversing by hand, they can't be parsed
  using regular expressions.
-}
-- Tracks nesting depth starting at 1 (we've already consumed the opening
-- "{-").  '\123' and '\125' are '{' and '}' written as escapes so the
-- literal braces don't confuse tooling that scans this code section.
-- On reaching depth 0: emit an ITblockComment if the raw token stream was
-- requested, otherwise run the continuation @cont@ (usually 'lexToken').
nested_comment :: P (RealLocated Token) -> Action
nested_comment cont span _str _len = do
  input <- getInput
  go "" (1::Int) input
  where
    go commentAcc 0 input = do setInput input
                               b <- extension rawTokenStreamEnabled
                               if b
                                 then docCommentEnd input commentAcc ITblockComment _str span
                                 else cont
    go commentAcc n input = case alexGetChar' input of
      Nothing -> errBrace input span
      -- "-}" closes one level; a lone '-' is ordinary comment text
      Just ('-',input) -> case alexGetChar' input of
        Nothing -> errBrace input span
        Just ('\125',input) -> go commentAcc (n-1) input
        Just (_,_) -> go ('-':commentAcc) n input
      -- "{-" opens a nested level
      Just ('\123',input) -> case alexGetChar' input of
        Nothing -> errBrace input span
        Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
        Just (_,_) -> go ('\123':commentAcc) n input
      Just (c,input) -> go (c:commentAcc) n input
-- | Lex a "{- |"-style Haddock comment up to its closing "-}".  Ordinary
-- "{- -}" comments encountered inside are handled by delegating to
-- 'nested_comment' with a continuation that resumes here.
nested_doc_comment :: Action
nested_doc_comment span buf _len = withLexedDocType (go "")
  where
    go commentAcc input docType _ = case alexGetChar' input of
      Nothing -> errBrace input span
      -- "-}" (with '\125' = '}') terminates the doc comment
      Just ('-',input) -> case alexGetChar' input of
        Nothing -> errBrace input span
        Just ('\125',input) ->
          docCommentEnd input commentAcc docType buf span
        Just (_,_) -> go ('-':commentAcc) input docType False
      -- "{-" (with '\123' = '{') starts a nested ordinary comment
      Just ('\123', input) -> case alexGetChar' input of
        Nothing -> errBrace input span
        Just ('-',input) -> do
          setInput input
          let cont = do input <- getInput; go commentAcc input docType False
          nested_comment cont span buf _len
        Just (_,_) -> go ('\123':commentAcc) input docType False
      Just (c,input) -> go (c:commentAcc) input docType False
-- | Dispatch on the Haddock marker character that was just lexed (it is the
-- character immediately before the current buffer position) and invoke the
-- given comment worker with the matching documentation-token constructor.
-- The Bool passed to the worker says whether the comment is single-line
-- (True only for section headings).
withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (RealLocated Token))
                 -> P (RealLocated Token)
withLexedDocType lexDocComment = do
  input@(AI _ buf) <- getInput
  case prevChar buf ' ' of
    '|' -> lexDocComment input ITdocCommentNext False
    '^' -> lexDocComment input ITdocCommentPrev False
    '$' -> lexDocComment input ITdocCommentNamed False
    -- "-- *" section: count the stars to get the heading level
    '*' -> lexDocSection 1 input
    '#' -> lexDocComment input ITdocOptionsOld False
    -- unreachable: the rules only fire on the characters above
    _ -> panic "withLexedDocType: Bad doc type"
  where
    lexDocSection n input = case alexGetChar' input of
      Just ('*', input) -> lexDocSection (n+1) input
      Just (_, _) -> lexDocComment input (ITdocSection n) True
      Nothing -> do setInput input; lexToken -- eof reached, lex it normally
-- RULES pragmas turn on the forall and '.' keywords, and we turn them
-- off again at the end of the pragma.
rulePrag :: Action
rulePrag span _ _ = do
  setExts (\exts -> exts .|. bit inRulePragBit)
  return (L span ITrules_prag)

-- | Close any pragma, clearing the in-RULES-pragma extension bit.
endPrag :: Action
endPrag span _ _ = do
  setExts (\exts -> exts .&. complement (bit inRulePragBit))
  return (L span ITclose_prag)
-- docCommentEnd
-------------------------------------------------------------------------------
-- This function is quite tricky. We can't just return a new token, we also
-- need to update the state of the parser. Why? Because the token is longer
-- than what was lexed by Alex, and the lexToken function doesn't know this, so
-- it writes the wrong token length to the parser state. This function is
-- called afterwards, so it can just update the state.
docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
                 RealSrcSpan -> P (RealLocated Token)
docCommentEnd input commentAcc docType buf span = do
  setInput input
  let (AI loc nextBuf) = input
      -- commentAcc was accumulated in reverse by the comment workers
      comment = reverse commentAcc
      -- widen the span to cover everything consumed by hand
      span' = mkRealSrcSpan (realSrcSpanStart span) loc
      last_len = byteDiff buf nextBuf
  span `seq` setLastToken span' last_len
  return (L span' (docType comment))
-- | Fail with "unterminated `{-'" spanning from the comment opener to the
-- point where input ran out.
errBrace :: AlexInput -> RealSrcSpan -> P a
errBrace (AI end _) span =
  failLocMsgP (realSrcSpanStart span) end "unterminated `{-'"

open_brace, close_brace :: Action
-- | Explicit '{': push a NoLayout context so layout processing is
-- suspended until the matching '}'.
open_brace span _str _len = do
  stack <- getContext
  setContext (NoLayout : stack)
  return (L span ITocurly)
-- | Explicit '}': pop the context pushed by 'open_brace'.
close_brace span _str _len = popContext >> return (L span ITccurly)
qvarid, qconid :: StringBuffer -> Int -> Token
qvarid buf len = ITqvarid $! splitQualName buf len False
qconid buf len = ITqconid $! splitQualName buf len False

splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
-- takes a StringBuffer and a length, and returns the module name
-- and identifier parts of a qualified name. Splits at the *last* dot,
-- because of hierarchical module names.
-- The Bool is True for the prefix form M.(+), where the identifier part
-- is wrapped in parentheses that must be stripped.
splitQualName orig_buf len parens = split orig_buf orig_buf
  where
    -- 'split' scans forward; 'dot_buf' remembers the position just after
    -- the most recent qualifier dot seen so far.
    split buf dot_buf
      | orig_buf `byteDiff` buf >= len = done dot_buf
      | c == '.'                       = found_dot buf'
      | otherwise                      = split buf' dot_buf
      where
        (c,buf') = nextChar buf

    -- careful, we might get names like M....
    -- so, if the character after the dot is not upper-case, this is
    -- the end of the qualifier part.
    found_dot buf -- buf points after the '.'
      | isUpper c = split buf' buf
      | otherwise = done buf
      where
        (c,buf') = nextChar buf

    done dot_buf =
      (lexemeToFastString orig_buf (qual_size - 1),  -- qualifier, minus the dot
       if parens -- Prelude.(+)
         then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
         else lexemeToFastString dot_buf (len - qual_size))
      where
        qual_size = orig_buf `byteDiff` dot_buf
-- | Lex a lowercase identifier: either a (possibly extension-gated)
-- keyword from 'reservedWordsFM', or an ordinary ITvarid.
-- NOTE: the ITcase alternative must come before the generic @(keyword, 0)@
-- one, since "case" also has bitmap 0; it is special-cased to support
-- LambdaCase ("\case" lexes as ITlcase).
varid :: Action
varid span buf len =
  case lookupUFM reservedWordsFM fs of
    Just (ITcase, _) -> do
      lambdaCase <- extension lambdaCaseEnabled
      keyword <- if lambdaCase
                 then do
                   -- "case" directly after '\' is the start of \case
                   lastTk <- getLastTk
                   return $ case lastTk of
                     Just ITlam -> ITlcase
                     _          -> ITcase
                 else
                   return ITcase
      maybe_layout keyword
      return $ L span keyword
    -- bitmap 0: unconditionally a keyword
    Just (keyword, 0) -> do
      maybe_layout keyword
      return $ L span keyword
    -- otherwise a keyword only if some enabled extension bit matches
    Just (keyword, exts) -> do
      extsEnabled <- extension $ \i -> exts .&. i /= 0
      if extsEnabled
        then do
          maybe_layout keyword
          return $ L span keyword
        else
          return $ L span $ ITvarid fs
    Nothing ->
      return $ L span $ ITvarid fs
  where
    -- strict: don't hold on to the buffer
    !fs = lexemeToFastString buf len
-- An unqualified constructor name.
conid :: StringBuffer -> Int -> Token
conid b n = ITconid $! lexemeToFastString b n
-- Qualified operator symbols.  The "prefix" variants are the
-- parenthesised form (e.g. "M.(+)"), so splitQualName is told to strip
-- the parentheses.
qvarsym, qconsym, prefixqvarsym, prefixqconsym :: StringBuffer -> Int -> Token
qvarsym       b n = ITqvarsym       $! splitQualName b n False
qconsym       b n = ITqconsym       $! splitQualName b n False
prefixqvarsym b n = ITprefixqvarsym $! splitQualName b n True
prefixqconsym b n = ITprefixqconsym $! splitQualName b n True
-- Unqualified operator symbols; both go through 'sym', which checks
-- the reserved-symbol table.
varsym, consym :: Action
varsym = sym ITvarsym
consym = sym ITconsym
sym :: (FastString -> Token) -> Action
-- Shared worker for operator lexemes: a symbol found in the
-- reserved-symbols table is only emitted as its keyword token when the
-- required extensions are enabled; otherwise fall back to the plain
-- constructor 'con'.
sym con span buf len =
  case lookupUFM reservedSymsFM fs of
    Just (keyword, exts) -> do
      extsEnabled <- extension exts
      -- Strict so we decide the token before returning it.
      let !tk | extsEnabled = keyword
              | otherwise   = con fs
      return $ L span tk
    Nothing ->
      return $ L span $! con fs
  where
    -- Strict: take the lexeme eagerly while the buffer is valid.
    !fs = lexemeToFastString buf len
-- Variations on the integral numeric literal.
tok_integral :: (Integer -> Token)       -- wrap the parsed value in a token
             -> (Integer -> Integer)     -- post-transform (e.g. negate)
             -> Int -> Int               -- bytes dropped from the start / from the length
                                         -- (to skip prefixes like "0x")
             -> (Integer, (Char -> Int)) -- radix and digit decoder
             -> Action
tok_integral itint transint transbuf translen (radix,char_to_int) span buf len
  = return $ L span $ itint $! transint $ parseUnsignedInteger
      (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
-- some conveniences for use with tok_integral
tok_num :: (Integer -> Integer)
        -> Int -> Int
        -> (Integer, (Char->Int)) -> Action
tok_num = tok_integral ITinteger

tok_primint :: (Integer -> Integer)
            -> Int -> Int
            -> (Integer, (Char->Int)) -> Action
tok_primint = tok_integral ITprimint

-- Unboxed words take no sign transform: the value is always positive.
tok_primword :: Int -> Int
             -> (Integer, (Char->Int)) -> Action
tok_primword = tok_integral ITprimword positive
-- Sign transforms applied to the parsed magnitude of a numeric literal.
positive, negative :: (Integer -> Integer)
positive n = n
negative n = negate n
-- Radix / digit-decoder pairs for use with tok_integral.
decimal, octal, hexadecimal :: (Integer, Char -> Int)
decimal = (10,octDecDigit)
octal = (8,octDecDigit)
hexadecimal = (16,hexDigit)
-- readRational can understand negative rationals, exponents, everything.
tok_float, tok_primfloat, tok_primdouble :: String -> Token
tok_float str = ITrational $! readFractionalLit str
tok_primfloat str = ITprimfloat $! readFractionalLit str
tok_primdouble str = ITprimdouble $! readFractionalLit str

-- Pair the original source text of the literal with its parsed
-- Rational value, forcing both.
readFractionalLit :: String -> FractionalLit
readFractionalLit str = (FL $! str) $! readRational str
-- -----------------------------------------------------------------------------
-- Layout processing

-- we're at the first token on a line, insert layout tokens if necessary
do_bol :: Action
do_bol span _str _len = do
  -- Compare the current column against the enclosing layout context.
  pos <- getOffside
  case pos of
      LT -> do
          --trace "layout: inserting '}'" $ do
          popContext
          -- do NOT pop the lex state, we might have a ';' to insert
          return (L span ITvccurly)
      EQ -> do
          --trace "layout: inserting ';'" $ do
          _ <- popLexState
          return (L span ITsemi)
      GT -> do
          -- Indented further right: no layout token needed; leave the
          -- bol state and lex the token normally.
          _ <- popLexState
          lexToken
-- certain keywords put us in the "layout" state, where we might
-- add an opening curly brace.
maybe_layout :: Token -> P ()
maybe_layout t = do
  -- If the alternative layout rule is enabled then we never create an
  -- implicit layout context here.  Layout is handled XXX instead.
  -- The code for closing implicit contexts, or inserting implicit
  -- semi-colons, is therefore irrelevant as it only applies in an
  -- implicit context.
  alr <- extension alternativeLayoutRule
  unless alr (startLayout t)
  where
    startLayout tok = case tok of
      ITdo    -> pushLexState layout_do
      ITmdo   -> pushLexState layout_do
      ITof    -> pushLexState layout
      ITlcase -> pushLexState layout
      ITlet   -> pushLexState layout
      ITwhere -> pushLexState layout
      ITrec   -> pushLexState layout
      _       -> return ()
-- Pushing a new implicit layout context.  If the indentation of the
-- next token is not greater than the previous layout context, then
-- Haskell 98 says that the new layout context should be empty; that is
-- the lexer must generate {}.
--
-- We are slightly more lenient than this: when the new context is started
-- by a 'do', then we allow the new context to be at the same indentation as
-- the previous context.  This is what the 'strict' argument is for.
--
new_layout_context :: Bool -> Action
new_layout_context strict span _buf _len = do
  _ <- popLexState
  (AI l _) <- getInput
  -- The layout column is the column of the token *after* the keyword.
  let offset = srcLocCol l
  ctx <- getContext
  -- NondecreasingIndentation effectively forces the lenient behaviour
  -- everywhere: strictness only applies when it is off.
  nondecreasing <- extension nondecreasingIndentation
  let strict' = strict || not nondecreasing
  case ctx of
      Layout prev_off : _ |
         (strict'     && prev_off >= offset ||
          not strict' && prev_off >  offset) -> do
          -- token is indented to the left of the previous context.
          -- we must generate a {} sequence now.
          pushLexState layout_left
          return (L span ITvocurly)
      _ -> do
          setContext (Layout offset : ctx)
          return (L span ITvocurly)
-- Emit the '}' of an empty {} layout block, then switch to the bol
-- state -- we must be at the start of a line, so a ';' may follow.
do_layout_left :: Action
do_layout_left span _buf _len =
  popLexState >> pushLexState bol >> return (L span ITvccurly)
-- -----------------------------------------------------------------------------
-- LINE pragmas

-- Handle the line number of a {-# LINE ... #-} pragma: reset the
-- source location and continue lexing in the given state 'code'.
setLine :: Int -> Action
setLine code span buf len = do
  let line = parseUnsignedInteger buf len 10 octDecDigit
  setSrcLoc (mkRealSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
  -- subtract one: the line number refers to the *following* line
  _ <- popLexState
  pushLexState code
  lexToken
-- Handle the file name of a {-# LINE ... #-} pragma: record the new
-- file, reset the source location, and continue lexing in state 'code'.
setFile :: Int -> Action
setFile code span buf len = do
  -- stepOn / (len-2) strip the surrounding quotes from the lexeme.
  let file = mkFastString (go (lexemeToString (stepOn buf) (len-2)))
        where go ('\\':c:cs) = c : go cs
              go (c:cs)      = c : go cs
              go []          = []
        -- decode escapes in the filename.  e.g. on Windows
        -- when our filenames have backslashes in, gcc seems to
        -- escape the backslashes.  One symptom of not doing this
        -- is that filenames in error messages look a bit strange:
        --   C:\\foo\bar.hs
        -- only the first backslash is doubled, because we apply
        -- System.FilePath.normalise before printing out
        -- filenames and it does not remove duplicate
        -- backslashes after the drive letter (should it?).
  setAlrLastLoc $ alrInitialLoc file
  setSrcLoc (mkRealSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
  addSrcFile file
  _ <- popLexState
  pushLexState code
  lexToken
-- Starting location for the alternative layout rule in a fresh file.
alrInitialLoc :: FastString -> RealSrcSpan
alrInitialLoc file = mkRealSrcSpan before_start before_start
  where
    -- This is a hack to ensure that the first line in a file
    -- looks like it is after the initial location:
    before_start = mkRealSrcLoc file (-1) (-1)
-- -----------------------------------------------------------------------------
-- Options, includes and language pragmas.

-- Lex the free-form body of a pragma up to (but not consuming) the
-- closing "#-}", wrapping the collected text with 'mkTok'.
lex_string_prag :: (String -> Token) -> Action
lex_string_prag mkTok span _buf _len
    = do input <- getInput
         start <- getSrcLoc
         tok <- go [] input
         end <- getSrcLoc
         return (L (mkRealSrcSpan start end) tok)
    where -- Accumulate characters (in reverse) until "#-}" is next.
          go acc input
              = if isString input "#-}"
                   then do setInput input
                           return (mkTok (reverse acc))
                   else case alexGetChar input of
                          Just (c,i) -> go (c:acc) i
                          Nothing -> err input
          -- Does the input start with the given string?
          isString _ [] = True
          isString i (x:xs)
              = case alexGetChar i of
                  Just (c,i') | c == x -> isString i' xs
                  _other -> False
          err (AI end _) = failLocMsgP (realSrcSpanStart span) end "unterminated options pragma"
-- -----------------------------------------------------------------------------
-- Strings & Chars

-- This stuff is horrible.  I hates it.

-- Lex a string literal (the opening quote has been consumed); the
-- resulting span covers the whole literal.
lex_string_tok :: Action
lex_string_tok span _buf _len = do
  tok <- lex_string ""
  end <- getSrcLoc
  return (L (mkRealSrcSpan (realSrcSpanStart span) end) tok)
lex_string :: String -> P Token
-- Accumulate the string's characters (in reverse) in 's' until the
-- closing quote.
lex_string s = do
  i <- getInput
  case alexGetChar' i of
    Nothing -> lit_error i   -- EOF inside the literal
    Just ('"',i) -> do
      -- Closing quote.  With MagicHash, a trailing '#' makes this a
      -- primitive string literal (bytes only, hence the <= '\xFF'
      -- check).
      setInput i
      magicHash <- extension magicHashEnabled
      if magicHash
        then do
          i <- getInput
          case alexGetChar' i of
            Just ('#',i) -> do
              setInput i
              if any (> '\xFF') s
                then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
                else let fb = unsafeMkFastBytesString (reverse s)
                     in return (ITprimstring fb)
            _other ->
              return (ITstring (mkFastString (reverse s)))
        else
          return (ITstring (mkFastString (reverse s)))
    Just ('\\',i)
      | Just ('&',i) <- next -> do
          -- "\&" is the empty escape: consume and continue.
          setInput i; lex_string s
      | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
          -- Backslash followed by whitespace opens a string gap.
          -- is_space only works for <= '\x7f' (#3751, #5425)
          setInput i; lex_stringgap s
      where next = alexGetChar' i
    Just (c, i1) -> do
      case c of
        '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
        c | isAny c -> do setInput i1; lex_string (c:s)
        _other -> lit_error i
lex_stringgap :: String -> P Token
-- Inside a string gap (backslash-whitespace...-backslash): skip white
-- space until the closing backslash, then resume the string proper.
lex_stringgap s = do
  i <- getInput
  c <- getCharOrFail i
  case c of
    '\\' -> lex_string s
    c | c <= '\x7f' && is_space c -> lex_stringgap s
           -- is_space only works for <= '\x7f' (#3751, #5425)
    _other -> lit_error i
lex_char_tok :: Action
-- Here we are basically parsing character literals, such as 'x' or '\n'
-- but, when Template Haskell is on, we additionally spot
-- 'x and ''T, returning ITsimpleQuote and ITtyQuote respectively,
-- but WITHOUT CONSUMING the x or T part  (the parser does that).
-- So we have to do two characters of lookahead: when we see 'x we need to
-- see if there's a trailing quote
lex_char_tok span _buf _len = do        -- We've seen '
   i1 <- getInput       -- Look ahead to first character
   let loc = realSrcSpanStart span
   case alexGetChar' i1 of
        Nothing -> lit_error  i1

        Just ('\'', i2@(AI end2 _)) -> do       -- We've seen ''
                  setInput i2
                  return (L (mkRealSrcSpan loc end2)  ITtyQuote)

        Just ('\\', i2@(AI _end2 _)) -> do      -- We've seen 'backslash
                  setInput i2
                  lit_ch <- lex_escape
                  i3 <- getInput
                  mc <- getCharOrFail i3 -- Trailing quote
                  if mc == '\'' then finish_char_tok loc lit_ch
                                else lit_error i3

        Just (c, i2@(AI _end2 _))
                | not (isAny c) -> lit_error i1
                | otherwise ->
                -- We've seen 'x, where x is a valid character
                --  (i.e. not newline etc) but not a quote or backslash
           case alexGetChar' i2 of      -- Look ahead one more character
                Just ('\'', i3) -> do   -- We've seen 'x'
                        setInput i3
                        finish_char_tok loc c
                _other -> do            -- We've seen 'x not followed by quote
                                        -- (including the possibility of EOF)
                                        -- If TH is on, just parse the quote only
                        let (AI end _) = i1
                        return (L (mkRealSrcSpan loc end) ITsimpleQuote)
finish_char_tok :: RealSrcLoc -> Char -> P (RealLocated Token)
finish_char_tok loc ch  -- We've already seen the closing quote
                        -- Just need to check for trailing #
  = do  magicHash <- extension magicHashEnabled
        i@(AI end _) <- getInput
        if magicHash then do
            case alexGetChar' i of
              -- Note: the inner 'i'/'end' deliberately shadow the
              -- outer ones so the span covers the trailing '#'.
              Just ('#',i@(AI end _)) -> do
                setInput i
                return (L (mkRealSrcSpan loc end) (ITprimchar ch))
              _other ->
                return (L (mkRealSrcSpan loc end) (ITchar ch))
          else do
            return (L (mkRealSrcSpan loc end) (ITchar ch))
-- May this character appear literally in a string/char literal?
-- Non-ASCII is judged by printability, ASCII by the lexer's own
-- character class.
isAny :: Char -> Bool
isAny c = if c > '\x7f' then isPrint c else is_any c
- lex_escape :: P Char
- lex_escape = do
- i0 <- getInput
- c <- getCharOrFail i0
- case c of
- 'a' -> return '\a'
- 'b' -> return '\b'
- 'f' -> return '\f'
- 'n' -> return '\n'
- 'r' -> return '\r'
- 't' -> return '\t'
- 'v' -> return '\v'
- '\\' -> return '\\'
- '"' -> return '\"'
- '\'' -> return '\''
- '^' -> do i1 <- getInput
- c <- getCharOrFail i1
-