PageRenderTime 72ms CodeModel.GetById 13ms app.highlight 54ms RepoModel.GetById 1ms app.codeStats 0ms

/fparsec/main/FParsec/Internals.fs

http://github.com/sandersn/fing
F# | 444 lines | 380 code | 45 blank | 19 comment | 114 complexity | a86a4ec0e41f8a57a5e1794fae925ace MD5 | raw file
  1// Copyright (c) Stephan Tolksdorf 2009
  2// License: Simplified BSD License. See accompanying documentation.
  3
  4module FParsec.Internals
  5
  6open System.Diagnostics
  7
  8// The following functions are defined using inline IL to help fsc generate code
  9// the JIT knows better how to optimize.
 10// Should F# stop supporting inline IL outside the standard library, you can switch
 11// to the commented out alternatives (which by then will probably be just as efficient).
 12let inline referenceEquals<'a when 'a : not struct> (x: 'a) (y: 'a) =
 13    (# "ceq" x y : bool #) // LanguagePrimitives.PhysicalEquality x y
 14let inline isNull<'a when 'a : not struct> (x: 'a) =
 15    (# "ldnull ceq" x : bool #) // referenceEquals (box x) null
 16let inline isNotNull<'a when 'a : not struct> (x: 'a) =
 17    (# "ldnull cgt.un" x : bool #) // not (isNull x)
 18
 19let inline isNullOrEmpty (s: string) = isNull s || s.Length = 0
 20
 21// These operators are faster than = and <>. They are not public because
 22// their names conflict with the operators in the OCaml compatibility module
 23let inline (==) (s1: State<'u>) (s2: State<'u>) = s1.Equals(s2)
 24let inline (!=) (s1: State<'u>) (s2: State<'u>) = not (s1 == s2)
 25
 26// the F# compiler doesn't yet "fuse" multiple '+' string concatenations into one, as the C# compiler does
 27let inline concat3 (a: string) (b: string) (c: string) = System.String.Concat(a, b, c)
 28let inline concat4 (a: string) (b: string) (c: string) (d: string) = System.String.Concat(a, b, c, d)
 29let inline concat5 (a: string) (b: string) (c: string) (d: string) (e: string) = System.String.Concat([|a;b;c;d;e|])
 30let inline concat6 (a: string) (b: string) (c: string) (d: string) (e: string) (f: string) = System.String.Concat([|a;b;c;d;e;f|])
 31let inline concat7 (a: string) (b: string) (c: string) (d: string) (e: string) (f: string) (g: string) = System.String.Concat([|a;b;c;d;e;f;g|])
 32
 33let containsNewlineChar = Helper.ContainsNewlineChar
 34
 35let ordinalEnding (i: int) =
 36    match i%10 with
 37    | 1 -> "st"
 38    | 2 -> "nd"
 39    | 3 -> "rd"
 40    | _ -> "th"
 41
 42let hexEscapeChar c =
 43    let n = int c
 44    let cs = Array.zeroCreate 6
 45    cs.[0] <- '\\'; cs.[1] <- 'u'
 46    for j = 0 to 3 do
 47        cs.[5 - j] <- "0123456789abcdef".[((n >>> 4*j) &&& 0xf)]
 48    new string(cs)
 49
 50[<NoDynamicInvocation>]
 51let inline private escapeCharHelper escapeSingleQuote escapeDoubleQuote escapeNonAscii (c: char) (f: char -> string) =
 52    if c > '\'' && c < '\u007f' then
 53        if c <> '\\' then f c else "\\\\"
 54    else
 55        match c with
 56        | '\b' -> "\\b"
 57        | '\t' -> "\\t"
 58        | '\n' -> "\\n"
 59        | '\r' -> "\\r"
 60        | '\"' when escapeDoubleQuote -> "\\\""
 61        | '\'' when escapeSingleQuote -> "\\'"
 62        | _ -> if (escapeNonAscii && c >= '\u007f') || System.Char.IsControl(c) then hexEscapeChar c else f c
 63
 64[<NoDynamicInvocation>]
 65let inline escapeStringHelper escapeSingleQuote escapeDoubleQuote escapeNonAscii (s: string) =
 66    let rec escape sb i start =
 67        if i < s.Length then
 68            let esc = escapeCharHelper escapeSingleQuote escapeDoubleQuote escapeNonAscii s.[i] (fun _ -> null)
 69            if isNull esc then escape sb (i + 1) start
 70            else
 71                let sb = if isNull sb then (new System.Text.StringBuilder(s.Length + 6))
 72                         else sb
 73                sb.Append(s, start, i - start).Append(esc) |> ignore
 74                escape sb (i + 1) (i + 1)
 75        elif isNull sb then s
 76        else sb.Append(s, start, s.Length - start).ToString()
 77    escape null 0 0
 78
 79[<NoDynamicInvocation>]
 80let inline quoteStringHelper (quote: string) escapeSingleQuote escapeDoubleQuote escapeNonAscii (s: string) =
 81    let rec escape sb i start =
 82        if i < s.Length then
 83            let esc = escapeCharHelper escapeSingleQuote escapeDoubleQuote escapeNonAscii s.[i] (fun _ -> null)
 84            if isNull esc then escape sb (i + 1) start
 85            else
 86                let sb = if isNull sb then (new System.Text.StringBuilder(s.Length + 8)).Append(quote)
 87                         else sb
 88                sb.Append(s, start, i - start).Append(esc) |> ignore
 89                escape sb (i + 1) (i + 1)
 90        elif isNull sb then concat3 quote s quote
 91        else sb.Append(s, start, s.Length - start).Append(quote).ToString()
 92    escape null 0 0
 93
 94let escapeStringInDoubleQuotes s = escapeStringHelper false true false s
 95
 96let quoteChar c =
 97    if c <> '\'' then concat3 "'" (escapeCharHelper false false false c string) "'"
 98    else "\"'\""
 99
100let quoteString s      = quoteStringHelper "'" true false false s
101let asciiQuoteString s = quoteStringHelper "'" true false true  s
102
103
104/// A primitive pretty printer.
105type LineWrapper(tw: System.IO.TextWriter, columnWidth: int, writerIsMultiCharGraphemeSafe: bool) =
106    do if columnWidth < 1 then invalidArg "columnWidth" "columnWidth must be positive."
107
108    let mutable indentation = ""
109    let mutable maxSpace = columnWidth
110    let mutable space = columnWidth
111    let mutable afterNewline = true
112    let mutable afterSpace = false
113
114    new (tw: System.IO.TextWriter, columnWidth: int) =
115        new LineWrapper(tw, columnWidth, not tw.Encoding.IsSingleByte)
116
117    member t.TextWriter = tw
118    member t.ColumnWidth = columnWidth
119    member t.WriterIsMultiCharGraphemeSafe = writerIsMultiCharGraphemeSafe
120
121    member t.Indentation
122      with get() = indentation
123       and set (s: string) =
124               let s = if s.Length <= columnWidth - 1 then s
125                       else s.Substring(0, columnWidth - 1) // guarantee maxSpace >= 1
126               indentation <- s
127               maxSpace <- columnWidth - s.Length
128               if afterNewline then space <- maxSpace
129
130    member t.Newline() =
131        tw.WriteLine()
132        afterNewline <- true
133        afterSpace <- false
134        space <- maxSpace
135
136    member t.Space() =
137        afterSpace <- true
138
139    member t.Print(s: string) =
140        if isNotNull s then
141            let mutable start = 0
142            for i = 0 to s.Length - 1 do
143                let c = s.[i]
144                if (if   c <= ' '  then c = ' ' || (c >= '\t' && c <= '\r')
145                    else c >= '\u0085' && (c = '\u0085' || c = '\u2028' || c = '\u2029'))
146                then // any ' ', tab or newlines
147                    if start < i then
148                        t.Write(s.Substring(start, i - start))
149                    t.Space()
150                    start <- i + 1
151            if start < s.Length then
152                if start = 0 then t.Write(s)
153                else t.Write(s.Substring(start, s.Length - start))
154
155    member t.Print(s1, s2) = t.Print(s1); t.Print(s2)
156    member t.Print(s1, s2, s3) = t.Print(s1); t.Print(s2); t.Print(s3)
157    member t.PrintLine(s: string) = t.Print(s); t.Newline()
158    member t.PrintLine(s1: string, s2: string) = t.Print(s1); t.Print(s2); t.Newline()
159    member t.PrintLine(s1: string, s2: string, s3: string) = t.Print(s1); t.Print(s2); t.Print(s3); t.Newline()
160
161    member private t.Write(s: string) =
162        Debug.Assert(s.Length > 0)
163        if afterNewline then
164            tw.Write(indentation)
165            afterNewline <- false
166        let n = if writerIsMultiCharGraphemeSafe then Helper.CountTextElements(s) else s.Length
167        match afterSpace with
168        | true when n + 1 <= space ->
169            tw.Write(' ')
170            tw.Write(s)
171            space <- space - 1 - n
172            afterSpace <- false
173        | false when n <= space ->
174            tw.Write(s)
175            space <- space - n
176        | _ when s.Length <= maxSpace ->
177            tw.WriteLine()
178            tw.Write(indentation)
179            tw.Write(s)
180            space <- maxSpace - n
181            afterSpace <- false
182        | _ ->
183            t.Break(s)
184
185    /// breaks a string into multiple lines along text element boundaries.
186    member private t.Break(s: string) =
187        Debug.Assert(s.Length > 0 && not afterNewline)
188        if afterSpace then
189            afterSpace <- false
190            if space > 1 then
191                tw.Write(' ')
192                space <- space - 1
193            else
194                tw.WriteLine()
195                tw.Write(indentation)
196                space <- maxSpace
197        elif space = 0 then
198            tw.WriteLine()
199            tw.Write(indentation)
200            space <- maxSpace
201        let te = System.Globalization.StringInfo.GetTextElementEnumerator(s)
202        te.MoveNext() |> ignore
203        Debug.Assert(te.ElementIndex = 0)
204        if writerIsMultiCharGraphemeSafe then
205            let mutable startIndex = 0
206            while te.MoveNext() do
207                space <- space - 1
208                if space = 0 then
209                    let index = te.ElementIndex
210                    tw.WriteLine(s.Substring(startIndex, index - startIndex))
211                    tw.Write(indentation)
212                    space <- maxSpace
213                    startIndex <- index
214            space <- space - 1
215            tw.Write(s.Substring(startIndex, s.Length - startIndex))
216        else
217            // We don't break up text elements, but when we fit string pieces into lines we
218            // use UTF-16 lengths instead of text element counts (in order to support displays
219            // that have problems with combining character sequences).
220            let mutable startIndex = 0
221            let mutable lastIndex = 0
222            while te.MoveNext() do
223                let index = te.ElementIndex
224                let count = index - startIndex
225                if count < space then
226                    lastIndex <- index
227                elif count = space || lastIndex <= startIndex then
228                    tw.WriteLine(s.Substring(startIndex, count))
229                    tw.Write(indentation)
230                    space <- maxSpace
231                    startIndex <- index
232                else
233                    tw.WriteLine(s.Substring(startIndex, lastIndex - startIndex))
234                    tw.Write(indentation)
235                    space <- maxSpace
236                    startIndex <- lastIndex
237            let index = s.Length
238            let count = index - startIndex
239            if count <= space then
240                tw.Write(s.Substring(startIndex, count))
241                space <- space - count
242            elif lastIndex <= startIndex then
243                tw.WriteLine(s.Substring(startIndex, index - startIndex))
244                space <- maxSpace
245                afterNewline <- true
246            else
247                tw.WriteLine(s.Substring(startIndex, lastIndex - startIndex))
248                tw.Write(indentation)
249                tw.Write(s.Substring(lastIndex, index - lastIndex))
250                space <- maxSpace - (index - lastIndex)
251                if space < 0 then
252                    tw.WriteLine()
253                    space <- maxSpace
254                    afterNewline <- true
255
256
257type LineSnippet = {
258    String: string
259    TextElementIndex: int
260    Index: int
261    IndexOfTextElement: int
262    LengthOfTextElement: int
263    UnaccountedNewlines: int
264    Column: int64
265    Utf16Column: int64 // the UTF16 tabs are only counted as 1 char
266    LineContainsTabsBeforeIndex: bool
267    IsBetweenCRAndLF: bool
268}
269
270let getLineSnippet (stream: CharStream) (p: Position) (space: int) (tabSize: int) multiCharGraphemeSafe =
271    Debug.Assert(space > 0 && tabSize > 0)
272    Debug.Assert(p.Index >= stream.BeginIndex && p.Index <= stream.EndIndex)
273
274    let isCombiningChar (s: string) =
275        match System.Char.GetUnicodeCategory(s, 0) with
276        | System.Globalization.UnicodeCategory.NonSpacingMark
277        | System.Globalization.UnicodeCategory.SpacingCombiningMark
278        | System.Globalization.UnicodeCategory.EnclosingMark
279        | System.Globalization.UnicodeCategory.Surrogate
280            -> true
281        | _ -> false
282
283    let isUnicodeNewlineOrEos c =
284        match c with
285        | '\n' | '\u000C' | '\r'| '\u0085'| '\u2028'| '\u2029'
286        | '\uffff' -> true
287        | _  -> false
288
289    // we restrict the maximum column count, so that we don't accidentally
290    // completely reread a multi-gigabyte file when it has no newlines
291    let maxColForColCount = 1000
292    let maxExtraChars = 32
293    let colTooLarge = p.Column > int64 maxColForColCount
294
295    let mutable index = p.Index
296    let mutable iterBegin = stream.Seek(index) // throws if index is too small
297    let mutable iterEnd = iterBegin
298    if index <> iterEnd.Index then
299        raise (System.ArgumentException("The error position lies beyond the end of the stream."))
300    let isBetweenCRAndLF = iterEnd.Read() = '\n' && iterEnd.Peek(-1) = '\r'
301    if not isBetweenCRAndLF then
302        let mutable c = iterEnd.Read()
303        let mutable n = 2*space + maxExtraChars
304        // skip to end of line, but not over more than n chars
305        while not (isUnicodeNewlineOrEos c) && n <> 0 do
306            c <- iterEnd._Increment()
307            n <- n - 1
308        if not (isUnicodeNewlineOrEos c) then
309            n <- maxExtraChars
310            while isCombiningChar (iterEnd.Read(2)) && n <> 0 do
311                iterEnd._Increment() |> ignore
312                n <- n - 1
313    else
314        iterEnd._Decrement() |> ignore
315        iterBegin <- iterEnd
316        index <- index - 1L
317
318    let lineBegin = index - p.Column + 1L
319    // use _Decrement instead of Advance, so that we don't move past the beginning of the stream
320    iterBegin._Decrement(if not colTooLarge then uint32 p.Column - 1u else uint32 maxColForColCount - 1u) |> ignore
321    if colTooLarge then
322        let mutable n = if p.Column < int64 System.Int32.MaxValue then
323                            min maxExtraChars (int32 p.Column - maxColForColCount)
324                        else maxExtraChars
325        while isCombiningChar (iterBegin.Read(2)) && n <> 0 do
326            iterBegin._Decrement() |> ignore
327            n <- n - 1
328    let iterBeginIndex = iterBegin.Index
329    let mutable columnOffset = iterBeginIndex - lineBegin
330    let mutable idx = int (index - iterBeginIndex)
331    let mutable str = iterBegin.ReadUntil(iterEnd)
332
333    let mutable lastLineBeginIdx = 0
334    let mutable unaccountedNLs = 0
335    let mutable mayContainMultiCharGraphemes = false
336    let mutable nTabs = 0
337
338    for i = 0 to str.Length - 1 do
339        let c = str.[i]
340        if c >= ' ' then
341            if c >= '\u0300' then
342                mayContainMultiCharGraphemes <- true
343        elif c = '\t' then
344            nTabs <- nTabs + 1
345        elif c = '\n' || (c = '\r' && (i + 1 >= str.Length || str.[i + 1] <> '\n')) then
346            // there can be no newline after idx
347            lastLineBeginIdx <- i + 1
348            unaccountedNLs <- unaccountedNLs + 1
349            mayContainMultiCharGraphemes <- false
350            nTabs <- 0
351
352    if unaccountedNLs <> 0 then
353        str <- str.Substring(lastLineBeginIdx)
354        idx <- idx - lastLineBeginIdx
355        columnOffset <- 0L
356
357    let utf16Column = columnOffset + int64 (idx + 1)
358    let mutable lineContainsTabsBeforeIndex = false
359    if nTabs > 0 then // replace tabs with spaces
360        let off = if columnOffset = 0L then 0
361                  else int32 (columnOffset%(int64 tabSize))
362        let sb = new System.Text.StringBuilder(str.Length + nTabs*tabSize)
363        let mutable i0 = 0
364        let mutable idxIncr = 0
365        for i = 0 to str.Length - 1 do
366            if str.[i] = '\t' then
367                if i > i0 then sb.Append(str, i0, i - i0) |> ignore
368                let n = tabSize - (off + i)%tabSize
369                sb.Append(' ', n) |> ignore
370                if i < idx then // correct idx for added spaces
371                    lineContainsTabsBeforeIndex <- true
372                    idxIncr <- idxIncr + (n - 1)
373                i0 <- i + 1
374        if i0 < str.Length then sb.Append(str, i0, str.Length - i0) |> ignore
375        str <- sb.ToString()
376        idx <- idx + idxIncr
377
378    let clip nBefore nAfter =
379        let mutable nBefore, nAfter = nBefore, nAfter
380        let mutable diff = nBefore + nAfter + 1 - space
381        if diff > 0 then
382            let d = nBefore - nAfter
383            if d > 0 then
384                let dd = min diff d
385                nBefore <- nBefore - dd
386                diff    <- diff - dd
387            elif d < 0 then
388                let dd = min diff -d
389                nAfter <- nAfter - dd
390                diff   <- diff - dd
391            if diff <> 0 then
392                if diff%2 = 0 then
393                    nBefore <- nBefore - diff/2
394                    nAfter  <- nAfter  - diff/2
395                else
396                    nBefore <- nBefore - diff/2
397                    nAfter  <- nAfter  - diff/2 - 1
398        nBefore, nAfter
399
400    if not mayContainMultiCharGraphemes then
401        let nBefore, nAfter = clip idx (if idx < str.Length then str.Length - idx - 1 else 0)
402        {String = str.Substring(idx - nBefore, nBefore + nAfter + (if idx < str.Length then 1 else 0))
403         Index = nBefore
404         TextElementIndex = nBefore
405         IndexOfTextElement = nBefore
406         LengthOfTextElement = 1
407         UnaccountedNewlines = unaccountedNLs
408         Column = columnOffset + int64 (idx + 1)
409         Utf16Column = utf16Column
410         LineContainsTabsBeforeIndex = lineContainsTabsBeforeIndex
411         IsBetweenCRAndLF = isBetweenCRAndLF}
412    else
413        let indices = System.Globalization.StringInfo.ParseCombiningCharacters(str)
414        let mutable idxIdx = 0 // the indices index of the text element containing the str char at idx
415        while idxIdx < indices.Length && indices.[idxIdx] < idx do idxIdx <- idxIdx + 1
416        if (if idxIdx < indices.Length then indices.[idxIdx] > idx else idxIdx <> 0) then idxIdx <- idxIdx - 1
417        let col = columnOffset + int64 (idxIdx + 1)
418        let teIdx    =  if idxIdx     < indices.Length then indices.[idxIdx]     else str.Length
419        let teLength = (if idxIdx + 1 < indices.Length then indices.[idxIdx + 1] else str.Length) - teIdx
420        let mutable nBefore, nAfter = clip idxIdx (if idxIdx = indices.Length then 0 else indices.Length - idxIdx - 1)
421        let mutable strBegin = let ii = idxIdx - nBefore    in if ii < indices.Length then indices.[ii] else str.Length
422        let mutable strEnd   = let ii = idxIdx + nAfter + 1 in if ii < indices.Length then indices.[ii] else str.Length
423        if not multiCharGraphemeSafe then
424            while strEnd - strBegin > space && (nBefore > 0 || nAfter > 0) do
425                if nBefore > nAfter then
426                    nBefore  <- nBefore - 1
427                    strBegin <- indices.[idxIdx - nBefore]
428                else
429                    nAfter <- nAfter - 1
430                    strEnd <- indices.[idxIdx + nAfter + 1]
431        {String = str.Substring(strBegin, strEnd - strBegin)
432         Index = idx - strBegin
433         TextElementIndex = nBefore
434         IndexOfTextElement = teIdx - strBegin
435         LengthOfTextElement = teLength
436         UnaccountedNewlines = unaccountedNLs
437         Column = col
438         Utf16Column = utf16Column
439         LineContainsTabsBeforeIndex = lineContainsTabsBeforeIndex
440         IsBetweenCRAndLF = isBetweenCRAndLF}
441
442
443
444