/fparsec/main/FParsec/Internals.fs

http://github.com/sandersn/fing · F# · 444 lines · 380 code · 45 blank · 19 comment · 114 complexity · a86a4ec0e41f8a57a5e1794fae925ace MD5 · raw file

  1. // Copyright (c) Stephan Tolksdorf 2009
  2. // License: Simplified BSD License. See accompanying documentation.
  3. module FParsec.Internals
  4. open System.Diagnostics
  5. // The following functions are defined using inline IL to help fsc generate code
  6. // the JIT knows better how to optimize.
  7. // Should F# stop supporting inline IL outside the standard library, you can switch
  8. // to the commented out alternatives (which by then will probably be just as efficient).
  /// Reference-identity comparison for two values of the same reference type.
  /// Implemented with the inline IL instruction `ceq`; the trailing comment names
  /// the portable alternative.
  9. let inline referenceEquals<'a when 'a : not struct> (x: 'a) (y: 'a) =
  10. (# "ceq" x y : bool #) // LanguagePrimitives.PhysicalEquality x y
  /// Returns true when the reference `x` is null.
  /// Implemented with inline IL (`ldnull ceq`); the trailing comment names the
  /// portable alternative.
  11. let inline isNull<'a when 'a : not struct> (x: 'a) =
  12. (# "ldnull ceq" x : bool #) // referenceEquals (box x) null
  /// Returns true when the reference `x` is not null.
  /// Implemented with inline IL (`ldnull cgt.un`); the trailing comment names the
  /// portable alternative.
  13. let inline isNotNull<'a when 'a : not struct> (x: 'a) =
  14. (# "ldnull cgt.un" x : bool #) // not (isNull x)
  /// Returns true when `s` is null or has length 0.
  let inline isNullOrEmpty (s: string) : bool =
      System.String.IsNullOrEmpty(s)
  16. // These operators are faster than = and <>. They are not public because
  17. // their names conflict with the operators in the OCaml compatibility module
  /// Equality test for two parser states; calls `State<'u>.Equals` directly.
  let inline (==) (left: State<'u>) (right: State<'u>) : bool =
      left.Equals(right)
  /// Inequality test for two parser states; the negation of `State<'u>.Equals`.
  let inline (!=) (left: State<'u>) (right: State<'u>) : bool =
      not (left.Equals(right))
  20. // the F# compiler doesn't yet "fuse" multiple '+' string concatenations into one, as the C# compiler does
  /// Concatenates three strings with a single `String.Concat` call.
  let inline concat3 (a: string) (b: string) (c: string) : string =
      System.String.Concat(a, b, c)

  /// Concatenates four strings with a single `String.Concat` call.
  let inline concat4 (a: string) (b: string) (c: string) (d: string) : string =
      System.String.Concat(a, b, c, d)

  /// Concatenates five strings with a single `String.Concat` call (array overload).
  let inline concat5 (a: string) (b: string) (c: string) (d: string) (e: string) : string =
      System.String.Concat([| a; b; c; d; e |])

  /// Concatenates six strings with a single `String.Concat` call (array overload).
  let inline concat6 (a: string) (b: string) (c: string) (d: string) (e: string) (f: string) : string =
      System.String.Concat([| a; b; c; d; e; f |])

  /// Concatenates seven strings with a single `String.Concat` call (array overload).
  let inline concat7 (a: string) (b: string) (c: string) (d: string) (e: string) (f: string) (g: string) : string =
      System.String.Concat([| a; b; c; d; e; f; g |])
  /// Module-level alias for `Helper.ContainsNewlineChar` (`Helper` is defined outside this file).
  26. let containsNewlineChar = Helper.ContainsNewlineChar
  /// Returns the English ordinal suffix for `i`: "st", "nd", "rd" or "th"
  /// (so that `string i + ordinalEnding i` yields "1st", "2nd", "3rd", "4th", "11th", "21st", ...).
  /// Zero and negative values yield "th".
  let ordinalEnding (i: int) =
      // Numbers ending in 11, 12 or 13 are irregular ("11th", "112th"), so the
      // last two digits have to be checked before the last digit.
      match i % 100 with
      | 11 | 12 | 13 -> "th"
      | _ ->
          match i % 10 with
          | 1 -> "st"
          | 2 -> "nd"
          | 3 -> "rd"
          | _ -> "th"
  /// Formats `c` as a backslash-u escape followed by exactly four lowercase
  /// hexadecimal digits of its UTF-16 code unit value.
  let hexEscapeChar (c: char) =
      let code = int c
      let hexDigits = "0123456789abcdef"
      // Picks the hex digit for the 4-bit group starting at bit `shift`.
      let digitAt shift = hexDigits.[(code >>> shift) &&& 0xf]
      new string([| '\\'; 'u'; digitAt 12; digitAt 8; digitAt 4; digitAt 0 |])
  /// Returns the escaped form of `c`, or `f c` when `c` needs no escaping.
  /// - backslash, backspace, tab, line feed and carriage return always get their
  ///   two-character escape;
  /// - the double and single quote are escaped only when the respective flag is set;
  /// - control characters (and, if `escapeNonAscii` is set, every char >= U+007F)
  ///   are rendered with `hexEscapeChar`.
  [<NoDynamicInvocation>]
  let inline private escapeCharHelper escapeSingleQuote escapeDoubleQuote escapeNonAscii (c: char) (f: char -> string) =
      // Fast path: chars strictly between the single quote and DEL contain the
      // backslash as the only character that has to be escaped.
      if c > '\'' && c < '\u007f' then
          match c with
          | '\\' -> "\\\\"
          | _ -> f c
      else
          match c with
          | '\b' -> "\\b"
          | '\t' -> "\\t"
          | '\n' -> "\\n"
          | '\r' -> "\\r"
          | '\"' when escapeDoubleQuote -> "\\\""
          | '\'' when escapeSingleQuote -> "\\'"
          | _ when escapeNonAscii && c >= '\u007f' -> hexEscapeChar c
          | _ when System.Char.IsControl(c) -> hexEscapeChar c
          | _ -> f c
  /// Returns `s` with every character that `escapeCharHelper` escapes replaced by
  /// its escape sequence. When nothing needs escaping the original string
  /// instance is returned and no buffer is allocated.
  [<NoDynamicInvocation>]
  let inline escapeStringHelper escapeSingleQuote escapeDoubleQuote escapeNonAscii (s: string) =
      // The buffer is created lazily, on the first character that needs escaping.
      let mutable buffer : System.Text.StringBuilder = null
      // Start of the pending run of unescaped chars that has not been copied yet.
      let mutable runStart = 0
      for i = 0 to s.Length - 1 do
          // A null result signals "no escaping necessary for this char".
          let escaped = escapeCharHelper escapeSingleQuote escapeDoubleQuote escapeNonAscii s.[i] (fun _ -> null)
          if not (isNull escaped) then
              if isNull buffer then
                  buffer <- new System.Text.StringBuilder(s.Length + 6)
              buffer.Append(s, runStart, i - runStart).Append(escaped) |> ignore
              runStart <- i + 1
      if isNull buffer then s
      else buffer.Append(s, runStart, s.Length - runStart).ToString()
  /// Returns `s` escaped like `escapeStringHelper` does and enclosed in `quote`
  /// on both sides. When nothing needs escaping the result is built with a
  /// single three-way concatenation.
  [<NoDynamicInvocation>]
  let inline quoteStringHelper (quote: string) escapeSingleQuote escapeDoubleQuote escapeNonAscii (s: string) =
      // The buffer is created lazily (already containing the opening quote) on
      // the first character that needs escaping.
      let mutable buffer : System.Text.StringBuilder = null
      // Start of the pending run of unescaped chars that has not been copied yet.
      let mutable runStart = 0
      for i = 0 to s.Length - 1 do
          // A null result signals "no escaping necessary for this char".
          let escaped = escapeCharHelper escapeSingleQuote escapeDoubleQuote escapeNonAscii s.[i] (fun _ -> null)
          if not (isNull escaped) then
              if isNull buffer then
                  buffer <- (new System.Text.StringBuilder(s.Length + 8)).Append(quote)
              buffer.Append(s, runStart, i - runStart).Append(escaped) |> ignore
              runStart <- i + 1
      if isNull buffer then concat3 quote s quote
      else buffer.Append(s, runStart, s.Length - runStart).Append(quote).ToString()
  /// Escapes `s` for use inside double quotes: the double quote is escaped, the
  /// single quote and non-ASCII chars are not. No surrounding quotes are added.
  let escapeStringInDoubleQuotes s =
      s |> escapeStringHelper false true false
  /// Returns `c` enclosed in single quotes, escaped with `escapeCharHelper`
  /// (quotes and non-ASCII chars are left as they are). The single quote itself
  /// is returned enclosed in double quotes instead.
  let quoteChar c =
      match c with
      | '\'' -> "\"'\""
      | _ ->
          let escaped = escapeCharHelper false false false c string
          concat3 "'" escaped "'"
  /// Returns `s` enclosed in single quotes, with single quotes inside `s`
  /// escaped; double quotes and non-ASCII chars are left unescaped.
  let quoteString s =
      s |> quoteStringHelper "'" true false false
  /// Like `quoteString`, but additionally hex-escapes every char >= U+007F.
  let asciiQuoteString s =
      s |> quoteStringHelper "'" true false true
  87. /// A primitive pretty printer.
  /// Writes words to `tw`, wrapping lines at `columnWidth` columns and prefixing
  /// every line with the current `Indentation`.
  /// `writerIsMultiCharGraphemeSafe` selects how widths are measured: in text
  /// elements (via `Helper.CountTextElements`) when true, in UTF-16 chars otherwise.
  /// The constructor rejects `columnWidth < 1` with `invalidArg`.
  88. type LineWrapper(tw: System.IO.TextWriter, columnWidth: int, writerIsMultiCharGraphemeSafe: bool) =
  89. do if columnWidth < 1 then invalidArg "columnWidth" "columnWidth must be positive."
  // indentation:  string written at the start of every line
  // maxSpace:     columns available per line after the indentation
  // space:        columns still free on the current line
  // afterNewline: nothing has been written on the current line yet (the
  //               indentation is written lazily by Write)
  // afterSpace:   a space is pending; it is emitted (or turned into a line break)
  //               when the next word is written
  90. let mutable indentation = ""
  91. let mutable maxSpace = columnWidth
  92. let mutable space = columnWidth
  93. let mutable afterNewline = true
  94. let mutable afterSpace = false
  // Convenience constructor: treats the writer as grapheme safe unless its
  // encoding is a single-byte encoding.
  95. new (tw: System.IO.TextWriter, columnWidth: int) =
  96. new LineWrapper(tw, columnWidth, not tw.Encoding.IsSingleByte)
  97. member t.TextWriter = tw
  98. member t.ColumnWidth = columnWidth
  99. member t.WriterIsMultiCharGraphemeSafe = writerIsMultiCharGraphemeSafe
  // The setter truncates the indentation to at most columnWidth - 1 chars. The
  // remaining space of the current line is only updated when nothing has been
  // written on it yet; otherwise the change takes effect from the next line on.
  100. member t.Indentation
  101. with get() = indentation
  102. and set (s: string) =
  103. let s = if s.Length <= columnWidth - 1 then s
  104. else s.Substring(0, columnWidth - 1) // guarantee maxSpace >= 1
  105. indentation <- s
  106. maxSpace <- columnWidth - s.Length
  107. if afterNewline then space <- maxSpace
  // Ends the current line and discards any pending space.
  108. member t.Newline() =
  109. tw.WriteLine()
  110. afterNewline <- true
  111. afterSpace <- false
  112. space <- maxSpace
  // Only records that a space is pending; nothing is written here.
  113. member t.Space() =
  114. afterSpace <- true
  // Splits `s` at whitespace (' ', the range '\t'..'\r', U+0085, U+2028, U+2029)
  // and writes the words in between; every whitespace char, newlines included,
  // just marks a pending space. A null string is ignored.
  115. member t.Print(s: string) =
  116. if isNotNull s then
  117. let mutable start = 0
  118. for i = 0 to s.Length - 1 do
  119. let c = s.[i]
  120. if (if c <= ' ' then c = ' ' || (c >= '\t' && c <= '\r')
  121. else c >= '\u0085' && (c = '\u0085' || c = '\u2028' || c = '\u2029'))
  122. then // any ' ', tab or newlines
  123. if start < i then
  124. t.Write(s.Substring(start, i - start))
  125. t.Space()
  126. start <- i + 1
  // Write the trailing word; when `s` contained no whitespace at all it is
  // passed on as is, without creating a substring.
  127. if start < s.Length then
  128. if start = 0 then t.Write(s)
  129. else t.Write(s.Substring(start, s.Length - start))
  130. member t.Print(s1, s2) = t.Print(s1); t.Print(s2)
  131. member t.Print(s1, s2, s3) = t.Print(s1); t.Print(s2); t.Print(s3)
  132. member t.PrintLine(s: string) = t.Print(s); t.Newline()
  133. member t.PrintLine(s1: string, s2: string) = t.Print(s1); t.Print(s2); t.Newline()
  134. member t.PrintLine(s1: string, s2: string, s3: string) = t.Print(s1); t.Print(s2); t.Print(s3); t.Newline()
  // Writes a single non-empty word. In order of preference the word is
  //   1. appended to the current line (after the pending space, if any),
  //   2. put onto a fresh line when its UTF-16 length fits into maxSpace,
  //   3. handed to Break, which splits it over several lines.
  135. member private t.Write(s: string) =
  136. Debug.Assert(s.Length > 0)
  137. if afterNewline then
  138. tw.Write(indentation)
  139. afterNewline <- false
  // n is the width of the word in the unit selected by writerIsMultiCharGraphemeSafe
  140. let n = if writerIsMultiCharGraphemeSafe then Helper.CountTextElements(s) else s.Length
  141. match afterSpace with
  142. | true when n + 1 <= space ->
  143. tw.Write(' ')
  144. tw.Write(s)
  145. space <- space - 1 - n
  146. afterSpace <- false
  147. | false when n <= space ->
  148. tw.Write(s)
  149. space <- space - n
  // NOTE(review): this guard compares s.Length, not n, against maxSpace
  150. | _ when s.Length <= maxSpace ->
  151. tw.WriteLine()
  152. tw.Write(indentation)
  153. tw.Write(s)
  154. space <- maxSpace - n
  155. afterSpace <- false
  156. | _ ->
  157. t.Break(s)
  158. /// breaks a string into multiple lines along text element boundaries.
  159. member private t.Break(s: string) =
  160. Debug.Assert(s.Length > 0 && not afterNewline)
  // First resolve a pending space: emit it when more than one column is left,
  // otherwise start a new line. A completely full line also forces a new line.
  161. if afterSpace then
  162. afterSpace <- false
  163. if space > 1 then
  164. tw.Write(' ')
  165. space <- space - 1
  166. else
  167. tw.WriteLine()
  168. tw.Write(indentation)
  169. space <- maxSpace
  170. elif space = 0 then
  171. tw.WriteLine()
  172. tw.Write(indentation)
  173. space <- maxSpace
  // The enumerator is advanced to the first text element here, so each
  // te.MoveNext() in the loops below moves on to the start of the next element.
  174. let te = System.Globalization.StringInfo.GetTextElementEnumerator(s)
  175. te.MoveNext() |> ignore
  176. Debug.Assert(te.ElementIndex = 0)
  177. if writerIsMultiCharGraphemeSafe then
  // Every text element occupies one column; whenever the line is full the
  // piece collected so far is written out and a new line is started.
  178. let mutable startIndex = 0
  179. while te.MoveNext() do
  180. space <- space - 1
  181. if space = 0 then
  182. let index = te.ElementIndex
  183. tw.WriteLine(s.Substring(startIndex, index - startIndex))
  184. tw.Write(indentation)
  185. space <- maxSpace
  186. startIndex <- index
  // account for the last text element, then write the remaining piece
  187. space <- space - 1
  188. tw.Write(s.Substring(startIndex, s.Length - startIndex))
  189. else
  190. // We don't break up text elements, but when we fit string pieces into lines we
  191. // use UTF-16 lengths instead of text element counts (in order to support displays
  192. // that have problems with combining character sequences).
  // startIndex: start of the piece that has not been written yet
  // lastIndex:  last text element boundary at which the piece still fits
  //             into the current line
  193. let mutable startIndex = 0
  194. let mutable lastIndex = 0
  195. while te.MoveNext() do
  196. let index = te.ElementIndex
  197. let count = index - startIndex
  198. if count < space then
  199. lastIndex <- index
  // The piece fills the line exactly, or no fitting boundary is known
  // (a single text element longer than the line): break at this boundary.
  200. elif count = space || lastIndex <= startIndex then
  201. tw.WriteLine(s.Substring(startIndex, count))
  202. tw.Write(indentation)
  203. space <- maxSpace
  204. startIndex <- index
  205. else
  // break at the last boundary that still fitted
  206. tw.WriteLine(s.Substring(startIndex, lastIndex - startIndex))
  207. tw.Write(indentation)
  208. space <- maxSpace
  209. startIndex <- lastIndex
  // Write the rest of the string, applying the same three cases with the end
  // of the string as the final boundary.
  210. let index = s.Length
  211. let count = index - startIndex
  212. if count <= space then
  213. tw.Write(s.Substring(startIndex, count))
  214. space <- space - count
  215. elif lastIndex <= startIndex then
  216. tw.WriteLine(s.Substring(startIndex, index - startIndex))
  217. space <- maxSpace
  218. afterNewline <- true
  219. else
  220. tw.WriteLine(s.Substring(startIndex, lastIndex - startIndex))
  221. tw.Write(indentation)
  222. tw.Write(s.Substring(lastIndex, index - lastIndex))
  223. space <- maxSpace - (index - lastIndex)
  // the final text element was longer than a whole line
  224. if space < 0 then
  225. tw.WriteLine()
  226. space <- maxSpace
  227. afterNewline <- true
  /// Result of `getLineSnippet`: a piece of a source line around a stream
  /// position, together with where that position falls inside the piece.
  228. type LineSnippet = {
  // the snippet text (tabs are replaced by spaces)
  229. String: string
  // number of text elements in String before the one at the position
  230. TextElementIndex: int
  // UTF-16 index of the position within String
  231. Index: int
  // UTF-16 index within String at which the text element at the position starts
  232. IndexOfTextElement: int
  // UTF-16 length of the text element at the position
  233. LengthOfTextElement: int
  // number of line breaks found in the scanned text before the position
  234. UnaccountedNewlines: int
  // column of the position, computed after tabs have been expanded; counted in
  // text elements when the line may contain multi-char graphemes
  235. Column: int64
  236. Utf16Column: int64 // the UTF16 tabs are only counted as 1 char
  // true when at least one tab was expanded before the position
  237. LineContainsTabsBeforeIndex: bool
  // true when the position points at a '\n' that directly follows a '\r'
  238. IsBetweenCRAndLF: bool
  239. }
  /// Reads the part of the line around the position `p` from `stream` and
  /// returns it as a `LineSnippet`.
  ///
  /// stream                - the char stream the position refers to
  /// p                     - the position; `p.Index` must lie within the stream
  /// space                 - maximum width of the snippet (must be > 0)
  /// tabSize               - tab stop distance used when tabs are replaced by spaces (must be > 0)
  /// multiCharGraphemeSafe - when false, a snippet containing combining character
  ///                         sequences is additionally shrunk until its UTF-16
  ///                         length fits into `space` (as far as whole text
  ///                         elements allow)
  ///
  /// Raises `System.ArgumentException` when the position lies beyond the end of
  /// the stream.
  let getLineSnippet (stream: CharStream) (p: Position) (space: int) (tabSize: int) multiCharGraphemeSafe =
      Debug.Assert(space > 0 && tabSize > 0)
      Debug.Assert(p.Index >= stream.BeginIndex && p.Index <= stream.EndIndex)

      // True when the first char of `s` is a combining mark or a surrogate, i.e.
      // when cutting directly before it might split a text element.
      let isCombiningChar (s: string) =
          match System.Char.GetUnicodeCategory(s, 0) with
          | System.Globalization.UnicodeCategory.NonSpacingMark
          | System.Globalization.UnicodeCategory.SpacingCombiningMark
          | System.Globalization.UnicodeCategory.EnclosingMark
          | System.Globalization.UnicodeCategory.Surrogate -> true
          | _ -> false

      // '\uffff' is presumably the end-of-stream char returned by the iterator
      // (NOTE(review): confirm against CharStream).
      let isUnicodeNewlineOrEos c =
          match c with
          | '\n' | '\u000C' | '\r' | '\u0085' | '\u2028' | '\u2029'
          | '\uffff' -> true
          | _ -> false

      // we restrict the maximum column count, so that we don't accidentally
      // completely reread a multi-gigabyte file when it has no newlines
      let maxColForColCount = 1000
      let maxExtraChars = 32
      let colTooLarge = p.Column > int64 maxColForColCount

      let mutable index = p.Index
      let mutable iterBegin = stream.Seek(index) // throws if index is too small
      let mutable iterEnd = iterBegin
      if index <> iterEnd.Index then
          raise (System.ArgumentException("The error position lies beyond the end of the stream."))

      let isBetweenCRAndLF = iterEnd.Read() = '\n' && iterEnd.Peek(-1) = '\r'
      if not isBetweenCRAndLF then
          let mutable c = iterEnd.Read()
          let mutable n = 2*space + maxExtraChars
          // skip to end of line, but not over more than n chars
          while not (isUnicodeNewlineOrEos c) && n <> 0 do
              c <- iterEnd._Increment()
              n <- n - 1
          if not (isUnicodeNewlineOrEos c) then
              // we stopped in the middle of the line: don't cut a text element in half
              n <- maxExtraChars
              while isCombiningChar (iterEnd.Read(2)) && n <> 0 do
                  iterEnd._Increment() |> ignore
                  n <- n - 1
      else
          // treat the position as if it pointed at the '\r'
          iterEnd._Decrement() |> ignore
          iterBegin <- iterEnd
          index <- index - 1L

      let lineBegin = index - p.Column + 1L
      // use _Decrement instead of Advance, so that we don't move past the beginning of the stream
      iterBegin._Decrement(if not colTooLarge then uint32 p.Column - 1u else uint32 maxColForColCount - 1u) |> ignore
      if colTooLarge then
          // we start in the middle of the line: don't cut a text element in half
          let mutable n =
              if p.Column < int64 System.Int32.MaxValue then
                  min maxExtraChars (int32 p.Column - maxColForColCount)
              else maxExtraChars
          while isCombiningChar (iterBegin.Read(2)) && n <> 0 do
              iterBegin._Decrement() |> ignore
              n <- n - 1
      let iterBeginIndex = iterBegin.Index
      let mutable columnOffset = iterBeginIndex - lineBegin
      let mutable idx = int (index - iterBeginIndex)

      let mutable str = iterBegin.ReadUntil(iterEnd)
      let mutable lastLineBeginIdx = 0
      let mutable unaccountedNLs = 0
      let mutable mayContainMultiCharGraphemes = false
      let mutable nTabs = 0

      // Scan the text for tabs, chars that may be part of multi-char graphemes
      // and newlines. Only the part after the last newline is kept, so the
      // counters are reset whenever a newline is found.
      for i = 0 to str.Length - 1 do
          let c = str.[i]
          if c >= ' ' then
              if c >= '\u0300' then
                  mayContainMultiCharGraphemes <- true
          elif c = '\t' then
              nTabs <- nTabs + 1
          elif c = '\n' || (c = '\r' && (i + 1 >= str.Length || str.[i + 1] <> '\n')) then
              // there can be no newline after idx
              lastLineBeginIdx <- i + 1
              unaccountedNLs <- unaccountedNLs + 1
              mayContainMultiCharGraphemes <- false
              nTabs <- 0
      if unaccountedNLs <> 0 then
          str <- str.Substring(lastLineBeginIdx)
          idx <- idx - lastLineBeginIdx
          columnOffset <- 0L

      // computed before the tab expansion, so tabs count as a single char here
      let utf16Column = columnOffset + int64 (idx + 1)
      let mutable lineContainsTabsBeforeIndex = false
      if nTabs > 0 then // replace tabs with spaces
          let off = if columnOffset = 0L then 0
                    else int32 (columnOffset%(int64 tabSize))
          let sb = new System.Text.StringBuilder(str.Length + nTabs*tabSize)
          let mutable i0 = 0
          let mutable idxIncr = 0
          for i = 0 to str.Length - 1 do
              if str.[i] = '\t' then
                  if i > i0 then sb.Append(str, i0, i - i0) |> ignore
                  // The tab stop has to be computed from the column in the
                  // *expanded* text (sb.Length), not from the source index i,
                  // otherwise the spaces inserted for earlier tabs are ignored
                  // and every tab after the first one gets the wrong width.
                  let n = tabSize - (off + sb.Length)%tabSize
                  sb.Append(' ', n) |> ignore
                  if i < idx then // correct idx for added spaces
                      lineContainsTabsBeforeIndex <- true
                      idxIncr <- idxIncr + (n - 1)
                  i0 <- i + 1
          if i0 < str.Length then sb.Append(str, i0, str.Length - i0) |> ignore
          str <- sb.ToString()
          idx <- idx + idxIncr

      // Reduces the number of elements before/after the position such that
      // nBefore + 1 + nAfter <= space. The longer side is shortened first; once
      // both sides are equally long they are shortened alternately.
      let clip nBefore nAfter =
          let mutable nBefore, nAfter = nBefore, nAfter
          let mutable diff = nBefore + nAfter + 1 - space
          if diff > 0 then
              let d = nBefore - nAfter
              if d > 0 then
                  let dd = min diff d
                  nBefore <- nBefore - dd
                  diff <- diff - dd
              elif d < 0 then
                  let dd = min diff (-d)
                  nAfter <- nAfter - dd
                  diff <- diff - dd
              if diff <> 0 then
                  if diff%2 = 0 then
                      nBefore <- nBefore - diff/2
                      nAfter <- nAfter - diff/2
                  else
                      nBefore <- nBefore - diff/2
                      nAfter <- nAfter - diff/2 - 1
          nBefore, nAfter

      if not mayContainMultiCharGraphemes then
          // every char is a text element of its own
          let nBefore, nAfter = clip idx (if idx < str.Length then str.Length - idx - 1 else 0)
          {String = str.Substring(idx - nBefore, nBefore + nAfter + (if idx < str.Length then 1 else 0))
           Index = nBefore
           TextElementIndex = nBefore
           IndexOfTextElement = nBefore
           LengthOfTextElement = 1
           UnaccountedNewlines = unaccountedNLs
           Column = columnOffset + int64 (idx + 1)
           Utf16Column = utf16Column
           LineContainsTabsBeforeIndex = lineContainsTabsBeforeIndex
           IsBetweenCRAndLF = isBetweenCRAndLF}
      else
          let indices = System.Globalization.StringInfo.ParseCombiningCharacters(str)
          let strLength = str.Length
          // UTF-16 start index of text element ii; the index one past the last
          // element maps to the end of the string.
          let elementStart ii = if ii < indices.Length then indices.[ii] else strLength
          let mutable idxIdx = 0 // the indices index of the text element containing the str char at idx
          while idxIdx < indices.Length && indices.[idxIdx] < idx do idxIdx <- idxIdx + 1
          // Step back when idx lies inside (not at the start of) a text element.
          // A position at the very end of the string must NOT be attributed to
          // the last text element, otherwise the reported column is one too
          // small; in that case idxIdx stays at indices.Length.
          if (if idxIdx < indices.Length then indices.[idxIdx] > idx else idx < strLength) then idxIdx <- idxIdx - 1
          let col = columnOffset + int64 (idxIdx + 1)
          let teIdx = elementStart idxIdx
          let teLength = elementStart (idxIdx + 1) - teIdx
          let mutable nBefore, nAfter = clip idxIdx (if idxIdx = indices.Length then 0 else indices.Length - idxIdx - 1)
          let mutable strBegin = elementStart (idxIdx - nBefore)
          let mutable strEnd = elementStart (idxIdx + nAfter + 1)
          if not multiCharGraphemeSafe then
              // shrink the snippet further until its UTF-16 length fits into space
              while strEnd - strBegin > space && (nBefore > 0 || nAfter > 0) do
                  if nBefore > nAfter then
                      nBefore <- nBefore - 1
                      strBegin <- elementStart (idxIdx - nBefore)
                  else
                      nAfter <- nAfter - 1
                      strEnd <- elementStart (idxIdx + nAfter + 1)
          {String = str.Substring(strBegin, strEnd - strBegin)
           Index = idx - strBegin
           TextElementIndex = nBefore
           IndexOfTextElement = teIdx - strBegin
           LengthOfTextElement = teLength
           UnaccountedNewlines = unaccountedNLs
           Column = col
           Utf16Column = utf16Column
           LineContainsTabsBeforeIndex = lineContainsTabsBeforeIndex
           IsBetweenCRAndLF = isBetweenCRAndLF}