PageRenderTime 67ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/fparsec/main/FParsecCS/CharStream.cs

http://github.com/sandersn/fing
C# | 2073 lines | 1467 code | 161 blank | 445 comment | 673 complexity | cbb1fa5f9620a84c92c17759d91abb76 MD5 | raw file
  1. // Copyright (c) Stephan Tolksdorf 2007-2009
  2. // License: Simplified BSD License. See accompanying documentation.
  3. #if !LOW_TRUST
  4. using System;
  5. using System.IO;
  6. using System.Collections.Generic;
  7. using System.Text;
  8. using System.Text.RegularExpressions;
  9. using System.Diagnostics;
  10. using System.Reflection;
  11. using System.Runtime.Serialization;
  12. using System.Runtime.InteropServices;
  13. using System.Runtime.CompilerServices;
  14. namespace FParsec {
  15. /// <summary>Provides access to the char content of a binary Stream (or a String) through
  16. /// an iterator-based interface that is especially well suited for parser applications.</summary>
  17. public unsafe sealed class CharStream : IDisposable {
  18. // In order to facilitate efficient backtracking we divide the stream into overlapping
  19. // blocks with equal number of chars. The blocks are overlapping, so that
  20. // backtracking over short distances at a block boundary doesn't trigger a reread of the
  21. // previous block.
  22. //
  23. //            Block 0
  24. //
  25. //  -----------------|--------  Block 1
  26. //                    Overlap
  27. //                    --------|--------|--------  Block 2
  28. //                                      Overlap
  29. //                                      --------|--------|--------
  30. //                                                                  (...)
  31. // a '-' symbolizes a char, a '|' a block boundary.
  32. //
  33. //
  34. // In general there's no fixed relationship between the number of input bytes and the
  35. // number of input chars. Worse, the encoding can be stateful, which makes it necessary
  36. // to persist the decoder state over block boundaries. If we later want to
  37. // be able to reread a certain block, we therefore need to keep record of various
  38. // bits of information describing the state of the input stream at the beginning of a block:
  /// <summary>Record of the input stream state at the beginning of a block, kept so that
  /// the block can be read again later and the re-read can be checked for consistency.</summary>
  private class BlockInfo {
      /// <summary>the byte stream index of the first char in the block after the OverhangCharsAtBlockBegin</summary>
      public long ByteIndex;
      /// <summary>the value of the CharStream's ByteBufferIndex before the block is read</summary>
      public int ByteBufferIndex;

      /// <summary>the number of bytes in the stream from ByteIndex to the first char after the OverhangCharsAfterOverlap</summary>
      public int NumberOfBytesInOverlap;
      /// <summary>the last char in the overlap with the previous block (used for integrity checking)</summary>
      public char LastCharInOverlap;

      /// <summary>chars at the block begin that were already read together with chars of the last block before the overlap</summary>
      public string OverhangCharsAtBlockBegin;
      /// <summary>chars after the overlap with the previous block that were already read together with the overlap chars</summary>
      public string OverhangCharsAfterOverlap;

      /// <summary>the decoder state captured at the block begin, before the overlap chars are read</summary>
      public DecoderState DecoderStateAtBlockBegin;
      /// <summary>the decoder state captured after the overlap chars have been read</summary>
      public DecoderState DecoderStateAfterOverlap;

      /// <summary>Stores the given values in the corresponding fields.</summary>
      public BlockInfo(long byteIndex, int byteBufferIndex,
                       int nBytesInOverlapCount, char lastCharInOverlap,
                       string overhangCharsAtBlockBegin, DecoderState decoderStateAtBlockBegin,
                       string overhangCharsAfterOverlap, DecoderState decoderStateAfterOverlap)
      {
          // position of the block in the byte stream
          ByteIndex       = byteIndex;
          ByteBufferIndex = byteBufferIndex;

          // state at the block begin
          OverhangCharsAtBlockBegin = overhangCharsAtBlockBegin;
          DecoderStateAtBlockBegin  = decoderStateAtBlockBegin;

          // state after the overlap with the previous block
          NumberOfBytesInOverlap    = nBytesInOverlapCount;
          LastCharInOverlap         = lastCharInOverlap;
          OverhangCharsAfterOverlap = overhangCharsAfterOverlap;
          DecoderStateAfterOverlap  = decoderStateAfterOverlap;
      }
  }
  69. // Unfortunately the Decoder API has no explicit methods for managing the state,
  70. // which forces us to abuse the comparatively inefficient serialization API for this purpose.
  71. // (The absence of explicit state management or at least a deep cloning method in the Decoder interface
  72. // is almost as puzzling as the absence of such methods in System.Random).
  // Cache of the serializable members of each Decoder type that has been queried so far.
  // The dictionary is created by the static initializer instead of lazily inside the
  // method: the previous unsynchronized "if (cache == null) cache = new ..." allowed two
  // threads to create (and then lock on) two different dictionary instances, so that one
  // dictionary could be mutated concurrently. All accesses happen under a lock on the
  // single dictionary instance.
  private static readonly Dictionary<Type, MemberInfo[]> SerializableMemberInfoCache =
      new Dictionary<Type, MemberInfo[]>(8);

  /// <summary>Returns the serializable members of the runtime type of the given decoder
  /// (as reported by FormatterServices.GetSerializableMembers), or null if that type
  /// is not serializable. The result is cached per decoder type.</summary>
  private static MemberInfo[] GetSerializableDecoderMemberInfo(Decoder decoder) {
      Type type = decoder.GetType();
      if (!type.IsSerializable) return null;
      MemberInfo[] smis;
      lock (SerializableMemberInfoCache) {
          if (!SerializableMemberInfoCache.TryGetValue(type, out smis)) {
              smis = FormatterServices.GetSerializableMembers(type, new StreamingContext(StreamingContextStates.Clone));
              SerializableMemberInfoCache.Add(type, smis);
          }
      }
      return smis;
  }
  /// <summary>Snapshot of a Decoder's serializable member values that can later be
  /// written back into a decoder instance.</summary>
  private struct DecoderState {
      /// <summary>The member values returned by FormatterServices.GetObjectData,
      /// or null if no serializable member info was supplied.</summary>
      private object[] DecoderData;

      /// <summary>Captures the values of the given members of the decoder.
      /// If serializableDecoderMembers is null, nothing is captured.</summary>
      public DecoderState(Decoder decoder, MemberInfo[] serializableDecoderMembers) {
          if (serializableDecoderMembers == null) {
              DecoderData = null;
          } else {
              DecoderData = FormatterServices.GetObjectData(decoder, serializableDecoderMembers);
          }
      }

      /// <summary>Writes the captured member values back into the given decoder.
      /// If nothing was captured, the decoder is reset instead.</summary>
      public void WriteTo(ref Decoder decoder, MemberInfo[] serializableDecoderMembers) {
          if (DecoderData == null) {
              decoder.Reset();
              return;
          }
          // The existing decoder instance is populated in place; the ref parameter
          // is not reassigned to a newly created decoder object.
          FormatterServices.PopulateObjectMembers(decoder, serializableDecoderMembers, DecoderData);
      }
  }
  107. private const int DefaultBlockSize = 3*(1 << 16); // 3*2^16 = 200k
  108. private const int DefaultByteBufferLength = (1 << 12);
  109. private static int MinimumByteBufferLength = 128; // must be larger than longest detectable preamble (we can only guess here)
  110. private const char EOS = '\uFFFF';
  111. // For ease of use, we need the iterators to hold a reference to the CharStream. If we stored
  112. // a CharStream reference directly in the iterator, the JIT would emit a call to the write barrier
  113. // thunk for each write to the reference field. As we want to use iterators mainly as immutable values,
  114. // we need them to be structs for performance reasons, and since structs are constantly copied
  115. // by design, we would get frequent write barrier calls*. Redirecting the CharStream
  116. // access through an "Anchor" allows us to relieve the GC from having to keep track of all the
  117. // CharStream references in the iterators. The trick is that an Anchor instance does not contain
  118. // any reference to a managed object, only a GCHandle to the CharStream and other value type members
  119. // important to the Iterators. Because the Anchor struct only has primitive members, we can take
  120. // an unmanaged pointer which the GC doesn't need to track. To avoid most GCHandle.Target accesses,
  121. // the CharStream stores pieces of information important to the iterators directly in the Anchor.
  122. //
  123. // * Just to be clear: Write barrier calls are rather cheap (about the cost of a virtual function
  124. // call) and overall FParsec performance is only marginally influenced by this optimization.
  125. // (Each Reply<_,_> value alone currently triggers 2-3 write barrier calls, even when it is
  126. // allocated on the stack and all fields are initialized to 0/null!).
  127. internal Anchor* anchor; // allocated and assigned during construction,
  128. // freed and set to null during disposal
  /// <summary>Represents the link between a CharStream and its Iterators.
  /// May be allocated on the unmanaged heap and holds a GCHandle, hence must be properly freed.</summary>
  internal struct Anchor {
      public int Block;
      /// <summary>The index of the last block of the stream, or Int32.MaxValue if the end of stream has not yet been detected.</summary>
      public int LastBlock;
      public GCHandle StreamHandle;
      /// <summary>Begin of the used part of the char buffer (stays constant). Is null if the CharStream is empty.</summary>
      public char* BufferBegin;
      /// <summary>End of the used part of the char buffer (varies for a multi-block stream). Is null if the CharStream is empty.</summary>
      public char* BufferEnd;
      public long CharIndex;
      public long CharIndexPlusOffset;
      public long CharIndexOffset;
      public long EndIndex;
      public int BlockSizeMinusOverlap;
      /// <summary>True if the Anchor memory was allocated by Create and hence has to be
      /// released with Marshal.FreeHGlobal in Free.</summary>
      public bool NeedToFree;

      /// <summary>Allocates an Anchor on the unmanaged heap and stores a normal GCHandle
      /// to the given stream in it. Only NeedToFree and StreamHandle are initialized here;
      /// the remaining fields are left for the caller to set.
      /// The returned pointer must be released with Free.</summary>
      public static Anchor* Create(CharStream stream) {
          // We create the anchor instance on the unmanaged heap. An alternative would be to use a
          // pinned pointer, but that would carry the risk of fragmenting the managed heap
          // (because an Anchor is a small object that can be long-lived).
          // (If AllocHGlobal becomes a bottleneck, we could replace it with a pool allocator.)
          Anchor* p = (Anchor*) Marshal.AllocHGlobal(sizeof(Anchor));
          try {
              p->NeedToFree = true;
              p->StreamHandle = GCHandle.Alloc(stream, GCHandleType.Normal);
          } catch {
              // Don't leak the unmanaged block if the handle could not be allocated.
              Marshal.FreeHGlobal((IntPtr) p);
              throw;
          }
          return p;
      }

      /// <summary>Frees the GCHandle held by the Anchor and, if NeedToFree is set,
      /// the unmanaged memory of the Anchor itself.</summary>
      public static void Free(Anchor *p) {
          p->StreamHandle.Free();
          if (p->NeedToFree) Marshal.FreeHGlobal((IntPtr) p);
      }
  }
  161. /// <summary>The Encoding that is used for decoding the underlying byte stream, or
  162. /// System.Text.UnicodeEncoding in case the stream was directly constructed
  163. /// from a string.</summary>
  164. public Encoding Encoding { get; private set; }
  165. // If the CharStream is constructed from a binary stream, we use a managed string as the char
  166. // buffer. This allows us to apply regular expressions directly to the input.
  167. // In the case of multi-block CharStreams we thus have to mutate the buffer string through pointers.
  168. // This is safe as long as we use a newly constructed string and we don't pass a reference
  169. // to the internal buffer string to the "outside world". (The one instance where we have to pass
  170. // a reference to the buffer string is regex matching. See the docs for Iterator.Match(regex) for more info.)
  171. //
  172. // Apart from Iter.Match(regex) we access the internal buffer only through a pinned pointer.
  173. // This way we avoid the overhead of redundant bounds checking and can support strings, char arrays
  174. // and unmanaged char buffers through the same interface. Accessing the buffer through pointers
  175. // is also a requirement for accessing the CharStream data through an Anchor pointer (see above).
  176. //
  177. // Pinning a string or char array makes life more difficult for the GC. However, as long as
  178. // the buffer is only short-lived or large enough to be allocated on the large object heap,
  179. // there shouldn't be a problem. Furthermore, the buffer strings for CharStreams constructed
  180. // from a binary stream are allocated through the StringBuffer interface and hence always live
  181. // on the large object heap. Thus, the only scenario to really worry about (and which the
  182. // documentation explicitly warns about) is when a large number of small CharStreams
  183. // are constructed directly from strings or char arrays and are used for an extended period of time.
  184. /// <summary>The string holding the char buffer, or null if the buffer is not part of a .NET string.</summary>
  185. internal string BufferString;
  186. /// <summary>A pointer to the beginning of BufferString, or null if BufferString is null.</summary>
  187. internal char* BufferStringPointer;
  188. /// <summary>Holds the GCHandle for CharStreams directly constructed from strings or char arrays.</summary>
  189. private GCHandle BufferHandle;
  190. /// <summary>Holds the StringBuffer for CharStreams constructed from a binary stream.</summary>
  191. private StringBuffer Buffer;
  192. private MultiBlockData Data;
  193. /// <summary>Contains the data and methods needed in case the input byte stream
  194. /// is large enough to span multiple blocks of the CharStream.</summary>
  195. private class MultiBlockData {
  196. public Anchor* anchor;
  197. public Stream Stream;
  198. // we keep a separate record of the Stream.Position, so that we don't need to require Stream.CanSeek
  199. public long StreamPosition;
  200. public bool LeaveOpen;
  201. public int MaxCharCountForOneByte;
  202. public Decoder Decoder;
  203. public MemberInfo[] SerializableDecoderMembers;
  204. public int BlockSize;
  205. public int BlockOverlap;
  206. /// <summary>BufferBegin + BlockSize - minRegexSpace</summary>
  207. public char* RegexSpaceThreshold;
  208. /// <summary>The byte stream index of the first unused byte in the ByteBuffer.</summary>
  209. public long ByteIndex { get { return StreamPosition - (ByteBufferCount - ByteBufferIndex); } }
  210. public List<BlockInfo> Blocks;
  211. public byte[] ByteBuffer;
  212. public int ByteBufferIndex;
  213. public int ByteBufferCount;
  /// <summary>Makes sure the ByteBuffer holds unused bytes: if none are left, the buffer
  /// is refilled from index 0. Returns the number of unused bytes in the
  /// (possibly refilled) ByteBuffer.</summary>
  private int FillByteBuffer() {
      int unusedBytes = ByteBufferCount - ByteBufferIndex;
      return unusedBytes > 0 ? unusedBytes : ClearAndRefillByteBuffer(0);
  }
  /// <summary>Refills the ByteBuffer starting at the given index. If the underlying byte
  /// stream contains enough bytes, the ByteBuffer is filled up to ByteBuffer.Length.
  /// Updates ByteBufferIndex, ByteBufferCount and StreamPosition.
  /// Returns the number of bytes available for consumption in the refilled ByteBuffer.</summary>
  private int ClearAndRefillByteBuffer(int byteBufferIndex) {
      Debug.Assert(byteBufferIndex >= 0 && byteBufferIndex <= ByteBuffer.Length);
      // A single Stream.Read call may return fewer bytes than requested, so we keep
      // reading until the buffer is full or the stream reports its end (Read returns 0).
      // Relying on a completely filled buffer allows us to calculate the buffer
      // utilization after skipping a certain number of input bytes.
      int bufferLength = ByteBuffer.Length;
      int writeIndex = byteBufferIndex;
      while (writeIndex != bufferLength) {
          int nRead = Stream.Read(ByteBuffer, writeIndex, bufferLength - writeIndex);
          if (nRead == 0) break;
          writeIndex += nRead;
      }
      int nBytes = writeIndex - byteBufferIndex;
      ByteBufferIndex = byteBufferIndex;
      ByteBufferCount = writeIndex;
      StreamPosition += nBytes;
      return nBytes;
  }
  /// <summary>Reads up to the given maximum number of chars into the given buffer.
  /// If more than the maximum number of chars have to be read from the stream in order to
  /// fill the buffer (due to the way the Decoder API works), the overhang chars are
  /// returned through the output parameter (which is null otherwise).
  /// Returns a pointer to one char after the last char read.</summary>
  /// <exception cref="DecoderFallbackException">Rethrown from the Decoder after the
  /// byte stream position has been added to the exception's Data under the key "Stream.Position".</exception>
  private char* ReadCharsFromStream(char* buffer, int maxCount, out string overhangChars) {
      Debug.Assert(maxCount >= 0);
      overhangChars = null;
      fixed (byte* pBytes = ByteBuffer) {
          try {
              // Phase 1: decode directly into the output buffer as long as it has room for
              // everything a single input byte can produce
              // (if maxCount < MaxCharCountForOneByte, Convert could throw).
              while (maxCount >= MaxCharCountForOneByte) {
                  int nAvailable = FillByteBuffer();
                  bool atEndOfStream = nAvailable == 0; // no more input: flush the decoder
                  int nBytesUsed, nCharsUsed;
                  bool isCompleted = false;
                  Decoder.Convert(pBytes + ByteBufferIndex, nAvailable,
                                  buffer, maxCount, atEndOfStream,
                                  out nBytesUsed, out nCharsUsed, out isCompleted);
                  ByteBufferIndex += nBytesUsed; // Convert consumed nBytesUsed bytes from the byte buffer
                  buffer   += nCharsUsed;
                  maxCount -= nCharsUsed;
                  if (atEndOfStream && isCompleted) return buffer;
              }
              if (maxCount == 0) return buffer;

              // Phase 2: fewer than MaxCharCountForOneByte chars are still wanted, so we
              // decode into a small scratch buffer and copy the chars over one by one.
              char* scratch = stackalloc char[MaxCharCountForOneByte];
              while (true) {
                  int nAvailable = FillByteBuffer();
                  bool atEndOfStream = nAvailable == 0;
                  int nBytesUsed, nCharsUsed;
                  bool isCompleted;
                  Decoder.Convert(pBytes + ByteBufferIndex, nAvailable,
                                  scratch, MaxCharCountForOneByte, atEndOfStream,
                                  out nBytesUsed, out nCharsUsed, out isCompleted);
                  ByteBufferIndex += nBytesUsed;
                  int nCopied = 0;
                  while (nCopied < nCharsUsed) {
                      *buffer = scratch[nCopied];
                      ++buffer;
                      ++nCopied;
                      --maxCount;
                      if (maxCount == 0) {
                          // the output buffer is full; decoded chars that didn't fit are the overhang
                          if (nCopied < nCharsUsed)
                              overhangChars = new string(scratch, nCopied, nCharsUsed - nCopied);
                          return buffer;
                      }
                  }
                  if (atEndOfStream && isCompleted) return buffer;
              }
          } catch (DecoderFallbackException e) {
              e.Data.Add("Stream.Position", ByteIndex + e.Index);
              throw;
          }
      }
  }
  296. /// <summary> Reads a block of chars (must be different from the current block)
  297. /// into the BufferString. Returns a pointer to the first char of the new block,
  298. /// or null if no chars could be read.</summary>
  /// <remarks>On success the anchor's Block, CharIndex, CharIndexPlusOffset and BufferEnd
  /// are updated. When the end of the stream is detected, the anchor's LastBlock and
  /// EndIndex are set as well.</remarks>
  /// <exception cref="InvalidOperationException">block is the current block, or the method
  /// is called before any block has been read with a block other than 0.</exception>
  /// <exception cref="NotSupportedException">block lies before the current block, is not
  /// block 0, and the decoder state cannot be restored (SerializableDecoderMembers is null).</exception>
  /// <exception cref="IOException">The data read does not match the information recorded
  /// when the block (or its neighbour) was previously read.</exception>
  299. internal char* ReadBlock(int block) {
  // a block beyond the last block of the stream does not exist
  300. if (block > anchor->LastBlock) return null;
  301. int prevBlock = anchor->Block;
  302. if (block == prevBlock) throw new InvalidOperationException();
  // Without serializable decoder members the decoder state can't be restored, so a block
  // > 0 can only be reached by reading forward block by block (recursively, see below).
  303. if (SerializableDecoderMembers == null && block > 0) {
  304. if (prevBlock > block)
  305. throw new NotSupportedException("The CharStream does not support seeking backwards over ranges longer than the block overlap because the Encoding's Decoder is not serializable.");
  306. while (prevBlock + 1 < block) ReadBlock(++prevBlock);
  307. }
  308. BlockInfo bi = Blocks[block];
  309. int blockSizeMinusOverlap = BlockSize - BlockOverlap;
  // index of the block's first char relative to the stream begin (computed in 64 bit)
  310. long charIndex = Math.BigMul(block, blockSizeMinusOverlap);
  311. char* bufferBegin = anchor->BufferBegin;
  312. char* begin, buffer;
  313. int nCharsToRead;
  314. // fill [0 ... BlockOverlap-1] if block > 0
  315. if (prevBlock == block - 1) {
  // Moving forward by one block: the overlap chars are already at the end of the buffer
  // and only need to be moved to the buffer begin.
  // NOTE(review): MemMove is defined outside this section; the count BlockOverlap*2 is
  // presumably a byte count (2 bytes per char) — confirm.
  316. MemMove(bufferBegin, bufferBegin + blockSizeMinusOverlap, BlockOverlap*2);
  317. Debug.Assert(bufferBegin[BlockOverlap - 1] == bi.LastCharInOverlap);
  318. begin = buffer = bufferBegin + BlockOverlap;
  319. } else if (prevBlock >= 0) {
  // Jumping to a non-adjacent or to the preceding block: reposition the byte stream,
  // refill the byte buffer and restore the decoder to the state recorded for the block begin.
  320. Stream.Seek(bi.ByteIndex, SeekOrigin.Begin); // will throw if Stream can't seek
  321. // now that there was no exception, we can change the state...
  322. StreamPosition = bi.ByteIndex;
  323. ClearAndRefillByteBuffer(bi.ByteBufferIndex);
  324. bi.DecoderStateAtBlockBegin.WriteTo(ref Decoder, SerializableDecoderMembers); // will reset Decoder if block == 0
  325. if (prevBlock == block + 1) {
  326. // move the overlap into [BlockSize - BlockOverlap, BlockSize - 1] before it gets overwritten
  327. MemMove(bufferBegin + blockSizeMinusOverlap, bufferBegin, BlockOverlap*2);
  328. }
  329. begin = buffer = bufferBegin;
  330. if (block > 0) {
  // re-read the overlap with the previous block and verify it against the recorded BlockInfo
  331. nCharsToRead = BlockOverlap;
  332. if (bi.OverhangCharsAtBlockBegin != null) {
  333. nCharsToRead -= bi.OverhangCharsAtBlockBegin.Length;
  334. for (int i = 0; i < bi.OverhangCharsAtBlockBegin.Length; ++i)
  335. *(buffer++) = bi.OverhangCharsAtBlockBegin[i];
  336. }
  337. string overhangCharsAfterOverlap;
  338. buffer = ReadCharsFromStream(buffer, nCharsToRead, out overhangCharsAfterOverlap);
  339. if ( buffer != bufferBegin + BlockOverlap
  340. || ByteIndex != bi.ByteIndex + bi.NumberOfBytesInOverlap
  341. || *(buffer - 1) != bi.LastCharInOverlap
  342. || overhangCharsAfterOverlap != bi.OverhangCharsAfterOverlap)
  343. throw new IOException("CharStream: stream integrity error");
  344. }
  345. } else { // ReadBlock was called from the constructor
  346. if (block != 0) throw new InvalidOperationException();
  347. begin = buffer = bufferBegin;
  348. }
  349. // fill [0 ... BlockSize-BlockOverlap-1] if block == 0
  350. // and [BlockOverlap ... BlockSize-BlockOverlap-1] otherwise
  351. if (block == 0) {
  352. nCharsToRead = blockSizeMinusOverlap;
  353. } else {
  354. nCharsToRead = blockSizeMinusOverlap - BlockOverlap;
  // chars that were decoded together with the overlap chars come first
  355. if (bi.OverhangCharsAfterOverlap != null) {
  356. nCharsToRead -= bi.OverhangCharsAfterOverlap.Length;
  357. for (int i = 0; i < bi.OverhangCharsAfterOverlap.Length; ++i)
  358. *(buffer++) = bi.OverhangCharsAfterOverlap[i];
  359. }
  360. }
  361. string overhangCharsAtNextBlockBegin;
  362. buffer = ReadCharsFromStream(buffer, nCharsToRead, out overhangCharsAtNextBlockBegin);
  // remember the byte stream position at which the next block begins
  363. long byteIndexAtNextBlockBegin = ByteIndex;
  364. int byteBufferIndexAtNextBlockBegin = ByteBufferIndex;
  365. // fill [BlockSize-BlockOverlap ... BlockSize-1]
  366. if (block == Blocks.Count - 1) { // next block hasn't yet been read
  // capture the decoder state before the overlap with the next block is decoded
  367. DecoderState decoderStateAtNextBlockBegin = new DecoderState(Decoder, SerializableDecoderMembers);
  368. nCharsToRead = BlockOverlap;
  369. if (overhangCharsAtNextBlockBegin != null) {
  370. nCharsToRead -= overhangCharsAtNextBlockBegin.Length;
  371. for (int i = 0; i < overhangCharsAtNextBlockBegin.Length; ++i)
  372. *(buffer++) = overhangCharsAtNextBlockBegin[i];
  373. }
  374. string overhangCharsAfterOverlapWithNextBlock;
  375. buffer = ReadCharsFromStream(buffer, nCharsToRead, out overhangCharsAfterOverlapWithNextBlock);
  376. if (anchor->LastBlock == Int32.MaxValue) { // last block hasn't yet been detected
  377. if (buffer == bufferBegin + BlockSize) {
  // the buffer was completely filled, so a BlockInfo for the next block is recorded
  378. DecoderState decoderStateAfterOverlapWithNextBlock = new DecoderState(Decoder, SerializableDecoderMembers);
  379. int nBytesInOverlapWithNextBlock = (int)(ByteIndex - byteIndexAtNextBlockBegin);
  380. Blocks.Add(new BlockInfo(byteIndexAtNextBlockBegin, byteBufferIndexAtNextBlockBegin,
  381. nBytesInOverlapWithNextBlock, *(buffer - 1),
  382. overhangCharsAtNextBlockBegin, decoderStateAtNextBlockBegin,
  383. overhangCharsAfterOverlapWithNextBlock, decoderStateAfterOverlapWithNextBlock));
  384. } else { // we reached the end of the stream
  385. anchor->LastBlock = block;
  386. anchor->EndIndex = anchor->CharIndexOffset + charIndex + (buffer - bufferBegin);
  387. }
  388. } else if (anchor->EndIndex != anchor->CharIndexOffset + charIndex + (buffer - bufferBegin)) {
  // the last block was re-read, but it no longer ends at the previously recorded EndIndex
  389. throw new IOException("CharStream: stream integrity error");
  390. }
  391. } else {
  // The next block has been read before: verify that this read ended exactly where the
  // recorded BlockInfo says the next block begins.
  392. BlockInfo nbi = Blocks[block + 1];
  393. if (buffer != bufferBegin + blockSizeMinusOverlap
  394. || byteIndexAtNextBlockBegin != nbi.ByteIndex
  395. || byteBufferIndexAtNextBlockBegin != nbi.ByteBufferIndex
  396. || overhangCharsAtNextBlockBegin != nbi.OverhangCharsAtBlockBegin)
  397. throw new IOException("CharStream: stream integrity error");
  398. if (prevBlock != block + 1 || (block == 0 && SerializableDecoderMembers == null)) { // jumping back to block 0 is supported even if the decoder is not serializable
  // the overlap with the next block has to be decoded again and is verified against nbi
  399. nCharsToRead = BlockOverlap;
  400. if (overhangCharsAtNextBlockBegin != null) {
  401. nCharsToRead -= overhangCharsAtNextBlockBegin.Length;
  402. for (int i = 0; i < overhangCharsAtNextBlockBegin.Length; ++i)
  403. *(buffer++) = overhangCharsAtNextBlockBegin[i];
  404. }
  405. string overhangCharsAfterOverlapWithNextBlock;
  406. buffer = ReadCharsFromStream(buffer, nCharsToRead, out overhangCharsAfterOverlapWithNextBlock);
  407. int nBytesInOverlapWithNextBlock = (int)(ByteIndex - byteIndexAtNextBlockBegin);
  408. if (buffer != bufferBegin + BlockSize
  409. || nBytesInOverlapWithNextBlock != nbi.NumberOfBytesInOverlap
  410. || *(buffer - 1) != nbi.LastCharInOverlap
  411. || overhangCharsAfterOverlapWithNextBlock != nbi.OverhangCharsAfterOverlap)
  412. throw new IOException("CharStream: stream integrity error");
  413. } else {
  // We came from the next block, so its overlap chars were already moved to the buffer
  // end above; the stream, byte buffer and decoder are advanced past the overlap
  // without decoding it again.
  414. Debug.Assert(bufferBegin[BlockSize - 1] == nbi.LastCharInOverlap);
  415. buffer += BlockOverlap; // we already copied the chars at the beginning of this function
  // number of overlap bytes that lie beyond the unused bytes currently in the ByteBuffer
  416. int off = nbi.NumberOfBytesInOverlap - (ByteBufferCount - ByteBufferIndex);
  417. if (off > 0) {
  418. // we wouldn't have gotten here if the Stream didn't support seeking
  419. Stream.Seek(off, SeekOrigin.Current);
  420. StreamPosition += off;
  421. ClearAndRefillByteBuffer(off%ByteBuffer.Length);
  422. } else {
  423. ByteBufferIndex += nbi.NumberOfBytesInOverlap;
  424. }
  425. nbi.DecoderStateAfterOverlap.WriteTo(ref Decoder, SerializableDecoderMembers);
  426. }
  427. }
  // publish the new block through the anchor
  428. anchor->Block = block;
  429. anchor->CharIndex = charIndex;
  430. anchor->CharIndexPlusOffset = anchor->CharIndexOffset + charIndex;
  431. anchor->BufferEnd = buffer;
  // null signals that the block contains no chars beyond its begin pointer
  432. return begin == buffer ? null : begin;
  433. }
  434. }
  /// <summary>Reads all remaining chars into the given buffer. If the remaining stream
  /// content holds more than the given maximum number of chars, an exception will be thrown.
  /// Decoding starts with the bytes byteBuffer[byteBufferIndex .. byteBufferCount - 1] and
  /// continues with the bytes read from the stream until the stream is exhausted.
  /// Returns the number of chars written to the buffer.</summary>
  /// <exception cref="DecoderFallbackException">Rethrown from the decoder after the
  /// byte stream position has been added to the exception's Data under the key "Stream.Position".</exception>
  private static int ReadAllRemainingCharsFromStream(char* buffer, int maxCount, byte[] byteBuffer, int byteBufferIndex, int byteBufferCount, Stream stream, long streamPosition, Decoder decoder) {
      Debug.Assert(maxCount > 0 && byteBufferIndex >= 0 && byteBufferIndex < byteBufferCount);
      fixed (byte* pBytes = byteBuffer) {
          int nChars = 0;
          bool endOfStream = false;
          while (true) {
              int nBytes = byteBufferCount - byteBufferIndex;
              try {
                  nChars += decoder.GetChars(pBytes + byteBufferIndex, nBytes,
                                             buffer + nChars, maxCount - nChars, endOfStream);
              } catch (DecoderFallbackException e) {
                  e.Data.Add("Stream.Position", streamPosition - (byteBufferCount - byteBufferIndex) + e.Index);
                  throw;
              }
              // the call with flush == true was the final one
              if (endOfStream) return nChars;
              // GetChars consumed all bytes in the byte buffer, so refill it from the start
              byteBufferIndex = 0;
              byteBufferCount = stream.Read(byteBuffer, 0, byteBuffer.Length);
              streamPosition += byteBufferCount;
              endOfStream = byteBufferCount == 0;
          }
      }
  }
  /// <summary>The current block in BufferString.</summary>
  private int Block {
      get { return anchor->Block; }
  }

  /// <summary>The number of chars in BufferString.</summary>
  private int BufferCount {
      get {
          char* bufferBegin = anchor->BufferBegin;
          char* bufferEnd   = anchor->BufferEnd;
          return PositiveDistance(bufferBegin, bufferEnd);
      }
  }

  /// <summary>The index of the first char in the stream, i.e. Begin.Index.
  /// This value is determined by the streamBeginIndex argument of some of the CharStream constructors.
  /// By default this value is 0.</summary>
  public long BeginIndex {
      get { return anchor->CharIndexOffset; }
  }

  /// <summary>The index of the last char of the stream plus 1,
  /// or Int64.MaxValue if the end of stream has not yet been detected.</summary>
  public long EndIndex {
      get { return anchor->EndIndex; }
  }

  /// <summary>Obsolete alias of BeginIndex.</summary>
  [Obsolete("CharStream.IndexOffset has been renamed to CharStream.BeginIndex.")]
  public long IndexOffset {
      get { return anchor->CharIndexOffset; }
  }

  /// <summary>Obsolete alias of EndIndex.</summary>
  [Obsolete("CharStream.EndOfStream has been renamed to CharStream.EndIndex.")]
  public long EndOfStream {
      get { return anchor->EndIndex; }
  }
  // There is deliberately no public constructor taking only a string, because it could be
  // confused with a constructor taking a file path.
  /// <summary>Constructs a CharStream from all chars of the given (non-null) string.
  /// The string is pinned for the lifetime of the BufferHandle and the first char
  /// is assigned the index 0.</summary>
  internal CharStream(string chars) {
      Debug.Assert(chars != null);
      BufferString = chars;
      BufferHandle = GCHandle.Alloc(chars, GCHandleType.Pinned);
      char* pChars = (char*)BufferHandle.AddrOfPinnedObject();
      BufferStringPointer = pChars;
      CharConstructorContinue(pChars, chars.Length, 0);
  }
  /// <summary>Constructs a CharStream from the chars in the string argument between the indices index (inclusive) and index + length (exclusive).</summary>
  /// <exception cref="ArgumentNullException">chars is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">At least one of the following conditions is not satisfied: index ≥ 0, length ≥ 0 and index + length ≤ chars.Length.</exception>
  public CharStream(string chars, int index, int length)
      : this(chars, index, length, 0)
  {
  }

  /// <summary>Constructs a CharStream from the chars in the string argument between the indices index (inclusive) and index + length (exclusive). The first char in the stream is assigned the index streamBeginIndex.</summary>
  /// <exception cref="ArgumentNullException">chars is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">At least one of the following conditions is not satisfied: index ≥ 0, length ≥ 0, index + length ≤ chars.Length and 0 ≤ streamBeginIndex &lt; 2^60.</exception>
  public CharStream(string chars, int index, int length, long streamBeginIndex) {
      const long maxStreamBeginIndex = 1L << 60; // exclusive upper bound

      if (chars == null)
          throw new ArgumentNullException("chars");
      if (index < 0)
          throw new ArgumentOutOfRangeException("index", "The index is negative.");
      // written as a subtraction so that index + length can't overflow
      if (!(length >= 0 && length <= chars.Length - index))
          throw new ArgumentOutOfRangeException("length", "The length is out of range.");
      if (!(streamBeginIndex >= 0 && streamBeginIndex < maxStreamBeginIndex))
          throw new ArgumentOutOfRangeException("streamBeginIndex", "streamBeginIndex must be non-negative and less than 2^60.");

      // pin the string and keep a pointer to its first char
      BufferString = chars;
      BufferHandle = GCHandle.Alloc(chars, GCHandleType.Pinned);
      char* stringBegin = (char*)BufferHandle.AddrOfPinnedObject();
      BufferStringPointer = stringBegin;
      CharConstructorContinue(stringBegin + index, length, streamBeginIndex);
  }
  /// <summary>Constructs a CharStream from the chars in the char array argument between the indices index (inclusive) and index + length (exclusive).</summary>
  /// <exception cref="ArgumentNullException">chars is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">At least one of the following conditions is not satisfied: index ≥ 0, length ≥ 0 and index + length ≤ chars.Length.</exception>
  public CharStream(char[] chars, int index, int length) : this(chars, index, length, 0) { }

  /// <summary>Constructs a CharStream from the chars in the char array argument between the indices index (inclusive) and index + length (exclusive). The first char in the stream is assigned the index streamBeginIndex.</summary>
  /// <exception cref="ArgumentNullException">chars is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">At least one of the following conditions is not satisfied: index ≥ 0, length ≥ 0, index + length ≤ chars.Length and 0 ≤ streamBeginIndex &lt; 2^60.</exception>
  public CharStream(char[] chars, int index, int length, long streamBeginIndex) {
      if (chars == null) throw new ArgumentNullException("chars");
      if (index < 0) throw new ArgumentOutOfRangeException("index", "The index is negative.");
      // written as a subtraction so that index + length can't overflow
      if (length < 0 || length > chars.Length - index) throw new ArgumentOutOfRangeException("length", "The length is out of range.");
      if (streamBeginIndex < 0 || streamBeginIndex >= (1L << 60)) throw new ArgumentOutOfRangeException("streamBeginIndex", "streamBeginIndex must be non-negative and less than 2^60.");
      // The array is pinned through BufferHandle; BufferString is not set by this
      // constructor because the buffer is not part of a string.
      BufferHandle = GCHandle.Alloc(chars, GCHandleType.Pinned);
      char* bufferBegin = (char*)BufferHandle.AddrOfPinnedObject() + index;
      CharConstructorContinue(bufferBegin, length, streamBeginIndex);
  }
  /// <summary>Constructs a CharStream from the length chars at the pointer address.</summary>
  /// <exception cref="ArgumentNullException">chars is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">length is negative.</exception>
  public CharStream(char* chars, int length)
      : this(chars, length, 0)
  {
  }

  /// <summary>Constructs a CharStream from the length chars at the pointer address. The first char in the stream is assigned the index streamBeginIndex.</summary>
  /// <exception cref="ArgumentNullException">chars is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">At least one of the following conditions is not satisfied: length ≥ 0 and 0 ≤ streamBeginIndex &lt; 2^60.</exception>
  public CharStream(char* chars, int length, long streamBeginIndex) {
      const long maxStreamBeginIndex = 1L << 60; // exclusive upper bound

      if (chars == null)
          throw new ArgumentNullException("chars");
      if (length < 0)
          throw new ArgumentOutOfRangeException("length", "The length is negative.");
      // reject a length for which the end pointer would wrap around the address space
      char* charsEnd = unchecked(chars + length);
      if (chars > charsEnd)
          throw new ArgumentOutOfRangeException("length", "The length is out of range.");
      if (!(streamBeginIndex >= 0 && streamBeginIndex < maxStreamBeginIndex))
          throw new ArgumentOutOfRangeException("streamBeginIndex", "streamBeginIndex must be non-negative and less than 2^60.");

      CharConstructorContinue(chars, length, streamBeginIndex);
  }
  /// <summary>Shared tail of the char-array and char-pointer constructors: sets the Encoding to Encoding.Unicode, creates the anchor and initializes it for a stream consisting of a single block.</summary>
  /// <param name="bufferBegin">Pointer to the first char; only stored in the anchor if length is not 0.</param>
  /// <param name="length">The number of chars in the stream.</param>
  /// <param name="streamBeginIndex">The stream index assigned to the first char.</param>
  private void CharConstructorContinue(char* bufferBegin, int length, long streamBeginIndex) {
      Debug.Assert((bufferBegin != null || length == 0) && length >= 0 && bufferBegin <= unchecked(bufferBegin + length) && streamBeginIndex >= 0 && streamBeginIndex < (1L << 60));
      Encoding = Encoding.Unicode;
      var a = Anchor.Create(this);
      this.anchor = a;

      if (length == 0) {
          // an empty stream is represented by null buffer pointers
          a->BufferBegin = null;
          a->BufferEnd = null;
          a->BlockSizeMinusOverlap = 0;
      } else {
          a->BufferBegin = bufferBegin;
          a->BufferEnd = bufferBegin + length;
          a->BlockSizeMinusOverlap = length;
      }

      // there is exactly one block, block 0
      a->Block = 0;
      a->LastBlock = 0;

      a->CharIndex = 0;
      a->CharIndexOffset = streamBeginIndex;
      a->CharIndexPlusOffset = streamBeginIndex;
      a->EndIndex = streamBeginIndex + length;
  }
  /// <summary>Constructs a single-block CharStream from the length chars at begin, using an anchor that is supplied by the caller instead of being created here.</summary>
  /// <param name="chars">Stored in BufferString; may be null, in which case pChars must be null too.</param>
  /// <param name="pChars">Stored in BufferStringPointer; must not be greater than begin if chars is not null.</param>
  /// <param name="begin">Pointer to the first char of the stream.</param>
  /// <param name="length">The number of chars in the stream.</param>
  /// <param name="streamIndexOffset">The stream index assigned to the first char.</param>
  /// <param name="newUninitializedAnchor">The anchor to initialize. Its NeedToFree, Block, LastBlock and CharIndex members are expected to be zero and its StreamHandle to be unallocated.</param>
  internal CharStream(string chars, char* pChars, char* begin, int length, long streamIndexOffset, Anchor* newUninitializedAnchor) {
      Debug.Assert((chars == null ? pChars == null : pChars <= begin)
                   && (begin != null || length == 0) && length >= 0 && begin <= unchecked(begin + length) && streamIndexOffset >= 0 && streamIndexOffset < (1L << 60));
      Debug.Assert(newUninitializedAnchor->NeedToFree == false && !newUninitializedAnchor->StreamHandle.IsAllocated
                   && newUninitializedAnchor->Block == 0 && newUninitializedAnchor->LastBlock == 0 && newUninitializedAnchor->CharIndex == 0);

      BufferString = chars;
      BufferStringPointer = pChars;
      Encoding = Encoding.Unicode;

      Anchor* a = newUninitializedAnchor;
      this.anchor = a;

      if (length == 0) {
          // an empty stream is represented by null buffer pointers
          a->BufferBegin = null;
          a->BufferEnd = null;
          a->BlockSizeMinusOverlap = 0;
      } else {
          a->BufferBegin = begin;
          a->BufferEnd = begin + length;
          a->BlockSizeMinusOverlap = length;
      }

      // Block, LastBlock and CharIndex are already 0 (see the assert above)
      a->CharIndexOffset = streamIndexOffset;
      a->CharIndexPlusOffset = streamIndexOffset;
      a->EndIndex = streamIndexOffset + length;
      a->StreamHandle = GCHandle.Alloc(this, GCHandleType.Normal);
  }
  /// <summary>Constructs a CharStream from the file at the given path, with byte-order-mark detection enabled and the default block and buffer sizes.<br/>Is equivalent to CharStream(new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.SequentialScan), false, encoding, true, defaultBlockSize, defaultBlockSize/3, ((defaultBlockSize/3)*2)/3, defaultByteBufferLength).</summary>
  public CharStream(string path, Encoding encoding)
      : this(path,
             encoding,
             true,
             DefaultBlockSize,
             DefaultBlockSize/3,
             ((DefaultBlockSize/3)*2)/3,
             DefaultByteBufferLength)
  { }
  /// <summary>Constructs a CharStream from the file at the given path, with the default block and buffer sizes.<br/>Is equivalent to CharStream(new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.SequentialScan), false, encoding, detectEncodingFromByteOrderMarks, defaultBlockSize, defaultBlockSize/3, ((defaultBlockSize/3)*2)/3, defaultByteBufferLength).</summary>
  public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
      : this(path,
             encoding,
             detectEncodingFromByteOrderMarks,
             DefaultBlockSize,
             DefaultBlockSize/3,
             ((DefaultBlockSize/3)*2)/3,
             DefaultByteBufferLength)
  { }
  /// <summary>Constructs a CharStream from the file at the given path.<br/>Is equivalent to CharStream(new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.SequentialScan), false, encoding, detectEncodingFromByteOrderMarks, blockSize, blockOverlap, minRegexSpace, byteBufferLength).</summary>
  /// <remarks>The FileStream opened here is disposed again if the rest of the construction throws.</remarks>
  /// <exception cref="ArgumentNullException">encoding is null.</exception>
  public CharStream(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks,
                    int blockSize, int blockOverlap, int minRegexSpace, int byteBufferLength)
  {
      if (encoding == null) throw new ArgumentNullException("encoding");

      var fileStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.SequentialScan);
      bool constructed = false;
      try {
          // the CharStream owns the file stream, hence leaveOpen is false
          StreamConstructorContinue(fileStream, false, encoding, detectEncodingFromByteOrderMarks,
                                    blockSize, blockOverlap, minRegexSpace, byteBufferLength);
          constructed = true;
      } finally {
          if (!constructed) fileStream.Dispose();
      }
  }
  /// <summary>Constructs a CharStream from a byte Stream, with leaveOpen set to false, byte-order-mark detection enabled and the default block and buffer sizes.<br/>Is equivalent to CharStream(stream, false, encoding, true, defaultBlockSize, defaultBlockSize/3, ((defaultBlockSize/3)*2)/3, defaultByteBufferLength).</summary>
  public CharStream(Stream stream, Encoding encoding)
      : this(stream,
             false,
             encoding,
             true,
             DefaultBlockSize,
             DefaultBlockSize/3,
             ((DefaultBlockSize/3)*2)/3,
             DefaultByteBufferLength)
  { }
  /// <summary>Constructs a CharStream from a byte Stream, with byte-order-mark detection enabled and the default block and buffer sizes.<br/>Is equivalent to CharStream(stream, leaveOpen, encoding, true, defaultBlockSize, defaultBlockSize/3, ((defaultBlockSize/3)*2)/3, defaultByteBufferLength).</summary>
  public CharStream(Stream stream, bool leaveOpen, Encoding encoding)
      : this(stream,
             leaveOpen,
             encoding,
             true,
             DefaultBlockSize,
             DefaultBlockSize/3,
             ((DefaultBlockSize/3)*2)/3,
             DefaultByteBufferLength)
  { }
  /// <summary>Constructs a CharStream from a byte Stream, with the default block and buffer sizes.<br/>Is equivalent to CharStream(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks, defaultBlockSize, defaultBlockSize/3, ((defaultBlockSize/3)*2)/3, defaultByteBufferLength).</summary>
  public CharStream(Stream stream, bool leaveOpen, Encoding encoding, bool detectEncodingFromByteOrderMarks)
      : this(stream,
             leaveOpen,
             encoding,
             detectEncodingFromByteOrderMarks,
             DefaultBlockSize,
             DefaultBlockSize/3,
             ((DefaultBlockSize/3)*2)/3,
             DefaultByteBufferLength)
  { }
  /// <summary>Constructs a CharStream from a byte Stream.</summary>
  /// <param name="stream">The byte stream providing the input.</param>
  /// <param name="leaveOpen">Indicates whether the byte Stream should be left open when the CharStream has finished reading it.</param>
  /// <param name="encoding">The (default) Encoding used for decoding the byte Stream into chars.</param>
  /// <param name="detectEncodingFromByteOrderMarks">Indicates whether the constructor should detect the encoding from a unicode byte-order mark at the beginning of the stream. An encoding detected from a byte-order mark overrides the default encoding.</param>
  /// <param name="blockSize">The number of chars per block. The default is 3×2^16 ≈ 200k.</param>
  /// <param name="blockOverlap">The number of chars at the end of a block that are preserved when reading the next block into the char buffer. It must be less than blockSize/2, but not less than encoding.GetMaxCharCount(1). The default is blockSize/3.</param>
  /// <param name="minRegexSpace">The number of chars that are guaranteed to be visible to a regular expression when it is matched on the stream (assuming there are enough chars remaining in the stream). Must not be greater than blockOverlap. The default is 2/3 of blockOverlap.</param>
  /// <param name="byteBufferLength">The size of the byte buffer used for decoding purposes. The default is 2^12 = 4KB.</param>
  /// <exception cref="ArgumentNullException">stream or encoding is null.</exception>
  /// <exception cref="ArgumentException">stream is not readable.</exception>
  public CharStream(Stream stream, bool leaveOpen,
                    Encoding encoding, bool detectEncodingFromByteOrderMarks,
                    int blockSize, int blockOverlap, int minRegexSpace, int byteBufferLength)
  {
      if (stream == null) throw new ArgumentNullException("stream");
      if (!stream.CanRead) throw new ArgumentException("stream is not readable");
      if (encoding == null) throw new ArgumentNullException("encoding");
      // Forward the caller's leaveOpen choice. Passing a hard-coded false here
      // would close the caller's stream even when leaveOpen is true.
      StreamConstructorContinue(stream, leaveOpen, encoding, detectEncodingFromByteOrderMarks,
                                blockSize, blockOverlap, minRegexSpace, byteBufferLength);
  }
  /// <summary>Read by StreamConstructorContinue: as long as this flag is false, the block size of a multi-block stream is set to the length of the allocated char buffer. The unit tests modify this flag via reflection.</summary>
  private static bool DoNotRoundUpBlockSizeToSimplifyTesting = false;

  /// <summary>Shared implementation of the path- and Stream-based constructors. Reads the first bytes of the stream, lets Helper.DetectPreamble determine the preamble length and encoding, allocates the char buffer and initializes the anchor, either for a stream whose chars all fit into one block or for a stream that is read block by block.</summary>
  /// <remarks>If an exception is thrown after the char buffer has been allocated, the buffer is disposed and an already created anchor is freed before the exception is propagated. The stream itself is not closed in that case.</remarks>
  private void StreamConstructorContinue(Stream stream, bool leaveOpen,
                                         Encoding encoding, bool detectEncodingFromByteOrderMarks,
                                         int blockSize, int blockOverlap, int minRegexSpace, int byteBufferLength)
  {
      if (byteBufferLength < MinimumByteBufferLength)
          byteBufferLength = MinimumByteBufferLength;

      // bytesInStream stays negative as long as the number of remaining bytes is unknown
      int bytesInStream = -1;
      long streamPosition = 0;
      if (stream.CanSeek) {
          streamPosition = stream.Position;
          long remainingBytes = stream.Length - streamPosition;
          if (remainingBytes <= Int32.MaxValue) {
              bytesInStream = (int)remainingBytes;
              // no need for a byte buffer that is larger than the remaining stream
              if (byteBufferLength > bytesInStream) byteBufferLength = bytesInStream;
          }
      }

      // read bytes until at least MinimumByteBufferLength bytes are buffered
      // or the end of the stream has been reached
      byte[] byteBuffer = new byte[byteBufferLength];
      int byteBufferCount = 0;
      for (;;) {
          int bytesRead = stream.Read(byteBuffer, byteBufferCount, byteBufferLength - byteBufferCount);
          if (bytesRead == 0) {
              // nothing more to read, so the total number of bytes is now known
              bytesInStream = byteBufferCount;
              break;
          }
          byteBufferCount += bytesRead;
          if (byteBufferCount >= MinimumByteBufferLength) break;
      }
      streamPosition += byteBufferCount;

      // DetectPreamble may replace the encoding (it is passed by ref)
      int preambleLength = Helper.DetectPreamble(byteBuffer, byteBufferCount, ref encoding, detectEncodingFromByteOrderMarks);
      bytesInStream -= preambleLength;

      Encoding = encoding;
      Decoder decoder = encoding.GetDecoder();

      // we allow such small block sizes only to simplify testing
      if (blockSize < 8) blockSize = DefaultBlockSize;

      bool allCharsFitIntoOneBlock = false;
      if (bytesInStream >= 0 && bytesInStream/4 <= blockSize) {
          if (bytesInStream == 0) {
              allCharsFitIntoOneBlock = true;
              blockSize = 0;
          } else {
              try {
                  int maxCharCount = Encoding.GetMaxCharCount(bytesInStream); // may throw ArgumentOutOfRangeException
                  if (maxCharCount <= blockSize) {
                      allCharsFitIntoOneBlock = true;
                      blockSize = maxCharCount;
                  }
              } catch (ArgumentOutOfRangeException) {
                  // allCharsFitIntoOneBlock stays false, i.e. the stream is read block by block
              }
          }
      }

      var buffer = StringBuffer.Create(blockSize);
      Debug.Assert(buffer.Length >= blockSize && (blockSize > 0 || buffer.StringPointer == null));
      Buffer = buffer;
      BufferString = buffer.String;
      BufferStringPointer = buffer.StringPointer;
      char* bufferBegin = buffer.StringPointer + buffer.Index;

      try {
          if (allCharsFitIntoOneBlock) {
              // decode everything right away; the byte stream isn't needed afterwards
              int bufferCount = 0;
              if (preambleLength != byteBufferCount) {
                  bufferCount = ReadAllRemainingCharsFromStream(bufferBegin, buffer.Length, byteBuffer, preambleLength, byteBufferCount, stream, streamPosition, decoder);
              }
              if (!leaveOpen) stream.Close();

              var a = Anchor.Create(this);
              this.anchor = a;
              a->BlockSizeMinusOverlap = bufferCount;
              a->EndIndex = bufferCount;
              if (bufferCount == 0) {
                  // an empty stream is represented by null buffer pointers
                  a->BufferBegin = null;
                  a->BufferEnd = null;
              } else {
                  a->BufferBegin = bufferBegin;
                  a->BufferEnd = bufferBegin + bufferCount;
              }
              a->Block = 0;
              a->LastBlock = 0;
              a->CharIndex = 0;
              a->CharIndexOffset = 0;
              a->CharIndexPlusOffset = 0;
          } else {
              if (!DoNotRoundUpBlockSizeToSimplifyTesting) blockSize = buffer.Length;

              var data = new MultiBlockData();
              Data = data;
              data.Stream = stream;
              data.StreamPosition = streamPosition;
              data.LeaveOpen = leaveOpen;
              data.Decoder = decoder;
              data.ByteBuffer = byteBuffer;
              data.ByteBufferIndex = preambleLength;
              data.ByteBufferCount = byteBufferCount;
              data.MaxCharCountForOneByte = Math.Max(1, Encoding.GetMaxCharCount(1));
              data.SerializableDecoderMembers = GetSerializableDecoderMemberInfo(decoder);

              // replace block parameters that are out of range
              int minBlockSize = 3*data.MaxCharCountForOneByte;
              if (blockSize < minBlockSize) blockSize = minBlockSize;
              // MaxCharCountForOneByte == the maximum number of overhang chars
              bool overlapIsTooSmall = Math.Min(blockOverlap, blockSize - 2*blockOverlap) < data.MaxCharCountForOneByte;
              if (overlapIsTooSmall || blockOverlap >= blockSize/2) blockOverlap = blockSize/3;
              if (minRegexSpace < 0 || minRegexSpace > blockOverlap) minRegexSpace = 2*blockOverlap/3;

              data.BlockSize = blockSize;
              data.BlockOverlap = blockOverlap;
              data.RegexSpaceThreshold = bufferBegin + (blockSize - minRegexSpace);

              var a = Anchor.Create(this);
              this.anchor = a;
              data.anchor = a;
              a->BlockSizeMinusOverlap = blockSize - blockOverlap;
              a->EndIndex = Int64.MaxValue;
              a->BufferBegin = bufferBegin;
              a->BufferEnd = bufferBegin;
              a->Block = -2; // special value recognized by ReadBlock
              a->LastBlock = Int32.MaxValue;
              a->CharIndex = 0;
              a->CharIndexOffset = 0;
              a->CharIndexPlusOffset = 0;

              data.Blocks = new List<BlockInfo>();
              // the first block has no overlap with a previous block
              data.Blocks.Add(new BlockInfo(preambleLength, preambleLength, 0, EOS, null, new DecoderState(), null, new DecoderState()));
              data.ReadBlock(0);
              if (a->BufferBegin == a->BufferEnd) {
                  // the first block is empty: switch to the null-pointer representation of an empty stream
                  Debug.Assert(a->EndIndex == a->CharIndexOffset);
                  a->BufferBegin = null;
                  a->BufferEnd = null;
              }
          }
      } catch {
          buffer.Dispose();
          if (this.anchor != null) Anchor.Free(this.anchor);
          throw;
      }
  }
  /// <summary>Frees the anchor, the pinned buffer handle (if one is allocated) and the char buffer (if there is one), and closes the underlying byte stream unless it is supposed to be left open. Calling Dispose on an already disposed CharStream has no effect.</summary>
  public void Dispose() {
      Anchor* a = anchor;
      if (a == null) return; // already disposed

      Anchor.Free(a);
      anchor = null;

      if (BufferHandle.IsAllocated) {
          BufferHandle.Free();
      }
      if (Buffer != null) {
          Buffer.Dispose();
      }
      if (Data != null) {
          if (!Data.LeaveOpen) Data.Stream.Close();
      }
  }
  /// <summary>An optimized version of end - begin, which assumes that 2^31 > end - begin >= 0.</summary>
  /// <param name="begin">Pointer to the first char of the range.</param>
  /// <param name="end">Pointer to the end of the range; must not be less than begin.</param>
  /// <returns>The number of chars between begin and end.</returns>
  internal static int PositiveDistance(char* begin, char* end) {
      // compute the distance in bytes with unsigned arithmetic and halve it
      uint byteDistance = (uint)((byte*)end - (byte*)begin);
      return (int)(byteDistance/2);
  }
  /// <summary>The 64-bit variant of PositiveDistance: computes end - begin in chars, assuming end >= begin.</summary>
  /// <param name="begin">Pointer to the first char of the range.</param>
  /// <param name="end">Pointer to the end of the range; must not be less than begin.</param>
  /// <returns>The number of chars between begin and end.</returns>
  internal static long PositiveDistance64(char* begin, char* end) {
      // compute the distance in bytes with unsigned arithmetic and halve it
      ulong byteDistance = (ulong)((byte*)end - (byte*)begin);
      return (long)(byteDistance/2);
  }
  /// <summary>An iterator pointing to the beginning of the stream (or to the end if the CharStream is empty).</summary>
  /// <exception cref="ObjectDisposedException">The property is accessed after the stream was disposed.</exception>
  public Iterator Begin { get {
      Anchor* a = this.anchor;
      if (a == null) throw new ObjectDisposedException("CharStream");

      char* first = a->BufferBegin;
      if (first == null) {
          // a null BufferBegin yields the end-of-stream iterator
          return new Iterator{Anchor = a, Ptr = null, Block = -1};
      }
      return new Iterator{Anchor = a, Ptr = first, Block = 0};
  } }
  // We do not directly provide an iterator to the end of the stream, in order to
  // ensure that such iterators only exist once the end's position has been detected.
  /// <summary>Returns an iterator pointing to the given index in the stream,
  /// or to the end of the stream if the indexed position lies beyond the last char in the stream.</summary>
  /// <param name="index">The stream index of the char the returned iterator should point to.</param>
  /// <exception cref="ArgumentOutOfRangeException">The index is negative or less than the BeginIndex.</exception>
  /// <exception cref="NotSupportedException">Accessing the char with the given index requires seeking in the underlying byte stream, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  /// <exception cref="OutOfMemoryException">Can not allocate enough memory for the internal data structure.</exception>
  /// <exception cref="ObjectDisposedException">Method is called after the stream was disposed.</exception>
  public Iterator Seek(long index) {
      Anchor* a = this.anchor;
      if (a == null) throw new ObjectDisposedException("CharStream");

      // Fast path: the index lies within the block that is currently in the buffer.
      // The subtraction is safe in case of overflow since CharIndexPlusOffset < 2^60 + (2^31/sizeof(IntPtr))*2^31 < 2^61
      // and BufferEnd - BufferBegin < 2^30 (where 2^31/sizeof(IntPtr) is the approximate maximum number of blocks).
      long offsetInBuffer = unchecked(index - a->CharIndexPlusOffset);
      if (offsetInBuffer >= 0 && offsetInBuffer < PositiveDistance(a->BufferBegin, a->BufferEnd)) {
          char* ptr = a->BufferBegin + (int)offsetInBuffer;
          return new Iterator{Anchor = a, Ptr = ptr, Block = a->Block};
      }

      if (index >= a->EndIndex)
          return new Iterator{Anchor = a, Ptr = null, Block = -1};
      if (index < a->CharIndexOffset)
          throw (new ArgumentOutOfRangeException("index", "The index is negative or less than the BeginIndex."));

      // we never get here for streams with only one block
      long relativeIndex = index - a->CharIndexOffset;
      long indexInBlock;
      long blockNumber = Math.DivRem(relativeIndex, a->BlockSizeMinusOverlap, out indexInBlock);
      int block = blockNumber > Int32.MaxValue ? Int32.MaxValue : (int)blockNumber;
      return Seek(block, (int)indexInBlock);
  }
  /// <summary>Returns an iterator pointing to the char with the index idx within the given block, or an end-of-stream iterator if the stream ends before that position. Reads the required block(s) into the buffer via Data.ReadBlock.</summary>
  /// <param name="block">The number of the block containing the position.</param>
  /// <param name="idx">The index of the position relative to the beginning of the block.</param>
  private Iterator Seek(int block, int idx) {
      Anchor* a = this.anchor;

      // If the target block comes after the current block and the position lies within the
      // first BlockOverlap chars of the target block, address the same position through
      // the preceding block instead.
      if (a->Block < block && idx < Data.BlockOverlap) {
          block -= 1;
          idx += a->BlockSizeMinusOverlap;
      }

      int lastListedBlock = Data.Blocks.Count - 1;
      if (block < lastListedBlock) {
          Data.ReadBlock(block);
      } else {
          // read forward, block by block, until the target block has been read
          // or ReadBlock returns null
          int b = lastListedBlock;
          for (;;) {
              if (Data.ReadBlock(b) == null) break;
              if (b >= block) break;
              ++b;
          }
          bool positionIsAvailable = block == a->Block
                                     && idx < PositiveDistance(a->BufferBegin, a->BufferEnd);
          if (!positionIsAvailable)
              return new Iterator{Anchor = a, Ptr = null, Block = -1};
      }
      return new Iterator{Anchor = a, Ptr = a->BufferBegin + idx, Block = block};
  }
  // On platforms where unaligned 4-byte reads are fast (e.g. x86 or x64),
  // defining UNALIGNED_READS will increase the speed of longer
  // reading and matching operations. On other platforms it should not be defined.
  834. /// <summary>The iterator type for CharStreams.</summary>
  835. public struct Iterator : IEquatable<Iterator> {
  836. internal Anchor* Anchor;
  837. /// <summary>Pointer to the current char in the CharStream's buffer (if the CharStream's current block equals Block).</summary>
  838. internal char* Ptr;
  839. /// <summary>The buffer block for which Ptr is valid.</summary>
  840. internal int Block;
  /// <summary>The CharStream over which the Iterator iterates.</summary>
  public CharStream Stream { get {
      // the anchor holds a GCHandle whose target is the stream
      object target = Anchor->StreamHandle.Target;
      return (CharStream)target;
  } }
  /// <summary>Indicates whether the Iterator points to the beginning of the CharStream.
  /// If the CharStream is empty, this property is always true.</summary>
  public bool IsBeginOfStream { get {
      if (Ptr != Anchor->BufferBegin) return false;
      // Block is 0 for the first block and negative for the end-of-stream iterator
      return Block <= 0;
  } }
  /// <summary>Indicates whether the Iterator points to the end of the CharStream,
  /// i.e. whether it points to one char beyond the last char in the CharStream.</summary>
  public bool IsEndOfStream { get {
      // end-of-stream iterators are marked by a negative block number
      return !(Block >= 0);
  } }
  851. /// <summary>The char returned by Read() if the iterator has
  852. /// reached the end of the stream. The value is '\uFFFF'.</summary>
  853. public const char EndOfStreamChar = EOS;
  // Trivial variations in the code, such as introducing a temporary variable
  // or changing the order of expressions, can have a significant effect on the
  // performance of the machine code generated by the current JIT(s) on .NET.
  // If the following function implementations sometimes look a bit inconsistent or
  // verbose, it's mostly because they have been tuned for best performance
  // on the x86 JIT (.NET 3.5 SP1).
  /// <summary>The index of the stream char pointed to by the Iterator.</summary>
  public long Index { get {
      Anchor* a = Anchor;
      int block = Block;

      // determine the stream index of the first char of the iterator's block
      long blockBeginIndex;
      if (block == a->Block) {
          Debug.Assert(a->BufferBegin <= Ptr && Ptr < a->BufferEnd);
          blockBeginIndex = a->CharIndexPlusOffset;
      } else if (block < 0) {
          Debug.Assert(block == -1 && Ptr == null);
          // this is safe, as there can only be an end-of-stream iterator
          // once the end of stream has been detected
          return a->EndIndex;
      } else {
          // the iterator's block is not the one currently in the buffer,
          // so its begin index has to be computed from the block number
          Debug.Assert(a->BufferBegin <= Ptr && (Ptr < a->BufferEnd || a->Block == a->LastBlock));
          blockBeginIndex = a->CharIndexOffset + Math.BigMul(block, a->BlockSizeMinusOverlap);
      }

      if (sizeof(System.IntPtr) != 8) // the JIT removes the inactive branch
          return (uint)PositiveDistance(a->BufferBegin, Ptr) + blockBeginIndex;
      else
          return PositiveDistance64(a->BufferBegin, Ptr) + blockBeginIndex;
  } }
  /// <summary>Returns an Iterator pointing to the next char in the stream. If the Iterator already
  /// has reached the end of the stream, i.e. if it points to one char beyond
  /// the last char, the same Iterator is returned.</summary>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  [DebuggerBrowsable(DebuggerBrowsableState.Never)]
  public Iterator Next { get {
      Anchor* a = Anchor;
      char* next = Ptr + 1;
      // leave everything but a step within the currently buffered block to the slow path
      if (Block != a->Block || next >= a->BufferEnd)
          return NextContinue();
      return new Iterator{Anchor = a, Ptr = next, Block = Block};
  } }
  /// <summary>The non-inlined slow path of Next: advances by one char via AdvanceContinue.</summary>
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private Iterator NextContinue() {
      const uint oneChar = 1u;
      return AdvanceContinue(oneChar);
  }
  /// <summary>Returns an Iterator that is advanced by offset chars. The Iterator can't
  /// move past the end of the stream, i.e. any position beyond the last char
  /// in the stream is interpreted as precisely one char beyond the last char.</summary>
  /// <param name="offset">The number of chars to advance by; may be negative.</param>
  /// <exception cref="ArgumentOutOfRangeException">The new position would lie before the beginning of the `CharStream`.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public Iterator Advance(int offset) {
      if (offset < 0) {
          // moving backwards: stay on the fast path if the new position doesn't
          // lie before the beginning of the buffer
          bool staysInBuffer = Block >= 0
                               && unchecked((uint)-offset) <= (uint)PositiveDistance(Anchor->BufferBegin, Ptr);
          if (!staysInBuffer) return AdvanceContinue(offset);
          return new Iterator{Anchor = Anchor, Ptr = unchecked(Ptr + offset), Block = Block};
      }

      // moving forwards: stay on the fast path if the new position lies
      // within the currently buffered block
      Anchor* a = Anchor;
      if (Block != a->Block || offset >= PositiveDistance(Ptr, a->BufferEnd))
          return AdvanceContinue((uint)offset);
      return new Iterator{Anchor = Anchor, Ptr = Ptr + offset, Block = Block};
  }
  /// <summary>The non-inlined slow path of Advance(int): seeks the stream to the index Index + offset.</summary>
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  internal Iterator AdvanceContinue(int offset) {
      long newIndex = Index + offset;
      return Stream.Seek(newIndex);
  }
  /// <summary>Returns an Iterator that is advanced by offset chars. The Iterator can't
  /// move past the end of the stream, i.e. any position beyond the last char
  /// in the stream is interpreted as precisely one char beyond the last char.</summary>
  /// <param name="offset">The number of chars to advance by; may be negative.</param>
  /// <exception cref="ArgumentOutOfRangeException">The new position would lie before the beginning of the `CharStream`.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  /// <exception cref="OutOfMemoryException">Can not allocate enough memory for the internal data structure.</exception>
  public Iterator Advance(long offset) {
      if (Block == Anchor->Block) {
          bool staysInBuffer = offset >= 0 ? offset < PositiveDistance(Ptr, Anchor->BufferEnd)
                                           : offset >= -PositiveDistance(Anchor->BufferBegin, Ptr);
          if (staysInBuffer) {
              int nn = (int)offset;
              // we need unchecked here because C# always uses unsigned arithmetic for
              // pointer calculations and otherwise would report an overflow for any
              // negative offset if overflow checking is activated
              char* newPtr = unchecked(Ptr + nn);
              return new Iterator{Anchor = Anchor, Ptr = newPtr, Block = Block};
          }
      }

      long index = Index;
      // clamp the target index to long.MaxValue instead of letting the addition overflow
      long newIndex = offset > long.MaxValue - index ? long.MaxValue : index + offset;
      return Stream.Seek(newIndex);
  }
  /// <summary>Returns an Iterator that is advanced by offset chars. The Iterator can't
  /// move past the end of the stream, i.e. any position beyond the last char
  /// in the stream is interpreted as precisely one char beyond the last char.</summary>
  /// <param name="offset">The number of chars to advance by.</param>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public Iterator Advance(uint offset) {
      Anchor* a = Anchor;
      // leave everything but a move within the currently buffered block to the slow path
      if (Block != a->Block || offset >= (uint)PositiveDistance(Ptr, a->BufferEnd))
          return AdvanceContinue(offset);
      return new Iterator{Anchor = Anchor, Ptr = Ptr + offset, Block = Block};
  }
  /// <summary>The non-inlined slow path of the forward-moving Advance overloads and of Next. Returns an end-of-stream iterator if the new position is known to lie beyond the last char, otherwise seeks the stream to the index Index + offset.</summary>
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private Iterator AdvanceContinue(uint offset) {
      Debug.Assert(offset != 0 || Block != Anchor->Block);

      // The end of the stream is reached without seeking if the stream has only one block,
      // if the iterator already is an end-of-stream iterator, or if the iterator
      // points into the last block while that block is in the buffer.
      bool reachesEnd = Anchor->LastBlock == 0
                        || Block < 0
                        || (Block == Anchor->LastBlock && Block == Anchor->Block);
      if (!reachesEnd)
          return Stream.Seek(Index + offset);
      return new Iterator{Anchor = Anchor, Ptr = null, Block = -1};
  }
  // The following methods with a leading underscore don't belong to the
  // "official" API, because they mutate the Iterator struct, which otherwise
  // is expected to be immutable. However, users who know what they're doing
  // can use them to implement very efficient parser loops, which is why these
  // methods are declared public.
  /// <summary>Advances the Iterator *in-place* by 1 char and returns the char on the new position.
  /// `c &lt;- iter._Increment()` is equivalent to `iter &lt;- iter.Next; c &lt;- iter.Read()`.</summary>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public char _Increment() {
      Anchor* a = Anchor;
      char* next = Ptr + 1;
      // leave everything but a step within the currently buffered block to the slow path
      if (Block != a->Block || next >= a->BufferEnd)
          return IncrementContinue();
      Ptr = next;
      return *next;
  }
  /// <summary>The non-inlined slow path of _Increment(): increments by one char via IncrementContinue(uint).</summary>
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private char IncrementContinue() {
      const uint oneChar = 1u;
      return IncrementContinue(oneChar);
  }
  /// <summary>Advances the Iterator *in-place* by offset chars and returns the char on the new position.
  /// `c &lt;- iter._Increment(offset)` is an optimized implementation of `iter &lt;- iter.Advance(offset); c &lt;- iter.Read()`.</summary>
  /// <param name="offset">The number of chars to advance by.</param>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public char _Increment(uint offset) {
      Anchor* a = Anchor;
      char* current = Ptr;
      // leave everything but a move within the currently buffered block to the slow path
      if (Block != a->Block || offset >= (uint)PositiveDistance(current, a->BufferEnd))
          return IncrementContinue(offset);
      char* target = current + offset;
      Ptr = target;
      return *target;
  }
  /// <summary>The non-inlined slow path of the _Increment methods. Turns the Iterator into an end-of-stream iterator and returns EOS if the new position is known to lie beyond the last char; otherwise seeks the stream to the index Index + offset, stores the resulting iterator in place and returns the char it points to.</summary>
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  internal char IncrementContinue(uint offset) {
      Debug.Assert(offset != 0 || Block != Anchor->Block);

      // The end of the stream is reached without seeking if the stream has only one block,
      // if the iterator already is an end-of-stream iterator, or if the iterator
      // points into the last block while that block is in the buffer.
      bool reachesEnd = Anchor->LastBlock == 0
                        || Block < 0
                        || (Block == Anchor->LastBlock && Block == Anchor->Block);
      if (!reachesEnd) {
          this = Stream.Seek(Index + offset);
          return Read();
      }
      Ptr = null;
      Block = -1;
      return EOS;
  }
  /// <summary>Moves the Iterator *in-place* back by one char and returns the char at the new position.
  /// If the Iterator already points to the beginning of the CharStream,
  /// the position is left unchanged and the EndOfStreamChar ('\uFFFF') is returned.</summary>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  public char _Decrement() {
      Anchor* a = Anchor;
      // unchecked: the subtraction must not trap; the range test below validates the result.
      char* previous = unchecked(Ptr - 1);
      if (Block != a->Block || previous < a->BufferBegin) return DecrementContinue();
      Ptr = previous;
      return *previous;
  }
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private char DecrementContinue() {
      // Out-of-line slow path of _Decrement(): step back by exactly one char.
      const uint singleChar = 1u;
      return DecrementContinue(singleChar);
  }
  /// <summary>Moves the Iterator *in-place* back by offset chars and returns the char at the new position.
  /// If the new position would lie before the beginning of the CharStream,
  /// the Iterator is moved to the beginning of the stream and the EndOfStreamChar ('\uFFFF') is returned.</summary>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  public char _Decrement(uint offset) {
      Anchor* a = Anchor;
      // Leaving the buffered block (or stepping in front of its first char) goes out of line.
      if (Block != a->Block || offset > (uint)PositiveDistance(a->BufferBegin, Ptr))
          return DecrementContinue(offset);
      char* target = Ptr - offset;
      Ptr = target;
      return *target;
  }
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private char DecrementContinue(uint offset) {
      // Slow path of _Decrement(uint): the target is not inside the buffered block.
      Debug.Assert(offset != 0 || Block != Anchor->Block);
      if (Block == 0 && Anchor->Block == 0) {
          // Block 0 is buffered and the target lies in front of it: clamp to the buffer start.
          Ptr = Anchor->BufferBegin;
          return EOS;
      }
      long targetIndex = Index - offset;
      if (targetIndex < Anchor->CharIndexOffset) {
          // The target lies before the first char index of the stream.
          this = Stream.Begin;
          return EOS;
      }
      this = Stream.Seek(targetIndex);
      return Read();
  }
  /// <summary>A helper routine for optimizing State methods. Moves the Iterator *in-place* by offset chars.</summary>
  internal void _AdvanceInPlace(int offset) { // mirrors the range checks of Peek(int)
      char* target = unchecked(Ptr + offset);
      if (offset < 0) {
          // target == Ptr has to be rejected here, because Ptr + Int32.MinValue == Ptr
          bool inBuffer = target < Ptr && target >= Anchor->BufferBegin && Anchor->Block == Block;
          if (inBuffer) Ptr = target;
          else this = AdvanceContinue(offset);
          return;
      }
      if (target >= Ptr && target < Anchor->BufferEnd && Anchor->Block == Block) {
          Ptr = target;
          return;
      }
      // The char returned by the slow path is deliberately discarded.
      IncrementContinue((uint)offset);
  }
  /// <summary>An optimized implementation of Next.Read(); the Iterator itself is not modified.</summary>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public char Peek() {
      Anchor* a = Anchor;
      char* next = Ptr + 1;
      // Read straight from the buffer when the next char is inside the buffered block.
      return (Block == a->Block && next < a->BufferEnd) ? *next : PeekContinue();
  }
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private char PeekContinue() {
      // Out-of-line slow path of Peek(): look ahead by exactly one char.
      const uint singleChar = 1u;
      return PeekContinue(singleChar);
  }
  /// <summary>An optimized implementation of Advance(offset).Read(),
  /// except that the EndOfStreamChar ('\uFFFF') is returned if Index + offset &lt; 0 (instead of an exception being thrown).</summary>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public char Peek(int offset) {
      char* target = unchecked(Ptr + offset);
      if (offset >= 0) {
          if (target >= Ptr && target < Anchor->BufferEnd && Anchor->Block == Block) return *target;
          return PeekContinue((uint)offset);
      }
      // target == Ptr has to be rejected here, because Ptr + Int32.MinValue == Ptr
      if (target < Ptr && target >= Anchor->BufferBegin && Anchor->Block == Block) return *target;
      return PeekContinue(offset);
  }
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private char PeekContinue(int offset) {
      // Slow path of Peek(int) for negative offsets.
      Debug.Assert(offset < 0);
      if (Block == 0 && Anchor->Block == 0) return EOS;
      long targetIndex = Index + offset;
      // Positions in front of the stream's first char index yield EOS instead of throwing.
      if (targetIndex < Anchor->CharIndexOffset) return EOS;
      return Stream.Seek(targetIndex).Read();
  }
  /// <summary>An optimized implementation of Advance(offset).Read(); the Iterator itself is not modified.</summary>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public char Peek(uint offset) {
      Anchor* a = Anchor;
      char* current = Ptr;
      bool inBuffer = Block == a->Block && offset < (uint)PositiveDistance(current, a->BufferEnd);
      return inBuffer ? current[offset] : PeekContinue(offset);
  }
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private char PeekContinue(uint offset) {
      // Slow path of Peek(uint): the target is not inside the buffered block.
      Debug.Assert(offset != 0 || Block != Anchor->Block);
      // Same end-of-stream test as IncrementContinue(uint), but without touching the iterator.
      bool pastEnd = Anchor->LastBlock == 0
                  || Block < 0
                  || (Block == Anchor->LastBlock && Block == Anchor->Block);
      if (pastEnd) return EOS;
      return Stream.Seek(Index + offset).Read();
  }
  /// <summary>Returns true if and only if the char argument equals the char pointed to by the Iterator.
  /// At the end of the stream Match always returns false.</summary>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public bool Match(char ch) {
      // Compare directly when the iterator's block is the buffered one.
      return (Block == Anchor->Block) ? (*Ptr == ch) : MatchContinue(ch);
  }
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private bool MatchContinue(char ch) {
      // Slow path of Match(char): the iterator's block is not the buffered one.
      if (Block >= 0) {
          // Load the iterator's block into the buffer before dereferencing Ptr.
          Stream.Data.ReadBlock(Block);
          return *Ptr == ch;
      }
      // Block < 0 is the end-of-stream state; nothing can match there.
      return false;
  }
  /// <summary>Returns true if chars equals the stream chars beginning with the char pointed to by the Iterator.
  /// Returns false if the chars differ or if fewer than chars.Length chars remain in the stream.
  /// If chars is empty, true is returned.</summary>
  /// <exception cref="NullReferenceException">chars is null.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public bool Match(string chars) {
      Anchor* a = Anchor;
      // The inline comparison is only valid when all of chars fits into the buffered block.
      if (Block != a->Block || chars.Length > PositiveDistance(Ptr, a->BufferEnd))
          return MatchContinue(chars);
      int i = 0;
      while (i < chars.Length) {
          if (Ptr[i] != chars[i]) return false;
          ++i;
      }
      return true;
  }
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private bool MatchContinue(string chars) {
      // Slow path of Match(string).
      int count = chars.Length; // throws if chars is null
      bool pastEnd = Anchor->LastBlock == 0
                  || Block < 0
                  || (Block == Anchor->LastBlock && Block == Anchor->Block);
      if (!pastEnd) {
          fixed (char* pChars = chars) return MatchContinue(pChars, count);
      }
      // No more chars can be read, so only the empty string matches.
      return count == 0;
  }
  /// <summary>Returns true if caseFoldedChars equals the case-folded stream chars
  /// beginning with the char pointed to by the Iterator.
  /// Only the stream chars are case-folded before the comparison;
  /// the chars in the string argument are assumed to already be case-folded.
  /// Returns false if the chars differ or if not enough chars remain in the stream.
  /// If caseFoldedChars is empty, true is returned.</summary>
  /// <exception cref="NullReferenceException">caseFoldedChars is null.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public bool MatchCaseFolded(string caseFoldedChars) {
      Anchor* a = Anchor;
      // The inline loop needs the whole string inside the buffered block and an
      // already available fold table; everything else is handled out of line.
      bool canMatchInline = Block == a->Block
                         && caseFoldedChars.Length <= PositiveDistance(Ptr, a->BufferEnd)
                         && CaseFoldTable.FoldedChars != null;
      if (!canMatchInline) return MatchCaseFoldedContinue(caseFoldedChars);
      int i = 0;
      while (i < caseFoldedChars.Length) {
          if (CaseFoldTable.FoldedChars[Ptr[i]] != caseFoldedChars[i]) return false;
          ++i;
      }
      return true;
  }
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private bool MatchCaseFoldedContinue(string caseFoldedChars) {
      // Slow path of MatchCaseFolded(string).
      int count = caseFoldedChars.Length; // throws if caseFoldedChars is null
      bool pastEnd = Anchor->LastBlock == 0
                  || Block < 0
                  || (Block == Anchor->LastBlock && Block == Anchor->Block);
      if (!pastEnd) {
          fixed (char* pFolded = caseFoldedChars)
              return MatchCaseFoldedContinue(pFolded, count);
      }
      // No more chars can be read, so only the empty string matches.
      return count == 0;
  }
  /// <summary>Returns true if the chars of the string between the indices charsIndex (inclusive) and
  /// charsIndex + length (exclusive) equal the stream chars beginning with the char pointed to by the Iterator.
  /// Returns false if the chars differ or if not enough chars remain in the stream.
  /// If length is 0, true is returned.</summary>
  /// <exception cref="ArgumentOutOfRangeException">charsIndex is negative, length is negative or charsIndex + length > chars.Length.</exception>
  /// <exception cref="NullReferenceException">chars is null.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public bool Match(string chars, int charsIndex, int length) {
      if (charsIndex < 0)
          throw new ArgumentOutOfRangeException("charsIndex", "charsIndex is negative.");
      int available = chars.Length - charsIndex; // throws if chars is null
      if (length > available)
          throw new ArgumentOutOfRangeException("length", "Length is out of range.");
      fixed (char* pChars = chars) {
          char* start = pChars + charsIndex;
          return Match(start, length); // the pointer overload rejects a negative length
      }
  }
  /// <summary>Returns true if the chars of the array between the indices charsIndex (inclusive) and
  /// charsIndex + length (exclusive) equal the stream chars beginning with the char pointed to by the Iterator.
  /// Returns false if the chars differ or if not enough chars remain in the stream.
  /// If length is 0, true is returned.</summary>
  /// <exception cref="ArgumentOutOfRangeException">charsIndex is negative, length is negative or charsIndex + length > chars.Length.</exception>
  /// <exception cref="NullReferenceException">chars is null.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public bool Match(char[] chars, int charsIndex, int length) {
      if (charsIndex < 0)
          throw new ArgumentOutOfRangeException("charsIndex", "charsIndex is negative.");
      int available = chars.Length - charsIndex; // throws if chars is null
      if (length > available)
          throw new ArgumentOutOfRangeException("length", "Length is out of range.");
      fixed (char* pChars = chars) {
          char* start = pChars + charsIndex;
          return Match(start, length); // the pointer overload rejects a negative length
      }
  }
  /// <summary>Returns true if the length chars at the pointer address equal the stream chars
  /// beginning with the char pointed to by the Iterator.
  /// Returns false if the chars differ or if not enough chars remain in the stream.
  /// If length is 0, true is returned.</summary>
  /// <exception cref="ArgumentOutOfRangeException">length is negative.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public bool Match(char* chars, int length) {
      Anchor* a = Anchor;
      // The unsigned comparison sends a negative length to the slow path,
      // which is where the ArgumentOutOfRangeException is thrown.
      if (Block != a->Block || unchecked((uint)length > (uint)PositiveDistance(Ptr, a->BufferEnd)))
          return MatchContinue(chars, length);
  #if UNALIGNED_READS
      // Compare two chars at a time through int reads, then the odd trailing char.
      int evenLength = length & 0x7ffffffe;
      for (int i = 0; i < evenLength; i += 2) {
          if (*((int*)(Ptr + i)) != *((int*)(chars + i))) return false;
      }
      if (evenLength != length && Ptr[evenLength] != chars[evenLength]) return false;
      return true;
  #else
      for (int i = 0; i < length; ++i) {
          if (Ptr[i] != chars[i]) return false;
      }
      return true;
  #endif
  }
  /// <summary>Returns true if the length chars at the pointer address equal the case-folded stream chars
  /// beginning with the char pointed to by the Iterator.
  /// Only the stream chars are case-folded before the comparison;
  /// the chars at the pointer address are assumed to already be case-folded.
  /// Returns false if the chars differ or if not enough chars remain in the stream.
  /// If length is 0, true is returned.</summary>
  /// <exception cref="ArgumentOutOfRangeException">length is negative.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public bool MatchCaseFolded(char* caseFoldedChars, int length) {
      // The unsigned comparison sends a negative length to the slow path, which throws.
      // A fold table that is not yet available is handled by the slow path as well.
      bool canMatchInline = Block == Anchor->Block
                         && unchecked((uint)length) <= (uint)PositiveDistance(Ptr, Anchor->BufferEnd)
                         && CaseFoldTable.FoldedChars != null;
      if (!canMatchInline) return MatchCaseFoldedContinue(caseFoldedChars, length);
      int i = 0;
      while (i < length) {
          if (CaseFoldTable.FoldedChars[Ptr[i]] != caseFoldedChars[i]) return false;
          ++i;
      }
      return true;
  }
  // Slow path of Match(char*, int): validates length and compares across block boundaries,
  // loading each following block into the buffer as needed.
  private bool MatchContinue(char* chars, int length) {
      if (length == 0) return true;
      if (length < 0) throw new ArgumentOutOfRangeException("length", "Length is negative.");
      int block = Block; // local copy that is advanced while following blocks are read
      bool pastEnd = block < 0
                  || Anchor->LastBlock == 0
                  || (block == Anchor->LastBlock && block == Anchor->Block);
      if (pastEnd) return false;
      CharStream stream = null; // fetched lazily, only when a block has to be read
      if (block != Anchor->Block) {
          stream = Stream;
          stream.Data.ReadBlock(block);
      }
      char* src = Ptr;
      // length > 0 is guaranteed at this point
      while (true) {
          // Compare as many chars as the buffer holds from src on, at most the remaining length.
          int chunk = Math.Min(PositiveDistance(src, Anchor->BufferEnd), length);
          length -= chunk;
  #if UNALIGNED_READS
          while (chunk >= 2) {
              if (*((int*)src) != *((int*)chars)) return false;
              src += 2; chars += 2; chunk -= 2;
          }
          if (chunk != 0) {
              if (*src != *chars) return false;
              ++chars;
          }
  #else
          do {
              if (*src != *chars) return false;
              ++src; ++chars; --chunk;
          } while (chunk != 0);
  #endif
          if (length == 0) return true;
          if (stream == null) stream = Stream;
          // Continue in the next block; a null result means there are no more chars.
          src = stream.Data.ReadBlock(++block);
          if (src == null) return false;
      }
  }
  // Slow path of MatchCaseFolded(char*, int): validates length, makes sure the fold table
  // is available and compares across block boundaries.
  private bool MatchCaseFoldedContinue(char* caseFoldedChars, int length) {
      if (length == 0) return true;
      if (length < 0) throw new ArgumentOutOfRangeException("length", "Length is negative.");
      int block = Block; // local copy that is advanced while following blocks are read
      bool pastEnd = block < 0
                  || Anchor->LastBlock == 0
                  || (block == Anchor->LastBlock && block == Anchor->Block);
      if (pastEnd) return false;
      CharStream stream = null; // fetched lazily, only when a block has to be read
      if (block != Anchor->Block) {
          stream = Stream;
          stream.Data.ReadBlock(block);
      }
      char* src = Ptr;
      char* foldTable = CaseFoldTable.FoldedChars;
      if (foldTable == null) foldTable = CaseFoldTable.Initialize();
      // length > 0 is guaranteed at this point
      while (true) {
          // Compare as many chars as the buffer holds from src on, at most the remaining length.
          int chunk = Math.Min(PositiveDistance(src, Anchor->BufferEnd), length);
          length -= chunk;
          do {
              if (foldTable[*src] != *caseFoldedChars) return false;
              ++src; ++caseFoldedChars; --chunk;
          } while (chunk != 0);
          if (length == 0) return true;
          if (stream == null) stream = Stream;
          // Continue in the next block; a null result means there are no more chars.
          src = stream.Data.ReadBlock(++block);
          if (src == null) return false;
      }
  }
  /// <summary>Applies the given regular expression to the stream chars beginning with the char pointed to by the Iterator
  /// and returns the resulting Match object. (Not supported by CharStreams constructed from char arrays or pointers.)</summary>
  /// <remarks><para>For performance reasons the regular expression should be written
  /// such that it can only match at the beginning of a string,
  /// for example by prepending "\A".</para>
  /// <para>For CharStreams constructed from large binary streams the regular expression is not applied
  /// to a string containing all the remaining chars in the stream. The minRegexSpace parameter
  /// of the CharStream constructors determines the minimum number of chars that are guaranteed
  /// to be visible to the regular expression.</para>
  /// <para>
  /// IMPORTANT:<br/>
  /// If the CharStream has been constructed from a System.IO.Stream or a file path, the regular expression is
  /// applied to an internal mutable buffer. Since the Match object may work lazily, i.e. compute matched strings
  /// only when they are needed, you must retrieve all the required information from the Match object before
  /// you continue to access the CharStream; otherwise you might get invalid results.</para>
  /// </remarks>
  /// <exception cref="NullReferenceException">regex is null.</exception>
  /// <exception cref="NotSupportedException">Two possible reasons: 1) The CharStream was constructed from a char array or char pointer, in which case it does not support regular expression matching.
  /// 2) Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public Match Match(Regex regex) {
      CharStream stream = Stream;
      if (stream.BufferString == null) throw new NotSupportedException("CharStream instances constructed from char arrays or char pointers do not support regular expression matching.");
      int block = Block;
      // Block < 0 is the end-of-stream state: match against the empty string.
      if (block < 0) return regex.Match("");
      var data = stream.Data;
      if (data != null) {
          bool currentBlockSuffices = Ptr <= data.RegexSpaceThreshold || block == Anchor->LastBlock;
          if (currentBlockSuffices) {
              if (block != Anchor->Block) data.ReadBlock(block);
          } else if (block + 1 == Anchor->Block || data.ReadBlock(block + 1) != null) {
              // BlockOverlap > MinRegexSpace
              // The next block is (now) buffered, so the char pointed to by the iterator
              // has moved towards the beginning of the buffer: re-express the position.
              Block = block + 1;
              Ptr -= Anchor->BlockSizeMinusOverlap;
              Debug.Assert(Anchor->BufferBegin <= Ptr && Ptr < Anchor->BufferEnd);
          } else {
              // block < LastBlock and no new chars could be read from block + 1,
              // so the current block definitely has to be read.
              data.ReadBlock(block);
          }
      }
      int startIndex = PositiveDistance(stream.BufferStringPointer, Ptr);
      int charCount = PositiveDistance(Ptr, Anchor->BufferEnd);
      return regex.Match(stream.BufferString, startIndex, charCount);
  }
  /// <summary>Returns the stream char pointed to by the Iterator,
  /// or the EndOfStreamChar ('\uFFFF') if the Iterator has reached the end of the stream.</summary>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  public char Read() {
      // Dereference directly when the iterator's block is the buffered one.
      return (Block == Anchor->Block) ? *Ptr : ReadContinue();
  }
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private char ReadContinue() {
      // Slow path of Read(): the iterator's block is not the buffered one.
      if (Block >= 0) {
          // Load the iterator's block into the buffer before dereferencing Ptr.
          Stream.Data.ReadBlock(Block);
          return *Ptr;
      }
      // Block < 0 is the end-of-stream state.
      return EOS;
  }
  /// <summary>Two chars packed into a single 32-bit value:
  /// Char0 occupies the low 16 bits and Char1 the high 16 bits.</summary>
  public struct TwoChars : IEquatable<TwoChars> {
      private uint chars;

      /// <summary>Wraps an already packed value (Char0 in the low half, Char1 in the high half).</summary>
      internal TwoChars(uint chars) {
          this.chars = chars;
      }

      /// <summary>Packs char0 into the low and char1 into the high 16 bits.</summary>
      public TwoChars(char char0, char char1) {
          this.chars = ((uint)char1 << 16) | (uint)char0;
      }

      /// <summary>The first char (low 16 bits).</summary>
      public char Char0 { get { return unchecked((char)chars); } }

      /// <summary>The second char (high 16 bits).</summary>
      public char Char1 { get { return (char)(chars >> 16); } }

      public override bool Equals(object obj) { return (obj is TwoChars) && chars == ((TwoChars) obj).chars; }

      public bool Equals(TwoChars other) { return chars == other.chars; }

      // unchecked: the packed value exceeds Int32.MaxValue whenever Char1 >= 0x8000,
      // so a plain cast would throw OverflowException in a checked-arithmetic build.
      public override int GetHashCode() { return unchecked((int)chars); }

      public static bool operator==(TwoChars left, TwoChars right) { return left.chars == right.chars; }

      public static bool operator!=(TwoChars left, TwoChars right) { return left.chars != right.chars; }
  }
  /// <summary>An optimized implementation of new TwoChars(Read(), Next.Read()).</summary>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public TwoChars Read2() {
      Anchor* a = Anchor;
      char* current = Ptr;
      // Both chars must lie inside the buffered block for the inline read.
      if (Block != a->Block || current + 1 >= a->BufferEnd) return Read2Continue();
  #if UNALIGNED_READS
      // On little-endian machines a single 32-bit read already has the TwoChars layout.
      if (BitConverter.IsLittleEndian) return new TwoChars(*((uint*)(current)));
      return new TwoChars(current[0], current[1]);
  #else
      return new TwoChars(current[0], current[1]);
  #endif
  }
  [MethodImplAttribute(MethodImplOptions.NoInlining)]
  private TwoChars Read2Continue() {
      // Slow path of Read2(): at the end of the stream both chars are EOS,
      // otherwise fall back to the single-char primitives.
      return Block < 0 ? new TwoChars(EOS, EOS)
                       : new TwoChars(Read(), Peek());
  }
  /// <summary>Returns a string with the length stream chars beginning with the char pointed to by the Iterator.
  /// If fewer than length chars remain in the stream, only the remaining chars are returned.</summary>
  /// <exception cref="ArgumentOutOfRangeException">length is negative.</exception>
  /// <exception cref="OutOfMemoryException">There is not enough memory for the string or the requested string is too large.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public string Read(int length) {
      Anchor* a = Anchor;
      // The unsigned comparison sends a negative length to the slow path, which throws.
      bool inBuffer = Block == a->Block
                   && unchecked((uint)length) <= (uint)PositiveDistance(Ptr, a->BufferEnd);
      return inBuffer ? new String(Ptr, 0, length) : ReadContinue(length, false);
  }
  /// <summary>Returns a string with the length stream chars beginning with the char pointed to by the Iterator.
  /// If fewer than length chars remain in the stream,
  /// only the remaining chars are returned, or an empty string if allOrEmpty is true.</summary>
  /// <exception cref="ArgumentOutOfRangeException">length is negative.</exception>
  /// <exception cref="OutOfMemoryException">There is not enough memory for the string or the requested string is too large.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public string Read(int length, bool allOrEmpty) {
      Anchor* a = Anchor;
      // Out of line: another block is involved, the request exceeds the buffer, or length is negative.
      if (Block != a->Block || unchecked((uint)length) > (uint)PositiveDistance(Ptr, a->BufferEnd))
          return ReadContinue(length, allOrEmpty);
      return new String(Ptr, 0, length);
  }
  // Slow path of Read(int) and Read(int, bool): validates length and copies
  // the chars through the pointer-based Read overload.
  private string ReadContinue(int length, bool allOrEmpty) {
      if (length < 0) throw new ArgumentOutOfRangeException("length", "Length is negative.");
      if (length == 0 || Block < 0) return "";
      // NOTE(review): LastBlock == Int32.MaxValue presumably means the end of the stream has
      // not been detected yet, so EndIndex cannot be used to cap the request - confirm.
      if (Anchor->LastBlock != Int32.MaxValue) {
          long remaining = Anchor->EndIndex - Index;
          if (length > remaining) {
              if (allOrEmpty) return "";
              length = (int)remaining;
          }
      }
      // Allocate the result up front and let Read fill it in place.
      string result = new String('\u0000', length);
      fixed (char* pResult = result) {
          int copied = Read(pResult, length);
          if (copied == length) return result;
          // Fewer chars than requested were available.
          return allOrEmpty ? "" : new String(pResult, 0, copied);
      }
  }
  /// <summary>Copies the <paramref name="length"/> stream chars that begin with the char the Iterator points to
  /// into <paramref name="buffer"/>, writing them from index <paramref name="bufferIndex"/> onwards.
  /// If fewer than <paramref name="length"/> chars remain in the stream, only the remaining chars are copied.</summary>
  /// <param name="buffer">The destination array.</param>
  /// <param name="bufferIndex">The index in buffer at which the first char is written.</param>
  /// <param name="length">The maximum number of chars to copy.</param>
  /// <returns>The number of chars actually copied.</returns>
  /// <exception cref="ArgumentOutOfRangeException">bufferIndex is negative, length is negative or bufferIndex + length > buffer.Length.</exception>
  /// <exception cref="NullReferenceException">buffer is null.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public int Read(char[] buffer, int bufferIndex, int length) {
      if (bufferIndex < 0) {
          throw new ArgumentOutOfRangeException("bufferIndex", "bufferIndex is negative.");
      }
      // Dereferencing buffer here is what raises the documented NullReferenceException.
      int capacity = buffer.Length - bufferIndex;
      if (length > capacity) {
          throw new ArgumentOutOfRangeException("length", "Length is out of range.");
      }
      fixed (char* pBuffer = buffer) {
          char* destination = pBuffer + bufferIndex;
          // A negative length is rejected by the pointer-based overload.
          return Read(destination, length);
      }
  }
  /// <summary>Copies the <paramref name="length"/> stream chars that begin with the char the Iterator points to
  /// into the memory at <paramref name="buffer"/>.
  /// If fewer than <paramref name="length"/> chars remain in the stream, only the remaining chars are copied.</summary>
  /// <param name="buffer">Pointer to the destination; it must have room for length chars.</param>
  /// <param name="length">The maximum number of chars to copy. Must not be negative.</param>
  /// <returns>The number of chars actually copied.</returns>
  /// <exception cref="NullReferenceException">buffer is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">length is negative.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public int Read(char* buffer, int length) {
      Anchor* anchor = Anchor;
      char* ptr = Ptr;
      // Fast path: the requested chars all lie in the currently loaded block.
      // The uint comparison also routes a negative length to ReadContinue, which throws.
      if (Block == anchor->Block && unchecked((uint)length) <= (uint)PositiveDistance(ptr, anchor->BufferEnd)) {
      #if UNALIGNED_READS
          int len = length;
          // Align the destination to a 4-byte boundary by copying a single char first.
          // This must be skipped for len == 0: otherwise one char is written and len becomes -1,
          // whose bit pattern makes every tail test below succeed, so 8 chars would be
          // written into a buffer the caller declared as empty.
          if (len != 0 && (unchecked((int)buffer) & 2) != 0) {
              *buffer = *ptr;
              ++buffer; ++ptr; --len;
          }
          // Bulk copy, 8 chars (four 32-bit words) per iteration.
          while (len >= 8) {
              ((int*)buffer)[0] = ((int*)ptr)[0];
              ((int*)buffer)[1] = ((int*)ptr)[1];
              ((int*)buffer)[2] = ((int*)ptr)[2];
              ((int*)buffer)[3] = ((int*)ptr)[3];
              buffer += 8; ptr += 8; len -= 8;
          }
          // Tail: len is now in [0, 7]; its bits select the remaining 4-, 2- and 1-char copies.
          if ((len & 4) != 0) {
              ((int*)buffer)[0] = ((int*)ptr)[0];
              ((int*)buffer)[1] = ((int*)ptr)[1];
              buffer += 4; ptr += 4;
          }
          if ((len & 2) != 0) {
              ((int*)buffer)[0] = ((int*)ptr)[0];
              buffer += 2; ptr += 2;
          }
          if ((len & 1) != 0) {
              *buffer = *ptr;
          }
      #else
          // Copy two chars per iteration, then the odd trailing char (if any).
          int len = length & 0x7ffffffe;
          for (int i = 0; i < len; i += 2) {
              buffer[i] = ptr[i];
              buffer[i + 1] = ptr[i + 1];
          }
          if (len != length) {
              buffer[len] = ptr[len];
          }
      #endif
          return length;
      }
      return ReadContinue(buffer, length);
  }
  /// <summary>Slow path of Read(char*, int): validates length and copies chars block by block,
  /// loading further blocks through the CharStream as needed.
  /// Returns the number of chars actually copied.</summary>
  private int ReadContinue(char* buffer, int length) {
      if (length == 0) return 0;
      if (length < 0) {
          throw new ArgumentOutOfRangeException("length", "Length is negative.");
      }
      int blockIndex = Block; // local copy, incremented as further blocks are loaded
      if (blockIndex < 0) return 0;
      // owner stays null for as long as no block had to be (re)loaded.
      CharStream owner = null;
      if (blockIndex != Anchor->Block) {
          owner = Stream;
          owner.Data.ReadBlock(blockIndex);
      }
      char* source = Ptr;
      int remaining = length; // chars still to be copied; > 0 on loop entry
      for (;;) {
          int chunk = Math.Min(PositiveDistance(source, Anchor->BufferEnd), remaining);
          remaining -= chunk;
      #if UNALIGNED_READS
          if ((unchecked((int)buffer) & 2) != 0) { // align the destination pointer
              *buffer = *source;
              ++buffer; ++source; --chunk;
          }
          while (chunk >= 8) {
              ((int*)buffer)[0] = ((int*)source)[0];
              ((int*)buffer)[1] = ((int*)source)[1];
              ((int*)buffer)[2] = ((int*)source)[2];
              ((int*)buffer)[3] = ((int*)source)[3];
              buffer += 8; source += 8; chunk -= 8;
          }
          if ((chunk & 4) != 0) {
              ((int*)buffer)[0] = ((int*)source)[0];
              ((int*)buffer)[1] = ((int*)source)[1];
              buffer += 4; source += 4;
          }
          if ((chunk & 2) != 0) {
              ((int*)buffer)[0] = ((int*)source)[0];
              buffer += 2; source += 2;
          }
          if ((chunk & 1) != 0) {
              *buffer = *source;
              ++buffer; // source is reassigned below, so only the destination advances
          }
      #else
          // Copies at least one char: this loop requires chunk > 0.
          do {
              *buffer = *source;
              ++buffer; ++source; --chunk;
          } while (chunk != 0);
      #endif
          if (remaining == 0) return length;
          if (owner == null) {
              // NOTE(review): LastBlock == 0 presumably means the stream has a single block,
              // so nothing further can be loaded -- confirm against the Anchor definition.
              if (Anchor->LastBlock == 0) return length - remaining;
              owner = Stream;
          }
          source = owner.Data.ReadBlock(++blockIndex);
          if (source == null) return length - remaining; // no further block available
      }
  }
  /// <summary>Returns a string with all the stream chars from the position of this Iterator (inclusive)
  /// up to the position of the Iterator argument (exclusive).
  /// If the argument does not point to a position after this Iterator's position, the returned string is empty.</summary>
  /// <param name="iterToCharAfterLastInString">An Iterator of the same CharStream that marks the end of the string.</param>
  /// <returns>The chars between the two positions, as a new string.</returns>
  /// <exception cref="ArgumentOutOfRangeException">iterToCharAfterLastInString belongs to a different CharStream.</exception>
  /// <exception cref="OutOfMemoryException">There is not enough memory for the string or the requested string is too large.</exception>
  /// <exception cref="NotSupportedException">Seeking of the underlying byte stream is required, but the byte stream does not support seeking or the Encoding's Decoder is not serializable.</exception>
  /// <exception cref="IOException">An I/O error occurred.</exception>
  /// <exception cref="ArgumentException">The input stream contains invalid bytes and the encoding was constructed with the throwOnInvalidBytes option.</exception>
  /// <exception cref="DecoderFallbackException">The input stream contains invalid bytes for which the decoder fallback threw this exception.</exception>
  public string ReadUntil(Iterator iterToCharAfterLastInString) {
      if (iterToCharAfterLastInString.Anchor != Anchor) {
          throw new ArgumentOutOfRangeException("iterToCharAfterLastInString", "The Iterator argument belongs to a different CharStream.");
      }
      int loadedBlock = Anchor->Block;
      // Unless both iterators point into the currently loaded block, take the index-based slow path.
      if (Block != loadedBlock || iterToCharAfterLastInString.Block != loadedBlock) {
          return ReadUntilContinue(iterToCharAfterLastInString);
      }
      char* first = Ptr;
      char* stop = iterToCharAfterLastInString.Ptr;
      return first < stop ? new String(first, 0, PositiveDistance(first, stop)) : "";
  }
  /// <summary>Slow path of ReadUntil: computes the string length from the two stream indices
  /// and fills a new string through ReadContinue(char*, int).</summary>
  private string ReadUntilContinue(Iterator iterToCharAfterLastInString) {
      ulong start = (ulong)Index;
      ulong stop = (ulong)iterToCharAfterLastInString.Index;
      if (start >= stop) return "";
      ulong distance = stop - start;
      // A distance beyond Int32.MaxValue is capped; allocating such a string throws anyway
      // (because the string is too large).
      int length;
      if (distance > (uint)System.Int32.MaxValue) {
          length = System.Int32.MaxValue;
      } else {
          length = (int)distance;
      }
      string result = new String('\u0000', length);
      fixed (char* pResult = result) {
          ReadContinue(pResult, length);
      }
      return result;
  }
  /// <summary>Returns true if obj is an Iterator that is equal to this Iterator
  /// according to Equals(Iterator).</summary>
  public override bool Equals(object obj) {
      if (!(obj is Iterator)) return false;
      return Equals((Iterator)obj);
  }
  /// <summary>Returns true if both Iterators have the same non-null Ptr and the same Block,
  /// or if they share the same Anchor and have the same Index.</summary>
  public bool Equals(Iterator other) {
      char* ptr = Ptr;
      // Cheap check first: identical non-null pointers within the same block.
      if (ptr != null && ptr == other.Ptr && Block == other.Block) return true;
      // Otherwise compare by owning anchor and stream index.
      return Anchor == other.Anchor && Index == other.Index;
  }
  /// <summary>Returns the hash code of the Iterator's Index.</summary>
  public override int GetHashCode() {
      // Only Index feeds into the hash.
      return Index.GetHashCode();
  }
  /// <summary>Returns true if left.Equals(right).</summary>
  public static bool operator==(Iterator left, Iterator right) {
      return left.Equals(right);
  }
  /// <summary>Returns true if left.Equals(right) is false.</summary>
  public static bool operator!=(Iterator left, Iterator right) {
      bool same = left.Equals(right);
      return !same;
  }
  1703. }
  /// <summary>Returns a case-folded copy of the string argument. All chars are mapped
  /// using the (non-Turkic) 1-to-1 case folding mappings (v. 5.1) for Unicode code
  /// points in the Basic Multilingual Plane, i.e. code points below 0x10000.
  /// If no char needs to be changed, the argument itself is returned.
  /// If the argument is null, null is returned.</summary>
  /// <param name="str">The string to fold; may be null.</param>
  /// <returns>The case-folded string.</returns>
  static public string FoldCase(string str) {
      if (str == null) return str;
      fixed (char* source = str) {
          char* stop = source + str.Length;
          char* table = CaseFoldTable.FoldedChars;
          if (table == null) table = CaseFoldTable.Initialize(); // table not yet available
          // Phase 1: look for the first char that is changed by folding.
          // The string is null-terminated, so one char can always be read, even from "".
          char* cursor = source;
          for (;;) {
              char current = *cursor;
              if (current != table[current]) break;
              ++cursor;
              if (cursor >= stop) return str; // nothing to fold, reuse the argument
          }
          // Phase 2: at least one char changes, so build a folded copy of the whole string.
          string folded = new String('\u0000', str.Length);
          fixed (char* target = folded) {
              char* reader = source;
              char* writer = target;
              do {
                  *writer = table[*reader];
                  ++reader; ++writer;
              } while (reader != stop);
          }
          return folded;
      }
  }
  /// <summary>Returns the given string with all occurrences of "\r\n" and "\r" replaced
  /// by "\n". If the string contains no '\r', the argument itself is returned.
  /// If the argument is null, null is returned.</summary>
  /// <param name="str">The string to normalize; may be null.</param>
  /// <returns>The string with normalized newlines.</returns>
  static public string NormalizeNewlines(string str) {
      if (str == null) return str;
      int length = str.Length;
      if (length == 0) return str;
      // The pinned char buffer is guaranteed to be null-terminated
      // (C# language specification on the fixed statement).
      fixed (char* src = str) {
          int loneCRs = 0;
          int crlfPairs = 0;
          char* stop = src + length;
          for (char* p = src; p != stop; ++p) {
              if (*p != '\r') continue;
              // Looking one char ahead is safe at the last char thanks to the null terminator.
              if (p[1] == '\n') ++crlfPairs;
              else ++loneCRs;
          }
          if (crlfPairs != 0) {
              // The result is shorter than the input, so a fresh copy has to be built.
              return CopyWithNormalizedNewlines(src, length, crlfPairs, loneCRs);
          }
          if (loneCRs != 0) {
              return str.Replace('\r', '\n');
          }
          return str;
      }
  }
  /// <summary>Returns a new string with the length chars at src in which every "\r\n" and
  /// every "\r" is replaced by "\n".</summary>
  /// <param name="src">Pointer to the first source char. In the mixed case the char after a
  /// trailing '\r' is read, so src[length] must be readable (e.g. a null terminator).</param>
  /// <param name="length">The number of source chars; must be positive.</param>
  /// <param name="nCRLF">The exact number of "\r\n" pairs in the source.</param>
  /// <param name="nCR">The exact number of '\r' chars in the source that are not followed by '\n'.</param>
  static internal string CopyWithNormalizedNewlines(char* src, int length, int nCRLF, int nCR) {
      Debug.Assert(length > 0 && nCRLF >= 0 && nCR >= 0 && (nCRLF | nCR) != 0);
      // Each "\r\n" pair shrinks the result by one char.
      string result = new String('\n', length - nCRLF);
      fixed (char* pResult = result) {
          char* output = pResult;
          char* stop = src + length;
          int pending; // newline sequences that still have to be rewritten
          if (nCRLF == 0) {
              // Only lone '\r' chars: a plain 1-to-1 replacement.
              pending = nCR;
              for (;;) {
                  char c = *src;
                  ++src;
                  if (c == '\r') {
                      *output = '\n';
                      ++output;
                      if (--pending == 0) break;
                  } else {
                      *output = c;
                      ++output;
                  }
              }
          } else if (nCR == 0) {
              // Only "\r\n" pairs: every '\r' is known to be followed by '\n'.
              pending = nCRLF;
              for (;;) {
                  char c = *src;
                  ++src;
                  if (c == '\r') {
                      ++src; // skip over the '\n' in "\r\n"
                      *output = '\n';
                      ++output;
                      if (--pending == 0) break;
                  } else {
                      *output = c;
                      ++output;
                  }
              }
          } else {
              // Mixed: each '\r' has to be inspected for a following '\n'.
              pending = nCRLF + nCR;
              for (;;) {
                  char c = *src;
                  ++src;
                  if (c == '\r') {
                      if (*src == '\n') ++src; // skip over the '\n' in "\r\n" (relies on null-termination)
                      *output = '\n';
                      ++output;
                      if (--pending == 0) break;
                  } else {
                      *output = c;
                      ++output;
                  }
              }
          }
          // All newline sequences are rewritten; copy the remaining chars verbatim.
      #if UNALIGNED_READS
          if (src != stop) {
              int rest = PositiveDistance(src, stop);
              if ((unchecked((int)output) & 2) != 0) { // align the destination
                  *output = *src;
                  ++src; ++output; --rest;
              }
              while (rest >= 8) {
                  ((int*)output)[0] = ((int*)src)[0];
                  ((int*)output)[1] = ((int*)src)[1];
                  ((int*)output)[2] = ((int*)src)[2];
                  ((int*)output)[3] = ((int*)src)[3];
                  src += 8; output += 8; rest -= 8;
              }
              if ((rest & 4) != 0) {
                  ((int*)output)[0] = ((int*)src)[0];
                  ((int*)output)[1] = ((int*)src)[1];
                  src += 4; output += 4;
              }
              if ((rest & 2) != 0) {
                  ((int*)output)[0] = ((int*)src)[0];
                  src += 2; output += 2;
              }
              if ((rest & 1) != 0) {
                  *output = *src;
              }
          }
      #else
          for (; src < stop; ++src, ++output) {
              *output = *src;
          }
      #endif
      }
      return result;
  }
  // There is no Buffer.BlockCopy overload that takes pointers (probably for pedagogical
  // reasons), hence this hand-written version.
  /// <summary>Copies n bytes from src_ to dst_. The regions may overlap: the copy runs
  /// front-to-back when dst_ is below src_ and back-to-front otherwise.
  /// Does nothing if n is not positive.</summary>
  static private void MemMove(void* dst_, void* src_, int n) {
      if (n <= 0) return;
      byte* to = (byte*)dst_;
      byte* from = (byte*)src_;
      // The pointers are assumed to be aligned.
      if (to < from) {
          // Front-to-back: whole 16-byte groups first, then the 8/4/2/1-byte tail.
          for (; n >= 16; n -= 16) {
              ((int*)to)[0] = ((int*)from)[0];
              ((int*)to)[1] = ((int*)from)[1];
              ((int*)to)[2] = ((int*)from)[2];
              ((int*)to)[3] = ((int*)from)[3];
              from += 16; to += 16;
          }
          if (n == 0) return;
          // n is now in [1, 15]; its bits select the remaining copies.
          if ((n & 8) != 0) {
              ((int*)to)[0] = ((int*)from)[0];
              ((int*)to)[1] = ((int*)from)[1];
              from += 8; to += 8;
          }
          if ((n & 4) != 0) {
              ((int*)to)[0] = ((int*)from)[0];
              from += 4; to += 4;
          }
          if ((n & 2) != 0) {
              ((short*)to)[0] = ((short*)from)[0];
              from += 2; to += 2;
          }
          if ((n & 1) != 0) {
              to[0] = from[0];
          }
      } else {
          // Back-to-front: start one past the end, peel off the 1/2/4/8-byte tail,
          // then walk down in 16-byte groups.
          from += n; to += n;
          if ((n & 1) != 0) {
              from -= 1; to -= 1; n -= 1;
              to[0] = from[0];
          }
          if ((n & 2) != 0) {
              from -= 2; to -= 2; n -= 2;
              ((short*)to)[0] = ((short*)from)[0];
          }
          if ((n & 4) != 0) {
              from -= 4; to -= 4; n -= 4;
              ((int*)to)[0] = ((int*)from)[0];
          }
          if ((n & 8) != 0) {
              from -= 8; to -= 8; n -= 8;
              ((int*)to)[1] = ((int*)from)[1];
              ((int*)to)[0] = ((int*)from)[0];
          }
          // n is now a multiple of 16.
          for (; n >= 16; n -= 16) {
              from -= 16; to -= 16;
              ((int*)to)[3] = ((int*)from)[3];
              ((int*)to)[2] = ((int*)from)[2];
              ((int*)to)[1] = ((int*)from)[1];
              ((int*)to)[0] = ((int*)from)[0];
          }
      }
  }
  1910. } // class CharStream
  1911. }
  1912. #endif // !LOW_TRUST