/AvalonEdit/ICSharpCode.AvalonEdit/Xml/TagReader.cs

http://github.com/icsharpcode/ILSpy · C# · 740 lines · 646 code · 41 blank · 53 comment · 187 complexity · 77c8afecec91eee81431195dac97a856 MD5 · raw file

  1. // Copyright (c) AlphaSierraPapa for the SharpDevelop Team (for details please see \doc\copyright.txt)
  2. // This code is distributed under the GNU LGPL (for details please see \doc\license.txt)
  3. using System;
  4. using System.Collections.Generic;
  5. using System.Globalization;
  6. using System.Linq;
  7. using System.Text;
  8. namespace ICSharpCode.AvalonEdit.Xml
  9. {
  10. class TagReader: TokenReader
  11. {
  /// <summary> Parser that owns this reader; consulted for parsing options </summary>
  readonly AXmlParser parser;
  /// <summary> Cache of previously parsed objects, taken from the parser in the constructor </summary>
  readonly TrackedSegmentCollection trackedSegments;
  /// <summary> The complete text that is being read </summary>
  readonly string input;
  /// <summary>
  /// Creates a reader over the given input text.
  /// The tracked segments of the parser are used as the object cache.
  /// </summary>
  public TagReader(AXmlParser parser, string input): base(input)
  {
      this.input = input;
      this.parser = parser;
      this.trackedSegments = parser.TrackedSegments;
  }
  /// <summary>
  /// Variant without a condition: any cached object of type T at the current location is accepted.
  /// </summary>
  bool TryReadFromCacheOrNew<T>(out T res) where T: AXmlObject, new()
  {
      Predicate<T> acceptAnything = delegate { return true; };
      return TryReadFromCacheOrNew(out res, acceptAnything);
  }
  /// <summary>
  /// Looks for a cached object of type T at the current location that satisfies the condition.
  /// On a hit the reader skips over the cached object and returns it (result is true).
  /// On a miss a fresh, empty instance is handed out for the caller to fill (result is false).
  /// </summary>
  bool TryReadFromCacheOrNew<T>(out T res, Predicate<T> condition) where T: AXmlObject, new()
  {
      T hit = trackedSegments.GetCachedObject<T>(this.CurrentLocation, 0, condition);
      if (hit != null) {
          // Move past the text that the cached object already covers
          Skip(hit.Length);
          AXmlParser.Assert(hit.Length > 0, "cached elements must not have zero length");
          res = hit;
          return true;
      }
      res = new T();
      return false;
  }
  /// <summary>
  /// Logs the freshly parsed object and registers it in the tracked segments.
  /// The maximal touched location is passed along only if it lies beyond the current location.
  /// </summary>
  void OnParsed(AXmlObject obj)
  {
      AXmlParser.Log("Parsed {0}", obj);
      int? touchedBeyondEnd = null;
      if (this.MaxTouchedLocation > this.CurrentLocation) {
          touchedBeyondEnd = (int?)this.MaxTouchedLocation;
      }
      trackedSegments.AddParsedObject(obj, touchedBeyondEnd);
  }
  /// <summary>
  /// Reads the whole document as a flat sequence of tags.
  /// The sequence also contains the text between the tags and may contain
  /// properly nested elements that were taken from the cache as a whole.
  /// </summary>
  public List<AXmlObject> ReadAllTags()
  {
      var result = new List<AXmlObject>();
      while (!IsEndOfFile()) {
          if (!TryPeek('<')) {
              // Anything that does not start a tag is character data
              result.AddRange(ReadText(TextType.CharacterData));
              continue;
          }
          AXmlElement cachedElement;
          bool fromCache = TryReadFromCacheOrNew(out cachedElement, e => e.IsProperlyNested);
          if (fromCache) {
              result.Add(cachedElement);
          } else {
              result.Add(ReadTag());
          }
      }
      return result;
  }
  /// <summary>
  /// Reads one tag: the opening bracket, the name, the content
  /// (attributes, DTD content or plain text) and the closing bracket.
  /// Syntax errors are attached to the returned tag.
  /// Context: "&lt;"
  /// </summary>
  AXmlTag ReadTag()
  {
      AssertHasMoreData();

      AXmlTag tag;
      if (TryReadFromCacheOrNew(out tag)) return tag;

      tag.StartOffset = this.CurrentLocation;

      // The opening bracket identifies the type of the tag
      // and therefore how the rest of it has to be parsed
      tag.OpeningBracket = ReadOpeningBracket();

      if (tag.IsUnknownBang && !TryPeekWhiteSpace())
          OnSyntaxError(tag, tag.StartOffset, this.CurrentLocation, "Unknown tag");

      // Name
      if (tag.IsStartOrEmptyTag || tag.IsEndTag || tag.IsProcessingInstruction) {
          string tagName;
          if (!TryReadName(out tagName)) {
              OnSyntaxError(tag, "Element name expected");
          } else if (!IsValidName(tagName)) {
              OnSyntaxError(tag, this.CurrentLocation - tagName.Length, this.CurrentLocation, "The name '{0}' is invalid", tagName);
          }
          tag.Name = tagName;
      } else {
          tag.Name = string.Empty;
      }

      // Content
      bool xmlDeclaration = tag.StartOffset == 0 && tag.Name == "xml";
      if (tag.IsStartOrEmptyTag || tag.IsEndTag || xmlDeclaration) {
          ReadAttributesOfTag(tag);
      } else if (tag.IsDocumentType) {
          tag.AddChildren(ReadContentOfDTD());
      } else {
          int contentStart = this.CurrentLocation;
          IEnumerable<AXmlObject> content;
          if (tag.IsComment) {
              content = ReadText(TextType.Comment);
          } else if (tag.IsCData) {
              content = ReadText(TextType.CData);
          } else if (tag.IsProcessingInstruction) {
              content = ReadText(TextType.ProcessingInstruction);
          } else if (tag.IsUnknownBang) {
              content = ReadText(TextType.UnknownBang);
          } else {
              throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket));
          }
          // ReadText is lazy - force the reading to happen now
          content = content.ToList();
          // If no terminator was found, backtrack to the start of the content
          if (IsEndOfFile() || (tag.IsUnknownBang && TryPeek('<'))) {
              GoBack(contentStart);
          } else {
              tag.AddChildren(content);
          }
      }

      // Closing bracket
      string closing;
      TryReadClosingBracket(out closing);
      tag.ClosingBracket = closing;
      ReportClosingBracketErrors(tag);

      // An attribute name may not appear multiple times
      var attributesByName = tag.Children.OfType<AXmlAttribute>().GroupBy(a => a.Name);
      var repeated = attributesByName.SelectMany(sameName => sameName.Skip(1));
      foreach (AXmlAttribute duplicate in repeated) {
          OnSyntaxError(tag, duplicate.StartOffset, duplicate.EndOffset, "Attribute with name '{0}' already exists", duplicate.Name);
      }

      tag.EndOffset = this.CurrentLocation;
      OnParsed(tag);
      return tag;
  }

  /// <summary>
  /// Reads the attributes (and the whitespace between them) of a tag
  /// until the end of file, a '&lt;' or any closing bracket is reached.
  /// The closing bracket itself is not consumed.
  /// </summary>
  void ReadAttributesOfTag(AXmlTag tag)
  {
      // Check for all characters that can not be part of a name first - see ReadName
      while (!IsEndOfFile()) {
          if (TryPeekWhiteSpace()) {
              tag.AddChildren(ReadText(TextType.WhiteSpace));
              continue; // End of file might be next
          }
          if (TryPeek('<')) break;
          string closing;
          int beforeBracket = this.CurrentLocation;
          if (TryReadClosingBracket(out closing)) {
              // It was meant as a peek only - undo the read
              GoBack(beforeBracket);
              break;
          }
          // We have "=\'\"" or a name - read an attribute
          AXmlAttribute attribute = ReadAttribulte();
          tag.AddChild(attribute);
          if (tag.IsEndTag)
              OnSyntaxError(tag, attribute.StartOffset, attribute.EndOffset, "Attribute not allowed in end tag.");
      }
  }

  /// <summary>
  /// Reports a syntax error if the closing bracket that was just read
  /// does not match the type of the tag.
  /// Nothing is reported for a tag with a null name because
  /// an error was already reported for it.
  /// </summary>
  void ReportClosingBracketErrors(AXmlTag tag)
  {
      int bracketStart = this.CurrentLocation - (tag.ClosingBracket ?? string.Empty).Length;
      int bracketEnd = this.CurrentLocation;
      if (tag.Name == null) return;

      bool matches;
      string expectation;
      if (tag.IsStartOrEmptyTag) {
          matches = tag.ClosingBracket == ">" || tag.ClosingBracket == "/>";
          expectation = "'>' or '/>' expected";
      } else if (tag.IsEndTag) {
          matches = tag.ClosingBracket == ">";
          expectation = "'>' expected";
      } else if (tag.IsComment) {
          matches = tag.ClosingBracket == "-->";
          expectation = "'-->' expected";
      } else if (tag.IsCData) {
          matches = tag.ClosingBracket == "]]>";
          expectation = "']]>' expected";
      } else if (tag.IsProcessingInstruction) {
          matches = tag.ClosingBracket == "?>";
          expectation = "'?>' expected";
      } else if (tag.IsUnknownBang) {
          matches = tag.ClosingBracket == ">";
          expectation = "'>' expected";
      } else if (tag.IsDocumentType) {
          matches = tag.ClosingBracket == ">";
          expectation = "'>' expected";
      } else {
          throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket));
      }
      if (!matches) OnSyntaxError(tag, bracketStart, bracketEnd, expectation);
  }
  /// <summary>
  /// Reads any of the known opening brackets. (only the full bracket)
  /// Throws InternalException if the input does not start with '&lt;'.
  /// Context: "&lt;"
  /// </summary>
  string ReadOpeningBracket()
  {
      // String literals are returned so that the string instances are shared
      if (!TryRead('<')) throw new InternalException("'<' expected");
      if (TryRead('/')) return "</";
      if (TryRead('?')) return "<?";
      if (!TryRead('!')) return "<";

      // Some kind of "<!..." tag
      if (TryRead("--")) return "<!--";
      if (TryRead("[CDATA[")) return "<![CDATA[";
      foreach (string dtdName in AXmlTag.DtdNames) {
          // The dtdName includes "<!" which was already read
          string afterBang = dtdName.Substring(2);
          if (TryRead(afterBang)) return dtdName;
      }
      return "<!";
  }
  /// <summary>
  /// Reads any of the known closing brackets. (only the full bracket)
  /// The bracket is set to the empty string if none was found.
  /// Context: any
  /// </summary>
  bool TryReadClosingBracket(out string bracket)
  {
      // String literals are assigned so that the string instances are shared
      if (TryRead('>')) { bracket = ">"; return true; }
      if (TryRead("/>")) { bracket = "/>"; return true; }
      if (TryRead("?>")) { bracket = "?>"; return true; }
      if (TryRead("-->")) { bracket = "-->"; return true; }
      if (TryRead("]]>")) { bracket = "]]>"; return true; }
      bracket = string.Empty;
      return false;
  }
  /// <summary>
  /// Reads the content of a document type declaration up to (excluding) the
  /// closing '>' or a '&lt;' of malformed input.
  /// Tags inside of the nested "[...]" section are returned as tags,
  /// everything else is returned as plain text.
  /// </summary>
  IEnumerable<AXmlObject> ReadContentOfDTD()
  {
      int pendingTextStart = this.CurrentLocation;
      while (!IsEndOfFile()) {
          TryMoveToNonWhiteSpace();              // Skip whitespace
          if (TryRead('\'')) TryMoveTo('\'');    // Skip single quoted string TODO: Bug
          if (TryRead('"')) TryMoveTo('"');      // Skip double quoted string
          if (TryRead('[')) {
              // Nested section - read the tags in it
              while (!IsEndOfFile()) {
                  TryMoveToAnyOf('<', ']');
                  if (TryPeek('<')) {
                      // No text is emitted between two directly following tags
                      if (pendingTextStart != this.CurrentLocation) {
                          yield return MakeText(pendingTextStart, this.CurrentLocation);
                      }
                      yield return ReadTag();
                      pendingTextStart = this.CurrentLocation;
                  }
                  if (TryPeek(']')) break;
              }
          }
          TryRead(']');                          // End of nested section
          if (TryPeek('>') || TryPeek('<')) {
              // Proper closing or malformed XML
              break;
          }
          TryMoveNext();                         // Skip anything else
      }
      if (pendingTextStart != this.CurrentLocation) {
          yield return MakeText(pendingTextStart, this.CurrentLocation);
      }
  }
  /// <summary>
  /// Reads one attribute: name, equals sign (with surrounding whitespace) and value.
  /// Malformed input is accepted as far as possible and reported as syntax errors on the attribute.
  /// Context: name or "=\'\""
  /// </summary>
  AXmlAttribute ReadAttribulte()
  {
      AssertHasMoreData();

      AXmlAttribute attr;
      if (TryReadFromCacheOrNew(out attr)) return attr;

      attr.StartOffset = this.CurrentLocation;

      // Name
      string attrName;
      if (!TryReadName(out attrName)) {
          OnSyntaxError(attr, "Attribute name expected");
      } else if (!IsValidName(attrName)) {
          OnSyntaxError(attr, this.CurrentLocation - attrName.Length, this.CurrentLocation, "The name '{0}' is invalid", attrName);
      }
      attr.Name = attrName;

      // Equals sign and the whitespace around it
      int beforeEquals = this.CurrentLocation;
      TryMoveToNonWhiteSpace();
      if (!TryRead('=')) {
          GoBack(beforeEquals);
          OnSyntaxError(attr, "'=' expected");
          attr.EqualsSign = string.Empty;
      } else {
          int afterEquals = this.CurrentLocation;
          TryMoveToNonWhiteSpace();
          bool quoteFollows = TryPeek('"') || TryPeek('\'');
          if (!quoteFollows) {
              // The whitespace is not part of the equals sign if no quote follows
              GoBack(afterEquals);
          }
          attr.EqualsSign = GetText(beforeEquals, this.CurrentLocation);
      }

      // Value
      int quotedValueStart = this.CurrentLocation;
      char quoteChar = TryPeek('"') ? '"' : '\'';
      bool startsWithQuote = TryRead(quoteChar);
      if (startsWithQuote) {
          int valueStart = this.CurrentLocation;
          TryMoveToAnyOf(quoteChar, '<');
          if (!TryRead(quoteChar)) {
              // Stopped at '<' or at the end of file
              GoBack(valueStart);
              ReadAttributeValue(quoteChar);
              OnSyntaxError(attr, "Quote {0} expected", quoteChar);
          } else if (!TryPeekAnyOf(' ', '\t', '\n', '\r', '/', '>', '?')) {
              bool looksLikeNextAttribute = TryPeekPrevious('=', 2) || (TryPeekPrevious('=', 3) && TryPeekPrevious(' ', 2));
              if (looksLikeNextAttribute) {
                  // Most likely the quote that was found opens the value of the next attribute
                  GoBack(valueStart);
                  ReadAttributeValue(quoteChar);
                  if (TryRead(quoteChar)) {
                      OnSyntaxError(attr, "White space or end of tag expected");
                  } else {
                      OnSyntaxError(attr, "Quote {0} expected (or add whitespace after the following one)", quoteChar);
                  }
              } else {
                  OnSyntaxError(attr, "White space or end of tag expected");
              }
          }
      } else {
          int valueStart = this.CurrentLocation;
          ReadAttributeValue(null);
          // Accept a stray closing quote
          TryRead('"');
          TryRead('\'');
          if (valueStart == this.CurrentLocation) {
              OnSyntaxError(attr, "Attribute value expected");
          } else {
              OnSyntaxError(attr, valueStart, this.CurrentLocation, "Attribute value must be quoted");
          }
      }

      attr.QuotedValue = GetText(quotedValueStart, this.CurrentLocation);
      attr.Value = Unquote(attr.QuotedValue);
      int unquotedLocation = startsWithQuote ? quotedValueStart + 1 : quotedValueStart;
      attr.Value = Dereference(attr, attr.Value, unquotedLocation);
      attr.EndOffset = this.CurrentLocation;

      OnParsed(attr);
      return attr;
  }
  /// <summary>
  /// Reads everything up to (excluding) a quote, an opening or closing bracket
  /// or something that looks like the start of another attribute.
  /// If no quote is given, both kinds of quotes terminate the value.
  /// </summary>
  void ReadAttributeValue(char? quote)
  {
      while (!IsEndOfFile()) {
          // What is next?
          int iterationStart = this.CurrentLocation;
          TryMoveToNonWhiteSpace();   // Read whitespace (if any)

          bool atQuote = quote.HasValue
              ? TryPeek(quote.Value)
              : (TryPeek('"') || TryPeek('\''));
          if (atQuote) return;

          // Opening or closing bracket - the whitespace before it is not part of the value
          string ignoredBracket;
          bool atBracket = TryPeek('<') || TryReadClosingBracket(out ignoredBracket);
          if (atBracket) {
              GoBack(iterationStart);
              return;
          }

          string ignoredName;
          if (!TryReadName(out ignoredName)) {
              TryMoveNext();          // Accept everything else
              continue;
          }

          // A name was read - is it followed by an equals sign and a quote?
          int nameEnd = this.CurrentLocation;
          bool attributeStarts = TryMoveToNonWhiteSpace() && TryRead("=") &&
                                 TryMoveToNonWhiteSpace() && TryPeekAnyOf('"', '\'');
          if (attributeStarts) {
              // The next attribute starts here - the value is complete
              GoBack(iterationStart);
              return;
          }
          // Just some garbage - make the name a part of the value and read more
          GoBack(nameEnd);
      }
  }
  /// <summary>
  /// Creates a text object of type 'Other' for the given range of the input
  /// and registers it as parsed.
  /// </summary>
  AXmlText MakeText(int start, int end)
  {
      AXmlParser.DebugAssert(end > start, "Empty text");
      AXmlText fragment = new AXmlText();
      fragment.StartOffset = start;
      fragment.EndOffset = end;
      fragment.EscapedValue = GetText(start, end);
      fragment.Type = TextType.Other;
      OnParsed(fragment);
      return fragment;
  }
  // Window (in characters) that is searched for an entity reference.
  // The longest built-in one is 10 ("&#1114111;")
  const int maxEntityLength = 16;
  // Maximal number of characters that are read into one text fragment
  const int maxTextFragmentSize = 64;
  // One and a half fragments - more than a fragment so that we do not get small "what was inserted" fragments
  const int lookAheadLength = maxTextFragmentSize * 3 / 2;
  /// <summary>
  /// Reads text of the given type and optionally separates it into fragments.
  /// The result can be empty if there is no appropriate text in the input.
  /// The reading happens during the enumeration, so make sure to enumerate the result only once.
  /// </summary>
  IEnumerable<AXmlObject> ReadText(TextType type)
  {
      bool alignToCache = false;
      for (;;) {
          AXmlText fragment;
          if (TryReadFromCacheOrNew(out fragment, t => t.Type == type)) {
              // Cached text found
              yield return fragment;
              continue; // Read the next fragment; the code below can handle "no text left"
          }
          fragment.Type = type;

          // Limit the reading to just a few characters
          // (the limit is the first character not to be read)
          int limit = Math.Min(this.CurrentLocation + maxTextFragmentSize, this.InputLength);

          // Look if some further text has been already processed and align so that
          // we hit that cache point. It is expensive so it is off for the first run
          if (alignToCache) {
              // Note: Must fit an entity
              AXmlObject laterFragment = trackedSegments.GetCachedObject<AXmlText>(this.CurrentLocation + maxEntityLength, lookAheadLength - maxEntityLength, t => t.Type == type);
              if (laterFragment != null) {
                  limit = Math.Min(laterFragment.StartOffset, this.InputLength);
                  AXmlParser.Log("Parsing only text ({0}-{1}) because later text was already processed", this.CurrentLocation, limit);
              }
          }
          alignToCache = true;

          fragment.StartOffset = this.CurrentLocation;
          int fragmentStart = this.CurrentLocation;

          // Whitespace would be skipped anyway by any operation
          TryMoveToNonWhiteSpace(limit);
          int whitespaceEnd = this.CurrentLocation;

          // Try to move to the terminator given by the context
          MoveToTextTerminator(fragment, type, limit);

          fragment.ContainsOnlyWhitespace = (whitespaceEnd == this.CurrentLocation);

          // Either the terminator was found or the real end was reached
          bool done = this.CurrentLocation < limit || IsEndOfFile();

          if (!done) {
              // More fragments will follow.
              // If there is an entity reference, make sure the next fragment starts with it to prevent fragmentation
              int searchFrom = Math.Max(fragmentStart + 1 /* data for us */, this.CurrentLocation - maxEntityLength);
              int searchLength = this.CurrentLocation - searchFrom;
              if (searchLength > 0) {
                  // Note that LastIndexOf works backward
                  int ampersand = input.LastIndexOf('&', this.CurrentLocation - 1, searchLength);
                  if (ampersand != -1) {
                      GoBack(ampersand);
                  }
              }
          }

          fragment.EscapedValue = GetText(fragmentStart, this.CurrentLocation);
          if (type == TextType.CharacterData) {
              // Normalize end of line first
              fragment.Value = Dereference(fragment, NormalizeEndOfLine(fragment.EscapedValue), fragmentStart);
          } else {
              fragment.Value = fragment.EscapedValue;
          }
          fragment.EndOffset = this.CurrentLocation;

          if (fragment.EscapedValue.Length > 0) {
              OnParsed(fragment);
              yield return fragment;
          }

          if (done) {
              yield break;
          }
      }
  }

  /// <summary>
  /// Moves forward to the terminator of text of the given type,
  /// but not further than to the given fragment end.
  /// Syntax errors found on the way are attached to the given text.
  /// </summary>
  void MoveToTextTerminator(AXmlText text, TextType type, int fragmentEnd)
  {
      if (type == TextType.WhiteSpace) {
          TryMoveToNonWhiteSpace(fragmentEnd);
      } else if (type == TextType.CharacterData) {
          while (TryMoveToAnyOf(new char[] {'<', ']'}, fragmentEnd)) {
              if (TryPeek('<')) break;
              if (!TryPeek(']')) throw new Exception("Infinite loop");
              if (TryPeek("]]>")) {
                  OnSyntaxError(text, this.CurrentLocation, this.CurrentLocation + 3, "']]>' is not allowed in text");
              }
              TryMoveNext();
          }
      } else if (type == TextType.Comment) {
          // Do not report too many errors
          bool errorReported = false;
          while (TryMoveTo('-', fragmentEnd)) {
              if (TryPeek("-->")) break;
              if (TryPeek("--") && !errorReported) {
                  OnSyntaxError(text, this.CurrentLocation, this.CurrentLocation + 2, "'--' is not allowed in comment");
                  errorReported = true;
              }
              TryMoveNext();
          }
      } else if (type == TextType.CData) {
          // We can not use TryMoveTo("]]>", fragmentEnd) because it may incorrectly accept "]" at the end of the fragment
          while (TryMoveTo(']', fragmentEnd)) {
              if (TryPeek("]]>")) break;
              TryMoveNext();
          }
      } else if (type == TextType.ProcessingInstruction) {
          while (TryMoveTo('?', fragmentEnd)) {
              if (TryPeek("?>")) break;
              TryMoveNext();
          }
      } else if (type == TextType.UnknownBang) {
          TryMoveToAnyOf(new char[] {'<', '>'}, fragmentEnd);
      } else {
          throw new Exception("Uknown type " + type);
      }
  }
  516. #region Helper methods
  /// <summary>
  /// Reports a syntax error that covers the single character at the current location.
  /// </summary>
  void OnSyntaxError(AXmlObject obj, string message, params object[] args)
  {
      int here = this.CurrentLocation;
      OnSyntaxError(obj, here, here + 1, message, args);
  }
  /// <summary>
  /// Formats the message (invariant culture), logs it and attaches a syntax error
  /// for the given range to the object.
  /// An empty or negative range is widened to one character.
  /// </summary>
  public static void OnSyntaxError(AXmlObject obj, int start, int end, string message, params object[] args)
  {
      int effectiveEnd = end > start ? end : start + 1;
      string text = string.Format(CultureInfo.InvariantCulture, message, args);
      AXmlParser.Log("Syntax error ({0}-{1}): {2}", start, effectiveEnd, text);
      SyntaxError error = new SyntaxError();
      error.Object = obj;
      error.StartOffset = start;
      error.EndOffset = effectiveEnd;
      error.Message = text;
      obj.AddSyntaxError(error);
  }
  /// <summary>
  /// Checks whether the given string is a valid XML name.
  /// </summary>
  /// <returns>
  /// true if XmlConvert.VerifyName accepts the name;
  /// false if it rejects it or if the name is null or empty.
  /// </returns>
  static bool IsValidName(string name)
  {
      // VerifyName throws ArgumentNullException (not XmlException) for null or empty input,
      // which would escape the handler below - such a name is simply not valid
      if (string.IsNullOrEmpty(name)) return false;
      try {
          System.Xml.XmlConvert.VerifyName(name);
          return true;
      } catch (System.Xml.XmlException) {
          return false;
      }
  }
  /// <summary>
  /// Strips the quotes from an attribute value.
  /// A leading quote is always removed; the trailing quote is removed with it only if it is
  /// the same character. Without a leading quote a trailing quote of any kind is removed.
  /// Null or empty input yields the empty string.
  /// </summary>
  static string Unquote(string quoted)
  {
      if (string.IsNullOrEmpty(quoted)) return string.Empty;

      char head = quoted[0];
      bool headIsQuote = head == '"' || head == '\'';
      if (quoted.Length == 1) return headIsQuote ? string.Empty : quoted;

      char tail = quoted[quoted.Length - 1];
      if (headIsQuote) {
          // Drop both quotes if they match, otherwise just the leading one
          int contentLength = (tail == head) ? quoted.Length - 2 : quoted.Length - 1;
          return quoted.Substring(1, contentLength);
      }

      bool tailIsQuote = tail == '"' || tail == '\'';
      return tailIsQuote ? quoted.Substring(0, quoted.Length - 1) : quoted;
  }
  /// <summary>
  /// Converts all "\r\n" and stand-alone "\r" line ends to "\n".
  /// </summary>
  static string NormalizeEndOfLine(string text)
  {
      string withoutCrLf = text.Replace("\r\n", "\n");
      return withoutCrLf.Replace('\r', '\n');
  }
  /// <summary>
  /// Replaces the entity references in the text by the characters they stand for.
  /// The five predefined entities and decimal/hexadecimal character references are resolved;
  /// anything else is kept in the output as it was written.
  /// Problems are reported as syntax errors on the owner.
  /// </summary>
  /// <param name="owner"> Object that receives the syntax errors </param>
  /// <param name="text"> Text that may contain entity references </param>
  /// <param name="textLocation"> Offset of the first character of the text; used for error locations </param>
  /// <returns> The dereferenced text; the same string instance if it contains no '&amp;' </returns>
  string Dereference(AXmlObject owner, string text, int textLocation)
  {
      StringBuilder sb = null;  // The dereferenced text so far (all up to 'curr')
      int curr = 0;
      while (true) {
          // Reached end of input
          if (curr == text.Length) {
              return sb != null ? sb.ToString() : text;
          }

          // Try to find a reference
          int start = text.IndexOf('&', curr);

          // No more references found
          if (start == -1) {
              if (sb == null) return text;
              sb.Append(text, curr, text.Length - curr); // Add the rest
              return sb.ToString();
          }

          // Append the text before the entity reference
          if (sb == null) sb = new StringBuilder(text.Length);
          sb.Append(text, curr, start - curr);
          curr = start;

          // Location of the '&'. It has to be derived from the offset in the input text:
          // the output built so far is shorter than the input once an entity was expanded
          int errorLoc = textLocation + start;

          // Find the end of the entity name (search only a limited number of characters)
          int end = text.IndexOfAny(new char[] {'&', ';'}, start + 1, Math.Min(maxEntityLength, text.Length - (start + 1)));
          if (end == -1 || text[end] == '&') {
              // Not found
              OnSyntaxError(owner, errorLoc, errorLoc + 1, "Entity reference must be terminated with ';'");
              // Keep the '&'
              sb.Append('&');
              curr++;
              continue; // Restart at the next character
          }
          string name = text.Substring(start + 1, end - (start + 1));

          // Resolve the name
          string replacement = null;
          if (name.Length == 0) {
              OnSyntaxError(owner, errorLoc + 1, errorLoc + 1, "Entity name expected");
          } else if (name == "amp") {
              replacement = "&";
          } else if (name == "lt") {
              replacement = "<";
          } else if (name == "gt") {
              replacement = ">";
          } else if (name == "apos") {
              replacement = "'";
          } else if (name == "quot") {
              replacement = "\"";
          } else if (name[0] == '#') {
              // Character reference. The success of the parsing is tracked in a flag
              // because every int (including -1 from "FFFFFFFF") can be a parse result
              int num;
              bool parsed;
              if (name.Length > 1 && name[1] == 'x') {
                  parsed = int.TryParse(name.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture.NumberFormat, out num);
                  if (!parsed) {
                      OnSyntaxError(owner, errorLoc + 3, errorLoc + 1 + name.Length, "Hexadecimal code of unicode character expected");
                  }
              } else {
                  parsed = int.TryParse(name.Substring(1), NumberStyles.None, CultureInfo.InvariantCulture.NumberFormat, out num);
                  if (!parsed) {
                      OnSyntaxError(owner, errorLoc + 2, errorLoc + 1 + name.Length, "Numeric code of unicode character expected");
                  }
              }
              if (parsed) {
                  try {
                      replacement = char.ConvertFromUtf32(num);
                  } catch (ArgumentOutOfRangeException) {
                      // Not a valid code point (out of range or a surrogate)
                      OnSyntaxError(owner, errorLoc + 2, errorLoc + 1 + name.Length, "Invalid unicode character U+{0:X} ({0})", num);
                  }
              }
          } else if (!IsValidName(name)) {
              OnSyntaxError(owner, errorLoc + 1, errorLoc + 1, "Invalid entity name");
          } else {
              // Well-formed but unknown entity
              if (parser.UnknownEntityReferenceIsError) {
                  OnSyntaxError(owner, errorLoc, errorLoc + 1 + name.Length + 1, "Unknown entity reference '{0}'", name);
              }
          }

          // Append the replacement to the output; unresolved references are kept as written
          if (replacement != null) {
              sb.Append(replacement);
          } else {
              sb.Append('&');
              sb.Append(name);
              sb.Append(';');
          }
          curr = end + 1;
      }
  }
  671. #endregion
  672. }
  673. }