PageRenderTime 54ms CodeModel.GetById 9ms RepoModel.GetById 0ms app.codeStats 0ms

/NBoilerpipePortable/Util/SGML/SgmlParser.cs

https://github.com/hippiehunter/Baconography
C# | 3222 lines | 2856 code | 121 blank | 245 comment | 356 complexity | 8cdcc85c0288b0626b85742f57e3d3e3 MD5 | raw file
  1. /*
  2. *
  3. * Copyright (c) 2007-2013 MindTouch. All rights reserved.
  4. * www.mindtouch.com oss@mindtouch.com
  5. *
  6. * For community documentation and downloads visit wiki.developer.mindtouch.com;
  7. * please review the licensing section.
  8. *
  9. * Licensed under the Apache License, Version 2.0 (the "License");
  10. * you may not use this file except in compliance with the License.
  11. * You may obtain a copy of the License at
  12. *
  13. * http://www.apache.org/licenses/LICENSE-2.0
  14. *
  15. * Unless required by applicable law or agreed to in writing, software
  16. * distributed under the License is distributed on an "AS IS" BASIS,
  17. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18. * See the License for the specific language governing permissions and
  19. * limitations under the License.
  20. *
  21. */
  22. using System;
  23. using System.Collections;
  24. using System.Collections.Generic;
  25. using System.Diagnostics.CodeAnalysis;
  26. using System.Globalization;
  27. using System.IO;
  28. using System.Net;
  29. using System.Runtime.Serialization;
  30. using System.Text;
  31. using System.Xml;
  32. namespace Sgml {
  /// <summary>
  /// Exception raised when the SGML source cannot be parsed.
  /// </summary>
  public class SgmlParseException : Exception
  {
      /// <summary>
      /// Creates an exception that carries no error details.
      /// </summary>
      public SgmlParseException()
      {
      }

      /// <summary>
      /// Creates an exception with a message describing the failure.
      /// </summary>
      /// <param name="message">Text describing what went wrong.</param>
      public SgmlParseException(string message)
          : base(message)
      {
      }

      /// <summary>
      /// Creates an exception with a message and, when an entity is supplied,
      /// captures that entity's context string.
      /// </summary>
      /// <param name="message">Text describing what went wrong.</param>
      /// <param name="e">The entity being processed when the error occurred; may be null.</param>
      public SgmlParseException(string message, Entity e)
          : base(message)
      {
          EntityContext = (e == null) ? null : e.Context();
      }

      /// <summary>
      /// Creates an exception with a message that wraps the exception which triggered it.
      /// </summary>
      /// <param name="message">Text describing what went wrong.</param>
      /// <param name="innerException">The underlying exception.</param>
      public SgmlParseException(string message, Exception innerException)
          : base(message, innerException)
      {
      }

      /// <summary>
      /// Context text captured from the entity passed to the constructor,
      /// or null when no entity was supplied.
      /// </summary>
      public string EntityContext { get; private set; }
  }
  /// <summary>
  /// Kinds of literal text that the SgmlParser can hand back.
  /// </summary>
  public enum LiteralType
  {
      /// <summary>
      /// A CDATA text literal.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      CDATA,

      /// <summary>
      /// An SDATA entity.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      SDATA,

      /// <summary>
      /// The body of a Processing Instruction.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      PI
  }
  105. /// <summary>
  106. /// An Entity declared in a DTD.
  107. /// </summary>
  108. public class Entity : IDisposable
  109. {
  /// <summary>
  /// Sentinel character that marks End Of File.
  /// </summary>
  [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "The capitalisation is correct since EOF is an acronym.")]
  public const char EOF = '\uFFFF';

  // --- Identity, as supplied to the constructors ---
  private string m_name;
  private string m_publicId;
  private string m_uri;
  private string m_proxy;
  private string m_literal;
  private LiteralType m_literalType;
  private bool m_isInternal;
  private bool m_isHtml;

  // --- Relationship to other entities and resolved location ---
  private Entity m_parent;
  private Uri m_resolvedUri;

  // --- Input source ---
  private TextReader m_stm;
  private Encoding m_encoding;
  private bool m_weOwnTheStream; // Close() disposes m_stm only when this is set

  // --- Reading state maintained by ReadChar() ---
  private char m_lastchar;       // most recent character returned by ReadChar()
  private bool m_isWhitespace;   // whether m_lastchar was classified as whitespace
  private int m_line;            // line counter; Open() starts it at 1
  private int m_lineStart;       // value of m_absolutePos recorded at the last line break
  private int m_absolutePos;     // number of characters read so far
  /// <summary>
  /// Creates an external entity identified by a public id and/or a uri.
  /// </summary>
  /// <param name="name">The entity name.</param>
  /// <param name="pubid">The entity's public identifier.</param>
  /// <param name="uri">The uri the entity is read from.</param>
  /// <param name="proxy">Proxy server to use when fetching web content.</param>
  public Entity(string name, string pubid, string uri, string proxy)
  {
      this.m_name = name;
      this.m_publicId = pubid;
      this.m_uri = uri;
      this.m_proxy = proxy;

      // A null name can never be the html entity.
      bool namedHtml = false;
      if (name != null)
      {
          namedHtml = StringUtilities.EqualsIgnoreCase(name, "html");
      }
      this.m_isHtml = namedHtml;
  }

  /// <summary>
  /// Creates an internal entity whose content is the given literal text.
  /// </summary>
  /// <param name="name">The entity name.</param>
  /// <param name="literal">The literal value of the entity.</param>
  public Entity(string name, string literal)
  {
      this.m_name = name;
      this.m_literal = literal;
      this.m_isInternal = true;
  }

  /// <summary>
  /// Creates an internal entity whose content is read from an existing reader.
  /// </summary>
  /// <param name="name">The entity name.</param>
  /// <param name="baseUri">Stored as the resolved uri for this entity.</param>
  /// <param name="stm">The reader that supplies the entity's characters.</param>
  /// <param name="proxy">Proxy server to use when fetching web content.</param>
  public Entity(string name, Uri baseUri, TextReader stm, string proxy)
  {
      this.m_name = name;
      this.m_proxy = proxy;
      this.m_resolvedUri = baseUri;
      this.m_stm = stm;
      this.m_isInternal = true;
      this.m_isHtml = string.Equals(name, "html", StringComparison.OrdinalIgnoreCase);
  }
  /// <summary>
  /// Gets the entity name.
  /// </summary>
  public string Name
  {
      get { return this.m_name; }
  }

  /// <summary>
  /// Gets or sets whether this entity is treated as the html entity.
  /// </summary>
  public bool IsHtml
  {
      get { return this.m_isHtml; }
      set { this.m_isHtml = value; }
  }

  /// <summary>
  /// Gets the public identifier given at construction.
  /// </summary>
  public string PublicId
  {
      get { return this.m_publicId; }
  }

  /// <summary>
  /// Gets the uri string given at construction as the source of this entity.
  /// </summary>
  public string Uri
  {
      get { return this.m_uri; }
  }

  /// <summary>
  /// Gets this entity's resolved uri, falling back to the nearest ancestor
  /// that has one; null when neither this entity nor any ancestor has one.
  /// </summary>
  public Uri ResolvedUri
  {
      get
      {
          if (this.m_resolvedUri != null)
          {
              return this.m_resolvedUri;
          }
          return (this.m_parent != null) ? this.m_parent.ResolvedUri : null;
      }
  }

  /// <summary>
  /// Gets the parent entity recorded by <c>Open</c>.
  /// </summary>
  public Entity Parent
  {
      get { return this.m_parent; }
  }

  /// <summary>
  /// Gets the most recent character returned by <c>ReadChar</c>.
  /// </summary>
  public char Lastchar
  {
      get { return this.m_lastchar; }
  }

  /// <summary>
  /// Gets the current line counter for this entity.
  /// </summary>
  public int Line
  {
      get { return this.m_line; }
  }

  /// <summary>
  /// Gets the 1-based offset of the current read position from the recorded line start.
  /// </summary>
  public int LinePosition
  {
      get
      {
          int offset = this.m_absolutePos - this.m_lineStart;
          return offset + 1;
      }
  }

  /// <summary>
  /// Gets whether this is an internal entity.
  /// </summary>
  /// <value>true if this entity is internal, otherwise false.</value>
  public bool IsInternal
  {
      get { return this.m_isInternal; }
  }

  /// <summary>
  /// Gets the literal text of this entity.
  /// </summary>
  public string Literal
  {
      get { return this.m_literal; }
  }

  /// <summary>
  /// Gets the <see cref="LiteralType"/> assigned to this entity.
  /// </summary>
  public LiteralType LiteralType
  {
      get { return this.m_literalType; }
  }

  /// <summary>
  /// Gets whether the last character read for this entity was whitespace.
  /// </summary>
  public bool IsWhitespace
  {
      get { return this.m_isWhitespace; }
  }

  /// <summary>
  /// Gets the proxy server used for web requests that resolve entities.
  /// </summary>
  public string Proxy
  {
      get { return this.m_proxy; }
  }
  /// <summary>
  /// Reads one character from the entity's reader and updates the
  /// whitespace, line and position bookkeeping.
  /// </summary>
  /// <returns>The character read; NUL is returned as a space.</returns>
  public char ReadChar()
  {
      char current = (char)this.m_stm.Read();
      if (current == '\0')
      {
          // NUL is not valid in XML, so hand it back as whitespace instead.
          current = ' ';
      }
      this.m_absolutePos++;

      bool isLineFeed = current == '\n';
      bool isCarriageReturn = current == '\r';
      this.m_isWhitespace = isLineFeed || isCarriageReturn || current == ' ' || current == '\t';

      if (isLineFeed)
      {
          this.m_lineStart = this.m_absolutePos + 1;
          this.m_line++;
      }
      else if (!isCarriageReturn && this.m_lastchar == '\r')
      {
          // The previous character was a CR that was not followed by LF:
          // count it as a line break now.
          this.m_lineStart = this.m_absolutePos;
          this.m_line++;
      }

      this.m_lastchar = current;
      return current;
  }
  /// <summary>
  /// Begins processing an entity: records its parent, resets the line
  /// counter and prepares the reader the entity will be read from.
  /// </summary>
  /// <param name="parent">The parent of this entity; its IsHtml flag is inherited when not null.</param>
  /// <param name="baseUri">The base Uri used to resolve this entity's uri; may be null.</param>
  /// <exception cref="SgmlParseException">The entity is external and has no uri.</exception>
  /// <exception cref="NotImplementedException">
  /// The entity is external; loading external content is not implemented here.
  /// </exception>
  public void Open(Entity parent, Uri baseUri)
  {
      this.m_parent = parent;
      if (parent != null)
      {
          this.m_isHtml = parent.IsHtml;
      }
      this.m_line = 1;

      if (this.m_isInternal)
      {
          // An internal entity built from a TextReader already has its reader;
          // only literal-backed entities need one created here.
          if (this.m_literal != null)
          {
              this.m_stm = new StringReader(this.m_literal);
          }
      }
      else if (this.m_uri == null)
      {
          this.Error("Unresolvable entity '{0}'", this.m_name);
      }
      else
      {
          this.m_resolvedUri = (baseUri != null)
              ? new Uri(baseUri, this.m_uri)
              : new Uri(this.m_uri);

          // The original code went on to wrap the fetched stream in an
          // HtmlStream, set m_weOwnTheStream and take the encoding from it,
          // but nothing ever produced the stream. Those statements sat after
          // the throw and could never run, so they have been removed.
          throw new NotImplementedException("Retrieving the content of external entities is not implemented.");
      }
  }
  /// <summary>
  /// Gets the character encoding recorded for this entity.
  /// </summary>
  public Encoding Encoding
  {
      get { return m_encoding; }
  }
  /// <summary>
  /// Disposes the underlying reader, but only when this entity owns it.
  /// </summary>
  public void Close()
  {
      if (!this.m_weOwnTheStream)
      {
          return;
      }
      this.m_stm.Dispose();
  }
  /// <summary>
  /// Starting from the last character read, advances past spaces, tabs,
  /// carriage returns and line feeds.
  /// </summary>
  /// <returns>The first character that is not whitespace (possibly EOF).</returns>
  public char SkipWhitespace()
  {
      char current = m_lastchar;
      // EOF is not one of the whitespace characters, so it ends the loop too.
      while (current == ' ' || current == '\t' || current == '\r' || current == '\n')
      {
          current = ReadChar();
      }
      return current;
  }
  /// <summary>
  /// Collects characters, beginning with the last character read, until a
  /// terminator or EOF is reached.
  /// </summary>
  /// <param name="sb">The <see cref="StringBuilder"/> used as scratch space; it is cleared first.</param>
  /// <param name="term">The set of characters that end the token.</param>
  /// <param name="nmtoken">true to enforce name-token character rules, otherwise false.</param>
  /// <returns>The scanned token.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="sb"/> or <paramref name="term"/> is null.</exception>
  /// <exception cref="SgmlParseException">A name token contains an invalid character.</exception>
  public string ScanToken(StringBuilder sb, string term, bool nmtoken)
  {
      if (sb == null)
      {
          throw new ArgumentNullException("sb");
      }
      if (term == null)
      {
          throw new ArgumentNullException("term");
      }

      sb.Length = 0;
      char current = m_lastchar;

      // A name token has to begin with a letter or an underscore.
      bool validStart = current == '_' || char.IsLetter(current);
      if (nmtoken && !validStart)
      {
          throw new SgmlParseException(string.Format(CultureInfo.CurrentUICulture, "Invalid name start character '{0}'", current));
      }

      for (; current != Entity.EOF && term.IndexOf(current) < 0; current = ReadChar())
      {
          bool accepted = !nmtoken
              || char.IsLetterOrDigit(current)
              || current == '_'
              || current == '.'
              || current == '-'
              || current == ':';
          if (!accepted)
          {
              throw new SgmlParseException(
                  string.Format(CultureInfo.CurrentUICulture, "Invalid name character '{0}'", current));
          }
          sb.Append(current);
      }

      return sb.ToString();
  }
  /// <summary>
  /// Reads a quoted literal from the input stream, expanding numeric
  /// character references ("&amp;#...") as it goes.
  /// </summary>
  /// <param name="sb">The <see cref="StringBuilder"/> used to build the literal; it is cleared first.</param>
  /// <param name="quote">The delimiter that ends the literal.</param>
  /// <returns>The literal scanned from the input stream, without the closing delimiter.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="sb"/> is null.</exception>
  public string ScanLiteral(StringBuilder sb, char quote)
  {
      if (sb == null)
      {
          throw new ArgumentNullException("sb");
      }

      sb.Length = 0;
      char ch = ReadChar();
      while (ch != Entity.EOF && ch != quote)
      {
          if (ch == '&')
          {
              ch = ReadChar();
              if (ch == '#')
              {
                  sb.Append(ExpandCharEntity());
                  // ExpandCharEntity reads ahead; continue from where it stopped.
                  ch = this.m_lastchar;
              }
              else
              {
                  // A plain ampersand. Keep only the '&' and let the loop
                  // examine the following character itself. Appending it
                  // blindly here used to swallow a closing quote, an EOF or
                  // the start of another reference that came right after '&'.
                  sb.Append('&');
              }
          }
          else
          {
              sb.Append(ch);
              ch = ReadChar();
          }
      }

      ReadChar(); // consume end quote.
      return sb.ToString();
  }
  /// <summary>
  /// Reads input until the end of the input stream or until the terminator
  /// sequence is found.
  /// </summary>
  /// <param name="sb">Receives the text that precedes the terminator; may be null to discard it.</param>
  /// <param name="type">The type of the element being read (only used in reporting errors).</param>
  /// <param name="terminators">The exact character sequence that ends the scan.</param>
  /// <returns>The text read, or an empty string when <paramref name="sb"/> is null.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="terminators"/> is null.</exception>
  public string ScanToEnd(StringBuilder sb, string type, string terminators)
  {
      if (terminators == null)
      {
          throw new ArgumentNullException("terminators");
      }
      if (sb != null)
      {
          sb.Length = 0;
      }

      int start = m_line;
      char ch = ReadChar();
      int state = 0; // number of terminator characters matched so far
      char next = terminators[state];

      while (ch != Entity.EOF)
      {
          if (ch == next)
          {
              state++;
              if (state >= terminators.Length)
              {
                  // found it!
                  break;
              }
              next = terminators[state];
          }
          else if (state > 0)
          {
              // The character broke a partial match. Find the longest prefix of
              // the terminator that is also a suffix of what has been seen
              // (the previously matched characters plus this one).
              int newstate = 0;
              int i = state - 1;
              while (i >= 0 && newstate == 0)
              {
                  bool prefixFits = terminators[i] == ch;
                  for (int j = 1; prefixFits && j <= i; j++)
                  {
                      prefixFits = terminators[i - j] == terminators[state - j];
                  }

                  if (prefixFits)
                  {
                      newstate = i + 1;
                  }
                  else
                  {
                      // Always move on to a shorter candidate. Previously the
                      // index was left untouched when the last character matched
                      // but the rest of the prefix did not, which made this loop
                      // spin forever (e.g. terminator "xab" reading "xaa").
                      i--;
                  }
              }

              if (sb != null)
              {
                  if (newstate > 0)
                  {
                      // The current character is part of the new partial match;
                      // flush only the leading characters that dropped out of it.
                      sb.Append(terminators, 0, state - newstate + 1);
                  }
                  else
                  {
                      // Nothing carries over: flush the old partial match and
                      // the current character.
                      sb.Append(terminators, 0, state);
                      sb.Append(ch);
                  }
              }

              state = newstate;
              next = terminators[newstate];
          }
          else if (sb != null)
          {
              sb.Append(ch);
          }

          ch = ReadChar();
      }

      // NOTE(review): ReadChar replaces NUL with a space, so this condition does
      // not appear to be reachable and an unterminated section simply ends at
      // EOF. Left unchanged so that lenient behaviour is preserved.
      if (ch == 0)
      {
          Error(type + " starting on line {0} was never closed", start);
      }

      ReadChar(); // consume last char in termination sequence.
      return (sb != null) ? sb.ToString() : string.Empty;
  }
  /// <summary>
  /// Expands a numeric character reference read from the input stream. The
  /// leading "&amp;#" must already have been consumed.
  /// </summary>
  /// <remarks>
  /// Code points that are not valid Unicode scalar values (out of range, or a
  /// surrogate without its partner) are returned as U+FFFD instead of letting
  /// <see cref="char.ConvertFromUtf32(int)"/> throw on untrusted input.
  /// </remarks>
  /// <returns>
  /// The expanded text, or the raw text that was read when the reference
  /// contained no digits.
  /// </returns>
  public string ExpandCharEntity()
  {
      string value;
      int v = ReadNumericEntityCode(out value);
      if (v == -1)
      {
          return value;
      }

      // HACK ALERT: IE and Netscape map this range of control characters
      // to their Windows-1252 equivalents.
      if (this.m_isHtml && v >= 0x80 && v <= 0x9F)
      {
          int unicode = CtrlMap[v - 0x80];
          return Convert.ToChar(unicode).ToString();
      }

      if (0xD800 <= v && v <= 0xDBFF)
      {
          // High surrogate: a second reference holding the low surrogate
          // is expected to follow immediately.
          if (m_lastchar == '&')
          {
              char ch = ReadChar();
              if (ch == '#')
              {
                  string value2;
                  int v2 = ReadNumericEntityCode(out value2);
                  if (v2 == -1)
                  {
                      return value + ";" + value2;
                  }
                  if (0xDC00 <= v2 && v2 <= 0xDFFF)
                  {
                      // low surrogate
                      v = char.ConvertToUtf32((char)v, (char)v2);
                  }
                  else
                  {
                      // The second reference was already consumed, so emit it
                      // after a replacement for the unpaired high surrogate.
                      return CodePointToString(v) + CodePointToString(v2);
                  }
              }
              else
              {
                  Error("Premature {0} parsing surrogate pair", ch);
              }
          }
          else
          {
              Error("Premature {0} parsing surrogate pair", m_lastchar);
          }
      }

      // NOTE (steveb): we need to use ConvertFromUtf32 to allow for extended numeric encodings
      return CodePointToString(v);
  }

  /// <summary>
  /// Converts a code point to a string, substituting U+FFFD for values that
  /// are not valid Unicode scalar values.
  /// </summary>
  private static string CodePointToString(int codePoint)
  {
      bool inRange = codePoint >= 0 && codePoint <= 0x10FFFF;
      bool isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
      return (inRange && !isSurrogate) ? char.ConvertFromUtf32(codePoint) : "\uFFFD";
  }
  /// <summary>
  /// Reads the digits of a numeric character reference (decimal, or
  /// hexadecimal after an 'x' or 'X' prefix) and consumes a trailing ';'.
  /// </summary>
  /// <param name="value">Receives the raw text read: "&amp;#", the optional hex prefix and the digits.</param>
  /// <returns>
  /// The numeric value, or -1 when no digit was found. Values beyond the
  /// Unicode range stop growing once they pass 0x10FFFF instead of
  /// overflowing, so the result is only exact within the Unicode range.
  /// </returns>
  private int ReadNumericEntityCode(out string value)
  {
      // Once the accumulator reaches this limit no more digits are folded in,
      // so it cannot wrap around into a misleading small or negative value.
      const int Limit = 0x110000;

      StringBuilder raw = new StringBuilder("&#");
      int radix = 10;
      char ch = ReadChar();
      if (ch == 'x' || ch == 'X')
      {
          radix = 16;
          raw.Append(ch);
          ch = ReadChar();
      }

      int v = 0;
      bool sawDigit = false;
      for (; ch != Entity.EOF && ch != ';'; ch = ReadChar())
      {
          int digit;
          if (ch >= '0' && ch <= '9')
          {
              digit = ch - '0';
          }
          else if (radix == 16 && ch >= 'a' && ch <= 'f')
          {
              digit = ch - 'a' + 10;
          }
          else if (radix == 16 && ch >= 'A' && ch <= 'F')
          {
              digit = ch - 'A' + 10;
          }
          else
          {
              break; // not a digit: we must be done!
          }

          sawDigit = true;
          raw.Append(ch);
          if (v < Limit)
          {
              v = (v * radix) + digit;
          }
      }

      value = raw.ToString();
      if (!sawDigit)
      {
          return -1;
      }

      // NOTE(review): ReadChar replaces NUL with a space, so the first branch
      // does not appear to be reachable; a reference cut short by EOF is
      // accepted silently. Left unchanged to preserve lenient behaviour.
      if (ch == 0)
      {
          Error("Premature {0} parsing entity reference", ch);
      }
      else if (ch == ';')
      {
          ReadChar();
      }
      return v;
  }
  // Windows-1252 interpretation of the code points 0x80 through 0x9F,
  // indexed by (code point - 0x80).
  private static readonly int[] CtrlMap =
  {
      0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
      0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
      0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
      0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
  };
  /// <summary>
  /// Reports a processing error for this entity.
  /// </summary>
  /// <param name="msg">The message carried by the exception.</param>
  /// <exception cref="SgmlParseException">Always thrown.</exception>
  public void Error(string msg)
  {
      SgmlParseException failure = new SgmlParseException(msg, this);
      throw failure;
  }

  /// <summary>
  /// Reports a processing error caused by an unexpected character.
  /// </summary>
  /// <param name="msg">A format string with one placeholder for the character.</param>
  /// <param name="ch">The offending character; EOF is reported as the text "EOF".</param>
  /// <exception cref="SgmlParseException">Always thrown.</exception>
  public void Error(string msg, char ch)
  {
      string shown = char.ToString(ch);
      if (ch == Entity.EOF)
      {
          shown = "EOF";
      }
      string text = string.Format(CultureInfo.CurrentUICulture, msg, shown);
      throw new SgmlParseException(text, this);
  }

  /// <summary>
  /// Reports a processing error that involves a numeric value.
  /// </summary>
  /// <param name="msg">A format string with one placeholder for the value.</param>
  /// <param name="x">The value causing the error.</param>
  /// <exception cref="SgmlParseException">Always thrown.</exception>
  public void Error(string msg, int x)
  {
      string text = string.Format(CultureInfo.CurrentUICulture, msg, x);
      throw new SgmlParseException(text, this);
  }

  /// <summary>
  /// Reports a processing error that involves a string argument.
  /// </summary>
  /// <param name="msg">A format string with one placeholder for the argument.</param>
  /// <param name="arg">The argument for the error.</param>
  /// <exception cref="SgmlParseException">Always thrown.</exception>
  public void Error(string msg, string arg)
  {
      string text = string.Format(CultureInfo.CurrentUICulture, msg, arg);
      throw new SgmlParseException(text, this);
  }
  /// <summary>
  /// Describes where this entity is being read, followed by the same
  /// information for each parent up to the top-level entity.
  /// </summary>
  /// <remarks>
  /// This is called while an <see cref="SgmlParseException"/> is being
  /// constructed, so it must not throw. An external entity without a resolved
  /// uri is reported with an empty location, and a relative uri is reported
  /// by its original string.
  /// </remarks>
  /// <returns>Contextual information for the entity.</returns>
  public string Context()
  {
      StringBuilder sb = new StringBuilder();
      for (Entity p = this; p != null; p = p.Parent)
      {
          if (p.m_isInternal)
          {
              sb.AppendFormat(CultureInfo.InvariantCulture, "\nReferenced on line {0}, position {1} of internal entity '{2}'", p.m_line, p.LinePosition, p.m_name);
              continue;
          }

          // ResolvedUri is null when neither this entity nor any ancestor has
          // one (for example when reporting an unresolvable entity), and
          // AbsolutePath is only valid on absolute uris.
          System.Uri resolved = p.ResolvedUri;
          string location = string.Empty;
          if (resolved != null)
          {
              location = resolved.IsAbsoluteUri ? resolved.AbsolutePath : resolved.OriginalString;
          }
          sb.AppendFormat(CultureInfo.InvariantCulture, "\nReferenced on line {0}, position {1} of '{2}' entity at [{3}]", p.m_line, p.LinePosition, p.m_name, location);
      }
      return sb.ToString();
  }
  /// <summary>
  /// Tells whether a token names one of the literal entity types.
  /// </summary>
  /// <param name="token">The token to check.</param>
  /// <returns>true if the token is "CDATA", "SDATA" or "PI" in any letter case, otherwise false.</returns>
  public static bool IsLiteralType(string token)
  {
      foreach (string known in new string[] { "CDATA", "SDATA", "PI" })
      {
          if (string.Equals(token, known, StringComparison.OrdinalIgnoreCase))
          {
              return true;
          }
      }
      return false;
  }
  /// <summary>
  /// Sets the entity to be a literal of the type named by the token.
  /// </summary>
  /// <remarks>
  /// The comparison ignores letter case so that every token accepted by
  /// <see cref="IsLiteralType"/> is also applied here. Any other token,
  /// including null, leaves the current literal type unchanged.
  /// </remarks>
  /// <param name="token">One of "CDATA", "SDATA" or "PI".</param>
  public void SetLiteralType(string token)
  {
      if (string.Equals(token, "CDATA", StringComparison.OrdinalIgnoreCase))
      {
          this.m_literalType = LiteralType.CDATA;
      }
      else if (string.Equals(token, "SDATA", StringComparison.OrdinalIgnoreCase))
      {
          this.m_literalType = LiteralType.SDATA;
      }
      else if (string.Equals(token, "PI", StringComparison.OrdinalIgnoreCase))
      {
          this.m_literalType = LiteralType.PI;
      }
  }
  #region IDisposable Members
  /// <summary>
  /// Finalizer; runs the non-disposing cleanup path.
  /// </summary>
  ~Entity()
  {
      Dispose(false);
  }

  /// <summary>
  /// Releases the reader held by this entity and suppresses finalization.
  /// </summary>
  public void Dispose()
  {
      Dispose(true);
      GC.SuppressFinalize(this);
  }

  /// <summary>
  /// Disposes and clears the reader when called from <see cref="Dispose()"/>.
  /// </summary>
  /// <param name="isDisposing">true if this method has been called by user code, false if it has been called through a finalizer.</param>
  protected virtual void Dispose(bool isDisposing)
  {
      // Nothing is released on the finalizer path.
      if (!isDisposing || m_stm == null)
      {
          return;
      }
      m_stm.Dispose();
      m_stm = null;
  }
  #endregion
  855. }
  856. // This class decodes an HTML/XML stream correctly.
  857. internal class HtmlStream : TextReader
  858. {
  private const int BUFSIZE = 16384; // size used for both the byte and the char buffer
  private const int EOF = -1;        // returned by the private readers when the buffer is exhausted

  // Source bytes.
  private Stream stm;
  private byte[] rawBuffer;
  private int rawPos;   // index of the next undecoded byte in rawBuffer
  private int rawUsed;  // number of valid bytes in rawBuffer

  // Decoding.
  private Encoding m_encoding; // set when an encoding declaration is sniffed
  private Decoder m_decoder;

  // Decoded characters.
  private char[] m_buffer;
  private int pos;      // index of the next unread char in m_buffer
  private int used;     // number of valid chars in m_buffer
  /// <summary>
  /// Wraps a byte stream and works out how to decode it: first from a byte
  /// order mark, otherwise by decoding the first block with the default
  /// encoding and sniffing it for an encoding declaration.
  /// </summary>
  /// <param name="stm">The stream to read; a non-seekable stream is copied into memory first.</param>
  /// <param name="defaultEncoding">Encoding to use when none is detected; UTF-8 when null.</param>
  /// <exception cref="ArgumentNullException"><paramref name="stm"/> is null.</exception>
  public HtmlStream(Stream stm, Encoding defaultEncoding)
  {
      if (stm == null)
      {
          throw new ArgumentNullException("stm");
      }
      if (defaultEncoding == null)
      {
          defaultEncoding = Encoding.UTF8; // default is UTF8
      }
      if (!stm.CanSeek)
      {
          // Need to be able to seek to sniff correctly.
          stm = CopyToMemoryStream(stm);
      }
      this.stm = stm;
      this.rawBuffer = new byte[BUFSIZE];
      this.m_buffer = new char[BUFSIZE];

      // Check byte order marks (at most 4 bytes).
      this.rawUsed = stm.Read(this.rawBuffer, 0, 4);
      this.m_decoder = AutoDetectEncoding(this.rawBuffer, ref this.rawPos, this.rawUsed);
      int bom = this.rawPos;

      if (this.m_decoder == null)
      {
          this.m_decoder = defaultEncoding.GetDecoder();
          // Continue right after the bytes already read, so that a short first
          // read does not leave a gap of zero bytes in the buffer.
          this.rawUsed += stm.Read(this.rawBuffer, this.rawUsed, BUFSIZE - this.rawUsed);
          DecodeBlock();

          // Now sniff to see if there is an XML declaration or HTML <META> tag.
          Decoder sniffed = SniffEncoding();
          if (sniffed != null)
          {
              this.m_decoder = sniffed;
          }
          else
          {
              // The default decoder has just processed the sniffed block and may
              // be holding part of a multi-byte sequence cut off at the end of
              // the buffer. Clear that state before decoding from the start.
              this.m_decoder.Reset();
          }
      }

      // Reset to get ready for Read()
      this.stm.Seek(0, SeekOrigin.Begin);
      this.pos = this.used = 0;
      // skip bom
      if (bom > 0)
      {
          stm.Read(this.rawBuffer, 0, bom);
      }
      this.rawPos = this.rawUsed = 0;
  }
  /// <summary>
  /// Gets the encoding recorded while sniffing; null if none was recorded.
  /// </summary>
  public Encoding Encoding
  {
      get { return m_encoding; }
  }
  /// <summary>
  /// Reads a stream to its end into a new in-memory stream, disposes the
  /// source, and returns the copy positioned at its start.
  /// </summary>
  private static Stream CopyToMemoryStream(Stream s)
  {
      const int ChunkSize = 100000; // large heap is more efficient
      byte[] chunk = new byte[ChunkSize];
      MemoryStream copy = new MemoryStream();

      for (;;)
      {
          int read = s.Read(chunk, 0, ChunkSize);
          if (read <= 0)
          {
              break;
          }
          copy.Write(chunk, 0, read);
      }

      copy.Seek(0, SeekOrigin.Begin);
      s.Dispose();
      return copy;
  }
  /// <summary>
  /// Decodes the pending bytes of the raw buffer and appends the resulting
  /// characters after any characters that have not been read yet.
  /// </summary>
  internal void DecodeBlock()
  {
      // Shift the unread chars to the beginning of the buffer.
      if (pos > 0)
      {
          if (pos < used)
          {
              System.Array.Copy(m_buffer, pos, m_buffer, 0, used - pos);
          }
          used -= pos;
          pos = 0;
      }

      int byteCount = rawUsed - rawPos;
      int needed = m_decoder.GetCharCount(rawBuffer, rawPos, byteCount);
      if (m_buffer.Length - used < needed)
      {
          // Not enough room behind the unread chars: grow and keep them.
          char[] grown = new char[m_buffer.Length + needed];
          System.Array.Copy(m_buffer, 0, grown, 0, used);
          m_buffer = grown;
      }

      // Decode behind the unread chars. Writing at index 0, as before, would
      // overwrite the chars that were just shifted to the front. The result is
      // the same whenever the buffer was already fully consumed.
      used += m_decoder.GetChars(rawBuffer, rawPos, byteCount, m_buffer, used);
      rawPos = rawUsed; // consumed the whole buffer!
  }
  /// <summary>
  /// Picks a decoder from the first four bytes of the input, either from a
  /// byte order mark or from the byte pattern of a leading '&lt;'.
  /// </summary>
  /// <param name="buffer">The bytes to inspect.</param>
  /// <param name="index">
  /// Position to inspect; advanced past a byte order mark when one is found,
  /// and left untouched when the match was on a leading '&lt;', because that
  /// character is content.
  /// </param>
  /// <param name="length">Number of valid bytes in <paramref name="buffer"/>.</param>
  /// <returns>A decoder, or null when fewer than four bytes are available or nothing matched.</returns>
  internal static Decoder AutoDetectEncoding(byte[] buffer, ref int index, int length)
  {
      if (length - index < 4)
      {
          return null;
      }

      uint w = (uint)buffer[index + 0] << 24 | (uint)buffer[index + 1] << 16 | (uint)buffer[index + 2] << 8 | (uint)buffer[index + 3];

      // see if it's a 4-byte encoding
      // NOTE(review): the standard UCS-4 byte order marks are 00 00 FE FF and
      // FF FE 00 00, which are not the values tested here; confirm against the
      // Ucs4Decoder classes before changing these constants.
      switch (w)
      {
          case 0xfefffeff:
              index += 4;
              return new Ucs4DecoderBigEngian();
          case 0xfffefffe:
              index += 4;
              return new Ucs4DecoderLittleEndian();
          case 0x3c000000:
              // '<' in a 4-byte encoding with no byte order mark. Same decoder
              // choice as before, but the character is no longer skipped.
              return new Ucs4DecoderBigEngian();
          case 0x0000003c:
              return new Ucs4DecoderLittleEndian();
      }

      w >>= 8;
      if (w == 0xefbbbf)
      {
          index += 3;
          return Encoding.UTF8.GetDecoder();
      }

      w >>= 8;
      switch (w)
      {
          case 0xfeff:
              index += 2;
              return UnicodeEncoding.BigEndianUnicode.GetDecoder();
          case 0xfffe:
              index += 2;
              return new UnicodeEncoding(false, false).GetDecoder();
          case 0x3c00:
              // Bytes 3C 00 are '<' in little-endian UTF-16 with no byte order
              // mark. This used to select the big-endian decoder and skip the
              // character.
              return new UnicodeEncoding(false, false).GetDecoder();
          case 0x003c:
              // Bytes 00 3C are '<' in big-endian UTF-16 with no byte order mark.
              return UnicodeEncoding.BigEndianUnicode.GetDecoder();
      }

      return null;
  }
  /// <summary>
  /// Returns the next decoded character and advances, or EOF when the
  /// current buffer is exhausted; it never decodes more input.
  /// </summary>
  private int ReadChar()
  {
      return (pos < used) ? m_buffer[pos++] : EOF;
  }
  /// <summary>
  /// Returns the next decoded character without consuming it, or EOF when
  /// the current buffer is exhausted.
  /// </summary>
  private int PeekChar()
  {
      if (pos >= used)
      {
          return EOF;
      }
      return m_buffer[pos];
  }
  /// <summary>
  /// Checks whether the buffer continues with the given pattern. Nothing is
  /// consumed when the first character differs; otherwise characters are
  /// consumed up to and including the first mismatch.
  /// </summary>
  private bool SniffPattern(string pattern)
  {
      if (PeekChar() != pattern[0])
      {
          return false;
      }

      foreach (char expected in pattern)
      {
          if (ReadChar() != expected)
          {
              return false;
          }
      }
      return true;
  }
  /// <summary>
  /// Advances past any run of spaces, tabs, carriage returns and line feeds,
  /// stopping before the first other character.
  /// </summary>
  private void SniffWhitespace()
  {
      for (int upcoming = PeekChar();
           upcoming == ' ' || upcoming == '\t' || upcoming == '\r' || upcoming == '\n';
           upcoming = PeekChar())
      {
          ReadChar();
      }
  }
  /// <summary>
  /// Reads a literal delimited by single or double quotes.
  /// </summary>
  /// <returns>
  /// The text taken from between the quotes, or null when the next character
  /// is not a quote.
  /// </returns>
  private string SniffLiteral()
  {
      int quoteChar = PeekChar();
      if (quoteChar != '\'' && quoteChar != '"')
      {
          return null;
      }

      ReadChar(); // consume quote char
      int begin = this.pos;
      int current;
      do
      {
          current = ReadChar();
      }
      while (current != EOF && current != quoteChar);

      if (pos <= begin)
      {
          return "";
      }
      // The last character consumed is left out of the result.
      return new string(m_buffer, begin, pos - begin - 1);
  }
  /// <summary>
  /// Reads the next attribute and returns its literal value, provided its
  /// name matches <paramref name="name"/> ignoring case and it is followed by '='.
  /// </summary>
  /// <returns>The attribute's literal value, or null when it does not match.</returns>
  private string SniffAttribute(string name)
  {
      SniffWhitespace();
      string found = SniffName();
      if (!string.Equals(name, found, StringComparison.OrdinalIgnoreCase))
      {
          return null;
      }

      SniffWhitespace();
      if (!SniffPattern("="))
      {
          return null;
      }

      SniffWhitespace();
      return SniffLiteral();
  }
  /// <summary>
  /// Reads the next attribute, whatever its name, and returns its value.
  /// </summary>
  /// <param name="name">Receives the attribute name, or null if no name could be read.</param>
  /// <returns>
  /// The quoted value, or null if there is no name, no '=' after the name,
  /// or no quoted literal after the '='.
  /// </returns>
  private string SniffAttribute(out string name)
  {
      SniffWhitespace();
      name = SniffName();
      if (name == null)
      {
          return null;
      }
      SniffWhitespace();
      if (!SniffPattern("="))
      {
          return null;
      }
      SniffWhitespace();
      return SniffLiteral();
  }
  /// <summary>
  /// Consumes buffered characters up to and including the first occurrence of
  /// <paramref name="term"/>, or to the end of the buffer if it never occurs.
  /// </summary>
  /// <param name="term">The terminator text to look for.</param>
  /// <remarks>
  /// Matching restarts from the first terminator character after any mismatch,
  /// and the mismatching character itself is not re-examined.
  /// </remarks>
  private void SniffTerminator(string term)
  {
      int matched = 0;
      for (int ch = ReadChar(); matched < term.Length && ch != EOF; ch = ReadChar())
      {
          if (term[matched] != ch)
          {
              matched = 0; // reset.
              continue;
          }
          matched++;
          if (matched == term.Length)
          {
              break;
          }
      }
  }
  /// <summary>
  /// Looks for an encoding declaration in the buffered text: first an
  /// <c>&lt;?xml version=... encoding=...?&gt;</c> declaration, then (via
  /// <c>SniffMeta</c>) an HTML meta tag.
  /// </summary>
  /// <returns>
  /// A decoder for the declared encoding, or whatever <c>SniffMeta</c> returns
  /// when the XML declaration is missing or names no usable encoding.
  /// </returns>
  internal Decoder SniffEncoding()
  {
      if (SniffPattern("<?xml") && SniffAttribute("version") != null)
      {
          string encoding = SniffAttribute("encoding");
          if (encoding != null)
          {
              try
              {
                  Encoding enc = Encoding.GetEncoding(encoding);
                  if (enc != null)
                  {
                      this.m_encoding = enc;
                      return enc.GetDecoder();
                  }
              }
              catch (ArgumentException)
              {
                  // oh well then.
              }
          }
          // Skip the rest of the declaration before scanning for a meta tag.
          SniffTerminator(">");
      }
      return SniffMeta();
  }
  /// <summary>
  /// Scans the buffered text for
  /// <c>&lt;meta http-equiv="content-type" content="...; charset=NAME"&gt;</c>
  /// and creates a decoder for the named character set.
  /// </summary>
  /// <returns>
  /// A decoder for the first recognised charset (also stored in <c>m_encoding</c>),
  /// or null if the buffer ends without one.
  /// </returns>
  /// <remarks>
  /// Tag, attribute names and the "charset" keyword are all matched without
  /// regard to case.
  /// </remarks>
  internal Decoder SniffMeta()
  {
      for (int i = ReadChar(); i != EOF; i = ReadChar())
      {
          if ((char)i != '<')
          {
              continue;
          }
          string name = SniffName();
          if (name == null || !StringUtilities.EqualsIgnoreCase(name, "meta"))
          {
              continue;
          }

          // Collect the two attributes of interest; stop when no further name can be read.
          string httpequiv = null;
          string content = null;
          while (true)
          {
              string value = SniffAttribute(out name);
              if (name == null)
              {
                  break;
              }
              if (StringUtilities.EqualsIgnoreCase(name, "http-equiv"))
              {
                  httpequiv = value;
              }
              else if (StringUtilities.EqualsIgnoreCase(name, "content"))
              {
                  content = value;
              }
          }

          if (httpequiv == null || content == null || !StringUtilities.EqualsIgnoreCase(httpequiv, "content-type"))
          {
              continue;
          }

          // e.g. "text/html; charset=utf-8". Ordinal, case-insensitive search so that
          // "CHARSET=" is found and the current culture cannot affect the match.
          int j = content.IndexOf("charset", StringComparison.OrdinalIgnoreCase);
          if (j < 0)
          {
              continue;
          }
          j = content.IndexOf('=', j);
          if (j < 0)
          {
              continue;
          }
          j++;
          int k = content.IndexOf(';', j);
          if (k < 0)
          {
              k = content.Length;
          }
          string charset = content.Substring(j, k - j).Trim();
          try
          {
              Encoding e = Encoding.GetEncoding(charset);
              this.m_encoding = e;
              return e.GetDecoder();
          }
          catch (ArgumentException)
          {
              // Unknown charset name: keep scanning for another meta tag.
          }
      }
      return null;
  }
  /// <summary>
  /// Reads a name made of letters, digits, '-', '_' and ':' starting at the
  /// current buffer position.
  /// </summary>
  /// <returns>
  /// The name, or null if the buffer is exhausted or the next character is not
  /// a name character. The position is left just after the name.
  /// </returns>
  internal string SniffName()
  {
      int start = pos;
      // Scan to the true end of the buffered data; the previous "used - 1" bound
      // never consumed the last buffered character, truncating names that ended there.
      while (pos < used)
      {
          char ch = m_buffer[pos];
          if (!char.IsLetterOrDigit(ch) && ch != '-' && ch != '_' && ch != ':')
          {
              break;
          }
          pos++;
      }
      if (start == pos)
      {
          return null;
      }
      return new string(m_buffer, start, pos - start);
  }
  /// <summary>
  /// Advances past spaces, tabs, carriage returns and line feeds in the buffer,
  /// stopping at the first other character or at the end of the buffered data.
  /// </summary>
  [SuppressMessage("Microsoft.Performance", "CA1811", Justification = "Kept for potential future usage.")]
  internal void SkipWhitespace()
  {
      // Scans up to "used" (not "used - 1") so trailing whitespace in the last
      // buffered position is skipped too; tab is included to agree with SniffWhitespace.
      while (pos < used)
      {
          char ch = m_buffer[pos];
          if (ch != ' ' && ch != '\t' && ch != '\r' && ch != '\n')
          {
              break;
          }
          pos++;
      }
  }
  /// <summary>
  /// Advances the buffer position to the next occurrence of <paramref name="what"/>.
  /// </summary>
  /// <param name="what">The character to stop on.</param>
  /// <remarks>
  /// When the character is found the position is left on it. When it is absent
  /// the position ends equal to <c>used</c>, so callers can test
  /// <c>pos &lt; used</c> to tell the two cases apart.
  /// </remarks>
  [SuppressMessage("Microsoft.Performance", "CA1811", Justification = "Kept for potential future usage.")]
  internal void SkipTo(char what)
  {
      // The previous "used - 1" bound stopped on the last buffered character even
      // when it did not match, which made a failed search look like a success.
      while (pos < used && m_buffer[pos] != what)
      {
          pos++;
      }
  }
  /// <summary>
  /// Skips to the next '=', then reads the delimited value that follows it.
  /// </summary>
  /// <returns>
  /// The text between the delimiters, or null if the buffer position reaches
  /// <c>used</c> at any step.
  /// </returns>
  /// <remarks>
  /// The first character after the '=' and any skipped whitespace is used as the
  /// delimiter, whatever it is.
  /// </remarks>
  [SuppressMessage("Microsoft.Performance", "CA1811", Justification = "Kept for potential future usage.")]
  internal string ParseAttribute()
  {
      SkipTo('=');
      if (pos >= used)
      {
          return null;
      }
      pos++; // step over '='
      SkipWhitespace();
      if (pos >= used)
      {
          return null;
      }
      char quote = m_buffer[pos];
      pos++;
      int start = pos;
      SkipTo(quote);
      if (pos >= used)
      {
          return null;
      }
      string result = new string(m_buffer, start, pos - start);
      pos++; // step over the closing delimiter
      return result;
  }
  /// <summary>
  /// Returns the next character without consuming it, refilling the buffer from
  /// the stream if necessary.
  /// </summary>
  /// <returns>The next character, or <c>EOF</c> if <c>Read()</c> reports end of input.</returns>
  public override int Peek()
  {
      int next = Read();
      if (next == EOF)
      {
          return next;
      }
      pos--; // undo the advance performed by Read()
      return next;
  }
  /// <summary>
  /// Reads one character, pulling and decoding another block of raw bytes from
  /// the stream when the decoded buffer has been fully consumed.
  /// </summary>
  /// <returns>
  /// The character read; <c>EOF</c> when the stream returns no more bytes; or -1
  /// when a freshly decoded block produced no characters.
  /// </returns>
  public override int Read()
  {
      bool bufferConsumed = (pos == used);
      if (bufferConsumed)
      {
          rawUsed = stm.Read(rawBuffer, 0, rawBuffer.Length);
          rawPos = 0;
          if (rawUsed == 0)
          {
              return EOF;
          }
          DecodeBlock();
      }
      return (pos < used) ? m_buffer[pos++] : -1;
  }
  /// <summary>
  /// Copies up to <paramref name="length"/> decoded characters into
  /// <paramref name="buffer"/>, refilling from the stream first if the decoded
  /// buffer is empty.
  /// </summary>
  /// <param name="buffer">Destination array.</param>
  /// <param name="start">Index in <paramref name="buffer"/> at which to begin writing.</param>
  /// <param name="length">Maximum number of characters to copy.</param>
  /// <returns>
  /// The number of characters copied (never more than is currently buffered);
  /// -1 when the stream returns no more bytes; 0 when a freshly decoded block
  /// produced no characters.
  /// </returns>
  /// <remarks>
  /// NOTE(review): TextReader documents 0 as the end-of-stream result for this
  /// overload; this implementation returns -1 - confirm callers expect that.
  /// </remarks>
  public override int Read(char[] buffer, int start, int length)
  {
      if (pos == used)
      {
          rawUsed = stm.Read(rawBuffer, 0, rawBuffer.Length);
          rawPos = 0;
          if (rawUsed == 0)
          {
              return -1;
          }
          DecodeBlock();
      }
      if (pos >= used)
      {
          return 0;
      }
      int available = used - pos;
      length = Math.Min(available, length);
      Array.Copy(this.m_buffer, pos, buffer, start, length);
      pos += length;
      return length;
  }
  /// <summary>
  /// Reads a block of characters; simply forwards to
  /// <c>Read(char[], int, int)</c>, so it may return fewer characters than requested.
  /// </summary>
  /// <param name="data">Destination array.</param>
  /// <param name="index">Index in <paramref name="data"/> at which to begin writing.</param>
  /// <param name="count">Maximum number of characters to read.</param>
  /// <returns>The value returned by <c>Read(data, index, count)</c>.</returns>
  public override int ReadBlock(char[] data, int index, int count)
  {
      int charsRead = Read(data, index, count);
      return charsRead;
  }
  /// <summary>
  /// Copies buffered characters into <paramref name="buffer"/> up to and
  /// including the end of the current line ("\r", "\n" or "\r\n"), or until the
  /// fill limit is hit, whichever comes first.
  /// </summary>
  /// <param name="buffer">Destination array.</param>
  /// <param name="start">Index in <paramref name="buffer"/> at which to begin writing.</param>
  /// <param name="length">Fill limit: copying stops once <c>start</c> plus the number of characters written equals this value.</param>
  /// <returns>The number of characters written.</returns>
  /// <remarks>
  /// NOTE(review): the limit is compared against <c>start + written</c>, i.e. it
  /// acts as an end index, not a count - confirm this is what callers intend.
  /// </remarks>
  [SuppressMessage("Microsoft.Performance", "CA1811", Justification = "Kept for potential future usage.")]
  public int ReadLine(char[] buffer, int start, int length)
  {
      int written = 0;
      for (int ch = ReadChar(); ch != EOF; ch = ReadChar())
      {
          buffer[start + written] = (char)ch;
          written++;
          if (start + written == length)
          {
              break; // buffer is full
          }
          if (ch == '\n')
          {
              break;
          }
          if (ch == '\r')
          {
              // Keep a "\r\n" pair together in the same line.
              if (PeekChar() == '\n')
              {
                  buffer[start + written] = (char)ReadChar();
                  written++;
              }
              break;
          }
      }
      return written;
  }
  /// <summary>
  /// Reads all remaining characters by calling <c>Read(char[], int, int)</c>
  /// repeatedly until it returns a value that is not positive.
  /// </summary>
  /// <returns>The concatenated text.</returns>
  public override string ReadToEnd()
  {
      StringBuilder text = new StringBuilder();
      char[] chunk = new char[100000]; // large block heap is more efficient
      int count = Read(chunk, 0, chunk.Length);
      while (count > 0)
      {
          text.Append(chunk, 0, count);
          count = Read(chunk, 0, chunk.Length);
      }
      return text.ToString();
  }
  /// <summary>
  /// Releases the underlying stream together with the reader.
  /// </summary>
  /// <param name="disposing">
  /// true when called from <c>Dispose()</c>; false when called from a finalizer.
  /// </param>
  protected override void Dispose(bool disposing)
  {
      // Managed objects such as the stream must only be touched on the explicit
      // dispose path; during finalization they may already have been finalized.
      if (disposing && stm != null)
      {
          stm.Dispose();
      }
      base.Dispose(disposing);
  }
  1278. }
  /// <summary>
  /// Base class for decoders that convert 4-byte (UCS-4) units into UTF-16 chars.
  /// Bytes of a unit that is split across calls are carried over in <c>temp</c>.
  /// </summary>
  internal abstract class Ucs4Decoder : Decoder {
      /// <summary>Bytes of an incomplete unit carried over from the previous call.</summary>
      internal byte[] temp = new byte[4];
      /// <summary>Number of valid bytes currently held in <c>temp</c>.</summary>
      internal int tempBytes = 0;

      /// <summary>
      /// Returns the number of complete 4-byte units available, counting the
      /// bytes carried over from the previous call.
      /// </summary>
      /// <remarks>
      /// NOTE(review): this counts one char per unit; a unit above U+FFFF decodes
      /// to two chars, so the result can be lower than what GetChars produces.
      /// </remarks>
      public override int GetCharCount(byte[] bytes, int index, int count) {
          return (count + tempBytes) / 4;
      }

      /// <summary>
      /// Decodes every complete 4-byte unit in the given range.
      /// </summary>
      /// <returns>The number of chars written to <paramref name="chars"/>.</returns>
      internal abstract int GetFullChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex);

      /// <summary>
      /// Decodes the given bytes, first completing any unit left unfinished by the
      /// previous call and finally stashing the bytes of a trailing partial unit.
      /// </summary>
      /// <returns>The number of chars written to <paramref name="chars"/>.</returns>
      public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) {
          int produced = 0;
          if (tempBytes > 0) {
              // Top up the pending unit, but never read beyond the supplied range.
              while (tempBytes < 4 && byteCount > 0) {
                  temp[tempBytes] = bytes[byteIndex];
                  tempBytes++;
                  byteIndex++;
                  byteCount--;
              }
              if (tempBytes < 4) {
                  return 0; // still incomplete; wait for more input
              }
              // Use the real count: a unit above U+FFFF yields two chars.
              produced = GetFullChars(temp, 0, 4, chars, charIndex);
              charIndex += produced;
              tempBytes = 0;
          }
          produced += GetFullChars(bytes, byteIndex, byteCount, chars, charIndex);

          // Whatever does not fill a whole unit is kept for the next call. The
          // remainder is computed from the bytes left AFTER the pending unit was
          // completed, so already-decoded bytes are never stored again.
          int leftover = byteCount % 4;
          int end = byteIndex + byteCount;
          for (int k = end - leftover; k < end; k++) {
              temp[tempBytes] = bytes[k];
              tempBytes++;
          }
          return produced;
      }

      /// <summary>
      /// Returns the high (lead) UTF-16 surrogate for a code point above U+FFFF.
      /// </summary>
      /// <param name="code">A code point in the range 0x10000-0x10FFFF.</param>
      /// <remarks>
      /// The matching low surrogate is <c>0xDC00 | (code &amp; 0x3FF)</c>. The previous
      /// implementation truncated both surrogates to single bytes.
      /// </remarks>
      internal static char UnicodeToUTF16(UInt32 code) {
          // 0xD7C0 == 0xD800 - (0x10000 >> 10)
          return (char)(0xD7C0 + (code >> 10));
      }
  }
  /// <summary>
  /// UCS-4 decoder that assembles each code point with the first byte of the
  /// unit as the least significant byte (<c>bytes[i + 3]</c> is the most
  /// significant). The class name is kept unchanged.
  /// </summary>
  internal class Ucs4DecoderBigEngian : Ucs4Decoder {
      /// <summary>
      /// Decodes every complete 4-byte unit in the range into UTF-16.
      /// </summary>
      /// <returns>The number of chars written; code points above U+FFFF produce two.</returns>
      /// <exception cref="SgmlParseException">
      /// A unit is above 0x10FFFF or lies in the surrogate range 0xD800-0xDFFF.
      /// </exception>
      internal override int GetFullChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) {
          int end = byteIndex + byteCount;
          int j = charIndex;
          for (int i = byteIndex; i + 3 < end; i += 4) {
              UInt32 code = (UInt32)((bytes[i + 3] << 24) | (bytes[i + 2] << 16) | (bytes[i + 1] << 8) | bytes[i]);
              if (code > 0x10FFFF || (code >= 0xD800 && code <= 0xDFFF)) {
                  throw new SgmlParseException(string.Format(CultureInfo.CurrentUICulture, "Invalid character 0x{0:x} in encoding", code));
              }
              if (code > 0xFFFF) {
                  // Supplementary plane: write a full surrogate pair (previously one
                  // garbage char was written and the second slot was left untouched).
                  chars[j++] = (char)(0xD7C0 + (code >> 10));
                  chars[j++] = (char)(0xDC00 | (code & 0x3FF));
              } else {
                  chars[j++] = (char)code;
              }
          }
          return j - charIndex;
      }
  }
  /// <summary>
  /// UCS-4 decoder that assembles each code point with the first byte of the
  /// unit as the most significant byte (<c>bytes[i]</c> is shifted left by 24).
  /// The class name is kept unchanged.
  /// </summary>
  internal class Ucs4DecoderLittleEndian : Ucs4Decoder {
      /// <summary>
      /// Decodes every complete 4-byte unit in the range into UTF-16.
      /// </summary>
      /// <returns>The number of chars written; code points above U+FFFF produce two.</returns>
      /// <exception cref="SgmlParseException">
      /// A unit is above 0x10FFFF or lies in the surrogate range 0xD800-0xDFFF.
      /// </exception>
      internal override int GetFullChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) {
          int end = byteIndex + byteCount;
          int j = charIndex;
          for (int i = byteIndex; i + 3 < end; i += 4) {
              UInt32 code = (UInt32)((bytes[i] << 24) | (bytes[i + 1] << 16) | (bytes[i + 2] << 8) | bytes[i + 3]);
              if (code > 0x10FFFF || (code >= 0xD800 && code <= 0xDFFF)) {
                  throw new SgmlParseException(string.Format(CultureInfo.CurrentUICulture, "Invalid character 0x{0:x} in encoding", code));
              }
              if (code > 0xFFFF) {
                  // Supplementary plane: write a full surrogate pair (previously one
                  // garbage char was written and the second slot was left untouched).
                  chars[j++] = (char)(0xD7C0 + (code >> 10));
                  chars[j++] = (char)(0xDC00 | (code & 0x3FF));
              } else {
                  chars[j++] = (char)code;
              }
          }
          return j - charIndex;
      }
  }
  /// <summary>
  /// An element declaration in a DTD.
  /// </summary>
  public class ElementDecl
  {
      private string m_name;
      private bool m_startTagOptional;
      private bool m_endTagOptional;
      private ContentModel m_contentModel;
      private string[] m_inclusions;
      private string[] m_exclusions;
      private Dictionary<string, AttDef> m_attList;

      /// <summary>
      /// Initialises a new element declaration instance.
      /// </summary>
      /// <param name="name">The name of the element.</param>
      /// <param name="sto">Whether the start tag is optional.</param>
      /// <param name="eto">Whether the end tag is optional.</param>
      /// <param name="cm">The <see cref="ContentModel"/> of the element.</param>
      /// <param name="inclusions">Element names that are always allowed inside this element; may be null.</param>
      /// <param name="exclusions">Element names that are never allowed inside this element; may be null.</param>
      public ElementDecl(string name, bool sto, bool eto, ContentModel cm, string[] inclusions, string[] exclusions)
      {
          m_name = name;
          m_startTagOptional = sto;
          m_endTagOptional = eto;
          m_contentModel = cm;
          m_inclusions = inclusions;
          m_exclusions = exclusions;
      }

      /// <summary>
      /// The element name.
      /// </summary>
      public string Name
      {
          get { return m_name; }
      }

      /// <summary>
      /// The <see cref="Sgml.ContentModel"/> of the element declaration.
      /// </summary>
      public ContentModel ContentModel
      {
          get { return m_contentModel; }
      }

      /// <summary>
      /// Whether the end tag of the element is optional.
      /// </summary>
      /// <value>true if the end tag of the element is optional, otherwise false.</value>
      public bool EndTagOptional
      {
          get { return m_endTagOptional; }
      }

      /// <summary>
      /// Whether the start tag of the element is optional.
      /// </summary>
      /// <value>true if the start tag of the element is optional, otherwise false.</value>
      public bool StartTagOptional
      {
          get { return m_startTagOptional; }
      }

      /// <summary>
      /// Finds the attribute definition with the specified name.
      /// </summary>
      /// <param name="name">The name of the <see cref="AttDef"/> to find; it is upper-cased (invariant) before the lookup.</param>
      /// <returns>The <see cref="AttDef"/> with the specified name, or null if there is none.</returns>
      /// <exception cref="InvalidOperationException">If the attribute list has not yet been initialised.</exception>
      /// <exception cref="ArgumentNullException">If <paramref name="name"/> is null.</exception>
      public AttDef FindAttribute(string name)
      {
          if (m_attList == null)
              throw new InvalidOperationException("The attribute list for the element declaration has not been initialised.");
          if (name == null)
              throw new ArgumentNullException("name");

          AttDef a;
          m_attList.TryGetValue(name.ToUpperInvariant(), out a);
          return a;
      }

      /// <summary>
      /// Adds attribute definitions to the element declaration.
      /// </summary>
      /// <param name="list">The attribute definitions to add, keyed the way <see cref="FindAttribute"/> looks them up.</param>
      /// <exception cref="ArgumentNullException">If <paramref name="list"/> is null.</exception>
      /// <remarks>
      /// The first list supplied is adopted as-is (the same dictionary instance).
      /// Later lists are merged into it; definitions whose key already exists are ignored.
      /// </remarks>
      public void AddAttDefs(Dictionary<string, AttDef> list)
      {
          if (list == null)
              throw new ArgumentNullException("list");

          if (m_attList == null)
          {
              m_attList = list;
              return;
          }

          // Merge by the incoming dictionary key, not by AttDef.Name, so merged
          // entries follow the same key convention as the adopted first list.
          foreach (KeyValuePair<string, AttDef> entry in list)
          {
              if (!m_attList.ContainsKey(entry.Key))
              {
                  m_attList.Add(entry.Key, entry.Value);
              }
          }
      }

      /// <summary>
      /// Tests whether this element can contain another specified element.
      /// </summary>
      /// <param name="name">The name of the element to check for.</param>
      /// <param name="dtd">The DTD to use to do the check.</param>
      /// <returns>True if the specified element can be contained by this element.</returns>
      /// <remarks>
      /// Exclusions are checked first, then inclusions, and only then the content
      /// model. Names are compared ignoring case.
      /// </remarks>
      public bool CanContain(string name, SgmlDtd dtd)
      {
          if (m_exclusions != null)
          {
              foreach (string s in m_exclusions)
              {
                  if (string.Equals(s, name, StringComparison.OrdinalIgnoreCase))
                      return false;
              }
          }

          if (m_inclusions != null)
          {
              foreach (string s in m_inclusions)
              {
                  if (string.Equals(s, name, StringComparison.OrdinalIgnoreCase))
                      return true;
              }
          }

          return m_contentModel.CanContain(name, dtd);
      }
  }
  /// <summary>
  /// The kinds of declared content an element can have when nested subelements
  /// cannot occur inside it.
  /// </summary>
  public enum DeclaredContent
  {
      /// <summary>
      /// No declared content was specified.
      /// </summary>
      Default = 0,

      /// <summary>
      /// Character data (CDATA): only valid SGML characters.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      CDATA = 1,

      /// <summary>
      /// Replaceable character data (RCDATA): text, character references and/or
      /// general entity references that resolve to character data.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      RCDATA = 2,

      /// <summary>
      /// Empty element (EMPTY): no contents, or contents that can be generated by the program.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      EMPTY = 3
  }
  /// <summary>
  /// Defines the content model for an element: either a declared content type
  /// or a tree of model groups built up with <see cref="PushGroup"/> and
  /// <see cref="PopGroup"/>.
  /// </summary>
  public class ContentModel
  {
      private DeclaredContent m_declaredContent;
      private int m_currentDepth;
      private Group m_model;

      /// <summary>
      /// Initialises a new instance of the <see cref="ContentModel"/> class with
      /// an empty root group.
      /// </summary>
      public ContentModel()
      {
          m_model = new Group(null);
      }

      /// <summary>
      /// The number of nested groups currently open (pushed but not yet popped).
      /// </summary>
      public int CurrentDepth
      {
          get { return m_currentDepth; }
      }

      /// <summary>
      /// The declared content type, or <see cref="Sgml.DeclaredContent.Default"/>
      /// when none has been set.
      /// </summary>
      public DeclaredContent DeclaredContent
      {
          get { return m_declaredContent; }
      }

      /// <summary>
      /// Begins processing of a nested model group, which becomes the current group.
      /// </summary>
      public void PushGroup()
      {
          Group nested = new Group(m_model);
          m_model = nested;
          m_currentDepth++;
      }

      /// <summary>
      /// Finishes processing of a nested model group: it is appended to its
      /// parent's members and the parent becomes the current group again.
      /// </summary>
      /// <returns>The current depth of the group nesting, or -1 if there are no more groups to pop.</returns>
      public int PopGroup()
      {
          if (m_currentDepth == 0)
          {
              return -1;
          }

          m_currentDepth--;
          Group finished = m_model;
          finished.Parent.AddGroup(finished);
          m_model = finished.Parent;
          return m_currentDepth;
      }

      /// <summary>
      /// Adds a new symbol to the current group's members.
      /// </summary>
      /// <param name="sym">The symbol to add.</param>
      public void AddSymbol(string sym)
      {
          m_model.AddSymbol(sym);
      }

      /// <summary>
      /// Adds a connector onto the member list for the current group.
      /// </summary>
      /// <param name="c">The connector character to add.</param>
      /// <exception cref="SgmlParseException">
      /// If the content is not mixed and has no members yet, or if the group type has been set and the
      /// connector does not match the group type.
      /// </exception>
      public void AddConnector(char c)
      {
          m_model.AddConnector(c);
      }

      /// <summary>
      /// Adds an occurrence character for the current model group, setting its <see cref="Occurrence"/> value.
      /// </summary>
      /// <param name="c">The occurrence character.</param>
      public void AddOccurrence(char c)
      {
          m_model.AddOccurrence(c);
      }

      /// <summary>
      /// Sets the declared content for the content model.
      /// </summary>
      /// <param name="dc">The declared content keyword: "EMPTY", "RCDATA" or "CDATA" (matched exactly).</param>
      /// <exception cref="SgmlParseException">If <paramref name="dc"/> is any other value.</exception>
      public void SetDeclaredContent(string dc)
      {
          // TODO: Validate that this can never combine with nested groups?
          if (dc == "EMPTY")
          {
              this.m_declaredContent = DeclaredContent.EMPTY;
          }
          else if (dc == "RCDATA")
          {
              this.m_declaredContent = DeclaredContent.RCDATA;
          }
          else if (dc == "CDATA")
          {
              this.m_declaredContent = DeclaredContent.CDATA;
          }
          else
          {
              throw new SgmlParseException(string.Format(CultureInfo.CurrentUICulture, "Declared content type '{0}' is not supported", dc));
          }
      }

      /// <summary>
      /// Checks whether an element using this content model can contain a specified element.
      /// </summary>
      /// <param name="name">The name of the element to look for.</param>
      /// <param name="dtd">The DTD to use during the checking.</param>
      /// <returns>
      /// false whenever a declared content type has been set; otherwise the answer
      /// of the current model group.
      /// </returns>
      public bool CanContain(string name, SgmlDtd dtd)
      {
          bool hasDeclaredContent = m_declaredContent != DeclaredContent.Default;
          if (hasDeclaredContent)
          {
              return false; // empty or text only node.
          }
          return m_model.CanContain(name, dtd);
      }
  }
  /// <summary>
  /// The type of a content model group, i.e. how its members are connected.
  /// </summary>
  public enum GroupType
  {
      /// <summary>
      /// No model group.
      /// </summary>
      None = 0,

      /// <summary>
      /// All elements must occur, in any order.
      /// </summary>
      And = 1,

      /// <summary>
      /// One (and only one) must occur.
      /// </summary>
      Or = 2,

      /// <summary>
      /// All elements must occur, in the specified order.
      /// </summary>
      Sequence = 3
  }
  /// <summary>
  /// How often a child element (or group) may occur within a content model group.
  /// </summary>
  public enum Occurrence
  {
      /// <summary>
      /// Required: must occur exactly once.
      /// </summary>
      Required = 0,

      /// <summary>
      /// Optional: may occur at most once.
      /// </summary>
      Optional = 1,

      /// <summary>
      /// Optional and repeatable: may occur any number of times, including none.
      /// </summary>
      ZeroOrMore = 2,

      /// <summary>
      /// Required and repeatable: must occur at least once.
      /// </summary>
      OneOrMore = 3
  }
  /// <summary>
  /// A content model group: a list of member symbols (element names) and nested
  /// groups, together with the connector type and occurrence of the group.
  /// </summary>
  public class Group
  {
      private Group m_parent;
      private List<Object> Members;
      private GroupType m_groupType;
      private Occurrence m_occurrence;
      private bool Mixed;

      /// <summary>
      /// The <see cref="Occurrence"/> of this group.
      /// </summary>
      public Occurrence Occurrence
      {
          get { return m_occurrence; }
      }

      /// <summary>
      /// Checks whether the group contains only text.
      /// </summary>
      /// <value>true if the group is of mixed content and has no members, otherwise false.</value>
      public bool TextOnly
      {
          get
          {
              if (!this.Mixed)
              {
                  return false;
              }
              return Members.Count == 0;
          }
      }

      /// <summary>
      /// The parent group of this group.
      /// </summary>
      public Group Parent
      {
          get { return m_parent; }
      }

      /// <summary>
      /// Initialises a new, empty Content Model Group with no group type and
      /// <see cref="Sgml.Occurrence.Required"/> occurrence.
      /// </summary>
      /// <param name="parent">The parent model group.</param>
      public Group(Group parent)
      {
          m_parent = parent;
          m_groupType = GroupType.None;
          m_occurrence = Occurrence.Required;
          Members = new List<Object>();
      }

      /// <summary>
      /// Adds a new child model group to the end of the group's members.
      /// </summary>
      /// <param name="g">The model group to add.</param>
      public void AddGroup(Group g)
      {
          Members.Add(g);
      }

      /// <summary>
      /// Adds a new symbol to the group's members. The symbol "#PCDATA" (any case)
      /// is not stored as a member; it marks the group as mixed content instead.
      /// </summary>
      /// <param name="sym">The symbol to add.</param>
      public void AddSymbol(string sym)
      {
          bool isPcdata = string.Equals(sym, "#PCDATA", StringComparison.OrdinalIgnoreCase);
          if (isPcdata)
          {
              Mixed = true;
              return;
          }
          Members.Add(sym);
      }

      /// <summary>
      /// Records the connector used between members, which fixes the group type:
      /// ',' is a sequence, '|' is an "or" group, '&amp;' is an "and" group.
      /// </summary>
      /// <param name="c">The connector character to add.</param>
      /// <exception cref="SgmlParseException">
      /// If the content is not mixed and has no members yet, or if the group type has been set and the
      /// connector does not match the group type.
      /// </exception>
      public void AddConnector(char c)
      {
          if (!Mixed && Members.Count == 0)
          {
              throw new SgmlParseException(string.Format(CultureInfo.CurrentUICulture, "Missing token before connector '{0}'.", c));
          }

          GroupType gt;
          if (c == ',')
          {
              gt = GroupType.Sequence;
          }
          else if (c == '|')
          {
              gt = GroupType.Or;
          }
          else if (c == '&')
          {
              gt = GroupType.And;
          }
          else
          {
              gt = GroupType.None;
          }

          bool typeAlreadySet = this.m_groupType != GroupType.None;
          if (typeAlreadySet && this.m_groupType != gt)
          {
              throw new SgmlParseException(string.Format(CultureInfo.CurrentUICulture, "Connector '{0}' is inconsistent with {1} group.", c, m_groupType.ToString()));
          }
          m_groupType = gt;
      }

      /// <summary>
      /// Sets this group's <see cref="Occurrence"/> from an occurrence character:
      /// '?' is optional, '+' is one or more, '*' is zero or more; any other
      /// character means required.
      /// </summary>
      /// <param name="c">The occurrence character.</param>
      public void AddOccurrence(char c)
      {
          if (c == '?')
          {
              m_occurrence = Occurrence.Optional;
          }
          else if (c == '+')
          {
              m_occurrence = Occurrence.OneOrMore;
          }
          else if (c == '*')
          {
              m_occurrence = Occurrence.ZeroOrMore;
          }
          else
          {
              m_occurrence = Occurrence.Required;
          }
      }

      /// <summary>
      /// Checks whether an element using this group can contain a specified element.
      /// </summary>
      /// <param name="name">The name of the element to look for.</param>
      /// <param name="dtd">The DTD to use during the checking.</param>
      /// <returns>true if an element using this group can contain the element, otherwise false.</returns>
      /// <exception cref="ArgumentNullException">If <paramref name="dtd"/> is null.</exception>
      /// <remarks>
      /// Rough approximation - this is really assuming an "Or" group
      /// </remarks>
      public bool CanContain(string name, SgmlDtd dtd)
      {
          if (dtd == null)
              throw new ArgumentNullException("dtd");

          // Cheap pass: is the name one of the direct symbol members?
          foreach (object member in Members)
          {
              string symbol = member as string;
              if (symbol != null && string.Equals(symbol, name, StringComparison.OrdinalIgnoreCase))
              {
                  return true;
              }
          }

          // Expensive pass: look through nested groups, and through member elements
          // whose start tag is optional (the element may be implied around the child).
          foreach (object member in Members)
          {
              string symbol = member as string;
              if (symbol == null)
              {
                  Group nested = (Group)member;
                  if (nested.CanContain(name, dtd))
                  {
                      return true;
                  }
                  continue;
              }

              ElementDecl decl = dtd.FindElement(symbol);
              if (decl != null && decl.StartTagOptional && decl.CanContain(name, dtd))
              {
                  return true;
              }
          }
          return false;
      }
  }
  /// <summary>
  /// The declared value types an attribute can have in a DTD.
  /// </summary>
  public enum AttributeType
  {
      /// <summary>
      /// Attribute type not specified.
      /// </summary>
      Default = 0,

      /// <summary>
      /// The attribute contains text (with no markup).
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      CDATA = 1,

      /// <summary>
      /// The attribute value is the name of an entity declared in a DTD.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      ENTITY = 2,

      /// <summary>
      /// The attribute value is a list of names of entities declared in a DTD.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      ENTITIES = 3,

      /// <summary>
      /// The attribute is an id attribute that uniquely identifies the element it appears on.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      [SuppressMessage("Microsoft.Naming", "CA1706", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      ID = 4,

      /// <summary>
      /// The attribute value is a reference to the ID value of an element.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      IDREF = 5,

      /// <summary>
      /// The attribute value is a list of (space separated) references to ID values.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      IDREFS = 6,

      /// <summary>
      /// The attribute value is an SGML Name.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      NAME = 7,

      /// <summary>
      /// The attribute value is a list of (space separated) SGML Names.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      NAMES = 8,

      /// <summary>
      /// The attribute value is a name token: it contains only name characters, and digits
      /// and other valid name characters are also accepted as the first character.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      NMTOKEN = 9,

      /// <summary>
      /// The attribute value is a list of (space separated) name tokens.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      NMTOKENS = 10,

      /// <summary>
      /// The attribute value is a number.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      NUMBER = 11,

      /// <summary>
      /// The attribute value is a list of (space separated) numbers.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      NUMBERS = 12,

      /// <summary>
      /// The attribute value is a number token (i.e. a name that starts with a number).
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      NUTOKEN = 13,

      /// <summary>
      /// The attribute value is a list of number tokens.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      NUTOKENS = 14,

      /// <summary>
      /// The attribute value is a member of the bracketed list of notation names that qualifies this reserved name.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      NOTATION = 15,

      /// <summary>
      /// The attribute value is one of a set of allowed names.
      /// </summary>
      [SuppressMessage("Microsoft.Naming", "CA1705", Justification = "This capitalisation is appropriate since the value it represents has all upper-case capitalisation.")]
      ENUMERATION = 16
  }
  /// <summary>
  /// Defines the different constraints on an attribute's presence on an element.
  /// </summary>
  public enum AttributePresence
  {
      /// <summary>
      /// The attribute has a default value, and its presence is optional.
      /// </summary>
      Default,

      /// <summary>
      /// The attribute has a fixed value, if present.
      /// </summary>
      Fixed,

      /// <summary>
      /// The attribute must always be present on every element.
      /// </summary>
      Required,

      /// <summary>
      /// The attribute is optional (the constraint applies to the attribute, not to the element).
      /// </summary>
      Implied
  }
  /// <summary>
  /// An attribute definition in a DTD: the attribute's name, its type, the enumerated
  /// values it may take (if any), its default value and its presence constraint.
  /// </summary>
  public class AttDef
  {
      private string m_name;
      private AttributeType m_type;
      private string[] m_enumValues;
      private string m_default;
      private AttributePresence m_presence;

      /// <summary>
      /// Initialises a new instance of the <see cref="AttDef"/> class.
      /// </summary>
      /// <param name="name">The name of the attribute.</param>
      public AttDef(string name)
      {
          m_name = name;
      }

      /// <summary>
      /// Gets the name of the attribute declared by this attribute definition.
      /// </summary>
      public string Name
      {
          get { return m_name; }
      }

      /// <summary>
      /// Gets or sets the default value of the attribute.
      /// </summary>
      public string Default
      {
          get { return m_default; }
          set { m_default = value; }
      }

      /// <summary>
      /// Gets the constraint on the attribute's presence on an element.
      /// The value is changed through <see cref="SetPresence"/>.
      /// </summary>
      public AttributePresence AttributePresence
      {
          get { return m_presence; }
      }

      /// <summary>
      /// Gets the possible enumerated values for the attribute.
      /// The value is changed through <see cref="SetEnumeratedType"/>.
      /// </summary>
      [SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Changing this would break backwards compatibility with previous code using this library.")]
      public string[] EnumValues
      {
          get { return m_enumValues; }
      }

      /// <summary>
      /// Sets the attribute definition to have an enumerated value.
      /// </summary>
      /// <param name="enumValues">The possible values in the enumeration.</param>
      /// <param name="type">The type to set the attribute to.</param>
      /// <exception cref="ArgumentException">If the type parameter is not either <see cref="AttributeType.ENUMERATION"/> or <see cref="AttributeType.NOTATION"/>.</exception>
      public void SetEnumeratedType(string[] enumValues, AttributeType type)
      {
          if (type != AttributeType.ENUMERATION && type != AttributeType.NOTATION)
          {
              throw new ArgumentException(string.Format(CultureInfo.CurrentUICulture, "AttributeType {0} is not valid for an attribute definition with an enumerated value.", type));
          }

          m_enumValues = enumValues;
          m_type = type;
      }

      /// <summary>
      /// Gets the <see cref="AttributeType"/> of the attribute declaration.
      /// The value is changed through <see cref="SetType"/> or <see cref="SetEnumeratedType"/>.
      /// </summary>
      public AttributeType Type
      {
          get { return m_type; }
      }

      /// <summary>
      /// Sets the type of the attribute definition from its DTD keyword.
      /// </summary>
      /// <param name="type">The string representation of the attribute type, corresponding to the values in the <see cref="AttributeType"/> enumeration. The keyword is matched without regard to case.</param>
      /// <exception cref="SgmlParseException">If <paramref name="type"/> is null or is not one of the supported keywords.</exception>
      public void SetType(string type)
      {
          // Keywords are matched case-insensitively, in line with SetPresence below;
          // previously a lower-case keyword such as "cdata" was rejected.
          // A null keyword falls through to the default branch and is reported as unsupported.
          string keyword = (type == null) ? null : type.ToUpperInvariant();
          switch (keyword)
          {
              case "CDATA":
                  m_type = AttributeType.CDATA;
                  break;
              case "ENTITY":
                  m_type = AttributeType.ENTITY;
                  break;
              case "ENTITIES":
                  m_type = AttributeType.ENTITIES;
                  break;
              case "ID":
                  m_type = AttributeType.ID;
                  break;
              case "IDREF":
                  m_type = AttributeType.IDREF;
                  break;
              case "IDREFS":
                  m_type = AttributeType.IDREFS;
                  break;
              case "NAME":
                  m_type = AttributeType.NAME;
                  break;
              case "NAMES":
                  m_type = AttributeType.NAMES;
                  break;
              case "NMTOKEN":
                  m_type = AttributeType.NMTOKEN;
                  break;
              case "NMTOKENS":
                  m_type = AttributeType.NMTOKENS;
                  break;
              case "NUMBER":
                  m_type = AttributeType.NUMBER;
                  break;
              case "NUMBERS":
                  m_type = AttributeType.NUMBERS;
                  break;
              case "NUTOKEN":
                  m_type = AttributeType.NUTOKEN;
                  break;
              case "NUTOKENS":
                  m_type = AttributeType.NUTOKENS;
                  break;
              default:
                  throw new SgmlParseException(string.Format(CultureInfo.CurrentUICulture, "Attribute type '{0}' is not supported", type));
          }
      }

      /// <summary>
      /// Sets the attribute presence declaration.
      /// </summary>
      /// <param name="token">The string representation of the attribute presence (FIXED, REQUIRED or IMPLIED, matched without regard to case).</param>
      /// <returns>true if the presence keyword implies that the attribute has a default value (FIXED); false for REQUIRED and IMPLIED.</returns>
      /// <exception cref="SgmlParseException">If <paramref name="token"/> is not one of the supported keywords.</exception>
      public bool SetPresence(string token)
      {
          bool hasDefault = true;
          if (string.Equals(token, "FIXED", StringComparison.OrdinalIgnoreCase))
          {
              m_presence = AttributePresence.Fixed;
          }
          else if (string.Equals(token, "REQUIRED", StringComparison.OrdinalIgnoreCase))
          {
              m_presence = AttributePresence.Required;
              hasDefault = false;
          }
          else if (string.Equals(token, "IMPLIED", StringComparison.OrdinalIgnoreCase))
          {
              m_presence = AttributePresence.Implied;
              hasDefault = false;
          }
          else
          {
              throw new SgmlParseException(string.Format(CultureInfo.CurrentUICulture, "Attribute value '{0}' not supported", token));
          }

          return hasDefault;
      }
  }
  2155. /* JB: Replaced this with a Dictionary<string, AttDef>
  2156. public class AttList : IEnumerable
  2157. {
  2158. Hashtable AttDefs;
  2159. public AttList()
  2160. {
  2161. AttDefs = new Hashtable();
  2162. }
  2163. public void Add(AttDef a)
  2164. {
  2165. AttDefs.Add(a.Name, a);
  2166. }
  2167. public AttDef this[string name]
  2168. {
  2169. get
  2170. {
  2171. return (AttDef)AttDefs[name];
  2172. }
  2173. }
  2174. public IEnumerator GetEnumerator()
  2175. {
  2176. return AttDefs.Values.GetEnumerator();
  2177. }
  2178. }
  2179. */
  2180. /// <summary>
  2181. /// Provides DTD parsing and support for the SgmlParser framework.
  2182. /// </summary>
  2183. public class SgmlDtd
  2184. {
  // Name of the DTD as supplied to the constructor.
  private string m_name;
  // Element declarations, keyed by element name.
  private Dictionary<string, ElementDecl> m_elements = new Dictionary<string, ElementDecl>();
  // Parameter entities, keyed by entity name.
  private Dictionary<string, Entity> m_pentities = new Dictionary<string, Entity>();
  // General entities, keyed by entity name.
  private Dictionary<string, Entity> m_entities = new Dictionary<string, Entity>();
  // Scratch buffer handed to the entity scanning methods.
  private StringBuilder m_sb = new StringBuilder();
  // The entity currently being read; null until an entity has been pushed.
  private Entity m_current;

  /// <summary>
  /// Creates an empty <see cref="SgmlDtd"/> with the given name.
  /// </summary>
  /// <param name="name">The name of the DTD.</param>
  /// <param name="nt">The <see cref="XmlNameTable"/> is NOT used.</param>
  public SgmlDtd(string name, XmlNameTable nt)
  {
      m_name = name;
  }
  /// <summary>
  /// Gets the name this DTD was created with.
  /// </summary>
  public string Name
  {
      get { return this.m_name; }
  }
  /// <summary>
  /// Gets the XmlNameTable associated with this implementation.
  /// </summary>
  /// <value>Always null: this implementation does not keep a name table.</value>
  public XmlNameTable NameTable
  {
      get { return null; }
  }
  /// <summary>
  /// Parses a DTD and creates a <see cref="SgmlDtd"/> instance that encapsulates the DTD.
  /// </summary>
  /// <param name="baseUri">The base URI of the DTD.</param>
  /// <param name="name">The name of the DTD.</param>
  /// <param name="pubid">The public identifier passed to the external DTD entity.</param>
  /// <param name="url">The location of the external DTD; skipped when null or empty.</param>
  /// <param name="subset">The text of the internal subset; skipped when null or empty.</param>
  /// <param name="proxy">The proxy value passed to the external DTD entity.</param>
  /// <param name="nt">The <see cref="XmlNameTable"/> is NOT used.</param>
  /// <returns>A new <see cref="SgmlDtd"/> instance that encapsulates the DTD. When neither <paramref name="url"/> nor <paramref name="subset"/> is supplied the returned DTD is empty.</returns>
  /// <exception cref="SgmlParseException">If parsing fails; the message has the context of the current entity appended.</exception>
  public static SgmlDtd Parse(Uri baseUri, string name, string pubid, string url, string subset, string proxy, XmlNameTable nt)
  {
      SgmlDtd dtd = new SgmlDtd(name, nt);
      if (!string.IsNullOrEmpty(url))
      {
          dtd.PushEntity(baseUri, new Entity(dtd.Name, pubid, url, proxy));
      }

      // The subset is pushed last, so it becomes the current entity and is read first.
      if (!string.IsNullOrEmpty(subset))
      {
          dtd.PushEntity(baseUri, new Entity(name, subset));
      }

      // Nothing was pushed: there is nothing to parse. Previously this path hit a
      // NullReferenceException inside the catch handler below.
      if (dtd.m_current == null)
      {
          return dtd;
      }

      try
      {
          dtd.Parse();
      }
      catch (Exception e)
      {
          // Append the entity context when there is a current entity to ask for it.
          string message = e.Message;
          if (dtd.m_current != null)
          {
              message += dtd.m_current.Context();
          }

          throw new SgmlParseException(message);
      }

      return dtd;
  }
  /// <summary>
  /// Parses a DTD read from a <see cref="TextReader"/> and returns the resulting <see cref="SgmlDtd"/>.
  /// </summary>
  /// <param name="baseUri">The base URI of the DTD.</param>
  /// <param name="name">The name of the DTD.</param>
  /// <param name="input">The reader to load the DTD from.</param>
  /// <param name="subset">The text of the internal subset; skipped when null or empty.</param>
  /// <param name="proxy">The proxy server to use when loading resources.</param>
  /// <param name="nt">The <see cref="XmlNameTable"/> is NOT used.</param>
  /// <returns>A new <see cref="SgmlDtd"/> instance that encapsulates the DTD.</returns>
  /// <exception cref="SgmlParseException">If parsing fails; the message has the context of the current entity appended.</exception>
  [SuppressMessage("Microsoft.Reliability", "CA2000", Justification = "The entities created here are not temporary and should not be disposed here.")]
  public static SgmlDtd Parse(Uri baseUri, string name, TextReader input, string subset, string proxy, XmlNameTable nt)
  {
      SgmlDtd result = new SgmlDtd(name, nt);

      Entity external = new Entity(result.Name, baseUri, input, proxy);
      result.PushEntity(baseUri, external);

      // The subset is pushed on top of the reader entity, so it is read first.
      bool hasSubset = !string.IsNullOrEmpty(subset);
      if (hasSubset)
      {
          Entity internalSubset = new Entity(name, subset);
          result.PushEntity(baseUri, internalSubset);
      }

      try
      {
          result.Parse();
      }
      catch (Exception failure)
      {
          throw new SgmlParseException(failure.Message + result.m_current.Context());
      }

      return result;
  }
  /// <summary>
  /// Looks up a general entity declared in this DTD.
  /// </summary>
  /// <param name="name">The name of the <see cref="Entity"/> to find.</param>
  /// <returns>The entity registered under <paramref name="name"/>, or null when there is none.</returns>
  public Entity FindEntity(string name)
  {
      Entity found;
      return m_entities.TryGetValue(name, out found) ? found : null;
  }
  /// <summary>
  /// Looks up an element declaration in this DTD; the name is upper-cased before the lookup.
  /// </summary>
  /// <param name="name">The name of the <see cref="ElementDecl"/> to find and return.</param>
  /// <returns>The <see cref="ElementDecl"/> matching the specified name, or null when there is none.</returns>
  public ElementDecl FindElement(string name)
  {
      string key = name.ToUpperInvariant();
      ElementDecl found;
      return this.m_elements.TryGetValue(key, out found) ? found : null;
  }
  //-------------------------------- Parser -------------------------

  // Opens the entity with the current entity as its parent, makes it the current
  // entity and reads its first character.
  private void PushEntity(Uri baseUri, Entity e)
  {
      Entity parent = m_current;
      e.Open(parent, baseUri);
      m_current = e;
      e.ReadChar();
  }
  // Closes the current entity and resumes reading from its parent.
  // After the outermost entity has been popped the current entity is null.
  private void PopEntity()
  {
      // The original checked for null before Close() but then dereferenced
      // m_current anyway; with no current entity there is nothing to pop.
      if (this.m_current == null)
      {
          return;
      }

      this.m_current.Close();
      this.m_current = this.m_current.Parent;
  }
  // Top-level DTD loop: skips whitespace, dispatches markup declarations ('<'),
  // expands parameter entity references ('%') and pops finished entities until
  // no entity is left to read from.
  private void Parse()
  {
      // Nothing has been pushed, so there is nothing to parse
      // (previously a NullReferenceException).
      if (this.m_current == null)
      {
          return;
      }

      char ch = this.m_current.Lastchar;
      while (true)
      {
          switch (ch)
          {
              case Entity.EOF:
                  PopEntity();
                  if (this.m_current == null)
                  {
                      return;
                  }
                  ch = this.m_current.Lastchar;
                  break;
              case ' ':
              case '\n':
              case '\r':
              case '\t':
                  ch = this.m_current.ReadChar();
                  break;
              case '<':
                  ParseMarkup();
                  ch = this.m_current.ReadChar();
                  break;
              case '%':
                  Entity e = ParseParameterEntity(SgmlDtd.WhiteSpace);
                  try
                  {
                      PushEntity(this.m_current.ResolvedUri, e);
                  }
                  catch (Exception)
                  {
                      // Deliberate best effort: an entity that cannot be pushed is
                      // skipped and parsing carries on with whatever entity is current.
                      // BUG: need an error log.
                  }
                  ch = this.m_current.Lastchar;
                  break;
              default:
                  // NOTE(review): this relies on Error() throwing; if it returned,
                  // the loop would spin on the same character - confirm.
                  this.m_current.Error("Unexpected character '{0}'", ch);
                  break;
          }
      }
  }
  // Parses one markup declaration; the caller has already seen the opening '<'.
  // Handles comments ("<!--"), marked sections ("<![") and the ENTITY, ELEMENT and
  // ATTLIST declarations; anything else is reported as an error.
  void ParseMarkup()
  {
      char ch = this.m_current.ReadChar();
      if (ch != '!')
      {
          // The message has a {0} placeholder, so the offending character must be
          // passed along (the original call omitted it).
          this.m_current.Error("Found '{0}', but expecting declaration starting with '<!'", ch);
          return;
      }

      ch = this.m_current.ReadChar();
      if (ch == '-')
      {
          ch = this.m_current.ReadChar();
          if (ch != '-')
          {
              this.m_current.Error("Expecting comment '<!--' but found {0}", ch);
          }
          this.m_current.ScanToEnd(this.m_sb, "Comment", "-->");
      }
      else if (ch == '[')
      {
          ParseMarkedSection();
      }
      else
      {
          string token = this.m_current.ScanToken(this.m_sb, SgmlDtd.WhiteSpace, true);
          switch (token)
          {
              case "ENTITY":
                  ParseEntity();
                  break;
              case "ELEMENT":
                  ParseElementDecl();
                  break;
              case "ATTLIST":
                  ParseAttList();
                  break;
              default:
                  this.m_current.Error("Invalid declaration '<!{0}'. Expecting 'ENTITY', 'ELEMENT' or 'ATTLIST'.", token);
                  break;
          }
      }
  }
  // Skips consecutive comments inside a markup declaration, starting from the
  // last character read, and returns the first character that is not a '-'.
  char ParseDeclComments()
  {
      char c = m_current.Lastchar;
      while (true)
      {
          if (c != '-')
          {
              return c;
          }
          c = ParseDeclComment(true);
      }
  }
  // Scans over one comment inside a markup declaration and returns the character
  // found after skipping the whitespace that follows it. When 'full' is true the
  // next character read must be '-', otherwise an error is reported.
  char ParseDeclComment(bool full)
  {
      char second = m_current.ReadChar();
      bool delimiterOk = !full || second == '-';
      if (!delimiterOk)
      {
          m_current.Error("Expecting comment delimiter '--' but found {0}", second);
      }

      m_current.ScanToEnd(m_sb, "Markup Comment", "--");
      char next = m_current.SkipWhitespace();
      return next;
  }
  // Parses a marked section of the form: <![ name [ ... ]]>
  // The section name is matched case-insensitively against INCLUDE and IGNORE.
  void ParseMarkedSection()
  {
      m_current.ReadChar(); // step past the current character
      string keyword = ScanName("[");

      if (string.Equals(keyword, "INCLUDE", StringComparison.OrdinalIgnoreCase))
      {
          ParseIncludeSection();
          return;
      }

      if (string.Equals(keyword, "IGNORE", StringComparison.OrdinalIgnoreCase))
      {
          ParseIgnoreSection();
          return;
      }

      m_current.Error("Unsupported marked section type '{0}'", keyword);
  }
  // INCLUDE marked sections are not implemented; reaching one always throws.
  [SuppressMessage("Microsoft.Performance", "CA1822", Justification = "This is not yet implemented and will use 'this' in the future.")]
  [SuppressMessage("Microsoft.Globalization", "CA1303", Justification = "The use of a literal here is only due to this not yet being implemented.")]
  private void ParseIncludeSection()
  {
      NotImplementedException notImplemented = new NotImplementedException("Include Section");
      throw notImplemented;
  }
  // Skips an IGNORE marked section: expects '[' after optional whitespace and
  // then scans up to the closing "]]>".
  void ParseIgnoreSection()
  {
      char opener = m_current.SkipWhitespace();
      if (opener != '[')
      {
          m_current.Error("Expecting '[' but found {0}", opener);
      }

      m_current.ScanToEnd(m_sb, "Conditional Section", "]]>");
  }
  // Skips whitespace and scans a name terminated by one of the characters in
  // 'term'. The name may be given as a parameter entity reference, in which
  // case the trimmed literal of that (internal) entity is returned.
  string ScanName(string term)
  {
      char first = m_current.SkipWhitespace();
      if (first != '%')
      {
          return m_current.ScanToken(m_sb, term, true);
      }

      Entity referenced = ParseParameterEntity(term);
      first = m_current.Lastchar;

      // bugbug - need to support external and nested parameter entities
      if (!referenced.IsInternal)
      {
          throw new NotSupportedException("External parameter entity resolution");
      }

      string literal = referenced.Literal;
      return literal.Trim();
  }
  // Reads a parameter entity reference and returns the referenced entity.
  // Works much like a plain token scan, except that ';' also ends the name;
  // a trailing ';' is consumed.
  private Entity ParseParameterEntity(string term)
  {
      m_current.ReadChar();

      string terminators = ";" + term;
      string entityName = m_current.ScanToken(m_sb, terminators, false);

      if (m_current.Lastchar == ';')
      {
          m_current.ReadChar();
      }

      return GetParameterEntity(entityName);
  }
  // Returns the parameter entity declared under 'name'; reports an error on the
  // current entity when no such parameter entity exists.
  private Entity GetParameterEntity(string name)
  {
      Entity declared;
      bool known = m_pentities.TryGetValue(name, out declared);
      if (!known || declared == null)
      {
          m_current.Error("Reference to undefined parameter entity '{0}'", name);
      }

      return declared;
  }
  /// <summary>
  /// Returns a dictionary for looking up entities by their <see cref="Entity.Literal"/> value.
  /// </summary>
  /// <remarks>
  /// Entities whose <see cref="Entity.Literal"/> is null are left out, because a null key
  /// cannot be stored in a dictionary. When several entities share the same literal, the
  /// one enumerated last is kept.
  /// </remarks>
  /// <returns>A new dictionary for looking up entities by their <see cref="Entity.Literal"/> value.</returns>
  [SuppressMessage("Microsoft.Design", "CA1024", Justification = "This method creates and copies a dictionary, so exposing it as a property is not appropriate.")]
  public Dictionary<string, Entity> GetEntitiesLiteralNameLookup()
  {
      Dictionary<string, Entity> lookup = new Dictionary<string, Entity>();
      foreach (Entity entity in this.m_entities.Values)
      {
          string literal = entity.Literal;
          if (literal == null)
          {
              // Using a null key would throw ArgumentNullException.
              continue;
          }

          lookup[literal] = entity;
      }

      return lookup;
  }
  // Characters treated as whitespace token terminators by the DTD parser.
  private const string WhiteSpace = " \r\n\t";

  // Parses the remainder of an ENTITY declaration and registers the entity.
  // Accepted forms after the name: a quoted literal, a literal-type keyword
  // (as recognised by Entity.IsLiteralType) followed by a literal, or an
  // external identifier (PUBLIC "pubid" ["uri"] or SYSTEM ["uri"]).
  // A leading '%' marks a parameter entity. When an entity name is declared
  // more than once the first declaration is kept and later ones are ignored
  // (previously a redeclaration made Dictionary.Add throw).
  private void ParseEntity()
  {
      char ch = this.m_current.SkipWhitespace();
      bool pe = (ch == '%');
      if (pe)
      {
          // parameter entity.
          this.m_current.ReadChar(); // move to next char
          ch = this.m_current.SkipWhitespace();
      }

      string name = this.m_current.ScanToken(this.m_sb, SgmlDtd.WhiteSpace, true);
      ch = this.m_current.SkipWhitespace();
      Entity e = null;
      if (ch == '"' || ch == '\'')
      {
          string literal = this.m_current.ScanLiteral(this.m_sb, ch);
          e = new Entity(name, literal);
      }
      else
      {
          string pubid = null;
          string extid = null;
          string tok = this.m_current.ScanToken(this.m_sb, SgmlDtd.WhiteSpace, true);
          if (Entity.IsLiteralType(tok))
          {
              ch = this.m_current.SkipWhitespace();
              string literal = this.m_current.ScanLiteral(this.m_sb, ch);
              e = new Entity(name, literal);
              e.SetLiteralType(tok);
          }
          else
          {
              extid = tok;
              if (string.Equals(extid, "PUBLIC", StringComparison.OrdinalIgnoreCase))
              {
                  ch = this.m_current.SkipWhitespace();
                  if (ch == '"' || ch == '\'')
                  {
                      pubid = this.m_current.ScanLiteral(this.m_sb, ch);
                  }
                  else
                  {
                      this.m_current.Error("Expecting public identifier literal but found '{0}'",ch);
                  }
              }
              else if (!string.Equals(extid, "SYSTEM", StringComparison.OrdinalIgnoreCase))
              {
                  this.m_current.Error("Invalid external identifier '{0}'. Expecing 'PUBLIC' or 'SYSTEM'.", extid);
              }

              // The system identifier is optional: a '>' may follow directly.
              string uri = null;
              ch = this.m_current.SkipWhitespace();
              if (ch == '"' || ch == '\'')
              {
                  uri = this.m_current.ScanLiteral(this.m_sb, ch);
              }
              else if (ch != '>')
              {
                  this.m_current.Error("Expecting system identifier literal but found '{0}'",ch);
              }
              e = new Entity(name, pubid, uri, this.m_current.Proxy);
          }
      }

      ch = this.m_current.SkipWhitespace();
      if (ch == '-')
      {
          ch = ParseDeclComments();
      }
      if (ch != '>')
      {
          this.m_current.Error("Expecting end of entity declaration '>' but found '{0}'", ch);
      }

      // First declaration wins; this lets declarations read earlier (for example
      // from an internal subset) take precedence over later ones.
      Dictionary<string, Entity> table = pe ? this.m_pentities : this.m_entities;
      if (!table.ContainsKey(e.Name))
      {
          table.Add(e.Name, e);
      }
  }
  // Parses the remainder of an ELEMENT declaration: a name or name group, the
  // optional start/end tag minimisation flags ('O' or '-'), the content model,
  // optional exclusions "-(...)" and inclusions "+(...)", and the closing '>'.
  // One ElementDecl is registered per declared name, keyed by its upper-cased name.
  private void ParseElementDecl()
  {
      char c = m_current.SkipWhitespace();
      string[] elementNames = ParseNameGroup(c, true);

      bool startTagOptional = false;
      bool endTagOptional = false;
      c = char.ToUpperInvariant(m_current.SkipWhitespace());
      if (c == 'O' || c == '-')
      {
          startTagOptional = (c == 'O');
          m_current.ReadChar();
          c = char.ToUpperInvariant(m_current.SkipWhitespace());
          if (c == 'O' || c == '-')
          {
              endTagOptional = (c == 'O');
              c = m_current.ReadChar();
          }
      }

      c = m_current.SkipWhitespace();
      ContentModel model = ParseContentModel(c);
      c = m_current.SkipWhitespace();

      string[] excluded = null;
      string[] included = null;

      if (c == '-')
      {
          // Either an exclusion group "-(" or the start of a "--" comment.
          c = m_current.ReadChar();
          switch (c)
          {
              case '(':
                  excluded = ParseNameGroup(c, true);
                  c = m_current.SkipWhitespace();
                  break;
              case '-':
                  c = ParseDeclComment(false);
                  break;
              default:
                  m_current.Error("Invalid syntax at '{0}'", c);
                  break;
          }
      }

      if (c == '-')
      {
          c = ParseDeclComments();
      }

      if (c == '+')
      {
          c = m_current.ReadChar();
          if (c != '(')
          {
              m_current.Error("Expecting inclusions name group", c);
          }
          included = ParseNameGroup(c, true);
          c = m_current.SkipWhitespace();
      }

      if (c == '-')
      {
          c = ParseDeclComments();
      }

      if (c != '>')
      {
          m_current.Error("Expecting end of ELEMENT declaration '>' but found '{0}'", c);
      }

      foreach (string declared in elementNames)
      {
          string key = declared.ToUpperInvariant();
          ElementDecl decl = new ElementDecl(key, startTagOptional, endTagOptional, model, included, excluded);
          m_elements.Add(key, decl);
      }
  }
  // Token terminators inside a name group: whitespace, the '|' and ',' separators and ')'.
  static string ngterm = " \r\n\t|,)";

  // Parses either a single name or a parenthesised name group such as "(a|b,c)"
  // and returns the upper-cased names. 'ch' is the character the group starts at;
  // 'nmtokens' is forwarded to the token scanner. A parameter entity reference
  // inside the group is expanded through ParseNameList.
  string[] ParseNameGroup(char ch, bool nmtokens)
  {
      List<string> collected = new List<string>();

      if (ch != '(')
      {
          // Not a group: a single whitespace-terminated name.
          string single = m_current.ScanToken(m_sb, SgmlDtd.WhiteSpace, nmtokens);
          collected.Add(single.ToUpperInvariant());
          return collected.ToArray();
      }

      m_current.ReadChar(); // step past '('
      char next = m_current.SkipWhitespace();
      while (next != ')')
      {
          // skip whitespace, scan name (which may be parameter entity reference
          // which is then expanded to a name)
          next = m_current.SkipWhitespace();
          if (next == '%')
          {
              Entity referenced = ParseParameterEntity(SgmlDtd.ngterm);
              PushEntity(m_current.ResolvedUri, referenced);
              ParseNameList(collected, nmtokens);
              PopEntity();
              next = m_current.Lastchar;
          }
          else
          {
              string token = m_current.ScanToken(m_sb, SgmlDtd.ngterm, nmtokens);
              collected.Add(token.ToUpperInvariant());
          }

          next = m_current.SkipWhitespace();
          if (next == '|' || next == ',')
          {
              next = m_current.ReadChar();
          }
      }

      m_current.ReadChar(); // consume ')'
      return collected.ToArray();
  }
  // Reads '|'-separated names from the current entity until its end and appends
  // them, upper-cased, to 'names'. Nested parameter entity references are
  // expanded recursively.
  // NOTE(review): 'nmtokens' is only passed on to nested lists; tokens scanned
  // here always use 'true', and ',' is not consumed as a separator, unlike in
  // ParseNameGroup - confirm whether that is intended.
  void ParseNameList(List<string> names, bool nmtokens)
  {
      char c = m_current.Lastchar;
      for (c = m_current.SkipWhitespace(); c != Entity.EOF; )
      {
          if (c == '%')
          {
              Entity referenced = ParseParameterEntity(SgmlDtd.ngterm);
              PushEntity(m_current.ResolvedUri, referenced);
              ParseNameList(names, nmtokens);
              PopEntity();
              c = m_current.Lastchar;
          }
          else
          {
              string token = m_current.ScanToken(m_sb, SgmlDtd.ngterm, true);
              names.Add(token.ToUpperInvariant());
          }

          c = m_current.SkipWhitespace();
          if (c == '|')
          {
              c = m_current.ReadChar();
              c = m_current.SkipWhitespace();
          }
      }
  }
  // Token terminators for a declared-content keyword: whitespace and '>'.
  static string dcterm = " \r\n\t>";

  // Parses the content model of an element declaration, starting at 'ch':
  // a parenthesised model group with an optional occurrence indicator,
  // a parameter entity reference that expands to a content model,
  // or a declared-content name.
  private ContentModel ParseContentModel(char ch)
  {
      ContentModel model = new ContentModel();
      switch (ch)
      {
          case '(':
              m_current.ReadChar();
              ParseModel(')', model);
              char suffix = m_current.ReadChar();
              if (suffix == '?' || suffix == '+' || suffix == '*')
              {
                  model.AddOccurrence(suffix);
                  m_current.ReadChar();
              }
              break;

          case '%':
              Entity referenced = ParseParameterEntity(SgmlDtd.dcterm);
              PushEntity(m_current.ResolvedUri, referenced);
              model = ParseContentModel(m_current.Lastchar);
              PopEntity(); // bugbug should be at EOF.
              break;

          default:
              string declaredContent = ScanName(SgmlDtd.dcterm);
              model.SetDeclaredContent(declaredContent);
              break;
      }

      return model;
  }
  // Token terminators inside a content model: whitespace, connectors, parentheses
  // and occurrence indicators.
  static string cmterm = " \r\n\t,&|()?+*";

  // Parses content model tokens into 'cm' until the terminator 'cmt' is reached at
  // the group depth the method was entered with. It is called with ')' for a model
  // group and with Entity.EOF when part of the model comes from a parameter entity.
  void ParseModel(char cmt, ContentModel cm)
  {
      int entryDepth = cm.CurrentDepth;
      char c = m_current.Lastchar;
      c = m_current.SkipWhitespace();

      // the entity must terminate while inside the content model.
      while (c != cmt || cm.CurrentDepth > entryDepth)
      {
          if (c == Entity.EOF)
          {
              m_current.Error("Content Model was not closed");
          }

          switch (c)
          {
              case '%':
              {
                  Entity referenced = ParseParameterEntity(SgmlDtd.cmterm);
                  PushEntity(m_current.ResolvedUri, referenced);
                  ParseModel(Entity.EOF, cm);
                  PopEntity();
                  c = m_current.SkipWhitespace();
                  break;
              }

              case '(':
              {
                  cm.PushGroup();
                  m_current.ReadChar(); // consume '('
                  c = m_current.SkipWhitespace();
                  break;
              }

              case ')':
              {
                  c = m_current.ReadChar(); // consume ')'
                  if (c == '*' || c == '+' || c == '?')
                  {
                      cm.AddOccurrence(c);
                      c = m_current.ReadChar();
                  }
                  if (cm.PopGroup() < entryDepth)
                  {
                      m_current.Error("Parameter entity cannot close a paren outside it's own scope");
                  }
                  c = m_current.SkipWhitespace();
                  break;
              }

              case ',':
              case '|':
              case '&':
              {
                  cm.AddConnector(c);
                  m_current.ReadChar(); // skip connector
                  c = m_current.SkipWhitespace();
                  break;
              }

              default:
              {
                  string symbol;
                  if (c == '#')
                  {
                      // '#' is not a valid name character, so it is read separately
                      // and put back in front of the scanned token.
                      c = m_current.ReadChar();
                      symbol = "#" + m_current.ScanToken(m_sb, SgmlDtd.cmterm, true);
                  }
                  else
                  {
                      symbol = m_current.ScanToken(m_sb, SgmlDtd.cmterm, true);
                  }
                  symbol = symbol.ToUpperInvariant();

                  c = m_current.Lastchar;
                  if (c == '?' || c == '+' || c == '*')
                  {
                      // A symbol with its own occurrence indicator is wrapped in a group.
                      cm.PushGroup();
                      cm.AddSymbol(symbol);
                      cm.AddOccurrence(c);
                      cm.PopGroup();
                      m_current.ReadChar(); // skip the occurrence indicator
                      c = m_current.SkipWhitespace();
                  }
                  else
                  {
                      cm.AddSymbol(symbol);
                      c = m_current.SkipWhitespace();
                  }
                  break;
              }
          }
      }
  }
  // Parses the remainder of an ATTLIST declaration: the element name (or name
  // group) followed by attribute definitions up to '>', then attaches the
  // definitions to every named element. A name without an element declaration
  // is reported as an error.
  void ParseAttList()
  {
      char first = m_current.SkipWhitespace();
      string[] elementNames = ParseNameGroup(first, true);

      Dictionary<string, AttDef> definitions = new Dictionary<string, AttDef>();
      ParseAttList(definitions, '>');

      foreach (string elementName in elementNames)
      {
          ElementDecl target;
          bool declared = m_elements.TryGetValue(elementName, out target);
          if (!declared)
          {
              m_current.Error("ATTLIST references undefined ELEMENT {0}", elementName);
          }
          target.AddAttDefs(definitions);
      }
  }
  // Token terminators for a parameter entity reference in an attribute list:
  // whitespace and '>'.
  static string peterm = " \t\r\n>";

  // Reads attribute definitions into 'list' until the terminator 'term' is seen.
  // Parameter entity references are expanded by parsing the entity's content as
  // a nested attribute list (terminated by Entity.EOF); declaration comments are
  // skipped.
  void ParseAttList(Dictionary<string, AttDef> list, char term)
  {
      char c = m_current.SkipWhitespace();
      while (c != term)
      {
          switch (c)
          {
              case '%':
                  Entity referenced = ParseParameterEntity(SgmlDtd.peterm);
                  PushEntity(m_current.ResolvedUri, referenced);
                  ParseAttList(list, Entity.EOF);
                  PopEntity();
                  c = m_current.SkipWhitespace();
                  break;

              case '-':
                  c = ParseDeclComments();
                  break;

              default:
                  AttDef definition = ParseAttDef(c);
                  list.Add(definition.Name, definition);
                  break;
          }

          c = m_current.SkipWhitespace();
      }
  }
  // Parses one attribute definition: name, type and default, with declaration
  // comments allowed between the parts. The attribute name is upper-cased.
  // The incoming 'ch' is not used; the current character is re-read first.
  AttDef ParseAttDef(char ch)
  {
      ch = m_current.SkipWhitespace();
      string attributeName = ScanName(SgmlDtd.WhiteSpace).ToUpperInvariant();
      AttDef definition = new AttDef(attributeName);

      ch = m_current.SkipWhitespace();
      if (ch == '-')
      {
          ch = ParseDeclComments();
      }
      ParseAttType(ch, definition);

      ch = m_current.SkipWhitespace();
      if (ch == '-')
      {
          ch = ParseDeclComments();
      }
      ParseAttDefault(ch, definition);

      ch = m_current.SkipWhitespace();
      if (ch == '-')
      {
          ch = ParseDeclComments();
      }

      return definition;
  }
  // Parses the type part of an attribute definition starting at 'ch' and stores
  // it in 'attdef': a parameter entity reference (expanded and parsed
  // recursively), an enumeration "(a|b)", NOTATION followed by a name group,
  // or a plain type keyword handed to AttDef.SetType.
  void ParseAttType(char ch, AttDef attdef)
  {
      if (ch == '%')
      {
          Entity referenced = ParseParameterEntity(SgmlDtd.WhiteSpace);
          PushEntity(m_current.ResolvedUri, referenced);
          ParseAttType(m_current.Lastchar, attdef);
          PopEntity(); // bugbug - are we at the end of the entity?
          ch = m_current.Lastchar;
          return;
      }

      if (ch == '(')
      {
          string[] enumerated = ParseNameGroup(ch, false);
          attdef.SetEnumeratedType(enumerated, AttributeType.ENUMERATION);
          return;
      }

      string keyword = ScanName(SgmlDtd.WhiteSpace);
      bool isNotation = string.Equals(keyword, "NOTATION", StringComparison.OrdinalIgnoreCase);
      if (!isNotation)
      {
          attdef.SetType(keyword);
          return;
      }

      ch = m_current.SkipWhitespace();
      if (ch != '(')
      {
          m_current.Error("Expecting name group '(', but found '{0}'", ch);
      }
      string[] notations = ParseNameGroup(ch, true);
      attdef.SetEnumeratedType(notations, AttributeType.NOTATION);
  }
  // Parses the default part of an attribute definition starting at 'ch': an
  // optional "#keyword" presence declaration, followed - when the presence
  // allows a default - by a quoted literal or a bare name (upper-cased).
  // A parameter entity reference is expanded and parsed recursively.
  void ParseAttDefault(char ch, AttDef attdef)
  {
      if (ch == '%')
      {
          Entity referenced = ParseParameterEntity(SgmlDtd.WhiteSpace);
          PushEntity(m_current.ResolvedUri, referenced);
          ParseAttDefault(m_current.Lastchar, attdef);
          PopEntity(); // bugbug - are we at the end of the entity?
          ch = m_current.Lastchar;
          return;
      }

      bool expectsValue = true;
      if (ch == '#')
      {
          m_current.ReadChar();
          string presence = m_current.ScanToken(m_sb, SgmlDtd.WhiteSpace, true);
          expectsValue = attdef.SetPresence(presence);
          ch = m_current.SkipWhitespace();
      }

      if (!expectsValue)
      {
          return;
      }

      string value;
      if (ch == '"' || ch == '\'')
      {
          value = m_current.ScanLiteral(m_sb, ch);
      }
      else
      {
          // bugbug - must be one of the enumerated names.
          value = m_current.ScanToken(m_sb, SgmlDtd.WhiteSpace, false).ToUpperInvariant();
      }

      attdef.Default = value;
      ch = m_current.SkipWhitespace();
  }
  2950. }
  /// <summary>
  /// String helper methods shared by the SGML parser.
  /// </summary>
  internal static class StringUtilities
  {
      /// <summary>
      /// Compares two strings using ordinal, case-insensitive rules.
      /// </summary>
      /// <param name="a">The first string; may be null.</param>
      /// <param name="b">The second string; may be null.</param>
      /// <returns>true if both strings are equal ignoring case, or both are null.</returns>
      public static bool EqualsIgnoreCase(string a, string b)
      {
          return StringComparer.OrdinalIgnoreCase.Equals(a, b);
      }
  }
  2957. }