PageRenderTime 50ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/mcs/class/System.Web/Test/mainsoft/MainsoftWebTest/HtmlAgilityPack/HtmlNode.cs

https://bitbucket.org/danipen/mono
C# | 2259 lines | 1637 code | 250 blank | 372 comment | 305 complexity | 510071c773a681da8cd9272ad63ae537 MD5 | raw file
Possible License(s): Unlicense, Apache-2.0, LGPL-2.0, MPL-2.0-no-copyleft-exception, CC-BY-SA-3.0, GPL-2.0

Large files are truncated, but you can click here to view the full file

  1. // HtmlAgilityPack V1.0 - Simon Mourier <simonm@microsoft.com>
  2. using System;
  3. using System.Collections;
  4. using System.IO;
  5. using System.Xml;
  6. using System.Xml.XPath;
  7. namespace HtmlAgilityPack
  8. {
  /// <summary>
  /// Flags that describe the behavior of an Element node.
  /// Each member is a distinct bit, so members may be combined with the bitwise OR operator
  /// (for example <c>Empty | Closed</c>) and tested with the bitwise AND operator.
  /// </summary>
  [Flags]
  public enum HtmlElementFlag
  {
      /// <summary>
      /// The node is a CDATA node.
      /// </summary>
      CData = 1,

      /// <summary>
      /// The node is empty. META or IMG are examples of such nodes.
      /// </summary>
      Empty = 2,

      /// <summary>
      /// The node will automatically be closed during parsing.
      /// </summary>
      Closed = 4,

      /// <summary>
      /// The node can overlap.
      /// </summary>
      CanOverlap = 8
  }
  /// <summary>
  /// Identifies what kind of node an HtmlNode instance is.
  /// </summary>
  public enum HtmlNodeType
  {
      /// <summary>
      /// The document root.
      /// </summary>
      Document = 0,

      /// <summary>
      /// An HTML element.
      /// </summary>
      Element = 1,

      /// <summary>
      /// An HTML comment.
      /// </summary>
      Comment = 2,

      /// <summary>
      /// A text node; its parent is always an element or a document node.
      /// </summary>
      Text = 3
  }
  53. /// <summary>
  54. /// Represents an HTML node.
  55. /// </summary>
  56. public class HtmlNode: IXPathNavigable
  57. {
  /// <summary>
  /// The name carried by every comment node: '#comment'.
  /// </summary>
  public static readonly string HtmlNodeTypeNameComment = "#comment";

  /// <summary>
  /// The name carried by the document node: '#document'.
  /// </summary>
  public static readonly string HtmlNodeTypeNameDocument = "#document";

  /// <summary>
  /// The name carried by every text node: '#text'.
  /// </summary>
  public static readonly string HtmlNodeTypeNameText = "#text";

  /// <summary>
  /// Per-tag behavior table. Keys are lowercase tag names, values are (boxed)
  /// combinations of HtmlElementFlag. It is filled with defaults by the static constructor.
  /// </summary>
  public static Hashtable ElementsFlags;

  // Kind of node; returned by NodeType.
  internal HtmlNodeType _nodetype;

  // Sibling and parent links; returned by NextSibling, PreviousSibling and ParentNode.
  internal HtmlNode _nextnode;
  internal HtmlNode _prevnode;
  internal HtmlNode _parentnode;

  // Document this node belongs to; returned by OwnerDocument.
  internal HtmlDocument _ownerdocument;

  // Child list (created on first access through ChildNodes) and attribute list.
  internal HtmlNodeCollection _childnodes;
  internal HtmlAttributeCollection _attributes;

  // Source location; returned by Line, LinePosition and StreamPosition.
  // Numeric and boolean fields below rely on the CLR defaults (0 / false / null).
  internal int _line;
  internal int _lineposition;
  internal int _streamposition;

  // Offsets into the owner document text used by InnerHtml, OuterHtml and Name.
  internal int _innerstartindex;
  internal int _innerlength;
  internal int _outerstartindex;
  internal int _outerlength;
  internal int _namestartindex;
  internal int _namelength;

  // NOTE(review): not used by the members visible in this part of the file; purpose to be confirmed.
  internal bool _starttag;

  // Cached node name; when null, Name computes it from the document text.
  internal string _name;

  // NOTE(review): not used by the members visible in this part of the file; purpose to be confirmed.
  internal HtmlNode _prevwithsamename;

  // Node that closes this one. Null while the node is open (see Closed); equal to
  // 'this' for comment, document and text nodes.
  internal HtmlNode _endnode;

  // When true, the matching cached HTML below is stale and must be regenerated.
  internal bool _innerchanged;
  internal bool _outerchanged;

  // Cached results of InnerHtml / OuterHtml.
  internal string _innerhtml;
  internal string _outerhtml;
  /// <summary>
  /// Fills ElementsFlags with the default behavior of the known HTML tags.
  /// </summary>
  static HtmlNode()
  {
      ElementsFlags = new Hashtable();

      // Tags whose content may be anything.
      string[] cdataTags = new string[] { "script", "style", "noxhtml" };
      foreach (string tag in cdataTags)
      {
          ElementsFlags.Add(tag, HtmlElementFlag.CData);
      }

      // Tags that cannot contain other tags. "option" is listed last: it sometimes
      // has content and sometimes does not, and is treated as empty.
      string[] emptyTags = new string[]
      {
          "base", "link", "meta", "isindex", "hr", "col", "img", "param",
          "embed", "frame", "wbr", "bgsound", "spacer", "keygen", "area",
          "input", "basefont", "option"
      };
      foreach (string tag in emptyTags)
      {
          ElementsFlags.Add(tag, HtmlElementFlag.Empty);
      }

      ElementsFlags.Add("form", HtmlElementFlag.CanOverlap | HtmlElementFlag.Empty);

      // Tags whose closing tag is equivalent to the opening tag:
      //   <p>bla</p>bla stays <p>bla</p>bla
      //   <p>bla<p>bla stays <p>bla<p>bla, and does not become
      //   <p>bla</p><p>bla</p> or <p>bla<p>bla</p></p>
      // <br> is handled the same way.
      ElementsFlags.Add("br", HtmlElementFlag.Empty | HtmlElementFlag.Closed);
      ElementsFlags.Add("p", HtmlElementFlag.Empty | HtmlElementFlag.Closed);
  }
  /// <summary>
  /// Determines if an element node is closed.
  /// </summary>
  /// <param name="name">The name of the element node to check. May not be null.</param>
  /// <returns>true if the name is the name of a closed element node, false otherwise.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public static bool IsClosedElement(string name)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }

      // Tag names are identifiers, not linguistic text: lower-case them with the invariant
      // culture so the table lookup does not depend on the current thread culture.
      string key = name.ToLower(System.Globalization.CultureInfo.InvariantCulture);
      object flag = ElementsFlags[key];
      if (flag == null)
      {
          return false;
      }
      return (((HtmlElementFlag)flag) & HtmlElementFlag.Closed) != 0;
  }
  /// <summary>
  /// Determines if an element node can be kept overlapped.
  /// </summary>
  /// <param name="name">The name of the element node to check. May not be null.</param>
  /// <returns>true if the name is the name of an element node that can be kept overlapped, false otherwise.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public static bool CanOverlapElement(string name)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }

      // Tag names are identifiers, not linguistic text: lower-case them with the invariant
      // culture so the table lookup does not depend on the current thread culture.
      string key = name.ToLower(System.Globalization.CultureInfo.InvariantCulture);
      object flag = ElementsFlags[key];
      if (flag == null)
      {
          return false;
      }
      return (((HtmlElementFlag)flag) & HtmlElementFlag.CanOverlap) != 0;
  }
  /// <summary>
  /// Determines if a text corresponds to the closing tag of a node that can be kept overlapped.
  /// </summary>
  /// <param name="text">The text to check, expected in the form <c>&lt;/name&gt;</c>. May not be null.</param>
  /// <returns>true if the text is a closing tag whose name can overlap, false otherwise.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
  public static bool IsOverlappedClosingElement(string text)
  {
      if (text == null)
      {
          throw new ArgumentNullException("text");
      }

      // The shortest possible closing tag is "</x>" (4 characters); anything shorter
      // cannot be one. A 4-character tag is valid and must still be examined.
      if (text.Length < 4)
      {
          return false;
      }

      if ((text[0] != '<') ||
          (text[text.Length - 1] != '>') ||
          (text[1] != '/'))
      {
          return false;
      }

      // Strip the leading "</" and the trailing ">".
      string name = text.Substring(2, text.Length - 3);
      return CanOverlapElement(name);
  }
  /// <summary>
  /// Determines if an element node is a CDATA element node.
  /// </summary>
  /// <param name="name">The name of the element node to check. May not be null.</param>
  /// <returns>true if the name is the name of a CDATA element node, false otherwise.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public static bool IsCDataElement(string name)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }

      // Culture-sensitive lower-casing would turn "SCRIPT" into a string with a dotless i
      // under Turkish casing rules and miss the "script" key; use the invariant culture.
      string key = name.ToLower(System.Globalization.CultureInfo.InvariantCulture);
      object flag = ElementsFlags[key];
      if (flag == null)
      {
          return false;
      }
      return (((HtmlElementFlag)flag) & HtmlElementFlag.CData) != 0;
  }
  /// <summary>
  /// Determines if an element node is defined as empty.
  /// </summary>
  /// <param name="name">The name of the element node to check. May not be null.</param>
  /// <returns>
  /// true if the name is empty, starts with '!' or '?', or is flagged as Empty in
  /// ElementsFlags; false otherwise.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public static bool IsEmptyElement(string name)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }

      if (name.Length == 0)
      {
          return true;
      }

      // <!DOCTYPE ... and <?xml ... never have content.
      if (('!' == name[0]) || ('?' == name[0]))
      {
          return true;
      }

      // Culture-sensitive lower-casing would break keys containing 'i' ("IMG", "LINK",
      // "INPUT", ...) under Turkish casing rules; use the invariant culture.
      string key = name.ToLower(System.Globalization.CultureInfo.InvariantCulture);
      object flag = ElementsFlags[key];
      if (flag == null)
      {
          return false;
      }
      return (((HtmlElementFlag)flag) & HtmlElementFlag.Empty) != 0;
  }
  /// <summary>
  /// Builds a node from a literal HTML string.
  /// </summary>
  /// <param name="html">The HTML text to parse.</param>
  /// <returns>The first child of the document obtained by parsing <paramref name="html"/>.</returns>
  public static HtmlNode CreateNode(string html)
  {
      // REVIEW: parsing a whole throw-away document is not the cheapest way to do this.
      HtmlDocument scratch = new HtmlDocument();
      scratch.LoadHtml(html);

      HtmlNode root = scratch.DocumentNode;
      return root.FirstChild;
  }
  /// <summary>
  /// Copies the specified node into this node. Equivalent to calling
  /// CopyFrom(node, true).
  /// </summary>
  /// <param name="node">The node to copy from. May not be null.</param>
  public void CopyFrom(HtmlNode node)
  {
      const bool deep = true;
      CopyFrom(node, deep);
  }
  /// <summary>
  /// Makes this node a duplicate of the specified node: its attributes are replaced by
  /// copies of the source attributes and, for a deep copy, its children are replaced by
  /// clones of the source children.
  /// </summary>
  /// <param name="node">The node to duplicate. May not be null.</param>
  /// <param name="deep">true to recursively clone the subtree under the specified node, false to clone only the node itself.</param>
  /// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
  public void CopyFrom(HtmlNode node, bool deep)
  {
      if (node == null)
      {
          throw new ArgumentNullException("node");
      }

      // Copying a node onto itself is a no-op; without this guard the attributes and
      // children would be cleared before they could be read back.
      if (node == this)
      {
          return;
      }

      Attributes.RemoveAll();
      if (node.HasAttributes)
      {
          foreach (HtmlAttribute att in node.Attributes)
          {
              SetAttributeValue(att.Name, att.Value);
          }
      }

      // The subtree is copied only for a deep copy, as documented. The previous test
      // was inverted and copied children only when 'deep' was false.
      if (deep)
      {
          RemoveAllChildren();
          if (node.HasChildNodes)
          {
              foreach (HtmlNode child in node.ChildNodes)
              {
                  AppendChild(child.CloneNode(true));
              }
          }
      }
  }
  /// <summary>
  /// Creates a node of the given type for the given document.
  /// </summary>
  /// <param name="type">The kind of node to create.</param>
  /// <param name="ownerdocument">The document the node belongs to.</param>
  /// <param name="index">
  /// Start offset of the node in the document text, also used as the key in the
  /// document's opened-nodes table. -1 means the node was not created from parsed text.
  /// </param>
  internal HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
  {
      _nodetype = type;
      _ownerdocument = ownerdocument;
      _outerstartindex = index;

      // Comment, document and text nodes get a fixed name and are their own end node,
      // which makes them closed from the start.
      if (type == HtmlNodeType.Comment)
      {
          _name = HtmlNodeTypeNameComment;
          _endnode = this;
      }
      else if (type == HtmlNodeType.Document)
      {
          _name = HtmlNodeTypeNameDocument;
          _endnode = this;
      }
      else if (type == HtmlNodeType.Text)
      {
          _name = HtmlNodeTypeNameText;
          _endnode = this;
      }

      // Register still-open nodes under their index; -1 is never registered.
      if ((_ownerdocument._openednodes != null) && !Closed && (index != -1))
      {
          _ownerdocument._openednodes.Add(index, this);
      }

      bool isCommentOrText = (type == HtmlNodeType.Comment) || (type == HtmlNodeType.Text);
      if ((index == -1) && !isCommentOrText)
      {
          // No source text backs this node, so inner and outer HTML must be generated.
          _outerchanged = true;
          _innerchanged = true;
      }
  }
  /// <summary>
  /// Closes this node with the given end node and updates the inner/outer text ranges.
  /// When the document option OptionAutoCloseOnEnd is off, every still-open child is
  /// first closed with a synthetic end node.
  /// </summary>
  /// <param name="endnode">The node that ends this one; may be this node itself.</param>
  internal void CloseNode(HtmlNode endnode)
  {
      if (!_ownerdocument.OptionAutoCloseOnEnd && (_childnodes != null))
      {
          foreach (HtmlNode child in _childnodes)
          {
              if (!child.Closed)
              {
                  // Fake closer: a node created with index -1 that is its own end node.
                  HtmlNode fakeCloser = new HtmlNode(NodeType, _ownerdocument, -1);
                  fakeCloser._endnode = fakeCloser;
                  child.CloseNode(fakeCloser);
              }
          }
      }

      if (Closed)
      {
          return;
      }

      _endnode = endnode;

      if (_ownerdocument._openednodes != null)
      {
          _ownerdocument._openednodes.Remove(_outerstartindex);
      }

      // If this node is the one recorded as the last node for its name, forget it.
      HtmlNode lastWithName = _ownerdocument._lastnodes[Name] as HtmlNode;
      if (lastWithName == this)
      {
          _ownerdocument._lastnodes.Remove(Name);
          _ownerdocument.UpdateLastParentNode();
      }

      // A node closed by itself has no inner section.
      if (endnode == this)
      {
          return;
      }

      // The inner section runs from the end of the opening part to the start of the end node.
      _innerstartindex = _outerstartindex + _outerlength;
      _innerlength = endnode._outerstartindex - _innerstartindex;

      // The outer section now extends to the end of the end node.
      int endOfEndNode = endnode._outerstartindex + endnode._outerlength;
      _outerlength = endOfEndNode - _outerstartindex;
  }
  /// <summary>
  /// Gets the node that closes this node, or null while the node is still open.
  /// </summary>
  internal HtmlNode EndNode
  {
      get { return _endnode; }
  }
  /// <summary>
  /// Reads the value of the 'id' attribute.
  /// </summary>
  /// <returns>The attribute value, or null when the node has no 'id' attribute.</returns>
  internal string GetId()
  {
      HtmlAttribute idAttribute = Attributes["id"];
      return (idAttribute == null) ? null : idAttribute.Value;
  }
  /// <summary>
  /// Sets the value of the 'id' attribute, creating the attribute when the node does not
  /// have one yet, and registers the node under that id in the owner document.
  /// </summary>
  /// <param name="id">The new id value.</param>
  internal void SetId(string id)
  {
      HtmlAttribute att = Attributes["id"];
      if (att == null)
      {
          // CreateAttribute only receives the name, so the new attribute is presumably
          // not attached to any node; append it so the id actually appears on this node.
          att = _ownerdocument.CreateAttribute("id");
          att.Value = id;
          Attributes.Append(att);
      }
      else
      {
          att.Value = id;
      }

      _ownerdocument.SetIdForNode(this, att.Value);
      _outerchanged = true;
  }
  /// <summary>
  /// Creates an XPathNavigator for navigating this HTML node.
  /// </summary>
  /// <returns>
  /// A navigator positioned on this node (the node the method was called on), not on
  /// the root of the document.
  /// </returns>
  public XPathNavigator CreateNavigator()
  {
      HtmlNodeNavigator navigator = new HtmlNodeNavigator(_ownerdocument, this);
      return navigator;
  }
  /// <summary>
  /// Returns the first node matched by an XPath expression evaluated from this node.
  /// </summary>
  /// <param name="xpath">The XPath expression. May not be null.</param>
  /// <returns>The first matching HtmlNode, or null when nothing matches.</returns>
  public HtmlNode SelectSingleNode(string xpath)
  {
      if (xpath == null)
      {
          throw new ArgumentNullException("xpath");
      }

      HtmlNodeNavigator start = new HtmlNodeNavigator(_ownerdocument, this);
      XPathNodeIterator matches = start.Select(xpath);
      if (matches.MoveNext())
      {
          HtmlNodeNavigator first = (HtmlNodeNavigator)matches.Current;
          return first.CurrentNode;
      }
      return null;
  }
  /// <summary>
  /// Selects a list of nodes matching the XPath expression, evaluated from this node.
  /// </summary>
  /// <param name="xpath">The XPath expression. May not be null.</param>
  /// <returns>An HtmlNodeCollection containing the nodes matching the XPath query, or null if no node matched the XPath expression.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="xpath"/> is null.</exception>
  public HtmlNodeCollection SelectNodes(string xpath)
  {
      // Same argument contract as SelectSingleNode: reject null up front rather than
      // letting the failure surface from inside the XPath engine.
      if (xpath == null)
      {
          throw new ArgumentNullException("xpath");
      }

      HtmlNodeCollection list = new HtmlNodeCollection(null);
      HtmlNodeNavigator nav = new HtmlNodeNavigator(_ownerdocument, this);
      XPathNodeIterator it = nav.Select(xpath);
      while (it.MoveNext())
      {
          HtmlNodeNavigator n = (HtmlNodeNavigator)it.Current;
          list.Add(n.CurrentNode);
      }

      // Callers rely on null (not an empty collection) to signal "no match".
      if (list.Count == 0)
      {
          return null;
      }
      return list;
  }
  /// <summary>
  /// Gets or sets the value of the 'id' HTML attribute. Both accessors throw when the
  /// owner document has no id table, i.e. when it was not parsed with
  /// OptionUseIdAttribute set to true.
  /// </summary>
  public string Id
  {
      get
      {
          if (_ownerdocument._nodesid != null)
          {
              return GetId();
          }
          throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
      }
      set
      {
          // The document check comes first, then the argument check.
          bool idTableMissing = (_ownerdocument._nodesid == null);
          if (idTableMissing)
          {
              throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
          }
          if (value == null)
          {
              throw new ArgumentNullException("value");
          }
          SetId(value);
      }
  }
  /// <summary>
  /// Gets the line number recorded for this node in the document.
  /// </summary>
  public int Line
  {
      get { return _line; }
  }
  /// <summary>
  /// Gets the column number recorded for this node in the document.
  /// </summary>
  public int LinePosition
  {
      get { return _lineposition; }
  }
  /// <summary>
  /// Gets the stream position recorded for this node, relative to the start of the document.
  /// </summary>
  public int StreamPosition
  {
      get { return _streamposition; }
  }
  /// <summary>
  /// Gets a value indicating whether this node has been closed, i.e. whether it has an end node.
  /// </summary>
  public bool Closed
  {
      get { return _endnode != null; }
  }
  /// <summary>
  /// Gets or sets this node's name. When no name has been set, the getter extracts it
  /// from the owner document text, lower-cases it, and caches the result.
  /// </summary>
  public string Name
  {
      get
      {
          if (_name == null)
          {
              // Tag names are identifiers: lower-case with the invariant culture so the
              // result does not depend on the current thread culture (for example,
              // Turkish casing rules would map 'I' to a dotless i).
              _name = _ownerdocument._text
                  .Substring(_namestartindex, _namelength)
                  .ToLower(System.Globalization.CultureInfo.InvariantCulture);
          }
          return _name;
      }
      set
      {
          _name = value;
      }
  }
  /// <summary>
  /// Gets the text between the start and end tags of the object. For a text node this is
  /// its text, for a comment node its comment, and otherwise the concatenation of the
  /// InnerText of every child (an empty string when there are no children).
  /// </summary>
  public virtual string InnerText
  {
      get
      {
          if (_nodetype == HtmlNodeType.Text)
          {
              return ((HtmlTextNode)this).Text;
          }

          if (_nodetype == HtmlNodeType.Comment)
          {
              return ((HtmlCommentNode)this).Comment;
          }

          // note: the text is recomputed on every call; it could be cached like InnerHtml.
          if (!HasChildNodes)
          {
              return string.Empty;
          }

          // A StringBuilder avoids re-copying the accumulated text for every child,
          // which repeated string concatenation would do.
          System.Text.StringBuilder text = new System.Text.StringBuilder();
          foreach (HtmlNode node in ChildNodes)
          {
              text.Append(node.InnerText);
          }
          return text.ToString();
      }
  }
  /// <summary>
  /// Gets or sets the HTML between the start and end tags of the object.
  /// The getter regenerates and caches the markup when the content has changed;
  /// otherwise it returns the cached markup or the matching slice of the document text.
  /// The setter parses the value and replaces all children with the parsed nodes.
  /// </summary>
  public virtual string InnerHtml
  {
      get
      {
          if (!_innerchanged)
          {
              if (_innerhtml != null)
              {
                  return _innerhtml;
              }
              if (_innerstartindex < 0)
              {
                  return string.Empty;
              }
              return _ownerdocument._text.Substring(_innerstartindex, _innerlength);
          }

          // Content changed since the last read: rebuild and cache.
          _innerhtml = WriteContentTo();
          _innerchanged = false;
          return _innerhtml;
      }
      set
      {
          HtmlDocument parsed = new HtmlDocument();
          parsed.LoadHtml(value);

          RemoveAllChildren();
          AppendChildren(parsed.DocumentNode.ChildNodes);
      }
  }
  /// <summary>
  /// Gets the object and its content in HTML.
  /// The markup is regenerated and cached when the node has changed; otherwise the
  /// cached markup or the matching slice of the document text is returned.
  /// </summary>
  public virtual string OuterHtml
  {
      get
      {
          if (!_outerchanged)
          {
              if (_outerhtml != null)
              {
                  return _outerhtml;
              }
              if (_outerstartindex < 0)
              {
                  return string.Empty;
              }
              return _ownerdocument._text.Substring(_outerstartindex, _outerlength);
          }

          // Node changed since the last read: rebuild and cache.
          _outerhtml = WriteTo();
          _outerchanged = false;
          return _outerhtml;
      }
  }
  /// <summary>
  /// Creates a deep duplicate of the node. Equivalent to CloneNode(true).
  /// </summary>
  /// <returns>The cloned node.</returns>
  public HtmlNode Clone()
  {
      const bool deep = true;
      return CloneNode(deep);
  }
  /// <summary>
  /// Creates a deep duplicate of the node and gives the duplicate a new name.
  /// Equivalent to CloneNode(newName, true).
  /// </summary>
  /// <param name="newName">The name of the cloned node. May not be null.</param>
  /// <returns>The cloned node.</returns>
  public HtmlNode CloneNode(string newName)
  {
      const bool deep = true;
      return CloneNode(newName, deep);
  }
  /// <summary>
  /// Creates a duplicate of the node and gives the duplicate a new name.
  /// </summary>
  /// <param name="newName">The name of the cloned node. May not be null.</param>
  /// <param name="deep">true to also clone the subtree under the node; false to clone only the node itself.</param>
  /// <returns>The cloned node, carrying <paramref name="newName"/>.</returns>
  public HtmlNode CloneNode(string newName, bool deep)
  {
      if (newName == null)
      {
          throw new ArgumentNullException("newName");
      }

      HtmlNode renamedCopy = CloneNode(deep);
      renamedCopy._name = newName;
      return renamedCopy;
  }
  /// <summary>
  /// Creates a duplicate of the node.
  /// </summary>
  /// <param name="deep">true to also clone the subtree under the node; false to clone only the node itself.</param>
  /// <returns>The cloned node, created through the owner document.</returns>
  public HtmlNode CloneNode(bool deep)
  {
      HtmlNode copy = _ownerdocument.CreateNode(_nodetype);
      copy._name = Name;

      // Comment and text nodes only carry their content.
      if (_nodetype == HtmlNodeType.Comment)
      {
          ((HtmlCommentNode)copy).Comment = ((HtmlCommentNode)this).Comment;
          return copy;
      }
      if (_nodetype == HtmlNodeType.Text)
      {
          ((HtmlTextNode)copy).Text = ((HtmlTextNode)this).Text;
          return copy;
      }

      // Attributes of the opening tag.
      if (HasAttributes)
      {
          foreach (HtmlAttribute source in _attributes)
          {
              copy.Attributes.Append(source.Clone());
          }
      }

      // Attributes of the closing tag.
      // NOTE(review): CloneNode(false) on the end node already runs the attribute copy
      // above for that node, so this loop may append them a second time - confirm
      // against HtmlAttributeCollection.Append before changing.
      if (HasClosingAttributes)
      {
          copy._endnode = _endnode.CloneNode(false);
          foreach (HtmlAttribute source in _endnode._attributes)
          {
              copy._endnode._attributes.Append(source.Clone());
          }
      }

      // Children, for a deep clone only.
      if (deep && HasChildNodes)
      {
          foreach (HtmlNode child in _childnodes)
          {
              copy.AppendChild(child.CloneNode(true));
          }
      }
      return copy;
  }
  /// <summary>
  /// Gets the node that immediately follows this node.
  /// </summary>
  public HtmlNode NextSibling
  {
      get { return _nextnode; }
  }
  /// <summary>
  /// Gets the node that immediately precedes this node.
  /// </summary>
  public HtmlNode PreviousSibling
  {
      get { return _prevnode; }
  }
  /// <summary>
  /// Removes all children of the current node, clears its attributes, and clears the
  /// attributes of its end node when that is a separate node. The cached inner and
  /// outer HTML are marked as changed.
  /// </summary>
  public void RemoveAll()
  {
      RemoveAllChildren();

      if (HasAttributes)
      {
          _attributes.Clear();
      }

      bool hasSeparateEndNode = (_endnode != null) && (_endnode != this);
      if (hasSeparateEndNode && (_endnode._attributes != null))
      {
          _endnode._attributes.Clear();
      }

      _outerchanged = true;
      _innerchanged = true;
  }
  /// <summary>
  /// Removes every child of the current node. Does nothing when there are no children.
  /// </summary>
  public void RemoveAllChildren()
  {
      if (HasChildNodes)
      {
          if (_ownerdocument.OptionUseIdAttribute)
          {
              // Unregister each child's id from the document.
              foreach (HtmlNode child in _childnodes)
              {
                  _ownerdocument.SetIdForNode(null, child.GetId());
              }
          }

          _childnodes.Clear();
          _outerchanged = true;
          _innerchanged = true;
      }
  }
  /// <summary>
  /// Removes the given child from this node.
  /// </summary>
  /// <param name="oldChild">The child to remove. May not be null.</param>
  /// <returns>The removed node.</returns>
  public HtmlNode RemoveChild(HtmlNode oldChild)
  {
      if (oldChild == null)
      {
          throw new ArgumentNullException("oldChild");
      }

      // Position of the child in our list; -1 when we have no list or it is not ours.
      int position = (_childnodes == null) ? -1 : _childnodes[oldChild];
      if (position == -1)
      {
          throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
      }

      _childnodes.Remove(position);
      _ownerdocument.SetIdForNode(null, oldChild.GetId());

      _outerchanged = true;
      _innerchanged = true;
      return oldChild;
  }
  /// <summary>
  /// Removes the specified child node, optionally keeping its children by moving them
  /// into this node at the position the removed child occupied.
  /// </summary>
  /// <param name="oldChild">The node being removed. May not be null.</param>
  /// <param name="keepGrandChildren">true to keep grand children of the node, false otherwise.</param>
  /// <returns>The node removed.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="oldChild"/> is null.</exception>
  public HtmlNode RemoveChild(HtmlNode oldChild, bool keepGrandChildren)
  {
      if (oldChild == null)
      {
          throw new ArgumentNullException("oldChild");
      }

      if ((oldChild._childnodes != null) && keepGrandChildren)
      {
          // get prev sibling
          HtmlNode prev = oldChild.PreviousSibling;

          // Reroute grand children to ourselves. Each one is inserted after the
          // previously inserted one so their original order is preserved; inserting
          // all of them after the same sibling would reverse them.
          foreach (HtmlNode grandchild in oldChild._childnodes)
          {
              prev = InsertAfter(grandchild, prev);
          }
      }

      RemoveChild(oldChild);
      _outerchanged = true;
      _innerchanged = true;
      return oldChild;
  }
  /// <summary>
  /// Puts newChild in the place of oldChild in the child list.
  /// A null newChild removes oldChild; a null oldChild appends newChild.
  /// </summary>
  /// <param name="newChild">The node to put in the child list.</param>
  /// <param name="oldChild">The node currently in the list that is being replaced.</param>
  /// <returns>
  /// newChild after a replacement or an append; the removed node when newChild is null.
  /// </returns>
  public HtmlNode ReplaceChild(HtmlNode newChild, HtmlNode oldChild)
  {
      if (newChild == null)
      {
          return RemoveChild(oldChild);
      }
      if (oldChild == null)
      {
          return AppendChild(newChild);
      }

      int position = (_childnodes == null) ? -1 : _childnodes[oldChild];
      if (position == -1)
      {
          throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
      }

      _childnodes.Replace(position, newChild);

      // Swap the id registrations: drop the old node's id, record the new node's id.
      _ownerdocument.SetIdForNode(null, oldChild.GetId());
      _ownerdocument.SetIdForNode(newChild, newChild.GetId());

      _outerchanged = true;
      _innerchanged = true;
      return newChild;
  }
  /// <summary>
  /// Inserts a node immediately before a reference child.
  /// A null reference appends the node; inserting a node before itself does nothing.
  /// </summary>
  /// <param name="newChild">The node to insert. May not be null.</param>
  /// <param name="refChild">The reference child; newChild is placed before it.</param>
  /// <returns>The inserted node.</returns>
  public HtmlNode InsertBefore(HtmlNode newChild, HtmlNode refChild)
  {
      if (newChild == null)
      {
          throw new ArgumentNullException("newChild");
      }
      if (refChild == null)
      {
          return AppendChild(newChild);
      }
      if (newChild == refChild)
      {
          return newChild;
      }

      int position = (_childnodes == null) ? -1 : _childnodes[refChild];
      if (position == -1)
      {
          throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
      }

      _childnodes.Insert(position, newChild);
      _ownerdocument.SetIdForNode(newChild, newChild.GetId());

      _outerchanged = true;
      _innerchanged = true;
      return newChild;
  }
  /// <summary>
  /// Inserts a node immediately after a reference child.
  /// A null reference prepends the node; inserting a node after itself does nothing.
  /// </summary>
  /// <param name="newChild">The node to insert. May not be null.</param>
  /// <param name="refChild">The reference child; newChild is placed after it.</param>
  /// <returns>The inserted node.</returns>
  public HtmlNode InsertAfter(HtmlNode newChild, HtmlNode refChild)
  {
      if (newChild == null)
      {
          throw new ArgumentNullException("newChild");
      }
      if (refChild == null)
      {
          return PrependChild(newChild);
      }
      if (newChild == refChild)
      {
          return newChild;
      }

      int position = (_childnodes == null) ? -1 : _childnodes[refChild];
      if (position == -1)
      {
          throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
      }

      _childnodes.Insert(position + 1, newChild);
      _ownerdocument.SetIdForNode(newChild, newChild.GetId());

      _outerchanged = true;
      _innerchanged = true;
      return newChild;
  }
  /// <summary>
  /// Gets the first child of the node, or null when the node has no children.
  /// </summary>
  public HtmlNode FirstChild
  {
      get
      {
          return HasChildNodes ? _childnodes[0] : null;
      }
  }
  /// <summary>
  /// Gets the last child of the node, or null when the node has no children.
  /// </summary>
  public HtmlNode LastChild
  {
      get
      {
          return HasChildNodes ? _childnodes[_childnodes.Count - 1] : null;
      }
  }
  /// <summary>
  /// Gets the kind of this node.
  /// </summary>
  public HtmlNodeType NodeType
  {
      get { return _nodetype; }
  }
  /// <summary>
  /// Gets the parent of this node (for nodes that can have parents).
  /// </summary>
  public HtmlNode ParentNode
  {
      get { return _parentnode; }
  }
  /// <summary>
  /// Gets the HtmlDocument this node belongs to.
  /// </summary>
  public HtmlDocument OwnerDocument
  {
      get { return _ownerdocument; }
  }
  /// <summary>
  /// Gets the children of the node. The collection is created on first access,
  /// so this property never returns null.
  /// </summary>
  public HtmlNodeCollection ChildNodes
  {
      get
      {
          if (_childnodes != null)
          {
              return _childnodes;
          }

          _childnodes = new HtmlNodeCollection(this);
          return _childnodes;
      }
  }
  /// <summary>
  /// Inserts a node at the beginning of this node's child list.
  /// </summary>
  /// <param name="newChild">The node to add. May not be null.</param>
  /// <returns>The added node.</returns>
  public HtmlNode PrependChild(HtmlNode newChild)
  {
      if (newChild == null)
      {
          throw new ArgumentNullException("newChild");
      }

      HtmlNodeCollection children = ChildNodes;
      children.Prepend(newChild);

      // Record the new child's id in the document.
      _ownerdocument.SetIdForNode(newChild, newChild.GetId());

      _outerchanged = true;
      _innerchanged = true;
      return newChild;
  }
  /// <summary>
  /// Adds the specified node list to the beginning of the list of children of this node.
  /// The nodes keep the order they have in <paramref name="newChildren"/>.
  /// </summary>
  /// <param name="newChildren">The node list to add. May not be null.</param>
  /// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is null.</exception>
  public void PrependChildren(HtmlNodeCollection newChildren)
  {
      if (newChildren == null)
      {
          throw new ArgumentNullException("newChildren");
      }

      // Prepend from last to first: each call puts its node at the front, so walking
      // the list forwards would leave the new children in reverse order.
      for (int i = newChildren.Count - 1; i >= 0; i--)
      {
          PrependChild(newChildren[i]);
      }
  }
  /// <summary>
  /// Adds a node at the end of this node's child list.
  /// </summary>
  /// <param name="newChild">The node to add. May not be null.</param>
  /// <returns>The added node.</returns>
  public HtmlNode AppendChild(HtmlNode newChild)
  {
      if (newChild == null)
      {
          throw new ArgumentNullException("newChild");
      }

      HtmlNodeCollection children = ChildNodes;
      children.Append(newChild);

      // Record the new child's id in the document.
      _ownerdocument.SetIdForNode(newChild, newChild.GetId());

      _outerchanged = true;
      _innerchanged = true;
      return newChild;
  }
  /// <summary>
  /// Adds the specified node list to the end of the list of children of this node.
  /// </summary>
  /// <param name="newChildren">The node list to add. May not be null.</param>
  /// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is null.</exception>
  public void AppendChildren(HtmlNodeCollection newChildren)
  {
      if (newChildren == null)
      {
          // The parameter name reported here must match the real parameter name.
          throw new ArgumentNullException("newChildren");
      }

      foreach (HtmlNode newChild in newChildren)
      {
          AppendChild(newChild);
      }
  }
  /// <summary>
  /// Gets a value indicating whether the current node has at least one attribute.
  /// </summary>
  public bool HasAttributes
  {
      get
      {
          // No collection yet, or an empty one, both mean "no attributes".
          return (_attributes != null) && (_attributes.Count > 0);
      }
  }
  /// <summary>
  /// Gets a value indicating whether the closing tag of the current node carries at least one attribute.
  /// </summary>
  public bool HasClosingAttributes
  {
      get
      {
          // Only a distinct end node can carry closing-tag attributes.
          if ((_endnode != null) && (_endnode != this))
          {
              HtmlAttributeCollection closing = _endnode._attributes;
              return (closing != null) && (closing.Count > 0);
          }
          return false;
      }
  }
  /// <summary>
  /// Gets a value indicating whether this node has at least one child node.
  /// </summary>
  public bool HasChildNodes
  {
      get
      {
          // The child collection is created lazily, so it may still be null.
          return (_childnodes != null) && (_childnodes.Count > 0);
      }
  }
  /// <summary>
  /// Looks up an attribute of this node by name and returns its value as a string.
  /// </summary>
  /// <param name="name">The name of the attribute to get. May not be null.</param>
  /// <param name="def">The value to return when the node has no such attribute.</param>
  /// <returns>The value of the attribute if found, <paramref name="def"/> otherwise.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public string GetAttributeValue(string name, string def)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }

      if (HasAttributes)
      {
          HtmlAttribute found = Attributes[name];
          if (found != null)
          {
              return found.Value;
          }
      }

      return def;
  }
  /// <summary>
  /// Looks up an attribute of this node by name and returns its value converted to an integer.
  /// </summary>
  /// <param name="name">The name of the attribute to get. May not be null.</param>
  /// <param name="def">The value to return when the attribute is missing or cannot be converted.</param>
  /// <returns>The converted value of the attribute, or <paramref name="def"/> if the attribute is not found or the conversion fails.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public int GetAttributeValue(string name, int def)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }

      if (HasAttributes)
      {
          HtmlAttribute found = Attributes[name];
          if (found != null)
          {
              try
              {
                  return Convert.ToInt32(found.Value);
              }
              catch
              {
                  // Best effort: any conversion failure falls back to the default below.
              }
          }
      }

      return def;
  }
  /// <summary>
  /// Looks up an attribute of this node by name and returns its value converted to a boolean.
  /// </summary>
  /// <param name="name">The name of the attribute to get. May not be null.</param>
  /// <param name="def">The value to return when the attribute is missing or cannot be converted.</param>
  /// <returns>The converted value of the attribute, or <paramref name="def"/> if the attribute is not found or the conversion fails.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public bool GetAttributeValue(string name, bool def)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }

      if (HasAttributes)
      {
          HtmlAttribute found = Attributes[name];
          if (found != null)
          {
              try
              {
                  return Convert.ToBoolean(found.Value);
              }
              catch
              {
                  // Best effort: any conversion failure falls back to the default below.
              }
          }
      }

      return def;
  }
  /// <summary>
  /// Sets the value of an attribute of this node, creating and appending the attribute when it does not exist yet.
  /// </summary>
  /// <param name="name">The name of the attribute to set. May not be null.</param>
  /// <param name="value">The value for the attribute.</param>
  /// <returns>The attribute instance that now holds the value.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public HtmlAttribute SetAttributeValue(string name, string value)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }

      HtmlAttribute existing = Attributes[name];
      if (existing != null)
      {
          existing.Value = value;
          return existing;
      }

      // Not found: have the owner document create the attribute and append it.
      return Attributes.Append(_ownerdocument.CreateAttribute(name, value));
  }
  /// <summary>
  /// Gets the collection of HTML attributes for this node. The collection is created
  /// the first time it is requested, so the returned value is never null.
  /// </summary>
  public HtmlAttributeCollection Attributes
  {
      get
      {
          // Create the collection only when there is none yet. Testing HasAttributes here
          // would replace an existing but still empty collection on every read, so an
          // attribute appended through a previously returned reference would be lost.
          if (_attributes == null)
          {
              _attributes = new HtmlAttributeCollection(this);
          }
          return _attributes;
      }
  }
  /// <summary>
  /// Gets the collection of HTML attributes for the closing tag. The returned value is never null:
  /// when the closing tag has no attributes, a new empty collection is returned.
  /// </summary>
  public HtmlAttributeCollection ClosingAttributes
  {
      get
      {
          if (HasClosingAttributes)
          {
              return _endnode.Attributes;
          }

          // Nothing on the closing tag: hand out a fresh, empty collection.
          return new HtmlAttributeCollection(this);
      }
  }
  /// <summary>
  /// Writes a single attribute, preceded by a space, to the specified TextWriter.
  /// </summary>
  /// <remarks>
  /// When the owner document outputs XML, the attribute's XML name and its HTML-encoded XML value
  /// are written, always double-quoted. Otherwise the raw name and value are written; a name of
  /// the form &lt;% ... %&gt; is written on its own without a value, and the quotes are left out when
  /// OptionOutputOptimizeAttributeValues is set and the value contains no space, tab, CR or LF.
  /// Upper-casing (OptionOutputUpperCase) uses the invariant culture so that the generated markup
  /// does not depend on the culture of the current thread.
  /// </remarks>
  /// <param name="outText">The TextWriter to write to.</param>
  /// <param name="att">The attribute to write.</param>
  internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
  {
      bool upperCase = _ownerdocument.OptionOutputUpperCase;
      string name;

      if (_ownerdocument.OptionOutputAsXml)
      {
          name = att.XmlName;
          if (upperCase)
          {
              name = name.ToUpper(System.Globalization.CultureInfo.InvariantCulture);
          }
          outText.Write(" " + name + "=\"" + HtmlDocument.HtmlEncode(att.XmlValue) + "\"");
          return;
      }

      string rawName = att.Name;
      name = rawName;
      if (upperCase)
      {
          name = name.ToUpper(System.Globalization.CultureInfo.InvariantCulture);
      }

      // A name that starts with "<%" and ends with "%>" is written alone, without "=value".
      int length = rawName.Length;
      if ((length >= 4) &&
          (rawName[0] == '<') && (rawName[1] == '%') &&
          (rawName[length - 2] == '%') && (rawName[length - 1] == '>'))
      {
          outText.Write(" " + name);
          return;
      }

      // Quotes are only optional when the optimization is requested and the value has no whitespace.
      bool quoted = true;
      if (_ownerdocument.OptionOutputOptimizeAttributeValues)
      {
          quoted = att.Value.IndexOfAny(new char[] { '\n', '\r', '\t', ' ' }) >= 0;
      }

      if (quoted)
      {
          outText.Write(" " + name + "=\"" + att.Value + "\"");
      }
      else
      {
          outText.Write(" " + name + "=" + att.Value);
      }
  }
  /// <summary>
  /// Writes the attributes of the specified node to the specified XmlWriter,
  /// using each attribute's XML name and its value.
  /// </summary>
  /// <param name="writer">The XmlWriter to write to.</param>
  /// <param name="node">The node whose attributes are written.</param>
  internal static void WriteAttributes(XmlWriter writer, HtmlNode node)
  {
      if (node.HasAttributes)
      {
          // we use _hashitems to make sure attributes are written only once
          foreach (HtmlAttribute attribute in node.Attributes._hashitems.Values)
          {
              writer.WriteAttributeString(attribute.XmlName, attribute.Value);
          }
      }
  }
  /// <summary>
  /// Writes the attributes of this node, or of its closing tag, to the specified TextWriter.
  /// </summary>
  /// <param name="outText">The TextWriter to write to.</param>
  /// <param name="closing">true to write the attributes of the closing tag, false to write those of the opening tag. Not consulted when the owner document outputs XML.</param>
  internal void WriteAttributes(TextWriter outText, bool closing)
  {
      if (_ownerdocument.OptionOutputAsXml)
      {
          if (_attributes != null)
          {
              // we use _hashitems to make sure attributes are written only once
              foreach (HtmlAttribute xmlAtt in _attributes._hashitems.Values)
              {
                  WriteAttribute(outText, xmlAtt);
              }
          }
          return;
      }

      if (closing)
      {
          // Closing-tag attributes live on a distinct end node; without one there is nothing to write.
          if ((_endnode == null) || (_endnode == this) || (_endnode._attributes == null))
          {
              return;
          }

          foreach (HtmlAttribute closingAtt in _endnode._attributes)
          {
              WriteAttribute(outText, closingAtt);
          }

          if (_ownerdocument.OptionAddDebuggingAttributes)
          {
              WriteAttribute(outText, _ownerdocument.CreateAttribute("_closed", Closed.ToString()));
              WriteAttribute(outText, _ownerdocument.CreateAttribute("_children", ChildNodes.Count.ToString()));
          }
          return;
      }

      if (_attributes != null)
      {
          foreach (HtmlAttribute openingAtt in _attributes)
          {
              WriteAttribute(outText, openingAtt);
          }
      }

      if (_ownerdocument.OptionAddDebuggingAttributes)
      {
          WriteAttribute(outText, _ownerdocument.CreateAttribute("_closed", Closed.ToString()));
          WriteAttribute(outText, _ownerdocument.CreateAttribute("_children", ChildNodes.Count.ToString()));

          // One "_child_<position>" attribute per child, holding the child's name.
          int position = 0;
          foreach (HtmlNode child in ChildNodes)
          {
              WriteAttribute(outText, _ownerdocument.CreateAttribute("_child_" + position, child.Name));
              position++;
          }
      }
  }
  /// <summary>
  /// Returns the text of a comment node prepared for XML output: the first four and the last
  /// three characters of the comment are dropped, and every "--" in the remainder is
  /// replaced with " - -".
  /// </summary>
  /// <param name="comment">The comment node to convert.</param>
  /// <returns>The comment text to place inside an XML comment.</returns>
  internal static string GetXmlComment(HtmlCommentNode comment)
  {
      string raw = comment.Comment;

      // NOTE(review): the 4/3 offsets presumably skip the "<!--" and "-->" delimiters - confirm.
      int innerLength = raw.Length - 7;
      string inner = raw.Substring(4, innerLength);

      return inner.Replace("--", " - -");
  }
  /// <summary>
  /// Saves the current node to the specified TextWriter. What is written depends on the
  /// type of the node and on the output options of the owner document.
  /// </summary>
  /// <param name="outText">The TextWriter to which you want to save.</param>
  public void WriteTo(TextWriter outText)
  {
      switch (_nodetype)
      {
          case HtmlNodeType.Comment:
              WriteCommentMarkup(outText);
              break;

          case HtmlNodeType.Document:
              WriteDocumentMarkup(outText);
              break;

          case HtmlNodeType.Text:
              WriteTextMarkup(outText);
              break;

          case HtmlNodeType.Element:
              WriteElementMarkup(outText);
              break;
      }
  }

  // Writes a comment node: verbatim in HTML mode, re-wrapped via GetXmlComment in XML mode.
  private void WriteCommentMarkup(TextWriter outText)
  {
      HtmlCommentNode commentNode = (HtmlCommentNode)this;
      string html = commentNode.Comment;
      if (_ownerdocument.OptionOutputAsXml)
      {
          outText.Write("<!--" + GetXmlComment(commentNode) + " -->");
      }
      else
      {
          outText.Write(html);
      }
  }

  // Writes a document node: its children, preceded in XML mode by an XML declaration and
  // wrapped in a single span element when there is more than one root node.
  private void WriteDocumentMarkup(TextWriter outText)
  {
      if (_ownerdocument.OptionOutputAsXml)
      {
          outText.Write("<?xml version=\"1.0\" encoding=\"" + _ownerdocument.GetOutEncoding().BodyName + "\"?>");

          // check there is a root element
          if (_ownerdocument.DocumentNode.HasChildNodes)
          {
              int rootnodes = _ownerdocument.DocumentNode._childnodes.Count;

              // An XML declaration found in the document is not counted as a root node.
              if (_ownerdocument.GetXmlDeclaration() != null)
              {
                  rootnodes--;
              }

              if (rootnodes > 1)
              {
                  string wrapper = _ownerdocument.OptionOutputUpperCase ? "SPAN" : "span";
                  outText.Write("<" + wrapper + ">");
                  WriteContentTo(outText);
                  outText.Write("</" + wrapper + ">");
                  return;
              }
          }
      }

      WriteContentTo(outText);
  }

  // Writes a text node: HTML-encoded in XML mode, verbatim otherwise.
  private void WriteTextMarkup(TextWriter outText)
  {
      string html = ((HtmlTextNode)this).Text;
      if (_ownerdocument.OptionOutputAsXml)
      {
          outText.Write(HtmlDocument.HtmlEncode(html));
      }
      else
      {
          outText.Write(html);
      }
  }

  // Writes an element node: opening tag, attributes, content and closing tag.
  private void WriteElementMarkup(TextWriter outText)
  {
      bool asXml = _ownerdocument.OptionOutputAsXml;

      // Upper-case with the invariant culture so tag names do not depend on the thread's culture.
      string name = Name;
      if (_ownerdocument.OptionOutputUpperCase)
      {
          name = name.ToUpper(System.Globalization.CultureInfo.InvariantCulture);
      }

      if (asXml)
      {
          if (name.Length == 0)
          {
              return;
          }
          if (name[0] == '?')
          {
              // forget this one, it's been done at the document level
              return;
          }
          if (name.Trim().Length == 0)
          {
              return;
          }
          name = HtmlDocument.GetXmlName(name);
      }

      outText.Write("<" + name);
      WriteAttributes(outText, false);

      if (!HasChildNodes)
      {
          if (!HtmlNode.IsEmptyElement(Name))
          {
              outText.Write("></" + name + ">");
          }
          else if (_ownerdocument.OptionWriteEmptyNodes || asXml)
          {
              outText.Write(" />");
          }
          else
          {
              // A name starting with '?' is closed with "?>" instead of ">".
              if ((Name.Length > 0) && (Name[0] == '?'))
              {
                  outText.Write("?");
              }
              outText.Write(">");
          }
          return;
      }

      outText.Write(">");

      if (asXml && HtmlNode.IsCDataElement(Name))
      {
          // this code and the following tries to output things as nicely as possible for old browsers.
          outText.Write("\r\n//<![CDATA[\r\n");

          // child must be a text (this point is only reached when the node has children)
          ChildNodes[0].WriteTo(outText);

          outText.Write("\r\n//]]>//\r\n");
      }
      else
      {
          WriteContentTo(outText);
      }

      outText.Write("</" + name);
      if (!asXml)
      {
          WriteAttributes(outText, true);
      }
      outText.Write(">");
  }
  /// <summary>
  /// Saves the current node to the specified XmlWriter.
  /// </summary>
  /// <remarks>
  /// A comment is written as an XML comment, a document as an "xml" processing instruction
  /// followed by its children, a text node as a string, and an element as a start element,
  /// its attributes, its children and an end element. Upper-casing of element names
  /// (OptionOutputUpperCase) uses the invariant culture so that the output does not depend
  /// on the culture of the current thread.
  /// </remarks>
  /// <param name="writer">The XmlWriter to which you want to save.</param>
  public void WriteTo(XmlWriter writer)
  {
      switch (_nodetype)
      {
          case HtmlNodeType.Comment:
              writer.WriteComment(GetXmlComment((HtmlCommentNode)this));
              break;

          case HtmlNodeType.Document:
              writer.WriteProcessingInstruction("xml", "version=\"1.0\" encoding=\"" + _ownerdocument.GetOutEncoding().BodyName + "\"");
              if (HasChildNodes)
              {
                  foreach (HtmlNode rootChild in ChildNodes)
                  {
                      rootChild.WriteTo(writer);
                  }
              }
              break;

          case HtmlNodeType.Text:
              writer.WriteString(((HtmlTextNode)this).Text);
              break;

          case HtmlNodeType.Element:
              string name = Name;
              if (_ownerdocument.OptionOutputUpperCase)
              {
                  name = name.ToUpper(System.Globalization.CultureInfo.InvariantCulture);
              }

              writer.WriteStartElement(name);
              WriteAttributes(writer, this);
              if (HasChildNodes)
              {
                  foreach (HtmlNode elementChild in ChildNodes)
                  {
                      elementChild.WriteTo(writer);
                  }
              }
              writer.WriteEndElement();
              break;
      }
  }
  /// <summary>
  /// Writes every child of this node, in order, to the specified TextWriter.
  /// Nothing is written when the node has no child collection.
  /// </summary>
  /// <param name="outText">The TextWriter to which you want to save.</param>
  public void WriteContentTo(TextWriter outText)
  {
      if (_childnodes != null)
      {
          foreach (HtmlNode child in _childnodes)
          {
              child.WriteTo(outText);
          }
      }
  }
  /// <summary>
  /// Renders the current node into an in-memory writer and returns the result.
  /// </summary>
  /// <returns>The saved string.</returns>
  public string WriteTo()
  {
      StringWriter buffer = new StringWriter();

      WriteTo(buffer);
      buffer.Flush();

      return buffer.ToString();
  }
  /// <summary>
  /// Renders all the children of the node into an in-memory writer and returns the result.
  /// </summary>
  /// <returns>The saved string.</returns>
  public string WriteContentTo()
  {
      StringWriter buffer = new StringWriter();

      WriteContentTo(buffer);
      buffer.Flush();

      return buffer.ToString();
  }
  1616. }
  /// <summary>
  /// Represents a combined list and collection of HTML nodes.
  /// </summary>
  /// <remarks>
  /// Besides storing the nodes, the linking operations (Remove, Replace, Insert, Append, Prepend)
  /// keep the previous/next sibling and parent references of the nodes in step with the list.
  /// Their consistency checks run before anything is modified, so a failed call leaves the
  /// list and the node links as they were.
  /// </remarks>
  public class HtmlNodeCollection: IEnumerable
  {
      private ArrayList _items = new ArrayList();
      private HtmlNode _parentnode;

      internal HtmlNodeCollection(HtmlNode parentnode)
      {
          _parentnode = parentnode; // may be null
      }

      /// <summary>
      /// Gets the number of elements actually contained in the list.
      /// </summary>
      public int Count
      {
          get
          {
              return _items.Count;
          }
      }

      // Unlinks every node (parent and both siblings) and empties the list.
      internal void Clear()
      {
          foreach (HtmlNode node in _items)
          {
              node._parentnode = null;
              node._nextnode = null;
              node._prevnode = null;
          }
          _items.Clear();
      }

      // Removes the node at the given index and links its former neighbours to each other.
      internal void Remove(int index)
      {
          HtmlNode oldnode = (HtmlNode)_items[index];
          HtmlNode prev = (index > 0) ? (HtmlNode)_items[index - 1] : null;
          HtmlNode next = (index < (_items.Count - 1)) ? (HtmlNode)_items[index + 1] : null;

          // Sanity check first, so that a failure leaves the list untouched.
          if ((prev != null) && (next == prev))
          {
              throw new InvalidProgramException("Unexpected error.");
          }

          _items.RemoveAt(index);

          if (prev != null)
          {
              prev._nextnode = next;
          }
          if (next != null)
          {
              next._prevnode = prev;
          }

          oldnode._prevnode = null;
          oldnode._nextnode = null;
          oldnode._parentnode = null;
      }

      // Puts the given node in place of the node at the given index and re-links the neighbours.
      internal void Replace(int index, HtmlNode node)
      {
          if (node == null)
          {
              throw new ArgumentNullException("node");
          }

          HtmlNode oldnode = (HtmlNode)_items[index];
          HtmlNode prev = (index > 0) ? (HtmlNode)_items[index - 1] : null;
          HtmlNode next = (index < (_items.Count - 1)) ? (HtmlNode)_items[index + 1] : null;

          // Sanity check first, so that a failure leaves the list untouched.
          if ((node == prev) || (node == next))
          {
              throw new InvalidProgramException("Unexpected error.");
          }

          // Detach the old node BEFORE linking the new one: when a node is replaced by
          // itself, clearing the old links afterwards would wipe the links just set.
          oldnode._prevnode = null;
          oldnode._nextnode = null;
          oldnode._parentnode = null;

          _items[index] = node;

          if (prev != null)
          {
              prev._nextnode = node;
          }
          if (next != null)
          {
              next._prevnode = node;
          }

          node._prevnode = prev;
          node._nextnode = next;
          node._parentnode = _parentnode;
      }

      // Inserts the given node at the given index and links it between its new neighbours.
      internal void Insert(int index, HtmlNode node)
      {
          if (node == null)
          {
              throw new ArgumentNullException("node");
          }

          HtmlNode prev = (index > 0) ? (HtmlNode)_items[index - 1] : null;
          HtmlNode next = (index < _items.Count) ? (HtmlNode)_items[index] : null;

          // Sanity check first, so that a failure leaves the list untouched.
          if ((node == prev) || (node == next))
          {
              throw new InvalidProgramException("Unexpected error.");
          }

          _items.Insert(index, node);

          if (prev != null)
          {
              prev._nextnode = node;
          }
          if (next != null)
          {
              next._prevnode = node;
          }

          node._prevnode = prev;
          node._nextnode = next;
          node._parentnode = _parentnode;
      }

      // Adds the given node at the end of the list and links it after the current last node.
      internal void Append(HtmlNode node)
      {
          if (node == null)
          {
              throw new ArgumentNullException("node");
          }

          HtmlNode last = (_items.Count > 0) ? (HtmlNode)_items[_items.Count - 1] : null;

          // Sanity check first, so that a failure leaves the list untouched.
          if (last == node)
          {
              throw new InvalidProgramException("Unexpected error.");
          }

          _items.Add(node);

          node._prevnode = last;
          node._nextnode = null;
          node._parentnode = _parentnode;

          if (last != null)
          {
              last._nextnode = node;
          }
      }

      // Adds the given node at the start of the list and links it before the current first node.
      internal void Prepend(HtmlNode node)
      {
          if (node == null)
          {
              throw new ArgumentNullException("node");
          }

          HtmlNode first = (_items.Count > 0) ? (HtmlNode)_items[0] : null;

          // Sanity check first, so that a failure leaves the list untouched.
          if (node == first)
          {
              throw new InvalidProgramException("Unexpected error.");
          }

          _items.Insert(0, node);

          node._nextnode = first;
          node._prevnode = null;
          node._parentnode = _parentnode;

          if (first != null)
          {
              first._prevnode = node;
          }
      }

      // Adds the node to the list only; sibling and parent links are NOT updated.
      internal void Add(HtmlNode node)
      {
          _items.Add(node);
      }

      /// <summary>
      /// Gets the node at the specified index.
      /// </summary>
      public HtmlNode this[int index]
      {
          get
          {
              return _items[index] as HtmlNode;
          }
      }

      // Returns the index of the given node (compared by reference), or -1 if it is not in the list.
      internal int GetNodeIndex(HtmlNode node)
      {
          // TODO: should we rewrite this? what would be the key of a node?
          for (int i = 0; i < _items.Count; i++)
          {
              if (node == ((HtmlNode)_items[i]))
              {
                  return i;
              }
          }
          return -1;
      }

      /// <summary>
      /// Gets the index of a given node in the list.
      /// </summary>
      /// <exception cref="ArgumentNullException">The node is null.</exception>
      /// <exception cref="ArgumentOutOfRangeException">The node is not in the collection.</exception>
      public int this[HtmlNode node]
      {
          get
          {
              // Without this guard a null argument fails with a NullReferenceException
              // while the "not found" message below is being built.
              if (node == null)
              {
                  throw new ArgumentNullException("node");
              }

              int index = GetNodeIndex(node);
              if (index == -1)
              {
                  throw new ArgumentOutOfRangeException("node", "Node \"" + node.CloneNode(false).OuterHtml + "\" was not found in the collection");
              }
              return index;
          }
      }

      /// <summary>
      /// Returns an enumerator that can iterate through the list.
      /// </summary>
      /// <returns>An IEnumerator for the entire list.</returns>
      public HtmlNodeEnumerator GetEnumerator()
      {
          return new HtmlNodeEnumerator(_items);
      }

      IEnumerator IEnumerable.GetEnumerator()
      {
          return GetEnumerator();
      }

      /// <summary>
      /// Represents an enumerator that can iterate through the list.
      /// </summary>
      public class HtmlNodeEnumerator: IEnumerator
      {
          int _index;
          ArrayList _items;

          internal HtmlNodeEnumerator(ArrayList items)
          {
              _items = items;
              _index = -1;
          }

          /// <summary>
          /// Sets the enumerator to its initial position, which is before the first element in the collection.
          /// </summary>
          public void Reset()
          {
              _index = -1;
          }

          /// <summary>
          /// Advances the enumerator to the next element of the collection.
          /// </summary>
          /// <returns>true if the enumerator was successfully advanced to the next element, false if the enumerator has passed the end of the collection.</returns>
          public bool MoveNext()
          {
              _index++;
              return (_index < _items.Count);
          }

          /// <summary>
          /// Gets the current element in the collection.
          /// </summary>
          public HtmlNode Current
          {
              get
              {
                  return (HtmlNode)(_items[_index]);
              }
          }

          /// <summary>
          /// Gets the current element in the collection.
          /// </summary>
          object IEnumerator.Current
          {
              get
              {
                  return (Current);
              }
          }
      }
  }
  /// <summary>
  /// Represents an HTML text node.
  /// </summary>
  public class HtmlTextNode: HtmlNode
  {
      // Text assigned through the Text or InnerHtml setters; while it is null the
      // getters fall back to the base class's OuterHtml.
      private string _text;

      internal HtmlTextNode(HtmlDocument ownerdocument, int index)
          : base(HtmlNodeType.Text, ownerdocument, index)
      {
      }

      /// <summary>
      /// Gets or Sets the HTML between the start and end tags of the object. In the case of a text node, it is equal to OuterHtml.
      /// </summary>
      public override string InnerHtml
      {
          get { return OuterHtml; }
          set { _text = value; }
      }

      /// <summary>
      /// Gets the object and its content in HTML.
      /// </summary>
      public override string OuterHtml
      {
          get { return (_text != null) ? _text : base.OuterHtml; }
      }

      /// <summary>
      /// Gets or Sets the text of the node.
      /// </summary>
      public string Text
      {
          get { return (_text != null) ? _text : base.OuterHtml; }
          set { _text = value; }
      }
  }
  1948. /// <summary>
  1949. /// Represents an HTML comment.
  1950. /// </summary>
  1951. public class HtmlCommentNode: HtmlNode
  1952. {
  1953. private string _comment;
  1954. internal HtmlCommentNode(HtmlDocument ownerdocument, int index):
  1955. base(HtmlNodeType.Comment, ownerdocument, index)
  1956. {
  1957. }
  1958. /// <summary>
  1959. /// Gets or Sets the HTML between the start and end tags of the object. In the case of a text node, it is equals to OuterHtml.
  1960. /// </summary>
  1961. public override string InnerHtml
  1962. {
  1963. get
  1964. {
  1965. if (_comment == null)
  1966. {
  1967. return base.InnerHtml;
  1968. }
  1969. return _comment;
  1970. }
  1971. set
  1972. {
  1973. _comment = value;
  1974. }
  1975. }
  1976. /// <summary>
  1977. /// Gets or Sets the object and its content in HTML.
  1978. /// </summary>
  1979. public override string OuterHtml
  1980. {
  1981. get
  1982. {
  1983. if (_comment == null)
  1984. {
  1985. return base.OuterHtml;
  1986. }
  1987. return "<!--" + _comment + "-->";
  1988. }
  1989. }
  1990. /// <summary>
  1991. /// Gets or Sets the comment text of the node.
  1992. /// </summary>
  1993. public string Comment
  1994. {
  1995. get
  1996. {
  1997. if (_comment == null)
  1998. {
  1999. return base.InnerHtml;
  2000. }
  2001. return _comment;
  2002. }
  2003. set
  2004. {

Large files files are truncated, but you can click here to view the full file