PageRenderTime 55ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/docs/HtmlAgilityPack/HtmlNode.cs

https://bitbucket.org/danipen/mono
C# | 1996 lines | 1406 code | 224 blank | 366 comment | 303 complexity | 4e7c859664dcbe99693db7a6dfb82799 MD5 | raw file
Possible License(s): Unlicense, Apache-2.0, LGPL-2.0, MPL-2.0-no-copyleft-exception, CC-BY-SA-3.0, GPL-2.0

Large files are truncated, but you can click here to view the full file

  1. // HtmlAgilityPack V1.0 - Simon Mourier <simon underscore mourier at hotmail dot com>
  2. using System;
  3. using System.Collections;
  4. using System.Collections.Generic;
  5. using System.Diagnostics;
  6. using System.IO;
  7. using System.Xml;
  8. using System.Xml.XPath;
  9. namespace HtmlAgilityPack
  10. {
  11. /// <summary>
  12. /// Represents an HTML node.
  13. /// </summary>
  14. [DebuggerDisplay("Name: {OriginalName}}")]
  15. public class HtmlNode : IXPathNavigable
  16. {
  17. #region Fields
  // --- identity and ownership ---
  internal HtmlNodeType _nodetype;            // backing field of NodeType
  internal HtmlDocument _ownerdocument;       // backing field of OwnerDocument
  private string _name;                       // raw name; Name returns it lower-cased, OriginalName as stored
  internal int _namestartindex;               // offset of the name inside _ownerdocument._text
  internal int _namelength;                   // length of the name inside _ownerdocument._text

  // --- tree links ---
  internal HtmlNode _parentnode;              // backing field of ParentNode
  internal HtmlNode _prevnode;                // backing field of PreviousSibling
  internal HtmlNode _nextnode;                // backing field of NextSibling
  internal HtmlNodeCollection _childnodes;    // created lazily by ChildNodes
  internal HtmlAttributeCollection _attributes; // created lazily by Attributes
  internal HtmlNode _endnode;                 // closing node; non-null means Closed, equals 'this' for comment/document/text nodes
  internal HtmlNode _prevwithsamename;        // NOTE(review): not used in the visible part of the file; presumably links to the previous node with the same name - confirm
  internal bool _starttag;                    // NOTE(review): not used in the visible part of the file; presumably marks a start tag - confirm

  // --- inner HTML cache and source span ---
  internal bool _innerchanged;                // true when _innerhtml must be regenerated by InnerHtml
  internal string _innerhtml;                 // cached inner HTML
  internal int _innerstartindex;              // offset of the inner HTML inside _ownerdocument._text
  internal int _innerlength;                  // length of the inner HTML inside _ownerdocument._text

  // --- outer HTML cache and source span ---
  internal bool _outerchanged;                // true when _outerhtml must be regenerated by OuterHtml
  internal string _outerhtml;                 // cached outer HTML
  internal int _outerstartindex;              // offset of the outer HTML inside _ownerdocument._text (set from the constructor's index)
  internal int _outerlength;                  // length of the outer HTML inside _ownerdocument._text

  // --- position information ---
  internal int _line;                         // backing field of Line
  internal int _lineposition;                 // backing field of LinePosition
  internal int _streamposition;               // backing field of StreamPosition
  42. #endregion
  43. #region Static Members
  /// <summary>
  /// Per-tag behavior flags, keyed by lowercase tag name. Each value is a combination of
  /// <see cref="HtmlElementFlag"/> values. The defaults are installed by the static constructor.
  /// </summary>
  public static Hashtable ElementsFlags;

  /// <summary>
  /// Name given to comment nodes: '#comment'.
  /// </summary>
  public static readonly string HtmlNodeTypeNameComment = "#comment";

  /// <summary>
  /// Name given to the document node: '#document'.
  /// </summary>
  public static readonly string HtmlNodeTypeNameDocument = "#document";

  /// <summary>
  /// Name given to text nodes: '#text'.
  /// </summary>
  public static readonly string HtmlNodeTypeNameText = "#text";
  61. #endregion
  62. #region Constructors
  /// <summary>
  /// Type initializer. Populates <see cref="ElementsFlags"/> with the default flags of the
  /// tags that need special handling.
  /// </summary>
  static HtmlNode()
  {
      ElementsFlags = new Hashtable();

      // Tags whose content may be anything.
      string[] cdataTags = { "script", "style", "noxhtml" };
      foreach (string tag in cdataTags)
      {
          ElementsFlags.Add(tag, HtmlElementFlag.CData);
      }

      // Tags that can not contain other tags.
      string[] emptyTags =
      {
          "base", "link", "meta", "isindex", "hr", "col", "img", "param", "embed",
          "frame", "wbr", "bgsound", "spacer", "keygen", "area", "input", "basefont"
      };
      foreach (string tag in emptyTags)
      {
          ElementsFlags.Add(tag, HtmlElementFlag.Empty);
      }
      ElementsFlags.Add("form", HtmlElementFlag.CanOverlap | HtmlElementFlag.Empty);

      // Tags that sometimes contain other tags and sometimes do not.
      ElementsFlags.Add("option", HtmlElementFlag.Empty);

      // Tags whose closing tag is equivalent to the open tag:
      //   <p>bla</p>bla stays <p>bla</p>bla
      //   <p>bla<p>bla stays <p>bla<p>bla, and is not turned into
      //   <p>bla</p><p>bla</p> or <p>bla<p>bla</p></p>
      // <br> is handled the same way.
      ElementsFlags.Add("br", HtmlElementFlag.Empty | HtmlElementFlag.Closed);
      ElementsFlags.Add("p", HtmlElementFlag.Empty | HtmlElementFlag.Closed);
  }
  /// <summary>
  /// Creates a node of the given type that belongs to the given document.
  /// </summary>
  /// <param name="type">The type of the node to create.</param>
  /// <param name="ownerdocument">The document that owns the node.</param>
  /// <param name="index">Start offset of the node in the document text, or -1 when the node does not come from parsed text.</param>
  public HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
  {
      _nodetype = type;
      _ownerdocument = ownerdocument;
      _outerstartindex = index;

      // Comment, document and text nodes get a fixed name and are their own end node,
      // which makes them report Closed == true right away.
      if (type == HtmlNodeType.Comment)
      {
          Name = HtmlNodeTypeNameComment;
          _endnode = this;
      }
      else if (type == HtmlNodeType.Document)
      {
          Name = HtmlNodeTypeNameDocument;
          _endnode = this;
      }
      else if (type == HtmlNodeType.Text)
      {
          Name = HtmlNodeTypeNameText;
          _endnode = this;
      }

      // A still-open node with a real index is registered in the document's
      // opened-nodes table, using the index as the key.
      if (_ownerdocument._openednodes != null && !Closed && index != -1)
      {
          _ownerdocument._openednodes.Add(index, this);
      }

      // A node without a source index that is neither a comment nor a text node
      // has no source text to read from: inner and outer HTML must be computed.
      bool needsGeneratedHtml = (index == -1)
          && (type != HtmlNodeType.Comment)
          && (type != HtmlNodeType.Text);
      if (needsGeneratedHtml)
      {
          _outerchanged = true;
          _innerchanged = true;
      }
  }
  144. #endregion
  145. #region Properties
  /// <summary>
  /// Gets the collection of HTML attributes for this node. Never returns null: the
  /// collection is created on first access.
  /// </summary>
  public HtmlAttributeCollection Attributes
  {
      get
      {
          // Create the collection only when there is none yet. Testing HasAttributes here
          // would replace an existing but empty collection on every access, so a reference
          // obtained earlier would silently stop being the node's collection.
          if (_attributes == null)
          {
              _attributes = new HtmlAttributeCollection(this);
          }
          return _attributes;
      }
      internal set { _attributes = value; }
  }
  /// <summary>
  /// Gets the children of this node. The collection is created on first access.
  /// </summary>
  public HtmlNodeCollection ChildNodes
  {
      get
      {
          if (_childnodes != null)
          {
              return _childnodes;
          }
          _childnodes = new HtmlNodeCollection(this);
          return _childnodes;
      }
      internal set
      {
          _childnodes = value;
      }
  }
  /// <summary>
  /// Gets a value indicating whether this node has been closed, i.e. whether it has an end node.
  /// </summary>
  public bool Closed
  {
      get
      {
          return _endnode != null;
      }
  }
  /// <summary>
  /// Gets the HTML attributes found on the closing tag. Never returns null: when the
  /// closing tag has no attributes a new empty collection is returned.
  /// </summary>
  public HtmlAttributeCollection ClosingAttributes
  {
      get
      {
          if (HasClosingAttributes)
          {
              return _endnode.Attributes;
          }
          return new HtmlAttributeCollection(this);
      }
  }
  /// <summary>
  /// Gets the end node of this node, or null when the node is not closed.
  /// </summary>
  internal HtmlNode EndNode
  {
      get
      {
          return _endnode;
      }
  }
  /// <summary>
  /// Gets the first child of this node, or null when it has no children.
  /// </summary>
  public HtmlNode FirstChild
  {
      get
      {
          return HasChildNodes ? _childnodes[0] : null;
      }
  }
  /// <summary>
  /// Gets a value indicating whether this node has at least one attribute.
  /// </summary>
  public bool HasAttributes
  {
      get
      {
          return (_attributes != null) && (_attributes.Count > 0);
      }
  }
  /// <summary>
  /// Gets a value indicating whether this node has at least one child node.
  /// </summary>
  public bool HasChildNodes
  {
      get
      {
          return (_childnodes != null) && (_childnodes.Count > 0);
      }
  }
  /// <summary>
  /// Gets a value indicating whether the closing tag of this node carries attributes.
  /// </summary>
  public bool HasClosingAttributes
  {
      get
      {
          // No end node, or a node that is its own end node, has no separate closing tag.
          bool hasSeparateEndNode = (_endnode != null) && (_endnode != this);
          if (!hasSeparateEndNode)
          {
              return false;
          }
          HtmlAttributeCollection closing = _endnode._attributes;
          return (closing != null) && (closing.Count > 0);
      }
  }
  /// <summary>
  /// Gets or sets the value of the 'id' HTML attribute. The owner document must have been
  /// parsed with OptionUseIdAttribute set to true.
  /// </summary>
  /// <exception cref="Exception">The owner document keeps no id table.</exception>
  /// <exception cref="ArgumentNullException">The value being set is null.</exception>
  public string Id
  {
      get
      {
          if (_ownerdocument._nodesid != null)
          {
              return GetId();
          }
          throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
      }
      set
      {
          // The id-table check comes first, so it wins over the null-value check.
          bool idsTracked = _ownerdocument._nodesid != null;
          if (!idsTracked)
          {
              throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
          }
          if (value == null)
          {
              throw new ArgumentNullException("value");
          }
          SetId(value);
      }
  }
  /// <summary>
  /// Gets or sets the HTML between the start and end tags of this node.
  /// </summary>
  public virtual string InnerHtml
  {
      get
      {
          if (!_innerchanged)
          {
              // Prefer the cached text, then fall back to the slice of the source document.
              if (_innerhtml != null)
              {
                  return _innerhtml;
              }
              return (_innerstartindex < 0)
                  ? string.Empty
                  : _ownerdocument._text.Substring(_innerstartindex, _innerlength);
          }

          // The content changed: regenerate the text and cache it.
          _innerhtml = WriteContentTo();
          _innerchanged = false;
          return _innerhtml;
      }
      set
      {
          // Parse the new markup in a scratch document, then swap its nodes in
          // as the children of this node.
          HtmlDocument scratch = new HtmlDocument();
          scratch.LoadHtml(value);

          RemoveAllChildren();
          AppendChildren(scratch.DocumentNode.ChildNodes);
      }
  }
  /// <summary>
  /// Gets the text between the start and end tags of this node. For a text node this is
  /// its text, for a comment node its comment, otherwise the concatenated
  /// <see cref="InnerText"/> of all child nodes (an empty string when there are none).
  /// </summary>
  public virtual string InnerText
  {
      get
      {
          if (_nodetype == HtmlNodeType.Text)
          {
              return ((HtmlTextNode) this).Text;
          }
          if (_nodetype == HtmlNodeType.Comment)
          {
              return ((HtmlCommentNode) this).Comment;
          }
          // note: the value is recomputed on every call; it is not cached like InnerHtml.
          if (!HasChildNodes)
          {
              return string.Empty;
          }
          // A StringBuilder avoids re-copying the accumulated text once per child, which
          // is what repeated string concatenation does. Appending null adds nothing, so
          // the result is the same as with concatenation.
          System.Text.StringBuilder text = new System.Text.StringBuilder();
          foreach (HtmlNode node in ChildNodes)
          {
              text.Append(node.InnerText);
          }
          return text.ToString();
      }
  }
  /// <summary>
  /// Gets the last child of this node, or null when it has no children.
  /// </summary>
  public HtmlNode LastChild
  {
      get
      {
          if (HasChildNodes)
          {
              return _childnodes[_childnodes.Count - 1];
          }
          return null;
      }
  }
  /// <summary>
  /// Gets the line number of this node in the document.
  /// </summary>
  public int Line
  {
      get
      {
          return _line;
      }
      internal set
      {
          _line = value;
      }
  }
  /// <summary>
  /// Gets the column number of this node in the document.
  /// </summary>
  public int LinePosition
  {
      get
      {
          return _lineposition;
      }
      internal set
      {
          _lineposition = value;
      }
  }
  /// <summary>
  /// Gets or sets this node's name. The getter returns the name in lower case (use
  /// <see cref="OriginalName"/> for the stored form); the setter stores the value as given.
  /// </summary>
  public string Name
  {
      get
      {
          if (_name == null)
          {
              // The name has not been materialized yet: read it from the document text.
              Name = _ownerdocument._text.Substring(_namestartindex, _namelength);
          }
          // Tag names are identifiers, not linguistic text, so they are folded with the
          // invariant culture. Culture-sensitive ToLower() maps 'I' to a dotless 'ı' under
          // Turkish cultures, which would make e.g. "INPUT" stop matching "input".
          return _name != null ? _name.ToLowerInvariant() : string.Empty;
      }
      set { _name = value; }
  }
  /// <summary>
  /// Gets the node immediately following this node.
  /// </summary>
  public HtmlNode NextSibling
  {
      get
      {
          return _nextnode;
      }
      internal set
      {
          _nextnode = value;
      }
  }
  /// <summary>
  /// Gets the type of this node.
  /// </summary>
  public HtmlNodeType NodeType
  {
      get
      {
          return _nodetype;
      }
      internal set
      {
          _nodetype = value;
      }
  }
  /// <summary>
  /// Gets the name of the tag exactly as stored, without the lower-casing applied by
  /// <see cref="Name"/>. May be null when the name has not been read yet.
  /// </summary>
  public string OriginalName
  {
      get
      {
          return _name;
      }
  }
  /// <summary>
  /// Gets this node and its content as HTML.
  /// </summary>
  public virtual string OuterHtml
  {
      get
      {
          if (!_outerchanged)
          {
              // Prefer the cached text, then fall back to the slice of the source document.
              if (_outerhtml != null)
              {
                  return _outerhtml;
              }
              return (_outerstartindex < 0)
                  ? string.Empty
                  : _ownerdocument._text.Substring(_outerstartindex, _outerlength);
          }

          // The node changed: regenerate the text and cache it.
          _outerhtml = WriteTo();
          _outerchanged = false;
          return _outerhtml;
      }
  }
  /// <summary>
  /// Gets the <see cref="HtmlDocument"/> to which this node belongs.
  /// </summary>
  public HtmlDocument OwnerDocument
  {
      get
      {
          return _ownerdocument;
      }
      internal set
      {
          _ownerdocument = value;
      }
  }
  /// <summary>
  /// Gets the parent of this node, or null when it has none.
  /// </summary>
  public HtmlNode ParentNode
  {
      get
      {
          return _parentnode;
      }
      internal set
      {
          _parentnode = value;
      }
  }
  /// <summary>
  /// Gets the node immediately preceding this node.
  /// </summary>
  public HtmlNode PreviousSibling
  {
      get
      {
          return _prevnode;
      }
      internal set
      {
          _prevnode = value;
      }
  }
  /// <summary>
  /// Gets the stream position of this node in the document, relative to the start of the document.
  /// </summary>
  public int StreamPosition
  {
      get
      {
          return _streamposition;
      }
  }
  /// <summary>
  /// Gets an XPath string that points to this node: the parent's path, a '/', and this
  /// node's relative path. A node with no parent, or whose parent is the document node,
  /// starts directly at '/'.
  /// </summary>
  public string XPath
  {
      get
      {
          string prefix;
          if (ParentNode == null || ParentNode.NodeType == HtmlNodeType.Document)
          {
              prefix = "/";
          }
          else
          {
              prefix = ParentNode.XPath + "/";
          }
          return prefix + GetRelativeXpath();
      }
  }
  491. #endregion
  492. #region IXPathNavigable Members
  /// <summary>
  /// Creates a new XPathNavigator object for navigating this HTML node.
  /// </summary>
  /// <returns>An XPathNavigator positioned on this node, not on the root of the document.</returns>
  public XPathNavigator CreateNavigator()
  {
      HtmlNodeNavigator navigator = new HtmlNodeNavigator(_ownerdocument, this);
      return navigator;
  }
  501. #endregion
  502. #region Public Methods
  /// <summary>
  /// Determines if an element node can be kept overlapped.
  /// </summary>
  /// <param name="name">The name of the element node to check. May not be <c>null</c>.</param>
  /// <returns>true if <see cref="ElementsFlags"/> marks the name with <c>CanOverlap</c>, <c>false</c> otherwise (including for unknown names).</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public static bool CanOverlapElement(string name)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }
      // The table is keyed by lowercase tag names. Fold with the invariant culture so the
      // lookup does not depend on the thread culture (Turkish 'I' would otherwise miss).
      object flag = ElementsFlags[name.ToLowerInvariant()];
      if (flag == null)
      {
          return false;
      }
      return (((HtmlElementFlag) flag) & HtmlElementFlag.CanOverlap) != 0;
  }
  /// <summary>
  /// Creates an HTML node from a string of literal HTML.
  /// </summary>
  /// <param name="html">The HTML text.</param>
  /// <returns>The first top-level node parsed from the text, or null when parsing produced no nodes.</returns>
  public static HtmlNode CreateNode(string html)
  {
      // REVIEW: not optimal - a whole throw-away document is built just to parse the fragment.
      HtmlDocument scratch = new HtmlDocument();
      scratch.LoadHtml(html);

      HtmlNode root = scratch.DocumentNode;
      return root.FirstChild;
  }
  /// <summary>
  /// Determines if an element node is a CDATA element node.
  /// </summary>
  /// <param name="name">The name of the element node to check. May not be null.</param>
  /// <returns>true if <see cref="ElementsFlags"/> marks the name with <c>CData</c>, false otherwise (including for unknown names).</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public static bool IsCDataElement(string name)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }
      // The table is keyed by lowercase tag names. Fold with the invariant culture so the
      // lookup does not depend on the thread culture (Turkish 'I' would otherwise miss).
      object flag = ElementsFlags[name.ToLowerInvariant()];
      if (flag == null)
      {
          return false;
      }
      return (((HtmlElementFlag) flag) & HtmlElementFlag.CData) != 0;
  }
  /// <summary>
  /// Determines if an element node is closed.
  /// </summary>
  /// <param name="name">The name of the element node to check. May not be null.</param>
  /// <returns>true if <see cref="ElementsFlags"/> marks the name with <c>Closed</c>, false otherwise (including for unknown names).</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public static bool IsClosedElement(string name)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }
      // The table is keyed by lowercase tag names. Fold with the invariant culture so the
      // lookup does not depend on the thread culture (Turkish 'I' would otherwise miss).
      object flag = ElementsFlags[name.ToLowerInvariant()];
      if (flag == null)
      {
          return false;
      }
      return (((HtmlElementFlag) flag) & HtmlElementFlag.Closed) != 0;
  }
  /// <summary>
  /// Determines if an element node is defined as empty.
  /// </summary>
  /// <param name="name">The name of the element node to check. May not be null.</param>
  /// <returns>
  /// true for an empty name, for a name starting with '!' or '?', and for a name that
  /// <see cref="ElementsFlags"/> marks with <c>Empty</c>; false otherwise.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public static bool IsEmptyElement(string name)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }
      if (name.Length == 0)
      {
          return true;
      }
      // <!DOCTYPE ...
      if ('!' == name[0])
      {
          return true;
      }
      // <?xml ...
      if ('?' == name[0])
      {
          return true;
      }
      // The table is keyed by lowercase tag names. Fold with the invariant culture so the
      // lookup does not depend on the thread culture (Turkish 'I' would otherwise miss,
      // e.g. "INPUT" or "IMG").
      object flag = ElementsFlags[name.ToLowerInvariant()];
      if (flag == null)
      {
          return false;
      }
      return (((HtmlElementFlag) flag) & HtmlElementFlag.Empty) != 0;
  }
  /// <summary>
  /// Determines if a text is the closing tag of a node that can be kept overlapped.
  /// </summary>
  /// <param name="text">The text to check, e.g. "&lt;/form&gt;". May not be null.</param>
  /// <returns>true if the text has the form "&lt;/name&gt;" and <see cref="CanOverlapElement"/> accepts the name; false otherwise.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
  public static bool IsOverlappedClosingElement(string text)
  {
      if (text == null)
      {
          throw new ArgumentNullException("text");
      }
      // The shortest closing tag is "</x>", 4 characters. Anything shorter cannot match.
      // (The previous "<= 4" test also rejected valid one-letter tags such as "</b>".)
      if (text.Length < 4)
      {
          return false;
      }
      if ((text[0] != '<') ||
          (text[text.Length - 1] != '>') ||
          (text[1] != '/'))
      {
          return false;
      }
      // Strip the leading "</" and the trailing ">".
      string name = text.Substring(2, text.Length - 3);
      return CanOverlapElement(name);
  }
  /// <summary>
  /// Returns all ancestor nodes of this node, starting with its parent and walking up
  /// to the topmost node.
  /// </summary>
  /// <returns>The ancestors, nearest first. Empty when this node has no parent.</returns>
  public IEnumerable<HtmlNode> Ancestors()
  {
      // Start at the parent itself. The previous loop dereferenced ParentNode without a
      // null check and began yielding at the grandparent, skipping the direct parent.
      for (HtmlNode n = ParentNode; n != null; n = n.ParentNode)
      {
          yield return n;
      }
  }
  /// <summary>
  /// Returns the ancestors of this node whose <see cref="Name"/> equals the given name.
  /// </summary>
  /// <param name="name">The name to match against each ancestor's name.</param>
  /// <returns>The matching ancestors, nearest first.</returns>
  public IEnumerable<HtmlNode> Ancestors(string name)
  {
      HtmlNode current = ParentNode;
      while (current != null)
      {
          if (current.Name == name)
          {
              yield return current;
          }
          current = current.ParentNode;
      }
  }
  /// <summary>
  /// Returns this node followed by all of its ancestors.
  /// </summary>
  /// <returns>This node first, then its ancestors, nearest first.</returns>
  public IEnumerable<HtmlNode> AncestorsAndSelf()
  {
      HtmlNode current = this;
      while (current != null)
      {
          yield return current;
          current = current.ParentNode;
      }
  }
  /// <summary>
  /// Returns this node and its ancestors, keeping only those whose <see cref="Name"/>
  /// equals the given name.
  /// </summary>
  /// <param name="name">The name to match.</param>
  /// <returns>The matching nodes, starting from this node and walking up.</returns>
  public IEnumerable<HtmlNode> AncestorsAndSelf(string name)
  {
      HtmlNode current = this;
      while (current != null)
      {
          if (current.Name == name)
          {
              yield return current;
          }
          current = current.ParentNode;
      }
  }
  /// <summary>
  /// Adds the specified node to the end of the list of children of this node.
  /// </summary>
  /// <param name="newChild">The node to add. May not be null.</param>
  /// <returns>The node added.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="newChild"/> is null.</exception>
  public HtmlNode AppendChild(HtmlNode newChild)
  {
      if (newChild == null)
      {
          throw new ArgumentNullException("newChild");
      }

      ChildNodes.Append(newChild);

      // Register the child's id with the owner document.
      string childId = newChild.GetId();
      _ownerdocument.SetIdForNode(newChild, childId);

      // Cached inner and outer HTML are stale now.
      _innerchanged = true;
      _outerchanged = true;
      return newChild;
  }
  /// <summary>
  /// Adds every node of the specified list to the end of the list of children of this node.
  /// </summary>
  /// <param name="newChildren">The node list to add. May not be null.</param>
  /// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is null.</exception>
  public void AppendChildren(HtmlNodeCollection newChildren)
  {
      if (newChildren == null)
      {
          // The reported name must match the parameter (it used to read "newChildrend").
          throw new ArgumentNullException("newChildren");
      }
      foreach (HtmlNode newChild in newChildren)
      {
          AppendChild(newChild);
      }
  }
  /// <summary>
  /// Gets the attributes of this node that have the given name.
  /// </summary>
  /// <param name="name">The attribute name to look for.</param>
  /// <returns>The result of <c>Attributes.AttributesWithName(name)</c>.</returns>
  public IEnumerable<HtmlAttribute> ChildAttributes(string name)
  {
      HtmlAttributeCollection attributes = Attributes;
      return attributes.AttributesWithName(name);
  }
  /// <summary>
  /// Creates a deep duplicate of this node (same as <c>CloneNode(true)</c>).
  /// </summary>
  /// <returns>The cloned node.</returns>
  public HtmlNode Clone()
  {
      const bool deep = true;
      return CloneNode(deep);
  }
  /// <summary>
  /// Creates a deep duplicate of this node and gives the duplicate a new name.
  /// </summary>
  /// <param name="newName">The new name of the cloned node. May not be <c>null</c>.</param>
  /// <returns>The cloned node.</returns>
  public HtmlNode CloneNode(string newName)
  {
      const bool deep = true;
      return CloneNode(newName, deep);
  }
  /// <summary>
  /// Creates a duplicate of this node and gives the duplicate a new name.
  /// </summary>
  /// <param name="newName">The new name of the cloned node. May not be null.</param>
  /// <param name="deep">true to recursively clone the subtree under this node; false to clone only the node itself.</param>
  /// <returns>The cloned node.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="newName"/> is null.</exception>
  public HtmlNode CloneNode(string newName, bool deep)
  {
      if (newName == null)
      {
          throw new ArgumentNullException("newName");
      }

      HtmlNode renamed = CloneNode(deep);
      renamed.Name = newName;
      return renamed;
  }
  /// <summary>
  /// Creates a duplicate of this node in the same owner document.
  /// </summary>
  /// <param name="deep">true to recursively clone the subtree under this node; false to clone only the node itself.</param>
  /// <returns>The cloned node.</returns>
  public HtmlNode CloneNode(bool deep)
  {
      HtmlNode copy = _ownerdocument.CreateNode(_nodetype);
      copy.Name = Name;

      // Comment and text nodes only carry their text: copy it and stop.
      if (_nodetype == HtmlNodeType.Comment)
      {
          ((HtmlCommentNode) copy).Comment = ((HtmlCommentNode) this).Comment;
          return copy;
      }
      if (_nodetype == HtmlNodeType.Text)
      {
          ((HtmlTextNode) copy).Text = ((HtmlTextNode) this).Text;
          return copy;
      }

      // Attributes of the opening tag.
      if (HasAttributes)
      {
          foreach (HtmlAttribute source in _attributes)
          {
              HtmlAttribute duplicate = source.Clone();
              copy.Attributes.Append(duplicate);
          }
      }

      // Attributes of the closing tag.
      // NOTE(review): CloneNode(false) on the end node already copies its attributes in the
      // loop above, so appending them again here may duplicate them - confirm against
      // HtmlAttributeCollection.Append.
      if (HasClosingAttributes)
      {
          copy._endnode = _endnode.CloneNode(false);
          foreach (HtmlAttribute source in _endnode._attributes)
          {
              HtmlAttribute duplicate = source.Clone();
              copy._endnode._attributes.Append(duplicate);
          }
      }

      // Children, only for a deep clone.
      if (deep && HasChildNodes)
      {
          foreach (HtmlNode child in _childnodes)
          {
              HtmlNode childCopy = child.CloneNode(true);
              copy.AppendChild(childCopy);
          }
      }
      return copy;
  }
  /// <summary>
  /// Copies the given node into this node; equivalent to <c>CopyFrom(node, true)</c>.
  /// </summary>
  /// <param name="node">The node to duplicate. May not be <c>null</c>.</param>
  public void CopyFrom(HtmlNode node)
  {
      const bool deep = true;
      CopyFrom(node, deep);
  }
  /// <summary>
  /// Replaces the attributes of this node with those of the given node and, for a deep
  /// copy, replaces its children with clones of the given node's children.
  /// </summary>
  /// <param name="node">The node to duplicate. May not be <c>null</c>.</param>
  /// <param name="deep">true to also copy the subtree under the specified node, false to copy only the attributes and leave this node's children untouched.</param>
  /// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
  public void CopyFrom(HtmlNode node, bool deep)
  {
      if (node == null)
      {
          throw new ArgumentNullException("node");
      }

      Attributes.RemoveAll();
      if (node.HasAttributes)
      {
          foreach (HtmlAttribute att in node.Attributes)
          {
              SetAttributeValue(att.Name, att.Value);
          }
      }

      // The subtree is copied only on request. The test used to be inverted ("!deep"),
      // so a shallow copy replaced the children and a deep copy did not.
      if (!deep)
      {
          return;
      }

      RemoveAllChildren();
      if (node.HasChildNodes)
      {
          foreach (HtmlNode child in node.ChildNodes)
          {
              AppendChild(child.CloneNode(true));
          }
      }
  }
  /// <summary>
  /// Creates an XPathNavigator positioned on the document node of the owner document.
  /// </summary>
  /// <returns>The new navigator.</returns>
  public XPathNavigator CreateRootNavigator()
  {
      HtmlNode root = _ownerdocument.DocumentNode;
      return new HtmlNodeNavigator(_ownerdocument, root);
  }
  /// <summary>
  /// Enumerates every node below this node, depth first: each child is followed by its
  /// own descendants before the next child is visited.
  /// </summary>
  /// <returns>The descendant nodes. This node itself is not included.</returns>
  public IEnumerable<HtmlNode> DescendantNodes()
  {
      foreach (HtmlNode child in ChildNodes)
      {
          yield return child;

          foreach (HtmlNode below in child.DescendantNodes())
          {
              yield return below;
          }
      }
  }
  /// <summary>
  /// Returns this node followed by all of its descendant nodes; forwards to
  /// <see cref="DescendantsAndSelf()"/>.
  /// </summary>
  /// <returns>This node, then its descendants.</returns>
  public IEnumerable<HtmlNode> DescendantNodesAndSelf()
  {
      IEnumerable<HtmlNode> nodes = DescendantsAndSelf();
      return nodes;
  }
  /// <summary>
  /// Enumerates all descendant nodes of this node; yields exactly what
  /// <see cref="DescendantNodes"/> yields.
  /// </summary>
  /// <returns>The descendant nodes. This node itself is not included.</returns>
  public IEnumerable<HtmlNode> Descendants()
  {
      // DescendantNodes() is itself a lazy iterator, so returning it directly yields
      // the same sequence as re-yielding each of its items.
      return DescendantNodes();
  }
  /// <summary>
  /// Enumerates the descendant nodes whose <see cref="Name"/> equals the given name.
  /// </summary>
  /// <param name="name">The name to match.</param>
  /// <returns>The matching descendants.</returns>
  public IEnumerable<HtmlNode> Descendants(string name)
  {
      foreach (HtmlNode candidate in Descendants())
      {
          if (candidate.Name != name)
          {
              continue;
          }
          yield return candidate;
      }
  }
  /// <summary>
  /// Returns this node followed by all of its descendant nodes.
  /// </summary>
  /// <returns>This node first, then every non-null node from <see cref="DescendantNodes"/>.</returns>
  public IEnumerable<HtmlNode> DescendantsAndSelf()
  {
      yield return this;

      foreach (HtmlNode descendant in DescendantNodes())
      {
          // Null entries are skipped.
          if (descendant == null)
          {
              continue;
          }
          yield return descendant;
      }
  }
  /// <summary>
  /// Returns this node and its descendants, keeping only those whose <see cref="Name"/>
  /// equals the given name.
  /// </summary>
  /// <param name="name">The name to match.</param>
  /// <returns>This node (if its name matches) followed by the matching descendants.</returns>
  public IEnumerable<HtmlNode> DescendantsAndSelf(string name)
  {
      // Apply the name filter to this node too, as AncestorsAndSelf(string) does.
      // It used to be yielded unconditionally, even when its name did not match.
      if (Name == name)
      {
          yield return this;
      }
      foreach (HtmlNode node in Descendants())
      {
          if (node.Name == name)
          {
              yield return node;
          }
      }
  }
  /// <summary>
  /// Gets the first direct child whose <see cref="Name"/> equals the given name.
  /// </summary>
  /// <param name="name">The name to match.</param>
  /// <returns>The first matching child, or null when there is none.</returns>
  public HtmlNode Element(string name)
  {
      HtmlNode match = null;
      foreach (HtmlNode child in ChildNodes)
      {
          if (child.Name == name)
          {
              match = child;
              break;
          }
      }
      return match;
  }
  /// <summary>
  /// Enumerates the direct children whose <see cref="Name"/> equals the given name.
  /// </summary>
  /// <param name="name">The name to match.</param>
  /// <returns>The matching children, in child order.</returns>
  public IEnumerable<HtmlNode> Elements(string name)
  {
      foreach (HtmlNode child in ChildNodes)
      {
          if (child.Name != name)
          {
              continue;
          }
          yield return child;
      }
  }
  /// <summary>
  /// Helper method to get the value of an attribute of this node as a string.
  /// </summary>
  /// <param name="name">The name of the attribute to get. May not be <c>null</c>.</param>
  /// <param name="def">The default value to return if not found.</param>
  /// <returns>The value of the attribute if found, the default value if not found.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public string GetAttributeValue(string name, string def)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }

      if (HasAttributes)
      {
          HtmlAttribute found = Attributes[name];
          if (found != null)
          {
              return found.Value;
          }
      }
      return def;
  }
  /// <summary>
  /// Helper method to get the value of an attribute of this node as an integer.
  /// </summary>
  /// <param name="name">The name of the attribute to get. May not be <c>null</c>.</param>
  /// <param name="def">The default value to return if the attribute is not found or is not a valid integer.</param>
  /// <returns>The parsed value of the attribute if found and valid, the default value otherwise.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public int GetAttributeValue(string name, int def)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }
      if (!HasAttributes)
      {
          return def;
      }
      HtmlAttribute att = Attributes[name];
      if (att == null)
      {
          return def;
      }

      string raw = att.Value;
      if (raw == null)
      {
          // Convert.ToInt32((string) null) returns 0 instead of throwing; keep that result
          // so existing callers see no change.
          return 0;
      }

      // TryParse accepts the same input as Convert.ToInt32(string) but reports failure
      // through its return value. Malformed attribute values are common in real-world
      // HTML, so throwing and swallowing an exception for each one is avoided.
      int parsed;
      return int.TryParse(raw, out parsed) ? parsed : def;
  }
  /// <summary>
  /// Helper method to get the value of an attribute of this node as a boolean.
  /// </summary>
  /// <param name="name">The name of the attribute to get. May not be <c>null</c>.</param>
  /// <param name="def">The default value to return if the attribute is not found or is not a valid boolean.</param>
  /// <returns>The parsed value of the attribute if found and valid, the default value otherwise.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
  public bool GetAttributeValue(string name, bool def)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }
      if (!HasAttributes)
      {
          return def;
      }
      HtmlAttribute att = Attributes[name];
      if (att == null)
      {
          return def;
      }

      string raw = att.Value;
      if (raw == null)
      {
          // Convert.ToBoolean((string) null) returns false instead of throwing; keep that
          // result so existing callers see no change.
          return false;
      }

      // TryParse accepts the same input as Convert.ToBoolean(string) but reports failure
      // through its return value instead of an exception that then has to be swallowed.
      bool parsed;
      return bool.TryParse(raw, out parsed) ? parsed : def;
  }
  /// <summary>
  /// Places a node in this node's child list, directly behind the given reference child.
  /// </summary>
  /// <param name="newChild">The node to insert. May not be <c>null</c>.</param>
  /// <param name="refChild">The existing child after which <paramref name="newChild"/> is placed. When <c>null</c>, the call is forwarded to <see cref="PrependChild"/>.</param>
  /// <returns>The node being inserted.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="newChild"/> is <c>null</c>.</exception>
  /// <exception cref="ArgumentException"><paramref name="refChild"/> is not found among the children of this node.</exception>
  public HtmlNode InsertAfter(HtmlNode newChild, HtmlNode refChild)
  {
      if (newChild == null)
      {
          throw new ArgumentNullException("newChild");
      }
      if (refChild == null)
      {
          // no reference node given: delegate to PrependChild
          return PrependChild(newChild);
      }
      if (newChild != refChild)
      {
          // a missing child collection is handled like "reference not found"
          int refPosition = (_childnodes == null) ? -1 : _childnodes[refChild];
          if (refPosition == -1)
          {
              throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
          }
          // refPosition != -1 implies the collection exists
          _childnodes.Insert(refPosition + 1, newChild);
          _ownerdocument.SetIdForNode(newChild, newChild.GetId());
          _outerchanged = true;
          _innerchanged = true;
      }
      // inserting a node after itself is a no-op that still returns the node
      return newChild;
  }
  /// <summary>
  /// Places a node in this node's child list, directly in front of the given reference child.
  /// </summary>
  /// <param name="newChild">The node to insert. May not be <c>null</c>.</param>
  /// <param name="refChild">The existing child before which <paramref name="newChild"/> is placed. When <c>null</c>, the call is forwarded to AppendChild.</param>
  /// <returns>The node being inserted.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="newChild"/> is <c>null</c>.</exception>
  /// <exception cref="ArgumentException"><paramref name="refChild"/> is not found among the children of this node.</exception>
  public HtmlNode InsertBefore(HtmlNode newChild, HtmlNode refChild)
  {
      if (newChild == null)
      {
          throw new ArgumentNullException("newChild");
      }
      if (refChild == null)
      {
          // no reference node given: delegate to AppendChild
          return AppendChild(newChild);
      }
      if (newChild == refChild)
      {
          // inserting a node before itself changes nothing
          return newChild;
      }

      // a missing child collection is handled like "reference not found"
      int refPosition = (_childnodes == null) ? -1 : _childnodes[refChild];
      if (refPosition == -1)
      {
          throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
      }

      // refPosition != -1 implies the collection exists
      _childnodes.Insert(refPosition, newChild);
      _ownerdocument.SetIdForNode(newChild, newChild.GetId());
      _outerchanged = true;
      _innerchanged = true;
      return newChild;
  }
  /// <summary>
  /// Puts the specified node at the front of this node's list of children.
  /// </summary>
  /// <param name="newChild">The node to add. May not be <c>null</c>.</param>
  /// <returns>The node added.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="newChild"/> is <c>null</c>.</exception>
  public HtmlNode PrependChild(HtmlNode newChild)
  {
      if (newChild == null)
      {
          throw new ArgumentNullException("newChild");
      }

      ChildNodes.Prepend(newChild);
      _ownerdocument.SetIdForNode(newChild, newChild.GetId());

      // flag both the inner and the outer HTML of this node as changed
      _outerchanged = _innerchanged = true;
      return newChild;
  }
  /// <summary>
  /// Adds the specified node list to the beginning of the list of children of this node.
  /// The nodes keep the relative order they have in <paramref name="newChildren"/>.
  /// </summary>
  /// <param name="newChildren">The node list to add. May not be <c>null</c>.</param>
  /// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is <c>null</c>.</exception>
  public void PrependChildren(HtmlNodeCollection newChildren)
  {
      if (newChildren == null)
      {
          throw new ArgumentNullException("newChildren");
      }

      // Snapshot first so the walk below does not depend on the live collection.
      List<HtmlNode> snapshot = new List<HtmlNode>();
      foreach (HtmlNode newChild in newChildren)
      {
          snapshot.Add(newChild);
      }

      // Each PrependChild call puts its node at the very front, so prepending in
      // forward order would reverse the list. Walking back to front keeps the
      // original order: the first node of newChildren ends up first.
      for (int i = snapshot.Count - 1; i >= 0; i--)
      {
          PrependChild(snapshot[i]);
      }
  }
  /// <summary>
  /// Takes this node out of its parent's child collection. Does nothing when the node has no parent.
  /// </summary>
  public void Remove()
  {
      if (ParentNode == null)
      {
          return;
      }
      ParentNode.ChildNodes.Remove(this);
  }
  /// <summary>
  /// Clears the current node: removes all of its children and all of its attributes,
  /// including the attributes held by its end node when that is a different node.
  /// </summary>
  public void RemoveAll()
  {
      RemoveAllChildren();

      if (HasAttributes)
      {
          _attributes.Clear();
      }

      // an end node other than this node may hold its own attribute collection
      bool hasDistinctEndNode = (_endnode != null) && (_endnode != this);
      if (hasDistinctEndNode && (_endnode._attributes != null))
      {
          _endnode._attributes.Clear();
      }

      // flagged even when there was nothing to remove
      _outerchanged = true;
      _innerchanged = true;
  }
  /// <summary>
  /// Empties the child list of the current node. Does nothing when the node has no children.
  /// </summary>
  public void RemoveAllChildren()
  {
      if (HasChildNodes)
      {
          if (_ownerdocument.OptionUseIdAttribute)
          {
              // clear the owner document's id entry of every child about to be dropped
              foreach (HtmlNode child in _childnodes)
              {
                  _ownerdocument.SetIdForNode(null, child.GetId());
              }
          }

          _childnodes.Clear();
          _outerchanged = true;
          _innerchanged = true;
      }
  }
  /// <summary>
  /// Takes the specified node out of this node's child list.
  /// </summary>
  /// <param name="oldChild">The node being removed. May not be <c>null</c>.</param>
  /// <returns>The node removed.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="oldChild"/> is <c>null</c>.</exception>
  /// <exception cref="ArgumentException"><paramref name="oldChild"/> is not found among the children of this node.</exception>
  public HtmlNode RemoveChild(HtmlNode oldChild)
  {
      if (oldChild == null)
      {
          throw new ArgumentNullException("oldChild");
      }

      // a missing child collection is handled like "child not found"
      int position = (_childnodes == null) ? -1 : _childnodes[oldChild];
      if (position == -1)
      {
          throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
      }

      // position != -1 implies the collection exists
      _childnodes.Remove(position);
      _ownerdocument.SetIdForNode(null, oldChild.GetId());
      _outerchanged = true;
      _innerchanged = true;
      return oldChild;
  }
  /// <summary>
  /// Removes the specified child node, optionally keeping its children by moving them into this node's child list.
  /// </summary>
  /// <param name="oldChild">The node being removed. May not be <c>null</c>.</param>
  /// <param name="keepGrandChildren">true to keep grand children of the node, false otherwise. Kept grand children are inserted, in their original order, at the position of <paramref name="oldChild"/>.</param>
  /// <returns>The node removed.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="oldChild"/> is <c>null</c>.</exception>
  /// <exception cref="ArgumentException"><paramref name="oldChild"/> is not found among the children of this node.</exception>
  public HtmlNode RemoveChild(HtmlNode oldChild, bool keepGrandChildren)
  {
      if (oldChild == null)
      {
          throw new ArgumentNullException("oldChild");
      }
      if ((oldChild._childnodes != null) && keepGrandChildren)
      {
          // Check membership before rerouting anything, so a failed call
          // does not leave grand children already moved into this node.
          if ((_childnodes == null) || (_childnodes[oldChild] == -1))
          {
              throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
          }

          // get prev sibling
          HtmlNode prev = oldChild.PreviousSibling;

          // reroute grand children to ourselves; advance the anchor after each insert
          // so every grand child lands behind the previous one instead of all being
          // inserted right after the same sibling (which reversed their order)
          foreach (HtmlNode grandchild in oldChild._childnodes)
          {
              prev = InsertAfter(grandchild, prev);
          }
      }
      RemoveChild(oldChild);
      _outerchanged = true;
      _innerchanged = true;
      return oldChild;
  }
  /// <summary>
  /// Puts <paramref name="newChild"/> into the child list at the position currently held by <paramref name="oldChild"/>.
  /// </summary>
  /// <param name="newChild">The new node to put in the child list. When <c>null</c>, the call is forwarded to RemoveChild for <paramref name="oldChild"/>.</param>
  /// <param name="oldChild">The node being replaced in the list. When <c>null</c>, the call is forwarded to AppendChild for <paramref name="newChild"/>.</param>
  /// <returns><paramref name="newChild"/> after the replacement; the result of RemoveChild or AppendChild in the two forwarding cases.</returns>
  /// <exception cref="ArgumentException"><paramref name="oldChild"/> is not found among the children of this node.</exception>
  public HtmlNode ReplaceChild(HtmlNode newChild, HtmlNode oldChild)
  {
      if (newChild == null)
      {
          // nothing to put in place: plain removal
          return RemoveChild(oldChild);
      }
      if (oldChild == null)
      {
          // nothing to replace: plain append
          return AppendChild(newChild);
      }

      // a missing child collection is handled like "child not found"
      int slot = (_childnodes == null) ? -1 : _childnodes[oldChild];
      if (slot == -1)
      {
          throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
      }

      // slot != -1 implies the collection exists
      _childnodes.Replace(slot, newChild);

      // clear the id entry of the outgoing node, then record the incoming one
      _ownerdocument.SetIdForNode(null, oldChild.GetId());
      _ownerdocument.SetIdForNode(newChild, newChild.GetId());
      _outerchanged = true;
      _innerchanged = true;
      return newChild;
  }
  /// <summary>
  /// Evaluates an XPath expression with this node as the starting point and collects every matching node.
  /// </summary>
  /// <param name="xpath">The XPath expression.</param>
  /// <returns>An <see cref="HtmlNodeCollection"/> holding the matching nodes, or <c>null</c> if no node matched the XPath expression.</returns>
  public HtmlNodeCollection SelectNodes(string xpath)
  {
      HtmlNodeCollection matches = new HtmlNodeCollection(null);
      XPathNodeIterator cursor = new HtmlNodeNavigator(_ownerdocument, this).Select(xpath);
      while (cursor.MoveNext())
      {
          matches.Add(((HtmlNodeNavigator) cursor.Current).CurrentNode);
      }

      // callers get null, not an empty collection, when nothing matched
      return (matches.Count == 0) ? null : matches;
  }
  /// <summary>
  /// Evaluates an XPath expression with this node as the starting point and returns the first matching <see cref="HtmlNode"/>.
  /// </summary>
  /// <param name="xpath">The XPath expression. May not be null.</param>
  /// <returns>The first <see cref="HtmlNode"/> that matches the XPath query or a null reference if no matching node was found.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="xpath"/> is <c>null</c>.</exception>
  public HtmlNode SelectSingleNode(string xpath)
  {
      if (xpath == null)
      {
          throw new ArgumentNullException("xpath");
      }

      XPathNodeIterator cursor = new HtmlNodeNavigator(_ownerdocument, this).Select(xpath);
      if (cursor.MoveNext())
      {
          // only the first hit is of interest
          return ((HtmlNodeNavigator) cursor.Current).CurrentNode;
      }
      return null;
  }
  /// <summary>
  /// Helper method that assigns a value to the named attribute of this node, creating and appending the attribute when it does not exist yet.
  /// </summary>
  /// <param name="name">The name of the attribute to set. May not be null.</param>
  /// <param name="value">The value for the attribute.</param>
  /// <returns>The corresponding attribute instance.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="name"/> is <c>null</c>.</exception>
  public HtmlAttribute SetAttributeValue(string name, string value)
  {
      if (name == null)
      {
          throw new ArgumentNullException("name");
      }

      HtmlAttribute existing = Attributes[name];
      if (existing != null)
      {
          // attribute already present: just overwrite its value
          existing.Value = value;
          return existing;
      }

      // not present: have the owner document create it, then append it
      return Attributes.Append(_ownerdocument.CreateAttribute(name, value));
  }
  /// <summary>
  /// Writes every child of this node, in order, to the specified TextWriter. Writes nothing when the node has no child collection.
  /// </summary>
  /// <param name="outText">The TextWriter to which you want to save.</param>
  public void WriteContentTo(TextWriter outText)
  {
      if (_childnodes != null)
      {
          foreach (HtmlNode child in _childnodes)
          {
              child.WriteTo(outText);
          }
      }
  }
  /// <summary>
  /// Renders all the children of the node into a string.
  /// </summary>
  /// <returns>The saved string.</returns>
  public string WriteContentTo()
  {
      // collect the output of the TextWriter overload in memory
      StringWriter buffer = new StringWriter();
      WriteContentTo(buffer);
      buffer.Flush();
      return buffer.ToString();
  }
  1337. /// <summary>
  1338. /// Saves the current node to the specified TextWriter.
  1339. //…

Large files are truncated, but you can click here to view the full file