PageRenderTime 54ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 1ms

/mcs/class/System.Web/Test/mainsoft/MainsoftWebTest/HtmlAgilityPack/MixedCodeDocument.cs

https://github.com/acken/mono
C# | 799 lines | 495 code | 84 blank | 220 comment | 30 complexity | d4646abd8bb3ce364a55fe973ba2eef8 MD5 | raw file
  1. // HtmlAgilityPack V1.0 - Simon Mourier <simonm@microsoft.com>
  2. using System;
  3. using System.IO;
  4. using System.Text;
  5. using System.Collections;
  6. namespace HtmlAgilityPack
  7. {
  /// <summary>
  /// Identifies the kind of content held by a fragment of a mixed code document.
  /// </summary>
  public enum MixedCodeDocumentFragmentType
  {
      /// <summary>
      /// The fragment holds code.
      /// </summary>
      Code = 0,

      /// <summary>
      /// The fragment holds text.
      /// </summary>
      Text = 1
  }
  /// <summary>
  /// Represents a fragment of code in a mixed code document.
  /// </summary>
  public class MixedCodeDocumentCodeFragment: MixedCodeDocumentFragment
  {
      // Cached code text; stays null until first read through Code or until it is assigned.
      internal string _code;

      internal MixedCodeDocumentCodeFragment(MixedCodeDocument doc):
          base(doc, MixedCodeDocumentFragmentType.Code)
      {
      }

      /// <summary>
      /// Gets or sets the fragment code text.
      /// When not assigned explicitly, the value is derived from <see cref="MixedCodeDocumentFragment.FragmentText"/>:
      /// the document's code start token is skipped, everything from the code end token onwards is dropped,
      /// and the remainder is trimmed. A leading "=" is replaced by the document's response write token.
      /// </summary>
      public string Code
      {
          get
          {
              if (_code == null)
              {
                  string code = ExtractCode(FragmentText, _doc.TokenCodeStart, _doc.TokenCodeEnd);
                  if (code.Length > 0 && code[0] == '=')
                  {
                      // "= expression" shorthand: emit it as a response write statement.
                      code = _doc.TokenResponseWrite + code.Substring(1);
                  }
                  _code = code;
              }
              return _code;
          }
          set
          {
              _code = value;
          }
      }

      // Returns the trimmed text found between the start token and the first end token of a raw fragment.
      // The end token is located by searching rather than by length arithmetic, so fragments that carry
      // trailing characters after the end token, fragments that stop right at the end token and
      // unterminated fragments are all handled without throwing.
      private static string ExtractCode(string fragment, string startToken, string endToken)
      {
          if (fragment == null)
          {
              return string.Empty;
          }

          // The fragment is expected to begin with the start token; never skip past the end of the text.
          int begin = Math.Min(startToken.Length, fragment.Length);

          int end = -1;
          if (endToken.Length > 0)
          {
              end = fragment.IndexOf(endToken, begin, StringComparison.Ordinal);
          }
          if (end < 0)
          {
              // No end token present: everything after the start token is code.
              end = fragment.Length;
          }
          return fragment.Substring(begin, end - begin).Trim();
      }
  }
  /// <summary>
  /// A fragment of a mixed code document that carries text rather than code.
  /// </summary>
  public class MixedCodeDocumentTextFragment: MixedCodeDocumentFragment
  {
      internal MixedCodeDocumentTextFragment(MixedCodeDocument doc)
          : base(doc, MixedCodeDocumentFragmentType.Text)
      {
      }

      /// <summary>
      /// Gets or sets the fragment text. Reading returns the same value as
      /// <see cref="MixedCodeDocumentFragment.FragmentText"/>; writing replaces the stored fragment text.
      /// </summary>
      public string Text
      {
          get { return FragmentText; }
          set { _fragmenttext = value; }
      }
  }
  /// <summary>
  /// Represents a base class for fragments in a mixed code document.
  /// </summary>
  public abstract class MixedCodeDocumentFragment
  {
      internal MixedCodeDocumentFragmentType _type;
      internal MixedCodeDocument _doc;
      // Offset and length of the fragment inside the owning document's text.
      internal int _index;
      internal int _length;
      internal int _line;
      internal int _lineposition;
      // Cached fragment text; null until first read through FragmentText or until assigned.
      internal string _fragmenttext;

      // Creates the fragment and registers it with the owning document: once in the list
      // matching its type and once in the list of all fragments.
      internal MixedCodeDocumentFragment(MixedCodeDocument doc, MixedCodeDocumentFragmentType type)
      {
          _doc = doc;
          _type = type;
          switch (type)
          {
              case MixedCodeDocumentFragmentType.Text:
                  _doc._textfragments.Append(this);
                  break;

              case MixedCodeDocumentFragmentType.Code:
                  _doc._codefragments.Append(this);
                  break;
          }
          _doc._fragments.Append(this);
      }

      /// <summary>
      /// Gets the type of fragment.
      /// </summary>
      public MixedCodeDocumentFragmentType FragmentType
      {
          get
          {
              return _type;
          }
      }

      /// <summary>
      /// Gets the fragment position in the document's stream.
      /// </summary>
      public int StreamPosition
      {
          get
          {
              return _index;
          }
      }

      /// <summary>
      /// Gets the line number of the fragment.
      /// </summary>
      public int Line
      {
          get
          {
              return _line;
          }
      }

      /// <summary>
      /// Gets the line position (column) of the fragment.
      /// </summary>
      public int LinePosition
      {
          get
          {
              return _lineposition;
          }
      }

      /// <summary>
      /// Gets the fragment text. Unless a text has already been stored, it is cut out of the owning
      /// document's text using the fragment's position and length, and then cached.
      /// An empty string is returned when the owning document has no text loaded yet.
      /// </summary>
      public string FragmentText
      {
          get
          {
              if (_fragmenttext == null)
              {
                  string source = _doc._text;
                  if (source == null)
                  {
                      // The document has not been loaded, so there is nothing to slice.
                      // Not cached, so a later load is still picked up.
                      return string.Empty;
                  }
                  _fragmenttext = source.Substring(_index, _length);
              }
              return _fragmenttext;
          }
      }
  }
  /// <summary>
  /// Represents a list of mixed code fragments.
  /// </summary>
  public class MixedCodeDocumentFragmentList: IEnumerable
  {
      private MixedCodeDocument _doc;
      private ArrayList _items = new ArrayList();

      internal MixedCodeDocumentFragmentList(MixedCodeDocument doc)
      {
          _doc = doc;
      }

      /// <summary>
      /// Appends a fragment to the list of fragments.
      /// </summary>
      /// <param name="newFragment">The fragment to append. May not be null.</param>
      public void Append(MixedCodeDocumentFragment newFragment)
      {
          if (newFragment == null)
          {
              throw new ArgumentNullException("newFragment");
          }
          _items.Add(newFragment);
      }

      /// <summary>
      /// Prepends a fragment to the list of fragments.
      /// </summary>
      /// <param name="newFragment">The fragment to prepend. May not be null.</param>
      public void Prepend(MixedCodeDocumentFragment newFragment)
      {
          if (newFragment == null)
          {
              throw new ArgumentNullException("newFragment");
          }
          _items.Insert(0, newFragment);
      }

      /// <summary>
      /// Remove a fragment from the list of fragments. If this fragment was not in the list, an exception will be raised.
      /// </summary>
      /// <param name="fragment">The fragment to remove. May not be null.</param>
      public void Remove(MixedCodeDocumentFragment fragment)
      {
          if (fragment == null)
          {
              throw new ArgumentNullException("fragment");
          }
          int index = GetFragmentIndex(fragment);
          if (index == -1)
          {
              // Exception type kept as-is for callers that already catch it.
              throw new IndexOutOfRangeException();
          }
          RemoveAt(index);
      }

      /// <summary>
      /// Remove a fragment from the list of fragments, using its index in the list.
      /// </summary>
      /// <param name="index">The index of the fragment to remove.</param>
      public void RemoveAt(int index)
      {
          _items.RemoveAt(index);
      }

      /// <summary>
      /// Remove all fragments from the list.
      /// </summary>
      public void RemoveAll()
      {
          _items.Clear();
      }

      /// <summary>
      /// Gets the number of fragments contained in the list.
      /// </summary>
      public int Count
      {
          get
          {
              return _items.Count;
          }
      }

      // Returns the position of the given fragment instance in the list, or -1 when it is not present.
      // Fragments are matched by reference.
      internal int GetFragmentIndex(MixedCodeDocumentFragment fragment)
      {
          if (fragment == null)
          {
              throw new ArgumentNullException("fragment");
          }
          for (int i = 0; i < _items.Count; i++)
          {
              if (object.ReferenceEquals(_items[i], fragment))
              {
                  return i;
              }
          }
          return -1;
      }

      /// <summary>
      /// Gets a fragment from the list using its index.
      /// </summary>
      public MixedCodeDocumentFragment this[int index]
      {
          get
          {
              return (MixedCodeDocumentFragment)_items[index];
          }
      }

      internal void Clear()
      {
          _items.Clear();
      }

      /// <summary>
      /// Gets an enumerator that can iterate through the fragment list.
      /// </summary>
      public MixedCodeDocumentFragmentEnumerator GetEnumerator()
      {
          return new MixedCodeDocumentFragmentEnumerator(_items);
      }

      /// <summary>
      /// Gets an enumerator that can iterate through the fragment list.
      /// </summary>
      IEnumerator IEnumerable.GetEnumerator()
      {
          return GetEnumerator();
      }

      /// <summary>
      /// Represents a fragment enumerator.
      /// </summary>
      public class MixedCodeDocumentFragmentEnumerator: IEnumerator
      {
          // Index of the current element; -1 means "before the first element".
          int _index;
          ArrayList _items;

          internal MixedCodeDocumentFragmentEnumerator(ArrayList items)
          {
              _items = items;
              _index = -1;
          }

          /// <summary>
          /// Sets the enumerator to its initial position, which is before the first element in the collection.
          /// </summary>
          public void Reset()
          {
              _index = -1;
          }

          /// <summary>
          /// Advances the enumerator to the next element of the collection.
          /// </summary>
          /// <returns>true if the enumerator was successfully advanced to the next element; false if the enumerator has passed the end of the collection.</returns>
          public bool MoveNext()
          {
              _index++;
              return _index < _items.Count;
          }

          /// <summary>
          /// Gets the current element in the collection.
          /// </summary>
          public MixedCodeDocumentFragment Current
          {
              get
              {
                  return (MixedCodeDocumentFragment)_items[_index];
              }
          }

          /// <summary>
          /// Gets the current element in the collection.
          /// </summary>
          object IEnumerator.Current
          {
              get
              {
                  return Current;
              }
          }
      }
  }
  /// <summary>
  /// Represents a document with mixed code and text. ASP, ASPX, JSP, are good example of such documents.
  /// </summary>
  public class MixedCodeDocument
  {
      private System.Text.Encoding _streamencoding = null;
      internal string _text;
      internal MixedCodeDocumentFragmentList _fragments;
      internal MixedCodeDocumentFragmentList _codefragments;
      internal MixedCodeDocumentFragmentList _textfragments;

      // Parser state. _index is the read position in _text, _c the character consumed last.
      private ParseState _state;
      private int _index;
      private int _c;
      private int _line;
      private int _lineposition;
      private MixedCodeDocumentFragment _currentfragment;

      /// <summary>
      /// Gets or sets the token representing code start.
      /// </summary>
      public string TokenCodeStart = "<%";

      /// <summary>
      /// Gets or sets the token representing code end.
      /// </summary>
      public string TokenCodeEnd = "%>";

      /// <summary>
      /// Gets or sets the token representing code directive.
      /// </summary>
      public string TokenDirective = "@";

      /// <summary>
      /// Gets or sets the token representing response write directive.
      /// </summary>
      public string TokenResponseWrite = "Response.Write ";

      // Format used by the Code property to reference the n-th text fragment.
      private const string TokenTextBlock = "TextBlock({0})";

      /// <summary>
      /// Creates a mixed code document instance.
      /// </summary>
      public MixedCodeDocument()
      {
          _codefragments = new MixedCodeDocumentFragmentList(this);
          _textfragments = new MixedCodeDocumentFragmentList(this);
          _fragments = new MixedCodeDocumentFragmentList(this);
      }

      /// <summary>
      /// Loads a mixed code document from a stream.
      /// </summary>
      /// <param name="stream">The input stream.</param>
      public void Load(Stream stream)
      {
          Load(new StreamReader(stream));
      }

      /// <summary>
      /// Loads a mixed code document from a stream.
      /// </summary>
      /// <param name="stream">The input stream.</param>
      /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
      public void Load(Stream stream, bool detectEncodingFromByteOrderMarks)
      {
          Load(new StreamReader(stream, detectEncodingFromByteOrderMarks));
      }

      /// <summary>
      /// Loads a mixed code document from a stream.
      /// </summary>
      /// <param name="stream">The input stream.</param>
      /// <param name="encoding">The character encoding to use.</param>
      public void Load(Stream stream, Encoding encoding)
      {
          Load(new StreamReader(stream, encoding));
      }

      /// <summary>
      /// Loads a mixed code document from a stream.
      /// </summary>
      /// <param name="stream">The input stream.</param>
      /// <param name="encoding">The character encoding to use.</param>
      /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
      public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
      {
          Load(new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks));
      }

      /// <summary>
      /// Loads a mixed code document from a stream.
      /// </summary>
      /// <param name="stream">The input stream.</param>
      /// <param name="encoding">The character encoding to use.</param>
      /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
      /// <param name="buffersize">The minimum buffer size.</param>
      public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
      {
          Load(new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks, buffersize));
      }

      /// <summary>
      /// Loads a mixed code document from a file.
      /// </summary>
      /// <param name="path">The complete file path to be read.</param>
      public void Load(string path)
      {
          Load(new StreamReader(path));
      }

      /// <summary>
      /// Loads a mixed code document from a file.
      /// </summary>
      /// <param name="path">The complete file path to be read.</param>
      /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
      public void Load(string path, bool detectEncodingFromByteOrderMarks)
      {
          Load(new StreamReader(path, detectEncodingFromByteOrderMarks));
      }

      /// <summary>
      /// Loads a mixed code document from a file.
      /// </summary>
      /// <param name="path">The complete file path to be read.</param>
      /// <param name="encoding">The character encoding to use.</param>
      public void Load(string path, Encoding encoding)
      {
          Load(new StreamReader(path, encoding));
      }

      /// <summary>
      /// Loads a mixed code document from a file.
      /// </summary>
      /// <param name="path">The complete file path to be read.</param>
      /// <param name="encoding">The character encoding to use.</param>
      /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
      public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
      {
          Load(new StreamReader(path, encoding, detectEncodingFromByteOrderMarks));
      }

      /// <summary>
      /// Loads a mixed code document from a file.
      /// </summary>
      /// <param name="path">The complete file path to be read.</param>
      /// <param name="encoding">The character encoding to use.</param>
      /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
      /// <param name="buffersize">The minimum buffer size.</param>
      public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
      {
          Load(new StreamReader(path, encoding, detectEncodingFromByteOrderMarks, buffersize));
      }

      /// <summary>
      /// Loads a mixed document from a text
      /// </summary>
      /// <param name="html">The text to load.</param>
      public void LoadHtml(string html)
      {
          Load(new StringReader(html));
      }

      /// <summary>
      /// Loads the mixed code document from the specified TextReader.
      /// Any fragments from a previous load are discarded, the reader is read to its end and closed,
      /// and the text is parsed into fragments.
      /// </summary>
      /// <param name="reader">The TextReader used to feed the HTML data into the document. May not be null.</param>
      public void Load(TextReader reader)
      {
          // all pseudo constructors get down to this one
          if (reader == null)
          {
              throw new ArgumentNullException("reader");
          }

          // Drop everything produced by a previous load, including the combined list.
          _codefragments.Clear();
          _textfragments.Clear();
          _fragments.Clear();

          try
          {
              _text = reader.ReadToEnd();

              // Sampled after reading: a StreamReader only settles on its encoding
              // (byte order mark detection) once it has actually read from the stream.
              StreamReader sr = reader as StreamReader;
              if (sr != null)
              {
                  _streamencoding = sr.CurrentEncoding;
              }
          }
          finally
          {
              // Close the reader even when reading fails.
              reader.Close();
          }
          Parse();
      }

      // Encoding used by the Save overloads that do not take one: the encoding captured
      // by the last load from a StreamReader, or the system default when there is none.
      internal System.Text.Encoding GetOutEncoding()
      {
          if (_streamencoding != null)
          {
              return _streamencoding;
          }
          return System.Text.Encoding.Default;
      }

      /// <summary>
      /// Gets the encoding of the stream used to read the document.
      /// </summary>
      public System.Text.Encoding StreamEncoding
      {
          get
          {
              return _streamencoding;
          }
      }

      /// <summary>
      /// Gets the list of code fragments in the document.
      /// </summary>
      public MixedCodeDocumentFragmentList CodeFragments
      {
          get
          {
              return _codefragments;
          }
      }

      /// <summary>
      /// Gets the list of text fragments in the document.
      /// </summary>
      public MixedCodeDocumentFragmentList TextFragments
      {
          get
          {
              return _textfragments;
          }
      }

      /// <summary>
      /// Gets the list of all fragments in the document.
      /// </summary>
      public MixedCodeDocumentFragmentList Fragments
      {
          get
          {
              return _fragments;
          }
      }

      /// <summary>
      /// Saves the mixed document to the specified stream.
      /// </summary>
      /// <param name="outStream">The stream to which you want to save.</param>
      public void Save(Stream outStream)
      {
          // The writer is flushed by Save(TextWriter) but deliberately not disposed,
          // because disposing it would also close the caller's stream.
          StreamWriter sw = new StreamWriter(outStream, GetOutEncoding());
          Save(sw);
      }

      /// <summary>
      /// Saves the mixed document to the specified stream.
      /// </summary>
      /// <param name="outStream">The stream to which you want to save.</param>
      /// <param name="encoding">The character encoding to use.</param>
      public void Save(Stream outStream, System.Text.Encoding encoding)
      {
          // Not disposed on purpose: the stream belongs to the caller.
          StreamWriter sw = new StreamWriter(outStream, encoding);
          Save(sw);
      }

      /// <summary>
      /// Saves the mixed document to the specified file.
      /// </summary>
      /// <param name="filename">The location of the file where you want to save the document.</param>
      public void Save(string filename)
      {
          // The writer owns the file it opened, so release the handle when done.
          using (StreamWriter sw = new StreamWriter(filename, false, GetOutEncoding()))
          {
              Save(sw);
          }
      }

      /// <summary>
      /// Saves the mixed document to the specified file.
      /// </summary>
      /// <param name="filename">The location of the file where you want to save the document.</param>
      /// <param name="encoding">The character encoding to use.</param>
      public void Save(string filename, System.Text.Encoding encoding)
      {
          // The writer owns the file it opened, so release the handle when done.
          using (StreamWriter sw = new StreamWriter(filename, false, encoding))
          {
              Save(sw);
          }
      }

      /// <summary>
      /// Saves the mixed document to the specified StreamWriter.
      /// </summary>
      /// <param name="writer">The StreamWriter to which you want to save.</param>
      public void Save(StreamWriter writer)
      {
          Save((TextWriter)writer);
      }

      /// <summary>
      /// Saves the mixed document to the specified TextWriter.
      /// Note: this implementation only flushes the writer; it does not write any document content.
      /// </summary>
      /// <param name="writer">The TextWriter to which you want to save. May not be null.</param>
      public void Save(TextWriter writer)
      {
          if (writer == null)
          {
              throw new ArgumentNullException("writer");
          }
          writer.Flush();
      }

      /// <summary>
      /// Gets the code represented by the mixed code document seen as a template.
      /// Each text fragment contributes a response write of a numbered text block, each code
      /// fragment contributes its code; every contribution is followed by a line feed.
      /// </summary>
      public string Code
      {
          get
          {
              StringBuilder code = new StringBuilder();
              int textBlockIndex = 0;
              foreach (MixedCodeDocumentFragment frag in _fragments)
              {
                  switch (frag._type)
                  {
                      case MixedCodeDocumentFragmentType.Text:
                          code.Append(TokenResponseWrite);
                          code.Append(string.Format(TokenTextBlock, textBlockIndex));
                          code.Append("\n");
                          textBlockIndex++;
                          break;

                      case MixedCodeDocumentFragmentType.Code:
                          code.Append(((MixedCodeDocumentCodeFragment)frag).Code);
                          code.Append("\n");
                          break;
                  }
              }
              return code.ToString();
          }
      }

      /// <summary>
      /// Create a text fragment instances.
      /// </summary>
      /// <returns>The newly created text fragment instance.</returns>
      public MixedCodeDocumentTextFragment CreateTextFragment()
      {
          return (MixedCodeDocumentTextFragment)CreateFragment(MixedCodeDocumentFragmentType.Text);
      }

      /// <summary>
      /// Create a code fragment instances.
      /// </summary>
      /// <returns>The newly created code fragment instance.</returns>
      public MixedCodeDocumentCodeFragment CreateCodeFragment()
      {
          return (MixedCodeDocumentCodeFragment)CreateFragment(MixedCodeDocumentFragmentType.Code);
      }

      // Creates a fragment of the requested type. The fragment constructor registers the
      // new instance with this document's fragment lists.
      internal MixedCodeDocumentFragment CreateFragment(MixedCodeDocumentFragmentType type)
      {
          switch (type)
          {
              case MixedCodeDocumentFragmentType.Text:
                  return new MixedCodeDocumentTextFragment(this);

              case MixedCodeDocumentFragmentType.Code:
                  return new MixedCodeDocumentCodeFragment(this);

              default:
                  throw new NotSupportedException();
          }
      }

      // Stamps the current fragment with the parser position. Must be called right after the
      // fragment's first character has been consumed: the fragment starts at _index - 1.
      private void SetPosition()
      {
          _currentfragment._line = _line;
          _currentfragment._lineposition = _lineposition;
          _currentfragment._index = _index - 1;
          _currentfragment._length = 0;
      }

      // Advances the read position past the character held in _c, tracking line and column.
      private void IncrementPosition()
      {
          _index++;
          if (_c == 10)
          {
              // line feed: start a new line
              _lineposition = 1;
              _line++;
          }
          else
          {
              _lineposition++;
          }
      }

      private enum ParseState
      {
          Text,
          Code
      }

      // Splits _text into alternating text and code fragments using TokenCodeStart and TokenCodeEnd.
      private void Parse()
      {
          _state = ParseState.Text;
          _index = 0;
          // Restart position tracking so a reload does not continue counting from the previous document.
          _line = 0;
          _lineposition = 0;
          _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Text);

          while (_index < _text.Length)
          {
              _c = _text[_index];
              IncrementPosition();

              // _index now points one past the character just consumed.
              int tokenIndex = _index - 1;

              switch (_state)
              {
                  case ParseState.Text:
                      // A start token is only honoured when more text follows it.
                      if (_index + TokenCodeStart.Length < _text.Length &&
                          string.CompareOrdinal(_text, tokenIndex, TokenCodeStart, 0, TokenCodeStart.Length) == 0)
                      {
                          _state = ParseState.Code;
                          // Close the text fragment right before the start token.
                          _currentfragment._length = tokenIndex - _currentfragment._index;
                          _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Code);
                          SetPosition();
                      }
                      break;

                  case ParseState.Code:
                      // An end token is only honoured when more text follows it.
                      if (_index + TokenCodeEnd.Length < _text.Length &&
                          string.CompareOrdinal(_text, tokenIndex, TokenCodeEnd, 0, TokenCodeEnd.Length) == 0)
                      {
                          _state = ParseState.Text;

                          // The recorded length covers the end token plus the single character after it.
                          // That convention is kept unchanged for consumers of the fragment text.
                          _currentfragment._length = _index + TokenCodeEnd.Length - _currentfragment._index;

                          // Consume only the remaining characters of the end token (its first character
                          // was consumed above). The character after the token is left for the next
                          // iteration so that an immediately following start token is recognised and a
                          // following line feed is counted.
                          for (int consumed = 1; consumed < TokenCodeEnd.Length; consumed++)
                          {
                              _c = _text[_index];
                              IncrementPosition();
                          }

                          // The next text fragment begins at the first character after the end token,
                          // which has not been consumed yet (hence the column is one ahead).
                          _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Text);
                          _currentfragment._line = _line;
                          _currentfragment._lineposition = _lineposition + 1;
                          _currentfragment._index = _index;
                          _currentfragment._length = 0;
                      }
                      break;
              }
          }

          // Whatever fragment is open at the end of the text runs up to the end.
          _currentfragment._length = _index - _currentfragment._index;
      }
  }
  715. }