PageRenderTime 2587ms CodeModel.GetById 23ms RepoModel.GetById 16ms app.codeStats 0ms

/mcs/class/System.Web/Test/mainsoft/MainsoftWebTest/HtmlAgilityPack/HtmlWeb.cs

https://github.com/acken/mono
C# | 824 lines | 589 code | 65 blank | 170 comment | 90 complexity | 941c53a728ac7d5215f306e53987b2b9 MD5 | raw file
  1. // HtmlAgilityPack V1.0 - Simon Mourier <simonm@microsoft.com>
  2. using System;
  3. using System.IO;
  4. using System.Net;
  5. using System.Xml;
  6. using System.Xml.Serialization;
  7. using System.Xml.Xsl;
  8. using Microsoft.Win32;
  9. #if !TARGET_JVM
  10. namespace HtmlAgilityPack
  11. {
  12. /// <summary>
  13. /// A utility class to get HTML document from HTTP.
  14. /// </summary>
  15. public class HtmlWeb
  16. {
  /// <summary>
  /// Signature of the <see cref="PreRequest"/> callback. The callback receives the
  /// request before it is sent; returning false cancels the request.
  /// </summary>
  public delegate bool PreRequestHandler(HttpWebRequest request);

  /// <summary>
  /// Signature of the <see cref="PostResponse"/> callback, invoked with the request
  /// and the response it produced.
  /// </summary>
  public delegate void PostResponseHandler(HttpWebRequest request, HttpWebResponse response);

  /// <summary>
  /// Signature of the <see cref="PreHandleDocument"/> callback, invoked with a
  /// loaded document before it is returned to the caller.
  /// </summary>
  public delegate void PreHandleDocumentHandler(HtmlDocument document);

  // Chunk size, in bytes, used when copying a response stream to disk.
  private int _streamBufferSize = 1024;
  // Root directory of the on-disk cache; null means no cache is configured.
  private string _cachePath;
  // Backing field for UsingCache.
  private bool _usingCache;
  // True when the last document was served from the cache.
  private bool _fromCache;
  // Backing field for CacheOnly.
  private bool _cacheOnly;
  // When true, every request gets a fresh CookieContainer.
  private bool _useCookies;
  // Duration of the last request, in milliseconds (Environment.TickCount delta).
  private int _requestDuration;
  // Backing field for AutoDetectEncoding.
  private bool _autoDetectEncoding = true;
  // Status of the last request.
  private HttpStatusCode _statusCode = HttpStatusCode.OK;
  // URI that actually answered the last request.
  private Uri _responseUri;

  /// <summary>
  /// Callback invoked just before an HTTP request is sent.
  /// </summary>
  public PreRequestHandler PreRequest;

  /// <summary>
  /// Callback invoked once an HTTP response has been received.
  /// </summary>
  public PostResponseHandler PostResponse;

  /// <summary>
  /// Callback invoked before a loaded HTML document is handed back.
  /// </summary>
  public PreHandleDocumentHandler PreHandleDocument;
  /// <summary>
  /// Initializes a new HtmlWeb. All settings keep their field defaults:
  /// no cache, no cookies, encoding auto-detection enabled.
  /// </summary>
  public HtmlWeb() { }
  /// <summary>
  /// Downloads an Internet resource with the HTTP GET method and saves it to a file.
  /// </summary>
  /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
  /// <param name="path">The location of the file where the document is saved.</param>
  public void Get(string url, string path)
  {
      const string defaultMethod = "GET";
      Get(url, path, defaultMethod);
  }
  /// <summary>
  /// Downloads an Internet resource with the given HTTP method and saves it to a file.
  /// Only the http and https schemes are supported.
  /// </summary>
  /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
  /// <param name="path">The location of the file where the document is saved.</param>
  /// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
  /// <exception cref="HtmlWebException">The URL scheme is neither http nor https.</exception>
  public void Get(string url, string path, string method)
  {
      Uri uri = new Uri(url);
      bool isHttp = (uri.Scheme == Uri.UriSchemeHttp) || (uri.Scheme == Uri.UriSchemeHttps);
      if (!isHttp)
      {
          throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
      }

      // Keep the outcome so that StatusCode reports this request too; the
      // original discarded it, leaving StatusCode stale after file downloads.
      _statusCode = Get(uri, method, path, null);
  }
  /// <summary>
  /// Loads an HTML document from an Internet resource using the HTTP GET method.
  /// </summary>
  /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
  /// <returns>A new HTML document.</returns>
  public HtmlDocument Load(string url)
  {
      const string defaultMethod = "GET";
      return Load(url, defaultMethod);
  }
  /// <summary>
  /// Loads an HTML document from an http/https resource or from a local file.
  /// The <see cref="PreHandleDocument"/> callback, when set, is invoked with the
  /// document before it is returned.
  /// </summary>
  /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
  /// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND. Ignored for file URIs.</param>
  /// <returns>A new HTML document.</returns>
  /// <exception cref="HtmlWebException">The URL scheme is not http, https or file.</exception>
  public HtmlDocument Load(string url, string method)
  {
      Uri uri = new Uri(url);
      HtmlDocument doc;

      if ((uri.Scheme == Uri.UriSchemeHttps) || (uri.Scheme == Uri.UriSchemeHttp))
      {
          doc = LoadUrl(uri, method);
      }
      else if (uri.Scheme == Uri.UriSchemeFile)
      {
          doc = new HtmlDocument();
          // The original assigned false and then immediately true to this same
          // option; only the final value ever took effect, so it is set once.
          // NOTE(review): LoadUrl sets OptionAutoCloseOnEnd = false and
          // OptionFixNestedTags = true; the second assignment here may have been
          // meant for OptionFixNestedTags. Effective behavior is kept unchanged.
          doc.OptionAutoCloseOnEnd = true;
          doc.DetectEncodingAndLoad(url, _autoDetectEncoding);
      }
      else
      {
          throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
      }

      if (PreHandleDocument != null)
      {
          PreHandleDocument(doc);
      }
      return doc;
  }
  /// <summary>
  /// Tells whether the file at the given path maps, through its extension, to an
  /// HTML content type.
  /// </summary>
  /// <param name="path">The file path whose extension is inspected.</param>
  /// <returns>true if the extension resolves to a "text/html" content type; otherwise false.</returns>
  private bool IsCacheHtmlContent(string path)
  {
      // The lookup is asked to return null when the extension is unknown; that
      // null must not reach IsHtmlContent, which would dereference it.
      string ct = GetContentTypeForExtension(Path.GetExtension(path), null);
      return (ct != null) && IsHtmlContent(ct);
  }
  /// <summary>
  /// Tells whether a content type denotes HTML, i.e. starts with "text/html"
  /// (compared case-insensitively, so parameters such as a charset are allowed).
  /// </summary>
  /// <param name="contentType">The content type to test. May be null.</param>
  /// <returns>true for an HTML content type; false otherwise, including for null.</returns>
  private bool IsHtmlContent(string contentType)
  {
      if (contentType == null)
      {
          return false;
      }
      // Ordinal comparison: this is a protocol token, not linguistic text, so
      // the result must not depend on the current culture.
      return contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase);
  }
  /// <summary>
  /// Gets the path of the file holding the cached response headers for a URI:
  /// the document's cache path with ".h.xml" appended.
  /// </summary>
  private string GetCacheHeadersPath(Uri uri)
  {
      string documentPath = GetCachePath(uri);
      return documentPath + ".h.xml";
  }
  /// <summary>
  /// Gets the cache file path for a specified url.
  /// </summary>
  /// <param name="uri">The url for which to retrieve the cache path. May not be null.</param>
  /// <returns>The cache file path.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
  /// <exception cref="HtmlWebException">The cache is not enabled.</exception>
  public string GetCachePath(Uri uri)
  {
      if (uri == null)
      {
          throw new ArgumentNullException("uri");
      }
      if (!UsingCache)
      {
          throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
      }

      // A site root has no file name of its own; it maps to ".htm" directly
      // under the cache root (the host is not part of that path).
      if (uri.AbsolutePath == "/")
      {
          return Path.Combine(_cachePath, ".htm");
      }

      // Mirror the URL's path segments as directories. The platform separator
      // is used instead of a hard-coded backslash so the layout is also correct
      // on non-Windows systems; on Windows the result is unchanged.
      string relative = (uri.Host + uri.AbsolutePath).Replace('/', Path.DirectorySeparatorChar);
      return Path.Combine(_cachePath, relative);
  }
  /// <summary>
  /// Gets a value indicating whether the last document came from the cache.
  /// </summary>
  public bool FromCache
  {
      get { return _fromCache; }
  }
  /// <summary>
  /// Gets the URI of the Internet resource that actually answered the last request.
  /// </summary>
  public Uri ResponseUri
  {
      get { return _responseUri; }
  }
  /// <summary>
  /// Gets or sets a value indicating whether documents are served from the cache only.
  /// When true and the document is not in the cache, nothing is loaded.
  /// </summary>
  /// <exception cref="HtmlWebException">Set to true while the cache is not enabled.</exception>
  public bool CacheOnly
  {
      get { return _cacheOnly; }
      set
      {
          // Turning cache-only mode on is meaningless without a cache.
          if (value)
          {
              if (!UsingCache)
              {
                  throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
              }
          }
          _cacheOnly = value;
      }
  }
  /// <summary>
  /// Gets or sets a value indicating whether requests carry a cookie container.
  /// </summary>
  public bool UseCookies
  {
      get { return _useCookies; }
      set { _useCookies = value; }
  }
  /// <summary>
  /// Gets the duration of the last request, in milliseconds.
  /// </summary>
  public int RequestDuration
  {
      get { return _requestDuration; }
  }
  /// <summary>
  /// Gets or sets a value indicating whether the document encoding is detected automatically.
  /// </summary>
  public bool AutoDetectEncoding
  {
      get { return _autoDetectEncoding; }
      set { _autoDetectEncoding = value; }
  }
  /// <summary>
  /// Gets the status of the last request.
  /// </summary>
  public HttpStatusCode StatusCode
  {
      get { return _statusCode; }
  }
  /// <summary>
  /// Gets or sets the size, in bytes, of the buffer used for stream copy operations.
  /// </summary>
  /// <exception cref="ArgumentException">The assigned value is zero or negative.</exception>
  public int StreamBufferSize
  {
      get
      {
          return _streamBufferSize;
      }
      set
      {
          // Validate the incoming value. The original tested the current field,
          // which is always positive, so invalid sizes were silently accepted.
          if (value <= 0)
          {
              throw new ArgumentException("Size must be greater than zero.");
          }
          _streamBufferSize = value;
      }
  }
  /// <summary>
  /// Creates a document configured for web content, issues the request, and
  /// records the resulting status in _statusCode. When the status is NotModified
  /// the document is read from the cache file.
  /// </summary>
  private HtmlDocument LoadUrl(Uri uri, string method)
  {
      HtmlDocument document = new HtmlDocument();
      document.OptionAutoCloseOnEnd = false;
      document.OptionFixNestedTags = true;

      HttpStatusCode status = Get(uri, method, null, document);
      _statusCode = status;

      if (status == HttpStatusCode.NotModified)
      {
          // Nothing was downloaded: the content lives in the cache file.
          string cachedFile = GetCachePath(uri);
          document.DetectEncodingAndLoad(cachedFile);
      }
      return document;
  }
  /// <summary>
  /// Executes an HTTP request and delivers the response to the cache, to a file
  /// and/or to an in-memory document.
  /// </summary>
  /// <param name="uri">The resource to request (callers pass http/https URIs).</param>
  /// <param name="method">The HTTP method.</param>
  /// <param name="path">Optional file that receives the content; may be null.</param>
  /// <param name="doc">Optional document that receives HTML content; may be null.</param>
  /// <returns>
  /// The response status code; NotModified when the content came from the cache;
  /// ResetContent when the PreRequest callback cancelled the request.
  /// </returns>
  /// <exception cref="HtmlWebException">
  /// Cache-only mode is on and the cache file is missing, or the server answered
  /// NotModified while the cache is disabled.
  /// </exception>
  private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc)
  {
      string cachePath = null;
      bool oldFile = false;

      HttpWebRequest req = WebRequest.Create(uri) as HttpWebRequest;
      if (req == null)
      {
          // Not an HTTP(S) request: fail clearly rather than with a null dereference.
          throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
      }
      req.Method = method;
      _fromCache = false;
      _requestDuration = 0;
      int tc = Environment.TickCount;

      if (UsingCache)
      {
          cachePath = GetCachePath(req.RequestUri);
          if (File.Exists(cachePath))
          {
              // SaveStream stamps the cache file's last-write time with the
              // server's Last-Modified value, so that is the timestamp to send
              // back. The last-access time (used originally) moves on reads and
              // could hide a newer version on the server.
              req.IfModifiedSince = File.GetLastWriteTime(cachePath);
              oldFile = true;
          }
      }

      if (_cacheOnly)
      {
          if (!File.Exists(cachePath))
          {
              throw new HtmlWebException("File was not found at cache path: '" + cachePath + "'");
          }
          CopyCachedFile(cachePath, path);
          _fromCache = true;
          return HttpStatusCode.NotModified;
      }

      if (_useCookies)
      {
          req.CookieContainer = new CookieContainer();
      }

      if (PreRequest != null)
      {
          // allow our user to change the request at will, or to cancel it
          if (!PreRequest(req))
          {
              return HttpStatusCode.ResetContent;
          }
      }

      HttpWebResponse resp;
      try
      {
          resp = req.GetResponse() as HttpWebResponse;
      }
      catch (WebException we)
      {
          _requestDuration = Environment.TickCount - tc;
          // Protocol errors (4xx/5xx) still carry a response that is processed below.
          resp = (HttpWebResponse)we.Response;
          if (resp == null)
          {
              if (oldFile)
              {
                  // No answer from the server: fall back to the cached copy.
                  CopyCachedFile(cachePath, path);
                  _fromCache = true;
                  return HttpStatusCode.NotModified;
              }
              throw;
          }
      }
      catch (Exception)
      {
          _requestDuration = Environment.TickCount - tc;
          throw;
      }

      try
      {
          // allow our user to get some info from the response
          if (PostResponse != null)
          {
              PostResponse(req, resp);
          }
          _requestDuration = Environment.TickCount - tc;
          _responseUri = resp.ResponseUri;

          if (resp.StatusCode == HttpStatusCode.NotModified)
          {
              if (!UsingCache)
              {
                  // this should *never* happen...
                  throw new HtmlWebException("Server has send a NotModifed code, without cache enabled.");
              }
              _fromCache = true;
              CopyCachedFile(cachePath, path);
              return resp.StatusCode;
          }

          string contentType = resp.ContentType;
          bool html = (contentType != null) && IsHtmlContent(contentType);

          Stream s = resp.GetResponseStream();
          if (s != null)
          {
              if (UsingCache)
              {
                  // NOTE: LastModified does not contain milliseconds, so we remove them to the file
                  SaveStream(s, cachePath, RemoveMilliseconds(resp.LastModified), _streamBufferSize);
                  // save headers
                  SaveCacheHeaders(req.RequestUri, resp);
                  CopyCachedFile(cachePath, path);
                  // The stream has been consumed by the cache; feed the document
                  // from the cache file so a first (uncached) load is not empty.
                  if ((doc != null) && html)
                  {
                      doc.DetectEncodingAndLoad(cachePath);
                  }
              }
              else if (path != null)
              {
                  // No cache: write straight to the requested file. The original
                  // wrote nothing at all in this case.
                  SaveStream(s, path, RemoveMilliseconds(resp.LastModified), _streamBufferSize);
              }
              else if ((doc != null) && html)
              {
                  // work in-memory
                  System.Text.Encoding respenc = GetResponseEncoding(resp);
                  if (respenc != null)
                  {
                      doc.Load(s, respenc);
                  }
                  else
                  {
                      doc.Load(s);
                  }
              }
          }
          return resp.StatusCode;
      }
      finally
      {
          // Always release the connection, including on the NotModified path
          // and when saving or parsing throws.
          resp.Close();
      }
  }

  // Copies the cached file to 'path' (no-op when path is null) and gives the
  // copy the cache file's last-write time.
  private static void CopyCachedFile(string cachePath, string path)
  {
      if (path == null)
      {
          return;
      }
      IOLibrary.CopyAlways(cachePath, path);
      // touch the file
      File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
  }

  // Maps the response's ContentEncoding value to a text encoding, or returns
  // null when it is absent or does not name one (e.g. "gzip"), so the caller
  // falls back to the document's own detection instead of failing.
  private static System.Text.Encoding GetResponseEncoding(HttpWebResponse resp)
  {
      string name = resp.ContentEncoding;
      if ((name == null) || (name.Length == 0))
      {
          return null;
      }
      try
      {
          return System.Text.Encoding.GetEncoding(name);
      }
      catch (ArgumentException)
      {
          return null;
      }
      catch (NotSupportedException)
      {
          return null;
      }
  }
  /// <summary>
  /// Reads one response header from the headers file cached next to a document.
  /// </summary>
  /// <param name="requestUri">The URI whose cached headers are read.</param>
  /// <param name="name">The header name; matched case-insensitively.</param>
  /// <param name="def">The value returned when the header is not present.</param>
  /// <returns>The cached header value, or <paramref name="def"/>.</returns>
  private string GetCacheHeader(Uri requestUri, string name, string def)
  {
      // note: some headers are collection (ex: www-authenticate)
      // we don't handle that here
      XmlDocument doc = new XmlDocument();
      doc.Load(GetCacheHeadersPath(requestUri));

      // Entries are written by SaveCacheHeaders as <h n="name" v="value"/>.
      // Matching is done in code rather than by concatenating 'name' into an
      // XPath string, so unusual characters in the name cannot break the query.
      foreach (XmlNode node in doc.SelectNodes("//h"))
      {
          XmlAttribute nameAtt = node.Attributes["n"];
          if ((nameAtt == null) ||
              !string.Equals(nameAtt.Value, name, StringComparison.OrdinalIgnoreCase))
          {
              continue;
          }
          // The value lives in the "v" attribute. The original looked up an
          // attribute named after the header itself, which never exists.
          XmlAttribute valueAtt = node.Attributes["v"];
          return (valueAtt != null) ? valueAtt.Value : def;
      }
      return def;
  }
  /// <summary>
  /// Writes the response headers to an XML file stored alongside the cached
  /// document: a root element "c" holding one "h" element per header, with the
  /// header name in attribute "n" and its value in attribute "v".
  /// </summary>
  private void SaveCacheHeaders(Uri requestUri, HttpWebResponse resp)
  {
      string headersFile = GetCacheHeadersPath(requestUri);

      XmlDocument xml = new XmlDocument();
      xml.LoadXml("<c></c>");
      XmlNode root = xml.FirstChild;

      foreach (string headerName in resp.Headers)
      {
          XmlAttribute nameAtt = xml.CreateAttribute("n");
          nameAtt.Value = headerName;

          XmlAttribute valueAtt = xml.CreateAttribute("v");
          valueAtt.Value = resp.Headers[headerName];

          XmlNode item = xml.CreateElement("h");
          item.Attributes.Append(nameAtt);
          item.Attributes.Append(valueAtt);
          root.AppendChild(item);
      }

      xml.Save(headersFile);
  }
  /// <summary>
  /// Copies a stream to a file, then sets the file's last-write time.
  /// The source stream is closed when the copy ends, whether it succeeded or not.
  /// </summary>
  /// <param name="stream">The source stream. May not be null.</param>
  /// <param name="path">The destination file; created or overwritten.</param>
  /// <param name="touchDate">The last-write time to stamp on the file.</param>
  /// <param name="streamBufferSize">The copy buffer size in bytes; must be positive.</param>
  /// <returns>The number of bytes written.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="streamBufferSize"/> is not positive.</exception>
  private static long SaveStream(Stream stream, string path, DateTime touchDate, int streamBufferSize)
  {
      if (stream == null)
      {
          throw new ArgumentNullException("stream");
      }
      // A zero-sized buffer would end the copy immediately and leave an empty
      // file behind without any error; reject it up front.
      if (streamBufferSize <= 0)
      {
          throw new ArgumentOutOfRangeException("streamBufferSize", "Size must be greater than zero.");
      }

      FilePreparePath(path);

      long len = 0;
      try
      {
          using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write))
          {
              // One buffer reused for every chunk.
              byte[] buffer = new byte[streamBufferSize];
              int read;
              while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
              {
                  fs.Write(buffer, 0, read);
                  len += read;
              }
          }
      }
      finally
      {
          // Same contract as before: the source is closed by this method. It is
          // closed after the file, so a failure here cannot leave the file open.
          stream.Close();
      }

      File.SetLastWriteTime(path, touchDate);
      return len;
  }
  /// <summary>
  /// Makes a target path writable: clears the read-only attribute of an existing
  /// file, or creates the parent directory of a file that does not exist yet.
  /// </summary>
  /// <param name="target">The file path about to be written.</param>
  private static void FilePreparePath(string target)
  {
      if (File.Exists(target))
      {
          FileAttributes atts = File.GetAttributes(target);
          File.SetAttributes(target, atts & ~FileAttributes.ReadOnly);
          return;
      }

      // A bare file name has an empty directory part and a root path has none;
      // there is nothing to create then, and CreateDirectory("") would throw.
      string dir = Path.GetDirectoryName(target);
      if ((dir != null) && (dir.Length > 0) && !Directory.Exists(dir))
      {
          Directory.CreateDirectory(dir);
      }
  }
  /// <summary>
  /// Returns the given date and time truncated to a whole second.
  /// </summary>
  private static DateTime RemoveMilliseconds(DateTime t)
  {
      // Drop everything below one second by working on the tick count.
      long wholeSecondTicks = t.Ticks - (t.Ticks % TimeSpan.TicksPerSecond);
      return new DateTime(wholeSecondTicks);
  }
  /// <summary>
  /// Gets the path extension for a given MIME content type, looked up under
  /// "MIME\Database\Content Type" in the HKEY_CLASSES_ROOT registry hive.
  /// </summary>
  /// <param name="contentType">The input MIME content type. Parameters after ';' (such as a charset) are ignored.</param>
  /// <param name="def">The default path extension to return if the type is unknown or any error occurs.</param>
  /// <returns>The MIME content type's path extension, or <paramref name="def"/>.</returns>
  public static string GetExtensionForContentType(string contentType, string def)
  {
      if ((contentType == null) || (contentType.Length == 0))
      {
          return def;
      }

      // Registry keys are bare media types: strip "; charset=..." and the like.
      int semicolon = contentType.IndexOf(';');
      string mediaType = ((semicolon >= 0) ? contentType.Substring(0, semicolon) : contentType).Trim();
      if (mediaType.Length == 0)
      {
          return def;
      }

      try
      {
          RegistryKey key = Registry.ClassesRoot.OpenSubKey(@"MIME\Database\Content Type\" + mediaType, false);
          if (key == null)
          {
              // Unknown content type: a normal outcome, not an error.
              return def;
          }
          try
          {
              string ext = key.GetValue("Extension", def) as string;
              return (ext != null) ? ext : def;
          }
          finally
          {
              key.Close();
          }
      }
      catch (Exception)
      {
          // Deliberate best effort: any registry failure (access denied,
          // registry not available on this platform) yields the default.
          return def;
      }
  }
  /// <summary>
  /// Gets the MIME content type for a given path extension, read from the
  /// "Content Type" value of the extension's key in the HKEY_CLASSES_ROOT
  /// registry hive.
  /// </summary>
  /// <param name="extension">The input path extension, as returned by Path.GetExtension (e.g. ".htm").</param>
  /// <param name="def">The default content type to return if the extension is unknown or any error occurs.</param>
  /// <returns>The path extension's MIME content type, or <paramref name="def"/>.</returns>
  public static string GetContentTypeForExtension(string extension, string def)
  {
      if ((extension == null) || (extension.Length == 0))
      {
          return def;
      }

      try
      {
          RegistryKey key = Registry.ClassesRoot.OpenSubKey(extension, false);
          if (key == null)
          {
              // Unregistered extension: a normal outcome, not an error.
              return def;
          }
          try
          {
              // The MIME type is stored in the "Content Type" value. The key's
              // default value, read by the original, holds the file-type
              // identifier (such as "htmlfile"), which is not a content type.
              string contentType = key.GetValue("Content Type", def) as string;
              return (contentType != null) ? contentType : def;
          }
          finally
          {
              key.Close();
          }
      }
      catch (Exception)
      {
          // Deliberate best effort: any registry failure (access denied,
          // registry not available on this platform) yields the default.
          return def;
      }
  }
  /// <summary>
  /// Loads an HTML document from an Internet resource and writes it to the given XmlTextWriter.
  /// </summary>
  /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
  /// <param name="writer">The XmlTextWriter that receives the document.</param>
  public void LoadHtmlAsXml(string htmlUrl, XmlTextWriter writer)
  {
      Load(htmlUrl).Save(writer);
  }
  /// <summary>
  /// Loads an HTML document from an Internet resource, applies an XSLT
  /// transformation and writes the result to the given XmlTextWriter.
  /// No intermediate XML file is written.
  /// </summary>
  /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
  /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
  /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
  /// <param name="writer">The XmlTextWriter that receives the result.</param>
  public void LoadHtmlAsXml(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, XmlTextWriter writer)
  {
      string noIntermediateFile = null;
      LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer, noIntermediateFile);
  }
  /// <summary>
  /// Loads an HTML document from an Internet resource, applies an XSLT
  /// transformation and writes the result to the given XmlTextWriter.
  /// The parameters "url", "requestDuration" and "fromCache" are made available
  /// to the stylesheet; existing parameters with those names are replaced.
  /// </summary>
  /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp". May not be null.</param>
  /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
  /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform. May be null.</param>
  /// <param name="writer">The XmlTextWriter that receives the result.</param>
  /// <param name="xmlPath">A file path where the temporary XML before transformation will be saved. Mostly used for debugging purposes. May be null.</param>
  /// <exception cref="ArgumentNullException"><paramref name="htmlUrl"/> is null.</exception>
  public void LoadHtmlAsXml(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, XmlTextWriter writer, string xmlPath)
  {
      if (htmlUrl == null)
      {
          throw new ArgumentNullException("htmlUrl");
      }

      HtmlDocument doc = Load(htmlUrl);

      if (xmlPath != null)
      {
          XmlTextWriter w = new XmlTextWriter(xmlPath, doc.Encoding);
          try
          {
              doc.Save(w);
          }
          finally
          {
              // Release the file even when saving fails.
              w.Close();
          }
      }

      if (xsltArgs == null)
      {
          xsltArgs = new XsltArgumentList();
      }

      // add some useful variables to the xslt doc.
      // AddParam throws when a parameter already exists, so a caller reusing
      // one argument list across calls would fail on the second call; remove
      // any previous value first (RemoveParam is a no-op when it is absent).
      xsltArgs.RemoveParam("url", "");
      xsltArgs.AddParam("url", "", htmlUrl);
      xsltArgs.RemoveParam("requestDuration", "");
      xsltArgs.AddParam("requestDuration", "", RequestDuration);
      xsltArgs.RemoveParam("fromCache", "");
      xsltArgs.AddParam("fromCache", "", FromCache);

      XslTransform xslt = new XslTransform();
      xslt.Load(xsltUrl);
      xslt.Transform(doc, xsltArgs, writer, null);
  }
  /// <summary>
  /// Creates an instance of the given type from the specified Internet resource,
  /// without any XSLT transformation.
  /// </summary>
  /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
  /// <param name="type">The requested type.</param>
  /// <returns>A newly created instance.</returns>
  public object CreateInstance(string url, Type type)
  {
      string noStylesheet = null;
      XsltArgumentList noArguments = null;
      return CreateInstance(url, noStylesheet, noArguments, type);
  }
  /// <summary>
  /// Creates an instance of the given type from the specified Internet resource.
  /// No intermediate XML file is written.
  /// </summary>
  /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
  /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
  /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
  /// <param name="type">The requested type.</param>
  /// <returns>A newly created instance.</returns>
  public object CreateInstance(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, Type type)
  {
      string noIntermediateFile = null;
      return CreateInstance(htmlUrl, xsltUrl, xsltArgs, type, noIntermediateFile);
  }
  /// <summary>
  /// Creates an instance of the given type from the specified Internet resource:
  /// the document is converted to XML (through the stylesheet when one is given)
  /// and the XML is deserialized with an XmlSerializer for the requested type.
  /// </summary>
  /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
  /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load. When null, no transformation is applied.</param>
  /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
  /// <param name="type">The requested type.</param>
  /// <param name="xmlPath">A file path where the temporary XML before transformation will be saved. Mostly used for debugging purposes. Only used when a stylesheet is given.</param>
  /// <returns>A newly created instance.</returns>
  /// <exception cref="Exception">
  /// Deserialization failed; the message includes the XML that was produced and
  /// the original error is available as the inner exception.
  /// </exception>
  public object CreateInstance(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, Type type, string xmlPath)
  {
      StringWriter sw = new StringWriter();
      XmlTextWriter writer = new XmlTextWriter(sw);
      if (xsltUrl == null)
      {
          LoadHtmlAsXml(htmlUrl, writer);
      }
      else
      {
          // A null xmlPath simply disables the intermediate file, so a single
          // call covers both cases.
          LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer, xmlPath);
      }
      writer.Flush();

      string xml = sw.ToString();
      XmlSerializer serializer = new XmlSerializer(type);
      XmlTextReader reader = new XmlTextReader(new StringReader(xml));
      try
      {
          return serializer.Deserialize(reader);
      }
      catch (InvalidOperationException ex)
      {
          // Same exception type and message as before, but the original
          // exception is now kept as InnerException.
          throw new Exception(ex.ToString() + ", --- xml:" + xml, ex);
      }
      finally
      {
          reader.Close();
      }
  }
  /// <summary>
  /// Gets or sets the cache root directory. While it is null, no caching is used.
  /// </summary>
  public string CachePath
  {
      get { return _cachePath; }
      set { _cachePath = value; }
  }
  /// <summary>
  /// Gets or sets a value indicating whether the caching mechanism is used.
  /// Always reads as false while no cache path is defined.
  /// </summary>
  /// <exception cref="HtmlWebException">Set to true while no cache path is defined.</exception>
  public bool UsingCache
  {
      get
      {
          // Without a cache directory the flag is irrelevant.
          return (_cachePath != null) && _usingCache;
      }
      set
      {
          if (value)
          {
              if (_cachePath == null)
              {
                  throw new HtmlWebException("You need to define a CachePath first.");
              }
          }
          _usingCache = value;
      }
  }
  743. }
  /// <summary>
  /// Represents an exception thrown by the HtmlWeb utility class.
  /// </summary>
  public class HtmlWebException : Exception
  {
      /// <summary>
      /// Creates an instance of the HtmlWebException.
      /// </summary>
      /// <param name="message">The exception's message.</param>
      public HtmlWebException(string message)
          : base(message)
      {
      }

      /// <summary>
      /// Creates an instance of the HtmlWebException that wraps the exception
      /// which caused it, so the original failure is not lost.
      /// </summary>
      /// <param name="message">The exception's message.</param>
      /// <param name="innerException">The exception that caused this one.</param>
      public HtmlWebException(string message, Exception innerException)
          : base(message, innerException)
      {
      }
  }
  758. }
  759. #endif