PageRenderTime 37ms CodeModel.GetById 10ms RepoModel.GetById 1ms app.codeStats 0ms

/QDFeedParser/Xml/XpathFeedXmlParser.cs

#
C# | 202 lines | 167 code | 34 blank | 1 comment | 31 complexity | c4f169bcabafd754c23143a2a46433d9 MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Xml;
  4. using System.Xml.XPath;
  5. using System.Text;
  6. namespace QDFeedParser.Xml
  7. {
  /// <summary>
  /// Feed parser that loads RSS 2.0 and Atom 1.0 documents into an
  /// <see cref="XmlDocument"/> and extracts the header fields and items with XPath queries.
  /// </summary>
  public class XPathFeedXmlParser : FeedXmlParserBase
  {
      /// <summary>Namespace URI of Atom 1.0 elements; bound to the "atom" prefix in every Atom query.</summary>
      private const string AtomNamespaceUri = "http://www.w3.org/2005/Atom";

      #region IFeedXmlParser Members

      /// <summary>
      /// Populates <paramref name="feed"/> (header fields and items) from <paramref name="xml"/>.
      /// Only <see cref="FeedType.Rss20"/> and <see cref="FeedType.Atom10"/> are handled; any other
      /// feed type leaves <paramref name="feed"/> untouched.
      /// </summary>
      /// <param name="feed">The feed object to fill in. Its FeedType selects the parsing strategy.</param>
      /// <param name="xml">The raw feed document.</param>
      /// <exception cref="ArgumentNullException"><paramref name="feed"/> is null.</exception>
      /// <exception cref="ArgumentException">
      /// The runtime type of <paramref name="feed"/> does not match its declared FeedType.
      /// </exception>
      /// <exception cref="XmlException"><paramref name="xml"/> is not well-formed XML.</exception>
      public override void ParseFeed(IFeed feed, string xml)
      {
          if (feed == null)
          {
              throw new ArgumentNullException("feed");
          }

          switch (feed.FeedType)
          {
              case FeedType.Rss20:
              {
                  var rssFeed = feed as Rss20Feed;
                  if (rssFeed == null)
                  {
                      throw new ArgumentException("Feed is declared as RSS 2.0 but is not an Rss20Feed instance.", "feed");
                  }

                  // The document is loaded once and shared by the header and item passes.
                  var rssDoc = LoadDocument(xml);
                  ParseRss20Header(rssFeed, rssDoc);
                  ParseRss20Items(rssFeed, rssDoc);
                  break;
              }
              case FeedType.Atom10:
              {
                  var atomFeed = feed as Atom10Feed;
                  if (atomFeed == null)
                  {
                      throw new ArgumentException("Feed is declared as Atom 1.0 but is not an Atom10Feed instance.", "feed");
                  }

                  var atomDoc = LoadDocument(xml);
                  // One namespace manager per parse, passed explicitly so that no state
                  // is carried between calls.
                  var atomNamespaces = CreateAtomNamespaceManager(atomDoc);
                  ParseAtom10Header(atomFeed, atomDoc, atomNamespaces);
                  ParseAtom10Items(atomFeed, atomDoc, atomNamespaces);
                  break;
              }
          }
      }

      /// <summary>
      /// Determines the feed format by inspecting the root element of <paramref name="feedxml"/>.
      /// </summary>
      /// <param name="feedxml">The raw feed document.</param>
      /// <returns>
      /// <see cref="FeedType.Rss20"/> when the root element name contains the RSS root name and its
      /// version attribute equals "2.0"; <see cref="FeedType.Atom10"/> when the root element name
      /// contains the Atom root name.
      /// </returns>
      /// <exception cref="InvalidFeedXmlException">The root element matches neither format.</exception>
      /// <exception cref="XmlException"><paramref name="feedxml"/> is not well-formed XML.</exception>
      public override FeedType CheckFeedType(string feedxml)
      {
          var root = LoadDocument(feedxml).DocumentElement;

          if (root.Name.Contains(RssRootElementName) && root.GetAttribute(RssVersionAttributeName) == "2.0")
          {
              return FeedType.Rss20;
          }

          if (root.Name.Contains(AtomRootElementName))
          {
              return FeedType.Atom10;
          }

          throw new InvalidFeedXmlException("Unable to determine feedtype (but was able to parse file) for feed");
      }

      #endregion

      #region Shared helpers

      /// <summary>Loads <paramref name="xml"/> into a new <see cref="XmlDocument"/>.</summary>
      private static XmlDocument LoadDocument(string xml)
      {
          var document = new XmlDocument();
          document.LoadXml(xml);
          return document;
      }

      /// <summary>Returns the inner text of <paramref name="node"/>, or an empty string when it is null.</summary>
      private static string InnerTextOrEmpty(XmlNode node)
      {
          return node == null ? string.Empty : node.InnerText;
      }

      /// <summary>
      /// Converts the text of <paramref name="dateNode"/> to a UTC timestamp. When the node is
      /// missing or its text cannot be parsed, the current UTC time is returned: a feed must
      /// always carry a date, so the moment it was processed is used as the default.
      /// </summary>
      private static DateTime ParseDateOrNow(XmlNode dateNode)
      {
          DateTime parsed;
          // The invariant culture keeps parsing independent of the host locale; feed dates
          // use English day and month names regardless of where the parser runs.
          if (dateNode != null &&
              DateTime.TryParse(dateNode.InnerText,
                                System.Globalization.CultureInfo.InvariantCulture,
                                System.Globalization.DateTimeStyles.None,
                                out parsed))
          {
              return parsed.ToUniversalTime();
          }

          return DateTime.UtcNow;
      }

      #endregion

      #region Atom 1.0 parsing methods

      /// <summary>Creates a namespace manager for <paramref name="document"/> with the "atom" prefix registered.</summary>
      private static XmlNamespaceManager CreateAtomNamespaceManager(XmlDocument document)
      {
          var namespaces = new XmlNamespaceManager(document.NameTable);
          namespaces.AddNamespace("atom", AtomNamespaceUri);
          return namespaces;
      }

      /// <summary>
      /// Fills in the title, link, last-updated time and generator of <paramref name="atomFeed"/>.
      /// Missing text elements become empty strings; a missing or invalid date becomes the current UTC time.
      /// </summary>
      private void ParseAtom10Header(Atom10Feed atomFeed, XmlDocument xmlDoc, XmlNamespaceManager namespaces)
      {
          atomFeed.Title = InnerTextOrEmpty(xmlDoc.SelectSingleNode("/atom:feed/atom:title", namespaces));

          // Link preference: a link without rel, then the author's URI, then an explicit alternate link.
          var linkNode = xmlDoc.SelectSingleNode("/atom:feed/atom:link[not(@rel)]/@href", namespaces) ??
                         xmlDoc.SelectSingleNode("/atom:feed/atom:author/atom:uri", namespaces) ??
                         xmlDoc.SelectSingleNode("/atom:feed/atom:link[@rel='alternate']/@href", namespaces);
          atomFeed.Link = InnerTextOrEmpty(linkNode);

          atomFeed.LastUpdated = ParseDateOrNow(xmlDoc.SelectSingleNode("/atom:feed/atom:updated", namespaces));
          atomFeed.Generator = InnerTextOrEmpty(xmlDoc.SelectSingleNode("/atom:feed/atom:generator", namespaces));
      }

      /// <summary>Adds one item to <paramref name="feed"/> for every atom:entry element under the feed root.</summary>
      private void ParseAtom10Items(IFeed feed, XmlDocument xmlDoc, XmlNamespaceManager namespaces)
      {
          var entryNodes = xmlDoc.SelectNodes("/atom:feed/atom:entry", namespaces);
          if (entryNodes == null)
          {
              return;
          }

          foreach (XmlNode entryNode in entryNodes)
          {
              feed.Items.Add(ParseAtom10SingleItem(entryNode, namespaces));
          }
      }

      /// <summary>
      /// Builds a feed item from a single atom:entry element. Missing text fields become empty
      /// strings and a missing atom:updated element becomes the current UTC time.
      /// </summary>
      private BaseFeedItem ParseAtom10SingleItem(XmlNode itemNode, XmlNamespaceManager namespaces)
      {
          var titleNode = itemNode.SelectSingleNode("atom:title", namespaces);
          var datePublishedNode = itemNode.SelectSingleNode("atom:updated", namespaces);
          // Every step of the path needs the atom prefix; an unprefixed "name" never
          // matches an element that lives in the Atom namespace.
          var authorNode = itemNode.SelectSingleNode("atom:author/atom:name", namespaces);
          var idNode = itemNode.SelectSingleNode("atom:id", namespaces);
          // Entries may carry only a summary, so fall back to it when there is no content element.
          var contentNode = itemNode.SelectSingleNode("atom:content", namespaces) ??
                            itemNode.SelectSingleNode("atom:summary", namespaces);
          // Prefer the link to the entry itself (rel absent or "alternate") over other
          // relations such as "replies" or "edit"; otherwise take the first link present.
          var linkNode = itemNode.SelectSingleNode("atom:link[not(@rel) or @rel='alternate']/@href", namespaces) ??
                         itemNode.SelectSingleNode("atom:link/@href", namespaces);

          BaseFeedItem item = new Atom10FeedItem
          {
              Title = InnerTextOrEmpty(titleNode),
              DatePublished = datePublishedNode == null ? DateTime.UtcNow : SafeGetDate(datePublishedNode.InnerText),
              Author = InnerTextOrEmpty(authorNode),
              Id = InnerTextOrEmpty(idNode),
              Content = InnerTextOrEmpty(contentNode),
              Link = InnerTextOrEmpty(linkNode)
          };

          // In Atom the category label is the "term" attribute, not a child element.
          var categoryNodes = itemNode.SelectNodes("atom:category/@term", namespaces);
          if (categoryNodes != null)
          {
              foreach (XmlNode categoryNode in categoryNodes)
              {
                  item.Categories.Add(categoryNode.InnerText);
              }
          }

          return item;
      }

      #endregion

      #region RSS 2.0 parsing methods

      /// <summary>
      /// Fills in the title, description, link, last-updated time, generator and language of
      /// <paramref name="rssFeed"/>. Missing text elements become empty strings; a missing or
      /// invalid date becomes the current UTC time.
      /// </summary>
      private void ParseRss20Header(Rss20Feed rssFeed, XmlDocument xmlDoc)
      {
          rssFeed.Title = InnerTextOrEmpty(xmlDoc.SelectSingleNode("/rss/channel/title"));
          rssFeed.Description = InnerTextOrEmpty(xmlDoc.SelectSingleNode("/rss/channel/description"));
          rssFeed.Link = InnerTextOrEmpty(xmlDoc.SelectSingleNode("/rss/channel/link"));

          // Use the channel's own pubDate when it has one; otherwise fall back to the
          // first pubDate found anywhere in the document (typically the first item's).
          var dateTimeNode = xmlDoc.SelectSingleNode("/rss/channel/pubDate") ??
                             xmlDoc.SelectSingleNode("//pubDate[1]");
          rssFeed.LastUpdated = ParseDateOrNow(dateTimeNode);

          rssFeed.Generator = InnerTextOrEmpty(xmlDoc.SelectSingleNode("/rss/channel/generator"));
          rssFeed.Language = InnerTextOrEmpty(xmlDoc.SelectSingleNode("/rss/channel/language"));
      }

      /// <summary>Adds one item to <paramref name="feed"/> for every item element under the channel.</summary>
      private void ParseRss20Items(IFeed feed, XmlDocument xmlDoc)
      {
          var itemNodes = xmlDoc.SelectNodes("/rss/channel/item");
          if (itemNodes == null)
          {
              return;
          }

          foreach (XmlNode itemNode in itemNodes)
          {
              feed.Items.Add(ParseRss20SingleItem(itemNode));
          }
      }

      /// <summary>
      /// Builds a feed item from a single RSS item element. Missing text fields become empty
      /// strings and a missing pubDate element becomes the current UTC time.
      /// </summary>
      private BaseFeedItem ParseRss20SingleItem(XmlNode itemNode)
      {
          var datePublishedNode = itemNode.SelectSingleNode("pubDate");

          BaseFeedItem item = new Rss20FeedItem
          {
              Title = InnerTextOrEmpty(itemNode.SelectSingleNode("title")),
              DatePublished = datePublishedNode == null ? DateTime.UtcNow : SafeGetDate(datePublishedNode.InnerText),
              Author = InnerTextOrEmpty(itemNode.SelectSingleNode("author")),
              Comments = InnerTextOrEmpty(itemNode.SelectSingleNode("comments")),
              Id = InnerTextOrEmpty(itemNode.SelectSingleNode("guid")),
              Content = InnerTextOrEmpty(itemNode.SelectSingleNode("description")),
              Link = InnerTextOrEmpty(itemNode.SelectSingleNode("link"))
          };

          var categoryNodes = itemNode.SelectNodes("category");
          if (categoryNodes != null)
          {
              foreach (XmlNode categoryNode in categoryNodes)
              {
                  item.Categories.Add(categoryNode.InnerText);
              }
          }

          return item;
      }

      #endregion
  }
  168. }