PageRenderTime 38ms CodeModel.GetById 15ms app.highlight 17ms RepoModel.GetById 2ms app.codeStats 0ms

/QDFeedParser/Xml/XpathFeedXmlParser.cs

#
C# | 202 lines | 167 code | 34 blank | 1 comment | 31 complexity | c4f169bcabafd754c23143a2a46433d9 MD5 | raw file
  1using System;
  2using System.Collections.Generic;
  3using System.Xml;
  4using System.Xml.XPath;
  5using System.Text;
  6
  7namespace QDFeedParser.Xml
  8{
  9    public class XPathFeedXmlParser : FeedXmlParserBase
 10    {
 11        #region IFeedXmlParser Members
 12
 13        public override void ParseFeed(IFeed feed, string xml)
 14        {
 15            switch (feed.FeedType)
 16            {
 17                case FeedType.Rss20:
 18                    var rssFeed = feed as Rss20Feed;
 19                    ParseRss20Header(rssFeed, xml);
 20                    ParseRss20Items(rssFeed, xml);
 21                    break;
 22                case FeedType.Atom10:
 23                    var atomFeed = feed as Atom10Feed;
 24                    ParseAtom10Header(atomFeed, xml);
 25                    ParseAtom10Items(atomFeed, xml);
 26                    break;
 27            }
 28        }
 29        
 30        public override FeedType CheckFeedType(string feedxml)
 31        {
 32            var doc = new XmlDocument();
 33            doc.LoadXml(feedxml);
 34            var xmlRootElement = doc.DocumentElement;
 35            if (xmlRootElement.Name.Contains(RssRootElementName) && xmlRootElement.GetAttribute(RssVersionAttributeName) == "2.0")
 36                return FeedType.Rss20;
 37            else if (xmlRootElement.Name.Contains(AtomRootElementName))
 38                return FeedType.Atom10;
 39            else
 40                throw new InvalidFeedXmlException("Unable to determine feedtype (but was able to parse file) for feed");
 41        }
 42
 43        #endregion
 44
 45        #region Atom 1.0 parsing methods
 46
 47        private XmlNamespaceManager NsManager;
 48
 49        private void ParseAtom10Header(Atom10Feed atomFeed, string xml)
 50        {
 51            var xmlDoc = new XmlDocument();
 52            xmlDoc.LoadXml(xml);
 53
 54            //Initialize our namespace manager.
 55            NsManager = new XmlNamespaceManager(xmlDoc.NameTable);
 56            NsManager.AddNamespace("atom", "http://www.w3.org/2005/Atom");
 57
 58            var titleNode = xmlDoc.SelectSingleNode("/atom:feed/atom:title", NsManager);
 59            atomFeed.Title = titleNode.InnerText;
 60
 61            var linkNode = xmlDoc.SelectSingleNode("/atom:feed/atom:link[not(@rel)]/@href", NsManager) ??
 62                           xmlDoc.SelectSingleNode("/atom:feed/atom:author/atom:uri", NsManager) ??
 63                           xmlDoc.SelectSingleNode("/atom:feed/atom:link[@rel='alternate']/@href", NsManager);
 64
 65            atomFeed.Link = linkNode == null ? string.Empty : linkNode.InnerText;
 66
 67            var dateTimeNode = xmlDoc.SelectSingleNode("/atom:feed/atom:updated", NsManager);
 68
 69            DateTime timeOut;
 70            DateTime.TryParse(dateTimeNode.InnerText, out timeOut);
 71            atomFeed.LastUpdated = timeOut.ToUniversalTime();
 72
 73            var generatorNode = xmlDoc.SelectSingleNode("/atom:feed/atom:generator", NsManager);
 74            atomFeed.Generator = generatorNode == null ? string.Empty : generatorNode.InnerText;
 75        }
 76
 77        private void ParseAtom10Items(IFeed feed, string xml)
 78        {
 79            var xmlDoc = new XmlDocument();
 80            xmlDoc.LoadXml(xml);
 81            var feedItemNodes = xmlDoc.SelectNodes("/atom:feed/atom:entry", NsManager);
 82            foreach(XmlNode node in feedItemNodes)
 83            {
 84                feed.Items.Add(ParseAtom10SingleItem(node));
 85            }
 86        }
 87
 88        private BaseFeedItem ParseAtom10SingleItem(XmlNode itemNode)
 89        {
 90            var titleNode = itemNode.SelectSingleNode("atom:title", NsManager);
 91            var datePublishedNode = itemNode.SelectSingleNode("atom:updated", NsManager);
 92            var authorNode = itemNode.SelectSingleNode("atom:author/name", NsManager);
 93            var idNode = itemNode.SelectSingleNode("atom:id", NsManager);
 94            var contentNode = itemNode.SelectSingleNode("atom:content", NsManager);
 95            var linkNode = itemNode.SelectSingleNode("atom:link/@href", NsManager);
 96
 97            BaseFeedItem item = new Atom10FeedItem
 98            {
 99                Title = titleNode == null ? string.Empty : titleNode.InnerText,
100                DatePublished = datePublishedNode == null ? DateTime.UtcNow : SafeGetDate(datePublishedNode.InnerText),
101                Author = authorNode == null ? string.Empty : authorNode.InnerText,
102                Id = idNode == null ? string.Empty : idNode.InnerText,
103                Content = contentNode == null ? string.Empty : contentNode.InnerText,
104                Link = linkNode == null ? string.Empty : linkNode.InnerText
105            };
106
107            var categoryNodes = itemNode.SelectNodes("atom:category/atom:term", NsManager);
108            if (categoryNodes != null)
109            {
110                foreach (XmlNode categoryNode in categoryNodes)
111                {
112                    item.Categories.Add(categoryNode.InnerText);
113                }
114            }
115
116            return item;
117        }
118
119        #endregion
120
121        #region RSS 2.0 parsing methods
122
123        private void ParseRss20Header(Rss20Feed rssFeed, string xml)
124        {
125
126            var xmlDoc = new XmlDocument();
127            xmlDoc.LoadXml(xml);
128            var titleNode = xmlDoc.SelectSingleNode("/rss/channel/title");
129            rssFeed.Title = titleNode.InnerText;
130
131            var descriptionNode = xmlDoc.SelectSingleNode("/rss/channel/description");
132            rssFeed.Description = descriptionNode == null ? string.Empty : descriptionNode.InnerText;
133
134            var linkNode = xmlDoc.SelectSingleNode("/rss/channel/link");
135            rssFeed.Link = linkNode == null ? string.Empty : linkNode.InnerText;
136
137            var dateTimeNode = xmlDoc.SelectSingleNode("//pubDate[1]");
138            if (dateTimeNode == null) //We have to have a date, so we'll use the date/time when we polled the RSS feed as the default.
139            {
140                rssFeed.LastUpdated = DateTime.UtcNow;
141            }
142            else
143            {
144                DateTime timeOut;
145                DateTime.TryParse(dateTimeNode.InnerText, out timeOut);
146                rssFeed.LastUpdated = timeOut.ToUniversalTime();
147            }
148
149            var generatorNode = xmlDoc.SelectSingleNode("/rss/channel/generator");
150            rssFeed.Generator = generatorNode == null ? string.Empty : generatorNode.InnerText;
151
152            var languageNode = xmlDoc.SelectSingleNode("/rss/channel/language");
153            rssFeed.Language = languageNode == null ? string.Empty : languageNode.InnerText;
154        }
155
156        private void ParseRss20Items(IFeed feed, string xml)
157        {
158            var xmlDoc = new XmlDocument();
159            xmlDoc.LoadXml(xml);
160            var feedItemNodes = xmlDoc.SelectNodes("/rss/channel/item");
161            foreach (XmlNode item in feedItemNodes)
162            {
163                feed.Items.Add(ParseRss20SingleItem(item));
164            }
165        }
166
167        private BaseFeedItem ParseRss20SingleItem(XmlNode itemNode)
168        {
169            var titleNode = itemNode.SelectSingleNode("title");
170            var datePublishedNode = itemNode.SelectSingleNode("pubDate");
171            var authorNode = itemNode.SelectSingleNode("author");
172            var commentsNode = itemNode.SelectSingleNode("comments");
173            var idNode = itemNode.SelectSingleNode("guid");
174            var contentNode = itemNode.SelectSingleNode("description");
175            var linkNode = itemNode.SelectSingleNode("link");
176
177            BaseFeedItem item = new Rss20FeedItem
178            {
179                Title = titleNode == null ? string.Empty : titleNode.InnerText,
180                DatePublished = datePublishedNode == null ? DateTime.UtcNow : SafeGetDate(datePublishedNode.InnerText),
181                Author = authorNode == null ? string.Empty : authorNode.InnerText,
182                Comments = commentsNode == null ? string.Empty : commentsNode.InnerText,
183                Id = idNode == null ? string.Empty : idNode.InnerText,
184                Content = contentNode == null ? string.Empty : contentNode.InnerText,
185                Link = linkNode == null ? string.Empty : linkNode.InnerText
186            };
187
188            var categoryNodes = itemNode.SelectNodes("category");
189            if (categoryNodes != null)
190            {
191                foreach (XmlNode categoryNode in categoryNodes)
192                {
193                    item.Categories.Add(categoryNode.InnerText);
194                }
195            }
196
197            return item;
198        }
199
200        #endregion
201    }
202}