PageRenderTime 43ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/MalApi/MalAppInfoXml.cs

https://bitbucket.org/LHCGreg/mal-api
C# | 245 lines | 163 code | 43 blank | 39 comment | 10 complexity | 016b8d4773adce1a4cfe683af70e3870 MD5 | raw file
Possible License(s): Apache-2.0
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.IO;
  6. using System.Xml.Linq;
  7. using System.Text.RegularExpressions;
  8. namespace MalApi
  9. {
  /// <summary>
  /// Parses the XML obtained from MAL's malappinfo.php into a <see cref="MalUserLookupResults"/>,
  /// repairing the known ways in which the contents of <c>my_tags</c> elements can be malformed.
  /// </summary>
  public static class MalAppInfoXml
  {
      private static readonly string[] SynonymSeparator = new string[] { "; " };
      private static readonly char[] TagSeparator = new char[] { ',' };

      // my_last_updated is read as a number of seconds and added to this instant.
      private static readonly DateTime UnixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, 0, DateTimeKind.Utc);

      // Rumor has it that compiled regexes are far more performant than non-compiled regexes on large pieces of text.
      // I haven't profiled it though.

      // Matches one whole <my_tags> element and captures its raw text as "TagText".
      // NOTE: RegexOptions.Singleline is not set, so '.' does not match a newline and tag text that
      // spans several lines is left unsanitized.
      private static readonly Lazy<Regex> s_tagElementContentsRegex =
          new Lazy<Regex>(() => new Regex("<my_tags>(?<TagText>.*?)</my_tags>", RegexOptions.Compiled | RegexOptions.CultureInvariant));
      private static Regex TagElementContentsRegex { get { return s_tagElementContentsRegex.Value; } }

      // Matches an & only if it is not the start of &lt; &gt; &amp; &apos; &quot; &#x<hex digits>; or &#<decimal digits>;
      // The hex digit class must be [0-9a-fA-F]; a range of "A-f" would also accept G-Z and some punctuation,
      // letting things like "&#xZZ;" through as if they were valid character references.
      private static readonly Lazy<Regex> s_nonEntityAmpersandRegex =
          new Lazy<Regex>(() => new Regex("&(?!lt;)(?!gt;)(?!amp;)(?!apos;)(?!quot;)(?!#x[0-9a-fA-F]+;)(?!#[0-9]+;)", RegexOptions.Compiled | RegexOptions.CultureInvariant));
      private static Regex NonEntityAmpersandRegex { get { return s_nonEntityAmpersandRegex.Value; } }

      // Matches any UTF-16 code unit not in: U+0009, U+000A, U+000D, U+0020-U+D7FF, U+E000-U+FFFD (see http://en.wikipedia.org/wiki/Xml)
      // NOTE(review): surrogate code units (U+D800-U+DFFF) are matched too, so characters outside the
      // Basic Multilingual Plane are stripped from tags as well.
      private static readonly Lazy<Regex> s_invalidXmlCharacterRegex =
          new Lazy<Regex>(() => new Regex("[^\\u0009\\u000A\\u000D\\u0020-\\uD7FF\\uE000-\\uFFFD]", RegexOptions.Compiled | RegexOptions.CultureInvariant));
      private static Regex InvalidXmlCharacterRegex { get { return s_invalidXmlCharacterRegex.Value; } }

      // Rebuilds a matched <my_tags> element: bare ampersands become &amp; and characters matched by
      // InvalidXmlCharacterRegex are dropped.
      private static readonly MatchEvaluator TagElementContentsReplacer = (Match match) =>
      {
          string tagText = match.Groups["TagText"].Value;
          string replacementTagText = NonEntityAmpersandRegex.Replace(tagText, "&amp;");
          replacementTagText = InvalidXmlCharacterRegex.Replace(replacementTagText, "");
          return "<my_tags>" + replacementTagText + "</my_tags>";
      };

      /// <summary>
      /// Parses XML obtained from malappinfo.php. The XML is sanitized to account for MAL's invalid XML if, for example,
      /// a user has an unescaped &amp; character in their tags.
      /// </summary>
      /// <param name="xmlTextReader">Reader positioned at the start of the XML. It is read to the end but not disposed.</param>
      /// <returns>The user and anime list described by the XML.</returns>
      /// <remarks>
      /// Only the contents of <c>my_tags</c> elements are repaired. XML that is malformed elsewhere surfaces as
      /// whatever exception <see cref="XDocument.Load(TextReader)"/> throws.
      /// </remarks>
      /// <exception cref="System.ArgumentNullException"><paramref name="xmlTextReader"/> is null.</exception>
      /// <exception cref="MalApi.MalUserNotFoundException"></exception>
      /// <exception cref="MalApi.MalApiException"></exception>
      public static MalUserLookupResults Parse(TextReader xmlTextReader)
      {
          if (xmlTextReader == null)
          {
              throw new ArgumentNullException("xmlTextReader");
          }

          Logging.Log.Trace("Sanitizing XML.");

          // A separate local keeps the caller's reader distinct from the in-memory reader disposed here.
          using (TextReader sanitizedReader = SanitizeAnimeListXml(xmlTextReader))
          {
              Logging.Log.Trace("XML sanitized.");
              XDocument doc = XDocument.Load(sanitizedReader);
              return Parse(doc);
          }
      }

      /// <summary>
      /// Sanitizes anime list XML which is not always well-formed. If a user uses &amp; characters in their tags,
      /// they will not be escaped in the XML.
      /// </summary>
      /// <param name="xmlTextReader">Reader over the raw XML; it is read to the end.</param>
      /// <returns>A reader over an in-memory copy of the XML with every <c>my_tags</c> element sanitized.</returns>
      private static TextReader SanitizeAnimeListXml(TextReader xmlTextReader)
      {
          string rawXml = xmlTextReader.ReadToEnd();
          string sanitizedXml = TagElementContentsRegex.Replace(rawXml, TagElementContentsReplacer);
          return new StringReader(sanitizedXml);
      }

      /// <summary>
      /// Parses XML obtained from malappinfo.php.
      /// </summary>
      /// <param name="doc">Document whose root contains either an <c>error</c> element or a <c>myinfo</c> element
      /// followed by zero or more <c>anime</c> elements.</param>
      /// <returns>The user id, canonical user name and anime list entries found in the document.</returns>
      /// <exception cref="System.ArgumentNullException"><paramref name="doc"/> is null.</exception>
      /// <exception cref="MalApi.MalUserNotFoundException">The document's error text is "Invalid username".</exception>
      /// <exception cref="MalApi.MalApiException">The document has no root, carries any other error text,
      /// lacks an expected element, or an element holds a value that cannot be converted.</exception>
      public static MalUserLookupResults Parse(XDocument doc)
      {
          if (doc == null)
          {
              throw new ArgumentNullException("doc");
          }

          Logging.Log.Trace("Parsing XML.");

          XElement root = doc.Root;
          if (root == null)
          {
              throw new MalApiException("XML document has no root element.");
          }

          XElement error = root.Element("error");
          if (error != null)
          {
              string errorText = (string)error;
              if (errorText == "Invalid username")
              {
                  throw new MalUserNotFoundException("No MAL list exists for this user.");
              }

              throw new MalApiException(errorText);
          }

          XElement myinfo = GetExpectedElement(root, "myinfo");
          int userId = GetElementValueInt(myinfo, "user_id");
          string canonicalUserName = GetElementValueString(myinfo, "user_name");

          List<MyAnimeListEntry> entries = new List<MyAnimeListEntry>();
          foreach (XElement anime in root.Elements("anime"))
          {
              entries.Add(ParseAnimeEntry(anime));
          }

          MalUserLookupResults results = new MalUserLookupResults(userId: userId, canonicalUserName: canonicalUserName, animeList: entries);
          Logging.Log.Trace("Parsed XML.");
          return results;
      }

      /// <summary>
      /// Converts a single <c>anime</c> element into a list entry.
      /// </summary>
      private static MyAnimeListEntry ParseAnimeEntry(XElement anime)
      {
          // Series-level information.
          int animeId = GetElementValueInt(anime, "series_animedb_id");
          string title = GetElementValueString(anime, "series_title");

          string synonymList = GetElementValueString(anime, "series_synonyms");
          string[] rawSynonyms = synonymList.Split(SynonymSeparator, StringSplitOptions.RemoveEmptyEntries);

          // filter out synonyms that are the same as the main title
          HashSet<string> synonyms = new HashSet<string>(rawSynonyms.Where(synonym => !synonym.Equals(title, StringComparison.Ordinal)));

          MalAnimeType seriesType = (MalAnimeType)GetElementValueInt(anime, "series_type");
          int numEpisodes = GetElementValueInt(anime, "series_episodes");
          MalSeriesStatus seriesStatus = (MalSeriesStatus)GetElementValueInt(anime, "series_status");
          UncertainDate seriesStart = UncertainDate.FromMalDateString(GetElementValueString(anime, "series_start"));
          UncertainDate seriesEnd = UncertainDate.FromMalDateString(GetElementValueString(anime, "series_end"));
          string seriesImage = GetElementValueString(anime, "series_image");

          MalAnimeInfoFromUserLookup animeInfo = new MalAnimeInfoFromUserLookup(animeId: animeId, title: title,
              type: seriesType, synonyms: synonyms, status: seriesStatus, numEpisodes: numEpisodes, startDate: seriesStart,
              endDate: seriesEnd, imageUrl: seriesImage);

          // The user's own information about the series.
          int numEpisodesWatched = GetElementValueInt(anime, "my_watched_episodes");
          UncertainDate myStartDate = UncertainDate.FromMalDateString(GetElementValueString(anime, "my_start_date"));
          UncertainDate myFinishDate = UncertainDate.FromMalDateString(GetElementValueString(anime, "my_finish_date"));

          // A score of 0 is reported to callers as "no score".
          decimal rawScore = GetElementValueDecimal(anime, "my_score");
          decimal? myScore = rawScore == 0 ? (decimal?)null : rawScore;

          CompletionStatus completionStatus = (CompletionStatus)GetElementValueInt(anime, "my_status");

          long lastUpdatedUnixTimestamp = GetElementValueLong(anime, "my_last_updated");
          DateTime lastUpdated = UnixEpoch + TimeSpan.FromSeconds(lastUpdatedUnixTimestamp);

          // Trim first and filter afterwards so that whitespace-only tags ("a, ,b") do not
          // survive as empty strings.
          string rawTagsString = GetElementValueString(anime, "my_tags");
          List<string> tags = rawTagsString
              .Split(TagSeparator, StringSplitOptions.RemoveEmptyEntries)
              .Select(tag => tag.Trim())
              .Where(tag => tag.Length > 0)
              .ToList();

          return new MyAnimeListEntry(score: myScore, status: completionStatus, numEpisodesWatched: numEpisodesWatched,
              myStartDate: myStartDate, myFinishDate: myFinishDate, myLastUpdate: lastUpdated, animeInfo: animeInfo, tags: tags);
      }

      /// <summary>
      /// Returns the first child element named <paramref name="elementName"/>, or throws
      /// <see cref="MalApiException"/> if there is none.
      /// </summary>
      private static XElement GetExpectedElement(XContainer container, string elementName)
      {
          XElement element = container.Element(elementName);
          if (element == null)
          {
              throw new MalApiException(string.Format("Did not find element {0}.", elementName));
          }
          return element;
      }

      /// <summary>
      /// Builds the exception thrown when an element's text cannot be converted to the requested type.
      /// </summary>
      private static MalApiException CreateUnexpectedValueException(XElement element, string elementName, Exception innerException)
      {
          return new MalApiException(string.Format("Unexpected value \"{0}\" for element {1}.", element.Value, elementName), innerException);
      }

      /// <summary>
      /// Returns the text of the expected child element <paramref name="elementName"/>.
      /// </summary>
      private static string GetElementValueString(XContainer container, string elementName)
      {
          // Converting a non-null XElement to string only reads its text, so there is no
          // conversion failure to translate here.
          return (string)GetExpectedElement(container, elementName);
      }

      /// <summary>
      /// Returns the expected child element <paramref name="elementName"/> converted to an int.
      /// A malformed or out-of-range value is reported as a <see cref="MalApiException"/>.
      /// </summary>
      private static int GetElementValueInt(XContainer container, string elementName)
      {
          XElement element = GetExpectedElement(container, elementName);
          try
          {
              return (int)element;
          }
          catch (FormatException ex)
          {
              throw CreateUnexpectedValueException(element, elementName, ex);
          }
          catch (OverflowException ex)
          {
              throw CreateUnexpectedValueException(element, elementName, ex);
          }
      }

      /// <summary>
      /// Returns the expected child element <paramref name="elementName"/> converted to a long.
      /// A malformed or out-of-range value is reported as a <see cref="MalApiException"/>.
      /// </summary>
      private static long GetElementValueLong(XContainer container, string elementName)
      {
          XElement element = GetExpectedElement(container, elementName);
          try
          {
              return (long)element;
          }
          catch (FormatException ex)
          {
              throw CreateUnexpectedValueException(element, elementName, ex);
          }
          catch (OverflowException ex)
          {
              throw CreateUnexpectedValueException(element, elementName, ex);
          }
      }

      /// <summary>
      /// Returns the expected child element <paramref name="elementName"/> converted to a decimal.
      /// A malformed or out-of-range value is reported as a <see cref="MalApiException"/>.
      /// </summary>
      private static decimal GetElementValueDecimal(XContainer container, string elementName)
      {
          XElement element = GetExpectedElement(container, elementName);
          try
          {
              return (decimal)element;
          }
          catch (FormatException ex)
          {
              throw CreateUnexpectedValueException(element, elementName, ex);
          }
          catch (OverflowException ex)
          {
              throw CreateUnexpectedValueException(element, elementName, ex);
          }
      }
  }
  187. }
  188. /*
  189. Copyright 2012 Greg Najda
  190. Licensed under the Apache License, Version 2.0 (the "License");
  191. you may not use this file except in compliance with the License.
  192. You may obtain a copy of the License at
  193. http://www.apache.org/licenses/LICENSE-2.0
  194. Unless required by applicable law or agreed to in writing, software
  195. distributed under the License is distributed on an "AS IS" BASIS,
  196. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  197. See the License for the specific language governing permissions and
  198. limitations under the License.
  199. */