PageRenderTime 39ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/MWTools-DEV/WikiAccess/Namespace.cs

#
C# | 318 lines | 153 code | 20 blank | 145 comment | 21 complexity | f51253b3fb9f414a72f6d4a51f85fd9b MD5 | raw file
Possible License(s): BSD-3-Clause, GPL-3.0
  1. /**********************************************************************************
  2. * Namespace utils of WikiAccess Library *
  3. * Copyright (C) 2007 Vasiliev V. V. *
  4. * *
  5. * This program is free software: you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation, either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program. If not, see <http://www.gnu.org/licenses/> *
  17. **********************************************************************************/
  18. using System;
  19. using System.Collections.Generic;
  20. using System.IO;
  21. using System.Net;
  22. using System.Text;
  23. using System.Text.RegularExpressions;
  24. namespace WikiTools.Access
  25. {
  /// <summary>
  /// Utilities for MediaWiki namespaces: well-known namespace IDs, loading and
  /// saving an ID-to-name table, and converting between page and talk titles.
  /// </summary>
  public class Namespaces
  {
      // Matches elements of the form <ns id="N">Name</ns>. The name group requires at
      // least one character, so elements without text content are never captured.
      private static readonly Regex NamespaceFromMeta = new Regex("<ns id=\"(-?\\d{1,3})\">(.+?)</ns>", RegexOptions.Compiled);

      /// <summary>
      /// Media namespace
      /// </summary>
      public static readonly int Media = -2;
      /// <summary>
      /// Special namespace, where special pages are stored
      /// </summary>
      public static readonly int Special = -1;
      /// <summary>
      /// Main (article) namespace
      /// </summary>
      public static readonly int Main = 0;
      /// <summary>
      /// Talk for mainspace pages
      /// </summary>
      public static readonly int Talk = 1;
      /// <summary>
      /// Users' personal pages
      /// </summary>
      public static readonly int User = 2;
      /// <summary>
      /// Users' talk pages
      /// </summary>
      public static readonly int UserTalk = 3;
      /// <summary>
      /// Project pages
      /// </summary>
      public static readonly int Project = 4;
      /// <summary>
      /// Talk for project pages
      /// </summary>
      public static readonly int ProjectTalk = 5;
      /// <summary>
      /// Media files
      /// </summary>
      public static readonly int Image = 6;
      /// <summary>
      /// Media files' talk pages
      /// </summary>
      public static readonly int ImageTalk = 7;
      /// <summary>
      /// MediaWiki messages
      /// </summary>
      public static readonly int MediaWiki = 8;
      /// <summary>
      /// MediaWiki messages' talk pages
      /// </summary>
      public static readonly int MediaWikiTalk = 9;
      /// <summary>
      /// Templates
      /// </summary>
      public static readonly int Template = 10;
      /// <summary>
      /// Templates' talk pages
      /// </summary>
      public static readonly int TemplateTalk = 11;
      /// <summary>
      /// Help
      /// </summary>
      public static readonly int Help = 12;
      /// <summary>
      /// Help's talk pages
      /// </summary>
      public static readonly int HelpTalk = 13;
      /// <summary>
      /// Categories
      /// </summary>
      public static readonly int Category = 14;
      /// <summary>
      /// Categories' talk pages
      /// </summary>
      public static readonly int CategoryTalk = 15;

      // Canonical table built once for internal lookups, so GetNamespaceID and the
      // title helpers do not rebuild it on every call. Never handed out to callers.
      private static readonly SortedList<int, string> StandardNamespaces = GetStandardNamespaces();

      #region Load and save
      /// <summary>
      /// Loads namespaces from live wiki
      /// </summary>
      /// <param name="wiki">Source of namespaces</param>
      /// <returns>Namespace ID:Name list</returns>
      /// <exception cref="ArgumentNullException"><paramref name="wiki"/> is null</exception>
      public static SortedList<int, string> GetNamespaces(Wiki wiki)
      {
          if (wiki == null)
          {
              throw new ArgumentNullException("wiki");
          }

          string uri = wiki.WikiURI + "/api.php?action=query&meta=siteinfo&siprop=namespaces&format=xml";
          WebRequest rq = WebRequest.Create(uri);
          string str;
          // Dispose the response and the reader so the connection is released promptly.
          using (WebResponse response = rq.GetResponse())
          using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
          {
              str = reader.ReadToEnd();
          }

          SortedList<int, string> result = new SortedList<int, string>();
          // The pattern only captures elements with a non-empty name, so the main
          // namespace (empty name) is seeded by hand.
          result[0] = "";
          foreach (Match match in NamespaceFromMeta.Matches(str))
          {
              // Indexer instead of Add: a repeated ID overwrites instead of throwing.
              result[ParseId(match.Groups[1].Value)] = match.Groups[2].Value;
          }
          return result;
      }

      /// <summary>
      /// Loads namespaces from file. Each non-blank line must have the form "ID:Name";
      /// everything after the first colon is taken as the name.
      /// </summary>
      /// <param name="fname">File name</param>
      /// <returns>Namespace ID:Name list</returns>
      /// <exception cref="FormatException">A non-blank line has no colon or its ID is not an integer</exception>
      public static SortedList<int, string> LoadFromFile(string fname)
      {
          string[] lines = File.ReadAllLines(fname, Encoding.UTF8);
          SortedList<int, string> result = new SortedList<int, string>();
          foreach (string cline in lines)
          {
              // Tolerate blank lines (for example a trailing newline added by an editor).
              if (cline.Trim().Length == 0)
              {
                  continue;
              }

              // Split on the first colon only, so a name containing ':' is kept whole.
              int colon = cline.IndexOf(':');
              if (colon < 0)
              {
                  throw new FormatException("Malformed namespace line (expected \"ID:Name\"): " + cline);
              }

              result[ParseId(cline.Substring(0, colon))] = cline.Substring(colon + 1);
          }
          return result;
      }

      /// <summary>
      /// Saves namespaces to file, one "ID:Name" line per namespace
      /// </summary>
      /// <param name="fname">File name</param>
      /// <param name="ns">Namespaces list</param>
      public static void SaveToFile(string fname, SortedList<int, string> ns)
      {
          List<string> lines = new List<string>(ns.Count);
          foreach (KeyValuePair<int, string> entry in ns)
          {
              // Invariant culture keeps the file readable by LoadFromFile under any culture.
              lines.Add(entry.Key.ToString(System.Globalization.CultureInfo.InvariantCulture) + ":" + entry.Value);
          }
          File.WriteAllLines(fname, lines.ToArray(), Encoding.UTF8);
      }

      /// <summary>
      /// Makes name for cache file: the host part of the URI followed by ".namespaces"
      /// </summary>
      /// <param name="uri">URI of wiki</param>
      /// <returns>File name</returns>
      public static string MkName(string uri)
      {
          return (new Uri(uri).Host) + ".namespaces";
      }
      #endregion

      /// <summary>
      /// Gets canonical namespaces list. A new list is built on every call, so the
      /// caller may modify the result freely.
      /// </summary>
      /// <returns>Canonical namespaces list</returns>
      public static SortedList<int, string> GetStandardNamespaces()
      {
          SortedList<int, string> result = new SortedList<int, string>();
          result.Add(-2, "Media");
          result.Add(-1, "Special");
          result.Add(0, "");
          result.Add(1, "Talk");
          result.Add(2, "User");
          result.Add(3, "User talk");
          result.Add(4, "Project");
          result.Add(5, "Project talk");
          result.Add(6, "Image");
          result.Add(7, "Image talk");
          result.Add(8, "MediaWiki");
          result.Add(9, "MediaWiki talk");
          result.Add(10, "Template");
          result.Add(11, "Template talk");
          result.Add(12, "Help");
          result.Add(13, "Help talk");
          result.Add(14, "Category");
          result.Add(15, "Category talk");
          return result;
      }

      // Namespace table of this instance (ID -> name), loaded by the constructors.
      private readonly SortedList<int, string> namespaces;

      /// <summary>
      /// Initializes new instance of object from file
      /// </summary>
      /// <param name="fpath">File name</param>
      public Namespaces(string fpath)
      {
          namespaces = Namespaces.LoadFromFile(fpath);
      }

      /// <summary>
      /// Initializes new instance of object from live wiki
      /// </summary>
      /// <param name="wiki">Wiki</param>
      public Namespaces(Wiki wiki)
      {
          namespaces = Namespaces.GetNamespaces(wiki);
      }

      /// <summary>
      /// Gets ID of specified namespace. The table of this instance is searched
      /// first, then the canonical table; the match is exact (case-sensitive).
      /// </summary>
      /// <param name="nsName">Namespace name</param>
      /// <returns>Namespace ID, or 0 when the name is not found in either table</returns>
      public int GetNamespaceID(string nsName)
      {
          int id;
          if (TryFindId(namespaces, nsName, out id))
          {
              return id;
          }
          if (TryFindId(StandardNamespaces, nsName, out id))
          {
              return id;
          }
          return 0;
      }

      /// <summary>
      /// Extracts namespace from title. Only the text before the first colon is
      /// considered; a title without a colon always belongs to namespace 0.
      /// </summary>
      /// <param name="title">Page title</param>
      /// <returns>Namespace ID</returns>
      /// <exception cref="ArgumentNullException"><paramref name="title"/> is null</exception>
      public int GetNamespaceByTitle(string title)
      {
          if (title == null)
          {
              throw new ArgumentNullException("title");
          }

          // Without a colon there is no prefix. Treating the whole title as a prefix
          // would classify an article named "Help" as belonging to the Help namespace.
          int colon = title.IndexOf(':');
          if (colon < 0)
          {
              return 0;
          }
          return GetNamespaceID(title.Substring(0, colon));
      }

      /// <summary>
      /// Saves namespaces to file
      /// </summary>
      /// <param name="fname">File name</param>
      public void SaveToFile(string fname)
      {
          Namespaces.SaveToFile(fname, namespaces);
      }

      /// <summary>
      /// Gets namespace by ID. The table of this instance is consulted first and the
      /// canonical table is used as a fallback.
      /// </summary>
      /// <param name="ID">Namespace ID</param>
      /// <returns>Namespace name</returns>
      /// <exception cref="KeyNotFoundException">The ID is in neither table</exception>
      public string GetNamespaceByID(int ID)
      {
          return ResolveName(ID, true);
      }

      /// <summary>
      /// Get talk title of specified page. Titles that are already in a talk
      /// namespace, or in a namespace with a negative ID, are returned unchanged.
      /// </summary>
      /// <param name="title">Page title</param>
      /// <returns>Talk title</returns>
      public string TitleToTalk(string title)
      {
          int nid = GetNamespaceByTitle(title);
          if (nid < 0 || IsTalkId(nid))
          {
              return title;
          }
          if (nid == 0)
          {
              return ResolveName(1, true) + ":" + title;
          }
          return SwapPrefix(title, nid, nid + 1);
      }

      /// <summary>
      /// Get page title of specified talk page. Titles that are not in a talk
      /// namespace are returned unchanged.
      /// </summary>
      /// <param name="title">Talk title</param>
      /// <returns>Page title</returns>
      public string TitleFromTalk(string title)
      {
          int nid = GetNamespaceByTitle(title);
          if (!IsTalkId(nid))
          {
              return title;
          }
          if (nid == 1)
          {
              // Talk of the main namespace: the subject page has no prefix at all.
              return title.Substring(title.IndexOf(':') + 1);
          }
          return SwapPrefix(title, nid, nid - 1);
      }

      /// <summary>
      /// Checks if page is in talk namespace (positive odd namespace ID)
      /// </summary>
      /// <param name="title">Page title</param>
      /// <returns>Is talk namespace</returns>
      public bool IsTalkNamespace(string title)
      {
          return IsTalkId(GetNamespaceByTitle(title));
      }

      /// <summary>
      /// Removes namespace prefix from page name. The name is trimmed first; a name
      /// whose prefix is not a known namespace is returned (trimmed) as is.
      /// </summary>
      /// <param name="pgname">Page name</param>
      /// <returns>Page name without namespace</returns>
      /// <exception cref="ArgumentNullException"><paramref name="pgname"/> is null</exception>
      public string RemoveNamespace(string pgname)
      {
          if (pgname == null)
          {
              throw new ArgumentNullException("pgname");
          }

          pgname = pgname.Trim();
          if (GetNamespaceByTitle(pgname) == 0)
          {
              return pgname;
          }
          // A non-zero namespace implies the title has a colon; cut right after it
          // rather than relying on the length of whichever name happened to match.
          return pgname.Substring(pgname.IndexOf(':') + 1);
      }

      // Parses a namespace ID independently of the current culture.
      private static int ParseId(string text)
      {
          return Int32.Parse(text, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture);
      }

      // Finds the ID of the first entry in the table whose name equals the given name.
      private static bool TryFindId(SortedList<int, string> table, string name, out int id)
      {
          int index = table.IndexOfValue(name);
          if (index < 0)
          {
              id = 0;
              return false;
          }
          id = table.Keys[index];
          return true;
      }

      // Talk namespaces are the positive odd IDs.
      private static bool IsTalkId(int id)
      {
          return id > 0 && id % 2 == 1;
      }

      // Looks a name up by ID in both tables; preferLocal selects which one is tried first.
      private string ResolveName(int id, bool preferLocal)
      {
          SortedList<int, string> first = preferLocal ? namespaces : StandardNamespaces;
          SortedList<int, string> second = preferLocal ? StandardNamespaces : namespaces;
          string name;
          if (first.TryGetValue(id, out name) || second.TryGetValue(id, out name))
          {
              return name;
          }
          throw new KeyNotFoundException("Unknown namespace ID: " + id);
      }

      // Replaces the namespace prefix of the title (known to be namespace currentId)
      // with the name of targetId. A title written with this instance's own name gets
      // that table's target name; one written with the canonical name gets the
      // canonical target name. The other table is used only as a fallback.
      private string SwapPrefix(string title, int currentId, int targetId)
      {
          int colon = title.IndexOf(':');
          string prefix = title.Substring(0, colon);
          string localName;
          bool writtenLocally = namespaces.TryGetValue(currentId, out localName) && prefix == localName;
          return ResolveName(targetId, writtenLocally) + title.Substring(colon);
      }
  }
  298. }