PageRenderTime 44ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/Source/Pronto/HtmlHelper.cs

http://prontocms.googlecode.com/
C# | 292 lines | 286 code | 4 blank | 2 comment | 11 complexity | ea72bd64852c61fd7c4f28ae3b1e951b MD5 | raw file
Possible License(s): LGPL-2.1
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Xml.Linq;
  6. namespace Pronto
  7. {
  /// <summary>
  /// Helpers for turning HTML fragments into LINQ to XML objects.
  /// </summary>
  public static class HtmlHelper
  {
      /// <summary>
      /// Parses an HTML fragment into its top-level XML nodes.
      /// </summary>
      /// <remarks>
      /// Named HTML entities listed in the entity map are first rewritten as
      /// numeric character references, because the XML parser rejects entity
      /// names it does not know. The fragment is then wrapped in a temporary
      /// <c>&lt;body&gt;</c> element so that it has a single root, and the
      /// child nodes of that element are returned.
      /// </remarks>
      /// <param name="content">The fragment to parse. It must be well-formed XML apart from the named entities.</param>
      /// <returns>The child nodes of the temporary root element, in document order.</returns>
      /// <exception cref="ArgumentException">An '&amp;' in <paramref name="content"/> has no ';' after it.</exception>
      /// <exception cref="System.Xml.XmlException">The fragment is not well-formed XML after entity conversion.</exception>
      public static IEnumerable<XObject> ParseXObjects(this string content)
      {
          string xml = "<body>" + ConvertHtmlEntitiesToNumbers(content) + "</body>";
          return XElement.Parse(xml).Nodes().Cast<XObject>();
      }

      /// <summary>
      /// Returns a copy of <paramref name="input"/> in which every named entity
      /// found in the entity map is replaced by its numeric character reference.
      /// </summary>
      /// <param name="input">The text to convert.</param>
      /// <returns>The converted text.</returns>
      /// <exception cref="ArgumentException">An '&amp;' has no ';' after it.</exception>
      private static string ConvertHtmlEntitiesToNumbers(string input)
      {
          var sb = new StringBuilder(input);
          for (int i = 0; i < sb.Length; i++)
          {
              if (sb[i] == '&')
              {
                  // Continue scanning from the ';' that terminates the entity.
                  i = ReplaceEntity(sb, i);
              }
          }
          return sb.ToString();
      }

      /// <summary>
      /// Replaces the entity that starts at index <paramref name="i"/> with its
      /// numeric character reference when the entity map contains it.
      /// </summary>
      /// <param name="sb">The buffer being converted; it is modified in place.</param>
      /// <param name="i">The index of the '&amp;' that starts the entity.</param>
      /// <returns>
      /// The index of the ';' that ends the entity as it now stands in
      /// <paramref name="sb"/>, so the caller resumes immediately after it.
      /// </returns>
      /// <exception cref="ArgumentException">No ';' follows the '&amp;'.</exception>
      private static int ReplaceEntity(StringBuilder sb, int i)
      {
          int start = i;

          // Find the ';' that terminates the entity.
          int end = start + 1;
          while (end < sb.Length && sb[end] != ';')
          {
              end++;
          }
          if (end == sb.Length)
          {
              throw new ArgumentException("Could not find closing ';' for '&' at position " + start + ".");
          }

          int length = end - start + 1;
          string entity = sb.ToString(start, length);

          // Numeric references (e.g. "&#160;") are already valid XML.
          if (entity[1] == '#')
          {
              return end;
          }

          // Names that are not in the map (e.g. "&amp;") are left unchanged.
          string numericCode;
          if (!entityMap.TryGetValue(entity, out numericCode))
          {
              return end;
          }

          sb.Remove(start, length);
          sb.Insert(start, numericCode);

          // The replacement may differ in length from the original entity, so
          // the resume position is computed from the inserted text. Returning
          // the old ';' index would skip the characters that follow a shorter
          // replacement and miss an adjacent entity.
          return start + numericCode.Length - 1;
      }

      // XDocument won't like HTML entities.
      // This map converts named entities to numeric character references.
      private static readonly Dictionary<string, string> entityMap = new Dictionary<string, string>
      {
          { "&nbsp;", "&#160;" },
          { "&iexcl;", "&#161;" },
          { "&cent;", "&#162;" },
          { "&pound;", "&#163;" },
          { "&curren;", "&#164;" },
          { "&yen;", "&#165;" },
          { "&brvbar;", "&#166;" },
          { "&sect;", "&#167;" },
          { "&uml;", "&#168;" },
          { "&copy;", "&#169;" },
          { "&ordf;", "&#170;" },
          { "&laquo;", "&#171;" },
          { "&not;", "&#172;" },
          { "&shy;", "&#173;" },
          { "&reg;", "&#174;" },
          { "&macr;", "&#175;" },
          { "&deg;", "&#176;" },
          { "&plusmn;", "&#177;" },
          { "&sup2;", "&#178;" },
          { "&sup3;", "&#179;" },
          { "&acute;", "&#180;" },
          { "&micro;", "&#181;" },
          { "&para;", "&#182;" },
          { "&middot;", "&#183;" },
          { "&cedil;", "&#184;" },
          { "&sup1;", "&#185;" },
          { "&ordm;", "&#186;" },
          { "&raquo;", "&#187;" },
          { "&frac14;", "&#188;" },
          { "&frac12;", "&#189;" },
          { "&frac34;", "&#190;" },
          { "&iquest;", "&#191;" },
          { "&times;", "&#215;" },
          { "&divide;", "&#247;" },
          { "&Agrave;", "&#192;" },
          { "&Aacute;", "&#193;" },
          { "&Acirc;", "&#194;" },
          { "&Atilde;", "&#195;" },
          { "&Auml;", "&#196;" },
          { "&Aring;", "&#197;" },
          { "&AElig;", "&#198;" },
          { "&Ccedil;", "&#199;" },
          { "&Egrave;", "&#200;" },
          { "&Eacute;", "&#201;" },
          { "&Ecirc;", "&#202;" },
          { "&Euml;", "&#203;" },
          { "&Igrave;", "&#204;" },
          { "&Iacute;", "&#205;" },
          { "&Icirc;", "&#206;" },
          { "&Iuml;", "&#207;" },
          { "&ETH;", "&#208;" },
          { "&Ntilde;", "&#209;" },
          { "&Ograve;", "&#210;" },
          { "&Oacute;", "&#211;" },
          { "&Ocirc;", "&#212;" },
          { "&Otilde;", "&#213;" },
          { "&Ouml;", "&#214;" },
          { "&Oslash;", "&#216;" },
          { "&Ugrave;", "&#217;" },
          { "&Uacute;", "&#218;" },
          { "&Ucirc;", "&#219;" },
          { "&Uuml;", "&#220;" },
          { "&Yacute;", "&#221;" },
          { "&THORN;", "&#222;" },
          { "&szlig;", "&#223;" },
          { "&agrave;", "&#224;" },
          { "&aacute;", "&#225;" },
          { "&acirc;", "&#226;" },
          { "&atilde;", "&#227;" },
          { "&auml;", "&#228;" },
          { "&aring;", "&#229;" },
          { "&aelig;", "&#230;" },
          { "&ccedil;", "&#231;" },
          { "&egrave;", "&#232;" },
          { "&eacute;", "&#233;" },
          { "&ecirc;", "&#234;" },
          { "&euml;", "&#235;" },
          { "&igrave;", "&#236;" },
          { "&iacute;", "&#237;" },
          { "&icirc;", "&#238;" },
          { "&iuml;", "&#239;" },
          { "&eth;", "&#240;" },
          { "&ntilde;", "&#241;" },
          { "&ograve;", "&#242;" },
          { "&oacute;", "&#243;" },
          { "&ocirc;", "&#244;" },
          { "&otilde;", "&#245;" },
          { "&ouml;", "&#246;" },
          { "&oslash;", "&#248;" },
          { "&ugrave;", "&#249;" },
          { "&uacute;", "&#250;" },
          { "&ucirc;", "&#251;" },
          { "&uuml;", "&#252;" },
          { "&yacute;", "&#253;" },
          { "&thorn;", "&#254;" },
          { "&yuml;", "&#255;" },
          { "&OElig;", "&#338;" },
          { "&oelig;", "&#339;" },
          { "&Scaron;", "&#352;" },
          { "&scaron;", "&#353;" },
          { "&Yuml;", "&#376;" },
          { "&fnof;", "&#402;" },
          { "&circ;", "&#710;" },
          { "&tilde;", "&#732;" },
          { "&ensp;", "&#8194;" },
          { "&emsp;", "&#8195;" },
          { "&thinsp;", "&#8201;" },
          { "&zwnj;", "&#8204;" },
          { "&zwj;", "&#8205;" },
          { "&lrm;", "&#8206;" },
          { "&rlm;", "&#8207;" },
          { "&ndash;", "&#8211;" },
          { "&mdash;", "&#8212;" },
          { "&lsquo;", "&#8216;" },
          { "&rsquo;", "&#8217;" },
          { "&sbquo;", "&#8218;" },
          { "&ldquo;", "&#8220;" },
          { "&rdquo;", "&#8221;" },
          { "&bdquo;", "&#8222;" },
          { "&dagger;", "&#8224;" },
          { "&Dagger;", "&#8225;" },
          { "&bull;", "&#8226;" },
          { "&hellip;", "&#8230;" },
          { "&permil;", "&#8240;" },
          { "&prime;", "&#8242;" },
          { "&Prime;", "&#8243;" },
          { "&lsaquo;", "&#8249;" },
          { "&rsaquo;", "&#8250;" },
          { "&oline;", "&#8254;" },
          { "&euro;", "&#8364;" },
          { "&trade;", "&#8482;" },
          { "&larr;", "&#8592;" },
          { "&uarr;", "&#8593;" },
          { "&rarr;", "&#8594;" },
          { "&darr;", "&#8595;" },
          { "&harr;", "&#8596;" },
          { "&crarr;", "&#8629;" },
          { "&lceil;", "&#8968;" },
          { "&rceil;", "&#8969;" },
          { "&lfloor;", "&#8970;" },
          { "&rfloor;", "&#8971;" },
          { "&loz;", "&#9674;" },
          { "&spades;", "&#9824;" },
          { "&clubs;", "&#9827;" },
          { "&hearts;", "&#9829;" },
          { "&diams;", "&#9830;" },
          { "&Alpha;", "&#913;" },
          { "&Beta;", "&#914;" },
          { "&Gamma;", "&#915;" },
          { "&Delta;", "&#916;" },
          { "&Epsilon;", "&#917;" },
          { "&Zeta;", "&#918;" },
          { "&Eta;", "&#919;" },
          { "&Theta;", "&#920;" },
          { "&Iota;", "&#921;" },
          { "&Kappa;", "&#922;" },
          { "&Lambda;", "&#923;" },
          { "&Mu;", "&#924;" },
          { "&Nu;", "&#925;" },
          { "&Xi;", "&#926;" },
          { "&Omicron;", "&#927;" },
          { "&Pi;", "&#928;" },
          { "&Rho;", "&#929;" },
          { "&Sigma;", "&#931;" },
          { "&Tau;", "&#932;" },
          { "&Upsilon;", "&#933;" },
          { "&Phi;", "&#934;" },
          { "&Chi;", "&#935;" },
          { "&Psi;", "&#936;" },
          { "&Omega;", "&#937;" },
          { "&alpha;", "&#945;" },
          { "&beta;", "&#946;" },
          { "&gamma;", "&#947;" },
          { "&delta;", "&#948;" },
          { "&epsilon;", "&#949;" },
          { "&zeta;", "&#950;" },
          { "&eta;", "&#951;" },
          { "&theta;", "&#952;" },
          { "&iota;", "&#953;" },
          { "&kappa;", "&#954;" },
          { "&lambda;", "&#955;" },
          { "&mu;", "&#956;" },
          { "&nu;", "&#957;" },
          { "&xi;", "&#958;" },
          { "&omicron;", "&#959;" },
          { "&pi;", "&#960;" },
          { "&rho;", "&#961;" },
          { "&sigmaf;", "&#962;" },
          { "&sigma;", "&#963;" },
          { "&tau;", "&#964;" },
          { "&upsilon;", "&#965;" },
          { "&phi;", "&#966;" },
          { "&chi;", "&#967;" },
          { "&psi;", "&#968;" },
          { "&omega;", "&#969;" },
          { "&thetasym;", "&#977;" },
          { "&upsih;", "&#978;" },
          { "&piv;", "&#982;" },
          { "&forall;", "&#8704;" },
          { "&part;", "&#8706;" },
          { "&exists;", "&#8707;" },
          { "&empty;", "&#8709;" },
          { "&nabla;", "&#8711;" },
          { "&isin;", "&#8712;" },
          { "&notin;", "&#8713;" },
          { "&ni;", "&#8715;" },
          { "&prod;", "&#8719;" },
          { "&sum;", "&#8721;" },
          { "&minus;", "&#8722;" },
          { "&lowast;", "&#8727;" },
          { "&radic;", "&#8730;" },
          { "&prop;", "&#8733;" },
          { "&infin;", "&#8734;" },
          { "&ang;", "&#8736;" },
          { "&and;", "&#8743;" },
          { "&or;", "&#8744;" },
          { "&cap;", "&#8745;" },
          { "&cup;", "&#8746;" },
          { "&int;", "&#8747;" },
          { "&there4;", "&#8756;" },
          { "&sim;", "&#8764;" },
          { "&cong;", "&#8773;" },
          { "&asymp;", "&#8776;" },
          { "&ne;", "&#8800;" },
          { "&equiv;", "&#8801;" },
          { "&le;", "&#8804;" },
          { "&ge;", "&#8805;" },
          { "&sub;", "&#8834;" },
          { "&sup;", "&#8835;" },
          { "&nsub;", "&#8836;" },
          { "&sube;", "&#8838;" },
          { "&supe;", "&#8839;" },
          { "&oplus;", "&#8853;" },
          { "&otimes;", "&#8855;" },
          { "&perp;", "&#8869;" },
          { "&sdot;", "&#8901;" }
      };
  }
  288. }