PageRenderTime 45ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/src/ServiceStack/Support/MarkdownDeep/Utils.cs

http://github.com/ServiceStack/ServiceStack
C# | 487 lines | 341 code | 70 blank | 76 comment | 101 complexity | a5a04203606186c360fa3992da4fa1f5 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. //
  2. // MarkdownDeep - http://www.toptensoftware.com/markdowndeep
  3. // Copyright (C) 2010-2011 Topten Software
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this product except in
  6. // compliance with the License. You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed under the License is
  11. // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and limitations under the License.
  13. //
  14. using System;
  15. using System.Collections.Generic;
  16. using System.Linq;
  17. using System.Text;
  18. using System.Text.RegularExpressions;
  19. using ServiceStack.Text;
  20. namespace MarkdownDeep
  21. {
  22. /*
  23. * Various utility and extension methods
  24. */
  25. static class Utils
  26. {
  // Extension method. Returns the final element of `list`, or default(T)
  // (null for reference types, zero for numeric types) when the list is empty.
  public static T Last<T>(this List<T> list)
  {
      int count = list.Count;
      if (count == 0)
      {
          return default(T);
      }

      return list[count - 1];
  }
  // Extension method. Returns the element at index 0 of `list`, or default(T)
  // (null for reference types, zero for numeric types) when the list is empty.
  public static T First<T>(this List<T> list)
  {
      if (list.Count == 0)
      {
          return default(T);
      }

      return list[0];
  }
  // Extension method. Treats the list as a stack whose top is the last
  // element: `value` is placed after all existing items.
  public static void Push<T>(this List<T> list, T value)
  {
      // Inserting at index Count is an append, i.e. the new top of the stack.
      list.Insert(list.Count, value);
  }
  // Extension method. Removes the last item from the list and returns it.
  // Returns default(T) and leaves the list untouched when it is empty.
  public static T Pop<T>(this List<T> list)
  {
      int top = list.Count - 1;
      if (top < 0)
      {
          return default(T);
      }

      T popped = list[top];
      list.RemoveAt(top);
      return popped;
  }
  // Scans `str` at `pos` for an identifier: a letter or underscore followed by
  // any number of letters, digits or underscores.
  //
  // On a match, `identifer` receives the identifier text, `pos` is advanced to
  // the character after it, and true is returned. On failure (including `pos`
  // at or past the end of the string) both ref arguments are left unchanged
  // and false is returned.
  public static bool ParseIdentifier(string str, ref int pos, ref string identifer)
  {
      // Nothing left to scan
      if (pos >= str.Length)
      {
          return false;
      }

      // The leading character may not be a digit
      char first = str[pos];
      if (first != '_' && !char.IsLetter(first))
      {
          return false;
      }

      // Walk forward until the first character that can't be part of an identifier
      int scan = pos + 1;
      for (; scan < str.Length; scan++)
      {
          char c = str[scan];
          if (c != '_' && !char.IsLetter(c) && !char.IsDigit(c))
          {
              break;
          }
      }

      identifer = str.Substring(pos, scan - pos);
      pos = scan;
      return true;
  }
  // Skips over anything that looks like a complete html entity starting at
  // `pos`: a named entity (&amp;), a decimal entity (&#123;) or a hex entity
  // (&#x1F;). The terminating semicolon is required.
  //
  // On a match, `entity` receives the full entity text (from '&' through ';'),
  // `pos` is advanced to the character after the ';' and true is returned.
  // Otherwise both ref arguments are left unchanged and false is returned.
  //
  // A null string or a position outside the string is "no entity" rather than
  // an exception, matching the end-of-input handling in ParseIdentifier.
  public static bool SkipHtmlEntity(string str, ref int pos, ref string entity)
  {
      if (str == null || pos < 0 || pos >= str.Length)
          return false;

      if (str[pos] != '&')
          return false;

      int savepos = pos;
      int len = str.Length;
      int i = pos + 1;

      // Number entity?
      bool bNumber = false;
      bool bHex = false;
      if (i < len && str[i] == '#')
      {
          bNumber = true;
          i++;

          // Hex entity?
          if (i < len && (str[i] == 'x' || str[i] == 'X'))
          {
              bHex = true;
              i++;
          }
      }

      // Parse the content
      int contentpos = i;
      while (i < len)
      {
          char ch = str[i];

          // Numeric character references only allow ASCII digits; char.IsDigit
          // would also accept other Unicode decimal digits (e.g. Arabic-Indic),
          // which do not form a valid entity.
          bool isAsciiDigit = ch >= '0' && ch <= '9';

          if (bHex)
          {
              if (!(isAsciiDigit || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')))
                  break;
          }
          else if (bNumber)
          {
              if (!isAsciiDigit)
                  break;
          }
          else if (!char.IsLetterOrDigit(ch))
          {
              break;
          }

          i++;
      }

      // Quit if ran out of string
      if (i == len)
          return false;

      // Quit if nothing in the content
      if (i == contentpos)
          return false;

      // Quit if didn't find a semicolon
      if (str[i] != ';')
          return false;

      // Looks good...
      pos = i + 1;
      entity = str.Substring(savepos, pos - savepos);
      return true;
  }
  // Obfuscates `str` by appending each of its characters to `dest` either
  // verbatim, as a decimal entity (&#NN;) or as a hex entity (&#xNN;).
  //
  // The choice per character is pseudo-random but seeded from the sum of the
  // characters in `str`, so the same input always yields the same output.
  // '@' is never emitted verbatim.
  public static void HtmlRandomize(StringBuilder dest, string str)
  {
      // Deterministic seed: wrap-around sum of all character codes
      int seed = 0;
      for (int n = 0; n < str.Length; n++)
      {
          seed = unchecked(seed + str[n]);
      }
      Random rng = new Random(seed);

      for (int n = 0; n < str.Length; n++)
      {
          char c = str[n];
          int roll = rng.Next() % 100;

          // Small chance of leaving the character as-is (never for '@')
          if (roll > 90 && c != '@')
          {
              dest.Append(c);
              continue;
          }

          int code = c;
          if (roll > 45)
          {
              dest.Append("&#").Append(code.ToString()).Append(";");
          }
          else
          {
              dest.Append("&#x").Append(code.ToString("x")).Append(";");
          }
      }
  }
  // Like HtmlEncode, but leaves alone any '&' that already starts something
  // that looks like an html entity. Appends the encoded form of `str` to
  // `dest`: bare '&' becomes &amp;, '<' becomes &lt;, '>' becomes &gt; and
  // '"' becomes &quot;. Does nothing when `str` is null.
  public static void SmartHtmlEncodeAmpsAndAngles(StringBuilder dest, string str)
  {
      if (str == null)
      {
          return;
      }

      int i = 0;
      while (i < str.Length)
      {
          char ch = str[i];

          if (ch == '&')
          {
              int entityStart = i;
              string ignored = null;
              if (SkipHtmlEntity(str, ref i, ref ignored))
              {
                  // Existing entity: copy it through untouched. `i` already
                  // points at the character after it, so don't advance again.
                  dest.Append(str, entityStart, i - entityStart);
                  continue;
              }

              dest.Append("&amp;");
          }
          else if (ch == '<')
          {
              dest.Append("&lt;");
          }
          else if (ch == '>')
          {
              dest.Append("&gt;");
          }
          else if (ch == '\"')
          {
              dest.Append("&quot;");
          }
          else
          {
              dest.Append(ch);
          }

          i++;
      }
  }
  // Like HtmlEncode, but only escapes '&' characters that don't already start
  // something that looks like an html entity. Processes the `len` characters
  // of `str` beginning at `startOffset` and appends the result to `dest`;
  // every other character is copied through unchanged.
  //
  // An entity is only passed through when it lies entirely inside the
  // requested range. If it would run past the end of the range, the '&' is
  // escaped as a bare ampersand so that no characters outside
  // [startOffset, startOffset + len) are ever written to `dest`.
  public static void SmartHtmlEncodeAmps(StringBuilder dest, string str, int startOffset, int len)
  {
      int end = startOffset + len;
      for (int i = startOffset; i < end; i++)
      {
          if (str[i] != '&')
          {
              dest.Append(str[i]);
              continue;
          }

          // Probe with a copy of the index so a rejected match leaves `i` alone
          int next = i;
          string unused = null;
          if (SkipHtmlEntity(str, ref next, ref unused) && next <= end)
          {
              dest.Append(str, i, next - i);

              // The loop increment moves us to the character after the entity
              i = next - 1;
          }
          else
          {
              dest.Append("&amp;");
          }
      }
  }
  // Checks whether `str` is present in `list`.
  //
  // Uses ordinal (exact, case-sensitive) equality. The previous
  // culture-sensitive string.Compare treats some distinct strings as equal
  // (for example ones differing only by ignorable characters such as a soft
  // hyphen), which is wrong for matching fixed tokens.
  // Returns false when `list` is null.
  public static bool IsInList(string str, string[] list)
  {
      if (list == null)
          return false;

      foreach (var t in list)
      {
          if (string.Equals(t, str, StringComparison.Ordinal))
              return true;
      }

      return false;
  }
  // Checks if a url is "safe": it must start with one of the allowed protocol
  // prefixes (http://, https:// or ftp://). Anything else - including
  // "javascript:" in any encoding, relative urls and null - is rejected.
  //
  // Prefixes are matched ordinally (exact, case-sensitive) so that
  // culture-specific comparison rules, such as ignoring zero-width or soft
  // hyphen characters, cannot let a look-alike prefix through.
  public static bool IsSafeUrl(string url)
  {
      if (url == null)
          return false;

      return url.StartsWith("http://", StringComparison.Ordinal)
          || url.StartsWith("https://", StringComparison.Ordinal)
          || url.StartsWith("ftp://", StringComparison.Ordinal);
  }
  // Checks if a character can be backslash-escaped in markdown.
  //
  // The standard set is always escapable ('>' is not in the markdown
  // documentation but is in markdown.pl). The characters ':', '|', '=' and
  // '<' are escapable only when `ExtraMode` is true ('=' was added for
  // escaping Setext H1).
  public static bool IsEscapableChar(char ch, bool ExtraMode)
  {
      const string alwaysEscapable = "\\`*_{}[]()>#+-.!";
      const string extraModeOnly = ":|=<";

      if (alwaysEscapable.IndexOf(ch) >= 0)
      {
          return true;
      }

      if (extraModeOnly.IndexOf(ch) >= 0)
      {
          return ExtraMode;
      }

      return false;
  }
  // Extension method. Advances the scanner by two characters when it is
  // positioned on a backslash followed by an escapable character (see
  // IsEscapableChar), and by one character otherwise.
  public static void SkipEscapableChar(this StringScanner p, bool ExtraMode)
  {
      bool atEscape = p.current == '\\' && IsEscapableChar(p.CharAtOffset(1), ExtraMode);
      int step = atEscape ? 2 : 1;
      p.SkipForward(step);
  }
  // Removes markdown escapes from a string: every backslash that is directly
  // followed by an escapable character (see IsEscapableChar) is dropped and
  // the escaped character is kept. Other backslashes are left in place.
  // Returns the input itself when it is null or contains no backslash.
  public static string UnescapeString(string str, bool ExtraMode)
  {
      if (str == null)
      {
          return str;
      }
      if (str.IndexOf('\\') < 0)
      {
          return str;
      }

      var buffer = StringBuilderCacheAlt.Allocate();
      int i = 0;
      while (i < str.Length)
      {
          bool isEscape = str[i] == '\\'
              && i + 1 < str.Length
              && IsEscapableChar(str[i + 1], ExtraMode);

          if (isEscape)
          {
              // Step over the backslash so only the escaped character is copied
              i++;
          }

          buffer.Append(str[i]);
          i++;
      }

      return StringBuilderCacheAlt.ReturnAndFree(buffer);
  }
  // Normalizes the line ends in a string to just '\n'.
  //
  // Strings containing neither '\r' nor '\n' are returned as-is. Otherwise the
  // string is re-scanned and every line end reported by StringScanner.eol is
  // replaced with a single '\n'. Which sequences count as one line end
  // ('\r\n', '\n\r', '\n', '\r' per the original author's note) is decided by
  // StringScanner.eol / SkipEol, which are defined elsewhere.
  static char[] lineends = { '\r', '\n' };
  public static string NormalizeLineEnds(string str)
  {
      // Fast path: nothing to normalize
      if (str.IndexOfAny(lineends) < 0)
      {
          return str;
      }

      var output = StringBuilderCacheAlt.Allocate();
      StringScanner scanner = new StringScanner(str);
      while (!scanner.eof)
      {
          if (!scanner.eol)
          {
              // Ordinary character: copy through
              output.Append(scanner.current);
              scanner.SkipForward(1);
              continue;
          }

          // Line end of whatever flavour: emit one '\n' and step past it
          output.Append('\n');
          scanner.SkipEol();
      }

      return StringBuilderCacheAlt.ReturnAndFree(output);
  }
  328. /*
  329. * These two functions IsEmailAddress and IsWebAddress
  330. * are intended as a quick and dirty way to tell if a
  331. * <autolink> url is email, web address or neither.
  332. *
  333. * They are not intended as validating checks.
  334. *
  335. * (use of Regex for more correct test unnecessarily
  336. * slowed down some test documents by up to 300%.)
  337. */
  // Quick check whether a string looks like an email address: it must contain
  // an '@', and its last '.' must come after the first '@'.
  // This is a cheap heuristic, not a validator.
  public static bool IsEmailAddress(string str)
  {
      int atIndex = str.IndexOf('@');
      return atIndex >= 0 && str.LastIndexOf('.') >= atIndex;
  }
  // Quick check whether a string looks like a url: it must start with
  // http://, https://, ftp:// or file://. Returns false for null.
  //
  // Prefixes are matched ordinally (exact, case-sensitive). Besides being
  // independent of the current culture's comparison rules, ordinal matching
  // is the cheapest form of StartsWith, in keeping with the intent that this
  // check stays quick.
  public static bool IsWebAddress(string str)
  {
      if (str == null)
          return false;

      return str.StartsWith("http://", StringComparison.Ordinal)
          || str.StartsWith("https://", StringComparison.Ordinal)
          || str.StartsWith("ftp://", StringComparison.Ordinal)
          || str.StartsWith("file://", StringComparison.Ordinal);
  }
  // Checks if a string is usable as an HTML ID: non-empty, starting with a
  // letter, and made up only of letters, digits, '_', '-', ':' and '.'.
  internal static bool IsValidHtmlID(string str)
  {
      if (String.IsNullOrEmpty(str))
      {
          return false;
      }

      // First character has the stricter rule: letters only
      if (!Char.IsLetter(str[0]))
      {
          return false;
      }

      // Every character (the first included) must come from the allowed set
      foreach (char c in str)
      {
          bool allowed = Char.IsLetterOrDigit(c) || c == '_' || c == '-' || c == ':' || c == '.';
          if (!allowed)
          {
              return false;
          }
      }

      return true;
  }
  // Strips the trailing HTML ID from a header string
  // ie:      ## header text ##    {#<idhere>}
  //             ^start        ^out end        ^end
  //
  // Looks at str[start..end) for a trailing "{#id}" group (optionally followed
  // by whitespace). When one is found and the id passes IsValidHtmlID, `end`
  // is moved back to just before the group and any whitespace preceding it,
  // and the id is returned. Otherwise `end` is left unchanged and null is
  // returned.
  public static string StripHtmlID(string str, int start, ref int end)
  {
      // Walk backwards over trailing whitespace
      int cursor = end - 1;
      while (cursor >= start && Char.IsWhiteSpace(str[cursor]))
      {
          cursor--;
      }

      // The last significant character must be the closing '}'
      if (cursor < start || str[cursor] != '}')
      {
          return null;
      }
      int closeBrace = cursor;

      // Scan back to the opening '{'
      cursor = closeBrace - 1;
      while (cursor >= start && str[cursor] != '{')
      {
          cursor--;
      }
      if (cursor < start)
      {
          return null;
      }

      // The brace must be immediately followed by '#'
      if (str[cursor + 1] != '#')
      {
          return null;
      }

      // Extract and check the ID (the text between "{#" and "}")
      int idStart = cursor + 2;
      string id = str.Substring(idStart, closeBrace - idStart);
      if (!IsValidHtmlID(id))
      {
          return null;
      }

      // Skip any whitespace preceding the '{'
      while (cursor > start && Char.IsWhiteSpace(str[cursor - 1]))
      {
          cursor--;
      }

      // Done!
      end = cursor;
      return id;
  }
  // Checks whether a url is fully qualified, i.e. it contains a "://"
  // protocol separator anywhere or starts with "mailto:".
  //
  // Both checks are ordinal (exact, case-sensitive): string.Contains is
  // ordinal by definition, and StartsWith is given StringComparison.Ordinal
  // explicitly so the result does not depend on the current culture.
  public static bool IsUrlFullyQualified(string url)
  {
      return url.Contains("://") || url.StartsWith("mailto:", StringComparison.Ordinal);
  }
  416. }
  417. }