PageRenderTime 46ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/Watcher/Utility.cs

#
C# | 677 lines | 421 code | 93 blank | 163 comment | 78 complexity | ede8bd409245bfe302aeea4fb959c0fd MD5 | raw file
Possible License(s): LGPL-2.1, CPL-1.0
  1. // WATCHER
  2. //
  3. // Utility.cs
  4. // Main implementation of Watcher Utility functions.
  5. //
  6. // Copyright (c) 2010 Casaba Security, LLC
  7. // All Rights Reserved.
  8. //
  9. using System;
  10. using System.IO;
  11. using System.Diagnostics;
  12. using System.Collections.Specialized;
  13. using System.Collections.Generic;
  14. using System.Globalization;
  15. using System.Text;
  16. using System.Text.RegularExpressions;
  17. using System.Web;
  18. using Fiddler;
  19. namespace CasabaSecurity.Web.Watcher
  20. {
  21. public static class Utility
  22. {
  23. #region Public Method(s)
  /// <summary>
  /// Encode the specified string to Base-64, using its UTF-8 byte representation.
  /// </summary>
  /// <param name="data">The string to encode.</param>
  /// <returns>
  /// The Base-64 text, or <see cref="String.Empty"/> when <paramref name="data"/> is null
  /// or cannot be encoded.
  /// </returns>
  public static string Base64Encode(String data)
  {
      Debug.Assert(data != null, "Cannot encode a null parameter.");
      if (data == null)
      {
          Trace.TraceWarning("Warning: Base64Encode: Not attempting to encode null parameter.");
          return String.Empty;
      }

      try
      {
          byte[] encodedBytes = System.Text.Encoding.UTF8.GetBytes(data);
          return Convert.ToBase64String(encodedBytes);
      }
      catch (ArgumentNullException e)
      {
          // Thrown if the argument to ToBase64String is null
          Trace.TraceError("Error: ArgumentNullException: {0}", e.Message);
      }
      catch (EncoderFallbackException e)
      {
          // Thrown if the string fails to be converted to UTF-8 bytes. The message names the
          // exception that is actually caught (it previously named a decoder exception).
          Trace.TraceError("Error: EncoderFallbackException: {0}", e.Message);
      }

      // Failures are logged above and reported to the caller as an empty string.
      return String.Empty;
  }
  /// <summary>
  /// Decode the specified Base-64 string and interpret the resulting bytes as UTF-8 text.
  /// </summary>
  /// <param name="data">The encoded Base-64 string.</param>
  /// <returns>
  /// The decoded text, or <see cref="String.Empty"/> when <paramref name="data"/> is null,
  /// is not valid Base-64, or cannot be decoded.
  /// </returns>
  public static string Base64Decode(String data)
  {
      Debug.Assert(data != null, "Cannot decode a null parameter.");
      if (data == null)
      {
          Trace.TraceWarning("Warning: Base64Decode: Not attempting to decode null parameter.");
          return String.Empty;
      }

      try
      {
          byte[] decodedBytes = Convert.FromBase64String(data);
          return System.Text.Encoding.UTF8.GetString(decodedBytes);
      }
      catch (ArgumentNullException e)
      {
          // Thrown if the argument to FromBase64String or GetString is null
          Trace.TraceError("Error: ArgumentNullException: {0}", e.Message);
      }
      catch (FormatException e)
      {
          // Thrown if the string to convert is not in the proper format
          Trace.TraceError("Error: FormatException: {0}", e.Message);
      }
      catch (DecoderFallbackException e)
      {
          // Thrown if the bytes fail to be converted from UTF-8
          Trace.TraceError("Error: DecoderFallbackException: {0}", e.Message);
      }

      // Failures are logged above and reported to the caller as an empty string.
      return String.Empty;
  }
  /// <summary>
  /// Copy the contents of <paramref name="readStream"/> into <paramref name="writeStream"/>
  /// and then close both streams.
  /// </summary>
  /// <remarks>
  /// A seekable source is rewound to position 0 before copying; a non-seekable source is
  /// copied from its current position. Both streams are closed even if the copy fails.
  /// </remarks>
  /// <param name="readStream">The stream to read from.</param>
  /// <param name="writeStream">The stream to write to.</param>
  /// <exception cref="ArgumentNullException">Either stream is null.</exception>
  public static void ReadWriteStream(Stream readStream, Stream writeStream)
  {
      if (readStream == null)
      {
          throw new ArgumentNullException("readStream");
      }
      if (writeStream == null)
      {
          throw new ArgumentNullException("writeStream");
      }

      const int BufferSize = 256;
      byte[] buffer = new byte[BufferSize];

      try
      {
          // Setting Position on a non-seekable stream throws, so only rewind when possible.
          if (readStream.CanSeek)
          {
              readStream.Position = 0;
          }

          int bytesRead;
          while ((bytesRead = readStream.Read(buffer, 0, BufferSize)) > 0)
          {
              writeStream.Write(buffer, 0, bytesRead);
          }
      }
      finally
      {
          // Nested so that a failure closing the source cannot leave the sink open.
          try
          {
              readStream.Close();
          }
          finally
          {
              writeStream.Close();
          }
      }
  }
  /// <summary>
  /// Return the response's Content-Type header value in lower case.
  /// </summary>
  /// <param name="session">The Fiddler HTTP session to examine.</param>
  /// <returns>The lower-cased header value, or null when the response has no Content-Type header.</returns>
  public static String GetResponseContentType(Session session)
  {
      if (session.oResponse.headers.Exists("content-type"))
      {
          // Invariant lowering: a culture-sensitive ToLower() maps 'I' to a dotless 'ı' under
          // Turkish cultures, which would break comparisons such as "application/...".
          return session.oResponse.headers["content-type"].ToLowerInvariant();
      }

      return null;
  }
  /// <summary>
  /// Determine whether the URL-decoded form of a string contains an email address.
  /// </summary>
  /// <param name="s">The text to search; may be URL-encoded.</param>
  /// <returns>
  /// True when an address-like token is found; false otherwise, including for null or empty input.
  /// </returns>
  public static bool IsEmailAddress(String s)
  {
      // Regex.IsMatch throws on null input, so treat "nothing to search" as "no match".
      if (String.IsNullOrEmpty(s))
      {
          return false;
      }

      // Doesn't hurt to UrlDecode the string since we're looking for an email address
      // (e.g. "%40" becomes "@").
      String decoded = HttpUtility.UrlDecode(s);

      return Regex.IsMatch(decoded, "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}\\b", RegexOptions.IgnoreCase);
  }
  /// <summary>
  /// Determine whether a string contains a number shaped like a major credit card number.
  /// </summary>
  /// <param name="s">The text to search.</param>
  /// <returns>
  /// True when a card-number pattern is found and the string contains no "." character.
  /// </returns>
  public static bool IsCreditCard(String s)
  {
      // One pattern covering the number formats of the major card brands.
      const String CardNumberPattern = "\\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35\\d{3})\\d{11})\\b";

      bool looksLikeCardNumber = Regex.IsMatch(s, CardNumberPattern, RegexOptions.IgnoreCase);

      // FALSE POSITIVE REDUCTION
      // Session ids of the form 0.1234123412341234 match the pattern, so any input
      // that contains a "." is rejected.
      return looksLikeCardNumber && !s.Contains(".");
  }
  /// <summary>
  /// Determine whether a string contains a dash-separated US Social Security Number
  /// (three digits, two digits, four digits).
  /// </summary>
  /// <param name="s">The text to search.</param>
  /// <returns>True when a dashed SSN pattern is found.</returns>
  public static bool IsUsSSN(String s)
  {
      // Only the dashed form is recognized; undashed digit runs are not matched.
      const String DashedSsnPattern = "\\b[0-9]{3}-[0-9]{2}-[0-9]{4}\\b";

      bool found = Regex.IsMatch(s, DashedSsnPattern, RegexOptions.IgnoreCase);
      return found;
  }
  /// <summary>
  /// Determine whether the response's lower-cased Content-Type header value begins with
  /// the given content type.
  /// </summary>
  /// <param name="session">The Fiddler HTTP session to examine.</param>
  /// <param name="contentType">The content type prefix to look for.</param>
  /// <returns>True when the header exists and starts with <paramref name="contentType"/>.</returns>
  public static bool IsResponseContentType(Session session, String contentType)
  {
      String headerValue = GetResponseContentType(session);
      if (headerValue == null)
      {
          // No Content-Type header on the response.
          return false;
      }

      // Index 0 means the header value begins with the requested type.
      return headerValue.IndexOf(contentType) == 0;
  }
  /// <summary>
  /// Determine whether the response's lower-cased Content-Type header value contains
  /// the given character set name anywhere within it.
  /// </summary>
  /// <param name="session">The Fiddler HTTP session to examine.</param>
  /// <param name="charset">The character set text to look for.</param>
  /// <returns>True when the header exists and contains <paramref name="charset"/>.</returns>
  public static bool IsResponseCharset(Session session, String charset)
  {
      String headerValue = GetResponseContentType(session);
      if (headerValue == null)
      {
          // No Content-Type header on the response.
          return false;
      }

      return headerValue.IndexOf(charset) >= 0;
  }
  /// <summary>
  /// Determine whether the response has a body and declares an HTML or XHTML content type.
  /// </summary>
  /// <remarks>
  /// TODO: Support other variations of text/html.
  /// Known limitation: Atom and RSS feeds that are served as text/html but contain XML
  /// content are also reported as HTML.
  /// </remarks>
  /// <param name="session">The Fiddler HTTP session to examine.</param>
  /// <returns>True for a non-null body with a text/html or XHTML content type.</returns>
  public static bool IsResponseHtml(Session session)
  {
      // Without a response body there is nothing to classify.
      if (session.responseBodyBytes == null)
      {
          return false;
      }

      if (IsResponseContentType(session, "text/html"))
      {
          return true;
      }

      return IsResponseXhtml(session);
  }
  /// <summary>
  /// Determine whether the response has a body and declares an XHTML content type.
  /// </summary>
  /// <param name="session">The Fiddler HTTP session to examine.</param>
  /// <returns>
  /// True for a non-null body whose content type starts with application/xhtml+xml or application/xhtml.
  /// </returns>
  public static bool IsResponseXhtml(Session session)
  {
      // Without a response body there is nothing to classify.
      if (session.responseBodyBytes == null)
      {
          return false;
      }

      if (IsResponseContentType(session, "application/xhtml+xml"))
      {
          return true;
      }

      return IsResponseContentType(session, "application/xhtml");
  }
  /// <summary>
  /// Determine whether the response declares a text/css content type.
  /// </summary>
  /// <remarks>TODO: Support other variations of text/css.</remarks>
  /// <param name="session">The Fiddler HTTP session to examine.</param>
  /// <returns>True when the Content-Type header starts with text/css.</returns>
  public static bool IsResponseCss(Session session)
  {
      bool isCss = IsResponseContentType(session, "text/css");
      return isCss;
  }
  /// <summary>
  /// Determine whether the response declares a JavaScript content type.
  /// </summary>
  /// <remarks>
  /// Recognizes text/javascript, application/javascript, application/x-javascript,
  /// application/ecmascript and text/ecmascript. text/javascript in particular was
  /// previously missed.
  /// </remarks>
  /// <param name="session">The Fiddler HTTP session to examine.</param>
  /// <returns>True when the Content-Type header starts with one of the recognized types.</returns>
  public static bool IsResponseJavascript(Session session)
  {
      String[] javascriptTypes = new String[]
      {
          "application/javascript",
          "application/x-javascript",
          "text/javascript",
          "application/ecmascript",
          "text/ecmascript"
      };

      foreach (String candidate in javascriptTypes)
      {
          if (IsResponseContentType(session, candidate))
          {
              return true;
          }
      }

      return false;
  }
  /// <summary>
  /// Determine whether the response declares an XML content type.
  /// </summary>
  /// <remarks>TODO: Support other variations of text/xml.</remarks>
  /// <param name="session">The Fiddler HTTP session to examine.</param>
  /// <returns>True when the Content-Type header starts with text/xml or application/xml.</returns>
  public static bool IsResponseXml(Session session)
  {
      if (IsResponseContentType(session, "text/xml"))
      {
          return true;
      }

      return IsResponseContentType(session, "application/xml");
  }
  /// <summary>
  /// Determine whether the response declares a text/plain content type.
  /// </summary>
  /// <param name="session">The Fiddler HTTP session to examine.</param>
  /// <returns>True when the Content-Type header starts with text/plain.</returns>
  public static bool IsResponsePlain(Session session)
  {
      bool isPlainText = IsResponseContentType(session, "text/plain");
      return isPlainText;
  }
  /// <summary>
  /// Attempt to determine the character set used by the response document. If the character
  /// set cannot be determined, return UTF-8 (a reasonable guess).
  /// </summary>
  /// <remarks>
  /// The charset token of the HTTP Content-Type header wins. Otherwise the body is scanned
  /// for <c>&lt;meta http-equiv="content-type" content="...; charset=..."&gt;</c> tags and the
  /// charset of the last such tag that declares one is used.
  /// TODO: Extract XML/XHtml character sets?
  /// </remarks>
  /// <param name="session">The Fiddler HTTP session to examine.</param>
  /// <returns>The lower-cased character set name, or "utf-8" when none could be determined.</returns>
  public static String GetHtmlCharset(Session session)
  {
      const String DefaultEncoding = "utf-8"; // Return UTF-8 if unsure, ASCII is preserved.

      // Favor the character set from the HTTP Content-Type header if it exists.
      String characterSet = session.oResponse.headers.GetTokenValue("Content-Type", "charset");
      if (!String.IsNullOrEmpty(characterSet))
      {
          // Found the character set in the header: normalize and return.
          return characterSet.Trim().ToLower(CultureInfo.InvariantCulture);
      }

      // If there is no content, return the default character set.
      // (This must test the RESPONSE body; the request body is irrelevant here and may be null.)
      if (session.responseBodyBytes == null || session.responseBodyBytes.Length == 0)
      {
          Trace.TraceWarning("Warning: Response body byte-array is null, assuming default character set.");
          return DefaultEncoding;
      }

      // Otherwise, parse the document returned for character set hints.
      String responseBody;
      try
      {
          // TODO: Pretty hokey here, defaulting to 7-bit ASCII Encoding
          responseBody = Encoding.ASCII.GetString(session.responseBodyBytes);
      }
      catch (DecoderFallbackException e)
      {
          // Thrown if a character cannot be decoded
          Trace.TraceError("Error: DecoderFallbackException: {0}", e.Message);
          Trace.TraceWarning("Warning: Assuming default characterencoding due to previous error.");
          return DefaultEncoding;
      }

      // Find Meta tags specifying the content type, e.g.
      // <meta http-equiv="content-type" content="text/html; charset=utf-8"/>.
      foreach (Match m in Utility.GetHtmlTags(responseBody, "meta"))
      {
          String tag = m.ToString();

          String httpEquiv = Utility.GetHtmlTagAttribute(tag, "http-equiv");
          if (String.IsNullOrEmpty(httpEquiv))
          {
              continue;
          }
          if (httpEquiv.Trim().ToLower(CultureInfo.InvariantCulture) != "content-type")
          {
              continue;
          }

          // Keep only the charset token; the content value also carries the media type.
          String declared = ExtractCharsetToken(Utility.GetHtmlTagAttribute(tag, "content"));
          if (!String.IsNullOrEmpty(declared))
          {
              characterSet = declared;
          }
      }

      if (!String.IsNullOrEmpty(characterSet))
      {
          // Found the character set in the response body: normalize and return.
          return characterSet.Trim().ToLower(CultureInfo.InvariantCulture);
      }

      // Return the default character set if unsure
      return DefaultEncoding;
  }

  /// <summary>
  /// Pull the value of the charset parameter out of a content-type style string such as
  /// "text/html; charset=utf-8".
  /// </summary>
  /// <param name="contentType">The content-type text; may be null.</param>
  /// <returns>The charset value without quotes, or null when no charset parameter is present.</returns>
  private static String ExtractCharsetToken(String contentType)
  {
      if (String.IsNullOrEmpty(contentType))
      {
          return null;
      }

      // The value ends at whitespace, a quote, a parameter separator or a stray tag terminator.
      Match m = Regex.Match(contentType, "charset\\s*=\\s*[\"']?(?<value>[^\\s\"';,>]+)", RegexOptions.IgnoreCase);
      return m.Success ? m.Groups["value"].Value : null;
  }
  /// <summary>
  /// Decode the response body bytes into text, using the character set reported by
  /// <c>GetHtmlCharset</c> and falling back to UTF-8 when that produces nothing.
  /// </summary>
  /// <param name="session">The Fiddler HTTP session to examine.</param>
  /// <returns>The decoded response body, or an empty string when there is no body or decoding fails.</returns>
  public static String GetResponseText(Session session)
  {
      // Nothing to decode when the body is missing or empty.
      byte[] bodyBytes = session.responseBodyBytes;
      if (bodyBytes == null || bodyBytes.Length == 0)
      {
          Trace.TraceWarning("Warning: Response body is empty.");
          return String.Empty;
      }

      String charsetName = Utility.GetHtmlCharset(session);
      String decoded = String.Empty;

      // First attempt: the character set declared by the response.
      try
      {
          Encoding declaredEncoding = Encoding.GetEncoding(charsetName);
          decoded = declaredEncoding.GetString(session.responseBodyBytes);
      }
      catch (DecoderFallbackException e)
      {
          // Thrown if a character cannot be decoded
          Trace.TraceError("Error: DecoderFallbackException: {0}", e.Message);
      }
      catch (ArgumentException e)
      {
          // Thrown if the character set name is not a valid encoding name
          Trace.TraceError("Error: ArgumentException: {0}", e.Message);
      }

      // Second attempt: UTF-8, used when the first attempt produced no text
      // (for example because the character set name was not recognized).
      if (decoded == String.Empty)
      {
          try
          {
              Trace.TraceInformation("Falling back to UTF-8 encoding.");
              decoded = Encoding.UTF8.GetString(session.responseBodyBytes);
          }
          catch (DecoderFallbackException e)
          {
              // Thrown if a character cannot be decoded
              Trace.TraceError("Error: DecoderFallbackException: {0}", e.Message);
          }
      }

      return decoded;
  }
  /// <summary>
  /// Find the opening tags with the given name in a block of HTML, ignoring case.
  /// </summary>
  /// <remarks>
  /// TODO: Update with balanced group constructs.
  /// The tag name is concatenated into the regular expression as-is.
  /// </remarks>
  /// <param name="body">The HTML text to search.</param>
  /// <param name="tagName">The tag name to look for, e.g. "meta".</param>
  /// <returns>One match per opening tag found, with or without attributes.</returns>
  public static MatchCollection GetHtmlTags(String body, String tagName)
  {
      // "<", the tag name, then either optional whitespace or whitespace-separated attributes, then ">".
      String openingTagPattern = String.Concat("<\\s*?", tagName, "((\\s*?)|(\\s+?\\w.*?))>");

      MatchCollection tags = Regex.Matches(body, openingTagPattern, RegexOptions.IgnoreCase);
      return tags;
  }
  /// <summary>
  /// Trim surrounding whitespace from a value and then remove its leading and trailing
  /// quote characters.
  /// </summary>
  /// <remarks>
  /// At each end, double quotes are removed when the value starts (or ends) with one;
  /// otherwise single quotes are removed. Repeated quote characters at an end are all removed.
  /// </remarks>
  /// <param name="val">The value to clean up.</param>
  /// <returns>The value without surrounding whitespace and quotes.</returns>
  public static String StripQuotes(String val)
  {
      String cleaned = val.Trim();

      // Leading side: pick which quote character to strip based on the first character.
      char leadingQuote = cleaned.StartsWith("\"") ? '\"' : '\'';
      cleaned = cleaned.TrimStart(leadingQuote);

      // Trailing side: decided independently, after the leading quotes are gone.
      char trailingQuote = cleaned.EndsWith("\"") ? '\"' : '\'';
      cleaned = cleaned.TrimEnd(trailingQuote);

      return cleaned;
  }
  /// <summary>
  /// Test two strings for equality using invariant-culture comparison rules.
  /// </summary>
  /// <param name="x">The first string.</param>
  /// <param name="y">The second string.</param>
  /// <param name="ignoreCase">True to compare without regard to case.</param>
  /// <returns>True when both strings are non-null and compare as equal; false otherwise.</returns>
  public static bool CompareStrings(String x, String y, bool ignoreCase)
  {
      // A null on either side never counts as equal, not even to another null.
      if (x == null || y == null)
      {
          return false;
      }

      StringComparer comparer = ignoreCase
          ? StringComparer.InvariantCultureIgnoreCase
          : StringComparer.InvariantCulture;

      return comparer.Compare(x, y) == 0;
  }
  /// <summary>
  /// Lower-case a string using the invariant culture, tolerating null.
  /// </summary>
  /// <param name="s">The string to convert; may be null.</param>
  /// <returns>The lower-cased string, or null when <paramref name="s"/> is null.</returns>
  public static string ToSafeLower(string s)
  {
      if (s == null)
      {
          // Null passes straight through instead of throwing.
          return s;
      }

      return s.ToLower(CultureInfo.InvariantCulture);
  }
  /// <summary>
  /// Find single-line and multi-line comments in HTML, for example
  /// <c>&lt;!-- this is a comment --&gt;</c> or <c>&lt;!-- this-is-a comment --&gt;</c>.
  /// </summary>
  /// <param name="body">The HTML text to search.</param>
  /// <returns>One match per comment, including its delimiters.</returns>
  public static MatchCollection GetHtmlComment(String body)
  {
      // Lazy ".*?" stops at the first closing delimiter, which also avoids catastrophic backtracking.
      const String CommentPattern = "<!--.*?-->";

      // Singleline lets "." cross line breaks so multi-line comments are found.
      RegexOptions options = RegexOptions.IgnoreCase
          | RegexOptions.Multiline
          | RegexOptions.Singleline
          | RegexOptions.CultureInvariant;

      return Regex.Matches(body, CommentPattern, options);
  }
  /// <summary>
  /// Find block comments (<c>/* ... */</c>) in JavaScript, including comments that span
  /// several lines.
  /// </summary>
  /// <param name="body">The JavaScript text to search.</param>
  /// <returns>One match per block comment, including its delimiters.</returns>
  public static MatchCollection GetJavascriptMultiLineComment(String body)
  {
      // Singleline lets "." cross line breaks; the lazy quantifier stops at the first "*/".
      RegexOptions options = RegexOptions.Singleline | RegexOptions.Compiled;

      MatchCollection blockComments = Regex.Matches(body, @"(/\*.*?\*/)", options);
      return blockComments;
  }
  /// <summary>
  /// Find line comments in JavaScript: every "//" together with the rest of its line.
  /// </summary>
  /// <remarks>
  /// Any "//" is matched, so text such as the "//" inside a URL is also reported.
  /// </remarks>
  /// <param name="body">The JavaScript text to search.</param>
  /// <returns>One match per "//" run, extending to the end of that line.</returns>
  public static MatchCollection GetJavascriptSingleLineComment(String body)
  {
      // Without Singleline, "." stops at the line break, which ends the comment.
      MatchCollection lineComments = Regex.Matches(body, @"(//.*)", RegexOptions.Compiled);
      return lineComments;
  }
  /// <summary>
  /// Extract the value of a named attribute from the text of a single HTML tag.
  /// </summary>
  /// <remarks>
  /// Handles double-quoted, single-quoted and unquoted values, ignoring case in the
  /// attribute name. A quoted value ends at the matching quote character, so a value such
  /// as <c>"it's"</c> is returned whole. An unquoted value ends before whitespace or the
  /// closing <c>&gt;</c>. The name is only matched as a whole attribute name, not as the
  /// tail of a longer one (searching "name" does not match "data-name").
  /// The value is returned as written: it is deliberately NOT URL-decoded, because decoding
  /// turns "+" into a space and is the caller's decision.
  /// NOTE: <paramref name="attributeName"/> is inserted into the regular expression unescaped.
  /// </remarks>
  /// <param name="tag">The tag text, e.g. <c>&lt;a href="x"&gt;</c>.</param>
  /// <param name="attributeName">The attribute to look for.</param>
  /// <returns>The attribute value without quotes, or null when the attribute is not found.</returns>
  public static String GetHtmlTagAttribute(String tag, String attributeName)
  {
      if (String.IsNullOrEmpty(tag) || String.IsNullOrEmpty(attributeName))
      {
          return null;
      }

      // (?<![\w-])   the name must not be the tail of a longer attribute name
      // "..." | '...' quoted value, closed by the same quote that opened it
      // [^\s"'>]...  unquoted value, stopping before whitespace or ">"
      String pattern = "(?<![\\w-])(?:" + attributeName + ")\\s*=\\s*"
          + "(?:\"(?<value>[^\"]*)\"|'(?<value>[^']*)'|(?<value>[^\\s\"'>][^\\s>]*))";

      Match m = Regex.Match(tag, pattern, RegexOptions.IgnoreCase);
      if (!m.Success)
      {
          return null;
      }

      return m.Groups["value"].Value;
  }
  /// <summary>
  /// Find every <c>&lt;tag ...&gt;...&lt;/tag&gt;</c> element with the given name in a block of
  /// HTML and return each one as text.
  /// </summary>
  /// <remarks>
  /// Tag names are matched without regard to case and elements may span several lines.
  /// Nested elements of the same name are not handled: each match ends at the first
  /// closing tag. The tag name is concatenated into the regular expression as-is.
  /// TODO: Update with balanced group constructs.
  /// </remarks>
  /// <param name="body">The HTML text to search.</param>
  /// <param name="tagName">The element name, e.g. "script".</param>
  /// <param name="stripEnclosingTags">
  /// True to remove the opening and closing tags and return only the content between them.
  /// </param>
  /// <returns>One string per element found; an empty array (never null) when none are found.</returns>
  public static String[] GetHtmlTagBodies(String body, String tagName, bool stripEnclosingTags)
  {
      String openTagPattern = @"<\s*?" + tagName + @"((\s*?)|(\s+?\w.*?))>";
      String closeTagPattern = @"<\s*?\/\s*?" + tagName + @"\s*?>";

      // Stripping uses the same Singleline/IgnoreCase rules as matching, so that a tag
      // which was matched (e.g. one whose attributes wrap onto another line) is also stripped.
      const RegexOptions StripOptions = RegexOptions.Singleline | RegexOptions.IgnoreCase;

      // Match opening->closing tag, nested tags not handled
      MatchCollection elements = Regex.Matches(
          body,
          openTagPattern + ".*?" + closeTagPattern,
          StripOptions | RegexOptions.Compiled);

      String[] bodies = new String[elements.Count];
      int index = 0;
      foreach (Match element in elements)
      {
          String text = element.ToString();
          if (stripEnclosingTags)
          {
              text = Regex.Replace(text, openTagPattern, "", StripOptions);
              text = Regex.Replace(text, closeTagPattern, "", StripOptions);
          }
          bodies[index++] = text;
      }

      return bodies;
  }
  /// <summary>
  /// Find every element with the given name in a block of HTML and return the content
  /// between its opening and closing tags.
  /// </summary>
  /// <param name="body">The HTML text to search.</param>
  /// <param name="tagName">The element name, e.g. "script".</param>
  /// <returns>One string per element found, with the enclosing tags removed.</returns>
  public static String[] GetHtmlTagBodies(String body, String tagName)
  {
      // Convenience overload: always strips the enclosing tags.
      const bool StripEnclosingTags = true;

      String[] contents = GetHtmlTagBodies(body, tagName, StripEnclosingTags);
      return contents;
  }
  /// <summary>
  /// Return the authority part (host, plus port and user info if present) of an absolute
  /// http or https URI.
  /// </summary>
  /// <remarks>
  /// The scheme is matched without regard to case. The authority ends at the first
  /// "/", "?" or "#", so "http://host?x=1" yields "host".
  /// </remarks>
  /// <param name="src">The URI text; may be null.</param>
  /// <returns>
  /// The text between "://" and the start of the path, query or fragment, or null when
  /// <paramref name="src"/> is null or does not start with http:// or https://.
  /// </returns>
  public static String GetUriDomainName(String src)
  {
      if (src == null)
      {
          return null;
      }

      int authorityStart;
      if (src.StartsWith("http://", StringComparison.OrdinalIgnoreCase))
      {
          authorityStart = "http://".Length;
      }
      else if (src.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
      {
          authorityStart = "https://".Length;
      }
      else
      {
          return null;
      }

      String authority = src.Substring(authorityStart);

      // A path, a query string or a fragment each terminate the authority.
      int authorityEnd = authority.IndexOfAny(new char[] { '/', '?', '#' });
      if (authorityEnd >= 0)
      {
          authority = authority.Substring(0, authorityEnd);
      }

      return authority;
  }
  /// <summary>
  /// Record a URL in a running list of URLs and report whether it was new.
  /// </summary>
  /// <remarks>
  /// The URL is normalized by round-tripping it through <see cref="Uri"/> before it is
  /// compared and stored. The list itself is used as the lock object for the check-and-add.
  /// The caller is responsible for resetting the list (for example when the user clears results).
  /// </remarks>
  /// <param name="url">A full URI, must include the scheme as in http://www.nottrusted.org. Provided by session.fullUrl.</param>
  /// <param name="urls">The list of URLs to maintain.</param>
  /// <returns>True when the URL was not yet in the list (it is added); false when it was already present.</returns>
  public static bool UrlNotInList(String url, List<string> urls)
  {
      lock (urls)
      {
          // Compare on the normalized form of the whole URI.
          String normalized = new Uri(url).ToString();

          bool alreadySeen = urls.Contains(normalized);
          if (!alreadySeen)
          {
              // First time this URL is seen: remember it.
              urls.Add(normalized);
          }

          return !alreadySeen;
      }
  }
  /// <summary>
  /// Collect the request parameters of a session: the query string of a GET request, or
  /// the URL-encoded form body of a POST request.
  /// </summary>
  /// <remarks>
  /// A POST body is used when its Content-Type starts with
  /// application/x-www-form-urlencoded, compared without regard to case, so values that
  /// carry parameters such as "; charset=UTF-8" are accepted. The body is read as UTF-8.
  /// Entries with a null key, which ill-formed query strings produce, are removed.
  /// </remarks>
  /// <param name="session">The Fiddler HTTP session to examine.</param>
  /// <returns>The parsed parameters, or null when the request carries none that this method reads.</returns>
  public static NameValueCollection GetRequestParameters(Session session)
  {
      const String FormContentType = "application/x-www-form-urlencoded";
      String qs = null;

      // GET: everything after the first "?" (a "?" at index 0 is not treated as a query).
      if (session.HTTPMethodIs("GET"))
      {
          int queryMark = session.PathAndQuery.IndexOf('?');
          if (queryMark > 0)
          {
              qs = session.PathAndQuery.Substring(queryMark + 1);
          }
      }

      // POST: the body, when it is URL-encoded form data.
      if (session.HTTPMethodIs("POST") && session.oRequest.headers.Exists("content-type"))
      {
          String contentType = session.oRequest.headers["content-type"];
          bool isFormData = contentType != null
              && contentType.Trim().StartsWith(FormContentType, StringComparison.OrdinalIgnoreCase);

          // TODO: is a decode (session.utilDecodeRequest) needed first?
          if (isFormData && session.requestBodyBytes != null)
          {
              qs = System.Text.Encoding.UTF8.GetString(session.requestBodyBytes);
          }
      }

      if (qs == null)
      {
          return null;
      }

      NameValueCollection nvc = null;
      try
      {
          nvc = HttpUtility.ParseQueryString(qs);

          // Remove any nulls from ill-formed query strings
          bool hasNullKey = false;
          foreach (String param in nvc.Keys)
          {
              if (param == null)
              {
                  hasNullKey = true;
                  break;
              }
          }
          if (hasNullKey)
          {
              nvc.Remove(null);
          }
      }
      // TODO: Could we be missing things here? False negatives?
      catch (ArgumentNullException ane)
      {
          ExceptionLogger.HandleException(ane);// discard
      }

      return nvc;
  }
  582. #endregion
  583. }
  584. }