/webapp-php/application/models/topcrashersbyurl.php

https://github.com/AlinT/socorro · PHP · 274 lines · 170 code · 20 blank · 84 comment · 13 complexity · 440a1c765daf47dd5012970046989b65 MD5 · raw file

  1. <?php
  2. /**
  3. * Manage data in the `topcrashers` table.
  4. *
  5. * @todo These methods should take a start and an end date as parameters.
  6. * @todo They should not return an array with dates.
  7. */
  8. class TopcrashersByUrl_Model extends Model {
  9. /**
  10. * Given a database result containing
  11. * urls and/or domains, modifies the results
  12. * by applying privacy policies such as
  13. * replacing file urls with a hardcoded
  14. * file_detected_BLOCKED, etc.
  15. *
  16. * @param results - Database query results
  17. * @return results - Database query results with modifications
  18. */
  19. protected function cleanseUrlsAndDomains(&$results)
  20. {
  21. foreach($results as $result) {
  22. if (property_exists($result, 'url')) {
  23. $result->url = $this->cleanseUrl($result->url);
  24. }
  25. if (property_exists($result, 'domain')) {
  26. $result->domain = $this->cleanseDomain($result->domain);
  27. }
  28. }
  29. }
  30. /**
  31. * Given a url, will return the url or
  32. * a version of it with privacy policies
  33. * applied.
  34. *
  35. * @param url - an url
  36. * @return url or other string
  37. */
  38. protected function cleanseUrl($url)
  39. {
  40. if (strpos($url, 'file') === 0) {
  41. return 'local_file_detected_BLOCKED';
  42. } else if (preg_match('/^https?:\/\/[^\/]*@[^\/]*\/.*$/', $url)) {
  43. return 'username_detected_BLOCKED';
  44. } else if (
  45. preg_match('/^https?:\/\/.*$/', $url) ||
  46. preg_match('/^http?:\/\/.*$/', $url)
  47. ) {
  48. return $url;
  49. } else {
  50. return 'non_http_url_detected_BLOCKED';
  51. }
  52. }
  53. /**
  54. * Given a domain, will return the domain or
  55. * a version of it with privacy policies
  56. * applied.
  57. *
  58. * @param domain - an domain
  59. * @return url or other string
  60. */
  61. protected function cleanseDomain($domain)
  62. {
  63. if (strpos($domain, '@') !== false) {
  64. return 'username_detected_BLOCKED';
  65. } elseif (!strstr($domain, '.')) {
  66. return 'invalid_domain_detected_BLOCKED';
  67. } else {
  68. return $domain;
  69. }
  70. }
  71. /**
  72. * Find the top crashing urls from the TBD table
  73. *
  74. * @access public
  75. * @param string The product name
  76. * @param string The product version number
  77. * @param string The build ID of the product (?)
  78. * @param string The branch number (1.9, 1.9.1, 1.9.2, etc.)
  79. * @param int The page number, used for pagination
  80. * @return array An array of topcrasher objects by domain
  81. */
  82. public function getTopCrashersByUrl($tProduct=NULL, $tVersion=NULL, $build_id=NULL, $branch=NULL, $page=1) {
  83. $product = $this->db->escape($tProduct);
  84. $version = $this->db->escape($tVersion);
  85. $offset = ($page -1) * 100;
  86. $aTime = time();
  87. $end_date = date("Y-m-d", $aTime);
  88. $start_date = date("Y-m-d", $aTime - (60 * 60 * 24 * 14) + 1);
  89. $sql = "/* soc.web tcbyrul geturls */
  90. SELECT SUM(tcu.count) as count, ud.url, at.rank
  91. FROM top_crashes_by_url tcu
  92. JOIN urldims ud
  93. ON tcu.urldims_id = ud.id
  94. AND '$start_date' <= (tcu.window_end - tcu.window_size)
  95. AND tcu.window_end < '$end_date'
  96. JOIN productdims pd
  97. ON pd.id = tcu.productdims_id
  98. AND pd.product = $product
  99. AND pd.version = $version
  100. LEFT JOIN alexa_topsites at ON ud.domain LIKE '%' || at.domain
  101. GROUP BY ud.url, at.rank
  102. ORDER BY count DESC
  103. LIMIT 100 OFFSET $offset";
  104. $results = $this->fetchRows($sql);
  105. $this->cleanseUrlsAndDomains($results);
  106. return array($start_date, $end_date, $results);
  107. }
  108. /**
  109. * Find top crashing domains from the TBD table
  110. *
  111. * @access public
  112. * @param string The product name
  113. * @param string The product version number
  114. * @param string The build ID of the product (?)
  115. * @param string The branch number (1.9, 1.9.1, 1.9.2, etc.)
  116. * @param int The page number, used for pagination
  117. * @return array An array of topcrasher objects by domain
  118. */
  119. public function getTopCrashersByDomain($tProduct=NULL, $tVersion=NULL, $build_id=NULL, $branch=NULL, $page=1) {
  120. $product = $this->db->escape($tProduct);
  121. $version = $this->db->escape($tVersion);
  122. $offset = ($page -1) * 100;
  123. $aTime = time();
  124. $end_date = date("Y-m-d", $aTime);
  125. $start_date = date("Y-m-d", $aTime - (60 * 60 * 24 * 14) + 1);
  126. $sql = "/* soc.web tcbyrul getdmns */
  127. SELECT sum(tcu.count) as count, ud.domain, at.rank
  128. FROM top_crashes_by_url tcu
  129. JOIN urldims ud
  130. ON tcu.urldims_id = ud.id
  131. AND '$start_date' <= (tcu.window_end - tcu.window_size)
  132. AND tcu.window_end < '$end_date'
  133. JOIN productdims pd
  134. ON pd.id = tcu.productdims_id
  135. AND pd.product = $product
  136. AND pd.version = $version
  137. LEFT JOIN alexa_topsites at ON ud.domain LIKE '%' || at.domain
  138. GROUP BY ud.domain, at.rank
  139. ORDER BY count DESC
  140. LIMIT 100 OFFSET $offset";
  141. $results = $this->fetchRows($sql);
  142. $this->cleanseUrlsAndDomains($results);
  143. return array($start_date, $end_date, $results);
  144. }
  145. /**
  146. * Find top crashing domains from the TBD table ordered by their topsite ranking.
  147. *
  148. * @access public
  149. * @param string The product name
  150. * @param string The product version number
  151. * @param int The page number
  152. * @return array An array of topcrasher objects by domain
  153. */
  154. public function getTopCrashersByTopsiteRank($tProduct=NULL, $tVersion=NULL, $page = 1) {
  155. $product = $this->db->escape($tProduct);
  156. $version = $this->db->escape($tVersion);
  157. $offset = ($page - 1) * 100;
  158. $aTime = time();
  159. $end_date = date("Y-m-d", $aTime);
  160. $start_date = date("Y-m-d", $aTime - (60 * 60 * 24 * 14) + 1);
  161. $sql = "/* soc.web topcrashersbyurl.getTopCrashersByTopsiteRank */
  162. SELECT
  163. sum(tcu.count) as count,
  164. at.domain, at.rank
  165. FROM top_crashes_by_url tcu
  166. JOIN urldims ud
  167. ON tcu.urldims_id = ud.id
  168. AND '$start_date' <= (tcu.window_end - tcu.window_size)
  169. AND tcu.window_end < '$end_date'
  170. JOIN productdims pd
  171. ON pd.id = tcu.productdims_id
  172. AND pd.product = $product
  173. AND pd.version = $version
  174. JOIN alexa_topsites at ON ud.domain LIKE '%' || at.domain
  175. GROUP BY at.domain, at.rank
  176. ORDER BY count DESC
  177. LIMIT 100 OFFSET $offset";
  178. $results = $this->fetchRows($sql);
  179. $this->cleanseUrlsAndDomains($results);
  180. return array($start_date, $end_date, $results);
  181. }
  182. /**
  183. * Fetch all of the crashing URLs associated with a particular domain.
  184. *
  185. * @access public
  186. * @param string The product name
  187. * @param string The product version number
  188. * @param string The domain name
  189. * @param int The page number, used for pagination
  190. * @return array An array of signatures
  191. */
  192. public function getUrlsByDomain($tProduct, $tVersion, $tDomain, $page=0){
  193. $product = $this->db->escape($tProduct);
  194. $version = $this->db->escape($tVersion);
  195. $domain = $this->db->escape($tDomain);
  196. $offset = ($page -1) * 50;
  197. $aTime = time();
  198. $end_date = date("Y-m-d", $aTime);
  199. $start_date = date("Y-m-d", $aTime - (60 * 60 * 24 * 14) + 1);
  200. $sql = "/* soc.web tcburl urlsbydomain */
  201. SELECT sum(tcu.count) as count, ud.url
  202. FROM top_crashes_by_url tcu
  203. JOIN urldims ud
  204. ON tcu.urldims_id = ud.id
  205. AND ud.domain = $domain
  206. AND '$start_date' <= (tcu.window_end - tcu.window_size)
  207. AND tcu.window_end < '$end_date'
  208. JOIN productdims pd
  209. ON tcu.productdims_id = pd.id
  210. AND pd.product = $product
  211. AND pd.version = $version
  212. GROUP BY ud.url
  213. ORDER BY count DESC
  214. LIMIT 50 OFFSET $offset";
  215. return $this->fetchRows($sql);
  216. }
  217. /**
  218. * Fetch all of the crash signatures associated with a particular URL.
  219. *
  220. * @access public
  221. * @param string The product name
  222. * @param string The product version number
  223. * @param string The URL
  224. * @param int The page number, used for pagination
  225. * @return array An array of signatures
  226. */
  227. public function getSignaturesByUrl($tProduct, $tVersion, $tUrl, $page){
  228. $product = $this->db->escape($tProduct);
  229. $version = $this->db->escape($tVersion);
  230. $url = $this->db->escape($tUrl);
  231. $offset = ($page -1) * 50;
  232. $aTime = time();
  233. $end_date = date("Y-m-d", $aTime);
  234. $start_date = date("Y-m-d", $aTime - (60 * 60 * 24 * 14) + 1);
  235. $sql = "/* soc.web tcburl sigbyurl */
  236. SELECT sum(tucs.count) as count, tucs.signature
  237. FROM top_crashes_by_url tcu
  238. JOIN urldims ud
  239. ON tcu.urldims_id = ud.id
  240. AND '$start_date' <= (tcu.window_end - tcu.window_size)
  241. AND tcu.window_end < '$end_date'
  242. AND ud.url = $url
  243. JOIN productdims pd
  244. ON pd.id = tcu.productdims_id
  245. AND pd.product = $product
  246. AND pd.version = $version
  247. JOIN top_crashes_by_url_signature tucs ON tucs.top_crashes_by_url_id = tcu.id
  248. GROUP BY tucs.signature
  249. ORDER BY 1 DESC
  250. LIMIT 50";
  251. return $this->fetchRows($sql);
  252. }
  253. /* */
  254. }