PageRenderTime 48ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/agents/referrals.php

https://github.com/agnesrambaud/yacs
PHP | 445 lines | 208 code | 72 blank | 165 comment | 46 complexity | a07de94bf7963ab7055bf2346f8d3e33 MD5 | raw file
  1. <?php
  2. /**
  3. * referral processing
  4. *
  5. * @author Bernard Paques
  6. * @reference
  7. * @license http://www.gnu.org/copyleft/lesser.txt GNU Lesser General Public License
  8. */
  9. class Referrals {
  /**
   * process one single HTTP request
   *
   * Remembers the referer of the current request in the referrals table.
   * The request is silently ignored when:
   * - it is a HEAD request, or it has no target URL or no referer
   * - the target is not an index.php or view.php script
   * - the referer mentions 'cache:', mentions this host name, or matches
   *   the pattern provided by Servers::get_banned_pattern()
   * - Surfer::is_crawler() reports a crawler
   *
   * The referer is normalized with Referrals::normalize(), which also strips
   * PHPSESSID and jsessionid data from its query string.
   *
   * An already known (url, referer) pair gets its hit counter incremented.
   * A new pair is recorded only if the referring page can be fetched and
   * contains $context['url_to_home'].
   *
   * After a successful write, records older than 100 days are purged with
   * a probability of 1/100.
   *
   * @return void
   *
   * @see agents/referrals_hook.php
   */
  function check_request() {
    global $context;

    // don't bother with HEAD requests
    if(isset($_SERVER['REQUEST_METHOD']) && ($_SERVER['REQUEST_METHOD'] == 'HEAD'))
      return;

    // the target url
    if(!isset($_SERVER['REQUEST_URI']) || (!$url = $_SERVER['REQUEST_URI']))
      return;

    // only remember viewed pages and index pages -- the dot is escaped to match a literal '.php'
    if(!preg_match('/\/(index|view)\.php/', $url))
      return;

    // continue only if we have a referer
    if(!isset($_SERVER['HTTP_REFERER']) || (!$referer = $_SERVER['HTTP_REFERER']))
      return;

    // do not memorize cache referrals
    if(preg_match('/cache:/i', $referer))
      return;

    // block pernicious attacks
    $referer = strip_tags($referer);

    // only remember external referrals
    if(preg_match('/\b'.preg_quote(str_replace('www.', '', $context['host_name']), '/').'\b/i', $referer))
      return;

    // stop crawlers
    if(Surfer::is_crawler())
      return;

    // avoid banned sources
    include_once $context['path_to_root'].'servers/servers.php';
    if(preg_match(Servers::get_banned_pattern(), $referer))
      return;

    // normalize the referral, extract keywords, and domain
    list($referer, $domain, $keywords) = Referrals::normalize($referer);

    // look for an existing record for this url and this referer
    $query = "SELECT id FROM ".SQL::table_name('referrals')." AS referrals"
      ." WHERE referrals.url LIKE '".SQL::escape($url)."' AND referrals.referer LIKE '".SQL::escape($referer)."'";
    $item = SQL::query_first($query);

    // stop on failure only -- an absent row has to lead to the creation of a new record below
    // NOTE(review): assumes SQL::query_first() signals errors with FALSE, as SQL::query() does below -- confirm
    if($item === FALSE)
      return;

    // same stamp format as before, in GMT
    $stamp = gmdate('Y-m-d H:i:s');

    // update figures
    if(isset($item['id'])) {
      $query = "UPDATE ".SQL::table_name('referrals')." SET"
        ." hits=hits+1,"
        ." stamp='".$stamp."'"
        ." WHERE id = ".intval($item['id']);

    // create a new record
    } else {

      // ensure the referer is accessible
      include_once $context['path_to_root'].'links/link.php';
      if(($content = Link::fetch($referer, '', '', 'agents/referrals.php')) === FALSE)
        return;

      // we have to find a reference to ourself in this page
      if(strpos($content, $context['url_to_home']) === FALSE)
        return;

      $query = "INSERT INTO ".SQL::table_name('referrals')." SET"
        ." url='".SQL::escape($url)."',"
        ." referer='".SQL::escape($referer)."',"
        ." domain='".SQL::escape($domain)."',"
        ." keywords='".SQL::escape($keywords)."',"
        ." hits=1,"
        ." stamp='".$stamp."'";
    }

    // actual database update
    if(SQL::query($query) === FALSE)
      return;

    // prune with a probability of 1/100
    if(rand(1, 100) != 50)
      return;

    // purge oldest records -- 100 days = 8640000 seconds
    $query = "DELETE FROM ".SQL::table_name('referrals')
      ." WHERE stamp < '".gmdate('Y-m-d H:i:s', time()-8640000)."'";
    SQL::query($query);
  }
  /**
   * delete one referer
   *
   * Removes from the referrals table every row whose referer matches the
   * provided string.
   *
   * NOTE(review): the comparison uses SQL LIKE, so '%' and '_' in the referer
   * presumably act as wildcards unless SQL::escape() neutralizes them -- confirm
   *
   * @param string the referer to delete
   * @return void
   *
   * @see links/check.php
   */
  function delete($referer) {
    global $context;

    // build the statement piece by piece
    $table = SQL::table_name('referrals');
    $pattern = SQL::escape($referer);

    // the outcome of the query is not reported to the caller
    SQL::query("DELETE FROM ".$table." WHERE referer LIKE '".$pattern."'");
  }
  /**
   * list most recent referrals
   *
   * Rows of the referrals table are sorted by decreasing stamp.
   *
   * @param int the offset from the start of the list; usually, 0 or 1
   * @param int the number of items to display
   * @return mixed whatever SQL::query() returns for the SELECT statement
   *
   * @see links/check.php
   */
  function list_by_dates($offset=0, $count=10) {
    global $context;

    // force integers, since these values are put directly in the LIMIT clause
    $query = "SELECT * FROM ".SQL::table_name('referrals')
      ." ORDER BY stamp DESC LIMIT ".intval($offset).', '.intval($count);

    return SQL::query($query, $context['connection']);
  }
  /**
   * list most popular domains
   *
   * Hits are summed per domain, and domains are sorted by decreasing hits.
   * Only records without keywords are considered, which removes as many
   * referrals coming from search engines as possible.
   *
   * @param int the offset from the start of the list; usually, 0 or 1
   * @param int the number of items to display
   * @return mixed whatever SQL::query() returns for the SELECT statement
   *
   * @see agents/index.php
   */
  function list_by_domain($offset=0, $count=10) {
    global $context;

    // the list of domains -- force integers, since these values are put directly in the LIMIT clause
    $query = "SELECT domain, MIN(referer) as referer, SUM(hits) as hits FROM ".SQL::table_name('referrals')
      ." WHERE keywords = ''"
      ." GROUP BY domain"
      ." ORDER BY hits DESC LIMIT ".intval($offset).', '.intval($count);

    return SQL::query($query, $context['connection']);
  }
  /**
   * list referrals for a given URL
   *
   * Referrals are sorted by decreasing hits and rendered as a compact list
   * through Skin::build_list(). Each entry is labelled with the search
   * keywords when there are some, else with the referring domain, followed
   * by the number of hits.
   *
   * The front page is a special case: '/' and $context['url_to_root'] are
   * considered as the same target.
   *
   * @param string the referenced url
   * @param int the offset from the start of the list; usually, 0 or 1
   * @param int the number of items to display
   * @return mixed the outcome of Skin::build_list(), or NULL on query failure or if there is no referral
   *
   * @see index.php
   * @see actions/index.php
   * @see actions/view.php
   * @see agents/index.php
   * @see articles/index.php
   * @see articles/view.php
   * @see categories/index.php
   * @see categories/view.php
   * @see codes/index.php
   * @see collections/index.php
   * @see comments/index.php
   * @see comments/view.php
   * @see feeds/index.php
   * @see files/index.php
   * @see files/view.php
   * @see images/index.php
   * @see images/view.php
   * @see letters/index.php
   * @see links/index.php
   * @see locations/index.php
   * @see locations/view.php
   * @see overlays/index.php
   * @see scripts/index.php
   * @see scripts/view.php
   * @see sections/index.php
   * @see sections/view.php
   * @see servers/index.php
   * @see servers/view.php
   * @see services/index.php
   * @see skins/index.php
   * @see smileys/index.php
   * @see tables/index.php
   * @see tables/view.php
   * @see users/index.php
   * @see users/view.php
   */
  function list_by_hits_for_url($url, $offset=0, $count=10) {
    global $context;

    // the front page is a special case -- the configured root is escaped like any other value
    if(($url == '/') || ($url == $context['url_to_root']))
      $where = "(url LIKE '/') OR (url LIKE '".SQL::escape($context['url_to_root'])."')";
    else
      $where = "url LIKE '".SQL::escape($url)."'";

    // the list of referrals -- force integers, since these values are put directly in the LIMIT clause
    $query = "SELECT * FROM ".SQL::table_name('referrals')
      ." WHERE ".$where
      ." ORDER BY hits DESC LIMIT ".intval($offset).', '.intval($count);
    if(!$result = SQL::query($query, $context['connection']))
      return NULL;

    // empty list
    if(!SQL::count($result))
      return NULL;

    // render a compact list, and including the number of referrals
    $items = array();
    while($row = SQL::fetch($result)) {

      // hack to make this compliant to XHTML -- use a separate variable to preserve the $url parameter
      $link = str_replace('&', '&amp;', $row['referer']);

      // prefer search keywords, if any, to the domain name
      if(isset($row['keywords']) && $row['keywords'])
        $label = $row['keywords'];
      else
        $label = $row['domain'];

      $items[$link] = array('', $label, ' ('.Skin::build_number($row['hits']).')', 'basic', '');
    }

    if(count($items))
      return Skin::build_list($items, 'compact');
    return NULL;
  }
  /**
   * list most popular referrals
   *
   * Hits are summed per referer, and referers are sorted by decreasing hits.
   *
   * @param int the offset from the start of the list; usually, 0 or 1
   * @param int the number of items to display
   * @return array referer => hits formatted with Skin::build_number(); an empty array if the query fails or finds nothing
   */
  function list_by_hits($offset=0, $count=10) {
    global $context;

    // always return an array, even if the query fails or if there is no row
    $items = array();

    // the list of referrals -- force integers, since these values are put directly in the LIMIT clause
    $query = "SELECT referer, sum(hits) as hits FROM ".SQL::table_name('referrals')
      ." GROUP BY referer"
      ." ORDER BY hits DESC LIMIT ".intval($offset).', '.intval($count);
    if($result = SQL::query($query, $context['connection'])) {
      while($row = SQL::fetch($result))
        $items[ $row['referer'] ] = Skin::build_number($row['hits']);
    }

    return $items;
  }
  /**
   * list most popular keywords
   *
   * Hits are summed per distinct value of keywords, and groups are sorted
   * by decreasing hits.
   *
   * NOTE(review): records without keywords are not filtered out, so they
   * presumably show up as one group with an empty label -- confirm that
   * callers expect this
   *
   * @param int the offset from the start of the list; usually, 0 or 1
   * @param int the number of items to display
   * @return mixed whatever SQL::query() returns for the SELECT statement
   *
   * @see agents/index.php
   */
  function list_by_keywords($offset=0, $count=10) {
    global $context;

    // the list of keywords -- force integers, since these values are put directly in the LIMIT clause
    $query = "SELECT keywords, MIN(referer) as referer, SUM(hits) as hits FROM ".SQL::table_name('referrals')
      ." GROUP BY keywords"
      ." ORDER BY hits DESC LIMIT ".intval($offset).', '.intval($count);

    return SQL::query($query, $context['connection']);
  }
  /**
   * normalize an external reference
   *
   * This function strips noise attributes from search engines:
   * - PHPSESSID and jsessionid variables are always removed from the query string
   * - if the link mentions altavista, aol, ask, google, msn or yahoo, and
   *   features the related search variable ('p' for yahoo, 'q' for others),
   *   then only this variable is kept, and its value is used as keywords
   * - for google, the 'ie' variable, if any, is preserved to signal the charset
   *
   * Keywords are passed through htmlspecialchars() and utf8::encode() to make
   * them safe to display.
   *
   * @param string the raw reference
   * @return an array( normalized link, domain, search keywords )
   */
  function normalize($link) {

    // get the query string, if any
    $tokens = explode('?', $link, 2);
    $link = $tokens[0];
    $query_string = '';
    if(isset($tokens[1]))
      $query_string = $tokens[1];

    // split the query string in variables, if any
    $attributes = array();
    if($query_string) {
      foreach(explode('&', $query_string) as $token) {

        // skip noise such as 'a=1&&b=2' or a trailing '&'
        if($token === '')
          continue;

        // split on the first '=' only, to preserve values that contain '=',
        // and accept variables without any value
        list($name, $value) = array_pad(explode('=', $token, 2), 2, '');
        $name = urldecode($name);
        $value = urldecode($value);

        // strip any PHPSESSID data
        if(preg_match('/^PHPSESSID/i', $name))
          continue;

        // strip any JSESSIONID data
        if(preg_match('/^jsessionid/i', $name))
          continue;

        // remember this variable
        $attributes[ $name ] = $value;
      }
    }

    // looking for keywords
    $keywords = '';

    // link options, if any
    $suffix = '';

    // known search engines, and the variable that conveys searched words -- the first match wins
    $engines = array(
      'altavista' => 'q',
      'aol' => 'q',
      'ask' => 'q',
      'google' => 'q',
      'msn' => 'q',
      'yahoo' => 'p'
      );
    foreach($engines as $engine => $variable) {

      // not coming from this engine
      if(!preg_match('/\b'.$engine.'\b.+/', $link) || !isset($attributes[$variable]))
        continue;

      // signal to Google the charset to be used
      if(($engine == 'google') && isset($attributes['ie']))
        $suffix = '&ie='.urlencode($attributes['ie']);

      // drop everything except searched words
      $keywords = $attributes[$variable];
      $attributes = array( $variable => $keywords );
      break;
    }

    // rebuild a full link
    $query_string = '';
    foreach($attributes as $name => $value) {
      if($query_string)
        $query_string .= '&';
      $query_string .= urlencode($name).'='.urlencode($value);
    }
    if($query_string)
      $link .= '?'.$query_string.$suffix;

    // extract the referer domain -- also cut at '?' or '#', for links that have no path after the host name
    $domain = preg_replace("/^\w+:\/\//i", "", $link);
    $domain = preg_replace("/^www\./i", "", $domain);
    $domain = preg_replace("/[\/?#].*/i", "", $domain);

    // transcode keywords, and make it a safe string to display
    if($keywords)
      $keywords = utf8::encode(htmlspecialchars($keywords));

    // return normalized elements
    return array($link, trim($domain), trim($keywords));
  }
  /**
   * create tables for referrals
   *
   * Describes columns and indexes of the referrals table, and delegates
   * the actual job to SQL::setup_table().
   *
   * @return mixed whatever SQL::setup_table() returns
   *
   * @see agents/referrals_hook.php
   */
  function setup() {
    global $context;

    // one entry per column
    $fields = array(
      'id' => "MEDIUMINT UNSIGNED NOT NULL AUTO_INCREMENT",
      'url' => "TEXT NOT NULL",
      'referer' => "TEXT NOT NULL",
      'domain' => "VARCHAR(255) DEFAULT '' NOT NULL",
      'keywords' => "VARCHAR(255) DEFAULT ''",
      'hits' => "INT UNSIGNED DEFAULT 0 NOT NULL",
      'stamp' => "DATETIME"
      );

    // one entry per index -- TEXT columns are indexed on their first 255 characters
    $indexes = array(
      'PRIMARY KEY' => "(id)",
      'INDEX url' => "(url(255))",
      'INDEX referer' => "(referer(255))",
      'INDEX domain' => "(domain)",
      'INDEX keywords' => "(keywords)",
      'INDEX hits' => "(hits)",
      'INDEX stamp' => "(stamp)"
      );

    return SQL::setup_table('referrals', $fields, $indexes);
  }
  /**
   * get some statistics
   *
   * Submits a COUNT(*) query on the referrals table, and returns by reference
   * what SQL::query_first() has provided.
   *
   * @return mixed the outcome of SQL::query_first() -- presumably a row with
   * the number of records in column 'count'; verify against SQL::query_first()
   *
   * @see control/index.php
   */
  function &stat() {
    global $context;

    // count all records of the table
    $row =& SQL::query_first("SELECT COUNT(*) as count FROM ".SQL::table_name('referrals'));

    // a variable is required, since this function returns a reference
    return $row;
  }
  369. }
  // load localized strings -- 'agents' is passed to i18n::bind(), but only if this method can be called
  if(is_callable(array('i18n', 'bind'))) {
    i18n::bind('agents');
  }
  373. ?>