PageRenderTime 37ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/agents/referrals.php

https://github.com/bernard357/yacs
PHP | 441 lines | 207 code | 72 blank | 162 comment | 46 complexity | 50aee98fd959b0244daae0644ae0a6cc MD5 | raw file
  1. <?php
  2. /**
  3. * referral processing
  4. *
  5. * @author Bernard Paques
  6. * @reference
  7. * @license http://www.gnu.org/copyleft/lesser.txt GNU Lesser General Public License
  8. */
  class Referrals {

    /**
     * process one single HTTP request
     *
     * This function remembers the external page that has referred the surfer
     * to an index or view page of this server. The referer is normalized
     * (session ids are stripped from its query string) before being saved.
     *
     * Nothing is recorded for HEAD requests, for requests without a referer,
     * for cache referrals, for internal referrals, for crawlers, nor for
     * banned sources. A new referer is recorded only if the referring page
     * can be fetched and actually mentions $context['url_to_home'].
     *
     * From time to time, records older than 100 days are purged.
     *
     * @return void
     *
     * @see agents/referrals_hook.php
     */
    public static function check_request() {
      global $context;

      // don't bother with HEAD requests
      if(isset($_SERVER['REQUEST_METHOD']) && ($_SERVER['REQUEST_METHOD'] == 'HEAD'))
        return;

      // the target url
      if(!isset($_SERVER['REQUEST_URI']) || (!$url = $_SERVER['REQUEST_URI']))
        return;

      // only remember viewed pages and index pages -- the dot is escaped to match '.php' literally
      if(!preg_match('/\/(index|view)\.php/', $url))
        return;

      // continue only if we have a referer
      if(!isset($_SERVER['HTTP_REFERER']) || (!$referer = $_SERVER['HTTP_REFERER']))
        return;

      // do not memorize cache referrals
      if(preg_match('/cache:/i', $referer))
        return;

      // block pernicious attacks
      $referer = strip_tags($referer);

      // only remember external referrals
      $host = preg_quote(str_replace('www.', '', $context['host_name']), '/');
      if(preg_match('/\b'.$host.'\b/i', $referer))
        return;

      // stop crawlers
      if(Surfer::is_crawler())
        return;

      // avoid banned sources
      include_once $context['path_to_root'].'servers/servers.php';
      if(preg_match(Servers::get_banned_pattern(), $referer))
        return;

      // normalize the referral, extract keywords, and domain
      list($referer, $domain, $keywords) = Referrals::normalize($referer);

      // look for an existing record for this url and this referer
      $query = "SELECT id FROM ".SQL::table_name('referrals')." AS referrals"
        ." WHERE referrals.url LIKE '".SQL::escape($url)."' AND referrals.referer LIKE '".SQL::escape($referer)."'";

      // stop on a failed lookup only; an empty result means that a record has to be created
      // NOTE(review): assumes SQL::query_first() returns FALSE on error, like SQL::query() -- confirm
      if(($item = SQL::query_first($query)) === FALSE)
        return;

      // same format as the former gmstrftime('%Y-%m-%d %H:%M:%S'), which is deprecated since PHP 8.1
      $now = gmdate('Y-m-d H:i:s');

      // update figures
      if(isset($item['id'])) {
        $query = "UPDATE ".SQL::table_name('referrals')." SET"
          ." hits=hits+1,"
          ." stamp='".$now."'"
          ." WHERE id = ".intval($item['id']);

      // create a new record
      } else {

        // ensure the referer is accessible
        if(($content = http::proceed($referer)) === FALSE)
          return;

        // we have to find a reference to ourself in this page
        if(strpos($content, $context['url_to_home']) === FALSE)
          return;

        $query = "INSERT INTO ".SQL::table_name('referrals')." SET"
          ." url='".SQL::escape($url)."',"
          ." referer='".SQL::escape($referer)."',"
          ." domain='".SQL::escape($domain)."',"
          ." keywords='".SQL::escape($keywords)."',"
          ." hits=1,"
          ." stamp='".$now."'";
      }

      // actual database update
      if(SQL::query($query) === FALSE)
        return;

      // prune with a probability of 1/100
      if(rand(1, 100) != 50)
        return;

      // purge oldest records -- 100 days = 8640000 seconds
      $query = "DELETE FROM ".SQL::table_name('referrals')
        ." WHERE stamp < '".gmdate('Y-m-d H:i:s', time()-8640000)."'";
      SQL::query($query);
    }

    /**
     * delete one referer
     *
     * All records matching the provided referer are removed, whatever the
     * referenced url is.
     *
     * @param string the referer to delete
     *
     * @see links/check.php
     */
    public static function delete($referer) {
      $query = "DELETE FROM ".SQL::table_name('referrals')." WHERE referer LIKE '".SQL::escape($referer)."'";
      SQL::query($query);
    }

    /**
     * build the LIMIT clause shared by listing functions
     *
     * Values are forced to non-negative integers, since they are put
     * directly in SQL statements.
     *
     * @param int the offset from the start of the list
     * @param int the number of items to display
     * @return string the clause to append to a query, with a leading space
     */
    private static function limit_clause($offset, $count) {
      return " LIMIT ".max(0, intval($offset)).', '.max(0, intval($count));
    }

    /**
     * list most recent referrals
     *
     * @param int the offset from the start of the list; usually, 0 or 1
     * @param int the number of items to display
     * @return mixed the outcome of SQL::query() for records sorted by descending stamp
     *
     * @see links/check.php
     */
    public static function list_by_dates($offset=0, $count=10) {
      global $context;

      // the list of referrals
      $query = "SELECT * FROM ".SQL::table_name('referrals')
        ." ORDER BY stamp DESC".self::limit_clause($offset, $count);

      return SQL::query($query, $context['connection']);
    }

    /**
     * list most popular domains
     *
     * Only records without keywords are considered, which removes as many
     * referrals coming from search engines as possible.
     *
     * @param int the offset from the start of the list; usually, 0 or 1
     * @param int the number of items to display
     * @return mixed the outcome of SQL::query(), one row per domain with columns domain, referer and hits
     *
     * @see agents/index.php
     */
    public static function list_by_domain($offset=0, $count=10) {
      global $context;

      // the list of domains
      $query = "SELECT domain, MIN(referer) as referer, SUM(hits) as hits FROM ".SQL::table_name('referrals')
        ." WHERE keywords = ''"
        ." GROUP BY domain"
        ." ORDER BY hits DESC".self::limit_clause($offset, $count);

      return SQL::query($query, $context['connection']);
    }

    /**
     * list referrals for a given URL
     *
     * Each item of the list is labelled with search keywords, if any, else
     * with the referring domain, followed by the number of hits.
     *
     * @param string the referenced url
     * @param int the offset from the start of the list; usually, 0 or 1
     * @param int the number of items to display
     * @return mixed the outcome of Skin::build_list() in 'compact' mode, or NULL if there is nothing to list
     *
     * @see index.php
     * @see agents/index.php
     * @see articles/index.php
     * @see articles/view.php
     * @see categories/index.php
     * @see categories/view.php
     * @see codes/index.php
     * @see comments/index.php
     * @see comments/view.php
     * @see feeds/index.php
     * @see files/index.php
     * @see files/view.php
     * @see images/index.php
     * @see images/view.php
     * @see letters/index.php
     * @see links/index.php
     * @see locations/index.php
     * @see locations/view.php
     * @see overlays/index.php
     * @see scripts/index.php
     * @see scripts/view.php
     * @see sections/index.php
     * @see sections/view.php
     * @see servers/index.php
     * @see servers/view.php
     * @see services/index.php
     * @see skins/index.php
     * @see smileys/index.php
     * @see tables/index.php
     * @see tables/view.php
     * @see users/index.php
     * @see users/view.php
     */
    public static function list_by_hits_for_url($url, $offset=0, $count=10) {
      global $context;

      // the front page is a special case, since it can be reached through two different urls
      if(($url == '/') || ($url == $context['url_to_root']))
        $where = "(url LIKE '/') OR (url LIKE '".SQL::escape($context['url_to_root'])."')";
      else
        $where = "url LIKE '".SQL::escape($url)."'";

      // the list of referrals
      $query = "SELECT * FROM ".SQL::table_name('referrals')
        ." WHERE ".$where
        ." ORDER BY hits DESC".self::limit_clause($offset, $count);
      if(!$result = SQL::query($query, $context['connection']))
        return NULL;

      // empty list
      if(!SQL::count($result))
        return NULL;

      // render a compact list, and including the number of referrals
      $items = array();
      while($row = SQL::fetch($result)) {

        // hack to make this compliant to XHTML
        $target = str_replace('&', '&amp;', $row['referer']);

        // prefer search keywords, when there are some, to the bare domain name
        if(isset($row['keywords']) && $row['keywords'])
          $label = $row['keywords'];
        else
          $label = $row['domain'];

        $items[$target] = array('', $label, ' ('.Skin::build_number($row['hits']).')', 'basic', '');
      }

      if(count($items))
        return Skin::build_list($items, 'compact');
      return NULL;
    }

    /**
     * list most popular referrals
     *
     * @param int the offset from the start of the list; usually, 0 or 1
     * @param int the number of items to display
     * @return array formatted numbers of hits, indexed by referer; an empty array if nothing has been found
     */
    public static function list_by_hits($offset=0, $count=10) {
      global $context;

      // always return an array, even on query failure or on empty result
      $items = array();

      // the list of referrals
      $query = "SELECT referer, sum(hits) as hits FROM ".SQL::table_name('referrals')
        ." GROUP BY referer"
        ." ORDER BY hits DESC".self::limit_clause($offset, $count);
      if($result = SQL::query($query, $context['connection'])) {
        while($row = SQL::fetch($result))
          $items[ $row['referer'] ] = Skin::build_number($row['hits']);
      }

      return $items;
    }

    /**
     * list most popular keywords
     *
     * @param int the offset from the start of the list; usually, 0 or 1
     * @param int the number of items to display
     * @return mixed the outcome of SQL::query(), one row per set of keywords with columns keywords, referer and hits
     *
     * @see agents/index.php
     */
    public static function list_by_keywords($offset=0, $count=10) {
      global $context;

      // the list of keywords
      $query = "SELECT keywords, MIN(referer) as referer, SUM(hits) as hits FROM ".SQL::table_name('referrals')
        ." GROUP BY keywords"
        ." ORDER BY hits DESC".self::limit_clause($offset, $count);

      return SQL::query($query, $context['connection']);
    }

    /**
     * normalize an external reference
     *
     * This function strips session ids (PHPSESSID, jsessionid) from the query
     * string. For links coming from some search engines (altavista, aol, ask,
     * google, msn, yahoo) only the search parameter is preserved, and its
     * value is returned as keywords.
     *
     * @param string the raw reference
     * @return array( normalized link, domain without any 'www.' prefix, search keywords or an empty string )
     */
    public static function normalize($link) {

      // get the query string, if any
      $tokens = explode('?', $link, 2);
      $link = $tokens[0];
      $query_string = isset($tokens[1]) ? $tokens[1] : '';

      // split the query string in variables, if any
      $attributes = array();
      if($query_string) {
        foreach(explode('&', $query_string) as $token) {

          // nothing to remember, e.g., on '&&'
          if($token === '')
            continue;

          // split on the first '=' only, and accept variables without any value
          $pair = explode('=', $token, 2);
          $name = urldecode($pair[0]);
          $value = isset($pair[1]) ? urldecode($pair[1]) : '';

          // strip any PHPSESSID or JSESSIONID data
          if(preg_match('/^(PHPSESSID|jsessionid)/i', $name))
            continue;

          // remember this variable
          $attributes[ $name ] = $value;
        }
      }

      // looking for keywords
      $keywords = '';

      // link options, if any
      $suffix = '';

      // supported search engines, and the name of the query variable for each of them
      $engines = array(
        'altavista' => 'q',
        'aol' => 'q',
        'ask' => 'q',
        'google' => 'q',
        'msn' => 'q',
        'yahoo' => 'p'
        );

      // the first engine that matches the link, and that has a search variable, wins
      foreach($engines as $engine => $variable) {
        if(!isset($attributes[$variable]) || !preg_match('/\b'.$engine.'\b.+/', $link))
          continue;

        // signal to Google the charset to be used
        if(($engine == 'google') && isset($attributes['ie']))
          $suffix = '&ie='.urlencode($attributes['ie']);

        // drop every other variable
        $keywords = $attributes[$variable];
        $attributes = array( $variable => $keywords );
        break;
      }

      // rebuild a full link
      $pairs = array();
      foreach($attributes as $name => $value)
        $pairs[] = urlencode((string)$name).'='.urlencode($value);
      if($pairs)
        $link .= '?'.implode('&', $pairs).$suffix;

      // extract the referer domain -- drop the scheme, then the 'www.' prefix, then the path
      $domain = preg_replace("/^\w+:\/\//i", "", $link);
      $domain = preg_replace("/^www\./i", "", $domain);
      $domain = preg_replace("/\/.*/i", "", $domain);

      // transcode keywords, and make it a safe string to display
      if($keywords)
        $keywords = utf8::encode(htmlspecialchars($keywords));

      // return normalized elements
      return array($link, trim($domain), trim($keywords));
    }

    /**
     * create tables for referrals
     *
     * @return mixed the outcome of SQL::setup_table()
     *
     * @see agents/referrals_hook.php
     */
    public static function setup() {

      // one record per couple (url, referer)
      $fields = array();
      $fields['id']       = "MEDIUMINT UNSIGNED NOT NULL AUTO_INCREMENT";
      $fields['url']      = "TEXT NOT NULL";
      $fields['referer']  = "TEXT NOT NULL";
      $fields['domain']   = "VARCHAR(255) DEFAULT '' NOT NULL";
      $fields['keywords'] = "VARCHAR(255) DEFAULT ''";
      $fields['hits']     = "INT UNSIGNED DEFAULT 0 NOT NULL";
      $fields['stamp']    = "DATETIME";

      // TEXT columns are indexed on their first 255 characters
      $indexes = array();
      $indexes['PRIMARY KEY']    = "(id)";
      $indexes['INDEX url']      = "(url(255))";
      $indexes['INDEX referer']  = "(referer(255))";
      $indexes['INDEX domain']   = "(domain)";
      $indexes['INDEX keywords'] = "(keywords)";
      $indexes['INDEX hits']     = "(hits)";
      $indexes['INDEX stamp']    = "(stamp)";

      return SQL::setup_table('referrals', $fields, $indexes);
    }

    /**
     * get some statistics
     *
     * @return mixed the outcome of SQL::query_first() for a COUNT(*) query; the number of rows is expected in column 'count'
     *
     * @see control/index.php
     */
    public static function stat() {

      // select among available items
      $query = "SELECT COUNT(*) as count FROM ".SQL::table_name('referrals');

      return SQL::query_first($query);
    }

  }
  // bind localized strings of the 'agents' module, but only if i18n::bind() can be called
  if(is_callable(array('i18n', 'bind'))) {
    i18n::bind('agents');
  }
  369. ?>