PageRenderTime 33ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/_squelettes_/forum_spipBB/contribs/bbclone/log_processor.php

https://bitbucket.org/pombredanne/spip-zone-treemap
PHP | 321 lines | 203 code | 66 blank | 52 comment | 78 complexity | 8893e2e27b76912055c2ee315d99750e MD5 | raw file
  1. <?php
  2. /* This file is part of BBClone (The PHP web counter on steroids)
  3. *
  4. * $Header: /cvs/bbclone/log_processor.php,v 1.105 2005/04/04 09:53:45 olliver Exp $
  5. *
  6. * Copyright (C) 2001-2005, the BBClone Team (see file doc/authors.txt
  7. * distributed with this library)
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU General Public License as published by
  11. * the Free Software Foundation; either version 2 of the License, or
  12. * (at your option) any later version.
  13. *
  14. * See doc/copying.txt for details
  15. */
  // Stop executing this file right away unless the constant _MARK_PAGE
  // has been defined before it was loaded.
  if (defined("_MARK_PAGE") === false) {
    return;
  }
  // Comparison callback for usort(): orders two rows ascending by their
  // 'time' element. Yields 0 for equal times, 1 when $row_a is later than
  // $row_b and -1 otherwise.
  function bbc_sort_time_sc($row_a, $row_b) {
    $left = $row_a['time'];
    $right = $row_b['time'];

    if ($left == $right) {
      return 0;
    }
    if ($left > $right) {
      return 1;
    }
    return -1;
  }
  // Returns the key of the first element of $array that loosely equals (==)
  // $str.
  //
  // Returns 0 when nothing matches (or when $array is not an array), so a
  // caller cannot tell "not found" apart from a match stored under key 0 and
  // should test membership beforehand if that matters.
  function bbc_get_key($array, $str) {
    if (!is_array($array)) return 0;

    // foreach replaces the former each() loop: each() was deprecated in
    // PHP 7.2 and removed in PHP 8.0
    foreach ($array as $idx => $val) {
      if ($val == $str) return $idx;
    }
    return 0;
  }
  // Purges the "host", "key" and "referer" statistics in the global $access
  // of every entry whose score loosely equals 1. Categories that are missing
  // or are not arrays are skipped.
  function bbc_purge_single() {
    global $access;

    foreach (array("host", "key", "referer") as $cat) {
      if (empty($access[$cat]) || !is_array($access[$cat])) continue;

      // Walk a snapshot of the keys so entries can be removed while looping.
      // This replaces the former each() loop (each() is gone as of PHP 8.0).
      foreach (array_keys($access[$cat]) as $key) {
        if ($access[$cat][$key] == 1) unset($access[$cat][$key]);
      }
    }
  }
  // Counts one hit for the visitor's host in $access['host'].
  //
  // Host names are lowercased and cut down to their last two labels, or to
  // the last three when the final label is not one of the generic ones listed
  // in $glob and the label before it is shorter than three characters or is
  // itself listed in $glob. When 'dns' equals 'ip' the address minus its last
  // segment is used, followed by ".&nbsp;-".
  //
  // Nothing is counted when $BBC_IGNORE_BOTS is non-empty and $client has a
  // 'robot' element. A 'not_specified' host entry is removed in every case.
  function bbc_update_host_stat($client) {
    global $access, $BBC_IGNORE_BOTS;

    $counted = (empty($BBC_IGNORE_BOTS) || !isset($client['robot']));

    if ($counted) {
      if ($client['dns'] == $client['ip']) {
        // no host name available: keep the address without its last segment
        $cut = strrpos($client['ip'], ".");
        $host = trim(substr($client['ip'], 0, $cut)).".&nbsp;-";
      }
      else {
        $labels = explode(".", strtolower($client['dns']));
        $total = count($labels);

        if ($total > 2) {
          // these ones can have less than 3 characters as hostname
          $glob = array("org", "com", "edu", "mil", "net", "gov", "int");
          $second = $labels[$total - 2];
          $generic_end = in_array(trim($labels[$total - 1]), $glob);
          $short_or_generic = ((strlen(trim($second)) < 3) || in_array($second, $glob));
          $keep = ((!$generic_end) && $short_or_generic) ? 3 : 2;
          $host = trim(implode(".", array_slice($labels, -$keep)));
        }
        else {
          $host = trim(implode(".", $labels));
        }
      }

      if (!isset($access['host'][$host])) {
        $access['host'][$host] = 0;
      }
      $access['host'][$host]++;
    }

    if (isset($access['host']['not_specified'])) {
      unset($access['host']['not_specified']);
    }
  }
  // Maintains the list of viewed pages of connection $nr in
  // $last['traffic'][$nr]['views']; each entry is a "time|page|count" string.
  //
  // If $page (compared as an integer) matches the page of the newest entry,
  // that entry's counter and the connection's 'off' value are incremented;
  // otherwise a new "$time|$page|1" entry is appended. The list is then
  // sorted and cut down to its last $BBC_MAXVISIBLE entries.
  // Does nothing when the connection has no non-empty 'views' array.
  function bbc_update_visits($time, $page, $nr) {
    global $BBC_MAXVISIBLE, $last;

    if (empty($last['traffic'][$nr]['views'])) return;
    if (!is_array($last['traffic'][$nr]['views'])) return;

    $views = $last['traffic'][$nr]['views'];
    $newest = count($views) - 1;

    if (empty($last['traffic'][$nr]['off'])) {
      $last['traffic'][$nr]['off'] = 0;
    }

    list($seen_time, $seen_page, $seen_cnt) = explode("|", $views[$newest]);

    if (intval($seen_page) === intval($page)) {
      $seen_cnt++;
      $views[$newest] = $seen_time."|".$seen_page."|".$seen_cnt;
      $last['traffic'][$nr]['off']++;
    }
    else {
      $views[] = $time."|".$page."|1";
    }

    sort($views);

    // drop the leading entries that no longer fit into the visible window
    $total = count($views);
    if ($total > $BBC_MAXVISIBLE) {
      $views = array_slice($views, $total - $BBC_MAXVISIBLE);
    }

    $last['traffic'][$nr]['views'] = $views;
  }
  // The most visited pages ranking.
  //
  // Counts one hit for the page title in $connect['page'] inside
  // $access['page'] and registers the title in the $last['pages'] list.
  // Titles longer than 60 characters are stored as "..." followed by their
  // last 57 characters; statistics already stored under the full title are
  // moved to the shortened one.
  //
  // $connect  connection record; 'page' holds the title, 'uri' its address
  // $char     charset name, or a false value; when false and _BBC_MBSTRING is
  //           defined the charset is obtained from bbc_get_encoding()
  //
  // Returns $connect with 'page' replaced by the title's key in
  // $last['pages'] and with 'uri' removed.
  function bbc_update_page_stats($connect, $char) {
    global $access, $last;

    $use_mb = defined("_BBC_MBSTRING");
    $long_page = $connect['page'];
    $char = $char ? $char : ($use_mb ? bbc_get_encoding($long_page) : false);

    // unfortunately big5 is unsupported as internal charset for mbstring operations
    // Note: stristr() takes the haystack first. The charset name has to be the
    // haystack, otherwise a value such as "UTF-8" can never match.
    if ($use_mb && is_string($char) && ($char !== "") && ((stristr($char, "UTF") !== false) ||
        (stristr($char, "EUC-JP") !== false) || (stristr($char, "gb2312") !== false))) {
      mb_internal_encoding($char);
    }

    $over_60 = (($use_mb ? mb_strlen($long_page) : strlen($long_page)) > 60) ? 1 : 0;
    $connect['page'] = $over_60 ? "...".($use_mb ? mb_substr($long_page, -57) : substr($long_page, -57)) : $long_page;

    // Fix oversized page titles
    if (($over_60) && (isset($access['page'][$long_page]['count']))) {
      $access['page'][($connect['page'])]['count'] = $access['page'][$long_page]['count'];
      $access['page'][($connect['page'])]['uri'] = isset($access['page'][$long_page]['uri']) ?
                                                   $access['page'][$long_page]['uri'] : null;
      unset($access['page'][$long_page]);
    }

    if (!isset($access['page'][($connect['page'])]['count'])) {
      $access['page'][($connect['page'])]['count'] = 0;
    }
    $access['page'][($connect['page'])]['count']++;
    // a missing 'uri' is stored as null without triggering an undefined index notice
    $access['page'][($connect['page'])]['uri'] = isset($connect['uri']) ? $connect['uri'] : null;

    $last['pages'] = ((empty($last['pages'])) || (!is_array($last['pages']))) ? array() : $last['pages'];

    // rename an entry that is still listed under its full-length title
    if (($over_60) && (in_array($long_page, $last['pages']))) {
      $last['pages'][bbc_get_key($last['pages'], $long_page)] = $connect['page'];
    }
    if (!in_array($connect['page'], $last['pages'])) $last['pages'][] = $connect['page'];

    // from here on the connection refers to the page by its list key
    $connect['page'] = bbc_get_key($last['pages'], $connect['page']);

    if (isset($connect['uri'])) unset($connect['uri']);
    return $connect;
  }
  // Transfers the raw connection records in $new_access into $last['traffic']
  // and updates the permanent counters in $access along the way.
  //
  // A record whose 'ip' (and, unless $BBC_IGNORE_AGENT is set, whose agent as
  // judged by bbc_same_agent()) matches an entry of $last['traffic'] that is
  // less than $BBC_MAXTIME older is merged into that entry. Every other
  // record is appended as a new entry and additionally feeds the referrer,
  // keyword and host statistics.
  //
  // $new_access  array of raw connection records; anything empty or not an
  //              array is ignored
  //
  // Always returns true.
  function bbc_add_new_connections($new_access) {
    global $BBC_CUSTOM_CHARSET, $BBC_IGNORE_AGENT, $BBC_IGNORE_BOTS, $BBC_MAXTIME, $BBC_MAXVISIBLE, $BBC_NO_DNS,
           $BBC_NO_HITS, $access, $last;

    // nothing to do; also keeps foreach away from non-array input
    if (empty($new_access) || !is_array($new_access)) return true;

    $is_same = false;
    $char = !empty($BBC_CUSTOM_CHARSET) ? $BBC_CUSTOM_CHARSET : false;
    $old_cnt = (!empty($last['traffic']) && is_array($last['traffic'])) ? count($last['traffic']) : 0;

    foreach ($new_access as $connect) {
      $connect = bbc_update_connect($connect);

      // the "last reset on" flag initialisation
      // NOTE(review): the flag is only set while $access['time'] does not exist
      // at all; an existing $access['time'] without 'reset' is left as it is.
      if (!isset($access['time'])) {
        $access['time']['reset'] = $connect['time'];
      }

      // Stop processing if bots are completely ignored
      if (!empty($BBC_IGNORE_BOTS) && ($BBC_IGNORE_BOTS == 2) && !empty($connect['robot'])) continue;

      $connect['visits'] = 1;
      // Omit referrers coming from robots
      $connect['referer'] = !empty($connect['robot']) ? "unknown" : $connect['referer'];
      $this_connect = $connect['time'];
      $last_connect = !empty($access['time']['last']) ? $access['time']['last'] : 0;

      // Hits as base for time stats if desired
      if (empty($BBC_NO_HITS)) bbc_update_time_stat($this_connect, $last_connect);

      // The script viewed
      $connect = isset($connect['page']) ? bbc_update_page_stats($connect, $char) : $connect;
      $prev_recorded = 0;

      // Check if a similar connection has been recorded yet; walks backwards
      // from the newest entry and stops at the first one that is too old
      for ($l = $old_cnt - 1; ($l >= 0) && (($connect['time'] - $last['traffic'][$l]['time']) < $BBC_MAXTIME); $l--) {
        // the ternary binds weaker than &&: with $BBC_IGNORE_AGENT set only the
        // address is compared, otherwise address and agent both have to match
        if (!empty($BBC_IGNORE_AGENT) ? ($connect['ip'] == $last['traffic'][$l]['ip']) :
            ($connect['ip'] == $last['traffic'][$l]['ip']) &&
            ($is_same = bbc_same_agent($connect, $last['traffic'][$l]))) {
          $last['traffic'][$l] = bbc_update_detect($connect, $last['traffic'][$l], $is_same);
          $last['traffic'][$l]['page'] = $connect['page'];
          $last['traffic'][$l]['time'] = $this_connect;
          $last['traffic'][$l]['visits']++;

          // New referrer entry if it differs from the previous one
          if (isset($connect['referer']) && ($connect['referer'] != "unknown")) {
            $last['traffic'][$l] = bbc_ref_cmp($last['traffic'][$l], $connect, $char);
          }

          if ($BBC_MAXVISIBLE > 0) bbc_update_visits($connect['time'], $connect['page'], $l);

          // permanent data
          $access['stat']['totalvisits']++;
          $prev_recorded = 1;
          break;
        }
      }

      // Add new connection if it hasn't been recorded yet
      if (!$prev_recorded) {
        if (empty($access['stat']['totalvisits'])) $access['stat']['totalvisits'] = 0;
        if (empty($access['stat']['totalcount'])) $access['stat']['totalcount'] = 0;

        // with $BBC_NO_DNS set the address itself stands in for the host name
        $connect['dns'] = !empty($BBC_NO_DNS) ? $connect['ip'] : bbc_clean(gethostbyaddr($connect['ip']));
        $connect['ext'] = bbc_get_extension($connect['dns'], $connect['ip']);
        $last['traffic'][$old_cnt] = bbc_update_access($connect);

        // Visit stats
        $last['traffic'][$old_cnt]['views'][] = $last['traffic'][$old_cnt]['time']."|"
                                                .$last['traffic'][$old_cnt]['page']."|1";

        // Unique visits as base for time stats if desired
        if (!empty($BBC_NO_HITS)) bbc_update_time_stat($this_connect, $last_connect);

        // Referrers collection will be updated all along with the keywords if available
        if (isset($connect['referer']) && ($connect['referer'] != "unknown")) {
          bbc_update_referer_stat(bbc_parse_ref($connect['referer']));

          // Search engine keywords in detailed stats
          $flt_search = bbc_get_keywords($connect['referer'], $char);
          $last['traffic'][$old_cnt]['search'] = ($flt_search !== false) ? implode(" ", $flt_search) : "-";

          // Search engine keywords in global stats
          if ($flt_search !== false) bbc_update_key_stats($flt_search);
        }

        // The host listing
        if (isset($connect['dns']) && isset($connect['ip'])) bbc_update_host_stat($last['traffic'][$old_cnt]);

        $access['stat']['totalvisits']++;
        $access['stat']['totalcount']++;
        // later records of this batch may match the entry just added
        $old_cnt++;
      }
    }
    return true;
  }
  // Trims $last['traffic'].
  //
  // The list is emptied when $BBC_MAXVISIBLE is not positive or when there is
  // no traffic array. Otherwise the last $BBC_MAXVISIBLE entries are always
  // kept, and every entry before them is removed if it is more than
  // $BBC_MAXTIME older than $BBC_TIMESTAMP shifted by $BBC_TIME_OFFSET
  // minutes. What remains is sorted ascending by time, which also renumbers
  // the keys.
  function bbc_update_last() {
    global $last, $BBC_MAXTIME, $BBC_MAXVISIBLE, $BBC_TIMESTAMP, $BBC_TIME_OFFSET;

    $has_traffic = (!empty($last['traffic']) && is_array($last['traffic']));

    if (($BBC_MAXVISIBLE <= 0) || !$has_traffic) {
      $last['traffic'] = array();
      return;
    }

    $now = $BBC_TIMESTAMP + ($BBC_TIME_OFFSET * 60);
    $idx = count($last['traffic']) - 1 - $BBC_MAXVISIBLE;

    while ($idx >= 0) {
      $age = $now - $last['traffic'][$idx]['time'];
      if ($age > $BBC_MAXTIME) {
        unset($last['traffic'][$idx]);
      }
      $idx--;
    }

    usort($last['traffic'], "bbc_sort_time_sc");
  }
  // Cleans up the global $access array when upgrading from older versions:
  // drops obsolete entries, re-sums the referrers through bbc_sum_ref(),
  // removes the empty keyword, lowercases some browser and robot names and
  // moves entries wrongly counted as browsers over to the robots. Finally
  // sets $access['bbc048'] to 1.
  //
  // Whenever a counter is renamed or moved and its destination already
  // exists, both counts are added up instead of overwriting the destination.
  function bbc_update() {
    global $access;

    // Upgrade from older versions.
    foreach (array("bbc046c", "bugs", "last", "refclean", "uaclean") as $i) {
      if (isset($access[$i])) unset($access[$i]);
    }

    // referrer cleanup
    if (isset($access['referer'])) {
      if (isset($access['referer']['not_specified'])) unset($access['referer']['not_specified']);
      $access['referer'] = bbc_sum_ref($access['referer']);
    }

    // fixed counting of empty keywords
    if (isset($access['key']) && isset($access['key'][''])) unset($access['key']['']);

    if (isset($access['stat']['browser'])) {
      // fix browsers with capital letters (must be lowercase)
      foreach (array("Elinks", "Epiphany") as $i) {
        if (isset($access['stat']['browser'][$i])) {
          $lower = strtolower($i);
          $moved = $access['stat']['browser'][$i];

          if (isset($access['stat']['browser'][$lower])) $access['stat']['browser'][$lower] += $moved;
          else $access['stat']['browser'][$lower] = $moved;

          unset($access['stat']['browser'][$i]);
        }
      }

      // fix wrong browser assignments
      foreach (array("java", "wwwc", "libwww") as $i) {
        if (isset($access['stat']['browser'][$i])) {
          $moved = $access['stat']['browser'][$i];

          if (isset($access['stat']['robot'][$i])) $access['stat']['robot'][$i] += $moved;
          else $access['stat']['robot'][$i] = $moved;

          // the moved hits are taken off the "other" operating system counter;
          // skipped when that counter does not exist so that no negative
          // counter is created
          if (isset($access['stat']['os']['other'])) $access['stat']['os']['other'] -= $moved;

          unset($access['stat']['browser'][$i]);
        }
      }
    }

    if (isset($access['stat']['robot'])) {
      // fix robots with capital letters (must be lowercase)
      foreach (array("MyRSS", "PhpDig") as $i) {
        if (isset($access['stat']['robot'][$i])) {
          $lower = strtolower($i);
          $moved = $access['stat']['robot'][$i];

          if (isset($access['stat']['robot'][$lower])) $access['stat']['robot'][$lower] += $moved;
          else $access['stat']['robot'][$lower] = $moved;

          unset($access['stat']['robot'][$i]);
        }
      }
    }

    // no more happy cycling ;-)
    $access['bbc048'] = 1;
  }
  255. ?>