PageRenderTime 54ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/core/Tracker/VisitExcluded.php

https://github.com/CodeYellowBV/piwik
PHP | 277 lines | 168 code | 26 blank | 83 comment | 43 complexity | d6054112f14327707726c3da4bf20edd MD5 | raw file
Possible License(s): LGPL-3.0, JSON, MIT, GPL-3.0, LGPL-2.1, GPL-2.0, AGPL-1.0, BSD-2-Clause, BSD-3-Clause
  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik\Tracker;
  10. use Piwik\Common;
  11. use Piwik\Config;
  12. use Piwik\IP;
  13. use Piwik\Piwik;
  14. /**
  15. * This class contains the logic to exclude some visitors from being tracked as per user settings
  16. */
  17. class VisitExcluded
  18. {
  19. /**
  20. * @param Request $request
  21. * @param bool|string $ip
  22. * @param bool|string $userAgent
  23. */
  24. public function __construct(Request $request, $ip = false, $userAgent = false)
  25. {
  26. if ($ip === false) {
  27. $ip = $request->getIp();
  28. }
  29. if ($userAgent === false) {
  30. $userAgent = $request->getUserAgent();
  31. }
  32. $this->request = $request;
  33. $this->idSite = $request->getIdSite();
  34. $this->userAgent = $userAgent;
  35. $this->ip = $ip;
  36. }
  37. /**
  38. * Test if the current visitor is excluded from the statistics.
  39. *
  40. * Plugins can for example exclude visitors based on the
  41. * - IP
  42. * - If a given cookie is found
  43. *
  44. * @return bool True if the visit must not be saved, false otherwise
  45. */
  46. public function isExcluded()
  47. {
  48. $excluded = false;
  49. if ($this->isNonHumanBot()) {
  50. Common::printDebug('Search bot detected, visit excluded');
  51. $excluded = true;
  52. }
  53. /*
  54. * Requests built with piwik.js will contain a rec=1 parameter. This is used as
  55. * an indication that the request is made by a JS enabled device. By default, Piwik
  56. * doesn't track non-JS visitors.
  57. */
  58. if (!$excluded) {
  59. $toRecord = $this->request->getParam($parameterForceRecord = 'rec');
  60. if (!$toRecord) {
  61. Common::printDebug(@$_SERVER['REQUEST_METHOD'] . ' parameter ' . $parameterForceRecord . ' not found in URL, request excluded');
  62. $excluded = true;
  63. Common::printDebug("'$parameterForceRecord' parameter not found.");
  64. }
  65. }
  66. /**
  67. * Triggered on every tracking request.
  68. *
  69. * This event can be used to tell the Tracker not to record this particular action or visit.
  70. *
  71. * @param bool &$excluded Whether the request should be excluded or not. Initialized
  72. * to `false`. Event subscribers should set it to `true` in
  73. * order to exclude the request.
  74. */
  75. Piwik::postEvent('Tracker.isExcludedVisit', array(&$excluded));
  76. /*
  77. * Following exclude operations happen after the hook.
  78. * These are of higher priority and should not be overwritten by plugins.
  79. */
  80. // Checking if the Piwik ignore cookie is set
  81. if (!$excluded) {
  82. $excluded = $this->isIgnoreCookieFound();
  83. if ($excluded) {
  84. Common::printDebug("Ignore cookie found.");
  85. }
  86. }
  87. // Checking for excluded IPs
  88. if (!$excluded) {
  89. $excluded = $this->isVisitorIpExcluded();
  90. if ($excluded) {
  91. Common::printDebug("IP excluded.");
  92. }
  93. }
  94. // Check if user agent should be excluded
  95. if (!$excluded) {
  96. $excluded = $this->isUserAgentExcluded();
  97. if ($excluded) {
  98. Common::printDebug("User agent excluded.");
  99. }
  100. }
  101. // Check if Referrer URL is a known spam
  102. if (!$excluded) {
  103. $excluded = $this->isReferrerSpamExcluded();
  104. if ($excluded) {
  105. Common::printDebug("Referrer URL is blacklisted as spam.");
  106. }
  107. }
  108. if (!$excluded) {
  109. if ($this->isPrefetchDetected()) {
  110. $excluded = true;
  111. Common::printDebug("Prefetch request detected, not a real visit so we Ignore this visit/pageview");
  112. }
  113. }
  114. if ($excluded) {
  115. Common::printDebug("Visitor excluded.");
  116. return true;
  117. }
  118. return false;
  119. }
  120. protected function isPrefetchDetected()
  121. {
  122. return (isset($_SERVER["HTTP_X_PURPOSE"])
  123. && in_array($_SERVER["HTTP_X_PURPOSE"], array("preview", "instant")))
  124. || (isset($_SERVER['HTTP_X_MOZ'])
  125. && $_SERVER['HTTP_X_MOZ'] == "prefetch");
  126. }
  127. /**
  128. * Live/Bing/MSN bot and Googlebot are evolving to detect cloaked websites.
  129. * As a result, these sophisticated bots exhibit characteristics of
  130. * browsers (cookies enabled, executing JavaScript, etc).
  131. *
  132. * @return boolean
  133. */
  134. protected function isNonHumanBot()
  135. {
  136. $allowBots = $this->request->getParam('bots');
  137. return !$allowBots
  138. // Seen in the wild
  139. && (strpos($this->userAgent, 'Googlebot') !== false // Googlebot
  140. || strpos($this->userAgent, 'Google Web Preview') !== false // Google Instant
  141. || strpos($this->userAgent, 'AdsBot-Google') !== false // Google Adwords landing pages
  142. || strpos($this->userAgent, 'Google Page Speed Insights') !== false // #4049
  143. || strpos($this->userAgent, 'Google (+https://developers.google.com') !== false // Google Snippet https://developers.google.com/+/web/snippet/
  144. || strpos($this->userAgent, 'facebookexternalhit') !== false // http://www.facebook.com/externalhit_uatext.php
  145. || strpos($this->userAgent, 'baidu') !== false // Baidu
  146. || strpos($this->userAgent, 'bingbot') !== false // Bingbot
  147. || strpos($this->userAgent, 'BingPreview') !== false // BingPreview
  148. || strpos($this->userAgent, 'YottaaMonitor') !== false // Yottaa
  149. || strpos($this->userAgent, 'CloudFlare') !== false // CloudFlare-AlwaysOnline
  150. // Added as they are popular bots
  151. || strpos($this->userAgent, 'pingdom') !== false // pingdom
  152. || strpos($this->userAgent, 'yandex') !== false // yandex
  153. || strpos($this->userAgent, 'exabot') !== false // Exabot
  154. || strpos($this->userAgent, 'sogou') !== false // Sogou
  155. || strpos($this->userAgent, 'soso') !== false // Soso
  156. || IP::isIpInRange($this->ip, $this->getBotIpRanges()));
  157. }
  158. protected function getBotIpRanges()
  159. {
  160. return array(
  161. // Google
  162. '66.249.0.0/16',
  163. '64.233.172.0/24',
  164. // Live/Bing/MSN
  165. '64.4.0.0/18',
  166. '65.52.0.0/14',
  167. '157.54.0.0/15',
  168. '157.56.0.0/14',
  169. '157.60.0.0/16',
  170. '207.46.0.0/16',
  171. '207.68.128.0/18',
  172. '207.68.192.0/20',
  173. '131.253.26.0/20',
  174. '131.253.24.0/20',
  175. // Yahoo
  176. '72.30.198.0/20',
  177. '72.30.196.0/20',
  178. '98.137.207.0/20',
  179. // Chinese bot hammering websites
  180. '1.202.218.8'
  181. );
  182. }
  183. /**
  184. * Looks for the ignore cookie that users can set in the Piwik admin screen.
  185. * @return bool
  186. */
  187. protected function isIgnoreCookieFound()
  188. {
  189. if (IgnoreCookie::isIgnoreCookieFound()) {
  190. Common::printDebug('Piwik ignore cookie was found, visit not tracked.');
  191. return true;
  192. }
  193. return false;
  194. }
  195. /**
  196. * Checks if the visitor ip is in the excluded list
  197. *
  198. * @return bool
  199. */
  200. protected function isVisitorIpExcluded()
  201. {
  202. $websiteAttributes = Cache::getCacheWebsiteAttributes($this->idSite);
  203. if (!empty($websiteAttributes['excluded_ips'])) {
  204. if (IP::isIpInRange($this->ip, $websiteAttributes['excluded_ips'])) {
  205. Common::printDebug('Visitor IP ' . IP::N2P($this->ip) . ' is excluded from being tracked');
  206. return true;
  207. }
  208. }
  209. return false;
  210. }
  211. /**
  212. * Returns true if the specified user agent should be excluded for the current site or not.
  213. *
  214. * Visits whose user agent string contains one of the excluded_user_agents strings for the
  215. * site being tracked (or one of the global strings) will be excluded.
  216. *
  217. * @internal param string $this ->userAgent The user agent string.
  218. * @return bool
  219. */
  220. protected function isUserAgentExcluded()
  221. {
  222. $websiteAttributes = Cache::getCacheWebsiteAttributes($this->idSite);
  223. if (!empty($websiteAttributes['excluded_user_agents'])) {
  224. foreach ($websiteAttributes['excluded_user_agents'] as $excludedUserAgent) {
  225. // if the excluded user agent string part is in this visit's user agent, this visit should be excluded
  226. if (stripos($this->userAgent, $excludedUserAgent) !== false) {
  227. return true;
  228. }
  229. }
  230. }
  231. return false;
  232. }
  233. /**
  234. * Returns true if the Referrer is a known spammer.
  235. *
  236. * @return bool
  237. */
  238. protected function isReferrerSpamExcluded()
  239. {
  240. $spamHosts = Config::getInstance()->Tracker['referrer_urls_spam'];
  241. $spamHosts = explode(",", $spamHosts);
  242. $referrerUrl = $this->request->getParam('urlref');
  243. foreach($spamHosts as $spamHost) {
  244. if( strpos($referrerUrl, $spamHost) !== false) {
  245. Common::printDebug('Referrer URL is a known spam: ' . $spamHost);
  246. return true;
  247. }
  248. }
  249. return false;
  250. }
  251. }