PageRenderTime 27ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/wp-content/plugins/all-in-one-seo-pack/app/Common/Tools/BadBotBlocker.php

https://gitlab.com/ebrjose/comcebu
PHP | 453 lines | 323 code | 25 blank | 105 comment | 29 complexity | 62a47f063e691ca2fa3001701aa59db9 MD5 | raw file
  1. <?php
  2. namespace AIOSEO\Plugin\Common\Tools;
  // Stop immediately when this file is requested directly instead of being loaded by WordPress.
  defined( 'ABSPATH' ) || exit;
  7. class BadBotBlocker {
  /**
   * Logger used to record blocked requests.
   *
   * Stays null until init() has been able to create the log file and attach a handler to it.
   *
   * @since 4.0.0
   *
   * @var \AIOSEO\Vendor\Monolog\Logger|null
   */
  private $log;
  /**
   * Default blocklist of referer domains.
   *
   * Used by isBadReferer() when the custom blocklist option is disabled. Each entry is quoted and
   * matched case-insensitively anywhere inside the request's referer.
   *
   * @since 4.0.0
   *
   * @var string[]
   */
  private $refererList = [
      'semalt.com',
      'kambasoft.com',
      'savetubevideo.com',
      'buttons-for-website.com',
      'sharebutton.net',
      'soundfrost.org',
      'srecorder.com',
      'softomix.com',
      'softomix.net',
      'myprintscreen.com',
      'joinandplay.me',
      'fbfreegifts.com',
      'openmediasoft.com',
      'zazagames.org',
      'extener.org',
      'openfrost.com',
      'openfrost.net',
      'googlsucks.com',
      'best-seo-offer.com',
      'buttons-for-your-website.com',
      'www.Get-Free-Traffic-Now.com',
      'best-seo-solution.com',
      'buy-cheap-online.info',
      'site3.free-share-buttons.com',
      'webmaster-traffic.com',
  ];
  /**
   * Default blocklist of user agent fragments.
   *
   * Used by isBadBot() when the custom blocklist option is disabled. Each entry is quoted and
   * matched case-insensitively anywhere inside the request's user agent.
   *
   * @since 4.0.0
   *
   * @var string[]
   */
  private $botList = [
      'Abonti',
      'aggregator',
      'AhrefsBot',
      'asterias',
      'BDCbot',
      'BLEXBot',
      'BuiltBotTough',
      'Bullseye',
      'BunnySlippers',
      'ca-crawler',
      'CCBot',
      'Cegbfeieh',
      'CheeseBot',
      'CherryPicker',
      'CopyRightCheck',
      'cosmos',
      'Crescent',
      'discobot',
      'DittoSpyder',
      'DotBot',
      'Download Ninja',
      'EasouSpider',
      'EmailCollector',
      'EmailSiphon',
      'EmailWolf',
      'EroCrawler',
      'ExtractorPro',
      'Fasterfox',
      'FeedBooster',
      'Foobot',
      'Genieo',
      'grub-client',
      'Harvest',
      'hloader',
      'httplib',
      'HTTrack',
      'humanlinks',
      'ieautodiscovery',
      'InfoNaviRobot',
      'IstellaBot',
      'Java/1.',
      'JennyBot',
      'k2spider',
      'Kenjin Spider',
      'Keyword Density/0.9',
      'larbin',
      'LexiBot',
      'libWeb',
      'libwww',
      'LinkextractorPro',
      'linko',
      'LinkScan/8.1a Unix',
      'LinkWalker',
      'LNSpiderguy',
      'lwp-trivial',
      'magpie',
      'Mata Hari',
      'MaxPointCrawler',
      'MegaIndex',
      'Microsoft URL Control',
      'MIIxpc',
      'Mippin',
      'Missigua Locator',
      'Mister PiX',
      'MJ12bot',
      'moget',
      'MSIECrawler',
      'NetAnts',
      'NICErsPRO',
      'Niki-Bot',
      'NPBot',
      'Nutch',
      'Offline Explorer',
      'Openfind',
      'panscient.com',
      // NOTE(review): this entry is quoted before matching, so it only matches the literal text
      // "PHP/5.{" - presumably a leftover from a regex quantifier; confirm the intent.
      'PHP/5.{',
      'ProPowerBot/2.14',
      'ProWebWalker',
      'Python-urllib',
      'QueryN Metasearch',
      'RepoMonkey',
      'SISTRIX',
      'sitecheck.Internetseer.com',
      'SiteSnagger',
      'SnapPreviewBot',
      'Sogou',
      'SpankBot',
      'spanner',
      'spbot',
      'Spinn3r',
      'suzuran',
      'Szukacz/1.4',
      'Teleport',
      'Telesoft',
      'The Intraformant',
      'TheNomad',
      'TightTwatBot',
      'Titan',
      'toCrawl/UrlDispatcher',
      'True_Robot',
      'turingos',
      'TurnitinBot',
      'UbiCrawler',
      'UnisterBot',
      'URLy Warning',
      'VCI',
      'WBSearchBot',
      'Web Downloader/6.9',
      'Web Image Collector',
      'WebAuto',
      'WebBandit',
      'WebCopier',
      'WebEnhancer',
      'WebmasterWorldForumBot',
      'WebReaper',
      'WebSauger',
      'Website Quester',
      'Webster Pro',
      'WebStripper',
      'WebZip',
      'Wotbox',
      'wsr-agent',
      'WWW-Collector-E',
      'Xenu',
      'Zao',
      'Zeus',
      'ZyBORG',
      'coccoc',
      'Incutio',
      'lmspider',
      'memoryBot',
      'serf',
      'Unknown',
      'uptime files',
  ];
  /**
   * Initialize the blocker.
   *
   * Does nothing unless the "block bots" option is enabled. Otherwise it prepares the log file
   * (when the log directory can be created) and then answers the current request with a 503
   * status and stops execution when either the user agent is rejected by allowBot() or, if
   * referer blocking is enabled, the referer is on the blocklist.
   *
   * @since 4.0.0
   *
   * @return void
   */
  public function init() {
      // The full option chain is spelled out on every read, exactly as the rest of the class does.
      if ( ! aioseo()->options->deprecated->tools->blocker->blockBots ) {
          return;
      }

      $uploadDirectory = wp_upload_dir();
      $logDirectory    = $uploadDirectory['basedir'] . '/aioseo/logs/';
      if ( wp_mkdir_p( $logDirectory ) ) {
          $wpfs     = aioseo()->helpers->wpfs();
          $filePath = $logDirectory . 'aioseo-bad-bot-blocker.log';
          if ( ! @$wpfs->exists( $filePath ) ) {
              @$wpfs->touch( $filePath );
          }

          // Only attach the logger once the file is really there; track() is a no-op without it.
          if ( @$wpfs->exists( $filePath ) ) {
              $this->log = new \AIOSEO\Vendor\Monolog\Logger( 'aioseo-bad-bot-blocker' );
              $this->log->pushHandler( new \AIOSEO\Vendor\Monolog\Handler\StreamHandler( $filePath ) );
          }
      }

      $blockReferer = aioseo()->options->deprecated->tools->blocker->blockReferer;
      $track        = aioseo()->options->deprecated->tools->blocker->track;
      $ip           = ! empty( $_SERVER['REMOTE_ADDR'] ) ? $_SERVER['REMOTE_ADDR'] : '';
      $ip           = aioseo()->helpers->validateIp( $ip ) ? $ip : __( '(Invalid IP)', 'all-in-one-seo-pack' );

      if ( ! $this->allowBot() ) {
          if ( $track ) {
              // The user agent is attacker-controlled and may be missing altogether (the
              // 'aioseo_allow_bot' filter can reject a request without one). Sanitizing strips
              // line breaks and markup so a crafted header cannot forge extra log lines.
              $userAgent = ! empty( $_SERVER['HTTP_USER_AGENT'] )
                  ? sanitize_text_field( wp_unslash( $_SERVER['HTTP_USER_AGENT'] ) )
                  : '';
              // Translators: 1 - The IP address. 2 - The user agent.
              $this->track( sprintf( __( 'Blocked bot with IP %1$s -- matched user agent %2$s found in blocklist.', 'all-in-one-seo-pack' ), $ip, $userAgent ) );
          }
          status_header( 503 );
          exit;
      }

      if ( $blockReferer && $this->isBadReferer() ) {
          if ( $track ) {
              $referer = ! empty( $_SERVER['HTTP_REFERER'] ) ? esc_url_raw( wp_unslash( $_SERVER['HTTP_REFERER'] ) ) : '';
              // Translators: 1 - The IP address. 2 - The referer.
              $this->track( sprintf( __( 'Blocked bot with IP %1$s -- matched referer %2$s found in blocklist.', 'all-in-one-seo-pack' ), $ip, $referer ) );
          }
          // Sent once, after logging, mirroring the user agent branch above.
          status_header( 503 );
          exit;
      }
  }
  /**
   * Reports how large the bad bot blocker log currently is.
   *
   * @since 4.0.0
   *
   * @return integer The size reported by the filesystem helper, or 0 when the log file does not exist.
   */
  public function getLogSize() {
      $uploads = wp_upload_dir();
      $logFile = $uploads['basedir'] . '/aioseo/logs/' . 'aioseo-bad-bot-blocker.log';
      $fs      = aioseo()->helpers->wpfs();

      return @$fs->exists( $logFile ) ? @$fs->size( $logFile ) : 0;
  }
  /**
   * Clears the log for the bad bot blocker.
   *
   * Overwrites the log file with an empty string when it exists; a missing file is left alone.
   *
   * @since 4.0.0
   *
   * @return integer The size of the log file after clearing, as reported by getLogSize().
   */
  public function clearLog() {
      $uploadDirectory = wp_upload_dir();
      $logDirectory    = $uploadDirectory['basedir'] . '/aioseo/logs/';
      $filePath        = $logDirectory . 'aioseo-bad-bot-blocker.log';
      $wpfs            = aioseo()->helpers->wpfs();
      if ( @$wpfs->exists( $filePath ) ) {
          @$wpfs->put_contents( $filePath, '' );
      }

      // getLogSize() declares no parameters and resolves the log path on its own.
      return $this->getLogSize();
  }
  /**
   * Exposes the built-in list of blocked user agent fragments.
   *
   * @since 4.0.0
   *
   * @return array The bot list.
   */
  public function getBotList() {
      $bots = $this->botList;

      return $bots;
  }
  /**
   * Exposes the built-in list of blocked referer domains.
   *
   * @since 4.0.0
   *
   * @return array The referer list.
   */
  public function getRefererList() {
      $referers = $this->refererList;

      return $referers;
  }
  /**
   * Decides whether the current request may pass the blocker.
   *
   * @since 4.0.0
   *
   * @return boolean The value of the 'aioseo_allow_bot' filter; the default is false only for a
   *                 logged-out request that is on the bad bot list and is not a verified good bot.
   */
  private function allowBot() {
      // Checked in this order so a verified good bot short-circuits the remaining checks.
      $allow = $this->isGoodBot() || ! $this->isBadBot() || is_user_logged_in();

      return apply_filters( 'aioseo_allow_bot', $allow );
  }
  /**
   * Is this a good bot?
   *
   * A request counts as a good bot when its user agent contains one of the known crawler names
   * and its IP address passes a forward-confirmed reverse DNS check against the trusted host
   * suffixes.
   *
   * @see Original code, thanks to Sean M. Brown.
   * @link http://smbrown.wordpress.com/2009/04/29/verify-googlebot-forward-reverse-dns/
   *
   * @since 4.0.0
   *
   * @return boolean True if this is a good bot.
   */
  private function isGoodBot() {
      // Map of user agent fragment => hostname suffix the crawler's IP must resolve to.
      $botList = [
          'Yahoo! Slurp' => 'crawl.yahoo.net',
          'googlebot'    => '.googlebot.com',
          'msnbot'       => 'search.msn.com',
      ];
      $botList = apply_filters( 'aioseo_good_bot_list', $botList );
      if ( empty( $botList ) || ! is_array( $botList ) ) {
          return false;
      }

      if ( empty( $_SERVER['HTTP_USER_AGENT'] ) || empty( $_SERVER['REMOTE_ADDR'] ) ) {
          return false;
      }

      // The user agent names are the array KEYS; passing the array itself would build the
      // pattern from the hostnames and the well-known crawlers would never be recognised.
      $uas = $this->prepareList( array_keys( $botList ) );
      if ( '' === $uas || ! preg_match( '/' . $uas . '/i', $_SERVER['HTTP_USER_AGENT'] ) ) {
          return false;
      }

      $ip       = $_SERVER['REMOTE_ADDR'];
      $hostname = gethostbyaddr( $ip );

      // gethostbyaddr() gives false for malformed input and echoes the IP back when the lookup fails.
      if ( empty( $hostname ) || $hostname === $ip ) {
          return false;
      }

      // Forward confirmation: the claimed hostname has to resolve back to the very same IP.
      if ( gethostbyname( $hostname ) !== $ip ) {
          return false;
      }

      $hosts = [];
      foreach ( $botList as $host ) {
          $host = is_string( $host ) ? trim( $host ) : '';

          // An empty suffix would reduce to a bare "$" and accept every hostname.
          if ( '' !== $host ) {
              $hosts[] = preg_quote( $host, '/' ) . '$';
          }
      }

      if ( empty( $hosts ) ) {
          return false;
      }

      return 1 === preg_match( '/(?:' . implode( '|', $hosts ) . ')/i', $hostname );
  }
  /**
   * Is this a bad bot?
   *
   * Checks the request's user agent against either the custom bot list (one entry per line) or
   * the built-in list, after passing it through the 'aioseo_bad_bot_list' filter.
   *
   * @since 4.0.0
   *
   * @return boolean True if it is a bad bot.
   */
  private function isBadBot() {
      $botList = aioseo()->options->deprecated->tools->blocker->custom->enable
          ? explode( "\n", (string) aioseo()->options->deprecated->tools->blocker->custom->bots )
          : $this->botList;
      $botList = apply_filters( 'aioseo_bad_bot_list', $botList );

      if ( empty( $botList ) || ! is_array( $botList ) || ! isset( $_SERVER['HTTP_USER_AGENT'] ) ) {
          return false;
      }

      $uas = $this->prepareList( $botList );

      // A blank custom list explodes to [''] and produces an empty pattern; "//i" matches every
      // user agent and would lock out all logged-out visitors, so treat it as "no bad bots".
      if ( '' === $uas ) {
          return false;
      }

      return 1 === preg_match( '/' . $uas . '/i', $_SERVER['HTTP_USER_AGENT'] );
  }
  /**
   * Is this a bad referer?
   *
   * Checks the request's referer against either the custom referer list (one entry per line) or
   * the built-in list, after passing it through the 'aioseo_bad_referer_list' filter.
   *
   * @since 4.0.0
   *
   * @return boolean True if this is a bad referer.
   */
  private function isBadReferer() {
      $refererList = aioseo()->options->deprecated->tools->blocker->custom->enable
          ? explode( "\n", (string) aioseo()->options->deprecated->tools->blocker->custom->referer )
          : $this->refererList;
      $refererList = apply_filters( 'aioseo_bad_referer_list', $refererList );

      if ( empty( $refererList ) || ! is_array( $refererList ) || empty( $_SERVER['HTTP_REFERER'] ) ) {
          return false;
      }

      $regex = $this->prepareList( $refererList );

      // A blank custom list explodes to [''] and produces an empty pattern; "//i" matches every
      // referer and would block all referred traffic, so treat it as "no bad referers".
      if ( '' === $regex ) {
          return false;
      }

      $referer = esc_url_raw( wp_unslash( $_SERVER['HTTP_REFERER'] ) );

      return 1 === preg_match( '/' . $regex . '/i', $referer );
  }
  /**
   * Turns a list of plain strings into one regex alternation.
   *
   * Every entry is trimmed and entries that are empty() afterwards are dropped. The remaining
   * entries are escaped with preg_quote() and joined with "|". No delimiters are added.
   *
   * @since ?
   *
   * @param  array  $list  The entries to combine.
   * @param  string $quote The regex delimiter that has to be escaped inside the entries.
   * @return string        The alternation, or an empty string when no entry is left.
   */
  private function prepareList( $list, $quote = '/' ) {
      $alternatives = [];
      foreach ( $list as $entry ) {
          $entry = trim( $entry );
          if ( empty( $entry ) ) {
              continue;
          }

          $alternatives[] = preg_quote( $entry, $quote );
      }

      return implode( '|', $alternatives );
  }
  /**
   * Writes a message about a blocked request to the log.
   *
   * Nothing happens while no logger has been set up.
   *
   * @since 4.0.0
   *
   * @param  string $message The message to log.
   * @return void
   */
  public function track( $message ) {
      if ( ! $this->log ) {
          return;
      }

      $this->log->info( $message );
  }
  428. }