PageRenderTime 50ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/stats/vendor/piwik/device-detector/regexes/bots.yml

https://bitbucket.org/webstar1987923/mycampaignsio
YAML | 1685 lines | 1449 code | 229 blank | 7 comment | 0 complexity | 2d22682d0c62f25a234b808ad3892c8e MD5 | raw file
Possible License(s): BSD-3-Clause, MPL-2.0-no-copyleft-exception, GPL-3.0, GPL-2.0, WTFPL, BSD-2-Clause, LGPL-2.1, Apache-2.0, MIT, AGPL-3.0
  1. ###############
  2. # Device Detector - The Universal Device Detection library for parsing User Agents
  3. #
  4. # @link http://piwik.org
  5. # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
  6. ###############
  7. - regex: '360Spider(-Image|-Video)?'
  8. name: '360Spider'
  9. category: 'Search bot'
  10. url: 'http://www.so.com/help/help_3_2.html'
  11. producer:
  12. name: 'Online Media Group, Inc.'
  13. url: ''
  14. - regex: 'Aboundex'
  15. name: 'Aboundexbot'
  16. category: 'Search bot'
  17. url: 'http://www.aboundex.com/crawler/'
  18. producer:
  19. name: 'Aboundex.com'
  20. url: 'http://www.aboundex.com'
  21. - regex: 'AcoonBot'
  22. name: 'Acoon'
  23. category: 'Search bot'
  24. url: 'http://www.acoon.de/robot.asp'
  25. producer:
  26. name: 'Acoon GmbH'
  27. url: 'http://www.acoon.de'
  28. - regex: 'AddThis\.com'
  29. name: 'AddThis.com'
  30. category: 'Social Media Agent'
  31. url: ''
  32. producer:
  33. name: 'Clearspring Technologies, Inc.'
  34. url: 'http://www.clearspring.com'
  35. - regex: 'AhrefsBot'
  36. name: 'aHrefs Bot'
  37. category: 'Crawler'
  38. url: 'http://ahrefs.com/robot'
  39. producer:
  40. name: 'Ahrefs Pte Ltd'
  41. url: 'http://ahrefs.com/robot'
  42. - regex: 'ia_archiver|alexabot|verifybot'
  43. name: 'Alexa Crawler'
  44. category: 'Search bot'
  45. url: 'https://alexa.zendesk.com/hc/en-us/sections/200100794-Crawlers'
  46. producer:
  47. name: 'Alexa Internet'
  48. url: 'http://www.alexa.com'
  49. - regex: 'AmorankSpider'
  50. name: 'Amorank Spider'
  51. category: 'Crawler'
  52. url: 'http://amorank.com/webcrawler.html'
  53. producer:
  54. name: 'Amorank'
  55. url: 'http://www.amorank.com'
  56. - regex: 'ApacheBench'
  57. name: 'ApacheBench'
  58. category: 'Benchmark'
  59. url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
  60. producer:
  61. name: 'The Apache Software Foundation'
  62. url: 'http://www.apache.org/foundation/'
  63. - regex: 'Applebot'
  64. name: 'Applebot'
  65. category: 'Crawler'
  66. url: 'http://www.apple.com/go/applebot'
  67. producer:
  68. name: 'Apple Inc'
  69. url: 'http://www.apple.com'
  70. - regex: 'Castro 2, Episode Duration Lookup'
  71. name: 'Castro 2'
  72. category: 'Service Agent'
  73. url: 'http://supertop.co/castro/'
  74. producer:
  75. name: 'Supertop'
  76. url: 'http://supertop.co'
  77. - regex: 'Curious George'
  78. name: 'Analytics SEO Crawler'
  79. category: 'Crawler'
  80. url: 'http://www.analyticsseo.com/crawler'
  81. producer:
  82. name: 'Analytics SEO'
  83. url: 'http://www.analyticsseo.com'
  84. - regex: 'archive\.org_bot|special_archiver'
  85. name: 'archive.org bot'
  86. category: 'Crawler'
  87. url: 'http://www.archive.org/details/archive.org_bot'
  88. producer:
  89. name: 'The Internet Archive'
  90. url: 'http://www.archive.org'
  91. - regex: 'Ask Jeeves/Teoma'
  92. name: 'Ask Jeeves'
  93. category: 'Search bot'
  94. url: ''
  95. producer:
  96. name: 'Ask Jeeves Inc.'
  97. url: 'http://www.ask.com'
  98. - regex: 'Backlink-Check\.de'
  99. name: 'Backlink-Check.de'
  100. category: 'Crawler'
  101. url: 'http://www.backlink-check.de/bot.html'
  102. producer:
  103. name: 'Mediagreen Medienservice'
  104. url: 'http://www.backlink-check.de'
  105. - regex: 'BacklinkCrawler'
  106. name: 'BacklinkCrawler'
  107. category: 'Crawler'
  108. url: 'http://www.backlinktest.com/crawler.html'
  109. producer:
  110. name: '2.0Promotion GbR'
  111. url: 'http://www.backlinktest.com'
  112. - regex: 'baiduspider(-image)?|baidu Transcoder|baidu.*spider'
  113. name: 'Baidu Spider'
  114. category: 'Search bot'
  115. url: 'http://www.baidu.com/search/spider.htm'
  116. producer:
  117. name: 'Baidu'
  118. url: 'http://www.baidu.com'
  119. - regex: 'BazQux'
  120. name: 'BazQux Reader'
  121. url: 'https://bazqux.com/fetcher'
  122. category: 'Feed Fetcher'
  123. producer:
  124. name: ''
  125. url: ''
  126. - regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
  127. name: 'BingBot'
  128. category: 'Search bot'
  129. url: 'http://search.msn.com/msnbot.htm'
  130. producer:
  131. name: 'Microsoft Corporation'
  132. url: 'http://www.microsoft.com'
  133. - regex: 'Blekkobot'
  134. name: 'Blekkobot'
  135. category: 'Search bot'
  136. url: 'http://blekko.com/about/blekkobot'
  137. producer:
  138. name: 'Blekko'
  139. url: 'http://blekko.com'
  140. - regex: 'BLEXBot(Test)?'
  141. name: 'BLEXBot Crawler'
  142. category: 'Crawler'
  143. url: 'http://webmeup-crawler.com'
  144. producer:
  145. name: 'WebMeUp'
  146. url: 'http://webmeup.com'
  147. - regex: 'Bloglovin'
  148. name: 'Bloglovin'
  149. url: 'http://www.bloglovin.com'
  150. category: 'Feed Fetcher'
  151. producer:
  152. name: ''
  153. url: ''
  154. - regex: 'Blogtrottr'
  155. name: 'Blogtrottr'
  156. url: ''
  157. category: 'Feed Fetcher'
  158. producer:
  159. name: 'Blogtrottr Ltd'
  160. url: 'https://blogtrottr.com/'
  161. - regex: 'BountiiBot'
  162. name: 'Bountii Bot'
  163. category: 'Search bot'
  164. url: 'http://bountii.com/contact.php'
  165. producer:
  166. name: 'Bountii Inc.'
  167. url: 'http://bountii.com'
  168. - regex: 'Browsershots'
  169. name: 'Browsershots'
  170. category: 'Service Agent'
  171. url: 'http://browsershots.org/faq'
  172. producer:
  173. name: 'Browsershots.org'
  174. url: 'http://browsershots.org'
  175. - regex: 'BUbiNG'
  176. name: 'BUbiNG'
  177. category: 'Crawler'
  178. url: 'http://law.di.unimi.it/BUbiNG.html'
  179. producer:
  180. name: 'The Laboratory for Web Algorithmics (LAW)'
  181. url: 'http://law.di.unimi.it/software.php#buging'
  182. - regex: '(?<!HTC)[ _]Butterfly/'
  183. name: 'Butterfly Robot'
  184. category: 'Search bot'
  185. url: 'http://labs.topsy.com/butterfly'
  186. producer:
  187. name: 'Topsy Labs'
  188. url: 'http://labs.topsy.com'
  189. - regex: 'CareerBot'
  190. name: 'CareerBot'
  191. category: 'Crawler'
  192. url: 'http://www.career-x.de/bot.html'
  193. producer:
  194. name: 'career-x GmbH'
  195. url: 'http://www.career-x.de'
  196. - regex: 'CCBot'
  197. name: 'ccBot crawler'
  198. category: 'Crawler'
  199. url: 'http://commoncrawl.org/faq/'
  200. producer:
  201. name: 'reddit inc.'
  202. url: 'http://www.reddit.com'
  203. - regex: 'Cliqzbot'
  204. name: 'Cliqzbot'
  205. category: 'Crawler'
  206. url: 'http://cliqz.com/company/cliqzbot'
  207. producer:
  208. name: '10betterpages GmbH'
  209. url: 'http://cliqz.com'
  210. - regex: 'Cloudflare-AMP'
  211. name: 'CloudFlare AMP Fetcher'
  212. category: 'Crawler'
  213. url: 'https://amp.cloudflare.com/doc/fetcher.html'
  214. producer:
  215. name: 'CloudFlare'
  216. url: 'http://www.cloudflare.com'
  217. - regex: 'CloudFlare-AlwaysOnline'
  218. name: 'CloudFlare Always Online'
  219. category: 'Site Monitor'
  220. url: 'http://www.cloudflare.com/always-online'
  221. producer:
  222. name: 'CloudFlare'
  223. url: 'http://www.cloudflare.com'
  224. - regex: 'coccoc/'
  225. name: 'Cốc Cốc Bot'
  226. url: 'http://help.coccoc.com/'
  227. category: 'Search bot'
  228. producer:
  229. name: 'Cốc Cốc'
  230. url: 'http://coccoc.com/'
  231. - regex: 'collectd'
  232. name: 'Collectd'
  233. url: 'https://collectd.org/'
  234. category: 'Site Monitor'
  235. producer:
  236. name: 'Collectd'
  237. url: 'https://collectd.org/'
  238. - regex: 'CommaFeed'
  239. name: 'CommaFeed'
  240. url: 'http://www.commafeed.com'
  241. category: 'Feed Fetcher'
  242. producer:
  243. name: ''
  244. url: ''
  245. - regex: 'CSS Certificate Spider'
  246. name: 'CSS Certificate Spider'
  247. category: 'Crawler'
  248. url: 'http://www.css-security.com/certificatespider/'
  249. producer:
  250. name: 'Certified Security Solutions'
  251. url: 'https://www.css-security.com/company/about-us/'
  252. - regex: 'Datadog Agent'
  253. name: 'Datadog Agent'
  254. url: 'https://github.com/DataDog/dd-agent'
  255. category: 'Site Monitor'
  256. producer:
  257. name: 'Datadog'
  258. url: 'https://www.datadoghq.com/'
  259. - regex: 'Dataprovider'
  260. name: 'Dataprovider'
  261. category: 'Crawler'
  262. url: ''
  263. producer:
  264. name: 'Dataprovider B.V.'
  265. url: 'https://www.dataprovider.com/'
  266. - regex: 'Daum(oa)?[ /][0-9]'
  267. name: 'Daum'
  268. category: 'Search bot'
  269. url: 'http://tab.search.daum.net/aboutWebSearch_en.html'
  270. producer:
  271. name: 'Daum Communications Corp.'
  272. url: 'http://www.kakaocorp.com/main'
  273. - regex: 'Dazoobot'
  274. name: 'Dazoobot'
  275. category: 'Search bot'
  276. url: ''
  277. producer:
  278. name: 'DAZOO.FR'
  279. url: 'http://dazoo.fr'
  280. - regex: 'discobot(-news)?'
  281. name: 'Discobot'
  282. category: 'Search bot'
  283. url: 'http://discoveryengine.com/discobot.html'
  284. producer:
  285. name: 'Discovery Engine'
  286. url: 'http://discoveryengine.com'
  287. - regex: 'Domain Re-Animator Bot|support@domainreanimator.com'
  288. name: 'Domain Re-Animator Bot'
  289. category: 'Crawler'
  290. url: ''
  291. producer:
  292. name: 'Domain Re-Animator, LLC'
  293. url: 'http://domainreanimator.com'
  294. - regex: 'DotBot'
  295. name: 'DotBot'
  296. category: 'Crawler'
  297. url: 'http://www.opensiteexplorer.org/dotbot'
  298. producer:
  299. name: 'SEOmoz, Inc.'
  300. url: 'http://moz.com/'
  301. - regex: 'DuckDuck'
  302. name: 'DuckDuckGo Bot'
  303. category: 'Search bot'
  304. url: 'https://duckduckgo.com/duckduckbot'
  305. producer:
  306. name: 'DuckDuckGo'
  307. url: 'https://duckduckgo.com/'
  308. - regex: 'EasouSpider'
  309. name: 'Easou Spider'
  310. category: 'Search bot'
  311. url: 'http://www.easou.com/search/spider.html'
  312. producer:
  313. name: 'easou ICP'
  314. url: 'http://www.easou.com'
  315. - regex: 'EMail Exractor'
  316. name: 'EMail Exractor'
  317. category: 'Crawler'
  318. url: ''
  319. producer:
  320. name: ''
  321. url: ''
  322. - regex: 'evc-batch'
  323. name: 'evc-batch'
  324. category: 'Crawler'
  325. url: ''
  326. producer:
  327. name: 'eVenture Capital Partners II, LLC'
  328. url: 'http://www.eventures.vc/'
  329. - regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
  330. name: 'ExaBot'
  331. category: 'Crawler'
  332. url: 'http://www.exabot.com/go/robot'
  333. producer:
  334. name: 'Dassault Systèmes'
  335. url: 'http://www.3ds.com'
  336. - regex: 'ExactSeek Crawler'
  337. name: 'ExactSeek Crawler'
  338. category: 'Search bot'
  339. url: 'http://www.exactseek.com'
  340. producer:
  341. name: 'Jayde Online, Inc.'
  342. url: 'http://www.jaydeonlineinc.com'
  343. - regex: 'Ezooms'
  344. name: 'Ezooms'
  345. category: 'Crawler'
  346. url: ''
  347. producer:
  348. name: 'SEOmoz, Inc.'
  349. url: 'http://moz.com/'
  350. - regex: 'facebookexternalhit|facebookplatform'
  351. name: 'Facebook External Hit'
  352. category: 'Social Media Agent'
  353. url: 'https://www.facebook.com/externalhit_uatext.php'
  354. producer:
  355. name: 'Facebook'
  356. url: 'http://www.facebook.com'
  357. - regex: 'Feedbin'
  358. name: 'Feedbin'
  359. url: 'http://feedbin.com/'
  360. category: 'Feed Fetcher'
  361. producer:
  362. name: ''
  363. url: ''
  364. - regex: 'FeedBurner'
  365. name: 'FeedBurner'
  366. url: 'http://www.feedburner.com'
  367. category: 'Feed Fetcher'
  368. producer:
  369. name: ''
  370. url: ''
  371. - regex: 'Feed Wrangler'
  372. name: 'Feed Wrangler'
  373. url: 'https://feedwrangler.net/'
  374. category: 'Feed Fetcher'
  375. producer:
  376. name: 'David Smith & Developing Perspective, LLC'
  377. url: 'https://david-smith.org'
  378. - regex: '(Meta)?Feedly(Bot|App)?'
  379. name: 'Feedly'
  380. url: 'http://www.feedly.com'
  381. category: 'Feed Fetcher'
  382. producer:
  383. name: ''
  384. url: ''
  385. - regex: 'Feedspot'
  386. name: 'Feedspot'
  387. url: 'http://www.feedspot.com'
  388. category: 'Feed Fetcher'
  389. producer:
  390. name: ''
  391. url: ''
  392. - regex: 'Fever/[0-9]'
  393. name: 'Fever'
  394. url: 'http://feedafever.com/'
  395. category: 'Feed Fetcher'
  396. producer:
  397. name: ''
  398. url: ''
  399. - regex: 'FlipboardProxy|FlipboardRSS'
  400. name: 'Flipboard'
  401. url: 'http://flipboard.com/browserproxy'
  402. category: 'Feed Fetcher'
  403. producer:
  404. name: 'Flipboard'
  405. url: 'http://flipboard.com/'
  406. - regex: 'Findxbot'
  407. name: 'Findxbot'
  408. category: 'Crawler'
  409. url: 'http://www.findxbot.com'
  410. - regex: 'Genieo'
  411. name: 'Genieo Web filter'
  412. category: ''
  413. url: 'http://www.genieo.com/webfilter.html'
  414. producer:
  415. name: 'Genieo'
  416. url: 'http://www.genieo.com'
  417. - regex: 'GigablastOpenSource'
  418. name: 'Gigablast'
  419. category: 'Search bot'
  420. url: 'https://github.com/gigablast/open-source-search-engine'
  421. producer:
  422. name: 'Matt Wells'
  423. url: 'http://www.gigablast.com/faq.html'
  424. - regex: 'Gluten Free Crawler'
  425. name: 'Gluten Free Crawler'
  426. category: 'Crawler'
  427. url: 'http://glutenfreepleasure.com/'
  428. producer:
  429. name: ''
  430. url: ''
  431. - regex: 'ichiro/mobile goo'
  432. name: 'Goo'
  433. category: 'Search bot'
  434. url: 'http://search.goo.ne.jp/option/use/sub4/sub4-1'
  435. producer:
  436. name: 'NTT Resonant'
  437. url: 'http://goo.ne.jp'
  438. - regex: 'Google Page Speed Insights'
  439. name: 'Google PageSpeed Insights'
  440. category: 'Site Monitor'
  441. url: 'http://developers.google.com/speed/pagespeed/insights/'
  442. producer:
  443. name: 'Google Inc.'
  444. url: 'http://www.google.com'
  445. - regex: 'google_partner_monitoring'
  446. name: 'Google Partner Monitoring'
  447. category: 'Site Monitor'
  448. url: ''
  449. producer:
  450. name: 'Google Inc.'
  451. url: 'http://www.google.com'
  452. - regex: 'Google-Structured-Data-Testing-Tool'
  453. name: 'Google Structured Data Testing Tool'
  454. category: 'Validator'
  455. url: 'https://search.google.com/structured-data/testing-tool'
  456. producer:
  457. name: 'Google Inc.'
  458. url: 'http://www.google.com'
  459. - regex: 'via ggpht\.com GoogleImageProxy'
  460. name: 'Gmail Image Proxy'
  461. category: 'Crawler'
  462. url: ''
  463. producer:
  464. name: 'Google Inc.'
  465. url: 'http://www.google.com'
  466. - regex: 'SeznamEmailProxy'
  467. name: 'Seznam Email Proxy'
  468. category: 'Crawler'
  469. url: ''
  470. producer:
  471. name: 'Seznam.cz, a.s.'
  472. url: 'http://www.seznam.cz/'
  473. - regex: 'Seznam-Zbozi-robot'
  474. name: 'Seznam Zbozi.cz'
  475. category: 'Crawler'
  476. url: ''
  477. producer:
  478. name: 'Seznam.cz, a.s.'
  479. url: 'https://www.zbozi.cz/'
  480. - regex: 'Heurekabot-Feed'
  481. name: 'Heureka Feed'
  482. category: 'Crawler'
  483. url: 'https://sluzby.heureka.cz/napoveda/heurekabot/'
  484. producer:
  485. name: 'Heureka.cz, a.s.'
  486. url: 'https://www.heureka.cz/'
  487. - regex: 'ShopAlike'
  488. name: 'ShopAlike'
  489. category: 'Crawler'
  490. url: ''
  491. producer:
  492. name: 'Visual Meta'
  493. url: 'https://www.shopalike.cz/'
  494. - regex: 'Googlebot(-Mobile|-Image|-Video|-News)?|Feedfetcher-Google|Google-Test|Google-Site-Verification|Google Web Preview|AdsBot-Google(-Mobile)?|Google-Adwords-Instant|Mediapartners-Google|Google.*/\+/web/snippet|GoogleProducer|Google[ -]Publisher[ -]Plugin|Google-Shopping-Quality'
  495. name: 'Googlebot'
  496. category: 'Search bot'
  497. url: 'http://www.google.com/bot.html'
  498. producer:
  499. name: 'Google Inc.'
  500. url: 'http://www.google.com'
  501. - regex: 'heritrix'
  502. name: 'Heritrix'
  503. category: 'Crawler'
  504. url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
  505. producer:
  506. name: 'The Internet Archive'
  507. url: 'http://www.archive.org'
  508. - regex: 'HubSpot '
  509. name: 'HubSpot'
  510. category: 'Crawler'
  511. producer:
  512. name: 'HubSpot Inc.'
  513. url: 'https://www.hubspot.com'
  514. - regex: 'HTTPMon'
  515. name: 'HTTPMon'
  516. category: 'Site Monitor'
  517. url: 'http://www.httpmon.com'
  518. producer:
  519. name: 'towards GmbH'
  520. url: 'http://www.towards.ch/'
  521. - regex: 'ICC-Crawler'
  522. name: 'ICC-Crawler'
  523. category: 'Crawler'
  524. url: 'http://www.nict.go.jp/en/univ-com/plan/crawl.html'
  525. producer:
  526. name: ''
  527. url: ''
  528. - regex: 'iisbot'
  529. name: 'IIS Site Analysis'
  530. category: 'Crawler'
  531. url: 'http://www.iis.net/iisbot.html'
  532. producer:
  533. name: 'Microsoft Corporation'
  534. url: 'http://www.microsoft.com'
  535. - regex: 'ips-agent'
  536. name: 'IPS Agent'
  537. category: 'Crawler'
  538. producer:
  539. name: 'VeriSign, Inc'
  540. url: 'http://www.verisign.com/'
  541. - regex: 'IP-Guide\.com'
  542. name: 'IP-Guide Crawler'
  543. category: 'Crawler'
  544. url: ''
  545. producer:
  546. name: ''
  547. url: 'https://ip-guide.com'
  548. - regex: 'kouio'
  549. name: 'Kouio'
  550. url: 'http://kouio.com/'
  551. category: 'Feed Fetcher'
  552. producer:
  553. name: ''
  554. url: ''
  555. - regex: 'larbin'
  556. name: 'Larbin web crawler'
  557. category: 'Crawler'
  558. url: 'http://larbin.sourceforge.net'
  559. producer:
  560. name: ''
  561. url: ''
  562. - regex: '([A-z0-9]*)-Lighthouse'
  563. name: 'Lighthouse'
  564. category: 'Site Monitor'
  565. url: 'https://developers.google.com/web/tools/lighthouse'
  566. producer:
  567. name: 'Lighthouse'
  568. url: 'https://developers.google.com/web/tools/lighthouse'
  569. - regex: 'linkdexbot(-mobile)?|linkdex\.com'
  570. name: 'Linkdex Bot'
  571. category: 'Search bot'
  572. url: 'http://www.linkdex.com/bots'
  573. producer:
  574. name: 'Mojeek Ltd.'
  575. url: 'http://www.mojeek.com'
  576. - regex: 'LinkedInBot'
  577. name: 'LinkedIn Bot'
  578. category: 'Social Media Agent'
  579. url: 'http://www.linkedin.com'
  580. producer:
  581. name: 'LinkedIn'
  582. url: 'http://www.linkedin.com'
  583. - regex: 'ltx71'
  584. name: 'LTX71'
  585. url: 'http://ltx71.com/'
  586. producer:
  587. name: ''
  588. url: ''
  589. - regex: 'Mail\.RU(_Bot)?'
  590. name: 'Mail.Ru Bot'
  591. category: 'Search bot'
  592. url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
  593. producer:
  594. name: 'Mail.Ru Group'
  595. url: 'http://corp.mail.ru'
  596. - regex: 'magpie-crawler'
  597. name: 'Magpie-Crawler'
  598. category: 'Social Media Agent'
  599. url: 'http://www.brandwatch.com/magpie-crawler/'
  600. producer:
  601. name: 'Brandwatch'
  602. url: 'http://www.brandwatch.com'
  603. - regex: 'MagpieRSS'
  604. name: 'MagpieRSS'
  605. url: 'http://magpierss.sourceforge.net/'
  606. category: 'Feed Parser'
  607. producer:
  608. name: ''
  609. url: ''
  610. - regex: 'masscan'
  611. name: 'masscan'
  612. url: 'https://github.com/robertdavidgraham/masscan'
  613. category: 'Crawler'
  614. producer:
  615. name: 'Robert Graham'
  616. url: 'https://github.com/robertdavidgraham'
  617. - regex: 'meanpathbot'
  618. name: 'Meanpath Bot'
  619. category: 'Search bot'
  620. url: 'http://www.meanpath.com/meanpathbot.html'
  621. producer:
  622. name: 'Meanpath'
  623. url: 'http://www.meanpath.com'
  624. - regex: 'MetaJobBot'
  625. name: 'MetaJobBot'
  626. category: 'Crawler'
  627. url: 'http://www.metajob.at/the/crawler'
  628. producer:
  629. name: 'MetaJob'
  630. url: 'http://www.metajob.at'
  631. - regex: 'MetaInspector'
  632. name: 'MetaInspector'
  633. category: 'Crawler'
  634. url: 'https://github.com/jaimeiniesta/metainspector'
  635. - regex: 'MixrankBot'
  636. name: 'Mixrank Bot'
  637. category: 'Crawler'
  638. url: 'http://mixrank.com'
  639. producer:
  640. name: 'Online Media Group, Inc.'
  641. url: ''
  642. - regex: 'MJ12bot'
  643. name: 'MJ12 Bot'
  644. category: 'Search bot'
  645. url: 'http://majestic12.co.uk/bot.php'
  646. producer:
  647. name: 'Majestic-12'
  648. url: 'http://majestic12.co.uk'
  649. - regex: 'Mnogosearch'
  650. name: 'Mnogosearch'
  651. category: 'Search bot'
  652. url: 'http://www.mnogosearch.org/'
  653. producer:
  654. name: 'Lavtech.Com Corp.'
  655. url: ''
  656. - regex: 'MojeekBot'
  657. name: 'MojeekBot'
  658. category: 'Search bot'
  659. url: 'http://www.mojeek.com/bot.html'
  660. producer:
  661. name: 'Mojeek Ltd.'
  662. url: 'http://www.mojeek.com'
  663. - regex: 'munin'
  664. name: 'Munin'
  665. category: 'Site Monitor'
  666. url: 'http://munin-monitoring.org/'
  667. producer:
  668. name: 'Munin'
  669. url: 'http://munin-monitoring.org/'
  670. - regex: 'NalezenCzBot'
  671. name: 'NalezenCzBot'
  672. category: 'Crawler'
  673. url: 'http://www.nalezen.cz/about-crawler'
  674. producer:
  675. name: 'Jaroslav Kuboš'
  676. url: ''
  677. - regex: 'check_http/v'
  678. name: 'Nagios check_http'
  679. category: 'Site Monitor'
  680. url: 'https://nagios.org'
  681. producer:
  682. name: 'Nagios Plugins Development Team'
  683. url: 'https://nagios.org'
  684. - regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
  685. name: 'Netcraft Survey Bot'
  686. category: 'Search bot'
  687. url: ''
  688. producer:
  689. name: 'Netcraft'
  690. url: 'http://www.netcraft.com'
  691. - regex: 'netEstate NE Crawler'
  692. name: 'netEstate'
  693. category: 'Crawler'
  694. url: 'http://www.website-datenbank.de/Impressum'
  695. producer:
  696. name: 'netEstate GmbH'
  697. url: 'https://www.netestate.de/en/'
  698. - regex: 'Netvibes'
  699. name: 'Netvibes'
  700. url: 'http://www.netvibes.com/'
  701. category: 'Feed Fetcher'
  702. producer:
  703. name: ''
  704. url: ''
  705. - regex: 'NewsBlur .*(Fetcher|Finder)'
  706. name: 'NewsBlur'
  707. url: 'http://www.newsblur.com'
  708. category: 'Feed Fetcher'
  709. producer:
  710. name: ''
  711. url: ''
  712. - regex: 'NewsGatorOnline'
  713. name: 'NewsGator'
  714. url: 'http://www.newsgator.com'
  715. category: 'Feed Fetcher'
  716. producer:
  717. name: ''
  718. url: ''
  719. - regex: 'nlcrawler'
  720. name: 'NLCrawler'
  721. category: 'Crawler'
  722. url: ''
  723. producer:
  724. name: 'Northern Light'
  725. url: 'http://northernlight.com'
  726. - regex: 'Nmap Scripting Engine'
  727. name: 'Nmap'
  728. category: 'Security Checker'
  729. url: 'https://nmap.org/book/nse.html'
  730. producer:
  731. name: 'Nmap'
  732. url: 'https://nmap.org/'
  733. - regex: 'Octopus [0-9]'
  734. name: 'Octopus'
  735. - regex: 'omgilibot'
  736. name: 'Omgili bot'
  737. category: 'Search bot'
  738. url: 'http://www.omgili.com/Crawler.html'
  739. producer:
  740. name: 'Omgili'
  741. url: 'http://www.omgili.com'
  742. - regex: 'OpenindexSpider'
  743. name: 'Openindex Spider'
  744. category: 'Search bot'
  745. url: 'http://www.openindex.io/en/webmasters/spider.html'
  746. producer:
  747. name: 'Openindex B.V.'
  748. url: 'http://www.openindex.io'
  749. - regex: 'spbot'
  750. name: 'OpenLinkProfiler'
  751. category: 'Crawler'
  752. url: 'http://openlinkprofiler.org/bot'
  753. producer:
  754. name: 'Axandra GmbH'
  755. url: 'http://www.axandra.com'
  756. - regex: 'OpenWebSpider'
  757. name: 'OpenWebSpider'
  758. category: 'Crawler'
  759. url: 'http://www.openwebspider.org'
  760. producer:
  761. name: 'OpenWebSpider Lab'
  762. url: 'http://lab.openwebspider.org'
  763. - regex: 'OrangeBot|VoilaBot'
  764. name: 'Orange Bot'
  765. category: 'Search bot'
  766. url: 'http://lemoteur.orange.fr'
  767. producer:
  768. name: 'Orange'
  769. url: 'http://www.orange.fr'
  770. - regex: 'PaperLiBot'
  771. name: 'PaperLiBot'
  772. category: 'Search bot'
  773. url: 'http://support.paper.li/entries/20023257-what-is-paper-li'
  774. producer:
  775. name: 'Smallrivers SA'
  776. url: 'http://www.paper.li'
  777. - regex: 'phantomas/'
  778. name: 'Phantomas'
  779. category: 'Site Monitor'
  780. url: 'https://github.com/macbre/phantomas'
  781. - regex: 'phpservermon'
  782. name: 'PHP Server Monitor'
  783. category: 'Site Monitor'
  784. url: 'https://github.com/phpservermon/phpservermon'
  785. producer:
  786. name: 'PHP Server Monitor'
  787. url: 'http://www.phpservermonitor.org/'
  788. - regex: 'PocketParser'
  789. name: 'PocketParser'
  790. category: 'Read-it-later Service'
  791. url: 'https://getpocket.com/pocketparser_ua'
  792. producer:
  793. name: 'Pocket'
  794. url: 'https://getpocket.com/'
  795. - regex: 'PritTorrent'
  796. name: 'PritTorrent'
  797. category: 'Crawler'
  798. url: 'https://github.com/astro/prittorrent'
  799. producer:
  800. name: 'Bitlove'
  801. url: 'http://bitlove.org/'
  802. - regex: 'psbot(-page)?'
  803. name: 'Picsearch bot'
  804. category: 'Search bot'
  805. url: 'http://www.picsearch.com/bot.html'
  806. producer:
  807. name: 'Picsearch'
  808. url: 'http://www.picsearch.com'
  809. - regex: 'Pingdom\.com'
  810. name: 'Pingdom Bot'
  811. category: 'Site Monitor'
  812. url: ''
  813. producer:
  814. name: 'Pingdom AB'
  815. url: 'https://www.pingdom.com'
  816. - regex: 'RamblerMail'
  817. name: 'RamblerMail Image Proxy'
  818. category: 'Crawler'
  819. url: ''
  820. producer:
  821. name: 'Rambler&Co'
  822. url: 'https://rambler-co.ru/'
  823. - regex: 'QuerySeekerSpider'
  824. name: 'QuerySeekerSpider'
  825. category: 'Crawler'
  826. url: 'http://queryseeker.com/bot.html'
  827. producer:
  828. name: 'QueryEye Inc.'
  829. url: 'http://queryeye.com'
  830. - regex: 'Qwantify'
  831. name: 'Qwantify'
  832. category: 'Crawler'
  833. url: 'https://www.qwant.com/'
  834. producer:
  835. name: 'Qwant Corporation'
  836. url: 'https://www.qwant.com/'
  837. - regex: 'Rainmeter'
  838. name: 'Rainmeter'
  839. category: 'Crawler'
  840. url: 'https://www.rainmeter.net'
  841. - regex: 'redditbot'
  842. name: 'Reddit Bot'
  843. category: 'Social Media Agent'
  844. url: 'http://www.reddit.com/feedback'
  845. producer:
  846. name: 'reddit inc.'
  847. url: 'http://www.reddit.com'
  848. - regex: 'Riddler'
  849. name: 'Riddler'
  850. category: 'Security search bot'
  851. url: 'https://riddler.io/about'
  852. producer:
  853. name: 'F-Secure'
  854. url: 'https://www.f-secure.com'
  855. - regex: 'rogerbot'
  856. name: 'Rogerbot'
  857. category: 'Crawler'
  858. url: 'http://moz.com/help/pro/what-is-rogerbot-'
  859. producer:
  860. name: 'SEOmoz, Inc.'
  861. url: 'http://moz.com/'
  862. - regex: 'ROI Hunter'
  863. name: 'ROI Hunter'
  864. category: 'Crawler'
  865. url: ''
  866. producer:
  867. name: 'Roihunter a.s.'
  868. url: 'http://roihunter.com/'
  869. - regex: 'SafeDNSBot'
  870. name: 'SafeDNSBot'
  871. category: 'Crawler'
  872. url: 'https://www.safedns.com/searchbot'
  873. producer:
  874. name: 'SafeDNS, Inc.'
  875. url: 'https://www.safedns.com/'
  876. - regex: 'Scrapy'
  877. name: 'Scrapy'
  878. category: 'Crawler'
  879. url: 'http://scrapy.org'
  880. - regex: 'Screaming Frog SEO Spider'
  881. name: 'Screaming Frog SEO Spider'
  882. category: 'Crawler'
  883. url: 'http://www.screamingfrog.co.uk/seo-spider'
  884. producer:
  885. name: 'Screaming Frog Ltd'
  886. url: 'http://www.screamingfrog.co.uk'
  887. - regex: 'ScreenerBot'
  888. name: 'ScreenerBot'
  889. category: 'Crawler'
  890. url: 'http://www.screenerbot.com'
  891. producer:
  892. name: ''
  893. url: ''
  894. - regex: 'SemrushBot'
  895. name: 'Semrush Bot'
  896. category: 'Crawler'
  897. url: 'http://www.semrush.com/bot.html'
  898. producer:
  899. name: 'SEMrush'
  900. url: 'http://www.semrush.com'
  901. - regex: 'SensikaBot'
  902. name: 'Sensika Bot'
  903. category: ''
  904. url: ''
  905. producer:
  906. name: 'Sensika'
  907. url: 'http://sensika.com'
  908. - regex: 'SEOENG(World)?Bot'
  909. name: 'SEOENGBot'
  910. category: 'Crawler'
  911. url: 'http://www.seoengine.com/seoengbot.htm'
  912. producer:
  913. name: 'SEO Engine'
  914. url: 'http://www.seoengine.com'
  915. - regex: 'SEOkicks-Robot'
  916. name: 'SEOkicks-Robot'
  917. category: 'Crawler'
  918. url: 'http://www.seokicks.de/robot.html'
  919. producer:
  920. name: 'SEOkicks'
  921. url: 'https://www.seokicks.de/'
  922. - regex: 'seoscanners\.net'
  923. name: 'Seoscanners.net'
  924. category: 'Crawler'
  925. url: ''
  926. - regex: 'SkypeUriPreview'
  927. name: 'Skype URI Preview'
  928. category: 'Service Agent'
  929. url: ''
  930. producer:
  931. name: 'Skype Communications S.à.r.l.'
  932. url: 'https://www.skype.com'
  933. - regex: 'SeznamBot|SklikBot|Seznam screenshot-generator'
  934. name: 'Seznam Bot'
  935. category: 'Search bot'
  936. url: 'http://www.mapy.cz/cz/seznambot.html'
  937. producer:
  938. name: 'Seznam.cz, a.s.'
  939. url: 'http://www.seznam.cz/'
  940. - regex: 'ShopWiki'
  941. name: 'ShopWiki'
  942. category: 'Search tools'
  943. url: 'http://www.shopwiki.com/wiki/Help:Bot'
  944. producer:
  945. name: 'ShopWiki Corp.'
  946. url: 'http://www.shopwiki.com'
  947. - regex: 'SilverReader'
  948. name: 'SilverReader'
  949. url: 'http://silverreader.com'
  950. category: 'Feed Fetcher'
  951. producer:
  952. name: ''
  953. url: ''
  954. - regex: 'SimplePie'
  955. name: 'SimplePie'
  956. url: 'http://www.simplepie.org'
  957. category: 'Feed Parser'
  958. producer:
  959. name: ''
  960. url: ''
  961. - regex: 'SISTRIX Crawler'
  962. name: 'SISTRIX Crawler'
  963. category: 'Crawler'
  964. url: 'http://crawler.sistrix.net'
  965. producer:
  966. name: 'SISTRIX GmbH'
  967. url: 'http://www.sistrix.de'
  968. - regex: 'SiteSucker'
  969. name: 'SiteSucker'
  970. category: 'Crawler'
  971. url: 'http://ricks-apps.com/osx/sitesucker/'
  972. - regex: 'sixy.ch'
  973. name: 'Sixy.ch'
  974. category: 'Site Monitor'
  975. url: 'http://sixy.ch'
  976. producer:
  977. name: 'Manuel Kasper'
  978. url: 'https://neon1.net/'
  979. - regex: 'Slackbot|Slack-ImgProxy'
  980. name: 'Slackbot'
  981. category: 'Crawler'
  982. url: 'https://api.slack.com/robots'
  983. producer:
  984. name: 'Slack Technologies'
  985. url: 'http://slack.com'
  986. - regex: '(Sogou (web|inst|Pic) spider)|New-Sogou-Spider'
  987. name: 'Sogou Spider'
  988. category: 'Search bot'
  989. url: 'http://www.sogou.com/docs/help/webmasters.htm'
  990. producer:
  991. name: 'Sohu, Inc.'
  992. url: 'http://www.sogou.com'
  993. - regex: 'Sosospider|Sosoimagespider'
  994. name: 'Soso Spider'
  995. category: 'Search bot'
  996. url: 'http://help.soso.com/webspider.htm'
  997. producer:
  998. name: 'Tencent Holdings'
  999. url: 'http://www.soso.com'
  1000. - regex: 'sqlmap/'
  1001. name: 'sqlmap'
  1002. category: 'Security Checker'
  1003. url: 'http://sqlmap.org/'
  1004. producer:
  1005. name: 'sqlmap'
  1006. url: 'http://sqlmap.org/'
  1007. - regex: 'SSL Labs'
  1008. name: 'SSL Labs'
  1009. category: 'Validator'
  1010. url: 'https://www.ssllabs.com/about/assessment.html'
  1011. producer:
  1012. name: 'SSL Labs'
  1013. url: 'https://www.ssllabs.com/about/assessment.html'
  1014. - regex: 'StatusCake'
  1015. name: 'StatusCake'
  1016. category: 'Site Monitor'
  1017. url: 'https://www.statuscake.com'
  1018. producer:
  1019. name: 'StatusCake'
  1020. url: 'https://www.statuscake.com'
  1021. - regex: 'Superfeedr bot'
  1022. name: 'Superfeedr Bot'
  1023. category: 'Feed Fetcher'
  1024. url: ''
  1025. producer:
  1026. name: 'Superfeedr'
  1027. url: 'https://superfeedr.com/'
  1028. - regex: 'Sparkler/[0-9]'
  1029. name: 'Sparkler'
  1030. category: 'Crawler'
  1031. url: 'https://github.com/USCDataScience/sparkler'
  1032. - regex: 'Spinn3r'
  1033. name: 'Spinn3r'
  1034. category: 'Crawler'
  1035. url: 'http://spinn3r.com/robot'
  1036. producer:
  1037. name: 'Tailrank Inc'
  1038. url: 'http://spinn3r.com'
  1039. - regex: 'Sputnik(Image)?Bot'
  1040. name: 'Sputnik Bot'
  1041. category: ''
  1042. url: ''
  1043. producer:
  1044. name: ''
  1045. url: ''
  1046. - regex: 'SurveyBot'
  1047. name: 'Survey Bot'
  1048. category: 'Search bot'
  1049. url: 'http://www.domaintools.com/webmasters/surveybot.php'
  1050. producer:
  1051. name: 'Domain Tools'
  1052. url: 'http://www.domaintools.com'
  1053. - regex: 'TarmotGezgin'
  1054. name: 'Tarmot Gezgin'
  1055. url: 'http://www.tarmot.com/gezgin/'
  1056. category: 'Search bot'
  1057. - regex: 'TelegramBot'
  1058. name: 'TelegramBot'
  1059. url: 'https://telegram.org/blog/bot-revolution'
  1060. - regex: 'TLSProbe'
  1061. name: 'TLSProbe'
  1062. url: 'https://scan.trustnet.venafi.com/'
  1063. category: 'Security search bot'
  1064. producer:
  1065. name: 'Venafi TrustNet'
  1066. url: 'https://www.venafi.com'
  1067. - regex: 'TinEye-bot'
  1068. name: 'TinEye Crawler'
  1069. category: 'Search bot'
  1070. url: 'http://www.tineye.com/crawler.html'
  1071. producer:
  1072. name: 'Idée Inc.'
  1073. url: 'http://ideeinc.com'
  1074. - regex: 'Tiny Tiny RSS'
  1075. name: 'Tiny Tiny RSS'
  1076. url: 'http://tt-rss.org'
  1077. category: 'Feed Fetcher'
  1078. producer:
  1079. name: ''
  1080. url: ''
  1081. - regex: 'trendictionbot'
  1082. name: 'Trendiction Bot'
  1083. category: 'Crawler'
  1084. url: 'http://www.trendiction.de/bot'
  1085. producer:
  1086. name: 'Talkwalker Inc.'
  1087. url: 'http://www.talkwalker.com'
  1088. - regex: 'TurnitinBot'
  1089. name: 'TurnitinBot'
  1090. category: 'Crawler'
  1091. url: 'http://www.turnitin.com/robot/crawlerinfo.html'
  1092. producer:
  1093. name: 'iParadigms, LLC.'
  1094. url: 'http://www.turnitin.com'
  1095. - regex: 'TweetedTimes Bot'
  1096. name: 'TweetedTimes Bot'
  1097. category: 'Crawler'
  1098. url: 'http://tweetedtimes.com'
  1099. producer:
  1100. name: 'TweetedTimes'
  1101. url: 'http://tweetedtimes.com/'
  1102. - regex: 'TweetmemeBot'
  1103. name: 'Tweetmeme Bot'
  1104. category: 'Crawler'
  1105. url: 'http://tweetmeme.com/'
  1106. producer:
  1107. name: 'Mediasift'
  1108. url: ''
  1109. - regex: 'Twitterbot'
  1110. name: 'Twitterbot'
  1111. category: 'Social Media Agent'
  1112. url: 'https://dev.twitter.com/docs/cards/getting-started'
  1113. producer:
  1114. name: 'Twitter'
  1115. url: 'http://www.twitter.com'
  1116. - regex: 'UniversalFeedParser'
  1117. name: 'UniversalFeedParser'
  1118. category: 'Feed Fetcher'
  1119. url: 'https://github.com/kurtmckee/feedparser'
  1120. producer:
  1121. name: 'Kurt McKee'
  1122. url: 'https://github.com/kurtmckee'
  1123. - regex: 'via secureurl\.fwdcdn\.com'
  1124. name: 'UkrNet Mail Proxy'
  1125. category: 'Crawler'
  1126. url: ''
  1127. producer:
  1128. name: 'UkrNet Ltd'
  1129. url: 'https://www.ukr.net/'
  1130. - regex: 'Uptimebot'
  1131. name: 'Uptimebot'
  1132. category: 'Site Monitor'
  1133. url: 'https://uptime.com/uptimebot'
  1134. producer:
  1135. name: 'Uptime'
  1136. url: 'https://uptime.com'
  1137. - regex: 'UptimeRobot'
  1138. name: 'Uptime Robot'
  1139. category: 'Site Monitor'
  1140. url: ''
  1141. producer:
  1142. name: 'Uptime Robot'
  1143. url: 'http://uptimerobot.com'
  1144. - regex: 'URLAppendBot'
  1145. name: 'URLAppendBot'
  1146. category: 'Crawler'
  1147. url: 'http://www.profound.net/urlappendbot.html'
  1148. producer:
  1149. name: 'Profound Networks'
  1150. url: 'http://www.profound.net'
  1151. - regex: 'Vagabondo'
  1152. name: 'Vagabondo'
  1153. category: 'Crawler'
  1154. url: ''
  1155. producer:
  1156. name: 'WiseGuys'
  1157. url: 'http://www.wise-guys.nl/'
  1158. - regex: 'VSMCrawler'
  1159. name: 'Visual Site Mapper Crawler'
  1160. category: 'Crawler'
  1161. url: 'http://www.visualsitemapper.com/crawler'
  1162. producer:
  1163. name: 'Alentum Software Ltd.'
  1164. url: 'http://www.alentum.com'
  1165. - regex: 'Jigsaw'
  1166. name: 'W3C CSS Validator'
  1167. category: 'Validator'
  1168. url: 'http://jigsaw.w3.org/css-validator'
  1169. producer:
  1170. name: 'W3C'
  1171. url: 'http://www.w3.org'
  1172. - regex: 'W3C_I18n-Checker'
  1173. name: 'W3C I18N Checker'
  1174. category: 'Validator'
  1175. url: 'http://validator.w3.org/i18n-checker'
  1176. producer:
  1177. name: 'W3C'
  1178. url: 'http://www.w3.org'
  1179. - regex: 'W3C-checklink'
  1180. name: 'W3C Link Checker'
  1181. category: 'Validator'
  1182. url: 'http://validator.w3.org/checklink'
  1183. producer:
  1184. name: 'W3C'
  1185. url: 'http://www.w3.org'
  1186. - regex: 'W3C_Validator|Validator\.nu'
  1187. name: 'W3C Markup Validation Service'
  1188. category: 'Validator'
  1189. url: 'http://validator.w3.org/services'
  1190. producer:
  1191. name: 'W3C'
  1192. url: 'http://www.w3.org'
  1193. - regex: 'W3C-mobileOK'
  1194. name: 'W3C MobileOK Checker'
  1195. category: 'Validator'
  1196. url: 'http://validator.w3.org/mobile'
  1197. producer:
  1198. name: 'W3C'
  1199. url: 'http://www.w3.org'
  1200. - regex: 'W3C_Unicorn'
  1201. name: 'W3C Unified Validator'
  1202. category: 'Validator'
  1203. url: 'http://validator.w3.org/unicorn'
  1204. producer:
  1205. name: 'W3C'
  1206. url: 'http://www.w3.org'
  1207. - regex: 'Wappalyzer'
  1208. name: 'Wappalyzer'
  1209. url: 'https://github.com/AliasIO/Wappalyzer'
  1210. producer:
  1211. name: 'AliasIO'
  1212. url: 'https://github.com/AliasIO'
  1213. - regex: 'WeSEE(:Search)?'
  1214. name: 'WeSEE:Search'
  1215. category: 'Search bot'
  1216. url: 'http://www.wesee.com/bot'
  1217. producer:
  1218. name: 'WeSEE Ltd'
  1219. url: 'http://www.wesee.com'
  1220. - regex: 'WebbCrawler'
  1221. name: 'WebbCrawler'
  1222. category: 'Crawler'
  1223. url: 'http://badcheese.com/crawler.html'
  1224. producer:
  1225. name: 'Steve Webb'
  1226. url: 'http://badcheese.com'
  1227. - regex: 'websitepulse[+ ]checker'
  1228. name: 'WebSitePulse'
  1229. category: 'Site Monitor'
  1230. url: 'http://www.websitepulse.com/'
  1231. producer:
  1232. name: 'WebSitePulse'
  1233. url: 'http://www.websitepulse.com/'
  1234. - regex: 'WordPress'
  1235. name: 'WordPress'
  1236. category: 'Service Agent'
  1237. url: 'https://wordpress.org/'
  1238. producer:
  1239. name: 'Wordpress.org'
  1240. url: 'https://wordpress.org/'
  1241. - regex: 'Wotbox'
  1242. name: 'Wotbox'
  1243. category: 'Search bot'
  1244. url: 'http://www.wotbox.com/bot/'
  1245. producer:
  1246. name: 'Wotbox'
  1247. url: 'http://www.wotbox.com'
  1248. - regex: 'yacybot'
  1249. name: 'YaCy'
  1250. category: 'Search bot'
  1251. url: 'http://yacy.net/bot.html'
  1252. producer:
  1253. name: 'YaCy'
  1254. url: 'http://yacy.net'
  1255. - regex: 'Yahoo! Slurp|Yahoo!-AdCrawler'
  1256. name: 'Yahoo! Slurp'
  1257. category: 'Search bot'
  1258. url: 'http://help.yahoo.com/ysearch/slurp'
  1259. producer:
  1260. name: 'Yahoo! Inc.'
  1261. url: 'http://www.yahoo.com'
  1262. - regex: 'Yahoo Link Preview|Yahoo:LinkExpander:Slingstone'
  1263. name: 'Yahoo! Link Preview'
  1264. category: 'Crawler'
  1265. url: 'https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html'
  1266. producer:
  1267. name: 'Yahoo! Inc.'
  1268. url: 'http://www.yahoo.com'
  1269. - regex: 'YahooCacheSystem'
  1270. name: 'Yahoo! Cache System'
  1271. category: 'Crawler'
  1272. url: ''
  1273. producer:
  1274. name: 'Yahoo! Inc.'
  1275. url: 'http://www.yahoo.com'
  1276. - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher'
  1277. name: 'Yandex Bot'
  1278. category: 'Search bot'
  1279. url: 'http://www.yandex.com/bots'
  1280. producer:
  1281. name: 'Yandex LLC'
  1282. url: 'http://company.yandex.com'
  1283. - regex: 'Yeti'
  1284. name: 'Yeti/Naverbot'
  1285. category: 'Search bot'
  1286. url: 'http://help.naver.com/robots/'
  1287. producer:
  1288. name: 'Naver'
  1289. url: 'http://www.naver.com'
  1290. - regex: 'YoudaoBot'
  1291. name: 'Youdao Bot'
  1292. category: 'Search bot'
  1293. url: 'http://www.youdao.com/help/webmaster/spider'
  1294. producer:
  1295. name: 'NetEase, Inc.'
  1296. url: 'http://corp.163.com'
  1297. - regex: 'YOURLS v[0-9]'
  1298. name: 'Yourls'
  1299. category: 'Crawler'
  1300. url: 'http://yourls.org'
  1301. - regex: 'YRSpider|YYSpider'
  1302. name: 'Yunyun Bot'
  1303. category: 'Search bot'
  1304. url: 'http://www.yunyun.com/SiteInfo.php?r=about'
  1305. producer:
  1306. name: 'YunYun'
  1307. url: 'http://www.yunyun.com'
  1308. - regex: 'zgrab'
  1309. name: 'zgrab'
  1310. category: 'Security Checker'
  1311. url: 'https://github.com/zmap/zgrab'
  1312. - regex: 'Zookabot'
  1313. name: 'Zookabot'
  1314. category: 'Crawler'
  1315. url: 'http://zookabot.com'
  1316. producer:
  1317. name: 'Hwacha ApS'
  1318. url: 'http://hwacha.dk'
  1319. - regex: 'ZumBot'
  1320. name: 'ZumBot'
  1321. category: 'Search bot'
  1322. url: 'http://help.zum.com/inquiry'
  1323. producer:
  1324. name: 'ZUM internet'
  1325. url: 'http://www.zuminternet.com/'
  1326. - regex: 'YottaaMonitor'
  1327. name: 'Yottaa Site Monitor'
  1328. category: 'Site Monitor'
  1329. url: 'http://www.yottaa.com/products/site-monitor'
  1330. producer:
  1331. name: 'Yottaa'
  1332. url: 'http://www.yottaa.com/'
  1333. - regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857.*'
  1334. name: 'Yahoo Gemini'
  1335. category: 'Crawler'
  1336. url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
  1337. producer:
  1338. name: 'Yahoo! Inc.'
  1339. url: 'http://www.yahoo.com'
  1340. - regex: '.*Java.*outbrain'
  1341. name: 'Outbrain'
  1342. category: 'Crawler'
  1343. url: ''
  1344. producer:
  1345. name: 'Outbrain'
  1346. url: 'http://www.outbrain.com/'
  1347. - regex: 'HubPages.*crawlingpolicy'
  1348. name: 'HubPages'
  1349. category: 'Crawler'
  1350. url: 'http://hubpages.com/help/crawlingpolicy'
  1351. producer:
  1352. name: 'HubPages'
  1353. url: 'http://hubpages.com/'
  1354. - regex: 'Pinterest/\d\.\d.*www\.pinterest\.com.*'
  1355. name: 'Pinterest'
  1356. url: ''
  1357. category: 'Crawler'
  1358. producer:
  1359. name: 'Pinterest'
  1360. url: 'http://www.pinterest.com/'
  1361. - regex: 'Site24x7'
  1362. name: 'Site24x7 Website Monitoring'
  1363. category: 'Site Monitor'
  1364. url: 'https://www.site24x7.com/site24x7-faq.html'
  1365. producer:
  1366. name: 'Site24x7'
  1367. url: 'https://www.site24x7.com'
  1368. - regex: "Let's Encrypt validation server"
  1369. name: "Let's Encrypt Validation"
  1370. category: 'Service Agent'
  1371. url: 'https://letsencrypt.org/how-it-works/'
  1372. producer:
  1373. name: "Let's Encrypt"
  1374. url: 'https://letsencrypt.org'
  1375. - regex: 'GrapeshotCrawler'
  1376. name: 'Grapeshot'
  1377. category: 'Crawler'
  1378. url: 'https://www.grapeshot.com/crawler'
  1379. producer:
  1380. name: 'Grapeshot'
  1381. url: 'https://www.grapeshot.com'
  1382. - regex: 'www\.monitor\.us'
  1383. name: 'Monitor.Us'
  1384. category: 'Site Monitor'
  1385. url: 'http://www.monitor.us'
  1386. producer:
  1387. name: 'Monitor.Us'
  1388. url: 'http://www.monitor.us'
  1389. - regex: 'Catchpoint( bot)?'
  1390. name: 'Catchpoint'
  1391. category: 'Site Monitor'
  1392. url: ''
  1393. producer:
  1394. name: 'Catchpoint Systems'
  1395. url: 'http://www.catchpoint.com/'
  1396. - regex: 'bitlybot'
  1397. name: 'BitlyBot'
  1398. category: 'Crawler'
  1399. url: 'https://bitly.com'
  1400. producer:
  1401. name: 'Bitly, Inc.'
  1402. url: 'https://bitly.com'
  1403. - regex: 'Zao/'
  1404. name: 'Zao'
  1405. category: 'Crawler'
  1406. - regex: 'lycos'
  1407. name: 'Lycos'
  1408. - regex: 'Slurp'
  1409. name: 'Inktomi Slurp'
  1410. - regex: 'Speedy Spider'
  1411. name: 'Speedy'
  1412. - regex: 'ScoutJet'
  1413. name: 'ScoutJet'
  1414. - regex: 'nrsbot|netresearch'
  1415. name: 'NetResearchServer'
  1416. - regex: 'scooter'
  1417. name: 'Scooter'
  1418. - regex: 'gigabot'
  1419. name: 'Gigabot'
  1420. - regex: 'charlotte'
  1421. name: 'Charlotte'
  1422. - regex: 'Pompos'
  1423. name: 'Pompos'
  1424. - regex: 'ichiro'
  1425. name: 'ichiro'
  1426. - regex: 'PagePeeker'
  1427. name: 'PagePeeker'
  1428. - regex: 'WebThumbnail'
  1429. name: 'WebThumbnail'
  1430. - regex: 'Willow Internet Crawler'
  1431. name: 'Willow Internet Crawler'
  1432. - regex: 'EmailWolf'
  1433. name: 'EmailWolf'
  1434. - regex: 'NetLyzer FastProbe'
  1435. name: 'NetLyzer FastProbe'
  1436. - regex: 'AdMantX.*admantx\.com'
  1437. name: 'ADMantX'
  1438. - regex: 'Server Density Service Monitoring.*'
  1439. name: 'Server Density'
  1440. - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
  1441. name: 'Generic Bot'
  1442. - regex: '^sentry'
  1443. name: 'Sentry Bot'
  1444. producer:
  1445. name: 'Sentry'
  1446. url: 'https://sentry.io'
  1447. # Generic detections
  1448. - regex: 'Nutch'
  1449. name: 'Nutch-based Bot'
  1450. category: 'Crawler'
  1451. url: 'https://nutch.apache.org'
  1452. producer:
  1453. name: 'The Apache Software Foundation'
  1454. url: 'http://www.apache.org/foundation/'
  1455. - regex: '[a-z0-9\-_]*((?<!cu|power )bot(?! TAB| ?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
  1456. name: 'Generic Bot'