# robots.txt

Sitemap: http://www.chiplist.com/sitemap.txt
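# (Note on the line above: Sitemap is a standalone directive from the
# sitemaps.org protocol. It sits outside any User-agent group and may
# point to an XML sitemap or, as here, a plain text file listing one
# URL per line.)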

User-agent: *

Disallow: /cgi-bin/
Disallow: /scripts/
Disallow: /ChipList2/scripts/
#Disallow: /styles/
Disallow: /ChipList2/styles/

Disallow: /ads/
Disallow: /ChipList2/ads/
Disallow: /advertisements/
Disallow: /ChipList2/advertisements/

Disallow: /graphics/
Disallow: /ChipList2/graphics/

#Disallow: /ChipList1/
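# (Note: each Disallow value above is matched as a path prefix, so for
# example "Disallow: /cgi-bin/" also covers /cgi-bin/any/deeper/page.
# An empty value would allow everything; only "Disallow: /" blocks an
# entire site.)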


# robots.txt for http://www.wikipedia.org/ and friends
#
# Please note: There are a lot of pages on this site, and there are
# some misbehaved spiders out there that go _way_ too fast. If you're
# irresponsible, your access to the site may be blocked.

# Inktomi's "Slurp" understands a minimum delay between hits; if your
# bot supports 'Crawl-delay' or a similar instruction, please let us know.

# *At least* 1 second, please; preferably more :D
User-agent: *
Crawl-delay: 1
Request-rate: 1/1
Visit-time: 0200-0500
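# (Taken together, the three directives above ask a compliant crawler
# for at most one request per second (Crawl-delay: 1; Request-rate: 1/1,
# i.e. one document per one second) and to crawl only between 02:00 and
# 05:00. These are non-standard extensions: support varies by crawler,
# and the visit times are typically interpreted as GMT.)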

# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
User-agent: UbiCrawler
Disallow: /

User-agent: DOC
Disallow: /

User-agent: Zao
Disallow: /

# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: linko
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: Microsoft.URL.Control
Disallow: /

User-agent: Xenu
Disallow: /

User-agent: larbin
Disallow: /

User-agent: libwww
Disallow: /

User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /

#
# Sorry, wget in its recursive mode is a frequent problem.
# Please read the man page and use it properly; there is a
# --wait option you can use to set the delay between hits,
# for instance.
#
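# A polite invocation would look something like this (illustrative only;
# the two-second delay is an example, and recursive mirroring of this
# site is still disallowed below):
#
#   wget --recursive --wait=2 --random-wait http://www.chiplist.com/
#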
User-agent: wget
Disallow: /

#
# The 'grub' distributed client has been *very* poorly behaved.
#
User-agent: grub-client
Disallow: /

#
# Doesn't follow robots.txt anyway, but...
#
User-agent: k2spider
Disallow: /

#
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /

# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /


# Provided courtesy of http://browsers.garykeith.com.
# Created on February 13, 2008 at 7:39:00 PM GMT.
#
# Place this file in the root public folder of your website.
# It asks the following bots not to index your website.
#
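# (For example, on this site that means the file must be served as
# http://www.chiplist.com/robots.txt; crawlers request only that exact
# path and ignore robots.txt files placed in subdirectories.)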
User-agent: abot
User-agent: ALeadSoftbot
User-agent: BeijingCrawler
User-agent: BilgiBot
User-agent: bot
User-agent: botlist
User-agent: BOTW Spider
User-agent: bumblebee
User-agent: Bumblebee
User-agent: BuzzRankingBot
User-agent: Charlotte
User-agent: Clushbot
User-agent: Crawler
User-agent: CydralSpider
User-agent: DataFountains
User-agent: DiamondBot
User-agent: Dulance bot
User-agent: DYNAMIC
User-agent: EARTHCOM.info
User-agent: EDI
User-agent: envolk
User-agent: Exabot
User-agent: Exabot-Images
User-agent: Exabot-Test
User-agent: exactseek-pagereaper
User-agent: Exalead NG
User-agent: FANGCrawl
User-agent: Feed::Find
User-agent: flatlandbot
User-agent: Gigabot
User-agent: GigabotSiteSearch
User-agent: GurujiBot
User-agent: Hatena Antenna
User-agent: Hatena Bookmark
User-agent: Hatena RSS
User-agent: HatenaScreenshot
User-agent: Helix
User-agent: HiddenMarket
User-agent: HyperEstraier
User-agent: iaskspider
User-agent: IIITBOT
User-agent: InfociousBot
User-agent: iVia
User-agent: iVia Page Fetcher
User-agent: Jetbot
User-agent: Kolinka Forum Search
User-agent: KRetrieve
User-agent: LetsCrawl.com
User-agent: Lincoln State Web Browser
User-agent: Links4US-Crawler
User-agent: LOOQ
User-agent: Lsearch/sondeur
User-agent: MapoftheInternet.com
User-agent: NationalDirectory
User-agent: NetCarta_WebMapper
User-agent: NewsGator
User-agent: NextGenSearchBot
User-agent: ng
User-agent: nicebot
User-agent: NP
User-agent: NPBot
User-agent: Nudelsalat
User-agent: Nutch
User-agent: OmniExplorer_Bot
User-agent: OpenIntelligenceData
User-agent: Oracle Enterprise Search
User-agent: Pajaczek
User-agent: panscient.com
User-agent: PeerFactor 404 crawler
User-agent: PeerFactor Crawler
User-agent: PlantyNet
User-agent: PlantyNet_WebRobot
User-agent: plinki
User-agent: PMAFind
User-agent: Pogodak!
User-agent: QuickFinder Crawler
User-agent: Radiation Retriever
User-agent: Reaper
User-agent: RedCarpet
User-agent: ScorpionBot
User-agent: Scrubby
User-agent: Scumbot
User-agent: searchbot
User-agent: Seeker.lookseek.com
User-agent: SeznamBot
User-agent: ShowXML
User-agent: snap.com
User-agent: snap.com beta crawler
User-agent: Snapbot
User-agent: SnapPreviewBot
User-agent: sohu
User-agent: SpankBot
User-agent: Speedy Spider
User-agent: Speedy_Spider
User-agent: SpeedySpider
User-agent: spider
User-agent: SquigglebotBot
User-agent: SurveyBot
User-agent: SynapticSearch
User-agent: T-H-U-N-D-E-R-S-T-O-N-E
User-agent: Talkro Web-Shot
User-agent: Tarantula
User-agent: TerrawizBot
User-agent: TheInformant
User-agent: TMCrawler
User-agent: TridentSpider
User-agent: Tutorial Crawler
User-agent: Twiceler
User-agent: unwrapbot
User-agent: URI::Fetch
User-agent: VengaBot
User-agent: Vonna.com b o t
User-agent: Vortex
User-agent: Votay bot
User-agent: WebAlta Crawler
User-agent: Webbot
User-agent: Webclipping.com
User-agent: WebCorp
User-agent: Webinator
User-agent: WIRE
User-agent: WISEbot
User-agent: Xerka WebBot
User-agent: XSpider
User-agent: YodaoBot
User-agent: Yoono
User-agent: yoono
Disallow: /
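# (The long run of User-agent lines above forms a single group, so the
# one "Disallow: /" applies to every bot named in the list.)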