# /robots.txt file for http://www.flowfood4health.com
#
# Format notes:
#   - One directive per line; anything after '#' is a comment.
#   - A group is one or more "User-agent:" lines followed by its rules.
#   - A crawler obeys only the most specific group that matches it; it does
#     NOT additionally inherit the rules of the "User-agent: *" group.
#   - Crawl-delay is a non-standard extension; some crawlers (Googlebot
#     among them) ignore it.
#
# Examples...
#
# This allows a crawler to access everywhere:
#   User-agent: mr-webcrawler
#   Disallow:
#
# This allows a crawler no access, as all URLs on a server start with '/',
# which is all of them:
#   User-agent: mr-webcrawler
#   Disallow: /
#
# This stops all robots visiting URLs with /images or /hub
# ('*' means any other user agent):
#   User-agent: *
#   Disallow: /images/
#   Disallow: /hub/
#
# Other info from Wikipedia - http://en.wikipedia.org/wiki/Robots.txt
#   Crawl-delay: 30       - seconds to wait between successive requests to the same server
#   Request-rate: 1/5     - maximum rate is one page every 5 seconds
#   Visit-time: 0600-0845 - only visit between 6:00 AM and 8:45 AM UT (GMT)

# ---------------------------------------------------------------------------
# A L L O W E D
# ---------------------------------------------------------------------------

# Allow Google Bot everywhere (an empty Disallow means "nothing is blocked")
User-agent: Googlebot
Disallow:
Crawl-delay: 5

# ---------------------------------------------------------------------------
# D I S A L L O W E D
# ---------------------------------------------------------------------------

# Restrict Slurp (Yahoo) from images and server scripts, as a test
User-agent: Slurp
Disallow: /images/
Disallow: /_mmServerScripts
Crawl-delay: 30

# Disallow Baidu Bot (Japanese)
# (a separate /_mmServerScripts rule is unnecessary: "Disallow: /" covers it)
User-agent: Baiduspider
Disallow: /

# Disallow Boitho dc Bot (Norway)
User-agent: boitho.com-dc
Disallow: /

# Disallow Busiverse Bot (Turkey Sirketce/Busiverse)
User-agent: Busiverse
Disallow: /

# Disallow CazoodleBot - from University of Illinois
User-agent: CazoodleBot
Disallow: /

# Disallow Exabot Bot - Exalead
User-agent: Exabot
Disallow: /

# Disallow Google Image Bot
User-agent: Googlebot-Image
Disallow: /

# Disallow heritrix Bot - from Yell.Com
User-agent: heritrix
Disallow: /

# Disallow IRLbot - IRL Texas AM research bot
User-agent: IRLbot
Disallow: /

# Disallow Jyxobot - Czech Webcrawler for Jyxo
User-agent: Jyxobot
Disallow: /

# Disallow Majestic12.co.uk
User-agent: MJ12bot
Disallow: /

# Disallow Mirago.com (http://www.miragorobot.com)
# Only the product token belongs on the User-agent line, not the URL.
User-agent: Mirago-Test-Robot
Disallow: /

# Disallow MSN from seeing gifs and jpgs
# ('*' and '$' are wildcard extensions: any path ending in the extension)
User-agent: msnbot
Disallow: /*.gif$
Disallow: /*.jpg$
Disallow: /*.jpeg$

# Disallow NimbleCrawler (http://www.webmasterworld.com/forum93/858.htm)
User-agent: nimblecrawler
Disallow: /

# Disallow psbot spidering of images and hub
User-agent: psbot
Disallow: /

# Disallow Sirketce Bot (Turkey Sirketce/Busiverse)
User-agent: Sirketce
Disallow: /

# Disallow Seekbot - http://www.seekport.co.uk/seekbot/
User-agent: Seekbot
Disallow: /

# Disallow semanticdiscovery - from Southern Utah University (Computer Science Dept.)
User-agent: semanticdiscovery
Disallow: /

# Disallow Sogou - Chinese Search Engine
User-agent: Sogou
Disallow: /

# Disallow SoSo - Chinese Search Engine
User-agent: Soso
Disallow: /

# Disallow SoSoImageSpider - Chinese picture Search Engine
User-agent: Sosoimagespider
Disallow: /

# Disallow TinEye - Image trawler Search Engine
User-agent: TinEye
Disallow: /

# Disallow TurnITin - "This robot collects content from the Internet for the
# sole purpose of helping educational institutions prevent plagiarism"
User-agent: TurnitinBot
Disallow: /

# Disallow Twiceler - Cuil (also barred IPs on firewall)
User-agent: twiceler
Disallow: /

# Disallow Voilabot Bot - France Telecom
User-agent: VoilaBot
Disallow: /

# Disallow WebAlta Bot - Russian
User-agent: WebAlta
Disallow: /

# Disallow Yahoo Image Bot
User-agent: Yahoo-MMCrawler
Disallow: /

# Disallow YodaoBot - Chinese Search Engine
User-agent: YodaoBot
Disallow: /

# Disallow zermelo - Bot du Jour from Amazon - may need to block IP range
User-agent: zermelo
Disallow: /

# ---------------------------------------------------------------------------
# E V E R Y O N E   E L S E
# ---------------------------------------------------------------------------

# Default rules for every crawler not named above: no images, server scripts,
# '/flash' folder or the 404 page. Kept as a single '*' group because many
# parsers honour only the first group that matches a given user agent.
User-agent: *
Disallow: /images/
Disallow: /_mmServerScripts
Disallow: /flash
Disallow: /404.asp
Crawl-delay: 30

# ---------------------------------------------------------------------------
# O L D
# ---------------------------------------------------------------------------

# Not spidering ATM, but was in original robots.txt
#User-agent: Mediapartners-Google*
#Disallow: