Voilà, après quelques recherches et surtout après avoir constaté les limites du fichier robots.txt pour exclure les robots indésirables, j'ai vu qu'il est possible d'inscrire des lignes dans le fichier .htaccess.
J'ai trouvé 2 manières, dont voici des extraits.
La première façon est, semble-t-il, très complète, mais donne un fichier .htaccess de 13 Ko :
# Redirect requests whose User-Agent header contains one of the names below.
#
# RewriteCond patterns are regular expressions matched anywhere in the
# header (they are not shell globs), so each entry is written as the plain
# substring to look for:
#   - no leading ".*" and no trailing "*" (a trailing "*" would make the last
#     character optional, e.g. "Link*" would also match "Linux");
#   - "\ " stands for a literal space, "\." for a literal dot;
#   - ".*" is used where "anything in between" is meant.
# Conditions are chained with [OR]; [NC] makes an entry case-insensitive.
# The last condition must not carry [OR].
#
# NOTE(review): several entries are short, generic tokens (bot, DA, http,
# web, site, Link, email, spider, ...). They match every User-Agent that
# contains the token, so "bot" or "spider" also redirects well-behaved
# search-engine crawlers -- confirm that this is intended.
RewriteEngine on
RewriteCond %{HTTP_USER_AGENT} almaden [OR]
RewriteCond %{HTTP_USER_AGENT} Anarchie [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ASPSeek [OR]
RewriteCond %{HTTP_USER_AGENT} Atomz [OR]
RewriteCond %{HTTP_USER_AGENT} attach [OR]
RewriteCond %{HTTP_USER_AGENT} autoemailspider [OR]
RewriteCond %{HTTP_USER_AGENT} BackWeb [OR]
RewriteCond %{HTTP_USER_AGENT} Bandit [OR]
RewriteCond %{HTTP_USER_AGENT} BatchFTP [OR]
RewriteCond %{HTTP_USER_AGENT} bdfetch [NC,OR]
RewriteCond %{HTTP_USER_AGENT} BlackWidow [OR]
RewriteCond %{HTTP_USER_AGENT} bmclient [OR]
RewriteCond %{HTTP_USER_AGENT} bot [OR]
RewriteCond %{HTTP_USER_AGENT} Bot\ mailto:craftbot@yahoo\.com [OR]
RewriteCond %{HTTP_USER_AGENT} Botmailto:craftbot@yahoo\.com [OR]
RewriteCond %{HTTP_USER_AGENT} Buddy [OR]
RewriteCond %{HTTP_USER_AGENT} Bullseye [OR]
RewriteCond %{HTTP_USER_AGENT} bumblebee [OR]
RewriteCond %{HTTP_USER_AGENT} capture [OR]
RewriteCond %{HTTP_USER_AGENT} CherryPicker [OR]
RewriteCond %{HTTP_USER_AGENT} CherryPickerElite [OR]
RewriteCond %{HTTP_USER_AGENT} CherryPickerSE [OR]
RewriteCond %{HTTP_USER_AGENT} ChinaClaw [OR]
RewriteCond %{HTTP_USER_AGENT} CICC [OR]
RewriteCond %{HTTP_USER_AGENT} clipping [OR]
RewriteCond %{HTTP_USER_AGENT} collage [OR]
RewriteCond %{HTTP_USER_AGENT} Collector [OR]
RewriteCond %{HTTP_USER_AGENT} Copier [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Crescent [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Crescent.*Internet.*ToolPak [OR]
RewriteCond %{HTTP_USER_AGENT} christcrawler [OR]
RewriteCond %{HTTP_USER_AGENT} Custo [OR]
RewriteCond %{HTTP_USER_AGENT} DA [OR]
RewriteCond %{HTTP_USER_AGENT} diagem [OR]
RewriteCond %{HTTP_USER_AGENT} DIIbot [OR]
RewriteCond %{HTTP_USER_AGENT} DISCo [OR]
RewriteCond %{HTTP_USER_AGENT} DISCo\ Pump [OR]
RewriteCond %{HTTP_USER_AGENT} Download [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Download\ Demon [OR]
RewriteCond %{HTTP_USER_AGENT} Download\ Wonder [OR]
# The "no space" variants below were written "Name\Name" in the original
# list; backslash + letter is a regex escape (\D, \W, \t, \S, ...), not a
# literal, so they are spelled out as plain concatenated names.
RewriteCond %{HTTP_USER_AGENT} DownloadDemon [OR]
RewriteCond %{HTTP_USER_AGENT} DownloadWonder [OR]
RewriteCond %{HTTP_USER_AGENT} Downloader [OR]
RewriteCond %{HTTP_USER_AGENT} Drip [OR]
RewriteCond %{HTTP_USER_AGENT} DSurf15a [OR]
RewriteCond %{HTTP_USER_AGENT} dual.*proxy [OR]
RewriteCond %{HTTP_USER_AGENT} easydl [OR]
RewriteCond %{HTTP_USER_AGENT} EasyDL/2\.99 [OR]
RewriteCond %{HTTP_USER_AGENT} eCatch [OR]
RewriteCond %{HTTP_USER_AGENT} efp@gmx\.net [OR]
RewriteCond %{HTTP_USER_AGENT} EirGrabber [OR]
RewriteCond %{HTTP_USER_AGENT} email [OR]
RewriteCond %{HTTP_USER_AGENT} EmailCollector [OR]
RewriteCond %{HTTP_USER_AGENT} EmailSiphon [OR]
RewriteCond %{HTTP_USER_AGENT} EmailWolf [OR]
RewriteCond %{HTTP_USER_AGENT} express [OR]
RewriteCond %{HTTP_USER_AGENT} Express\ WebPictures [OR]
RewriteCond %{HTTP_USER_AGENT} ExpressWebPictures [OR]
RewriteCond %{HTTP_USER_AGENT} extractor [OR]
RewriteCond %{HTTP_USER_AGENT} ExtractorPro [OR]
RewriteCond %{HTTP_USER_AGENT} EyeNetIE [OR]
RewriteCond %{HTTP_USER_AGENT} fetch [OR]
RewriteCond %{HTTP_USER_AGENT} FileHound [OR]
RewriteCond %{HTTP_USER_AGENT} FlashGet [NC,OR]
RewriteCond %{HTTP_USER_AGENT} FlickBot [OR]
RewriteCond %{HTTP_USER_AGENT} FrontPage [NC,OR]
RewriteCond %{HTTP_USER_AGENT} GetRight [NC,OR]
RewriteCond %{HTTP_USER_AGENT} GetSmart [OR]
RewriteCond %{HTTP_USER_AGENT} GetWeb! [OR]
RewriteCond %{HTTP_USER_AGENT} GetWebPage [OR]
RewriteCond %{HTTP_USER_AGENT} gigabaz [OR]
RewriteCond %{HTTP_USER_AGENT} Go!Zilla [OR]
RewriteCond %{HTTP_USER_AGENT} Go-Ahead-Got-It [OR]
RewriteCond %{HTTP_USER_AGENT} GornKer [OR]
RewriteCond %{HTTP_USER_AGENT} gotit [OR]
RewriteCond %{HTTP_USER_AGENT} grab [OR]
RewriteCond %{HTTP_USER_AGENT} Grabber [NC,OR]
RewriteCond %{HTTP_USER_AGENT} GrabNet [OR]
RewriteCond %{HTTP_USER_AGENT} Grafula [OR]
RewriteCond %{HTTP_USER_AGENT} grub [OR]
RewriteCond %{HTTP_USER_AGENT} grub-client [OR]
RewriteCond %{HTTP_USER_AGENT} hloader [OR]
RewriteCond %{HTTP_USER_AGENT} HMView [OR]
RewriteCond %{HTTP_USER_AGENT} http [OR]
RewriteCond %{HTTP_USER_AGENT} HTTPConnect [OR]
RewriteCond %{HTTP_USER_AGENT} httpdown [OR]
RewriteCond %{HTTP_USER_AGENT} HTTrack [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Image\ Stripper [OR]
RewriteCond %{HTTP_USER_AGENT} Image\ Sucker [OR]
RewriteCond %{HTTP_USER_AGENT} imagefetch [OR]
RewriteCond %{HTTP_USER_AGENT} Indy.*Library [OR]
RewriteCond %{HTTP_USER_AGENT} Indy\ Library [OR]
RewriteCond %{HTTP_USER_AGENT} informant [OR]
RewriteCond %{HTTP_USER_AGENT} InterGET [OR]
RewriteCond %{HTTP_USER_AGENT} Internet\ Ninja [OR]
RewriteCond %{HTTP_USER_AGENT} InternetNinja [OR]
RewriteCond %{HTTP_USER_AGENT} InternetLinkAgent [OR]
RewriteCond %{HTTP_USER_AGENT} InternetSeer\.com [OR]
RewriteCond %{HTTP_USER_AGENT} Iria [OR]
RewriteCond %{HTTP_USER_AGENT} Irvine [OR]
RewriteCond %{HTTP_USER_AGENT} JBH.*Agent [OR]
RewriteCond %{HTTP_USER_AGENT} JetCar [OR]
RewriteCond %{HTTP_USER_AGENT} JOC [OR]
RewriteCond %{HTTP_USER_AGENT} JOC\ Web\ Spider [OR]
RewriteCond %{HTTP_USER_AGENT} JustView [OR]
RewriteCond %{HTTP_USER_AGENT} larbin [OR]
RewriteCond %{HTTP_USER_AGENT} LeechFTP [OR]
RewriteCond %{HTTP_USER_AGENT} LexiBot [OR]
RewriteCond %{HTTP_USER_AGENT} lftp [OR]
RewriteCond %{HTTP_USER_AGENT} likse [OR]
RewriteCond %{HTTP_USER_AGENT} Link [OR]
RewriteCond %{HTTP_USER_AGENT} Link.*Sleuth [OR]
RewriteCond %{HTTP_USER_AGENT} LINKS\ ARoMATIZED [OR]
RewriteCond %{HTTP_USER_AGENT} LinkWalker [OR]
RewriteCond %{HTTP_USER_AGENT} lotus [OR]
RewriteCond %{HTTP_USER_AGENT} lwp-trivial [OR]
RewriteCond %{HTTP_USER_AGENT} Magnet [OR]
RewriteCond %{HTTP_USER_AGENT} Mag-Net [OR]
RewriteCond %{HTTP_USER_AGENT} Mass\ Downloader [OR]
RewriteCond %{HTTP_USER_AGENT} MassDownloader [OR]
RewriteCond %{HTTP_USER_AGENT} Memo [OR]
RewriteCond %{HTTP_USER_AGENT} Microsoft.URL [OR]
RewriteCond %{HTTP_USER_AGENT} MIDown\ tool [OR]
RewriteCond %{HTTP_USER_AGENT} MIDowntool [OR]
RewriteCond %{HTTP_USER_AGENT} Mirror [OR]
RewriteCond %{HTTP_USER_AGENT} Mister\ PiX [NC,OR]
RewriteCond %{HTTP_USER_AGENT} MisterPiX [OR]
RewriteCond %{HTTP_USER_AGENT} Mozilla.*MSIECrawler [OR]
RewriteCond %{HTTP_USER_AGENT} Mozilla.*Indy [OR]
RewriteCond %{HTTP_USER_AGENT} Mozilla.*NEWT [OR]
RewriteCond %{HTTP_USER_AGENT} MSProxy [OR]
RewriteCond %{HTTP_USER_AGENT} multithreaddb [OR]
RewriteCond %{HTTP_USER_AGENT} nationaldirectory [OR]
RewriteCond %{HTTP_USER_AGENT} Navroad [OR]
RewriteCond %{HTTP_USER_AGENT} NearSite [OR]
RewriteCond %{HTTP_USER_AGENT} Net\ Vampire [OR]
RewriteCond %{HTTP_USER_AGENT} NetVampire [OR]
RewriteCond %{HTTP_USER_AGENT} NetAnts [OR]
RewriteCond %{HTTP_USER_AGENT} NetMechanic [OR]
RewriteCond %{HTTP_USER_AGENT} NetSpider [OR]
RewriteCond %{HTTP_USER_AGENT} NetZIP [OR]
RewriteCond %{HTTP_USER_AGENT} NICErsPRO [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Ninja [NC,OR]
RewriteCond %{HTTP_USER_AGENT} obot [OR]
RewriteCond %{HTTP_USER_AGENT} Octopus [OR]
RewriteCond %{HTTP_USER_AGENT} Offline [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Offline\ Explorer [OR]
RewriteCond %{HTTP_USER_AGENT} Offline\ Navigator [OR]
RewriteCond %{HTTP_USER_AGENT} OfflineExplorer [OR]
RewriteCond %{HTTP_USER_AGENT} OfflineNavigator [OR]
RewriteCond %{HTTP_USER_AGENT} Openfind [OR]
RewriteCond %{HTTP_USER_AGENT} PageGrabber [OR]
RewriteCond %{HTTP_USER_AGENT} Papa\ Foto [OR]
RewriteCond %{HTTP_USER_AGENT} PapaFoto [OR]
RewriteCond %{HTTP_USER_AGENT} pavuk [OR]
RewriteCond %{HTTP_USER_AGENT} pcBrowser [OR]
RewriteCond %{HTTP_USER_AGENT} PersonaPilot [OR]
RewriteCond %{HTTP_USER_AGENT} Ping [OR]
RewriteCond %{HTTP_USER_AGENT} PingALink [OR]
RewriteCond %{HTTP_USER_AGENT} Pockey [OR]
RewriteCond %{HTTP_USER_AGENT} Proxy [OR]
RewriteCond %{HTTP_USER_AGENT} psbot [OR]
RewriteCond %{HTTP_USER_AGENT} Pump [OR]
RewriteCond %{HTTP_USER_AGENT} QRVA [OR]
RewriteCond %{HTTP_USER_AGENT} RealDownload [OR]
RewriteCond %{HTTP_USER_AGENT} Reaper [OR]
RewriteCond %{HTTP_USER_AGENT} Recorder [OR]
RewriteCond %{HTTP_USER_AGENT} ReGet [OR]
RewriteCond %{HTTP_USER_AGENT} replacer [OR]
RewriteCond %{HTTP_USER_AGENT} SearchExpress [OR]
RewriteCond %{HTTP_USER_AGENT} Seeker [OR]
RewriteCond %{HTTP_USER_AGENT} Siphon [OR]
RewriteCond %{HTTP_USER_AGENT} site [OR]
RewriteCond %{HTTP_USER_AGENT} sitecheck\.internetseer\.com [OR]
RewriteCond %{HTTP_USER_AGENT} SiteSnagger [OR]
RewriteCond %{HTTP_USER_AGENT} Slurp [NC,OR]
RewriteCond %{HTTP_USER_AGENT} SlySearch [OR]
RewriteCond %{HTTP_USER_AGENT} SmartDownload [OR]
RewriteCond %{HTTP_USER_AGENT} snagger [OR]
RewriteCond %{HTTP_USER_AGENT} Snake [OR]
RewriteCond %{HTTP_USER_AGENT} SpaceBison [OR]
RewriteCond %{HTTP_USER_AGENT} spider [OR]
RewriteCond %{HTTP_USER_AGENT} Sqworm [OR]
RewriteCond %{HTTP_USER_AGENT} Strip [OR]
RewriteCond %{HTTP_USER_AGENT} Stripper [OR]
RewriteCond %{HTTP_USER_AGENT} Sucker [OR]
RewriteCond %{HTTP_USER_AGENT} SuperBot [OR]
RewriteCond %{HTTP_USER_AGENT} SuperHTTP [OR]
RewriteCond %{HTTP_USER_AGENT} Surf [OR]
RewriteCond %{HTTP_USER_AGENT} Surfbot [OR]
RewriteCond %{HTTP_USER_AGENT} surfcontrol [OR]
RewriteCond %{HTTP_USER_AGENT} SurfWalker [OR]
RewriteCond %{HTTP_USER_AGENT} SurveyBot [OR]
RewriteCond %{HTTP_USER_AGENT} Szukacz [OR]
RewriteCond %{HTTP_USER_AGENT} tAkeOut [OR]
RewriteCond %{HTTP_USER_AGENT} Teleport [OR]
RewriteCond %{HTTP_USER_AGENT} Teleport\ Pro [OR]
RewriteCond %{HTTP_USER_AGENT} Telesoft [OR]
RewriteCond %{HTTP_USER_AGENT} turingos [OR]
RewriteCond %{HTTP_USER_AGENT} TurnitinBot [OR]
RewriteCond %{HTTP_USER_AGENT} TV33_Mercator [OR]
RewriteCond %{HTTP_USER_AGENT} URLSpiderPro [OR]
RewriteCond %{HTTP_USER_AGENT} Vacuum [OR]
RewriteCond %{HTTP_USER_AGENT} vagabondo [OR]
RewriteCond %{HTTP_USER_AGENT} vayala [OR]
RewriteCond %{HTTP_USER_AGENT} visibilitygap [OR]
RewriteCond %{HTTP_USER_AGENT} VoidEYE [OR]
RewriteCond %{HTTP_USER_AGENT} watcher [OR]
RewriteCond %{HTTP_USER_AGENT} web [OR]
RewriteCond %{HTTP_USER_AGENT} Web\ Data\ Extractor [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Web\ Downloader [OR]
RewriteCond %{HTTP_USER_AGENT} Web\ Image\ Collector [OR]
RewriteCond %{HTTP_USER_AGENT} Web\ Sucker [OR]
RewriteCond %{HTTP_USER_AGENT} WebImageCollector [OR]
RewriteCond %{HTTP_USER_AGENT} WebSucker [OR]
RewriteCond %{HTTP_USER_AGENT} WebAuto [OR]
RewriteCond %{HTTP_USER_AGENT} WebBandit [OR]
RewriteCond %{HTTP_USER_AGENT} WebCapture [OR]
RewriteCond %{HTTP_USER_AGENT} Webclipping [OR]
RewriteCond %{HTTP_USER_AGENT} webcollage [OR]
RewriteCond %{HTTP_USER_AGENT} WebCopier [OR]
RewriteCond %{HTTP_USER_AGENT} WebEMailExtrac [OR]
RewriteCond %{HTTP_USER_AGENT} WebFetch [OR]
RewriteCond %{HTTP_USER_AGENT} WebGo\ IS [OR]
RewriteCond %{HTTP_USER_AGENT} WebHook [OR]
RewriteCond %{HTTP_USER_AGENT} Webinator [OR]
RewriteCond %{HTTP_USER_AGENT} WebLeacher [OR]
RewriteCond %{HTTP_USER_AGENT} WebMiner [OR]
RewriteCond %{HTTP_USER_AGENT} WebMirror [OR]
RewriteCond %{HTTP_USER_AGENT} WebReaper [OR]
RewriteCond %{HTTP_USER_AGENT} WebSauger [OR]
RewriteCond %{HTTP_USER_AGENT} Website [OR]
RewriteCond %{HTTP_USER_AGENT} Website\ eXtractor [OR]
RewriteCond %{HTTP_USER_AGENT} Website\ Quester [OR]
RewriteCond %{HTTP_USER_AGENT} Webster [OR]
RewriteCond %{HTTP_USER_AGENT} WebStripper [OR]
RewriteCond %{HTTP_USER_AGENT} WebStripper/2\.09 [OR]
RewriteCond %{HTTP_USER_AGENT} WebWhacker [OR]
RewriteCond %{HTTP_USER_AGENT} WebZIP [OR]
RewriteCond %{HTTP_USER_AGENT} Wget [OR]
RewriteCond %{HTTP_USER_AGENT} Whacker [OR]
RewriteCond %{HTTP_USER_AGENT} whizbang [OR]
RewriteCond %{HTTP_USER_AGENT} Widow [OR]
RewriteCond %{HTTP_USER_AGENT} Wweb [OR]
RewriteCond %{HTTP_USER_AGENT} WWWOFFLE [OR]
RewriteCond %{HTTP_USER_AGENT} Xaldon [OR]
RewriteCond %{HTTP_USER_AGENT} Xaldon\ WebSpider [OR]
RewriteCond %{HTTP_USER_AGENT} x-Tractor [OR]
RewriteCond %{HTTP_USER_AGENT} Zeus [OR]
RewriteCond %{HTTP_USER_AGENT} Zeus.*Webster [OR]
RewriteCond %{HTTP_USER_AGENT} ZyBorg
# Any request that satisfied one of the conditions above is sent away with
# an external redirect ([R]); [L] stops further rewrite processing.
RewriteRule ^.*$ http://www.autre_site.com [R,L]
La seconde manière est la suivante :
D'après vous, quelle manière est la meilleure ? La première solution, avec un fichier de 13 Ko, n'alourdit-elle pas trop un site ?
SetEnvIfNoCase User-Agent "^EmailSiphon" bad_bot
# Each SetEnvIfNoCase line sets the bad_bot environment variable when the
# User-Agent header starts with the quoted name ("^" anchors the match at
# the beginning; the comparison is case-insensitive).
SetEnvIfNoCase User-Agent "^EmailWolf" bad_bot
SetEnvIfNoCase User-Agent "^ExtractorPro" bad_bot
SetEnvIfNoCase User-Agent "^CherryPicker" bad_bot
SetEnvIfNoCase User-Agent "^NICErsPRO" bad_bot
SetEnvIfNoCase User-Agent "^Teleport" bad_bot
SetEnvIfNoCase User-Agent "^EmailCollector" bad_bot
SetEnvIfNoCase User-Agent "^LinkWalker" bad_bot
SetEnvIfNoCase User-Agent "^Zeus" bad_bot
# Allow everyone, then refuse every request that carries the bad_bot flag.
# The rules are deliberately not wrapped in <Limit GET POST>: inside <Limit>
# they would only apply to the listed methods, leaving every other HTTP
# method open to the flagged clients.
# Order/Allow/Deny is the Apache 2.2 access-control syntax; Apache 2.4 only
# accepts it when mod_access_compat is loaded.
Order Allow,Deny
Allow from all
Deny from env=bad_bot