代碼如下:
#全局配置
limit_req_zone $anti_spider zone=anti_spider:10m rate=15r/m;
#某個server中
limit_req zone=anti_spider burst=30 nodelay;
if ($http_user_agent ~* "xxspider|xxbot") {
set $anti_spider $http_user_agent;
}
超過設置的限定頻率,就會給spider一個503。
上述配置詳細解釋請自行google下,具體的spider/bot名稱請自定義。
附:nginx中禁止屏蔽網絡爬蟲
代碼如下:
server {
listen 80;
server_name www.xxx.com;
#charset koi8-r;
#access_log logs/host.access.log main;
#location / {
# root html;
# index index.html index.htm;
#}
if ($http_user_agent ~* "qihoobot|Baiduspider|Googlebot|Googlebot-Mobile|Googlebot-Image|Mediapartners-Google|Adsbot-Google|Feedfetcher-Google|Yahoo! Slurp|Yahoo! Slurp China|YoudaoBot|Sosospider|Sogou spider|Sogou web spider|MSNBot|ia_archiver|Tomato Bot") {
return 403;
}
location ~ ^/(.*)$ {
proxy_pass http://localhost:8080;
proxy_redirect off;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
client_max_body_size 10m;
client_body_buffer_size 128k;
proxy_connect_timeout 90;
proxy_send_timeout 90;
proxy_read_timeout 90;
proxy_buffer_size 4k;
新聞熱點
疑難解答