我們可以通過HTTP_USER_AGENT來判斷是否是蜘蛛,搜索引擎的蜘蛛都有自己的獨特標志,下面列取了一部分。
function is_crawler() {
$userAgent = strtolower($_SERVER['HTTP_USER_AGENT']);
$spiders = array(
'Googlebot', // Google 爬蟲
'Baiduspider', // 百度爬蟲
'Yahoo! Slurp', // 雅虎爬蟲
'YodaoBot', // 有道爬蟲
'msnbot' // Bing爬蟲
// 更多爬蟲關鍵字
);
foreach ($spiders as $spider) {
$spider = strtolower($spider);
if (strpos($userAgent, $spider) !== false) {
return true;
}
}
return false;
}
下面的php代碼附帶了更多的蜘蛛標識
function isCrawler() {
echo $agent= strtolower($_SERVER['HTTP_USER_AGENT']);
if (!empty($agent)) {
$spiderSite= array(
"TencentTraveler",
"Baiduspider+",
"BaiduGame",
"Googlebot",
"msnbot",
"Sosospider+",
"Sogou web spider",
"ia_archiver",
"Yahoo! Slurp",
"YoudaoBot",
"Yahoo Slurp",
"MSNBot",
"Java (Often spam bot)",
"BaiDuSpider",
"Voila",
"Yandex bot",
"BSpider",
"twiceler",
"Sogou Spider",
"Speedy Spider",
"Google AdSense",
"Heritrix",
"Python-urllib",
"Alexa (IA Archiver)",
"Ask",
"Exabot",
"Custo",
"OutfoxBot/YodaoBot",
"yacy",
"SurveyBot",
"legs",
"lwp-trivial",
"Nutch",
"StackRambler",
"The web archive (IA Archiver)",
"Perl tool",
"MJ12bot",
"Netcraft",
"MSIECrawler",
"WGet tools",
"larbin",
"Fish search",
);
foreach($spiderSite as $val) {
$str = strtolower($val);
if (strpos($agent, $str) !== false) {
return true;
}
}
} else {
return false;
}
}
if (isCrawler()){
echo "你好蜘蛛精!";
}
else{
echo "你不是蜘蛛精?。?quot;;
}
新聞熱點
疑難解答