php采集代码
采集http://www.01job.cn/asp/itjob.asp该页面中职位列表头三条的记录
看了很多的php采集教程了 还是不会写 所以这里请教了....
以下是我的代码,运行时出错了:
<?php
// Collect one field from the 01job listing page and print it.
include 'function.php';

$url = 'http://www.01job.cn/asp/itjob.asp';

// Extraction rules: field name => begin/end marker strings.
// NOTE(review): the marker strings were stripped from this listing; the
// <title> tags below are an assumed stand-in - confirm against the page markup.
$ft = array();
$ft['title']['begin'] = '<title>';
$ft['title']['end'] = '</title>';

// No post-extraction replacements are configured.
$th = array();

$rs = pick($url, $ft, $th);
echo $rs['title'];
// Only fields configured in $ft exist in $rs, so guard the "content" lookup.
echo '内容:' . (isset($rs['content']) ? $rs['content'] : '');
?>
function.php
$value)
{
$rs[$key]=fetch_match_contents($value[begin],$value[end],$c);
if(is_array($th[$key]))
{ foreach($th[$key] as $old => $new)
{
$rs[$key]=str_replace($old,$new,$rs[$key]);
}
}
}
return $rs;
}
?>
------解决方案--------------------
/**
 * Download the body of a page, retrying while the response is empty.
 *
 * @param string $url         Address (or local path) handed to file_get_contents().
 * @param int    $maxAttempts Upper bound on attempts; values below 1 are treated as 1.
 *
 * @return string|false The last response read; false or a blank string when
 *                      every attempt failed.
 */
function fetch_urlpage_contents($url, $maxAttempts = 3)
{
    $c = false;
    // NOTE(review): the loop bound was lost from the original listing;
    // three attempts is an assumed default - adjust if the source differed.
    $attempts = max(1, (int) $maxAttempts);
    for ($i = 0; $i < $attempts; $i++) {
        // Warnings are suppressed on purpose: a failed attempt is simply retried.
        $c = @file_get_contents($url);
        if ($c !== false && trim($c) !== '') {
            break;
        }
    }
    return $c;
}
/**
 * Return the text located between two marker strings.
 *
 * The end marker is searched for only after the begin marker, so an earlier
 * stray occurrence of the end marker (or identical begin/end markers) does
 * not defeat the match. A begin marker at offset 0 is accepted.
 *
 * @param string $begin Marker that precedes the wanted text.
 * @param string $end   Marker that follows the wanted text.
 * @param string $c     Text to search.
 *
 * @return string The text between the markers, or '' when either marker is
 *                empty or cannot be found in order.
 */
function fetch_match_contents($begin, $end, $c)
{
    $begin = (string) $begin;
    $end = (string) $end;
    $c = (string) $c;

    // An empty needle cannot delimit anything.
    if ($begin === '' || $end === '') {
        return '';
    }

    $beginpos = strpos($c, $begin);
    if ($beginpos === false) {
        return '';
    }

    // Start looking for the end marker right after the begin marker.
    $start = $beginpos + strlen($begin);
    $endpos = strpos($c, $end, $start);
    if ($endpos === false) {
        return '';
    }

    return substr($c, $start, $endpos - $start);
}
/**
 * Fetch a page and extract one value per configured field.
 *
 * @param string     $url Page to download via fetch_urlpage_contents().
 * @param array      $ft  Map of field name => array('begin' => ..., 'end' => ...)
 *                        marker pairs passed to fetch_match_contents().
 * @param array|null $th  Optional map of field name => array(search => replacement)
 *                        applied to the extracted value with str_replace().
 *
 * @return array Map of field name => extracted (and replaced) text; empty
 *               when $ft holds no rules.
 */
function pick($url, $ft, $th = array())
{
    $rs = array();
    $c = fetch_urlpage_contents($url);

    foreach ((array) $ft as $key => $value) {
        // Missing markers fall back to '' rather than raising an undefined-index notice.
        $begin = isset($value['begin']) ? $value['begin'] : '';
        $end = isset($value['end']) ? $value['end'] : '';
        $rs[$key] = fetch_match_contents($begin, $end, $c);

        // Replacements are optional per field; callers may omit $th entirely.
        if (isset($th[$key]) && is_array($th[$key])) {
            foreach ($th[$key] as $old => $new) {
                $rs[$key] = str_replace($old, $new, $rs[$key]);
            }
        }
    }

    return $rs;
}
?>
caiji
<?php
// Driver script: extract the configured fields from the listing page and dump them.
// NOTE(review): assumes the helper functions live in function.php, as in the
// question's own script - confirm the file name.
include 'function.php';

$url = 'http://www.01job.cn/asp/itjob.asp';

// NOTE(review): the begin/end marker strings were stripped from this listing;
// the <title> tags are an assumed stand-in - confirm against the page markup.
$ft = array();
$ft['title']['begin'] = '<title>';
$ft['title']['end'] = '</title>';

// No post-extraction replacements are configured.
$th = array();

$rs = pick($url, $ft, $th);
print_r($rs);
?>
------解决方案--------------------