关于 cURL 采集:对方设置了来源页(Referer)判断
本帖最后由 u013366173 于 2015-03-03 22:35:09 编辑
request url:http://miaoo.sinaapp.com/cai.php
request method:get
status code:200 ok
request headers
accept:image/webp,*/*;q=0.8
accept-encoding:gzip,deflate,sdch
accept-language:zh-cn,zh;q=0.8
cache-control:max-age=0
connection:keep-alive
cookie:saeut=27.156.92.178.1425388310638157
host:miaoo.sinaapp.com
if-modified-since:tue, 03 mar 2015 04:57:57 gmt
referer:http://52jifenbao.com/cai/
user-agent:mozilla/5.0 (windows nt 5.1) applewebkit/537.36 (khtml, like gecko) chrome/31.0.1650.63 safari/537.36
response headers
connection:keep-alive
content-disposition:inline; filename=download.png
content-encoding:gzip
content-type:image/png
date:tue, 03 mar 2015 14:31:09 gmt
last-modified:tue, 03 mar 2015 04:57:57 gmt
server:nginx/1.4.4
transfer-encoding:chunked
vary:accept-encoding
via:10.67.15.22
x-powered-by:php/5.3.29
要采集的是这个网址 http://miaoo.sinaapp.com/cai.php,成功的话会显示一张图片,类似验证码图片。对方应该是判断了来源页(Referer),不需要 cookie 和 post 数据,来源页为 referer:http://52jifenbao.com/cai/。请问怎样才能成功采集?上面是从 360 浏览器复制下来的请求头和响应头信息。
------解决思路----------------------
常用的功能要写成函数或类保存起来,以备不时之需
而不是临阵磨枪
include 'curl/curl_get.php';
// Fetch the remote image through curl_get() and relay it to the browser.
$url = 'http://miaoo.sinaapp.com/cai.php';
$img = curl_get($url);
if ($img === false) {
    // curl_exec() reports a failed transfer as false; do not emit an empty page silently.
    exit('curl_get failed: ' . $url);
}
// The remote response is a PNG. Without an explicit Content-Type the browser
// would treat the echoed bytes as text/html and show garbage.
header('Content-Type: image/png');
echo $img;
curl/curl_get.php 文件内容:
/**
 * Fetch a URL with cURL and return the response body.
 *
 * Issues a GET request by default; when $data is non-empty the request is
 * sent as a POST with $data as the fields. Cookies are loaded from and saved
 * to "cookie.txt" in the current working directory, redirects are followed,
 * and compressed responses are decoded.
 *
 * @param string      $durl    URL to request.
 * @param array       $data    POST fields; an empty array means a plain GET.
 * @param string|null $referer Referer header to send. When null, the root of
 *                             the target host ("http://<host>/") is used.
 * @return string|false Response body, or false when the handle could not be
 *                      created or the transfer failed.
 */
function curl_get($durl, $data = array(), $referer = null) {
    // realpath() returns false for a file that does not exist yet, which would
    // disable cookie persistence on the first run; fall back to an explicit path.
    $cookiejar = realpath('cookie.txt');
    if ($cookiejar === false) {
        $cookiejar = getcwd() . DIRECTORY_SEPARATOR . 'cookie.txt';
    }

    // Default the referer to the target site's own root unless the caller
    // supplied the page the remote server expects.
    if ($referer === null) {
        $t = parse_url($durl);
        if (is_array($t) && isset($t['host'])) {
            $referer = "http://{$t['host']}/";
        }
    }

    // HTTP_USER_AGENT is absent when running from the CLI or when the client
    // sent none; use a generic browser-like value in that case.
    $agent = isset($_SERVER['HTTP_USER_AGENT'])
        ? $_SERVER['HTTP_USER_AGENT']
        : 'Mozilla/5.0 (compatible; curl_get)';

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $durl);
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);
    // NOTE(review): peer verification is disabled, as in the original; this
    // accepts any certificate on https URLs. Enable it if a CA bundle is available.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_USERAGENT, $agent);
    if ($referer !== null) {
        curl_setopt($ch, CURLOPT_REFERER, $referer);
    }
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiejar);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // An empty string advertises every encoding libcurl supports (gzip, deflate, ...)
    // and decodes the response transparently.
    curl_setopt($ch, CURLOPT_ENCODING, '');
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

    if ($data) {
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    }

    $r = curl_exec($ch);
    curl_close($ch);
    return $r;
}