跳至
<?php
/**
*
* @authors hg (hg0728@qq.com)
* @date 2015-05-22 17:00:48
* @version 1.0
*/
// Send a Content-Type header declaring the response as UTF-8 encoded HTML.
// NOTE(review): header() only works before any output has been sent; the stray
// text that precedes the opening PHP tag in this file is emitted first, so this
// call is expected to fail with "headers already sent" unless output buffering
// is enabled -- confirm and remove that text if it is not intentional.
header("content-type:text/html;charset=utf-8");
/**
 * Fetch a URL with cURL and return the response body.
 *
 * @param string $url Address to request.
 * @return string|bool Response body on success; false when the cURL handle
 *                     cannot be created or the transfer fails.
 */
function getcurl($url) {
    $ch = curl_init();
    if ($ch === false) {
        // No handle was created, so there is nothing to configure or close.
        return false;
    }
    // cURL option constants are case-sensitive and must be written in upper case;
    // lower-case spellings are undefined constants.
    curl_setopt($ch, CURLOPT_URL, $url);
    // Return the body from curl_exec() instead of printing it directly.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // NOTE(review): peer and host certificate verification are switched off, as in
    // the original code. This is unsafe for https targets -- confirm it is intended.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    $result = curl_exec($ch);
    curl_close($ch);
    return $result;
}
/**
 * Extract every match of a pattern from fetched text.
 *
 * For each match, the first capture group is echoed and a line of the form
 * "<group 1> <group 2>" is appended to the output file. A capture group that
 * the pattern does not define is treated as an empty string.
 *
 * @param string $str   Text to search (typically the body returned by getcurl()).
 * @param string $regex PCRE pattern to apply; defaults to the original hard-coded pattern.
 * @param string $file  File the extracted lines are appended to; defaults to 'blogs.txt'.
 * @return void
 */
function preg_list($str, $regex = '/(.*?)/', $file = 'blogs.txt') {
    // NOTE(review): the default pattern defines only one capture group although two
    // are consumed below; it looks as if the markup around the groups was lost from
    // the pattern -- confirm the intended expression.

    // A failed download yields false (or an empty body); there is nothing to extract.
    if (!is_string($str) || $str === '') {
        return;
    }

    $count = preg_match_all($regex, $str, $matches);
    if ($count === false || $count === 0) {
        // false signals a PCRE error, 0 means the pattern did not match.
        return;
    }

    $lines = '';
    for ($i = 0; $i < $count; $i++) {
        $first = isset($matches[1][$i]) ? $matches[1][$i] : '';
        $second = isset($matches[2][$i]) ? $matches[2][$i] : '';
        echo $first;
        $lines .= $first . ' ' . $second . "\n";
    }

    // FILE_APPEND (upper case) keeps the results of earlier calls; all lines of
    // this call are written in a single operation.
    file_put_contents($file, $lines, FILE_APPEND);
}
// Crawl page by page: pass 0 requests the site root, every later pass requests
// the numbered listing page under /sitehome/p/.
$i = 0;
while ($i < 201) {
    $url = ($i == 0)
        ? 'http://www.cnblogs.com/'
        : 'http://www.cnblogs.com/sitehome/p/'.$i;
    $str = getcurl($url);
    preg_list($str);
    $i++;
}