您好,欢迎访问一九零五行业门户网

DedeHttpDown PHP远程下载网页的类,增强版 2013

(1)新增远程主机判断,节约服务器资源:避免远程主机不存在时仍然调用fsockopen,导致进程卡死并占用服务器CPU。 (2)新增响应401的判断和支持。 (3)增加对json返回文本的支持。 (4)新增日志功能:如果定义了常量debug_level且其值为true,则会对每次远程下载记录日志。
(1)新增远程主机判断,节约服务器资源:避免远程主机不存在时仍然调用fsockopen,导致进程卡死并占用服务器CPU。
(2)新增响应401的判断和支持。
(3)增加对json返回文本的支持。
(4)新增日志功能:如果定义了常量debug_level且其值为true,则会对每次远程下载记录日志。
(5)新增datalimit属性,用于限制获取的字节数,节约服务器资源。
(6)修改日期:2013-1-17
如果您有更好的方法或建议,可以随时联系我本人:admin@zbphp.com
m_url = $url; if(is_array($urls)) { $this->m_host = $urls[host]; if(!empty($urls[scheme])) { $this->m_scheme = $urls[scheme]; } if(!empty($urls[user])) { $this->m_user = $urls[user]; } if(!empty($urls[pass])) { $this->m_pass = $urls[pass]; } if(!empty($urls[port])) { $this->m_port = $urls[port]; } if(!empty($urls[path])) { $this->m_path = $urls[path]; } $this->m_urlpath = $this->m_path; if(!empty($urls[query])) { $this->m_query = $urls[query]; $this->m_urlpath .= ?.$this->m_query; } $this->homeurl = $urls[host]; $this->baseurlpath = $this->homeurl.$urls[path]; $this->baseurlpath = preg_replace(/\/([^\/]*)\.(.*)$/,/,$this->baseurlpath); $this->baseurlpath = preg_replace(/\/$/,,$this->baseurlpath); } } /** * 重设各参数 * * @access public * @return void */ function resetany() { $this->m_url = ; $this->m_urlpath = ; $this->m_scheme = http; $this->m_host = ; $this->m_port = 80; $this->m_user = ; $this->m_pass = ; $this->m_path = /; $this->m_query = ; $this->m_error = ; } /** * 打开指定网址 * * @access public * @param string $url 地址 * @param string $requesttype 请求类型 * @return string */ function openurl($url,$requesttype=get) { $this->resetany(); $this->jumpcount = 0; $this->m_httphead = array() ; $this->m_html = ''; $this->datalimit = 0; $this->retry = 0; $this->close(); //初始化系统 $this->privateinit($url); $this->privatestartsession($requesttype); } /** * 转到303重定向网址 * * @access public * @param string $url 地址 * @return string */ function jumpopenurl($url) { $this->resetany(); $this->jumpcount++; $this->m_httphead = array() ; $this->m_html = ; $this->close(); //初始化系统 $this->privateinit($url); $this->privatestartsession('get'); } /** * 获得某操作错误的原因 * * @access public * @return void */ function printerror() { echo 错误信息:.$this->m_error; echo
具体返回头:
; foreach($this->m_httphead as $k=>$v){ echo $k => $v
\r\n; } } /** * 判别用get方法发送的头的应答结果是否正确 * * @access public * @return bool */ function isgetok() { if( preg_match(/^2/,$this->gethead(http-state)) ) { return true; } else { $this->m_error .= $this->gethead(http-state). - .$this->gethead(http-describe).
; return false; } } /** * 看看返回的网页是否是text类型 * * @access public * @return bool */ function istext() { if( preg_match(/^(2|401)/,$this->gethead(http-state)) && preg_match(/text|xml|json/i,$this->gethead(content-type)) ) { return true; } else { $this->m_error .= 内容为非文本类型或网址重定向
; return false; } } /** * 判断返回的网页是否是特定的类型 * * @access public * @param string $ctype 内容类型 * @return string */ function iscontenttype($ctype) { if(preg_match(/^2/,$this->gethead(http-state)) && $this->gethead(content-type)==strtolower($ctype)) { return true; } else { $this->m_error .= 类型不对 .$this->gethead(content-type).
; return false; } } /** * 用http协议下载文件 * * @access public * @param string $savefilename 保存文件名称 * @return string */ function savetobin($savefilename) { if(!$this->isgetok()) { return false; } if(@feof($this->m_fp)) { $this->m_error = 连接已经关闭!; return false; } $fp = fopen($savefilename,w); while(!feof($this->m_fp)) { fwrite($fp, fread($this->m_fp, 1024)); } fclose($this->m_fp); fclose($fp); return true; } /** * 保存网页内容为text文件 * * @access public * @param string $savefilename 保存文件名称 * @return string */ function savetotext($savefilename) { if($this->istext()) { $this->savebinfile($savefilename); } else { return ; } } /** * 用http协议获得一个网页的内容 * * @access public * @return string */ function gethtml() { $tm1 = microtime(true); if(!$this->istext()) { return ''; } if($this->m_html!='') { return $this->m_html; } if(!$this->m_fp||@feof($this->m_fp)) { return ''; } while(!feof($this->m_fp)) { $this->m_html .= fgets($this->m_fp,256); if($this->datalimit > 0 && strlen($this->m_html) > $this->datalimit) break; } @fclose($this->m_fp); $tm2 = microtime(true); $log = \ntm2-tm1 = .($tm2-$tm1); $log.= \n.$this->m_html; $this->log_write('gethtml',$log); return $this->m_html; } /** * 开始http会话 * * @access public * @param string $requesttype 请求类型 * @return string */ function privatestartsession($requesttype=get) { if(!$this->privateopenhost()) { $this->m_error .= 打开远程主机出错!; return false; } $this->retry++; if($this->gethead(http-edition)==http/1.1) { $httpv = http/1.1; } else { $httpv = http/1.0; } $ps = explode('?',$this->m_urlpath); $headstring = ''; //发送固定的起始请求头get、host信息 if($requesttype==get) { $headstring .= get .$this->m_urlpath. $httpv\r\n; } else { $headstring .= post .$ps[0]. 
$httpv\r\n; } if($this->m_user || $this->m_pass) { $headstring .= authorization: basic .base64_encode($this->m_user.:.$this->m_pass).\r\n; } $this->m_puthead[host] = $this->m_host; //发送用户自定义的请求头 if(!isset($this->m_puthead[user-agent])) { $this->m_puthead[user-agent] = mozilla/4.0 (compatible; msie 6.0; windows nt 5.2); } if(!isset($this->m_puthead[refer])) { $this->m_puthead[refer] = http://.$this->m_puthead[host]; } /* add on 2012-12-19 */ $headstring.=connection:keep-alive\r\n; $headstring.=accept-language:zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3\r\n; $headstring.=accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n; foreach($this->m_puthead as $k=>$v) { $k = trim($k); $v = trim($v); if($k!=&&$v!=) { $headstring .= $k: $v\r\n; } } fputs($this->m_fp, $headstring); if($requesttype==post) { $postdata = ; if(count($ps)>1) { for($i=1;$im_fp,content-type: application/x-www-form-urlencoded\r\n); fputs($this->m_fp,content-length: $plen\r\n); } //发送固定的结束请求头 //http1.1协议必须指定文档结束后关闭链接,否则读取文档时无法使用feof判断结束 if($httpv==http/1.1) { fputs($this->m_fp,connection: close\r\n\r\n); } else { fputs($this->m_fp,\r\n); } if($requesttype==post) { fputs($this->m_fp,$postdata); } //获取应答头状态信息 $httpstas = explode( ,fgets($this->m_fp,256)); $this->m_httphead[http-edition] = trim($httpstas[0]); $this->m_httphead[http-state] = trim($httpstas[1]); $this->m_httphead[http-describe] = ; for($i=2;$im_httphead[http-describe] .= .trim($httpstas[$i]); } //获取详细应答头 while(!feof($this->m_fp)) { $line = trim(fgets($this->m_fp,256)); if($line == ) { break; } $hkey = ; $hvalue = ; $v = 0; for($i=0;$im_httphead[strtolower($hkey)] = trim($hvalue); } } //如果连接被不正常关闭,重试 if(feof($this->m_fp)) { if($this->retry > 10) { return false; } $this->privatestartsession($requesttype); } //判断是否是3xx开头的应答 if(preg_match(/^3/,$this->m_httphead[http-state])) { if($this->jumpcount > 3) { return; } if(isset($this->m_httphead[location])) { $newurl = $this->m_httphead[location]; if(preg_match(/^http/i,$newurl)) { 
$this->jumpopenurl($newurl); } else { $newurl = $this->fillurl($newurl); $this->jumpopenurl($newurl); } } else { $this->m_error = 无法识别的答复!; } } } /** * 获得一个http头的值 * * @access public * @param string $headname 头文件名称 * @return string */ function gethead($headname) { $headname = strtolower($headname); return isset($this->m_httphead[$headname]) ? $this->m_httphead[$headname] : ''; } /** * 设置http头的值 * * @access public * @param string $skey 键 * @param string $svalue 值 * @return string */ function sethead($skey,$svalue) { $this->m_puthead[$skey] = $svalue; } /** * 打开连接 * * @access public * @return bool */ function privateopenhost() { if($this->m_host==) { return false; } if(function_exists('checkdnsrr') && !checkdnsrr($this->m_host,'a') && !checkdnsrr($this->m_host,'cname')) { $this->m_error = '远程主机'.$this->m_host.'不存在!checkdnsrr !'; return false; } $errno = ; $errstr = ; $this->m_fp = @fsockopen($this->m_host, $this->m_port, $errno, $errstr,10); if(!$this->m_fp) { $this->m_error = $errstr; return false; } else { return true; } } /** * 关闭连接 * * @access public * @return void */ function close() { @fclose($this->m_fp); } /** * 补全相对网址 * * @access public * @param string $surl 需要不全的地址 * @return string */ function fillurl($surl) { $i = 0; $dstr = ; $pstr = ; $okurl = ; $pathstep = 0; $surl = trim($surl); if($surl==) { return ; } $pos = strpos($surl,#); if($pos>0) { $surl = substr($surl,0,$pos); } if($surl[0]==/) { $okurl = http://.$this->homeurl.$surl; } else if($surl[0]==.) { if(strlen($surl)baseurlpath./.substr($surl,2,strlen($surl)-2); } else { $urls = explode(/,$surl); foreach($urls as $u) { if($u==..) 
{ $pathstep++; } else if($ibaseurlpath); if(count($urls) baseurlpath./.$surl; } else if(strtolower(substr($surl,0,7))==http://) { $okurl = $surl; } else { $okurl = http://.$this->baseurlpath./.$surl; } } $okurl = preg_replace(/^(http:\/\/)/i,,$okurl); $okurl = preg_replace(/\/{1,}/, /, $okurl); return http://.$okurl; } function log_write($funcname,$message) { if(!(defined('debug_level') && debug_level == true)) return ; $log = \n.date(y-m-d h:i:s ).get_current_user().[.getmypid().]; $log.= \n.$this->m_url.\n.str_repeat('------', 10).\n.$message; $path = $funcname.' '.date('y m d h i s ').preg_replace('/([\w]+|\s+)/i', ' ', $this->m_url); if(strlen($path) > 250) $path = substr($path,0,250); $dir = dededata.'/httpdownlog'; if(!is_dir($dir) && !mkdir($dir)) exit('can not make dir '.$dir); $path = $dir.'/'.$path; if(!file_exists($path)) touch($path); $fp = fopen($path,'a+'); flock($fp, lock_ex); fputs($fp, path:.$path.\nreal:.realpath($path).\nmssg:.$log); fclose($fp); return true; }}//end class
使用方法:
// Usage example: download a page and read at most ~5 KB of its body.
// $rs['wurl'] is the target URL supplied by the caller.
$dhd = new dedehttpdown();

// openurl() writes the request headers to the socket immediately, so any
// custom header must be set BEFORE calling it; setting it afterwards has
// no effect on the request that was already sent.
$dhd->m_puthead['refer'] = $rs['wurl'];

$dhd->openurl($rs['wurl']);

// openurl() resets datalimit to 0, so the byte cap has to be applied after
// openurl() and before gethtml(), which is where the body is actually read.
$dhd->datalimit = 5120;

$filecnt = trim($dhd->gethtml());
————————————————————————
存在未解决的问题:
(1)假如域名绑定了A记录或CNAME记录并指向了某个IP,但该IP地址并不存在或是虚假的,程序仍然会继续尝试连接并获取内容。
(2)PHP的fsockopen的timeout参数似乎没有起作用:设置了10秒超时,但实际上会一直执行到脚本自身超时为止。(注:fsockopen的timeout参数只作用于建立连接阶段;连接建立后的读写超时需要另外使用stream_set_timeout()设置。)
如果有好的方法或建议,可以随时联系我本人:admin@zbphp.com
其它类似信息

推荐信息