基于 Snoopy 的 PHP 网站编码检测(准确率接近完美)
 用于 PHP 爬虫,网站编码识别准确率约 99.9%;仍有少数网站的编码无法识别,欢迎高手帮忙完善。
 代码来源:   站云网    www.siteyun.com   
 使用前需先从网上下载 snoopy.class.php,并在脚本中引入(代码中会执行 new snoopy())。
 调用方法:实例化该类后调用 getcharset() 方法。
[code]url=$url;	}		//打开网站	private function open($url)	{		if($this->request!==null)		{			if($this->request->status==200)			{				return true;			}			else 			{				return false;			}		}		else 		{			$this->request=new snoopy();			$this->request->fetch($url);			if($this->request->status==200)			{				$this->request->results=strtolower($this->request->results);				$charset=$this->getcharset();				if($charset!=utf-8)				{					if($charset==windows-1252)					{						$this->request->results=$this->uni_decode($this->request->results);					}					else 					{						$this->request->results=mb_convert_encoding($this->request->results,utf-8,$charset);					}									}				return true;			}			else 			{				return false;			}		}	}			//获取网站title,keywords,description	public function getwebinfo()	{		$info=array(			'title'=>'',			'keywords'=>'',			'desc'=>'',			'ip'=>''		);		if(!$this->open($this->url)){return $info;exit;}			//	print_r($this->request->results);exit;		preg_match('/([^>]*)/si', $this->request->results, $titlematch );		if (isset($titlematch) && is_array($titlematch) && count($titlematch) > 0)		{			$info['title'] = strip_tags($titlematch[1]);		}				preg_match_all('/]*)?[\s]*' . 'content=?([^>]*)?[\s]*[\/]?[\s]*>/si', $this->request->results, $match);		$ft=0;		foreach($match[1] as $mt)		{			if($mt==keywords || $mt==description)			{				$ft=1;			}		}		if($ft==0)		{			preg_match_all('/]*)?[\s]*name=?' . 
'([^>]*)?[\s]*[\/]?[\s]*>/si', $this->request->results, $match);			if (isset($match) && is_array($match) && count($match) == 3)			{				$originals = $match[0];				$names = $match[2];				$values = $match[1];				if (count($originals) == count($names) && count($names) == count($values))				{					$metatags = array();					for ($i=0, $limiti=count($names); $i  htmlentities($originals[$i]),							   'value' => $values[$i]							   );					}				}			}		}		else 		{			if (isset($match) && is_array($match) && count($match) == 3)			{				$originals = $match[0];				$names = $match[1];				$values = $match[2];				if (count($originals) == count($names) && count($names) == count($values))				{					$metatags = array();					for ($i=0, $limiti=count($names); $i  htmlentities($originals[$i]),						   'value' => $values[$i]						   );					}				}			}		}		$result = array (			'metatags' => $metatags		);		if(isset($result['metatags']['keywords']['value']))		{			$info['keywords']=$result['metatags']['keywords']['value'];		}		else		{			$info['keywords']=;		}		if(isset($result['metatags']['description']['value']))		{			$info['desc']=$result['metatags']['description']['value'];		}		else		{			$info['desc']=;		}				$domain=preg_replace('/http\:\/\//si', '', $this->url);		$ip=@gethostbyname($domain);		$ip_arr=explode(., $ip);			if(count($ip_arr)==4)		{			$info['ip']=$ip;		}		return $info;	}		public function t($string,$o)	{	    for($i=0;$iopen($this->url)){return false;exit;}		//首先从html获取编码		preg_match(/request->results,$temp) ? strtolower($temp[1]):;		if($temp[1]!=)		{			if(in_array($temp[1], $this->charset_arr))			{				if($temp[1]==gb2312)				{					$tmp_charset=$this->t($this->request->results,$temp[1]);					if($tmp_charset==$temp[1])					{						return $temp[1]; 					}				}				else 				{					return $temp[1];				}			}		}					if(!empty($this->request->headers))		{			//从header中获取编码			$hstr=strtolower(implode(
   
 
   