您好,欢迎访问一九零五行业门户网

PHP制作百度词典查词采集器,php百度词典采集器_PHP教程

php制作百度词典查词采集器,php百度词典采集器
百度dict 采集样本
这是我写的一个采集器,用于采集百度dict词典查词翻译后的所有结果数据,项目中还附带了13.5w条单词库和一个简单的采集案例。这里我把主要的类 dict.class.php 放出来,项目地址:http://github.com/widuu/baidu_dict ,有需要的直接fork就可以了~么么哒。这东西用的人很少,所以觉得有用的兄弟尽管拿走哈~
音标 * pro => 发音 * example=> 例句 * explain=> 简明释义 * synonym=> 同反义词 * phrase => 短语数组 * ) * */ public function content($word){ $this -> word = $word; $symbol = $this -> pronounced(); $pro = $this->getsay(); $example = $this -> getexample(); $explain = $this -> getexplain(); $synonym = $this -> getsynonym(); $phrase = $this -> getphrase(); $result = array( symbol => $symbol, //音标 pro => $pro, //发音 example=> $example, //例句 explain=> $explain, //简明释义 synonym=> $synonym, //同反义词 phrase => $phrase //短语数组 ); return $result; } /** * 远程获取百度翻译内容 * get function curl * retun string * */ private function getcontent(){ $useragent = mozilla/5.0 (windows nt 6.1; wow64; rv:23.0) gecko/20100101 firefox/23.0; $ch = curl_init(); $url = http://dict.baidu.com/s?wd=.$this->word; curl_setopt($ch, curlopt_url, $url); curl_setopt($ch, curlopt_useragent,$useragent); curl_setopt($ch, curlopt_returntransfer, true); curl_setopt($ch, curlopt_followlocation, 1); curl_setopt($ch, curlopt_httpget, 1); curl_setopt($ch, curlopt_autoreferer,1); curl_setopt($ch, curlopt_header, 0); curl_setopt($ch, curlopt_timeout, 30); $result = curl_exec($ch); if (curl_errno($curl)) { echo 'errno'.curl_error($curl); } curl_close($ch); return $result; } /** * 获取百度翻译发音 * retun array(英,美) * */ private function pronounced(){ $data = $this -> getcontent(); preg_match_all(/\en\-us\\>(.*)\/ui,$data,$pronounced); return array( 'en' => $pronounced[1][0], 'us' => $pronounced[1][1] ); } /** * 获取百度翻译发音 * return array(英,美) * */ private function getsay(){ $data = $this -> getcontent(); preg_match_all(/url=\(.*)\/ui,$data,$pronounced); return array( 'en' => $pronounced[1][0], 'us' => $pronounced[1][1] ); } /** * 获取百度翻译例句 * return array() 多维数组 例句 * */ private function getexample(){ $str = ; $data = $this -> getcontent(); preg_match_all(/var example_data = (.*)\]\;/us,$data,$example); $data1 = [[[.ltrim($example[1][0],[); $data2 = explode([[[,$data1); $num = count(array_filter($data2)); foreach($data2 as $key => $value){ $data3 = 
explode([[,[[.$value); foreach ($data3 as $k => $v) { preg_match_all(/\[\(.*)\,/us,[.$v, $match); if(!empty($match[1])){ $str .= implode($match[1], ).@; } } } $data4 = trim($str,@); $data5 = explode(@, $data4); $result = array_chunk($data5, 2); return $result; } /** * 获取简明释义 * return array (x => 词性,b => 附属) * **/ private function getexplain(){ $data = $this -> getcontent(); preg_match_all(/id\=\en\-simple\-means\\>(.*)\/us,$data,$explain); $r_data = $explain[1][0]; preg_match_all(/\\(?p.*)\\(?p.*)\\/us, $r_data, $a_data); preg_match_all(/\(?p[^\>]+)\:\(?p.*)\\/us, $r_data, $b_data); $result = array(); foreach ($a_data[adj] as $key => $value) { $result[$value] = $a_data[name][$key]; } $word_b = array(); foreach ($b_data[tag] as $key => $value) { $word_b[$value] = strip_tags($b_data[word][$key]); } $result_data = array(x => $result,b => $word_b); return $result_data; } /** * 获取同义词 * return array(0 => 同义词, 1 => 反义词) 一般为多维数组 * */ private function getsynonym(){ $data = $this -> getcontent(); preg_match_all(/id=\en\-syn\-ant\\>(.*)/us,$data,$synonym); $content = $synonym[1][0]; $data1 = explode(, $content); $result = array(); $data2 = array(); foreach ($data1 as $key => $value) { preg_match_all(/\(?p.*)\ \;\\\\(?.*)\/us, $value, $r_data); $data2[$key][adj] = $r_data[adj]; $data2[$key][content] = $r_data[content]; } foreach ($data2 as $key => $value) { foreach ($value[content] as $k => $v) { if(!empty($v)){ preg_match_all(/\\(?p.*)\(?p.*)\/us, $v, $v_data); foreach ($v_data['title'] as $m => $d) { $data = strip_tags(preg_replace(, , $v_data[value][$m])); $result[$key][$value[adj][$k]][$d] = $data; } } } } return $result; } /** * 获取短语词组 * return array (key => value) 一维或者多维数组 * */ private function getphrase(){ $num = self::$num; $data = $this -> getcontent(); preg_match_all(/id=\en\-phrase\\>(.*)\/us,$data,$phrase); $data = explode(,$phrase[1][0]); $data1 = array_slice($data,0,$num); $result = array(); foreach ($data1 as $key => $value) { $data2 = explode(
, $value); $n = count($data2); if($n $value) { foreach ($value as $k => $v) { $value[$k] = strip_tags($v); } $array = array($result[$key_value],$value); if (array_key_exists($key_value, $result)){ $result[$key_value] = $array; } } } } return $result; } /** * 将数组转换为字符串 * * @param array $data 数组 * @param bool $isformdata 如果为0,则不使用new_stripslashes处理,可选参数,默认为1 * @return string 返回字符串,如果,data为空,则返回空 */ private function array2string($data, $isformdata = 1) { if($data == '') return ''; if($isformdata) $data = $this->new_stripslashes($data); return addslashes(var_export($data, true)); } /** * 返回经stripslashes处理过的字符串或数组 * @param $string 需要处理的字符串或数组 * @return mixed */ private function new_stripslashes($string) { if(!is_array($string)) return stripslashes($string); foreach($string as $key => $val) $string[$key] = $this->new_stripslashes($val); return $string; }}// $word = new dict(express);// $word ->content();
以上就是本文的全部内容了,这是一个非常实用的功能,希望小伙伴们能够喜欢。
http://www.bkjia.com/phpjc/949449.html | www.bkjia.com | true | http://www.bkjia.com/phpjc/949449.html | techarticle | php制作百度词典查词采集器,php百度词典采集器 百度dict 采集样本 写的采集百度dict词典翻译后的所有结果数据,当然附带了13.5w单词库和采...
其它类似信息

推荐信息