您好,欢迎访问一九零五行业门户网

PHP获取可以用GBK编码的汉字拼音首字母

跳至
'gbk','out'=>'utf-8'); //例外处理//当没有拼音首字母other===false的情况下,返回原字串否则设置为该值 protected $other = '!';//other=false; public function __construct($flag=true){ $this->flag = $flag; //加载资源 $this->source['gk221']=file_get_contents(data_path.'word/gk2-2-1.txt'); $this->source['gk31']=file_get_contents(data_path.'word/gk3-1.txt'); $this->source['gk41']=file_get_contents(data_path.'word/gk4-1.txt'); $this->pos=json_decode(file_get_contents(data_path.'word/pos.txt'),true); } /** * 获得拼音首字母 * @param string/arr $data 数据,可以是字符串和数组 * @param string $in 数据的编码 * @param string $out 输出的编码 * @return arr 返回数组 */ public function getinitial($data,$in='gbk',$out='utf-8'){ if(is_string($data)){ return self::getinitialbystr($data,$in,$out); }elseif(is_array($data)){ return self::getinitialbyarr($data,$in,$out); } } /** * 获得拼音首字母 * @param string $data 字符串数据 * @param string $in 数据的编码 * @param string $out 输出的编码 * @return arr 返回数组 */ public function getinitialbystr($str,$in='gbk',$out='utf-8'){ $this->charset['in'] = strtolower($in); $this->charset['out'] = strtolower($out); if($this->flag!=true){ $this->temp['fws']= array(); } switch ($this->charset['in']) { case 'gbk': return self::_getinitialingbk($str); break; case 'utf-8': return self::_getinitialinutf8($str); break; default: # code... break; } //历史数据 if($this->flag!=true){ unset($this->temp['fws']); } } /** * 获得拼音首字母 * @param array $data 数组数据 * @param string $in 数据的编码 * @param string $out 输出的编码 * @return arr 返回数组 */ public function getinitialbyarr($arr,$in='gbk',$out='utf-8'){ $this->charset['in'] = strtolower($in); $this->charset['out'] = strtolower($out); if($this->flag!=true){ $this->temp['fws']= array(); } switch ($this->charset['in']) { case 'gbk': return self::_getinitialingbkarr($arr); break; case 'utf-8': return self::_getinitialinutf8arr($arr); break; default: # code... break; } //历史数据 if($this->flag!=true){ unset($this->temp['fws']); } } /** * 处理gbk编码字符串的首字母 * @param string $str 字符串 * @return array 数组 */ protected function _getinitialingbk($str){ //存放字符串拼音 $w = array(); $i = 0; $str_length = strlen($str); //字符串的字节数 while ($i= 0x81){//gbk区域 $nstr = substr($str, $i, 2); $i = $i + 2; }else{ $nstr = substr($str, $i, 1); $i = $i + 1; } $this->word = iconv('gbk','utf-8',$nstr); if(isset($this->temp['fws'][$nstr])){ $w[] = $this->temp['fws'][$nstr]; }else{ $w[] = self::_pregetinitial($nstr); } } return $w; } /** * 处理gbk编码数组的首字母 * @param array $arr 字符串单字数组 * @return array 数组 */ protected function _getinitialingbkarr($arr){ //存放字符串拼音 $w = array(); foreach ($arr as $key => $word) { $this->word = iconv('gbk','utf-8',$word); if(isset($this->temp['fws'][$word])){ $w[] = $this->temp['fws'][$word]; }else{ $w[] = self::_pregetinitial($word); } } return $w; } /** * 处理utf-8编码字符串的首字母 * @param string $str 字符串 * @return array 数组 */ protected function _getinitialinutf8($str){ //存放字符串拼音 $w = array(); $nstr = ''; $i = 0; $str_length = strlen($str); //字符串的字节数 while ($i= 252){//如果ascii位高与252 $nstr = substr($str, $i, 6); //根据utf-8编码规范,将6个连续的字符计为单个字符 $i = $i + 6; //实际byte计为6 }elseif ( $ascnum >= 248 ){//如果ascii位高与248 $nstr = substr($str, $i, 5); //根据utf-8编码规范,将5个连续的字符计为单个字符 $i = $i + 5; //实际byte计为5 }elseif ( $ascnum >= 240 ){//如果ascii位高与240 $nstr = substr($str, $i, 4); //根据utf-8编码规范,将4个连续的字符计为单个字符 $i = $i + 4; //实际byte计为4 }elseif ( $ascnum >= 224 ){//如果ascii位高与224 $nstr = substr($str, $i, 3); //根据utf-8编码规范,将3个连续的字符计为单个字符 $i = $i + 3 ; //实际byte计为3 }elseif ( $ascnum >= 192 ){//如果ascii位高与192 $nstr = substr($str, $i, 2); //根据utf-8编码规范,将2个连续的字符计为单个字符 $i = $i + 2; //实际byte计为2 }else{//其他情况下,包括大写字母,小写字母和半角标点符号,%,&,@,m,w等 $nstr = substr($str, $i, 1); $i = $i + 1; //实际的byte数计1个 } $this->word = $nstr; //编码转换至gbk $nstr = iconv('utf-8','gbk',$nstr); if(isset($this->temp['fws'][$nstr])){ $w[] = $this->temp['fws'][$nstr]; }else{ $w[] = self::_pregetinitial($nstr); } } return $w; } /** * 处理utf-8编码数组的首字母 * @param array $arr 字符串单字数组 * @return array 数组 */ protected function _getinitialinutf8arr($arr){ //存放字符串拼音 $w = array(); foreach ($arr as $key => $word) { $this->word = $word; $nword = iconv('utf-8','gbk',$word); if(isset($this->temp['fws'][$nword])){ $w[] = $this->temp['fws'][$nword]; }else{ $w[] = self::_pregetinitial($nword); } } return $w; } /** * 对单字预处理 * @param string $word 单字,gbk编码 * @return string 拼音首字母,编码视$this->charset['out'] */ protected function _pregetinitial($word){ $fw = self::_getinitial($word);//返回的utf-8编码数据首字母 if($fw!==false){ $nstr=$this->temp['fws'][$word]=iconv('utf-8',$this->charset['out'],$fw); }else{ $nstr=$this->temp['fws'][$word]=iconv('gbk',$this->charset['out'],$word); } return $nstr; } /** * 获得汉字拼音首字母的核心函数 * @param string $word 单字,gbk编码 * @return string 首字母,utf-8编码 */ protected function _getinitial($word){ $high = ord($word{0}); $low = ord($word{1}); //对20902个汉字支持拼音首字母提取 $hexc = $high * 256 + $low; //gbk/2:gb2312汉字表(拼音排序),低位a0开始 if($hexc >= 0xb0a1 and $hexc =0xa0){ //共3755个字 return self::_getingbk21($hexc); } //gbk/2:gb2312汉字表,低位a0开始 if($hexc >= 0xd8a1 and $hexc =0xa0){ //共3008个字 return self::_getingbk('gk221'); } //gbk/3:扩充汉字表(ucs 代码大小排列) if($hexc >= 0x8140 and $hexc = 0xaa40 and $hexc other; } /** * 获取首字母 * gbk/2:gb2312汉字表(拼音序) * 共3755个字 * @param int $hexc 单字gbk编码值 * @return string 首字母,utf-8编码 */ protected function _getingbk21($hexc){ //无i,u,v开始的拼音 $char = array("",//填充位置 "a","b","c","d","e","f", "g","h","j","k","l","m", "n","o","p","q","r","s", "t","w","x","y","z" ); $hcs = array( 0xb0a1,0xb0c5,0xb2c1,0xb4ee,0xb6ea,0xb7a2, 0xb8c1,0xb9fe,0xbbf7,0xbfa6,0xc0ac,0xc2e8, 0xc4c3,0xc5b6,0xc5be,0xc6da,0xc8bb,0xc8f6, 0xcbfa,0xcdda,0xcef4,0xd1b9,0xd4d1 ); if($key=array_search($hexc,$hcs)){ return $char[$key]; }else{ $hcs[] = $hexc; sort($hcs); return $char[array_search($hexc,$hcs)]; } } /** * 获取首字母 * @param string $type 单字所属gbk区域类型 * @return string 首字母,utf-8编码 */ protected function _getingbk($type){ //无i,u,v开始的拼音 $char = array("",//填充位置 "a","b","c","d","e","f", "g","h","j","k","l","m", "n","o","p","q","r","s", "t","w","x","y","z" ); $str = str_replace("\r\n",'',$this->source[$type]); $p = stripos($str,$this->word)+3;//居右//stripos($str,$word),居左 $str = ''; if($key=array_search($p,$this->pos[$type])){ return $char[$key]; }else{ $pos = $this->pos[$type]; $pos[] = $p; sort($pos); return $char[array_search($p,$pos)]; } } } ?>
其它类似信息

推荐信息