您好,欢迎访问一九零五行业门户网

敏感词过滤

适用于规模较大的环境 无 /** * 禁词过滤 * 执行效率:每篇用时0.05秒 * @author liuxu * */class logic_blackword{const app_forum= 1;const app_blog= 2;const app_vote= 3;/** * 过滤得到禁词 * @param unknown $txt * @return ambigous multitype:, unkno
适用于规模较大的环境 /** * 禁词过滤 * 执行效率:每篇用时0.05秒 * @author liuxu * */class logic_blackword{ const app_forum = 1; const app_blog = 2; const app_vote = 3; /** * 过滤得到禁词 * @param unknown $txt * @return ambigous */ public function gethitlist($txt) { $hitlist = array(); //对禁词分批过滤 $max = $this->getmax(); if($max) { $size = 1000; $last = ceil($max/$size); for($page=1;$pagegethitlistbypage($txt,$page,$size); if($result) $hitlist = array_merge($hitlist,$result); } } $hitlist2 = array(); foreach($hitlist as $hit=>$type) { $hitlist2[$type][] = $hit; } return $hitlist2; } private function getmax() { $redis = rds::factory(); $memkey = 'blackword_max'; $max = $redis->get($memkey); if($max===false) { $max = 0; $blackword = new model_blackword_blackword(); $para['field'] = max(id) as max; $result = $blackword->search($para); if(isset($result[0]['max'])) $max = $result[0]['max']; $redis->setex($memkey,300,$max); } return $max; } /** * 分批过滤得到禁词 * @param unknown $txt * @param number $page * @param number $size * @return multitype:ambigous */ private function gethitlistbypage($txt,$page=1,$size=1000) { $hitlist = array(); //分批得到禁词树 $wordtree = $this->getwordtreebypage($page,$size); $txt = strip_tags($txt); $txt = preg_replace('/[^a-za-z0-9\x{4e00}-\x{9fa5}]/iu','',$txt); $len = mb_strlen($txt,'utf-8'); for($i=0;$igethitlistbytree(mb_substr($txt,$i,50,'utf-8'),$wordtree); if($result) { foreach($result as $hit=>$type) { $hitlist[$hit] = $type; } } } } return $hitlist; } /** * 是否禁词 * @param str $txt * @param arr $wordtree * @return multitype:unknown */ private function gethitlistbytree($txt,&$wordtree) { $len = mb_strlen($txt,'utf-8'); $point = & $wordtree; $hit = ''; $hitlist = array(); for($i=0;$iget($memkey); if($wordtree===false) { $wordtree = array(); $blackword = new model_blackword_blackword(); $start = ($page-1)*$size; $end = $start + $size; $para['where'] = status=1 and id>.$start. and idsearch($para); if($result) { foreach($result as $value) { if($value['word']) { $value['word'] = preg_split('/(?setex($memkey,300,$wordtree); } return $wordtree; }}
其它类似信息

推荐信息