您好,欢迎访问一九零五行业门户网

sphinx scws 全文检索 安装 配置 详解

这次搭建的环境:CentOS 6.5 x86,PHP 5.3.3,MySQL 5.5.8。这是我第三次搭建,三次搭建的环境各不相同。
一,下载sphinx,scws相关包
sphinx下载地址:http://sphinxsearch.com/downloads/release/
sphinx php扩展下载地址:http://pecl.php.net/package/sphinx
scws下载地址:http://www.xunsearch.com/scws/download.php
scws词库下载地址:http://www.xunsearch.com/scws/down/scws-dict-chs-utf8.tar.bz2
二,安装sphinx,scws,以及php扩展
1,安装sphinx
# tar zxvf sphinx-2.2.5-release.tar.gz
# cd sphinx-2.2.5-release
# ./configure --prefix=/usr/local/sphinx2 --with-mysql=/usr/local/mysql
# make && make install
2,安装sphinx客户端
# cd api/libsphinxclient    //sphinx-2.2.5-release目录下
# ./configure --prefix=/usr/local/sphinx2/libsphinxclient
# make && make install
3,安装sphinx php扩展
# tar zxvf sphinx-1.3.1.tgz
# cd sphinx-1.3.1
# phpize
# ./configure --with-sphinx=/usr/local/sphinx2/libsphinxclient --with-php-config=/usr/bin/php-config
# make && make install
4,安装scws
# tar xvjf scws-1.2.2.tar.bz2
# mkdir /usr/local/scws
# cd scws-1.2.2
# ./configure --prefix=/usr/local/scws/
# make && make install
5,安装scws php扩展
# cd ./phpext/
# phpize
# ./configure --with-php-config=/usr/bin/php-config
# make && make install
三,配置sphinx,scws,php等
1,创建测试表和数据
mysql> desc users; +----------+-------------+------+-----+---------+----------------+ | field | type | null | key | default | extra | +----------+-------------+------+-----+---------+----------------+ | user_id | int(11) | no | pri | null | auto_increment | | username | varchar(20) | no | | null | | +----------+-------------+------+-----+---------+----------------+ 2 rows in set (0.00 sec) mysql> select * from users; +------------+------------+ | user_id | username | +------------+------------+ | 1311895262 | 张三 | | 1311895263 | tank张二 | | 1311895264 | tank张一 | | 1311895265 | tank张 | +------------+------------+ 4 rows in set (0.00 sec)

mysql> desc users; +----------+-------------+------+-----+---------+----------------+ | field | type | null | key | default | extra | +----------+-------------+------+-----+---------+----------------+ | user_id | int(11) | no | pri | null | auto_increment | | username | varchar(20) | no | | null | | +----------+-------------+------+-----+---------+----------------+ 2 rows in set (0.00 sec) mysql> select * from users; +------------+------------+ | user_id | username | +------------+------------+ | 1311895262 | 张三 | | 1311895263 | tank张二 | | 1311895264 | tank张一 | | 1311895265 | tank张 | +------------+------------+ 4 rows in set (0.00 sec)

上面两张表(orders 和 users,即下文 sphinx.conf 的 sql_query 中关联的两张表)都是真实的mysql表。(注:此处 orders 表的内容缺失,上面重复显示的是 users 表。)
2,配置sphinx.conf,加上以下内容
source myorder { type = mysql sql_host = localhost sql_user = root sql_pass = sql_db = test sql_query_pre = set names utf8 sql_query_pre = set session query_cache_type=off sql_query = \ select a.id, a.user_id,b.username, unix_timestamp(a.create_time) as create_time, a.product_name, a.summary \ from orders a left join users b on a.user_id = b.user_id sql_attr_uint = user_id sql_field_string = username sql_field_string = product_name sql_attr_timestamp = create_time sql_ranged_throttle = 0 #sql_query_info = select * from orders where id=$id } index myorder { source = myorder path = /usr/local/sphinx2/var/data/myorder docinfo = extern mlock = 0 morphology = none min_word_len = 1 #charset_type = zh_cn.utf-8 html_strip = 1 charset_table = u+ff10..u+ff19->0..9, 0..9, u+ff41..u+ff5a->a..z, u+ff21..u+ff3a->a..z,a..z->a..z, a..z, u+0149, u+017f, u+0138, u+00df, u+00ff, u+00c0..u+00d6->u+00e0..u+00f6,u+00e0..u+00f6, u+00d8..u+00de->u+00f8..u+00fe, u+00f8..u+00fe, u+0100->u+0101, u+0101,u+0102->u+0103, u+0103, u+0104->u+0105, u+0105, u+0106->u+0107, u+0107, u+0108->u+0109,u+0109, u+010a->u+010b, u+010b, u+010c->u+010d, u+010d, u+010e->u+010f, u+010f,u+0110->u+0111, u+0111, u+0112->u+0113, u+0113, u+0114->u+0115, u+0115, u+0116->u+0117,u+0117, u+0118->u+0119, u+0119, u+011a->u+011b, u+011b, u+011c->u+011d, u+011d,u+011e->u+011f, u+011f, u+0130->u+0131, u+0131, u+0132->u+0133, u+0133, u+0134->u+0135,u+0135, u+0136->u+0137, u+0137, u+0139->u+013a, u+013a, u+013b->u+013c, u+013c,u+013d->u+013e, u+013e, u+013f->u+0140, u+0140, u+0141->u+0142, u+0142, u+0143->u+0144,u+0144, u+0145->u+0146, u+0146, u+0147->u+0148, u+0148, u+014a->u+014b, u+014b,u+014c->u+014d, u+014d, u+014e->u+014f, u+014f, u+0150->u+0151, u+0151, u+0152->u+0153,u+0153, u+0154->u+0155, u+0155, u+0156->u+0157, u+0157, u+0158->u+0159, u+0159,u+015a->u+015b, u+015b, u+015c->u+015d, u+015d, u+015e->u+015f, u+015f, u+0160->u+0161,u+0161, u+0162->u+0163, u+0163, u+0164->u+0165, u+0165, u+0166->u+0167, 
u+0167,u+0168->u+0169, u+0169, u+016a->u+016b, u+016b, u+016c->u+016d, u+016d, u+016e->u+016f,u+016f, u+0170->u+0171, u+0171, u+0172->u+0173, u+0173, u+0174->u+0175, u+0175,u+0176->u+0177, u+0177, u+0178->u+00ff, u+00ff, u+0179->u+017a, u+017a, u+017b->u+017c,u+017c, u+017d->u+017e, u+017e, u+0410..u+042f->u+0430..u+044f, u+0430..u+044f,u+05d0..u+05ea, u+0531..u+0556->u+0561..u+0586, u+0561..u+0587, u+0621..u+063a, u+01b9,u+01bf, u+0640..u+064a, u+0660..u+0669, u+066e, u+066f, u+0671..u+06d3, u+06f0..u+06ff,u+0904..u+0939, u+0958..u+095f, u+0960..u+0963, u+0966..u+096f, u+097b..u+097f,u+0985..u+09b9, u+09ce, u+09dc..u+09e3, u+09e6..u+09ef, u+0a05..u+0a39, u+0a59..u+0a5e,u+0a66..u+0a6f, u+0a85..u+0ab9, u+0ae0..u+0ae3, u+0ae6..u+0aef, u+0b05..u+0b39,u+0b5c..u+0b61, u+0b66..u+0b6f, u+0b71, u+0b85..u+0bb9, u+0be6..u+0bf2, u+0c05..u+0c39,u+0c66..u+0c6f, u+0c85..u+0cb9, u+0cde..u+0ce3, u+0ce6..u+0cef, u+0d05..u+0d39, u+0d60,u+0d61, u+0d66..u+0d6f, u+0d85..u+0dc6, u+1900..u+1938, u+1946..u+194f, u+a800..u+a805,u+a807..u+a822, u+0386->u+03b1, u+03ac->u+03b1, u+0388->u+03b5, u+03ad->u+03b5,u+0389->u+03b7, u+03ae->u+03b7, u+038a->u+03b9, u+0390->u+03b9, u+03aa->u+03b9,u+03af->u+03b9, u+03ca->u+03b9, u+038c->u+03bf, u+03cc->u+03bf, u+038e->u+03c5,u+03ab->u+03c5, u+03b0->u+03c5, u+03cb->u+03c5, u+03cd->u+03c5, u+038f->u+03c9,u+03ce->u+03c9, u+03c2->u+03c3, u+0391..u+03a1->u+03b1..u+03c1,u+03a3..u+03a9->u+03c3..u+03c9, u+03b1..u+03c1, u+03c3..u+03c9, u+0e01..u+0e2e,u+0e30..u+0e3a, u+0e40..u+0e45, u+0e47, u+0e50..u+0e59, u+a000..u+a48f, u+4e00..u+9fbf,u+3400..u+4dbf, u+20000..u+2a6df, u+f900..u+faff, u+2f800..u+2fa1f, u+2e80..u+2eff,u+2f00..u+2fdf, u+3100..u+312f, u+31a0..u+31bf, u+3040..u+309f, u+30a0..u+30ff,u+31f0..u+31ff, u+ac00..u+d7af, u+1100..u+11ff, u+3130..u+318f, u+a000..u+a48f,u+a490..u+a4cf ngram_len = 1 ngram_chars = u+4e00..u+9fbf, u+3400..u+4dbf, u+20000..u+2a6df, u+f900..u+faff,u+2f800..u+2fa1f, u+2e80..u+2eff, u+2f00..u+2fdf, u+3100..u+312f, 
u+31a0..u+31bf,u+3040..u+309f, u+30a0..u+30ff,u+31f0..u+31ff, u+ac00..u+d7af, u+1100..u+11ff,u+3130..u+318f, u+a000..u+a48f, u+a490..u+a4cf }
注意:新版sphinx已不再支持 sql_query_info 和 charset_type 这两个配置项,如果配置了会出现如下警告:
warning: key 'sql_query_info' was permanently removed from sphinx configuration. refer to documentation for details.
warning: key 'charset_type' was permanently removed from sphinx configuration. refer to documentation for details.
3,安装scws词库
# tar xvjf scws-dict-chs-utf8.tar.bz2 -C /usr/local/scws/etc/
# chown tank:tank /usr/local/scws/etc/dict.utf8.xdb
在这里一定要加权限,也就是说让php-fpm或者php-cgi的运行用户,拥有dict.utf8.xdb的所有权限。如果不这么做的话,php 扩展调用词库会报如下错误:
warning: simplecws::add_dict(): failed to add the dict file
怎么查看php-fpm,php-cgi的运行用户呢?
# ps aux |grep php-fpm
root 23487 0.0 0.1 284928 4652 ? ss nov05 0:00 php-fpm: master process (/etc/php-fpm.conf)
tank 23488 0.0 1.3 336108 52328 ? s nov05 0:02 php-fpm: pool www    //在这里就是tank了
tank 23489 0.0 0.8 310484 34028 ? s nov05 0:02 php-fpm: pool www
tank 23490 0.0 0.7 306620 30156 ? s nov05 0:02 php-fpm: pool www
tank 23491 0.0 0.8 310096 33748 ? s nov05 0:02 php-fpm: pool www
tank 23492 0.0 1.2 331812 47712 ? s nov05 0:02 php-fpm: pool www
tank 24669 0.0 1.2 333520 48896 ? s nov05 0:01 php-fpm: pool www
tank 29747 0.0 0.7 305000 27340 ? s 03:27 0:00 php-fpm: pool www
tank 29761 0.0 1.0 320536 39928 ? s 03:27 0:00 php-fpm: pool www
root 30705 0.0 0.0 103260 872 pts/5 s+ 04:11 0:00 grep php-fpm
4,配置php.ini
# vim /etc/php.ini

[sphinx]
extension = sphinx.so

[scws]
extension = scws.so
scws.default.charset = utf-8
scws.default.fpath = /usr/local/scws/etc
四,启动sphinx,php-fpm
1,启动sphinx
# /usr/local/sphinx2/bin/indexer --config /usr/local/sphinx2/etc/sphinx.conf --all
# /usr/local/sphinx2/bin/searchd --config /usr/local/sphinx2/etc/sphinx.conf
2,重启php-fpm
# /etc/init.d/php-fpm restart
前两次安装sphinx时,都必须在mysql中安装sphinx存储引擎插件,而这次不需要,看下图
五,测试sphinx全文检索
1,命令行的测试
[root@localhost phpext]# mysql -h 127.0.0.1 -P 9306
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 1
Server version: 2.2.5-id64-release (r4825)

Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql> select * from myorder where match('张');
+------+------------+------------+-------------+----------------+
| id   | user_id    | username   | create_time | product_name   |
+------+------------+------------+-------------+----------------+
|    9 | 1311895262 | 张三       |  1406823894 | tank is 坦克   |
|   10 | 1311895263 | tank张二   |  1406823894 | tank is 坦克   |
|   11 | 1311895264 | tank张一   |  1406823894 | tank is 坦克   |
|   12 | 1311895265 | tank张     |  1406823894 | tank is 坦克   |
+------+------------+------------+-------------+----------------+
4 rows in set (0.00 sec)
2,利用php 扩展
<?php
header("Content-Type: text/html; charset=utf-8");

$b_time = microtime(true);
echo '<p>'.$b_time.'</p>';

$key = "张三";
$index = "myorder";

//========================================分词
$so = scws_new();
$so->set_charset('utf-8');
//默认词库
$so->add_dict(ini_get('scws.default.fpath') . '/dict.utf8.xdb');
//自定义词库
// $so->add_dict('./dd.txt',SCWS_XDICT_TXT);
//默认规则
$so->set_rule(ini_get('scws.default.fpath') . '/rules.utf8.ini');
//设定分词返回结果时是否去除一些特殊的标点符号
$so->set_ignore(true);
//设定分词返回结果时是否复式分割,如“中国人”返回“中国+人+中国人”三个词。
// 按位或的 1 | 2 | 4 | 8 分别表示: 短词 | 二元 | 主要单字 | 所有单字
//1,2,4,8 分别对应常量 SCWS_MULTI_SHORT SCWS_MULTI_DUALITY SCWS_MULTI_ZMAIN SCWS_MULTI_ZALL
$so->set_multi(false);
//设定是否将闲散文字自动以二字分词法聚合
$so->set_duality(false);
//设定搜索词
$so->send_text($key);
$words_array = $so->get_result();
$words = "";
foreach($words_array as $v)
{
    $words = $words.'|('.$v['word'].')';
}
//加入全词
#$words = '('.$key.')'.$words;
$words = trim($words,'|');
$so->close();
echo '<p>输入:'.$key.'</p>';
echo '<p>分词:'.$words.'</p>';

//========================================搜索
$sc = new SphinxClient();
$sc->setServer('127.0.0.1',9312);
#$sc->setMatchMode(SPH_MATCH_ALL);
$sc->setMatchMode(SPH_MATCH_EXTENDED);
$sc->setArrayResult(true);
$res = $sc->query($words,$index);
print_r($res);

$e_time = microtime(true);
$time = $e_time - $b_time;
echo '<p>'.$e_time.'</p>';
echo '<p>'.$time.'</p>';
exit;
?>
结果如下:
<p>1415214126.9106</p><p>输入:张三</p><p>分词:(张三)</p>array ( [error] => [warning] => [status] => 0 [fields] => array ( [0] => username [1] => product_name [2] => summary ) [attrs] => array ( [user_id] => 1 [username] => 7 [create_time] => 2 [product_name] => 7 ) [matches] => array ( [0] => array ( [id] => 9 [weight] => 2500 [attrs] => array ( [user_id] => 1311895262 [username] => 张三 [create_time] => 1406823894 [product_name] => tank is 坦克 ) ) ) [total] => 1 [total_found] => 1 [time] => 0 [words] => array ( [张] => array ( [docs] => 4 [hits] => 4 ) [三] => array ( [docs] => 1 [hits] => 1 ) ) ) <p>1415214126.9516</p><p>0.041085958480835</p>
其它类似信息

推荐信息