这次搭建的环境:centos 6.5 x86,php 5.3.3,mysql 5.5.8,三次搭建各不相同
一,下载sphinx,scws相关包
sphinx下载地址:http://sphinxsearch.com/downloads/release/
sphinx php扩展下载地址:http://pecl.php.net/package/sphinx
scws下载地址:http://www.xunsearch.com/scws/download.php
scws词库下载地址:http://www.xunsearch.com/scws/down/scws-dict-chs-utf8.tar.bz2
二,安装sphinx,scws,以及php扩展
1,安装sphinx
# tar zxvf sphinx-2.2.5-release.tar.gz
# cd sphinx-2.2.5-release
# ./configure --prefix=/usr/local/sphinx2 --with-mysql=/usr/local/mysql
# make && make install
2,安装sphinx客户端
查看复制打印?
# cd api/libsphinxclient //sphinx-2.2.5-release目录下
# ./configure --prefix=/usr/local/sphinx2/libsphinxclient
# make && make install
3,安装sphinx php扩展
# tar zxvf sphinx-1.3.1.tgz
# cd sphinx-1.3.1
# phpize
# ./configure --with-sphinx=/usr/local/sphinx2/libsphinxclient --with-php-config=/usr/bin/php-config
# make && make install
4,安装scws
查看复制打印?
# tar xvjf scws-1.2.2.tar.bz2
# mkdir /usr/local/scws
# cd scws-1.2.2
# ./configure --prefix=/usr/local/scws/
# make && make install
5,安装scws php扩展
# cd ./phpext/
# phpize
# ./configure --with-php-config=/usr/bin/php-config
# make && make install
三,配置sphinx,scws,php等
1,创建测试表和数据
mysql> desc users;
+----------+-------------+------+-----+---------+----------------+
| field | type | null | key | default | extra |
+----------+-------------+------+-----+---------+----------------+
| user_id | int(11) | no | pri | null | auto_increment |
| username | varchar(20) | no | | null | |
+----------+-------------+------+-----+---------+----------------+
2 rows in set (0.00 sec)
mysql> select * from users;
+------------+------------+
| user_id | username |
+------------+------------+
| 1311895262 | 张三 |
| 1311895263 | tank张二 |
| 1311895264 | tank张一 |
| 1311895265 | tank张 |
+------------+------------+
4 rows in set (0.00 sec)
mysql> desc users;
+----------+-------------+------+-----+---------+----------------+
| field | type | null | key | default | extra |
+----------+-------------+------+-----+---------+----------------+
| user_id | int(11) | no | pri | null | auto_increment |
| username | varchar(20) | no | | null | |
+----------+-------------+------+-----+---------+----------------+
2 rows in set (0.00 sec)
mysql> select * from users;
+------------+------------+
| user_id | username |
+------------+------------+
| 1311895262 | 张三 |
| 1311895263 | tank张二 |
| 1311895264 | tank张一 |
| 1311895265 | tank张 |
+------------+------------+
4 rows in set (0.00 sec)
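上面二张表,都是真实的mysql表(注:上面的输出把users表重复贴了两遍,另一张应该是orders表;从下面sphinx.conf的sql_query可以看出,orders表至少包含id、user_id、create_time、product_name、summary这几个字段)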
2,配置sphinx.conf,加上以下内容
# Data source for the "myorder" index: rows are pulled from MySQL.
source myorder
{
type = mysql
# Connection settings: local server, root account with an empty password,
# database "test".
sql_host = localhost
sql_user = root
sql_pass =
sql_db = test
# Pre-queries executed on the connection before the main query: switch the
# connection charset to utf8 and turn the query cache off for this session.
sql_query_pre = set names utf8
sql_query_pre = set session query_cache_type=off
# Main fetch query (continued over several lines with a trailing backslash).
# The first selected column (a.id) is the document id; orders is left-joined
# to users so that every order also carries the username.
sql_query = \
select a.id, a.user_id,b.username, unix_timestamp(a.create_time) as create_time, a.product_name, a.summary \
from orders a left join users b on a.user_id = b.user_id
# user_id is stored as an unsigned integer attribute.
sql_attr_uint = user_id
# username and product_name are declared with sql_field_string, i.e. they are
# full-text indexed and also stored as string attributes. summary is not
# declared as an attribute here, so it is a full-text field only.
sql_field_string = username
sql_field_string = product_name
# create_time is converted to a UNIX timestamp in the query and stored as a
# timestamp attribute.
sql_attr_timestamp = create_time
sql_ranged_throttle = 0
# Left disabled: this Sphinx version reports sql_query_info as permanently
# removed from the configuration.
#sql_query_info = select * from orders where id=$id
}
# Full-text index built from the "myorder" source.
index myorder
{
source = myorder
path = /usr/local/sphinx2/var/data/myorder
docinfo = extern
mlock = 0
morphology = none
# Index single-character tokens too (needed for 1-gram CJK indexing below).
min_word_len = 1
# Left disabled: this Sphinx version reports charset_type as permanently
# removed from the configuration.
#charset_type = zh_cn.utf-8
html_strip = 1
# Character folding table.
# NOTE: code points must be written with an upper-case "U+" prefix, and the
# Latin folding rule must be "A..Z->a..z" (upper-case mapped to lower-case).
# The value is one logical line; every physical line except the last one ends
# with a backslash continuation, and no comments may appear inside it.
charset_table = \
U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z, A..Z->a..z, a..z, \
U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6, U+00E0..U+00F6, \
U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, \
U+0100->U+0101, U+0101, U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, \
U+0108->U+0109, U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F, \
U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, U+0116->U+0117, U+0117, \
U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D, U+011D, U+011E->U+011F, U+011F, \
U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, U+0134->U+0135, U+0135, U+0136->U+0137, U+0137, \
U+0139->U+013A, U+013A, U+013B->U+013C, U+013C, U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, \
U+0141->U+0142, U+0142, U+0143->U+0144, U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, \
U+014A->U+014B, U+014B, U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, \
U+0152->U+0153, U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159, U+0159, \
U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, U+0160->U+0161, U+0161, \
U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, U+0167, U+0168->U+0169, U+0169, \
U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, U+016E->U+016F, U+016F, U+0170->U+0171, U+0171, \
U+0172->U+0173, U+0173, U+0174->U+0175, U+0175, U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, \
U+0179->U+017A, U+017A, U+017B->U+017C, U+017C, U+017D->U+017E, U+017E, \
U+0410..U+042F->U+0430..U+044F, U+0430..U+044F, U+05D0..U+05EA, \
U+0531..U+0556->U+0561..U+0586, U+0561..U+0587, U+0621..U+063A, U+01B9, U+01BF, \
U+0640..U+064A, U+0660..U+0669, U+066E, U+066F, U+0671..U+06D3, U+06F0..U+06FF, \
U+0904..U+0939, U+0958..U+095F, U+0960..U+0963, U+0966..U+096F, U+097B..U+097F, \
U+0985..U+09B9, U+09CE, U+09DC..U+09E3, U+09E6..U+09EF, U+0A05..U+0A39, U+0A59..U+0A5E, \
U+0A66..U+0A6F, U+0A85..U+0AB9, U+0AE0..U+0AE3, U+0AE6..U+0AEF, \
U+0B05..U+0B39, U+0B5C..U+0B61, U+0B66..U+0B6F, U+0B71, U+0B85..U+0BB9, U+0BE6..U+0BF2, \
U+0C05..U+0C39, U+0C66..U+0C6F, U+0C85..U+0CB9, U+0CDE..U+0CE3, U+0CE6..U+0CEF, U+0D05..U+0D39, \
U+0D60, U+0D61, U+0D66..U+0D6F, U+0D85..U+0DC6, U+1900..U+1938, U+1946..U+194F, \
U+A800..U+A805, U+A807..U+A822, \
U+0386->U+03B1, U+03AC->U+03B1, U+0388->U+03B5, U+03AD->U+03B5, U+0389->U+03B7, U+03AE->U+03B7, \
U+038A->U+03B9, U+0390->U+03B9, U+03AA->U+03B9, U+03AF->U+03B9, U+03CA->U+03B9, \
U+038C->U+03BF, U+03CC->U+03BF, \
U+038E->U+03C5, U+03AB->U+03C5, U+03B0->U+03C5, U+03CB->U+03C5, U+03CD->U+03C5, \
U+038F->U+03C9, U+03CE->U+03C9, U+03C2->U+03C3, \
U+0391..U+03A1->U+03B1..U+03C1, U+03A3..U+03A9->U+03C3..U+03C9, U+03B1..U+03C1, U+03C3..U+03C9, \
U+0E01..U+0E2E, U+0E30..U+0E3A, U+0E40..U+0E45, U+0E47, U+0E50..U+0E59, \
U+A000..U+A48F, U+4E00..U+9FBF, U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF, \
U+2F800..U+2FA1F, U+2E80..U+2EFF, U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF, \
U+3040..U+309F, U+30A0..U+30FF, U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF, \
U+3130..U+318F, U+A000..U+A48F, U+A490..U+A4CF
# Index the characters listed in ngram_chars as 1-grams, i.e. every single
# CJK character becomes its own token.
ngram_len = 1
ngram_chars = \
U+4E00..U+9FBF, U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF, \
U+2F800..U+2FA1F, U+2E80..U+2EFF, U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF, \
U+3040..U+309F, U+30A0..U+30FF, U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF, \
U+3130..U+318F, U+A000..U+A48F, U+A490..U+A4CF
}
注意:新版sphinx已经移除了sql_query_info和charset_type这两个配置项(所以上面的配置里把它们注释掉了),如果仍然设置,会出现如下警告:
warning: key 'sql_query_info' was permanently removed from sphinx configuration. refer to documentation for details.
warning: key 'charset_type' was permanently removed from sphinx configuration. refer to documentation for details.
3,安装scws词库
# tar xvjf scws-dict-chs-utf8.tar.bz2 -C /usr/local/scws/etc/
# chown tank:tank /usr/local/scws/etc/dict.utf8.xdb
在这里一定要加权限,也就是说让php-fpm或者php-cgi的运行用户,拥有dict.utf8.xdb的所有权限。如果不这么做的话,php 扩展调用词库会报如下错误:
Warning: SimpleCWS::add_dict(): Failed to add the dict file
怎么查看php-fpm,php-cgi的运行用户呢?
# ps aux |grep php-fpm
root 23487 0.0 0.1 284928 4652 ? ss nov05 0:00 php-fpm: master process (/etc/php-fpm.conf)
tank 23488 0.0 1.3 336108 52328 ? s nov05 0:02 php-fpm: pool www //在这里就是tank了
tank 23489 0.0 0.8 310484 34028 ? s nov05 0:02 php-fpm: pool www
tank 23490 0.0 0.7 306620 30156 ? s nov05 0:02 php-fpm: pool www
tank 23491 0.0 0.8 310096 33748 ? s nov05 0:02 php-fpm: pool www
tank 23492 0.0 1.2 331812 47712 ? s nov05 0:02 php-fpm: pool www
tank 24669 0.0 1.2 333520 48896 ? s nov05 0:01 php-fpm: pool www
tank 29747 0.0 0.7 305000 27340 ? s 03:27 0:00 php-fpm: pool www
tank 29761 0.0 1.0 320536 39928 ? s 03:27 0:00 php-fpm: pool www
root 30705 0.0 0.0 103260 872 pts/5 s+ 04:11 0:00 grep php-fpm
4,配置php.ini
# vim /etc/php.ini
[sphinx]
extension = sphinx.so
[scws]
extension = scws.so
scws.default.charset = utf-8
scws.default.fpath = /usr/local/scws/etc
四,启动sphinx,php-fpm
1,启动sphinx
# /usr/local/sphinx2/bin/indexer --config /usr/local/sphinx2/etc/sphinx.conf --all
# /usr/local/sphinx2/bin/searchd --config /usr/local/sphinx2/etc/sphinx.conf
2,重启php-fpm
# /etc/init.d/php-fpm restart
前二次,我安装sphinx,必须在mysql中安装sphinx存储插件,而这次没有,看下图
五,测试sphinx全文检索
1,命令行的测试
[root@localhost phpext]# mysql -h 127.0.0.1 -P 9306
welcome to the mysql monitor. commands end with ; or \g.
your mysql connection id is 1
server version: 2.2.5-id64-release (r4825)
copyright (c) 2000, 2013, oracle and/or its affiliates. all rights reserved.
oracle is a registered trademark of oracle corporation and/or its
affiliates. other names may be trademarks of their respective
owners.
type 'help;' or '\h' for help. type '\c' to clear the current input statement.
mysql> select * from myorder where match('张');
+------+------------+------------+-------------+----------------+
| id | user_id | username | create_time | product_name |
+------+------------+------------+-------------+----------------+
| 9 | 1311895262 | 张三 | 1406823894 | tank is 坦克 |
| 10 | 1311895263 | tank张二 | 1406823894 | tank is 坦克 |
| 11 | 1311895264 | tank张一 | 1406823894 | tank is 坦克 |
| 12 | 1311895265 | tank张 | 1406823894 | tank is 坦克 |
+------+------------+------------+-------------+----------------+
4 rows in set (0.00 sec)
2,利用php 扩展
<?php
/**
 * Demo page: segment a search phrase with SCWS, turn the resulting words into
 * a Sphinx extended-syntax OR query, run it against the "myorder" index and
 * dump the raw result together with simple timing information.
 *
 * Requires the scws and sphinx PHP extensions, plus the scws.default.fpath
 * ini setting pointing at the directory holding dict.utf8.xdb and
 * rules.utf8.ini.
 */
header("content-type: text/html; charset=utf-8");

$b_time = microtime(true);
echo '<p>'.$b_time.'</p>';

$key = "张三";
$index = "myorder";

//======================================== word segmentation
$so = scws_new();
$so->set_charset('utf-8');

// Default dictionary and rule set, both located via scws.default.fpath.
$scws_dir = ini_get('scws.default.fpath');
$so->add_dict($scws_dir . '/dict.utf8.xdb');
// Optional custom plain-text dictionary:
// $so->add_dict('./dd.txt', SCWS_XDICT_TXT);
$so->set_rule($scws_dir . '/rules.utf8.ini');

// Drop punctuation and other special symbols from the segmentation result.
$so->set_ignore(true);
// Compound segmentation, e.g. "中国人" -> "中国" + "人" + "中国人".
// The argument is a bitwise OR of 1 | 2 | 4 | 8, meaning
// short words | duality | main single characters | all single characters
// (constants SCWS_MULTI_SHORT, SCWS_MULTI_DUALITY, SCWS_MULTI_ZMAIN,
// SCWS_MULTI_ZALL). false disables it.
$so->set_multi(false);
// Do not automatically merge leftover single characters into two-character words.
$so->set_duality(false);

// Feed the search phrase and collect the words. get_result() hands the
// result back in batches and returns false when nothing is left, so it has
// to be called in a loop rather than once.
$so->send_text($key);
$terms = array();
while ($batch = $so->get_result())
{
    foreach ($batch as $item)
    {
        $terms[] = $item['word'];
    }
}
$so->close();

//======================================== search
$sc = new SphinxClient();
$sc->setServer('127.0.0.1', 9312);
#$sc->setMatchMode(SPH_MATCH_ALL);
// PHP constants are case-sensitive: the mode must be written SPH_MATCH_EXTENDED.
$sc->setMatchMode(SPH_MATCH_EXTENDED);
$sc->setArrayResult(true);

// If segmentation produced nothing, fall back to the raw phrase so that the
// query sent to searchd is never empty.
if (count($terms) == 0)
{
    $terms[] = $key;
}

// Build "(w1)|(w2)|...". Every word is escaped so that characters which are
// operators in the extended query syntax are matched literally.
$parts = array();
foreach ($terms as $term)
{
    $parts[] = '(' . $sc->escapeString($term) . ')';
}
// To additionally match the whole phrase, prepend '('.$key.')' to $parts.
$words = implode('|', $parts);

echo '<p>输入:'.$key.'</p>';
echo '<p>分词:'.$words.'</p>';

$res = $sc->query($words, $index);
if ($res === false)
{
    // query() returns false on failure; show why instead of printing nothing.
    echo '<p>'.$sc->getLastError().'</p>';
}
else
{
    print_r($res);
}

$e_time = microtime(true);
$time = $e_time - $b_time;
echo '<p>'.$e_time.'</p>';
echo '<p>'.$time.'</p>';
exit;
?>
结果如下:
<p>1415214126.9106</p><p>输入:张三</p><p>分词:(张三)</p>array
(
[error] =>
[warning] =>
[status] => 0
[fields] => array
(
[0] => username
[1] => product_name
[2] => summary
)
[attrs] => array
(
[user_id] => 1
[username] => 7
[create_time] => 2
[product_name] => 7
)
[matches] => array
(
[0] => array
(
[id] => 9
[weight] => 2500
[attrs] => array
(
[user_id] => 1311895262
[username] => 张三
[create_time] => 1406823894
[product_name] => tank is 坦克
)
)
)
[total] => 1
[total_found] => 1
[time] => 0
[words] => array
(
[张] => array
(
[docs] => 4
[hits] => 4
)
[三] => array
(
[docs] => 1
[hits] => 1
)
)
)
<p>1415214126.9516</p><p>0.041085958480835</p>