一聚教程网:一个值得你收藏的教程网站

热门教程

discuzX使用sphinx实现全文检索教程

时间:2022-06-25 16:21:08 编辑:袖梨 来源:一聚教程网

这儿为大家介绍两点

第一个是基于discuz的索引配置文件,这个配置文件比较灵活,可以根据不同的需求来配置

 

 代码如下 复制代码

#
# LinuxTone full index search configure file
#
# Data source for the main (full) index: rows are pulled from the MySQL
# database lt_bbs on the local host.
source lt_posts
{
    type = mysql
    sql_host = 127.0.0.1
    sql_user = root
    # NOTE(review): root with an empty password — presumably a local test setup; confirm before deploying.
    sql_pass =
    sql_db = lt_bbs
    sql_port = 3306
    # Executed before the main query so the connection delivers UTF-8 text.
    sql_query_pre = SET NAMES utf8
    sql_query = SELECT pid,tid,fid,dateline,subject,message,author FROM cdb_posts where first=1 # The index is built on the posts table so that the subject, message and author fields can all be searched at once
    # fid and dateline are declared as attributes instead of full-text fields.
    sql_attr_uint = fid
    sql_attr_timestamp = dateline
    # Document lookup query; per the Sphinx manual it is only used by the command-line search utility.
    sql_query_info = SELECT * FROM cdb_posts WHERE pid=$id
}
# Main index built from the lt_posts source.
index lt_posts
{
    source = lt_posts
    # Base path of the index files (the indexer appends its own extensions).
    path = /data/sphinx/data/lt_posts
    # Store attribute values in a separate file rather than inline with the document lists.
    docinfo = extern
    mlock = 0
    # No stemming or other morphology processing.
    morphology = none
    # Words shorter than 2 characters are not indexed.
    min_word_len = 2
    # Strip HTML markup from the text before indexing.
    html_strip = 1
    # Coreseek/mmseg settings: dictionary directory and Chinese UTF-8 tokenization.
    charset_dictpath = /usr/local/mmseg-3.2.13/etc/
    charset_type = zh_cn.utf-8
    # N-gram indexing is switched off.
    ngram_len = 0
}
########## Delta (incremental) index ##################
# Data source for the delta index: same database and columns as lt_posts,
# restricted to rows whose dateline falls inside the last 3600*10 seconds.
source delta
{
    type = mysql
    sql_host = 127.0.0.1
    sql_user = root
    # NOTE(review): root with an empty password — presumably a local test setup; confirm before deploying.
    sql_pass =
    sql_db = lt_bbs
    sql_port = 3306 # optional, default is 3306
    # Executed before the main query so the connection delivers UTF-8 text.
    sql_query_pre = SET NAMES utf8
    sql_query = SELECT pid,tid,fid,dateline,subject,message,author FROM cdb_posts where first=1 and dateline > unix_timestamp()-3600*10 # The delta index takes the current timestamp minus the desired interval so that only newly added rows are indexed

    # fid and dateline are declared as attributes instead of full-text fields.
    sql_attr_uint = fid
    sql_attr_timestamp = dateline
    # Document lookup query; per the Sphinx manual it is only used by the command-line search utility.
    sql_query_info = SELECT * FROM cdb_posts WHERE pid=$id
}
# Delta index built from the delta source; its settings mirror the
# lt_posts index except for the source and the on-disk path.
index delta
{
    source = delta
    # Base path of the delta index files.
    path = /data/sphinx/data/lt_delta
    # Store attribute values in a separate file rather than inline with the document lists.
    docinfo = extern
    mlock = 0
    # No stemming or other morphology processing.
    morphology = none
    # Words shorter than 2 characters are not indexed.
    min_word_len = 2
    # Strip HTML markup from the text before indexing.
    html_strip = 1
    # Coreseek/mmseg settings: dictionary directory and Chinese UTF-8 tokenization.
    charset_dictpath = /usr/local/mmseg-3.2.13/etc/
    charset_type = zh_cn.utf-8
    # N-gram indexing is switched off.
    ngram_len = 0
}
# Settings for the indexer program.
indexer
{
    # Upper bound on the memory the indexer uses while building an index.
    mem_limit = 32M
}
# Settings for the searchd daemon.
searchd
{
    # TCP port searchd listens on.
    port = 9312
    # Daemon log and per-query log.
    log = /data/sphinx/var/log/searchd.log
    query_log = /data/sphinx/var/log/query.log
    # Client request read timeout, in seconds.
    read_timeout = 5
    # Maximum number of concurrent search children.
    max_children = 30
    pid_file = /data/sphinx/var/log/searchd.pid
    # Upper bound on the number of matches kept and returned per query.
    max_matches = 10000
    # Keep serving queries from the old index while a rotated one is loaded.
    seamless_rotate = 1
    # Do not forcibly open all index files at startup.
    preopen_indexes = 0
    # Remove the .old index copies after a successful rotation.
    unlink_old = 1
}

 

Sphinx 最主要的就是这个配置文件。增量索引部分可以写成一个脚本,放到 crontab 里定时执行。
下面介绍 Sphinx 的 PHP 调用部分。Sphinx 的接口以 PHP 扩展的形式提供,可以通过 pecl 命令安装,也可以从 http://pecl.php.net/package/sphinx 下载后安装。

 

 

 代码如下 复制代码

  /**
 *全文搜索服务
 */
define('IN_DISCUZ', true);
require_once './include/common.inc.php';

$q = isset($_GET['q']) && !empty($_GET['q']) ? $_GET['q'] : '';
$q = str_replace(array('<', '>', ' ', ''', ','), array('', '', ' ', '', ''), strip_tags($q));

$page = isset($_GET['page']) && intval($_GET['page']) > 0 ? intval($_GET['page']) : 1;
$perNum = 20;
$offset = ($page - 1) * $perNum;

$search = new SphinxClient();
$search -> setServer('127.0.0.1', 9312);
$search -> setConnectTimeout(2);
$search -> setArrayResult(true);
$search -> setMatchMode(SPH_MATCH_ANY);
$search -> setRankingMode(SPH_RANK_PROXIMITY_BM25);
$search -> setSortMode(SPH_SORT_EXTENDED, '@relevance desc,@weight desc');
$search -> setLimits($offset, $perNum);
$search -> setFieldWeights(array('subject' => 2000, 'message' => 0));

$rs = array();
$query_totals = $query_time = 0;
if (!empty($q)) {
 $rs = $search -> Query($q, "*");
 $pages = ceil($rs['total'] / $perNum);

 $query_totals = $rs['total_found'];
 $query_time = $rs['time'];
}

$data = $title = $content = array();

if (!empty($rs) && $page <= $pages) {
 $pids = array();
 foreach($rs['matches'] as $v) {
  $pids[] = $v['id'];
 }
 $pid = implode(',', $pids);
 $sql = "select pid,tid,author,authorid,subject,message,dateline from cdb_posts where pid IN($pid) and status ='0' and invisible='0'";

 $query = $db -> query($sql);
 while ($row = $db -> fetch_array($query)) {
  $data[] = $row;
  $title[] = $row['subject'];
  $content[] = preg_replace('/[[/]?(b|img|url|color|s|hr|p|list|i|align|email|u|font|code|hide|table|tr|td|th|attach|list|indent|float).*]/', '', strip_tags($row['message']));
 }
 // 搜索词高亮
 $opts = array();
 $opts['before_match'] = '';
 $opts['after_match'] = '
';
 $title = $search -> BuildExcerpts($title, 'lt_posts', $q, $opts);
 $content = $search -> BuildExcerpts($content, 'lt_posts', $q, $opts);

 foreach($data as $k => $v) {
  $data[$k]['subject'] = $title[$k];
  $data[$k]['message'] = $content[$k];
 }

 $url = "s.php?q=" . urlencode($q);
 $multipage = multi($rs['total'], $perNum, $page, $url);
}

include template("lt_search");

?>

 

跑主索引的shell脚本search-index.sh

 代码如下 复制代码
#!/bin/bash
#
# Rebuild the full lt_posts index for the BBS search (with --rotate) and
# append the indexer's standard output to a log file named after the
# current date and hour.
#
csft_home=/usr/local/csft-3.2.13
log_file=/data/sphinx/var/$(date "+%Y-%m-%d-%H").log

"$csft_home/bin/indexer" -c "$csft_home/etc/lt_posts.conf" --rotate lt_posts >> "$log_file"


跑增量索引的shell脚本search-delta.sh

 代码如下 复制代码
#!/bin/bash
#
# The BBS search exec delta index
#

 

 代码如下 复制代码

# Rebuild the delta (incremental) index.
csft_home=/usr/local/csft-3.2.13
sphinx_conf="$csft_home/etc/lt_posts.conf"

"$csft_home/bin/indexer" -c "$sphinx_conf" --rotate delta

# Merge the delta index into the main index (left disabled).

#"$csft_home/bin/indexer" --config "$sphinx_conf" --rotate --merge lt_posts delta

 

热门栏目