sphinx使用整理文档

来源:互联网 发布:王思聪双性恋 知乎 编辑:程序博客网 时间:2024/05/21 22:44

相关命令及步骤

    创建主索引:

        /usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf --all

    创建增量索引:

        1. 创建测试数据表以及数据

        2. 修改配置文件

            主索引源:sql_query_pre

            增量索引源:sql_query_pre  sql_query  sql_query_post

            主索引:source path

            增量索引:source path

        3. 创建/更新主索引

        4. 创建/更新增量索引(若 searchd 正在运行,需在下面的命令末尾追加 --rotate,否则索引文件被占用无法更新)

        /usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf delta

    重启索引进程

        /usr/local/coreseek/bin/searchd --stop

        /usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/csft.conf

    索引合并

        /usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf --merge main delta --rotate

 

 

csft.conf配置文件

    source src1   # 用来配置数据库源,查出要进行索引的数据,其中src1是数据源的名称,随意命名

    {

        type                    = mysql

        sql_host                = 127.0.0.1

        sql_user                = root

        sql_pass                =

        sql_db                  = test

        sql_port                = 3306  # optional, default is 3306

 

        sql_query_pre           = SET NAMES utf8

        sql_query_pre           = REPLACE INTO sph_counter SELECT 1, MAX(id) FROM documents

 

        sql_query               = \

            SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \

            FROM documents

 

        sql_attr_uint           = group_id    #用来对搜索的结果进行过滤,相当与添加where条件

 

        sql_attr_timestamp      = date_added

 

        sql_ranged_throttle = 0

 

        sql_query_info      = SELECT * FROM documents WHERE id=$id

 

    }

 

    index test1   # 将数据源中查出来的数据建立索引,其中test1是索引的名字,随意命名,一般与其对应的数据源名称一致

    {

        source          = src1  # 对应数据源的名称

 

        path            = /usr/local/coreseek/var/data/test1   # 索引文件存放目录及名称

 

        docinfo         = extern

 

        mlock           = 0

 

        morphology      = none

 

        stopwords           = /usr/local/coreseek/var/data/test1/stopwords.txt

 

        wordforms           = /usr/local/coreseek/var/data/test1/wordforms.txt

 

        min_word_len        = 1

 

        charset_type        = sbcs

 

        html_strip              = 0

 

    }

 

    source delta : src1   # 增量索引数据源,其中delta是增量索引数据源的名称,随意命名,":"用来继承主索引

    {

        sql_query_pre = SET NAMES utf8

        sql_query = SELECT \

                        id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \

                    FROM documents \

                    WHERE \

                        id>( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )

        sql_query_post = UPDATE sph_counter SET max_doc_id=(SELECT MAX(id) FROM documents) where counter_id=1

    }

 

    index delta : test1

    {

        source = delta

        path = /usr/local/coreseek/var/data/delta

    }

 

 

创建mysql测试数据表及数据

    CREATE TABLE `documents` (`id` int(11) NOT NULL auto_increment,`group_id` int(11) NOT NULL,`group_id2` int(11) NOT NULL,`date_added` datetime NOT NULL,`title` varchar(255) NOT NULL,`content` text NOT NULL,PRIMARY KEY (`id`)) ENGINE=InnoDB AUTO_INCREMENT=5;

 

    INSERT INTO `documents` VALUES ('1', '1', '5', '2008-09-13 21:37:47', 'test one', 'this is my test document number one. also checking search within phrases.');INSERT INTO `documents` VALUES ('2', '1', '6', '2008-09-13 21:37:47', 'test two', 'this is my test document number two');INSERT INTO `documents` VALUES ('3', '2', '7', '2008-09-13 21:37:47', 'another doc', 'this is another group');INSERT INTO `documents` VALUES ('4', '2', '8', '2008-09-13 21:37:47', 'doc number four', 'this is to test groups');

 

    // 实现增量索引时使用的计数表

    CREATE TABLE sph_counter( counter_id INTEGER PRIMARY KEY NOT NULL, max_doc_id INTEGER NOT NULL);

 

 

 

PHP使用

 

    <?php

 

    header("Content-type: text/html; charset=utf-8");

 

    require_once('sphinxapi.php');

 

    $s = new SphinxClient();

 

    $s->setServer("127.0.0.1", 9312);

    $s->setArrayResult(true);

    $s->setMatchMode(SPH_MATCH_ALL);

 

    $keyword = 'test';

 

    $result = $s->Query($keyword, '*');

    if ($result['total'] == 0) {

        echo '无搜索结果';die;

    }

 

    // 获取结果id集

    $ids = array();

    foreach($result['matches'] as $key => $val)

    {

        $ids[] = $val['id'];

    }

    print_r($ids);

 

    // 连接数据库

    $dsn = "mysql:host=localhost;dbname=test;charset=utf8";

    $db = new PDO($dsn, 'root', '');

 

    $sql = 'select * from documents where id in('.implode(',', $ids).')';

    $result = $db->query($sql);

    $result->setFetchMode(PDO::FETCH_ASSOC);

 

    $data = $result->fetchAll();

 

    // 搜索结果高亮显示

    $rule = array(

                "before_match" => "<font style='font-weight:bold;color:#f00'>",

                "after_match" => "</font>"

            );

    foreach ($data as $key=>$val) {

        $data[$key] = $s->BuildExcerpts($val, 'delta', $keyword, $rule);

    }

 

    print_r($data);

 

 

 

添加新分词

    1. 复制unigram.txt文件为unigram_new.txt

    2. 在unigram_new.txt中添加新词

    3. 生成新的词典文件:/usr/local/mmseg3/bin/mmseg -u /usr/local/mmseg3/etc/unigram_new.txt

    4. 将上一步生成的 unigram_new.txt.uni 重命名为 uni.lib,替换原有的 uni.lib 文件

    5. 重建索引,并重启 searchd 搜索服务

原创粉丝点击