sphinx 采用c扩展xmlpipe2数据源 .

来源:互联网 发布:淘宝摩托车准备店铺 编辑:程序博客网 时间:2024/05/17 09:14

我要采用 sphinx 作为全文检索引擎,但是很多数据都存在硬盘上的文件里。如果把这些数据全部读入数据库就太痛苦了:数据量很大,而用 mysql 存储这些数据仅仅是为了全文检索,也过于浪费。所以决定采用 xmlpipe2 数据源。由于 c 语言的 io 操作能力很强,为了效率,就用了我以前从没写过的 c 语言。废话少说,代码如下:

 

#include <stdio.h>#include <stdlib.h>#include <string.h>#include <unistd.h>#include <mysql.h>#include <iconv.h>#include "dictionary.h"#include "iniparser.h"//配置文件名称#define INI_FILE_NAME "sanshi_xmlpipe.ini"//sql多条语句分隔符号#define SQL_SIGN "|"//设置字段里名字与字段值的分隔符#define ATTRIBUTE_SIGN ":"//设置多个字段间的分隔符#define DIELD_SIGN ","//设置读取文件的长度#define READFILE_MAX_LEN 1024//设置路径+文件的最大长度#define FILE_NAME_MAX_LEN 1024typedef struct tag_sanshi_ini_config{char * db_name;char * db_user;char * db_pwd;char * db_host;char * sql_query_pre;char * sql_query;char * sql_query_post;char * sphinx_schema;char *sphinx_id;char * sphinx_file;char * sphinx_other_field;char * file_dir;}sanshi_ini_config;sanshi_ini_config conf={"db:db_name","db:db_user","db:db_pwd","db:db_host","sql:sql_query_pre","sql:sql_query","sql:sql_query_post","xml:schema","xml:index_id","xml:file_field","xml:other_field","file:base_dir"};typedef struct tag_sanshi_field{char *field;int  id;} sanshi_field;sanshi_field parser_field(char * field_str){sanshi_field temp;char * buf = strstr( field_str, ATTRIBUTE_SIGN);temp.id = atoi(buf + strlen(ATTRIBUTE_SIGN));buf[0]='/0';temp.field = field_str;//temp.id = buf + strlen(ATTRIBUTE_SIGN);//buf = strstr( field_str, ATTRIBUTE_SIGN);//temp.id = atoi(buf + strlen(ATTRIBUTE_SIGN));//printf("%s %s %d/n",field_str,temp.field,temp.id);/*char * p;char field_tmp[strlen(field_str)+1];strcpy(field_tmp,field_str);//temp.field = strtok( field_str, ATTRIBUTE_SIGN);//field_str = strtok( NULL, ATTRIBUTE_SIGN);//temp.id = atoi(p);printf("%s /n",field_tmp);field_str =NULL;*/return temp;}void print_file_content(char * file_name){FILE *fp;char line[READFILE_MAX_LEN];fp=fopen(file_name,"r");if(fp!=NULL){while(fgets(line,READFILE_MAX_LEN,fp)!=NULL){printf(line);}fclose(fp);}}void exec_mysql_query(MYSQL * mysql_con,char * sql_str){char* token = strtok( sql_str, SQL_SIGN);while( token != NULL )    {int query_error_no=0;query_error_no = mysql_query(mysql_con,token);if(query_error_no !=0){printf("ERROR sql=%s 
/nERROR NO=%d /nERROR msg= %s /n",token,query_error_no,mysql_error(mysql_con));mysql_close(mysql_con);exit(0);}mysql_free_result(mysql_store_result(mysql_con));//printf("exec sql : %s /n",token);        token = strtok( NULL, SQL_SIGN);    }}int main(int argc,char * argv[]){dictionary * ini;MYSQL mysql,*mysql_con;MYSQL_RES *result;MYSQL_ROW row;int query_error_no,sphinx_id;sanshi_field file_field;ini = iniparser_load(INI_FILE_NAME);//get ini config mysql set conf.db_name = iniparser_getstring(ini,conf.db_name,"test");conf.db_user = iniparser_getstring(ini,conf.db_user,"root");conf.db_pwd = iniparser_getstring(ini,conf.db_pwd,"");conf.db_host = iniparser_getstring(ini,conf.db_host,"localhost");conf.sql_query_pre = iniparser_getstring(ini,conf.sql_query_pre,NULL);conf.sql_query = iniparser_getstring(ini,conf.sql_query,NULL);conf.sql_query_post = iniparser_getstring(ini,conf.sql_query_post,NULL);conf.sphinx_schema = iniparser_getstring(ini,conf.sphinx_schema,NULL);sphinx_id = iniparser_getint(ini,conf.sphinx_id,0);conf.sphinx_file = iniparser_getstring(ini,conf.sphinx_file,NULL);file_field = parser_field(conf.sphinx_file);conf.sphinx_other_field = iniparser_getstring(ini,conf.sphinx_other_field,NULL);conf.file_dir = iniparser_getstring(ini,conf.file_dir,"./");//printf("db_name=%s /t db_user=%s /t db_pwd=%s /t db_host=%s /n",conf.db_name,conf.db_user,conf.db_pwd,conf.db_host);//mysql connect mysql_init(&mysql);mysql_con =mysql_real_connect(&mysql,conf.db_host,conf.db_user,conf.db_pwd,conf.db_name,0,NULL,0);if(mysql_con == NULL){printf("ERROR: connect mysql fail! 
plaese check ini file in set/n %s /n",mysql_error(&mysql));exit(0);}//printf("mysql connect suc!/n");//exec sqlexec_mysql_query(mysql_con,conf.sql_query_pre);query_error_no = mysql_query(mysql_con,conf.sql_query);if(query_error_no !=0){printf("ERROR sql=%s /nERROR NO=%d /nERROR msg= %s /n",conf.sql_query,query_error_no,mysql_error(mysql_con));mysql_close(mysql_con);exit(0);}//printf("exec sql_query : %s /n",conf.sql_query);result = mysql_store_result(mysql_con);//echo xml headerprintf("<?xml version=/"1.0/" encodeing=/"utf-8/"?>/n<sphinx:docset>/n%s/n",conf.sphinx_schema);//printf("%s/n",conf.sphinx_other_field);while(row = mysql_fetch_row(result)){printf("<sphinx:document id=/"%d/">/n",(row[sphinx_id]?row[sphinx_id]:0));char * field_str;char field_tmp[strlen(conf.sphinx_other_field)+1];char temp_file_name[FILE_NAME_MAX_LEN];memcpy(field_tmp,conf.sphinx_other_field,strlen(conf.sphinx_other_field)+1);field_str = strtok( field_tmp, DIELD_SIGN);while( field_str != NULL ){//printf("%s/n",field_str);sanshi_field other_field = parser_field(field_str);printf("


 

本人第一次写c,有些地方优化不够,还望各位指点

 

配置文件如下:

[db]
db_host=127.0.0.1
db_name=test
db_pwd=123456
db_user=root

[sql]
sql_query_pre=select * from log|select * from log
sql_query = select * from log
sql_query_post =

[xml]
schema=\
<sphinx:schema>\
<sphinx:field name="LogActionType"/>\
<sphinx:field name="LogDataType"/>\
<sphinx:attr name="LogTime" type="timestamp"/>\
<sphinx:attr name="LogIP" type="int" bits="16" default="1"/>\
</sphinx:schema>
index_id=0
file_field=LogActionType:1
other_field=LogDataType:2,LogTime:6,LogIP:4

[file]
base_dir=


 

 

 

备注:该程序的ini解析用到的是iniparser3.0b 的源代码,也就是依赖的2个h文件

#include "dictionary.h"

#include "iniparser.h"

       编译的命令为:

gcc -I /data/app/mysql/include/mysql/ -L /data/app/mysql/lib/mysql/ -l mysqlclient -g -o sanshi sanshi_xmlpipe.c dictionary.c iniparser.c

注意mysql的库路径,以及文件名

0 0