Solr单集代码调用案例
来源:互联网 发布:微信扫码抽奖软件 编辑:程序博客网 时间:2024/05/17 17:43
实现代码
1、配置pom,在maven添加solr的依赖
<!-- SolrJ client library (version 4.10.3) used by the Java code to query Solr. -->
<dependency>
    <groupId>org.apache.solr</groupId>
    <artifactId>solr-solrj</artifactId>
    <version>4.10.3</version>
</dependency>
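2、配置Spring,声明要连接的solrcore
在applicationContext.xml中注册一个HttpSolrServer,其构造参数${solr.host}取自classpath下的solr.properties,例如:solr.host=http://localhost:8983/solr/docdetection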
applicationContext.xml的代码如下:
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:p="http://www.springframework.org/schema/p"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:tx="http://www.springframework.org/schema/tx"
       xmlns:task="http://www.springframework.org/schema/task"
       xmlns:mongo="http://www.springframework.org/schema/data/mongo"
       xsi:schemaLocation="
           http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.1.xsd
           http://www.springframework.org/schema/tx http://www.springframework.org/schema/tx/spring-tx-4.0.xsd
           http://www.springframework.org/schema/mvc http://www.springframework.org/schema/mvc/spring-mvc-4.0.xsd
           http://www.springframework.org/schema/cache http://www.springframework.org/schema/cache/spring-cache-4.0.xsd
           http://www.springframework.org/schema/task http://www.springframework.org/schema/task/spring-task-4.0.xsd
           http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-4.0.xsd
           http://www.springframework.org/schema/aop http://www.springframework.org/schema/aop/spring-aop-4.0.xsd
           http://www.springframework.org/schema/data/mongo http://www.springframework.org/schema/data/mongo/spring-mongo-1.0.xsd">

    <!-- Enable annotation-based configuration (@Autowired and friends). -->
    <context:annotation-config />

    <!-- <aop:aspectj-autoproxy/> -->

    <!-- Component-scan the application's base package. -->
    <context:component-scan base-package="cn.com.hbny.docdetection" />

    <!-- Enable annotation-driven scheduled tasks. -->
    <task:annotation-driven/>

    <!-- Load the JDBC, MongoDB and Solr property files from the classpath. -->
    <context:property-placeholder
            location="classpath:jdbc.properties,classpath:mongodb.properties,classpath:solr.properties" />

    <!-- Solr client bean; the server address is taken from the solr.host property. -->
    <bean class="org.apache.solr.client.solrj.impl.HttpSolrServer">
        <constructor-arg value="${solr.host}"></constructor-arg>
    </bean>
</beans>
DocInfoSolrService接口定义:
package cn.com.hbny.docdetection.solr;

import cn.com.hbny.docdetection.solr.po.ResultModel;

/**
 * Search contract for looking up indexed document sentences through Solr.
 */
public interface DocInfoSolrService {

    /**
     * Finds the indexed entries that match the given namespace, classification
     * and content criteria.
     *
     * @param isPreciseSearch {@code true} for a precise search, {@code false} for a rough search
     * @param ns              namespace to search in
     * @param propertyId      id of the property classification
     * @param propertyType    name of the property classification
     * @param majorId         id of the major (specialty)
     * @param title           title to match
     * @param keyword         keyword to match
     * @param sentences       sentence content to match
     * @param pageNum         page number
     * @param pageSize        number of entries per page
     * @return the search result
     * @author toto
     * @date 2017-04-04 (originally created by 涂作权)
     */
    ResultModel findDocInfoBySolr(
            Boolean isPreciseSearch,
            String ns,
            String propertyId,
            String propertyType,
            String majorId,
            String title,
            String keyword,
            String sentences,
            Integer pageNum,
            Integer pageSize);
}
DocInfoSolrServiceImpl 代码实现如下:
package cn.com.hbny.docdetection.solr.impl;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.ORDER;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import com.ctc.wstx.util.StringUtil;

import cn.com.hbny.docdetection.mongodb.beans.SentenceInfo;
import cn.com.hbny.docdetection.server.ExtendedServerConfig;
import cn.com.hbny.docdetection.solr.DocInfoSolrService;
import cn.com.hbny.docdetection.solr.po.ResultModel;
import cn.com.hbny.docdetection.utils.CosineSimilarAlgorithm;
import cn.com.hbny.docdetection.utils.HtmlUtil;

/**
 * Solr-backed implementation of {@link DocInfoSolrService}.
 *
 * <p>Builds an edismax query against the {@code docdetection.hbny.SentenceInfo}
 * namespace, requests highlighting on the {@code sentences} and {@code title}
 * fields, and maps every returned document to a {@link SentenceInfo}.
 */
@Service
public class DocInfoSolrServiceImpl implements DocInfoSolrService {

    /** Opening tag Solr wraps around highlighted fragments. */
    private static final String HIGHLIGHT_PREFIX = "<font style=\"color:#006600;\">";

    /** Closing tag Solr wraps around highlighted fragments. */
    private static final String HIGHLIGHT_SUFFIX = "</font>";

    /** Page size used when the caller passes none (or a non-positive one). */
    private static final int DEFAULT_PAGE_SIZE = 1000;

    /** Fallback minimum-match value for title/keyword searches. */
    private static final String DEFAULT_TITLE_KEYWORD_MM = "67%";

    /** Fallback minimum-match value for sentence searches. */
    private static final String DEFAULT_SENTENCE_MM = "30%";

    @Autowired
    private HttpSolrServer server;

    /**
     * Backslash-escapes every character that is part of the Solr query syntax,
     * as well as every whitespace character.
     *
     * @param s the raw text; must not be {@code null}
     * @return the escaped text
     */
    public String escapeQueryChars(String s) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            // These characters are part of the query syntax and must be escaped
            if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
                    || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
                    || c == '*' || c == '?' || c == '|' || c == '&' || c == ';' || c == '/'
                    || Character.isWhitespace(c)) {
                sb.append('\\');
            }
            sb.append(c);
        }
        return sb.toString();
    }

    /**
     * Extracts every fragment enclosed between {@code htmlPrefix} and
     * {@code htmlSufix} and joins the fragments with {@code ';'}.
     *
     * <p>A prefix without a matching suffix ends the scan; fragments collected
     * up to that point are still returned.
     *
     * @param sentences  text that may contain highlighted fragments
     * @param htmlPrefix opening highlight marker
     * @param htmlSufix  closing highlight marker
     * @return {@code null} when {@code sentences} is blank; otherwise the joined
     *         fragments (empty string when none are found or a marker is empty)
     * @author toto
     * @date 2017-04-15 (originally created by 涂作权)
     */
    public String gainSimilarSentence(String sentences, String htmlPrefix, String htmlSufix) {
        if (StringUtils.isBlank(sentences)) {
            return null;
        }
        // An empty marker matches at every position and would never let the scan advance.
        if (StringUtils.isEmpty(htmlPrefix) || StringUtils.isEmpty(htmlSufix)) {
            return "";
        }

        StringBuilder joined = new StringBuilder();
        boolean first = true;
        int searchFrom = 0;
        while (true) {
            int prefixAt = sentences.indexOf(htmlPrefix, searchFrom);
            if (prefixAt == -1) {
                break;
            }
            int begin = prefixAt + htmlPrefix.length();
            int end = sentences.indexOf(htmlSufix, begin);
            if (end == -1) {
                // Unterminated highlight tag: nothing more can be extracted.
                break;
            }
            if (!first) {
                joined.append(';');
            }
            joined.append(sentences, begin, end);
            first = false;
            searchFrom = end;
        }
        return joined.toString();
    }

    /**
     * Computes the similarity between a search sentence and a (possibly
     * highlighted) sentence returned by Solr.
     *
     * <p>The highlight markers are stripped from {@code sentenceFromSolr}, the
     * two texts are handed to {@code CosineSimilarAlgorithm.getSimilarity}, and
     * the result is multiplied by 100.
     *
     * @param sentenceOriginal sentence used as the search condition
     * @param sentenceFromSolr sentence returned by Solr, possibly with highlight markup
     * @return the similarity times 100; {@code 0} when either argument is blank
     *         or the computation throws
     * @author toto
     * @date 2017-04-15 (originally created by 涂作权)
     */
    public double gianSimilarityValue(String sentenceOriginal, String sentenceFromSolr) {
        if (StringUtils.isBlank(sentenceOriginal) || StringUtils.isBlank(sentenceFromSolr)) {
            return 0f;
        }
        double similarityValue = 0f;
        try {
            // Literal replacement: the markers are plain text, not regular expressions.
            String sentenceWithNoHtml = sentenceFromSolr
                    .replace(HIGHLIGHT_PREFIX, "")
                    .replace(HIGHLIGHT_SUFFIX, "");
            similarityValue = (float) CosineSimilarAlgorithm.getSimilarity(sentenceOriginal, sentenceWithNoHtml);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return (similarityValue * 100);
    }

    /**
     * Searches the Solr index and maps the hits to a {@link ResultModel}.
     *
     * <p>The query always targets {@code ns:docdetection.hbny.SentenceInfo};
     * {@code title}, {@code keyword} and {@code sentences} each add an
     * {@code AND} clause when non-blank. {@code isPreciseSearch}, {@code ns},
     * {@code propertyId} and {@code majorId} are accepted but not used by this
     * implementation (the propertyId/majorId filters are disabled).
     *
     * @param isPreciseSearch not used by this implementation
     * @param ns              not used by this implementation
     * @param propertyId      not used by this implementation
     * @param propertyType    property classification name; written to every returned entry
     * @param majorId         not used by this implementation
     * @param title           title to match
     * @param keyword         keyword to match against the {@code sentences} field
     * @param sentences       sentence content to match
     * @param pageNum         1-based page number; {@code null} or values below 1 mean page 1
     * @param pageSize        rows per page; {@code null} or values below 1 mean 1000
     * @return the populated result, or {@code null} when the search fails
     * @author toto
     * @date 2017-04-04 (originally created by 涂作权)
     */
    @Override
    public ResultModel findDocInfoBySolr(
            Boolean isPreciseSearch,
            String ns,
            String propertyId,
            String propertyType,
            String majorId,
            String title,
            String keyword,
            String sentences,
            Integer pageNum,
            Integer pageSize) {
        try {
            SolrQuery query = new SolrQuery();
            query.set("defType", "edismax");
            query.set("fl", "*,score");
            query.setSort("score", ORDER.desc);

            // trimToEmpty tolerates a missing (null) property so the defaults below stay reachable.
            String configuredTitleKeywordMm = StringUtils.trimToEmpty(
                    ExtendedServerConfig.getInstance().getStringProperty("SolrMMValueForTitleAndKeyword"));
            String configuredSentenceMm = StringUtils.trimToEmpty(
                    ExtendedServerConfig.getInstance().getStringProperty("SolrMMValue"));
            String titleKeywordMm = StringUtils.isNotBlank(configuredTitleKeywordMm)
                    ? configuredTitleKeywordMm + "%" : DEFAULT_TITLE_KEYWORD_MM;
            String sentenceMm = StringUtils.isNotBlank(configuredSentenceMm)
                    ? configuredSentenceMm + "%" : DEFAULT_SENTENCE_MM;

            // Build q. When several criteria are given, the last one applied decides "mm".
            StringBuilder params = new StringBuilder("ns:docdetection.hbny.SentenceInfo");
            if (StringUtils.isNotBlank(title)) {
                params.append(" AND title:" + escapeQueryChars(title));
                query.set("mm", titleKeywordMm);
            }
            if (StringUtils.isNotBlank(keyword)) {
                params.append(" AND sentences:" + escapeQueryChars(keyword));
                query.set("mm", titleKeywordMm);
            }
            if (StringUtils.isNotBlank(sentences)) {
                params.append(" AND sentences:" + escapeQueryChars(sentences));
                query.set("mm", sentenceMm);
            }
            query.setQuery(params.toString());

            // Paging: normalise the inputs and compute the offset in long arithmetic,
            // because (pageNum - 1) * pageSize overflows int for large page sizes.
            int page = (pageNum == null || pageNum.intValue() < 1) ? 1 : pageNum.intValue();
            int size = (pageSize == null || pageSize.intValue() < 1) ? DEFAULT_PAGE_SIZE : pageSize.intValue();
            long start = (long) (page - 1) * size;
            query.setStart((int) Math.min(start, (long) Integer.MAX_VALUE));
            query.setRows(size);

            // Highlighting on sentences and title.
            query.setHighlight(true);
            query.addHighlightField("sentences");
            query.addHighlightField("title");
            query.setHighlightSimplePre(HIGHLIGHT_PREFIX);
            query.setHighlightSimplePost(HIGHLIGHT_SUFFIX);
            query.setHighlightSnippets(2);
            query.setHighlightFragsize(300);

            // NOTE(review): these settings are re-applied to the shared client on every
            // request; consider configuring the HttpSolrServer bean once instead.
            server.setSoTimeout(15000);
            server.setConnectionTimeout(1000);
            server.setDefaultMaxConnectionsPerHost(1000);
            server.setMaxTotalConnections(1000);

            QueryResponse response = server.query(query);
            SolrDocumentList list = response.getResults();
            long count = list.getNumFound();
            Map<String, Map<String, List<String>>> highlighting = response.getHighlighting();

            List<SentenceInfo> sentenceInfos = new ArrayList<SentenceInfo>();
            ResultModel rm = new ResultModel();

            for (SolrDocument solrDocument : list) {
                SentenceInfo sentenceInfo = new SentenceInfo();
                String id = fieldAsString(solrDocument, "_id");
                sentenceInfo.setId(id);
                sentenceInfo.setDocLibrayId(fieldAsString(solrDocument, "docLibrayId"));
                sentenceInfo.setOriginalDocPath(fieldAsString(solrDocument, "originalDocPath"));
                sentenceInfo.setHtmlDocPath(fieldAsString(solrDocument, "htmlDocPath"));
                sentenceInfo.setOriginalFileName(fieldAsString(solrDocument, "originalFileName"));
                sentenceInfo.setPropertyId(fieldAsString(solrDocument, "propertyId"));
                sentenceInfo.setPropertyName(fieldAsString(solrDocument, "propertyName"));
                String titleValue = fieldAsString(solrDocument, "title");
                sentenceInfo.setTitle(titleValue);

                // Numeric fields are only set when present, so one sparse document
                // does not abort the whole search.
                Integer wordNum = fieldAsInteger(solrDocument, "wordNum");
                if (wordNum != null) {
                    sentenceInfo.setWordNum(wordNum);
                }
                Integer paragNum = fieldAsInteger(solrDocument, "paragNum");
                if (paragNum != null) {
                    sentenceInfo.setParagNum(paragNum);
                }
                Integer sentenceNum = fieldAsInteger(solrDocument, "sentenceNum");
                if (sentenceNum != null) {
                    sentenceInfo.setSentenceNum(sentenceNum);
                }
                sentenceInfo.setAuthor(fieldAsString(solrDocument, "author"));
                String institution = fieldAsString(solrDocument, "institution");
                if (institution != null) {
                    sentenceInfo.setInstitution(institution);
                }

                // Prefer the first highlighted snippet over the stored sentence text.
                String sentencesValue = fieldAsString(solrDocument, "sentences");
                String highlighted = firstHighlight(highlighting, id, "sentences");
                if (highlighted != null) {
                    sentencesValue = highlighted;
                }
                sentenceInfo.setSentences(sentencesValue);
                sentenceInfo.setSimilarSentence(
                        this.gainSimilarSentence(sentencesValue, HIGHLIGHT_PREFIX, HIGHLIGHT_SUFFIX));

                // NOTE(review): this unconditionally replaces the propertyName read from
                // the index with the caller-supplied classification name - confirm intent.
                sentenceInfo.setPropertyName(propertyType);

                // NOTE(review): when several criteria are supplied, each call overwrites the
                // entry's similarity value, so the last criterion wins - confirm intent.
                if (StringUtils.isNotBlank(sentences)) {
                    recordSimilarity(rm, sentenceInfo, this.gianSimilarityValue(sentences, sentencesValue));
                }
                if (StringUtils.isNotBlank(title)) {
                    recordSimilarity(rm, sentenceInfo, this.gianSimilarityValue(title, titleValue));
                }
                if (StringUtils.isNotBlank(keyword)) {
                    recordSimilarity(rm, sentenceInfo, this.gianSimilarityValue(keyword, sentencesValue));
                }
                if (StringUtils.isBlank(sentences) && StringUtils.isBlank(title) && StringUtils.isBlank(keyword)) {
                    rm.setMostSimilarSentenceInfo(sentenceInfo);
                    rm.setTotalSimilarityValue((float) 0.0f);
                }

                sentenceInfos.add(sentenceInfo);
            }

            rm.setSentenceInfoList(sentenceInfos);
            rm.setRecordCount(count);

            // Page count = ceil(count / size).
            int totalPages = (int) (count / size);
            if (count % size > 0) {
                totalPages++;
            }
            rm.setPageCount(totalPages);
            return rm;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /** Returns the named field as a string, or {@code null} when the document lacks it. */
    private static String fieldAsString(SolrDocument document, String fieldName) {
        Object value = document.get(fieldName);
        return (value == null) ? null : value.toString();
    }

    /** Returns the named field parsed as an integer, or {@code null} when the document lacks it. */
    private static Integer fieldAsInteger(SolrDocument document, String fieldName) {
        String value = fieldAsString(document, fieldName);
        return (value == null) ? null : Integer.valueOf(Integer.parseInt(value));
    }

    /** Returns the first highlight snippet for a document/field pair, or {@code null} when there is none. */
    private static String firstHighlight(
            Map<String, Map<String, List<String>>> highlighting, String documentId, String fieldName) {
        if (highlighting == null || documentId == null) {
            return null;
        }
        Map<String, List<String>> byField = highlighting.get(documentId);
        if (byField == null) {
            return null;
        }
        List<String> snippets = byField.get(fieldName);
        return (snippets == null || snippets.isEmpty()) ? null : snippets.get(0);
    }

    /**
     * Stores the similarity on the entry and promotes the entry to "most similar"
     * when the result has none yet or the current one scores lower.
     */
    private static void recordSimilarity(ResultModel rm, SentenceInfo sentenceInfo, double similarityValue) {
        sentenceInfo.setSimilarityValue((float) similarityValue);
        if (rm.getMostSimilarSentenceInfo() == null
                || rm.getMostSimilarSentenceInfo().getSimilarityValue() < similarityValue) {
            rm.setMostSimilarSentenceInfo(sentenceInfo);
            rm.setTotalSimilarityValue((float) similarityValue);
        }
    }
}
DocInfoSolrController的代码如下:
package cn.com.hbny.docdetection.solr.controller;

import java.util.HashMap;
import java.util.Map;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;

import cn.com.hbny.docdetection.controller.base.BaseController;
import cn.com.hbny.docdetection.solr.DocInfoSolrService;
import cn.com.hbny.docdetection.solr.po.ResultModel;

/**
 * HTTP endpoint for the document-detection Solr search.
 *
 * <p>Note from the original author - command used to sync MongoDB into Solr:
 * {@code mongo-connector -m localhost:27017 --auto-commit-interval=0
 * -t http://localhost:8983/solr/docdetection -d solr_doc_manager}
 *
 * @author toto
 * @date 2017-04-04 (originally created by 涂作权)
 */
@Controller
@RequestMapping(value = "/solr/docInfoSolrController", method = {RequestMethod.GET, RequestMethod.POST})
public class DocInfoSolrController extends BaseController {

    private static final Logger logger = Logger.getLogger(DocInfoSolrController.class);

    /**
     * Page size used when the request carries none.
     * NOTE(review): this asks Solr for up to two billion rows per request;
     * consider a much smaller default.
     */
    private static final int DEFAULT_PAGE_SIZE = 2000000000;

    /** Misspelled parameter name this endpoint used to bind; still honoured for existing clients. */
    private static final String LEGACY_PAGE_SIZE_PARAM = "paseSize";

    @Autowired
    private DocInfoSolrService docInfoSolrService;

    /**
     * Searches the index by content and the other given conditions.
     *
     * <p>Example URL given by the original author:
     * {@code http://localhost:8080/docdetection/solr/docInfoSolrController/gainDocInfosByContentAndOtherCondition.action}
     *
     * @param model           Spring MVC model (not used)
     * @param isPreciseSearch passed through to the service
     * @param ns              namespace
     * @param propertyId      property id
     * @param propertyType    name of the property classification
     * @param majorId         id of the major (specialty)
     * @param title           title
     * @param keyword         keyword
     * @param sentences       sentence content
     * @param pageNum         current page number; defaults to 1
     * @param pageSize        rows per page, bound from request parameter {@code pageSize};
     *                        when absent, the legacy {@code paseSize} parameter is used,
     *                        then a default of 2000000000
     * @param request         current request, read for the legacy page-size parameter
     * @param response        current response (not used)
     * @return a map holding the search result under key {@code "resultModel"};
     *         empty when the search fails or yields no result object
     * @author toto
     * @date 2017-04-04 (originally created by 涂作权)
     */
    @RequestMapping(value = "/gainDocInfosByContentAndOtherCondition")
    @ResponseBody
    public Map<String, Object> gainDocInfosByContentAndOtherCondition(
            Model model,
            Boolean isPreciseSearch,
            String ns,
            String propertyId,
            String propertyType,
            String majorId,
            String title,
            String keyword,
            String sentences,
            @RequestParam(value = "pageNum", required = false, defaultValue = "1") Integer pageNum,
            @RequestParam(value = "pageSize", required = false) Integer pageSize,
            HttpServletRequest request,
            HttpServletResponse response) {
        Map<String, Object> resultMap = new HashMap<String, Object>();
        try {
            Integer effectivePageSize = resolvePageSize(pageSize, request);
            ResultModel resultModel = docInfoSolrService.findDocInfoBySolr(
                    isPreciseSearch,
                    ns,
                    propertyId,
                    propertyType,
                    majorId,
                    title,
                    keyword,
                    sentences,
                    pageNum,
                    effectivePageSize);
            if (resultModel == null) {
                // The service reports a failed search by returning null.
                logger.warn("Solr search returned no result model");
                return resultMap;
            }
            logger.info(resultModel.getSentenceInfoList());
            resultMap.put("resultModel", resultModel);
        } catch (Exception e) {
            logger.error("Solr search request failed", e);
        }
        return resultMap;
    }

    /** Picks the page size: the bound parameter, else the legacy-named parameter, else the default. */
    private static Integer resolvePageSize(Integer pageSize, HttpServletRequest request) {
        if (pageSize != null) {
            return pageSize;
        }
        String legacyValue = (request == null) ? null : request.getParameter(LEGACY_PAGE_SIZE_PARAM);
        if (legacyValue != null && legacyValue.trim().length() > 0) {
            return Integer.valueOf(legacyValue.trim());
        }
        return Integer.valueOf(DEFAULT_PAGE_SIZE);
    }
}
阅读全文
0 0
- Solr单集代码调用案例
- solr案例
- 沪深股市api调用代码案例
- JAVA多态调用案例代码
- Hadoop-rpc调用案例,服务端,客户端代码案例
- solr实际案例
- Solr优化案例分析
- solr检索案例
- ElasticSearch+Solr几个案例笔记
- solr的修改操作案例
- Solr之入门案例-yellowcong
- 简单调用第三方接口的案例代码
- solr调用 java php
- 使用.net调用Solr
- Solr调用示例
- 案例代码
- solr FieldType代码
- solr操作代码大全
- 遍历Map的四种方式与sql优化
- 新款安卓架构设计-Architecture Components介绍
- 对象克隆
- GIC-400寄存器 之CPU接口
- Android7.0中文文档(API)-- ShareActionProvider
- Solr单集代码调用案例
- UVa12100
- Let the Balloon Rise
- [mark]Chrome webdriver 下载
- [机器学习入门] 李弘毅机器学习笔记-17(Unsupervised Learning: Deep Auto-encoder;无监督学习:深度自动编码器)
- Error:Execution failed for task ':xxx:compileReleaseJavaWithJavac'.
- String、StringBuffer、StringBuilder
- Document对象,Element对象和Node对象简介
- oracle数据库锁表问题