solr索引过程源码解析

来源：互联网发布：js中定义字符串数组编辑：程序博客网时间：2024/06/09 15:41

在文章 http://blog.csdn.net/jj380382856/article/details/51603818 中，我们分析了solrj更新索引的源码处理方式：最后会向solr发送一个/update请求。下面我们继续分析solr在接收到这个请求后会怎么处理。

1.请求首先被SolrDispatchFilter截获，然后执行doFilter方法。

2.在doFilter方法中调用 Action result = call.call(); 进入HttpSolrCall.call()方法。这个方法会先调用本类的init()方法，init()的主要作用是根据servlet和solrconfig的配置，获取处理当前请求的SolrRequestHandler对象。init()中调用了extractHandlerFromURLPath(parser)方法，该方法代码如下。

private void extractHandlerFromURLPath(SolrRequestParsers parser) throws Exception {    if (handler == null && path.length() > 1) { // don't match "" or "/" as valid path      handler = core.getRequestHandler(path);。。。。。。  }

执行完这段代码后，handler变成了与请求路径/update对应的请求处理器，即UpdateRequestHandler的实例（原文此处为调试截图，已缺失）。

init()方法执行完成后action变成了PROCESS，HttpSolrCall.call()方法继续执行，代码如下。这段代码主要是封装请求并写回响应，其中最主要的一行是 execute(solrRsp);

 switch (action) {        case ADMIN:          handleAdminRequest();          return RETURN;        case REMOTEQUERY:          remoteQuery(coreUrl + path, resp);          return RETURN;        case PROCESS:          final Method reqMethod = Method.getMethod(req.getMethod());          HttpCacheHeaderUtil.setCacheControlHeader(config, resp, reqMethod);          // unless we have been explicitly told not to, do cache validation          // if we fail cache validation, execute the query          if (config.getHttpCachingConfig().isNever304() ||              !HttpCacheHeaderUtil.doCacheHeaderValidation(solrReq, req, reqMethod, resp)) {            SolrQueryResponse solrRsp = new SolrQueryResponse();              /* even for HEAD requests, we need to execute the handler to               * ensure we don't get an error (and to make sure the correct               * QueryResponseWriter is selected and we get the correct               * Content-Type)               */            SolrRequestInfo.setRequestInfo(new SolrRequestInfo(solrReq, solrRsp));            execute(solrRsp);<span style="color:#ff0000;">//主要代码</span>            HttpCacheHeaderUtil.checkHttpCachingVeto(solrRsp, resp, reqMethod);            Iterator<Map.Entry<String, String>> headers = solrRsp.httpHeaders();            while (headers.hasNext()) {              Map.Entry<String, String> entry = headers.next();              resp.addHeader(entry.getKey(), entry.getValue());            }            QueryResponseWriter responseWriter = core.getQueryResponseWriter(solrReq);            if (invalidStates != null) solrReq.getContext().put(CloudSolrClient.STATE_VERSION, invalidStates);            writeResponse(solrRsp, responseWriter, reqMethod);          }          return RETURN;        default: return action;

 protected void execute(SolrQueryResponse rsp) {    // a custom filter could add more stuff to the request before passing it on.    // for example: sreq.getContext().put( "HttpServletRequest", req );    // used for logging query stats in SolrCore.execute()    solrReq.getContext().put("webapp", req.getContextPath());    solrReq.getCore().execute(handler, solrReq, rsp);  }

其中调用的SolrCore的execute()方法代码如下：

public void execute(SolrRequestHandler handler, SolrQueryRequest req, SolrQueryResponse rsp) {    if (handler==null) {      String msg = "Null Request Handler '" +        req.getParams().get(CommonParams.QT) + "'";      if (log.isWarnEnabled()) log.warn(logid + msg + ":" + req);      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, msg);    }    preDecorateResponse(req, rsp);    if (requestLog.isDebugEnabled() && rsp.getToLog().size() > 0) {      // log request at debug in case something goes wrong and we aren't able to log later      requestLog.debug(rsp.getToLogAsString(logid));    }    // TODO: this doesn't seem to be working correctly and causes problems with the example server and distrib (for example /spell)    // if (req.getParams().getBool(ShardParams.IS_SHARD,false) && !(handler instanceof SearchHandler))    //   throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"isShard is only acceptable with search handlers");    handler.handleRequest(req,rsp);<span style="white-space:pre"></span><span style="color:#ff6666;">//主要代码</span>    postDecorateResponse(handler, req, rsp);    if (rsp.getToLog().size() > 0) {      if (requestLog.isInfoEnabled()) {        requestLog.info(rsp.getToLogAsString(logid));      }      if (log.isWarnEnabled() && slowQueryThresholdMillis >= 0) {        final long qtime = (long) (req.getRequestTimer().getTime());        if (qtime >= slowQueryThresholdMillis) {          log.warn("slow: " + rsp.getToLogAsString(logid));        }      }    }  }

上面主要的代码是 handler.handleRequest(req,rsp);这个方法调用的是RequestHandlerBase的handleRequest方法，该方法又调用handleRequestBody抽象方法，定义如下：

 /**
  * Handles the body of a request; concrete handlers supply the implementation.
  *
  * @param req the request being handled
  * @param rsp the response to fill in
  * @throws Exception if handling the request fails
  */
 public abstract void handleRequestBody( SolrQueryRequest req, SolrQueryResponse rsp ) throws Exception;

ContentStreamHandlerBase类中实现了该方法，代码如下：

@Override  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {    SolrParams params = req.getParams();    UpdateRequestProcessorChain processorChain =        req.getCore().getUpdateProcessorChain(params);<span style="white-space:pre"></span><span style="color:#ff0000;">//获得更新处理链</span>    UpdateRequestProcessor processor = processorChain.createProcessor(req, rsp);    try {      ContentStreamLoader documentLoader = newLoader(req, processor);      Iterable<ContentStream> streams = req.getContentStreams();      if (streams == null) {        if (!RequestHandlerUtils.handleCommit(req, processor, params, false) && !RequestHandlerUtils.handleRollback(req, processor, params, false)) {          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "missing content stream");        }      } else {        for (ContentStream stream : streams) {          documentLoader.load(req, rsp, stream, processor);        }        // Perhaps commit from the parameters        RequestHandlerUtils.handleCommit(req, processor, params, false);        RequestHandlerUtils.handleRollback(req, processor, params, false);      }    } finally {      // finish the request      processor.finish();    }  }

上面这段代码首先获得了更新处理链（原文此处为处理链的调试截图，已缺失）。

可见更新需要依次经过处理链上的3个处理器：第一个是LogUpdateProcessor，负责记录更新日志；第二个是DistributedUpdateProcessor，负责分布式转发；第三个是RunUpdateProcessor，负责真正执行更新。

该方法中有如下代码，主要是把请求中的每个内容流交给对应的documentLoader加载：

for (ContentStream stream : streams) {          documentLoader.load(req, rsp, stream, processor);        }

以xml格式为例，这里会调用XMLLoader的load方法，load方法又会调用XMLLoader里面的processUpdate方法。

这个方法会调用当前processor的processAdd方法。处理从LogUpdateProcessor开始，下面贴出它的processAdd代码：

 @Override  public void processAdd(AddUpdateCommand cmd) throws IOException {    if (logDebug) { log.debug("PRE_UPDATE " + cmd.toString() + " " + req); }    // call delegate first so we can log things like the version that get set later    if (next != null) next.processAdd(cmd);<span style="white-space:pre">//调用下一个处理链进行处理    // Add a list of added id's to the response    if (adds == null) {      adds = new ArrayList<>();      toLog.add("add",adds);<span style="white-space:pre"></span>    }    if (adds.size() < maxNumToLog) {      long version = cmd.getVersion();      String msg = cmd.getPrintableId();      if (version != 0) msg = msg + " (" + version + ')';      adds.add(msg);<span style="white-space:pre">    }    numAdds++;  }

由于不是SolrCloud模式，DistributedUpdateProcessor基本上没有做什么处理，所以继续交给下一个processor，也就是RunUpdateProcessor：

  @Override  public void processAdd(AddUpdateCommand cmd) throws IOException {        if (AtomicUpdateDocumentMerger.isAtomicUpdate(cmd)) {      throw new SolrException        (SolrException.ErrorCode.BAD_REQUEST,         "RunUpdateProcessor has received an AddUpdateCommand containing a document that appears to still contain Atomic document update operations, most likely because DistributedUpdateProcessorFactory was explicitly disabled from this updateRequestProcessorChain");    }    updateHandler.addDoc(cmd);<span style="white-space:pre">//关键代码    super.processAdd(cmd);    changesSinceCommit = true;  }

这里的addDoc最终调用的是DirectUpdateHandler2的addDoc0方法，代码如下：

  private int addDoc0(AddUpdateCommand cmd) throws IOException {    int rc = -1;    RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);    try {      IndexWriter writer = iw.get();      addCommands.incrementAndGet();      addCommandsCumulative.incrementAndGet();            // if there is no ID field, don't overwrite      if (idField == null) {        cmd.overwrite = false;      }            try {        IndexSchema schema = cmd.getReq().getSchema();                if (cmd.overwrite) {                    // Check for delete by query commands newer (i.e. reordered). This          // should always be null on a leader          List<UpdateLog.DBQ> deletesAfter = null;          if (ulog != null && cmd.version > 0) {            deletesAfter = ulog.getDBQNewer(cmd.version);          }                    if (deletesAfter != null) {            log.info("Reordered DBQs detected.  Update=" + cmd + " DBQs="                + deletesAfter);            List<Query> dbqList = new ArrayList<>(deletesAfter.size());            for (UpdateLog.DBQ dbq : deletesAfter) {              try {                DeleteUpdateCommand tmpDel = new DeleteUpdateCommand(cmd.req);                tmpDel.query = dbq.q;                tmpDel.version = -dbq.version;                dbqList.add(getQuery(tmpDel));              } catch (Exception e) {                log.error("Exception parsing reordered query : " + dbq, e);              }            }                        addAndDelete(cmd, dbqList);          } else {            // normal update                        Term updateTerm;            Term idTerm = new Term(cmd.isBlock() ? 
"_root_" : idField.getName(), cmd.getIndexedId());            boolean del = false;            if (cmd.updateTerm == null) {              updateTerm = idTerm;            } else {              // this is only used by the dedup update processor              del = true;              updateTerm = cmd.updateTerm;            }            if (cmd.isBlock()) {              writer.updateDocuments(updateTerm, cmd);            } else {              Document luceneDocument = cmd.getLuceneDocument();              // SolrCore.verbose("updateDocument",updateTerm,luceneDocument,writer);              writer.updateDocument(updateTerm, luceneDocument);<span style="white-space:pre"></span>//调用lucene的indexwriter的updateDocument            }

可以看到，这里终于和lucene打交道了，用到了IndexWriter，并且核心方法是 writer.updateDocument(updateTerm, luceneDocument);

solr为了丰富的功能和可扩展性，设计模式用了太多了，眼花缭乱。。。。

RunUpdateProcessor处理完后又回到了LogUpdateProcessor的那段代码，并写入日志、完成一些收尾工作，一条数据的插入就完成了。这个过程涉及到的东西很多，以后我会把indexWriter.updateDocument()方法展开来介绍一下。

如有不对请不吝指正。谢谢

0 0