ZooKeeper源码阅读(三):服务器端

来源:互联网 发布:中信证券怎么样 知乎 编辑:程序博客网 时间:2024/05/18 03:08

服务端入口:org.apache.zookeeper.server.QuorumPeerMain.main

(调用ZooKeeperServerMain.main)

 

运行参数:配置文件

如:QuorumPeerMain /opt/zookeeper/zookeeper-3.4.3/bin/../conf/zoo.cfg

 

 

读取配置文件

       QuorumPeerConfig config = new QuorumPeerConfig();

        if (args.length == 1) {

           config.parse(args[0]);

        }

 

//dynamicConfigFile如果没有设置,则会以兼容模式

//将配置文件当作dynamicConfigFile重新parse

 

//dynamicConfigFile中可以设置服务器权重等选项

 

启动autopurge任务

       DatadirCleanupManager purgeMgr = new DatadirCleanupManager(config

               .getDataDir(), config.getDataLogDir(), config

               .getSnapRetainCount(), config.getPurgeInterval());

        purgeMgr.start();

                   //清理snapshot和log,保留最近SnapRetainCount个snapshots

//每PurgeInterval个小时运行一次

 

决定是standalone还是Distributed运行模式

        if (args.length == 1 && config.isDistributed()) {

            runFromConfig(config);

        } else {

            LOG.warn("Either no config or no quorum defined in config, running "

                   + " in standalone mode");

            //there is only server in the quorum -- run as standalone

           ZooKeeperServerMain.main(args);

        }

 

注册log4j JMX

         ManagedUtil.registerLog4jMBeans();

 

创建ServerCnxnFactory

         ServerCnxnFactory cnxnFactory = ServerCnxnFactory.createFactory();

/**

*ServerCnxnFactory

 * +NettyServerCnxnFactory

 * +NIOServerCnxnFactory

 * +NullServerCnxnFactory

 *

*默认是NIOServerCnxnFactory

*/

 

创建Selector、Worker、Accept线程,ServerSocketChannel,配置登录认证

        // cnxnFactory.configure(config.getClientPortAddress(),

        //                       config.getMaxClientCnxns());

//NIOServerCnxnFactory.configure

 

         包括如下工作:

 

//配置安全登录(SASL),在jaas.conf中配置

       configureSaslLogin();

 

                   //清理超时Session链接

        cnxnExpiryQueue =

            new ExpiryQueue<NIOServerCnxn>(sessionlessCnxnTimeout);  //二级CHash

        expirerThread = new ConnectionExpirerThread();

 

                   //Selector配置

        int numCores= Runtime.getRuntime().availableProcessors();

        // 32cores sweet spot seems to be 4 selector threads

        numSelectorThreads =Integer.getInteger(

            ZOOKEEPER_NIO_NUM_SELECTOR_THREADS,

            Math.max((int) Math.sqrt((float)numCores/2), 1));

 

                   //Worker配置

        numWorkerThreads =Integer.getInteger(

            ZOOKEEPER_NIO_NUM_WORKER_THREADS, 2 *numCores);

 

                   //ServerSocketChannel

        this.ss =ServerSocketChannel.open();

        ss.socket().setReuseAddress(true);

        LOG.info("binding to port " + addr);

        ss.socket().bind(addr);

        ss.configureBlocking(false);

 

                   //AcceptThread包含一个Channel,多个Selector

        acceptThread = new AcceptThread(ss, addr,selectorThreads);

 

 

创建QuorumPeer

        quorumPeer = new QuorumPeer();

 

                   //QuorumPeer构造函数中对zkDB初始化

        this.logFactory = new FileTxnSnapLog(dataLogDir, dataDir);

        this.zkDb = new ZKDatabase(this.logFactory);

 

                   //设置TxnLog,Snapshot

        quorumPeer.setTxnFactory(new FileTxnSnapLog(

                  config.getDataLogDir(),

                  config.getDataDir()));

                   …

                   //zk维护的目录树结构

        quorumPeer.setZKDatabase(new ZKDatabase(quorumPeer.getTxnFactory()));

        quorumPeer.initConfigInZKDatabase();//初始化/zookeeper/config节点

 

        quorumPeer.setCnxnFactory(cnxnFactory);

                   …

        quorumPeer.start();

        quorumPeer.join();

 

启动QuorumPeer.start()

    public synchronized void start() {

       loadDataBase();

        cnxnFactory.start();

        startLeaderElection();

        super.start();

    }

 

         //loadDataBase 从zk的事务日志snapLog中恢复

        long zxid =snapLog.restore(dataTree,sessionsWithTimeouts,playbacklistener);

 

                   //playbacklistener回调onTxnLoaded

                   Request r = new Request(0,hdr.getCxid(),hdr.getType(), hdr, txn, hdr.getZxid());

        addCommittedProposal(r);

 

                   //addCommittedProposal提交Proposal

                            QuorumPacket pp = new QuorumPacket(Leader.PROPOSAL, request.zxid,

                   baos.toByteArray(), null);

           Proposal p = new Proposal();

            p.packet = pp;

            p.request =request;

            committedLog.add(p);

 

                   //LearnerHandler线程将消费committedLog,发送提交请求。

 

         //cnxnFactory.start()启动NIOServerCnxnFactory所有线程

        if (workerPool ==null) {

            workerPool = new WorkerService(

                "NIOWorker", numWorkerThreads, false);

        }

        for(SelectorThread thread : selectorThreads) {

            if(thread.getState() == Thread.State.NEW) {

               thread.start();

            }

        }

        //ensure thread is started once and only once

        if (acceptThread.getState()== Thread.State.NEW) {

            acceptThread.start();

        }

        if (expirerThread.getState()== Thread.State.NEW) {

            expirerThread.start();

        }

 

         //startLeaderElection()开始选举过程

                   //投票设为投自己

           if(getPeerState() == ServerState.LOOKING) {

               currentVote = new Vote(myid,getLastLoggedZxid(), getCurrentEpoch());

           }

 

                   //创建responder

                            try {

                           udpSocket = new DatagramSocket(myQuorumAddr.getPort());

                responder = new ResponderThread();

                responder.start();

                            }

                            //接收xid,回复myid|leader-id | leader-zxid

 

                   //选举算法

        this.electionAlg =createElectionAlgorithm(electionType);

 

        switch(electionAlgorithm) {

        case 0:

            le = new LeaderElection(this); //默认

            break;

        case 1:

            le = new AuthFastLeaderElection(this);

            break;

        case 2:

            le = new AuthFastLeaderElection(this,true);

            break;

        case 3:

            qcm = new QuorumCnxManager(this);

           QuorumCnxManager.Listener listener = qcm.listener;

            if(listener!=null){

               listener.start();

                le= new FastLeaderElection(this,qcm);

            } else {

                LOG.error("Null listener when initializing cnx manager");

            }

            break;

        default:

            assert false;

        }

 

         //super.start(); 在LOOKING、OBSERVING、FOLLOWING、LEADING状态之间切换

            /*

             * Main loop

             */

            while (running) {

                switch (getPeerState()){

                case LOOKING:

                   

                   break;

                case OBSERVING:

                   

                   break;

                case FOLLOWING:

                   

                   break;

                case LEADING:

                   

                   break;

                }

                start_fle =System.currentTimeMillis();

            }

 

 

OBSERVING状态

            //observer.observeLeader();

 

                            //内部循环

               QuorumPacket qp = new QuorumPacket();

                while (self.isRunning()){

                   readPacket(qp);

                   processPacket(qp);

                }

 

FOLLOWING状态

            //follower.followLeader();

 

                            //内部循环

                QuorumPacket qp = new QuorumPacket();

                while (self.isRunning()){

                   readPacket(qp);

                   processPacket(qp);

                }

 

LEADING状态

           //leader.lead();

 

                            //从log中恢复

                            zk.loadData();

 

                            //接受Learner连接

            //Start thread that waits for connection requests from

            // new followers.

            cnxAcceptor = new LearnerCnxAcceptor();

            cnxAcceptor.setName("LearnerCnxAcceptor-" +ss.getLocalSocketAddress());

            cnxAcceptor.start();

 

                            //发送NEWLEADER,并等待回复

            newLeaderProposal.packet = new QuorumPacket(NEWLEADER,zk.getZxid(),

                   null,null);

            waitForNewLeaderAck(self.getId(),zk.getZxid(), LearnerType.PARTICIPANT);

 

                            //

             startZkServer();

 

                            //内部循环

            while (true) {

               //check we have a supporting quorum, so only

               //PARTICIPANT, not OBSERVER, learners should be used

                                     //If not, return

            }

 

//startZKServer

//ZooKeeperServer.startup

       startSessionTracker(); //关闭失效Session

               for(SessionImpl s :sessionExpiryQueue.poll()){

                   setSessionClosing(s.sessionId);

                    expirer.expire(s);

                }

 

       setupRequestProcessors(); //该方法被子类重写

 

//上图转自淘宝技术博客:http://rdc.taobao.com/team/jm/archives/448

 

//Leader责任链

//LeaderZooKeeperServer.setupRequestProcessors

         //对应LeaderZooKeeperServer的第一条责任链

       RequestProcessor finalProcessor = new FinalRequestProcessor(this);

       RequestProcessor toBeAppliedProcessor = new Leader.

ToBeAppliedRequestProcessor(finalProcessor,getLeader());

        commitProcessor = new CommitProcessor(toBeAppliedProcessor,

               Long.toString(getServerId()), false);

        commitProcessor.start();

       ProposalRequestProcessor proposalProcessor = new ProposalRequestProcessor

                                    (this,commitProcessor);

       proposalProcessor.initialize();

        firstProcessor = new PrepRequestProcessor(this,proposalProcessor);

        ((PrepRequestProcessor)firstProcessor).start();

 

         //在调用ProposalRequestProcessor时,设置了另外一条链

         // ProposalRequestProcessor(this,commitProcessor);

         //

       AckRequestProcessor ackProcessor = new AckRequestProcessor(zks.getLeader());

        syncProcessor = new SyncRequestProcessor(zks, ackProcessor);

 

//PrepRequestProcessor在最前, 处理各种请求

//processRequest()将来自客户端或者Follower转发的request添加到submittedRequests

//run()线程消费submittedRequests

         //检查ACL,根据Sequential设置path,

        checkACL(zks,parentRecord.acl, ZooDefs.Perms.CREATE,request.authInfo);

 

        Request request = submittedRequests.take();

 

         //写请求生成一个Txn,然后交给下一个ProposalRequestProcessor

        try {

            switch(request.type) {

            case OpCode.create:

               CreateRequest createRequest = new CreateRequest();

               pRequest2Txn(request.type,zks.getNextZxid(),request, createRequest,true);

                break;

            case OpCode.create2:

               Create2Request create2Request = new Create2Request();

                pRequest2Txn(request.type,zks.getNextZxid(),request, create2Request,true);

                break;

                   }

 

        nextProcessor.processRequest(request);

 

 

//ProposalRequestProcessor任务

//转给CommitProcessor,SyncRequestProcessor。

//发送propose

        if(request instanceof LearnerSyncRequest){

            zks.getLeader().processSync((LearnerSyncRequest)request);

        } else {

            nextProcessor.processRequest(request);

 

            if (request.getHdr()!=null) {

                // We need to sync and get consensus on any transactions

                try {

                    zks.getLeader().propose(request);

                } catch(XidRolloverException e) {

                   throw new RequestProcessorException(e.getMessage(),e);

                }

                syncProcessor.processRequest(request);

            }

        }

 

         //propose

                 QuorumPacket pp = new QuorumPacket(Leader.PROPOSAL,request.zxid,

               baos.toByteArray(), null);

 

                   Proposal p =new Proposal();

                 p.packet = pp;

                 p.request =request;               

 

            lastProposed = p.packet.getZxid();

            outstandingProposals.put(lastProposed,p);

           sendPacket(pp);

 

 

//CommitProcessor消费两个队列:queuedRequests和committedRequests

//queuedRequests保存PrepRequestProcessor线程下发的submittedRequest

//committedRequests保存Proposal通过后,LearnerHandler线程发来的提交请求

         //检查queuedRequests或者committedRequests是否有内容

 

         //如果是写,则作为pendingRequest,等待表决结果返回到committedRequests

         //如果是读,则直接返回本地数据

   request = queuedRequests.poll()//run

if(needCommit(request)) {

        nextPending.set(request);

    } else {

       sendToNextProcessor(request);

    }

 

         // 等待committedRequests在commit()函数中被填充

         //committedRequests.add(request);

 

        request = committedRequests.poll()

 

         // LearnerHandler收到过半ACK消息时,调用tryToCommit -> commit

         //见后文LearnerHandler

 

 

//FinalRequestProcessor处理,FinalRequestProcessor.processRequest(request)

//写ZKDatabase

        Record txn = request.getTxn();

 

        rc = zks.processTxn(hdr,txn);

 

                   //->转给ZkDatabase

               //rc = getZKDatabase().processTxn(hdr, txn);

 

                   //->在dataTree上操作

               //return dataTree.processTxn(hdr,txn);

 

 

//SyncRequestProcessor任务run,

//写日志,超过设置则轮转日志,建立snapshot

//将request记录到磁盘,批处理request,提高io效率

                   while(true ){

                            Request si =queuedRequests.take();

 

                            //记录到log

                            if (zks.getZKDatabase().append(si))

 

                            //如果logCount> (snapCount/2 + randRoll),则建以下线程

                 snapInProcess = new Thread("SnapshotThread") {

                     public void run() {

                         try {

                             zks.takeSnapshot();

                          } catch(Exception e) {

                             LOG.warn("Unexpected exception", e);

                      }

                 };

 

                   //传递任务

        nextProcessor.processRequest(si);

 

 

// AckRequestProcessor 和LearnerHandler一样会处理Follower返回的ACK响应

//发起请求的默认是ACK ?不确定

    /**

     * Forward the request as an ACK to the leader

     */

                   leader.processAck(self.getId(),request.zxid,null);

 

 

//下面介绍前面提到的LearnerHandler

//Leader.lead同时也会创建LearnerCnxAcceptor

        // Start thread that waits for connection requests from

        // new followers.

        cnxAcceptor = new LearnerCnxAcceptor();

        cnxAcceptor.setName("LearnerCnxAcceptor-" +ss.getLocalSocketAddress());

        cnxAcceptor.start();

 

 

//LearnerCnxAcceptor对每个连接,用LearnerHandler处理

        Socket s = ss.accept();

        //start with the initLimit, once the ack is processed

        // in LearnerHandler switch to the syncLimit

       s.setSoTimeout(self.tickTime *self.initLimit);

       s.setTcpNoDelay(nodelay);

       LearnerHandler fh = new LearnerHandler(s, Leader.this);

        fh.start();

 

 

//LearnerHandler线程

         //ACK:Follower对PROPOSAL消息的响应。

         //REQUEST:写请求、同步请求

        while (true) {

            qp = new QuorumPacket();

 

            switch (qp.getType()) {

            case Leader.ACK:

                if (this.learnerType ==LearnerType.OBSERVER) {

                   if (LOG.isDebugEnabled()){

                       LOG.debug("Received ACK from Observer  " +this.sid);

                   }

                }

                leader.processAck(this.sid,qp.getZxid(),

                                                       sock.getLocalSocketAddress());

                break;

 

//Leader.processAck

        boolean hasCommitted = tryToCommit(p,zxid, followerAddr);

 

//Leader.tryToCommit

//如果有过半的voter通过,则发送commit请求,添加到committedRequests队列

        //getting a quorum from all necessary configurations

        if(!p.hasAllQuorums()) {

           return false;                

        }

 

       commit(zxid);

        inform(p);

 

 

//客户端请求流程

//找出哪里调用了第一个processRequest

         //ZooKeeperServer.submitRequest

             firstProcessor.processRequest(si);//PrepRequestProcessor

 

         //<- ZooKeeperServer.createSession

    //  Request si = new Request(cnxn, sessionId, xid, type, bb, authInfo);

//  submitRequest(si);

             submitRequest(cnxn, sessionId, OpCode.createSession, 0,to,null);

 

         //<- ZooKeeperServer.processConnectRequest

             createSession(cnxn, passwd,sessionTimeout);

 

         //<- NIOServerCnxn.readConnectRequest

               zkServer.processConnectRequest(this,incomingBuffer);

 

 

 

//Follower责任链

//FollowerZooKeeperServer.setupRequestProcessors

       RequestProcessor finalProcessor = new FinalRequestProcessor(this);

        commitProcessor = new CommitProcessor(finalProcessor,

               Long.toString(getServerId()), true);

        commitProcessor.start();

        firstProcessor = new FollowerRequestProcessor(this,commitProcessor);

       ((FollowerRequestProcessor) firstProcessor).start();

 

        syncProcessor = new SyncRequestProcessor(this,

                new SendAckRequestProcessor((Learner)getFollower()));

        syncProcessor.start();

 

                   // 两条线

// 触发第一条线的仍是基类 ZooKeeperServer.submitRequest

 

// Follower继承自Learner,与Leader建立了socket连接

// Follower.followLeader处理的消息

         PING:返回PING给Leader

         PROPOSAL:放入pendingTxns队列,转发给SyncRequestProcessor线程

         COMMIT:比较和pendingTxns队首zxid是否相同,相同交给commitProcessor/退出

         UPTODATE:同步后,Leader发送此消息,表示follower可以提供服务了

         SYNC:返回SYNC结果到客户端,对应于Paxos中的慢速读

             while (self.isRunning()){

                 readPacket(qp);

                 processPacket(qp);

             }

 

         //Leader.PROPOSAL:

         // fzk.logRequest(hdr,txn);

        Request request = new Request(hdr.getClientId(),

hdr.getCxid(),hdr.getType(), hdr, txn, hdr.getZxid());

        if((request.zxid & 0xffffffffL) != 0) {

            pendingTxns.add(request);

        }

        syncProcessor.processRequest(request);

 

 

// syncProcessor后接 SendAckRequestProcessor

// SendAckRequestProcessor

         //发送ACK给Leader

       QuorumPacket qp= new QuorumPacket(Leader.ACK,si.getHdr().getZxid(),null,null);

learner.writePacket(qp,false);

 

 

//FollowerRequestProcessor 将请求添加到queuedRequests,交给run线程处理

        if (!finished) {

            queuedRequests.add(request);

        }

 

//run消费queuedRequests

        // We want to queue the request to be processed before we submit

        // the request to the leader so that we are ready to receive the response

        nextProcessor.processRequest(request);//CommitProcessor

 

//发现如果是写请求,则发送REQUEST消息给Leader

        zks.getFollower().request(request);

 

 

//CommitProcessor同Leader

//如果是写,则作为pendingRequest,等待表决结果返回到committedRequests

 

//FinalRequestProcessor 同Leader,更新zkDatabase

 

 

Follower提交请求,更新log,回复ACK,收到COMMIT后更新zkDatabase


//Observer责任链

       RequestProcessor finalProcessor = new FinalRequestProcessor(this);

        commitProcessor = new CommitProcessor(finalProcessor,

               Long.toString(getServerId()), true);

        commitProcessor.start();

        firstProcessor = new ObserverRequestProcessor(this,commitProcessor);

       ((ObserverRequestProcessor) firstProcessor).start();

 

        syncProcessor = new SyncRequestProcessor(this,

                new SendAckRequestProcessor(getObserver()));

        syncProcessor.start();

 

         //行为几乎和Follower一样,只是不参与投票(Observer.processPacket忽略消息)

         //忽略Leader.PROPOSAL、Leader.COMMIT



//以下图片及文字转自淘宝技术博客:http://rdc.taobao.com/team/jm/archives/448
http://rdc.taobao.com/team/jm/files/2010/11/zookeeper-3.3.1-message-flow2.gif

ZooKeeper最特别的一点是,Leader在发送PROPOSAL消息之前,以及Follower接收到PROPOSAL消息之后,都会立即将消息记录到日志中。这样在收到过半的ACK之后,即可确认消息已经在过半的server中保存过了。即使之后的COMMIT消息发送失败,该消息在事实上也已经通过。丢失COMMIT消息的follower会在下一个事务中发现这一点,并自动退出,通过重启来重新取得一致性。



参考: Paxos算法之旅(四)zookeeper代码解析
原创粉丝点击