Raft一致性协议实现源码
来源:互联网 发布:php redis 消息队列 编辑:程序博客网 时间:2024/06/05 04:37
Leader.go
func (rf *Raft) leaderCommit() {if rf.role != LEADER {return}// find the first entry in current termminIdx := 0for i := len(rf.log) - 1; i > 0; i-- {if rf.log[i].Term == rf.currentTerm {minIdx = i} else if rf.log[i].Term < rf.currentTerm {break} else {// already lost leadership, but haven't applied this changerf.logger.Trace.Printf("get term %v > current term %v, in server %v, is leader %v\n", rf.log[i].Term, rf.currentTerm, rf.me, rf.role == LEADER)return}}if minIdx == 0 {// can't find entry in current term// unsafe to commitreturn}minIdx += int(rf.startIdx)// find the safe upper boundupperBound := rf.commitIdxfor minIdx < len(rf.log)+int(rf.startIdx) {replicatedNum := 1safe := false// loop all peers to check whether this entry is replicatedfor i := 0; i < len(rf.peers); i++ {if i == rf.me {continue}if int(rf.matchIdx[i]) >= minIdx {// entry minIdx has replicated in server ireplicatedNum++if replicatedNum > len(rf.peers)/2 {// replicated in the majoritysafe = trueupperBound = uint64(minIdx)minIdx++break}}}if !safe {break}}cId := rf.commitIdx + 1rf.logger.Trace.Printf("leader %v upperbound %v min %v\n", rf.me, upperBound, cId)for cId <= upperBound {if cId >= uint64(len(rf.log))+rf.startIdx {rf.logger.Error.Fatalln("out of bound")}rf.logger.Trace.Printf("leader %v commit %v %v", rf.me, cId, rf.log[cId-rf.startIdx])rf.applyCh <- ApplyMsg{int(cId), rf.log[cId-rf.startIdx].Command, false, nil}rf.commitIdx = cIdrf.persist()cId++}}
其中 startIdx(即代码中的 rf.startIdx)表示 Raft 中的 server 进行 snapshot 之后,rf.log 第一个元素所对应的全局日志下标;在它之前的 log 已经被丢弃。
例如,此时 startIdx 值为 10,也就是下标 0~9 的条目已被丢弃,下次故障恢复从下标 10 开始,rf.log 数组只保存下标 10 及之后的条目。因此,全局下标 idx 对应 rf.log[idx - startIdx],合法的全局下标范围是 startIdx 到 startIdx + len(rf.log) - 1。
状态 所有服务器上持久存在的
func (rf *Raft) sync(server int) (bool, uint64) {rf.mu.Lock()if rf.role != LEADER {rf.mu.Unlock()return false, 0}var matchedLogIdx uint64var matchedTerm uint64var entries []Entry = nilvar snapshot []byte = nilif rf.nextIdx[server]-1 < rf.startIdx {// a slow follower, send snapshotrf.logger.Trace.Printf("server %v is a slow server, send snapshot and whole log to it\n", server)matchedLogIdx = rf.startIdxmatchedTerm = rf.startTermsnapshot = rf.persister.ReadSnapshot()entries = rf.logif len(entries) == 0 {rf.logger.Error.Fatalln("0 log")}} else if rf.nextIdx[server]-1 == rf.startIdx {rf.logger.Trace.Printf("server %v matched to startIdx %v\n", server, rf.nextIdx[server]-1)matchedLogIdx = rf.startIdxmatchedTerm = rf.startTermentries = rf.log[matchedLogIdx-rf.startIdx+1:]} else if rf.nextIdx[server] == rf.matchIdx[server]+1 {// consistentmatchedLogIdx = rf.matchIdx[server]if matchedLogIdx+1 <= uint64(len(rf.log))+rf.startIdx {entries = rf.log[matchedLogIdx-rf.startIdx+1:]} else if matchedLogIdx == uint64(len(rf.log))+rf.startIdx {rf.logger.Warning.Printf("%v matched to %v, log len in master %v %v\n", server, matchedLogIdx, rf.me, len(rf.log))}matchedTerm = rf.log[matchedLogIdx-rf.startIdx].Term} else {// haven't achieve consistency, but follower is up-to-datematchedLogIdx = rf.nextIdx[server] - 1entries = rf.log[matchedLogIdx-rf.startIdx+1:]matchedTerm = rf.log[matchedLogIdx-rf.startIdx].Term}rf.mu.Unlock()args := AppendEntriesArgs{rf.currentTerm, rf.me, matchedLogIdx, matchedTerm, entries, rf.commitIdx, snapshot}rf.logger.Trace.Printf("leader %v to %v send %+v \n", rf.me, server, args)reply := new(AppendEntriesReply)ok := rf.sendAppendEntries(server, args, reply)if !ok {return false, 0}rf.mu.Lock()defer rf.mu.Unlock()if rf.role != LEADER {return false, 0}if reply.Term == rf.currentTerm {if reply.MatchedId >= uint64(len(rf.log))+rf.startIdx {rf.logger.Error.Fatalf("follower %v commit %v more than leader %v's logsize %v, commit %v, term %v, follower size %v\n", server, 
reply.MatchedId, rf.me, uint64(len(rf.log))+rf.startIdx, rf.commitIdx, rf.currentTerm, reply.LogSize)}rf.matchIdx[server] = reply.MatchedIdrf.nextIdx[server] = reply.MatchedId + 1if rf.matchIdx[server] == uint64(len(rf.log))+rf.startIdx {rf.logger.Warning.Printf("%v matched to %v, log len in master %v log len %v, commit %v, term %v follower size %v\n", server, rf.matchIdx[server], rf.me, len(rf.log), rf.commitIdx, rf.currentTerm, reply.LogSize)}}if reply.Success {rf.leaderCommit()}return true, reply.Term}
func (rf *Raft) broadcastHeartBeat() {waitTime := time.Duration(HEARTBEATINTERVAL)timmer := time.NewTimer(waitTime * time.Millisecond)for {if rf.role != LEADER {log.Fatalf("call broadcast heartbeat, but I'm not a leader\n")}// send out heartheat every HEARTBEATINTERVAL msstaleSignal := make(chan bool, len(rf.peers)-1)// broadcast heartheat in parallelfor i := 0; i < len(rf.peers); i++ {if i == rf.me {// escape myselfcontinue}go func(server int) {ok, term := rf.sync(server)if ok && term > rf.currentTerm {staleSignal <- true}}(i)}endLoop := falsefor !endLoop {select {case <-rf.kill:returncase <-staleSignal:// my Term is stale// convert to follower stagerf.mu.Lock()rf.role = FOLLOWERrf.mu.Unlock()rf.logger.Trace.Printf("leader %v is stale, turns to follower\n", rf.me)go rf.heartBeatTimer()returncase msg := <-rf.heartBeatCh:// get a heart beat from othersif rf.currentTerm == msg.Term {// in this Term, there are 2 leaders// impossiblerf.logger.Error.Fatalf("in leader %v's broadcast, receive the same heartbeat Term, value: %v leader: %v\n", rf.me, msg.Term, msg.LeaderId)} else if rf.currentTerm < msg.Term {// heart beat from a superior leaderrf.mu.Lock()rf.role = FOLLOWERrf.currentTerm = msg.Termrf.votedFor = TermLeader{msg.Term, msg.LeaderId}rf.mu.Unlock()rf.logger.Trace.Printf("leader %v finds a superior leader %v, turns to follower\n", rf.me, rf.votedFor)go rf.heartBeatTimer()return}case <-timmer.C:// begin another broadcast roundendLoop = truetimmer.Reset(waitTime * time.Millisecond)break}}}}switch语句中的case msg := <-rf.heartBeatCh: 选项表示:
(以上代码和follower相似)
On conversion to candidate, start election:
• Increment currentTerm
• Vote for self
• Reset election timer
Send RequestVote RPCs to all other servers
• If votes received from majority of servers: become leader
• If AppendEntries RPC received from new leader: convert to
follower
• If election timeout elapses: start new election(case <-timmer.C, 表示超时)
heartBeatCh表示心跳包
Raft.go
// return currentTerm and whether this server// believes it is the leader.func (rf *Raft) GetState() (int, bool) {rf.mu.Lock()defer rf.mu.Unlock()return int(rf.currentTerm), rf.role == LEADER}两个返回值,1.当前节点的currentTerm和当前的节点是否是leader
//// the service using Raft (e.g. a k/v server) wants to start// agreement on the next command to be appended to Raft's log. if this// server isn't the leader, returns false. otherwise start the// agreement and return immediately. there is no guarantee that this// command will ever be committed to the Raft log, since the leader// may fail or lose an election.//// the first return value is the index that the command will appear at// if it's ever committed. the second return value is the current// Term. the third return value is true if this server believes it is// the leader.// 第一个参数表示,这条指令在server的什么位置,第二个参数表示当前的纪元,第三个表示是否是leader节点func (rf *Raft) Start(command interface{}) (int, int, bool) {rf.mu.Lock()defer rf.mu.Unlock()if rf.role != LEADER {return -1, -1, false}//for idx, entry := range rf.log {//if entry.Command == command {//return idx, int(entry.Term), true//}//}index := len(rf.log)Term := rf.currentTermrf.log = append(rf.log, Entry{Term, command})rf.matchIdx[rf.me] = uint64(len(rf.log)) - 1rf.nextIdx[rf.me] = uint64(len(rf.log))rf.logger.Trace.Printf("start a new cmd in server %v term %v\n", rf.me, rf.currentTerm)rf.persist()for i := 0; i < len(rf.peers); i++ {if i == rf.me {continue}go rf.sync(i)}rf.logger.Trace.Printf("new entry %v start in leader %v, index %v, term %v, log size %v\n", command, rf.me, index, Term, len(rf.log))return index + int(rf.startIdx), int(Term), true}start函数表示,发起一次选举,该条指令是否要append到raft的leader的log里。
follower.go
package raft

import (
	"time"
)

// heartBeatTimer is the follower's main loop. It waits for heartbeats on
// rf.heartBeatCh; each heartbeat whose term is not older than rf.currentTerm
// is adopted (term and vote are persisted) and restarts the timeout. When the
// timeout fires first, it launches rf.election for the next term and returns.
// It also returns when rf.kill fires.
func (rf *Raft) heartBeatTimer() {
	// Within one run of this loop the same randomised timeout is reused.
	waitTime := time.Duration(HEARTHEATTIMEOUTBASE + rf.rand.Intn(HEARTBEATTIMEOUTRANGE))
	timmer := time.NewTimer(waitTime * time.Millisecond)
	// Release the timer on every return path.
	defer timmer.Stop()

	for {
		if rf.role != FOLLOWER {
			rf.logger.Error.Fatalln("call heartBeatTimer, but I'm not a follower")
		}
		// Loop until the timeout fires or a valid heartbeat arrives.
		endLoop := false
		for !endLoop {
			select {
			case <-rf.kill:
				return
			case msg := <-rf.heartBeatCh:
				if rf.currentTerm > msg.Term {
					// Stale heartbeat: ignore it and keep waiting.
					// Printf (not Println) so the %v verb is expanded.
					rf.logger.Trace.Printf("%v receive a stale heartbeat\n", rf.me)
				} else {
					// Valid heartbeat: adopt its term and leader, then
					// wait for the next one.
					rf.mu.Lock()
					rf.currentTerm = msg.Term
					rf.votedFor = TermLeader{msg.Term, msg.LeaderId}
					rf.persist()
					rf.mu.Unlock()
					// The timer may have fired while this heartbeat was
					// being handled. Stop it and drain any pending tick
					// before Reset, otherwise the stale tick would trigger
					// a spurious election on the next pass.
					if !timmer.Stop() {
						select {
						case <-timmer.C:
						default:
						}
					}
					timmer.Reset(waitTime * time.Millisecond)
					endLoop = true
				}
			case <-timmer.C:
				// Timed out: stop the heartbeat timer and start an
				// election for the next term.
				go rf.election(rf.currentTerm + 1)
				return
			}
		}
	}
}
config.go
type config struct {mu sync.Mutext *testing.Tnet *labrpc.Networkn intdone int32 // tell internal threads to dierafts []*RaftapplyErr []string // from apply channel readersconnected []bool // whether each server is on the net(每个K/V Server是否联网)saved []*Persisterendnames [][]string // the port file names each sends tologs []map[int]int // copy of each server's committed entries}
func make_config(t *testing.T, n int, unreliable bool) *config {runtime.GOMAXPROCS(4)cfg := &config{}cfg.t = tcfg.net = labrpc.MakeNetwork()cfg.n = n //(k/v server 数量)cfg.applyErr = make([]string, cfg.n)cfg.rafts = make([]*Raft, cfg.n)cfg.connected = make([]bool, cfg.n)cfg.saved = make([]*Persister, cfg.n)cfg.endnames = make([][]string, cfg.n)cfg.logs = make([]map[int]int, cfg.n)cfg.setunreliable(unreliable)cfg.net.LongDelays(true)// create a full set of Rafts.for i := 0; i < cfg.n; i++ {cfg.logs[i] = map[int]int{}cfg.start1(i)}// connect everyonefor i := 0; i < cfg.n; i++ {cfg.connect(i)}return cfg}
// attach server i to the net.func (cfg *config) connect(i int) {// fmt.Printf("connect(%d)\n", i)cfg.connected[i] = true// outgoing ClientEndsfor j := 0; j < cfg.n; j++ {if cfg.connected[j] {endname := cfg.endnames[i][j]cfg.net.Enable(endname, true)}}// incoming ClientEndsfor j := 0; j < cfg.n; j++ {if cfg.connected[j] {endname := cfg.endnames[j][i]cfg.net.Enable(endname, true)}}}测试,发送如果i号k/v server与j号k/v server联通
接收返回值,
func (cfg *config) checkOneLeader() int {for iters := 0; iters < 10; iters++ {time.Sleep(500 * time.Millisecond)leaders := make(map[int][]int)for i := 0; i < cfg.n; i++ {if cfg.connected[i] {if t, leader := cfg.rafts[i].GetState(); leader { leaders[t] = append(leaders[t], i)}}}lastTermWithLeader := -1for t, leaders := range leaders {if len(leaders) > 1 {cfg.t.Fatalf("term %d has %d (>1) leaders", t, len(leaders))}if t > lastTermWithLeader {lastTermWithLeader = t}}if len(leaders) != 0 {return leaders[lastTermWithLeader][0]}}cfg.t.Fatalf("expected one leader, got none")return -1}
1、GetState两个返回值,1.当前节点的currentTerm和当前的节点是否是leader
2、leaders := make(map[int][]int),key/value的形式为:键为int型(term),值为int切片(在该term下自认为是leader的server编号)。当判断出一个k/v server是leader,就会在leaders中添加一条记录。
3、t记录leader的当前term
// check that everyone agrees on the term.func (cfg *config) checkTerms() int {term := -1for i := 0; i < cfg.n; i++ {if cfg.connected[i] {xterm, _ := cfg.rafts[i].GetState()if term == -1 {term = xterm} else if term != xterm {cfg.t.Fatalf("servers disagree on term")}}}return term}判断是否所有的k/v server都在同一个term,首先获取已经连接的k/v server的currentTerm(term = xterm),接下来只要有一个不等,就返回错误
test_test.go
func TestInitialElection(t *testing.T) {servers := 3cfg := make_config(t, servers, false)defer cfg.cleanup()fmt.Printf("Test: initial election ...\n")// is a leader elected?cfg.checkOneLeader()// does the leader+term stay the same there is no failure?term1 := cfg.checkTerms()time.Sleep(2 * RaftElectionTimeout)term2 := cfg.checkTerms()if term1 != term2 {fmt.Printf("warning: term changed even though there were no failures")}fmt.Printf(" ... Passed\n")}1、判断k/v server集群中是否有leader选举出
1 0
- Raft一致性协议实现源码
- Raft一致性协议
- Raft一致性协议
- 一致性协议:RAFT
- Raft分布式一致性协议
- raft一致性协议(转载)
- 分布式一致性协议Raft原理与实例
- 分布式一致性协议Raft原理与实例
- 分布式一致性协议Raft原理与实例
- 分布式一致性协议Raft原理与实例
- 分布式一致性协议Raft原理与实例
- (笔记)理解和学习分布式一致性协议:raft
- 分布式一致性协议Raft,以及难搞的Paxos
- 分布式一致性算法Paxos,Zookeeper的ZAB协议,Raft算法
- raft协议的go语言实现
- Raft一致性算法
- Raft一致性算法
- RAFT分布式一致性算法
- Fatal NI connect error 12170错误解决办法
- ARM64的__create_page_tables代码走读
- Lamda OrderBy之后再GroupBy,排序没有作用.
- spring boot web
- HDU 2686 Matrix By Assassin 多线程dp
- Raft一致性协议实现源码
- body{font-size: 62.5%;}这种写法的原因
- SQL那些事儿(十二)--DATASET 与 DATAREADER区别
- python爬虫笔记 --------scrapy框架(1)
- Android照片墙加强版,使用ViewPager实现画廊效果
- 每日一练---js动态添加删除table行
- Red Hat Linux 6 安装 JDK8 和 Tomcat8
- leetcode (23) - Merge k Sorted Lists
- CI应用程序流程图