diff --git a/conn/node.go b/conn/node.go index 644756161b4..8652e9e474c 100644 --- a/conn/node.go +++ b/conn/node.go @@ -40,6 +40,34 @@ var ( ErrNoNode = errors.Errorf("No node has been set up yet") ) +const ( + heartbeatTick = 1 + defaultElectionTick = 20 + recommendedElectionTick = 10 +) + +func normalizeElectionTick(electionTick int) (tick int, warning string) { + if electionTick < 0 { + return defaultElectionTick, fmt.Sprintf( + "--raft election-tick=%d is invalid; defaulting to %d. Use 0 or omit the flag to accept the default.", + electionTick, defaultElectionTick) + } + if electionTick == 0 { + return defaultElectionTick, "" + } + if electionTick <= heartbeatTick { + glog.Fatalf("invalid --raft election-tick=%d: must be greater than internal heartbeat tick (%d).", + electionTick, heartbeatTick) + } + if electionTick < recommendedElectionTick { + return electionTick, fmt.Sprintf( + "--raft election-tick=%d gives a %dms minimum election timeout. Values below %d (1s) "+ + "may cause spurious leader elections under GC pauses or network jitter.", + electionTick, electionTick*100, recommendedElectionTick) + } + return electionTick, "" +} + // Node represents a node participating in the RAFT protocol. type Node struct { x.SafeMutex @@ -79,7 +107,18 @@ type Node struct { } // NewNode returns a new Node instance. -func NewNode(rc *pb.RaftContext, store *raftwal.DiskStorage, tlsConfig *tls.Config) *Node { +// electionTick controls how many Raft Tick() calls pass before election timeout. +// In production Alpha/Zero, Tick() runs every 100ms and Raft randomizes the timeout. +// If electionTick <= 0, defaults to 20. +func NewNode(rc *pb.RaftContext, store *raftwal.DiskStorage, tlsConfig *tls.Config, + electionTick int) *Node { + + var warning string + electionTick, warning = normalizeElectionTick(electionTick) + if warning != "" { + glog.Warningf(warning) + } + snap, err := store.Snapshot() x.Check(err) @@ -90,8 +129,8 @@ func NewNode(rc *pb.RaftContext, store *raftwal.DiskStorage, tlsConfig *tls.Conf Store: store, Cfg: &raft.Config{ ID: rc.Id, - ElectionTick: 20, // 2s if we call Tick() every 100 ms. - HeartbeatTick: 1, // 100ms if we call Tick() every 100 ms. + ElectionTick: electionTick, + HeartbeatTick: heartbeatTick, // 100ms if we call Tick() every 100 ms. Storage: store, MaxInflightMsgs: 256, MaxSizePerMsg: 256 << 10, // 256 KB should allow more batching. diff --git a/conn/node_test.go b/conn/node_test.go index 66cb421c139..bcc9e1cec0a 100644 --- a/conn/node_test.go +++ b/conn/node_test.go @@ -53,7 +53,7 @@ func TestProposal(t *testing.T) { store := raftwal.Init(dir) rc := &pb.RaftContext{Id: 1} - n := NewNode(rc, store, nil) + n := NewNode(rc, store, nil, 0) peers := []raft.Peer{{ID: n.Id}} n.SetRaft(raft.StartNode(n.Cfg, peers)) @@ -71,3 +71,63 @@ func TestProposal(t *testing.T) { } wg.Wait() } + +func TestNormalizeElectionTick(t *testing.T) { + tests := []struct { + name string + electionTick int + wantTick int + wantWarning string + }{ + { + name: "zero defaults silently", + electionTick: 0, + wantTick: 20, + wantWarning: "", + }, + { + name: "negative defaults with warning", + electionTick: -1, + wantTick: 20, + wantWarning: "--raft election-tick=-1 is invalid; defaulting to 20. Use 0 or omit the flag to accept the default.", + }, + { + name: "large negative defaults with warning", + electionTick: -100, + wantTick: 20, + wantWarning: "--raft election-tick=-100 is invalid; defaulting to 20. Use 0 or omit the flag to accept the default.", + }, + { + name: "low valid value warns below recommended", + electionTick: 2, + wantTick: 2, + wantWarning: "--raft election-tick=2 gives a 200ms minimum election timeout. Values below 10 (1s) may cause spurious leader elections under GC pauses or network jitter.", + }, + { + name: "recommended minimum no warning", + electionTick: 10, + wantTick: 10, + wantWarning: "", + }, + { + name: "default value no warning", + electionTick: 20, + wantTick: 20, + wantWarning: "", + }, + { + name: "large value accepted", + electionTick: 100, + wantTick: 100, + wantWarning: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + gotTick, gotWarning := normalizeElectionTick(tc.electionTick) + require.Equal(t, tc.wantTick, gotTick) + require.Equal(t, tc.wantWarning, gotWarning) + }) + } +} diff --git a/dgraph/cmd/alpha/run.go b/dgraph/cmd/alpha/run.go index 901d7b0a808..2cca04d4a85 100644 --- a/dgraph/cmd/alpha/run.go +++ b/dgraph/cmd/alpha/run.go @@ -164,6 +164,10 @@ they form a Raft group and provide synchronous replication. "to 0 to disable duration based snapshot."). Flag("pending-proposals", "Number of pending mutation proposals. Useful for rate limiting."). + Flag("election-tick", + "Number of Raft ticks before a follower starts an election. Each production tick is "+ + "100ms and Raft randomizes the timeout between N and 2N-1 ticks. Default 20 "+ + "means ~2s-4s; values below 10 may cause spurious elections under jitter."). String()) flag.String("security", worker.SecurityDefaults, z.NewSuperFlagHelp(worker.SecurityDefaults). diff --git a/dgraph/cmd/zero/raft.go b/dgraph/cmd/zero/raft.go index f9fea51f82b..14bf9cec25c 100644 --- a/dgraph/cmd/zero/raft.go +++ b/dgraph/cmd/zero/raft.go @@ -37,7 +37,7 @@ import ( ) const ( - raftDefaults = "idx=1; learner=false;" + raftDefaults = "idx=1; learner=false; election-tick=20;" ) var proposalKey uint64 diff --git a/dgraph/cmd/zero/run.go b/dgraph/cmd/zero/run.go index 4dc39401357..48a82b46544 100644 --- a/dgraph/cmd/zero/run.go +++ b/dgraph/cmd/zero/run.go @@ -105,6 +105,10 @@ instances to achieve high-availability. Flag("learner", `Make this Zero a "learner" node. In learner mode, this Zero will not participate `+ "in Raft elections. This can be used to achieve a read-only replica."). + Flag("election-tick", + "Number of Raft ticks before a follower starts an election. Each production tick is "+ + "100ms and Raft randomizes the timeout between N and 2N-1 ticks. Default 20 "+ + "means ~2s-4s; values below 10 may cause spurious elections under jitter."). String()) flag.String("audit", worker.AuditDefaults, z.NewSuperFlagHelp(worker.AuditDefaults). @@ -160,7 +164,8 @@ func (st *state) serveGRPC(l net.Listener, store *raftwal.DiskStorage) { Group: 0, IsLearner: opts.raft.GetBool("learner"), } - m := conn.NewNode(&rc, store, opts.tlsClientConfig) + electionTick := opts.raft.GetInt64("election-tick") + m := conn.NewNode(&rc, store, opts.tlsClientConfig, int(electionTick)) // Zero followers should not be forwarding proposals to the leader, to avoid txn commits which // were calculated in a previous Zero leader. diff --git a/worker/draft.go b/worker/draft.go index c2cb9947519..2709d6d61dd 100644 --- a/worker/draft.go +++ b/worker/draft.go @@ -264,7 +264,8 @@ func newNode(store *raftwal.DiskStorage, gid uint32, id uint64, myAddr string) * IsLearner: isLearner, } glog.Infof("RaftContext: %+v\n", rc) - m := conn.NewNode(rc, store, x.WorkerConfig.TLSClientConfig) + electionTick := x.WorkerConfig.Raft.GetInt64("election-tick") + m := conn.NewNode(rc, store, x.WorkerConfig.TLSClientConfig, int(electionTick)) n := &node{ Node: m, diff --git a/worker/server_state.go b/worker/server_state.go index 0591ccb4b5b..2286c09f4b4 100644 --- a/worker/server_state.go +++ b/worker/server_state.go @@ -31,7 +31,7 @@ const ( AuditDefaults = `compress=false; days=10; size=100; dir=; output=; encrypt-file=;` BadgerDefaults = `compression=snappy; numgoroutines=8;` RaftDefaults = `learner=false; snapshot-after-entries=10000; ` + - `snapshot-after-duration=30m; pending-proposals=256; idx=; group=;` + `snapshot-after-duration=30m; pending-proposals=256; idx=; group=; election-tick=20;` SecurityDefaults = `token=; whitelist=;` CDCDefaults = `file=; kafka=; sasl_user=; sasl_password=; ca_cert=; client_cert=; ` + `client_key=; sasl-mechanism=PLAIN; tls=false;`