This article collects typical usage examples of the Golang Timer.Read method from github.com/cockroachdb/cockroach/util/timeutil. If you are wondering what Timer.Read does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of the enclosing type, github.com/cockroachdb/cockroach/util/timeutil.Timer.
The sections below show 13 code examples of the Timer.Read method, sorted by popularity by default.
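Before the examples, here is a minimal, self-contained sketch of the idiom they all share (the function and channel names below are illustrative, not taken from the CockroachDB sources): timeutil.Timer wraps time.Timer so it can be reused across loop iterations, and the contract is that after receiving from Timer.C the caller sets Timer.Read = true so the next Reset knows the channel has already been drained.

package main

import (
	"fmt"
	"time"

	"github.com/cockroachdb/cockroach/util/timeutil"
)

// pollUntilDone fires periodically until the done channel is closed.
func pollUntilDone(done <-chan struct{}, interval time.Duration) {
	var timer timeutil.Timer
	defer timer.Stop()
	for {
		timer.Reset(interval)
		select {
		case <-timer.C:
			// Mark the channel as read before the next Reset.
			timer.Read = true
			fmt.Println("tick")
		case <-done:
			return
		}
	}
}

func main() {
	done := make(chan struct{})
	go func() {
		time.Sleep(350 * time.Millisecond)
		close(done)
	}()
	pollUntilDone(done, 100*time.Millisecond)
}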
Example 1: WaitForRebalance
// WaitForRebalance waits until there's been no recent range adds, removes, and
// splits. We wait until the cluster is balanced or until `StableInterval`
// elapses, whichever comes first. Then, it prints stats about the rebalancing
// process. If the replica count appears unbalanced, an error is returned.
//
// This method is crude but necessary. If we were to wait until range counts
// were just about even, we'd miss potential post-rebalance thrashing.
func (at *allocatorTest) WaitForRebalance(t *testing.T) error {
	const statsInterval = 20 * time.Second
	db, err := gosql.Open("postgres", at.f.PGUrl(0))
	if err != nil {
		return err
	}
	defer func() {
		_ = db.Close()
	}()
	var statsTimer timeutil.Timer
	var assertTimer timeutil.Timer
	defer statsTimer.Stop()
	defer assertTimer.Stop()
	statsTimer.Reset(statsInterval)
	assertTimer.Reset(0)
	for {
		select {
		case <-statsTimer.C:
			statsTimer.Read = true
			stats, err := at.allocatorStats(db)
			if err != nil {
				return err
			}
			log.Info(context.Background(), stats)
			if StableInterval <= stats.ElapsedSinceLastEvent {
				host := at.f.Nodes()[0]
				log.Infof(context.Background(), "replica count = %f, max = %f", stats.ReplicaCountStdDev, *flagATMaxStdDev)
				if stats.ReplicaCountStdDev > *flagATMaxStdDev {
					_ = at.printRebalanceStats(db, host)
					return errors.Errorf(
						"%s elapsed without changes, but replica count standard "+
							"deviation is %.2f (>%.2f)", stats.ElapsedSinceLastEvent,
						stats.ReplicaCountStdDev, *flagATMaxStdDev)
				}
				return at.printRebalanceStats(db, host)
			}
			statsTimer.Reset(statsInterval)
		case <-assertTimer.C:
			assertTimer.Read = true
			at.f.Assert(t)
			assertTimer.Reset(time.Minute)
		case <-stopper:
			return errors.New("interrupted")
		}
	}
}
Example 2: processQueue
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let raft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *RaftTransport) processQueue(ch chan *RaftMessageRequest, toReplica roachpb.ReplicaDescriptor) error {
	addr, err := t.resolver(toReplica.NodeID)
	if err != nil {
		return err
	}
	conn, err := t.rpcContext.GRPCDial(addr.String())
	if err != nil {
		return err
	}
	client := NewMultiRaftClient(conn)
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()
	stream, err := client.RaftMessage(ctx)
	if err != nil {
		return err
	}
	errCh := make(chan error, 1)
	// Starting workers in a task prevents data races during shutdown.
	if err := t.rpcContext.Stopper.RunTask(func() {
		t.rpcContext.Stopper.RunWorker(func() {
			errCh <- stream.RecvMsg(new(RaftMessageResponse))
		})
	}); err != nil {
		return err
	}
	var raftIdleTimer timeutil.Timer
	defer raftIdleTimer.Stop()
	for {
		raftIdleTimer.Reset(raftIdleTimeout)
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return nil
		case <-raftIdleTimer.C:
			raftIdleTimer.Read = true
			return nil
		case err := <-errCh:
			return err
		case req := <-ch:
			err := stream.Send(req)
			if req.Message.Type == raftpb.MsgSnap {
				select {
				case <-t.rpcContext.Stopper.ShouldStop():
					return nil
				case t.SnapshotStatusChan <- RaftSnapshotStatus{req, err}:
				}
			}
			if err != nil {
				return err
			}
		}
	}
}
Example 3: runHeartbeat
func (ctx *Context) runHeartbeat(cc *grpc.ClientConn, remoteAddr string) error {
	request := PingRequest{Addr: ctx.Addr}
	heartbeatClient := NewHeartbeatClient(cc)
	var heartbeatTimer timeutil.Timer
	defer heartbeatTimer.Stop()
	for {
		// If we should stop, return immediately. Note that we check this
		// at the beginning and end of the loop because we may 'continue'
		// before reaching the end.
		select {
		case <-ctx.Stopper.ShouldStop():
			return nil
		default:
		}
		sendTime := ctx.localClock.PhysicalTime()
		response, err := ctx.heartbeat(heartbeatClient, request)
		ctx.setConnHealthy(remoteAddr, err == nil)
		if err != nil {
			if grpc.Code(err) == codes.DeadlineExceeded {
				continue
			}
			return err
		}
		receiveTime := ctx.localClock.PhysicalTime()
		// Only update the clock offset measurement if we actually got a
		// successful response from the server.
		if pingDuration := receiveTime.Sub(sendTime); pingDuration > maximumPingDurationMult*ctx.localClock.MaxOffset() {
			request.Offset.Reset()
		} else {
			// Offset and error are measured using the remote clock reading
			// technique described in
			// http://se.inf.tu-dresden.de/pubs/papers/SRDS1994.pdf, page 6.
			// However, we assume that drift and min message delay are 0, for
			// now.
			request.Offset.MeasuredAt = receiveTime.UnixNano()
			request.Offset.Uncertainty = (pingDuration / 2).Nanoseconds()
			remoteTimeNow := time.Unix(0, response.ServerTime).Add(pingDuration / 2)
			request.Offset.Offset = remoteTimeNow.Sub(receiveTime).Nanoseconds()
		}
		ctx.RemoteClocks.UpdateOffset(remoteAddr, request.Offset)
		if cb := ctx.HeartbeatCB; cb != nil {
			cb()
		}
		// Wait after the heartbeat so that the first iteration gets a wait-free
		// heartbeat attempt.
		heartbeatTimer.Reset(ctx.HeartbeatInterval)
		select {
		case <-ctx.Stopper.ShouldStop():
			return nil
		case <-heartbeatTimer.C:
			heartbeatTimer.Read = true
		}
	}
}
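The offset arithmetic in the heartbeat loop above is easy to miss among the surrounding plumbing. Here is a small standalone sketch of it (the function and variable names are illustrative, not part of the rpc package): the remote clock is estimated as the server's reported time plus half the round trip, and half the round trip also serves as the uncertainty bound.

package main

import (
	"fmt"
	"time"
)

// estimateOffset mirrors the calculation in runHeartbeat: given the local
// send/receive times of a ping and the server's clock reading, it returns the
// estimated clock offset and its uncertainty, both in nanoseconds.
func estimateOffset(sendTime, receiveTime time.Time, serverTimeNanos int64) (offset, uncertainty int64) {
	pingDuration := receiveTime.Sub(sendTime)
	remoteTimeNow := time.Unix(0, serverTimeNanos).Add(pingDuration / 2)
	return remoteTimeNow.Sub(receiveTime).Nanoseconds(), (pingDuration / 2).Nanoseconds()
}

func main() {
	send := time.Now()
	recv := send.Add(10 * time.Millisecond)             // 10ms round trip
	server := send.Add(8 * time.Millisecond).UnixNano() // remote clock read mid-flight
	offset, uncertainty := estimateOffset(send, recv, server)
	fmt.Printf("offset=%dns uncertainty=%dns\n", offset, uncertainty)
}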
Example 4: startStats
// startStats blocks and periodically logs transaction statistics (throughput,
// success rates, durations, ...). Note that this only captures write txns,
// since read-only txns are stateless as far as TxnCoordSender is concerned.
func (tc *TxnCoordSender) startStats() {
	res := time.Millisecond // for duration logging resolution
	var statusLogTimer timeutil.Timer
	defer statusLogTimer.Stop()
	scale := metric.Scale1M
	for {
		statusLogTimer.Reset(statusLogInterval)
		select {
		case <-statusLogTimer.C:
			statusLogTimer.Read = true
			if !log.V(1) {
				continue
			}
			// Take a snapshot of metrics. There's some chance of skew, since the snapshots are
			// not done atomically, but that should be fine for these debug stats.
			metrics := tc.metrics
			durations := metrics.Durations[scale].Current()
			restarts := metrics.Restarts.Current()
			commitRate := metrics.Commits.Rates[scale].Value()
			commit1PCRate := metrics.Commits1PC.Rates[scale].Value()
			abortRate := metrics.Aborts.Rates[scale].Value()
			abandonRate := metrics.Abandons.Rates[scale].Value()
			// Show transaction stats over the last minute. Maybe this should be shorter in the future.
			// We'll revisit if we get sufficient feedback.
			totalRate := commitRate + abortRate + abandonRate
			var pCommitted, pCommitted1PC, pAbandoned, pAborted float64
			if totalRate > 0 {
				pCommitted = 100 * (commitRate / totalRate)
				pCommitted1PC = 100 * (commit1PCRate / totalRate)
				pAborted = 100 * (abortRate / totalRate)
				pAbandoned = 100 * (abandonRate / totalRate)
			}
			dMean := durations.Mean()
			dDev := durations.StdDev()
			dMax := durations.Max()
			rMean := restarts.Mean()
			rDev := restarts.StdDev()
			rMax := restarts.Max()
			num := durations.TotalCount()
			log.Infof(tc.ctx,
				"txn coordinator: %.2f txn/sec, %.2f/%.2f/%.2f/%.2f %%cmmt/cmmt1pc/abrt/abnd, %s/%s/%s avg/σ/max duration, %.1f/%.1f/%d avg/σ/max restarts (%d samples)",
				totalRate, pCommitted, pCommitted1PC, pAborted, pAbandoned,
				util.TruncateDuration(time.Duration(dMean), res),
				util.TruncateDuration(time.Duration(dDev), res),
				util.TruncateDuration(time.Duration(dMax), res),
				rMean, rDev, rMax, num,
			)
		case <-tc.stopper.ShouldStop():
			return
		}
	}
}
Example 5: runHeartbeat
func (ctx *Context) runHeartbeat(cc *grpc.ClientConn, remoteAddr string) error {
	request := PingRequest{Addr: ctx.Addr}
	heartbeatClient := NewHeartbeatClient(cc)
	var heartbeatTimer timeutil.Timer
	defer heartbeatTimer.Stop()
	// Give the first iteration a wait-free heartbeat attempt.
	nextHeartbeat := 0 * time.Nanosecond
	for {
		heartbeatTimer.Reset(nextHeartbeat)
		select {
		case <-ctx.Stopper.ShouldStop():
			return nil
		case <-heartbeatTimer.C:
			heartbeatTimer.Read = true
		}
		sendTime := ctx.localClock.PhysicalTime()
		response, err := ctx.heartbeat(heartbeatClient, request)
		ctx.setConnHealthy(remoteAddr, err == nil)
		if err == nil {
			receiveTime := ctx.localClock.PhysicalTime()
			// Only update the clock offset measurement if we actually got a
			// successful response from the server.
			if pingDuration := receiveTime.Sub(sendTime); pingDuration > maximumPingDurationMult*ctx.localClock.MaxOffset() {
				request.Offset.Reset()
			} else {
				// Offset and error are measured using the remote clock reading
				// technique described in
				// http://se.inf.tu-dresden.de/pubs/papers/SRDS1994.pdf, page 6.
				// However, we assume that drift and min message delay are 0, for
				// now.
				request.Offset.MeasuredAt = receiveTime.UnixNano()
				request.Offset.Uncertainty = (pingDuration / 2).Nanoseconds()
				remoteTimeNow := time.Unix(0, response.ServerTime).Add(pingDuration / 2)
				request.Offset.Offset = remoteTimeNow.Sub(receiveTime).Nanoseconds()
			}
			ctx.RemoteClocks.UpdateOffset(remoteAddr, request.Offset)
			if cb := ctx.HeartbeatCB; cb != nil {
				cb()
			}
		}
		// If the heartbeat timed out, run the next one immediately. Otherwise,
		// wait out the heartbeat interval on the next iteration.
		if grpc.Code(err) == codes.DeadlineExceeded {
			nextHeartbeat = 0
		} else {
			nextHeartbeat = ctx.HeartbeatInterval
		}
	}
}
Example 6: bootstrap
// bootstrap connects the node to the gossip network. Bootstrapping
// commences in the event there are no connected clients or the
// sentinel gossip info is not available. After a successful bootstrap
// connection, this method will block on the stalled condvar, which
// receives notifications that gossip network connectivity has been
// lost and requires re-bootstrapping.
func (g *Gossip) bootstrap() {
	g.server.stopper.RunWorker(func() {
		ctx := log.WithLogTag(g.ctx, "bootstrap", nil)
		var bootstrapTimer timeutil.Timer
		defer bootstrapTimer.Stop()
		for {
			if g.server.stopper.RunTask(func() {
				g.mu.Lock()
				defer g.mu.Unlock()
				haveClients := g.outgoing.len() > 0
				haveSentinel := g.mu.is.getInfo(KeySentinel) != nil
				log.Eventf(ctx, "have clients: %t, have sentinel: %t", haveClients, haveSentinel)
				if !haveClients || !haveSentinel {
					// Try to get another bootstrap address from the resolvers.
					if addr := g.getNextBootstrapAddress(); addr != nil {
						g.startClient(addr, g.mu.is.NodeID)
					} else {
						bootstrapAddrs := make([]string, 0, len(g.bootstrapping))
						for addr := range g.bootstrapping {
							bootstrapAddrs = append(bootstrapAddrs, addr)
						}
						log.Eventf(ctx, "no next bootstrap address; currently bootstrapping: %v", bootstrapAddrs)
						// We couldn't start a client, signal that we're stalled so that
						// we'll retry.
						g.maybeSignalStatusChangeLocked()
					}
				}
			}) != nil {
				return
			}
			// Pause an interval before next possible bootstrap.
			bootstrapTimer.Reset(g.bootstrapInterval)
			log.Eventf(ctx, "sleeping %s until bootstrap", g.bootstrapInterval)
			select {
			case <-bootstrapTimer.C:
				bootstrapTimer.Read = true
				// break
			case <-g.server.stopper.ShouldStop():
				return
			}
			log.Eventf(ctx, "idling until bootstrap required")
			// Block until we need bootstrapping again.
			select {
			case <-g.stalledCh:
				log.Eventf(ctx, "detected stall; commencing bootstrap")
				// break
			case <-g.server.stopper.ShouldStop():
				return
			}
		}
	})
}
Example 7: bootstrap
// bootstrap connects the node to the gossip network. Bootstrapping
// commences in the event there are no connected clients or the
// sentinel gossip info is not available. After a successful bootstrap
// connection, this method will block on the stalled condvar, which
// receives notifications that gossip network connectivity has been
// lost and requires re-bootstrapping.
func (g *Gossip) bootstrap() {
	stopper := g.server.stopper
	stopper.RunWorker(func() {
		var bootstrapTimer timeutil.Timer
		defer bootstrapTimer.Stop()
		for {
			if stopper.RunTask(func() {
				g.mu.Lock()
				defer g.mu.Unlock()
				haveClients := g.outgoing.len() > 0
				haveSentinel := g.is.getInfo(KeySentinel) != nil
				if !haveClients || !haveSentinel {
					// Try to get another bootstrap address from the resolvers.
					if addr := g.getNextBootstrapAddress(); addr != nil {
						g.startClient(addr, stopper)
					} else {
						// We couldn't start a client, signal that we're stalled so that
						// we'll retry.
						g.maybeSignalStalledLocked()
					}
				}
			}) != nil {
				return
			}
			// Pause an interval before next possible bootstrap.
			bootstrapTimer.Reset(g.bootstrapInterval)
			select {
			case <-bootstrapTimer.C:
				bootstrapTimer.Read = true
				// break
			case <-stopper.ShouldStop():
				return
			}
			// Block until we need bootstrapping again.
			select {
			case <-g.stalled:
				// break
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
Example 8: start
// start will run continuously and expire old reservations.
func (b *bookie) start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		var timeoutTimer timeutil.Timer
		defer timeoutTimer.Stop()
		for {
			var timeout time.Duration
			b.mu.Lock()
			nextExpiration := b.mu.queue.peek()
			if nextExpiration == nil {
				// No reservations to expire.
				timeout = b.reservationTimeout
			} else {
				now := b.clock.Now()
				if now.GoTime().After(nextExpiration.expireAt.GoTime()) {
					// We have a reservation expiration, remove it.
					expiredReservation := b.mu.queue.dequeue()
					// Is it an active reservation?
					if b.mu.reservationsByRangeID[expiredReservation.RangeID] == expiredReservation {
						b.fillReservationLocked(expiredReservation)
					} else if log.V(2) {
						log.Infof(context.TODO(), "the reservation for rangeID %d has already been filled.",
							expiredReservation.RangeID)
					}
					// Set the timeout to 0 to force another peek.
					timeout = 0
				} else {
					timeout = nextExpiration.expireAt.GoTime().Sub(now.GoTime())
				}
			}
			b.mu.Unlock()
			timeoutTimer.Reset(timeout)
			select {
			case <-timeoutTimer.C:
				timeoutTimer.Read = true
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
Example 9: start
// start will run continuously and mark stores as offline if they haven't been
// heard from in longer than timeUntilStoreDead.
func (sp *StorePool) start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		var timeoutTimer timeutil.Timer
		defer timeoutTimer.Stop()
		for {
			var timeout time.Duration
			sp.mu.Lock()
			detail := sp.mu.queue.peek()
			if detail == nil {
				// No stores yet, wait the full timeout.
				timeout = sp.timeUntilStoreDead
			} else {
				// Check to see if the store should be marked as dead.
				deadAsOf := detail.lastUpdatedTime.GoTime().Add(sp.timeUntilStoreDead)
				now := sp.clock.Now()
				if now.GoTime().After(deadAsOf) {
					deadDetail := sp.mu.queue.dequeue()
					deadDetail.markDead(now)
					// The next store might be dead as well, set the timeout to
					// 0 to process it immediately.
					timeout = 0
				} else {
					// Store is still alive, schedule the next check for when
					// it should timeout.
					timeout = deadAsOf.Sub(now.GoTime())
				}
			}
			sp.mu.Unlock()
			timeoutTimer.Reset(timeout)
			select {
			case <-timeoutTimer.C:
				timeoutTimer.Read = true
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
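Examples 8 and 9 share the same shape: a single timeutil.Timer paces expirations from a time-ordered queue, sleeping until the next deadline and using a zero timeout to re-check immediately after an item expires. A compact, hypothetical sketch of that pattern (the queue is represented as a sorted slice and all names are illustrative, not taken from the cockroach code):

package main

import (
	"fmt"
	"time"

	"github.com/cockroachdb/cockroach/util/timeutil"
)

// expireLoop drains deadlines from a time-ordered slice using one reusable timer.
func expireLoop(deadlines []time.Time, stop <-chan struct{}, expire func(time.Time)) {
	var timer timeutil.Timer
	defer timer.Stop()
	for {
		var timeout time.Duration
		if len(deadlines) == 0 {
			timeout = time.Minute // nothing queued; wake up occasionally
		} else if now := time.Now(); now.After(deadlines[0]) {
			expire(deadlines[0])
			deadlines = deadlines[1:]
			timeout = 0 // the next entry may already be expired; check immediately
		} else {
			timeout = deadlines[0].Sub(now)
		}
		timer.Reset(timeout)
		select {
		case <-timer.C:
			timer.Read = true
		case <-stop:
			return
		}
	}
}

func main() {
	stop := make(chan struct{})
	go func() {
		time.Sleep(300 * time.Millisecond)
		close(stop)
	}()
	now := time.Now()
	expireLoop([]time.Time{now.Add(50 * time.Millisecond), now.Add(100 * time.Millisecond)}, stop,
		func(t time.Time) { fmt.Println("expired:", t) })
}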
Example 10: sendToReplicas
// sendToReplicas sends one or more RPCs to clients specified by the slice of
// replicas. On success, Send returns the first successful reply. Otherwise,
// Send returns an error if and as soon as the number of failed RPCs exceeds
// the available endpoints less the number of required replies.
func (ds *DistSender) sendToReplicas(
	opts SendOptions,
	rangeID roachpb.RangeID,
	replicas ReplicaSlice,
	args roachpb.BatchRequest,
	rpcContext *rpc.Context,
) (*roachpb.BatchResponse, error) {
	if len(replicas) < 1 {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d",
				len(replicas), 1))
	}
	done := make(chan BatchCall, len(replicas))
	transportFactory := opts.transportFactory
	if transportFactory == nil {
		transportFactory = grpcTransportFactory
	}
	transport, err := transportFactory(opts, rpcContext, replicas, args)
	if err != nil {
		return nil, err
	}
	defer transport.Close()
	if transport.IsExhausted() {
		return nil, roachpb.NewSendError(
			fmt.Sprintf("sending to all %d replicas failed", len(replicas)))
	}
	// Send the first request.
	pending := 1
	transport.SendNext(done)
	// Wait for completions. This loop will retry operations that fail
	// with errors that reflect per-replica state and may succeed on
	// other replicas.
	var sendNextTimer timeutil.Timer
	defer sendNextTimer.Stop()
	for {
		sendNextTimer.Reset(opts.SendNextTimeout)
		select {
		case <-sendNextTimer.C:
			sendNextTimer.Read = true
			// On successive RPC timeouts, send to additional replicas if available.
			if !transport.IsExhausted() {
				log.Trace(opts.Context, "timeout, trying next peer")
				pending++
				transport.SendNext(done)
			}
		case call := <-done:
			pending--
			err := call.Err
			if err == nil {
				if log.V(2) {
					log.Infof(opts.Context, "RPC reply: %+v", call.Reply)
				} else if log.V(1) && call.Reply.Error != nil {
					log.Infof(opts.Context, "application error: %s", call.Reply.Error)
				}
				if !ds.handlePerReplicaError(rangeID, call.Reply.Error) {
					return call.Reply, nil
				}
				// Extract the detail so it can be included in the error
				// message if this is our last replica.
				//
				// TODO(bdarnell): The last error is not necessarily the best
				// one to return; we may want to remember the "best" error
				// we've seen (for example, a NotLeaseHolderError conveys more
				// information than a RangeNotFound).
				err = call.Reply.Error.GoError()
			} else if log.V(1) {
				log.Warningf(opts.Context, "RPC error: %s", err)
			}
			// Send to additional replicas if available.
			if !transport.IsExhausted() {
				log.Tracef(opts.Context, "error, trying next peer: %s", err)
				pending++
				transport.SendNext(done)
			}
			if pending == 0 {
				return nil, roachpb.NewSendError(
					fmt.Sprintf("sending to all %d replicas failed; last error: %v",
						len(replicas), err))
			}
		}
	}
}
Example 11: processQueue
// processQueue opens a Raft client stream and sends messages from the
// designated queue (ch) via that stream, exiting when an error is received or
// when it idles out. All messages remaining in the queue at that point are
// lost and a new instance of processQueue will be started by the next message
// to be sent.
func (t *RaftTransport) processQueue(
	nodeID roachpb.NodeID,
	ch chan *RaftMessageRequest,
	stats *raftTransportStats,
	stream MultiRaft_RaftMessageClient,
) error {
	errCh := make(chan error, 1)
	// Starting workers in a task prevents data races during shutdown.
	if err := t.rpcContext.Stopper.RunTask(func() {
		t.rpcContext.Stopper.RunWorker(func() {
			errCh <- func() error {
				for {
					resp, err := stream.Recv()
					if err != nil {
						return err
					}
					atomic.AddInt64(&stats.clientRecv, 1)
					t.mu.Lock()
					handler, ok := t.mu.handlers[resp.ToReplica.StoreID]
					t.mu.Unlock()
					if !ok {
						log.Warningf(context.TODO(), "no handler found for store %s in response %s",
							resp.ToReplica.StoreID, resp)
						continue
					}
					handler.HandleRaftResponse(stream.Context(), resp)
				}
			}()
		})
	}); err != nil {
		return err
	}
	var raftIdleTimer timeutil.Timer
	defer raftIdleTimer.Stop()
	for {
		raftIdleTimer.Reset(raftIdleTimeout)
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return nil
		case <-raftIdleTimer.C:
			raftIdleTimer.Read = true
			return nil
		case err := <-errCh:
			return err
		case req := <-ch:
			err := stream.Send(req)
			atomic.AddInt64(&stats.clientSent, 1)
			if req.Message.Type == raftpb.MsgSnap {
				select {
				case <-t.rpcContext.Stopper.ShouldStop():
					return nil
				case t.SnapshotStatusChan <- RaftSnapshotStatus{req, err}:
				}
			}
			if err != nil {
				return err
			}
		}
	}
}
Example 12: Run
// Run performs a continuous load test with the given parameters, checking the
// health of the cluster regularly. Any failure of a CockroachDB node or load
// generator constitutes a test failure. The test runs for the duration given
// by the `test.timeout` flag, minus the time it takes to reliably tear down
// the test cluster.
func (cl continuousLoadTest) Run(t *testing.T) {
	f := farmer(t, cl.Prefix+cl.shortTestTimeout())
	ctx, err := WithClusterTimeout(context.Background())
	if err != nil {
		t.Fatal(err)
	}
	if deadline, ok := ctx.Deadline(); ok {
		log.Infof(ctx, "load test will end at %s", deadline)
	} else {
		log.Infof(ctx, "load test will run indefinitely")
	}
	// Start cluster and load.
	defer func() {
		if r := recover(); r != nil {
			t.Errorf("recovered from panic to destroy cluster: %v", r)
		}
		f.MustDestroy(t)
	}()
	f.AddVars["benchmark_name"] = cl.BenchmarkPrefix + cl.shortTestTimeout()
	if cl.CockroachDiskSizeGB != 0 {
		f.AddVars["cockroach_disk_size"] = strconv.Itoa(cl.CockroachDiskSizeGB)
	}
	if err := f.Resize(cl.NumNodes); err != nil {
		t.Fatal(err)
	}
	checkGossip(t, f, longWaitTime, hasPeers(cl.NumNodes))
	start := timeutil.Now()
	if err := cl.startLoad(f); err != nil {
		t.Fatal(err)
	}
	cl.assert(t, f)
	// Run load, checking the health of the cluster periodically.
	const healthCheckInterval = 2 * time.Minute
	var healthCheckTimer timeutil.Timer
	healthCheckTimer.Reset(healthCheckInterval)
	lastQueryCount := 0.0
	lastHealthCheck := timeutil.Now()
	for {
		select {
		case <-healthCheckTimer.C:
			// Check that all nodes are up and that queries are being processed.
			healthCheckTimer.Read = true
			cl.assert(t, f)
			queryCount, err := cl.queryCount(f)
			if err != nil {
				t.Fatal(err)
			}
			if lastQueryCount == queryCount {
				t.Fatalf("no queries in the last %s", healthCheckInterval)
			}
			qps := (queryCount - lastQueryCount) / timeutil.Now().Sub(lastHealthCheck).Seconds()
			if qps < *flagCLTMinQPS {
				t.Fatalf("qps (%.1f) < min (%.1f)", qps, *flagCLTMinQPS)
			}
			lastQueryCount = queryCount
			lastHealthCheck = timeutil.Now()
			healthCheckTimer.Reset(healthCheckInterval)
			log.Infof(ctx, "health check ok %s (%.1f qps)", timeutil.Now().Sub(start), qps)
		case <-ctx.Done():
			log.Infof(ctx, "load test finished")
			cl.assert(t, f)
			if err := f.Stop(0, cl.Process); err != nil {
				t.Error(err)
			}
			return
		case <-stopper:
			t.Fatal("interrupted")
		}
	}
}
Example 13: processQueue
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let raft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *RaftTransport) processQueue(nodeID roachpb.NodeID) {
	t.mu.Lock()
	ch, ok := t.mu.queues[nodeID]
	t.mu.Unlock()
	if !ok {
		return
	}
	// Clean-up when the loop below shuts down.
	defer func() {
		t.mu.Lock()
		delete(t.mu.queues, nodeID)
		t.mu.Unlock()
	}()
	addr, err := t.resolver(nodeID)
	if err != nil {
		if log.V(1) {
			log.Errorf("failed to get address for node %d: %s", nodeID, err)
		}
		return
	}
	if log.V(1) {
		log.Infof("dialing node %d at %s", nodeID, addr)
	}
	conn, err := t.rpcContext.GRPCDial(addr.String())
	if err != nil {
		if log.V(1) {
			log.Errorf("failed to dial: %s", err)
		}
		return
	}
	client := NewMultiRaftClient(conn)
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()
	if log.V(1) {
		log.Infof("establishing Raft transport stream to node %d at %s", nodeID, addr)
	}
	// We start two streams; one will be used for snapshots, the other for all
	// other traffic. This is done to prevent snapshots from blocking other
	// traffic.
	streams := make([]MultiRaft_RaftMessageClient, 2)
	for i := range streams {
		stream, err := client.RaftMessage(ctx)
		if err != nil {
			if log.V(1) {
				log.Errorf("failed to establish Raft transport stream to node %d at %s: %s", nodeID, addr, err)
			}
			return
		}
		streams[i] = stream
	}
	errCh := make(chan error, len(streams))
	// Starting workers in a task prevents data races during shutdown.
	t.rpcContext.Stopper.RunTask(func() {
		for i := range streams {
			// Avoid closing over a `range` binding.
			stream := streams[i]
			t.rpcContext.Stopper.RunWorker(func() {
				// NB: only one error will ever be read from this channel. That's fine,
				// given that the channel is buffered to the maximum number of errors
				// that will be written to it.
				errCh <- stream.RecvMsg(new(RaftMessageResponse))
			})
		}
	})
	snapStream := streams[0]
	restStream := streams[1]
	var raftIdleTimer timeutil.Timer
	defer raftIdleTimer.Stop()
	for {
		raftIdleTimer.Reset(raftIdleTimeout)
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return
		case <-raftIdleTimer.C:
			raftIdleTimer.Read = true
			if log.V(1) {
				log.Infof("closing Raft transport to %d at %s due to inactivity", nodeID, addr)
			}
			return
		case err := <-errCh:
			if log.V(1) {
				if err != nil {
					log.Infof("remote node %d at %s closed Raft transport with error: %s", nodeID, addr, err)
				} else {
					log.Infof("remote node %d at %s closed Raft transport", nodeID, addr)
				}
//......... the remainder of this example is omitted .........