This page collects typical usage examples of the Timer.Stop method from the Golang package github.com/cockroachdb/cockroach/util. If you are unsure what Timer.Stop does or how to use it, the curated examples below may help; you can also explore the containing type, github.com/cockroachdb/cockroach/util.Timer.
13 code examples of Timer.Stop are shown below, sorted by popularity by default.
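Every example below follows the same idiom: util.Timer wraps a time.Timer so that Reset can safely be called on each loop iteration; after receiving from Timer.C the caller must set Timer.Read = true, and a deferred Timer.Stop releases the underlying timer when the loop exits. A minimal sketch of that loop (runPeriodically, work, and interval are illustrative names, not cockroach APIs):

func runPeriodically(stopper *stop.Stopper, interval time.Duration, work func()) {
    var t util.Timer
    defer t.Stop() // release the underlying time.Timer when the loop exits
    for {
        t.Reset(interval) // unlike time.Timer.Reset, safe on every iteration
        select {
        case <-t.C:
            t.Read = true // util.Timer's contract: mark the channel as drained
            work()
        case <-stopper.ShouldStop():
            return
        }
    }
}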
Example 1: startStats
// startStats blocks and periodically logs transaction statistics (throughput,
// success rates, durations, ...). Note that this only captures write txns,
// since read-only txns are stateless as far as TxnCoordSender is concerned.
func (tc *TxnCoordSender) startStats() {
    res := time.Millisecond // for duration logging resolution
    var statusLogTimer util.Timer
    defer statusLogTimer.Stop()
    scale := metric.Scale1M
    for {
        statusLogTimer.Reset(statusLogInterval)
        select {
        case <-statusLogTimer.C:
            statusLogTimer.Read = true
            if !log.V(1) {
                continue
            }
            // Take a snapshot of metrics. There's some chance of skew, since the
            // snapshots are not done atomically, but that should be fine for
            // these debug stats.
            metrics := tc.metrics
            durations := metrics.Durations[scale].Current()
            restarts := metrics.Restarts.Current()
            commitRate := metrics.Commits.Rates[scale].Value()
            commit1PCRate := metrics.Commits1PC.Rates[scale].Value()
            abortRate := metrics.Aborts.Rates[scale].Value()
            abandonRate := metrics.Abandons.Rates[scale].Value()

            // Show transaction stats over the last minute. Maybe this should be
            // shorter in the future. We'll revisit if we get sufficient feedback.
            totalRate := commitRate + abortRate + abandonRate
            var pCommitted, pCommitted1PC, pAbandoned, pAborted float64
            if totalRate > 0 {
                pCommitted = 100 * (commitRate / totalRate)
                pCommitted1PC = 100 * (commit1PCRate / totalRate)
                pAborted = 100 * (abortRate / totalRate)
                pAbandoned = 100 * (abandonRate / totalRate)
            }

            dMean := durations.Mean()
            dDev := durations.StdDev()
            dMax := durations.Max()
            rMean := restarts.Mean()
            rDev := restarts.StdDev()
            rMax := restarts.Max()
            num := durations.TotalCount()

            log.Infof(
                "txn coordinator: %.2f txn/sec, %.2f/%.2f/%.2f/%.2f %%cmmt/cmmt1pc/abrt/abnd, %s/%s/%s avg/σ/max duration, %.1f/%.1f/%d avg/σ/max restarts (%d samples)",
                totalRate, pCommitted, pCommitted1PC, pAborted, pAbandoned,
                util.TruncateDuration(time.Duration(dMean), res),
                util.TruncateDuration(time.Duration(dDev), res),
                util.TruncateDuration(time.Duration(dMax), res),
                rMean, rDev, rMax, num,
            )
        case <-tc.stopper.ShouldStop():
            return
        }
    }
}
Example 2: runHeartbeat
func (ctx *Context) runHeartbeat(cc *grpc.ClientConn, remoteAddr string) error {
    request := PingRequest{Addr: ctx.localAddr}
    heartbeatClient := NewHeartbeatClient(cc)
    var heartbeatTimer util.Timer
    defer heartbeatTimer.Stop()
    for {
        sendTime := ctx.localClock.PhysicalTime()
        response, err := ctx.heartbeat(heartbeatClient, request)
        if err != nil {
            if grpc.Code(err) == codes.DeadlineExceeded {
                continue
            }
            return err
        }
        receiveTime := ctx.localClock.PhysicalTime()
        // Only update the clock offset measurement if we actually got a
        // successful response from the server.
        if pingDuration := receiveTime.Sub(sendTime); pingDuration > maximumPingDurationMult*ctx.localClock.MaxOffset() {
            request.Offset.Reset()
        } else {
            // Offset and error are measured using the remote clock reading
            // technique described in
            // http://se.inf.tu-dresden.de/pubs/papers/SRDS1994.pdf, page 6.
            // However, we assume that drift and min message delay are 0, for
            // now.
            request.Offset.MeasuredAt = receiveTime.UnixNano()
            request.Offset.Uncertainty = (pingDuration / 2).Nanoseconds()
            remoteTimeNow := time.Unix(0, response.ServerTime).Add(pingDuration / 2)
            request.Offset.Offset = remoteTimeNow.Sub(receiveTime).Nanoseconds()
        }
        ctx.RemoteClocks.UpdateOffset(remoteAddr, request.Offset)
        if cb := ctx.HeartbeatCB; cb != nil {
            cb()
        }
        // Wait after the heartbeat so that the first iteration gets a wait-free
        // heartbeat attempt.
        heartbeatTimer.Reset(ctx.HeartbeatInterval)
        select {
        case <-ctx.Stopper.ShouldStop():
            return nil
        case <-heartbeatTimer.C:
            heartbeatTimer.Read = true
        }
    }
}
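The offset arithmetic above is easy to check by hand: the server's clock reading is assumed to sit halfway through the round trip, so the estimated remote time is ServerTime plus half the ping duration, the offset is that estimate minus the local receive time, and the uncertainty is half the round trip. A standalone sketch with plain time values (estimateOffset is an illustrative helper, not a cockroach API):

// For a send at t=0ms, a receive at t=10ms, and a server reading of 8ms,
// this returns offset = 3ms (13ms estimate vs. 10ms local receive time)
// and uncertainty = 5ms.
func estimateOffset(sendTime, receiveTime time.Time, serverTimeNanos int64) (offsetNanos, uncertaintyNanos int64) {
    pingDuration := receiveTime.Sub(sendTime)
    remoteTimeNow := time.Unix(0, serverTimeNanos).Add(pingDuration / 2)
    return remoteTimeNow.Sub(receiveTime).Nanoseconds(), (pingDuration / 2).Nanoseconds()
}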
Example 3: runHeartbeat
func (ctx *Context) runHeartbeat(cc *grpc.ClientConn, remoteAddr string) error {
    request := PingRequest{Addr: ctx.localAddr}
    heartbeatClient := NewHeartbeatClient(cc)
    var heartbeatTimer util.Timer
    defer heartbeatTimer.Stop()
    for {
        sendTime := ctx.localClock.PhysicalNow()
        goCtx, cancel := context.WithTimeout(context.Background(), ctx.HeartbeatTimeout)
        response, err := heartbeatClient.Ping(goCtx, &request)
        cancel() // release the timeout context on every path, not only on error
        if err != nil {
            return err
        }
        receiveTime := ctx.localClock.PhysicalNow()
        // Only update the clock offset measurement if we actually got a
        // successful response from the server.
        if receiveTime > sendTime+maximumClockReadingDelay.Nanoseconds() {
            request.Offset.Reset()
        } else {
            // Offset and error are measured using the remote clock reading
            // technique described in
            // http://se.inf.tu-dresden.de/pubs/papers/SRDS1994.pdf, page 6.
            // However, we assume that drift and min message delay are 0, for
            // now.
            request.Offset.MeasuredAt = receiveTime
            request.Offset.Uncertainty = (receiveTime - sendTime) / 2
            remoteTimeNow := response.ServerTime + request.Offset.Uncertainty
            request.Offset.Offset = remoteTimeNow - receiveTime
            ctx.RemoteClocks.UpdateOffset(remoteAddr, request.Offset)
        }
        // Wait after the heartbeat so that the first iteration gets a wait-free
        // heartbeat attempt.
        heartbeatTimer.Reset(ctx.HeartbeatInterval)
        select {
        case <-ctx.Stopper.ShouldStop():
            return nil
        case <-heartbeatTimer.C:
            heartbeatTimer.Read = true
        }
    }
}
Example 4: MonitorRemoteOffsets
// MonitorRemoteOffsets periodically checks that the offset of this server's
// clock from the true cluster time is within MaxOffset. If the offset exceeds
// MaxOffset, then this method will trigger a fatal error, causing the node to
// suicide.
func (r *RemoteClockMonitor) MonitorRemoteOffsets(stopper *stop.Stopper) error {
    if log.V(1) {
        log.Infof("monitoring cluster offset every %s", r.monitorInterval)
    }
    var monitorTimer util.Timer
    defer monitorTimer.Stop()
    for {
        monitorTimer.Reset(r.monitorInterval)
        select {
        case <-stopper.ShouldStop():
            return nil
        case <-monitorTimer.C:
            monitorTimer.Read = true
            offsetInterval, err := r.findOffsetInterval()
            // By the contract of the hlc, if the value is 0, then safety checking
            // of the max offset is disabled. However we may still want to
            // propagate the information to a status node.
            // TODO(embark): once there is a framework for collecting timeseries
            // data about the db, propagate the offset status to that.
            if maxOffset := r.clock.MaxOffset(); maxOffset != 0 {
                if err != nil {
                    return util.Errorf("clock offset could not be determined: %s", err)
                }
                if !isHealthyOffsetInterval(offsetInterval, maxOffset) {
                    return util.Errorf(
                        "clock offset is in interval: %s, which indicates that the true offset is greater than the max offset: %s",
                        offsetInterval, maxOffset,
                    )
                }
                if log.V(1) {
                    log.Infof("healthy cluster offset: %s", offsetInterval)
                }
            }
            r.metrics.clusterOffsetLowerBound.Update(int64(offsetInterval.lowerbound))
            r.metrics.clusterOffsetUpperBound.Update(int64(offsetInterval.upperbound))
            r.mu.Lock()
            r.mu.lastMonitoredAt = r.clock.PhysicalTime()
            r.mu.Unlock()
        }
    }
}
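isHealthyOffsetInterval itself is not shown on this page; from the error message above, it must report unhealthy exactly when the measured interval shows the true offset cannot lie within ±maxOffset. A plausible sketch under that reading (hypothetical code, the real implementation may differ; the offsetInterval type name is assumed from the variable and fields used above):

// Hypothetical: healthy unless the whole interval lies outside ±maxOffset,
// i.e. the true offset is provably larger than permitted.
func isHealthyOffsetInterval(i offsetInterval, maxOffset time.Duration) bool {
    return i.lowerbound <= maxOffset.Nanoseconds() && i.upperbound >= -maxOffset.Nanoseconds()
}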
Example 5: bootstrap
// bootstrap connects the node to the gossip network. Bootstrapping
// commences in the event there are no connected clients or the
// sentinel gossip info is not available. After a successful bootstrap
// connection, this method will block on the stalled condvar, which
// receives notifications that gossip network connectivity has been
// lost and requires re-bootstrapping.
func (g *Gossip) bootstrap() {
    stopper := g.server.stopper
    stopper.RunWorker(func() {
        var bootstrapTimer util.Timer
        defer bootstrapTimer.Stop()
        for {
            stopper.RunTask(func() {
                g.mu.Lock()
                defer g.mu.Unlock()
                haveClients := g.outgoing.len() > 0
                haveSentinel := g.is.getInfo(KeySentinel) != nil
                if !haveClients || !haveSentinel {
                    // Try to get another bootstrap address from the resolvers.
                    if addr := g.getNextBootstrapAddress(); addr != nil {
                        g.startClient(addr, stopper)
                    } else {
                        // We couldn't start a client, signal that we're stalled
                        // so that we'll retry.
                        g.maybeSignalStalledLocked()
                    }
                }
            })
            // Pause an interval before next possible bootstrap.
            bootstrapTimer.Reset(g.bootstrapInterval)
            select {
            case <-bootstrapTimer.C:
                bootstrapTimer.Read = true
                // break
            case <-stopper.ShouldStop():
                return
            }
            // Block until we need bootstrapping again.
            select {
            case <-g.stalled:
                // break
            case <-stopper.ShouldStop():
                return
            }
        }
    })
}
Example 6: start
// start will run continuously and mark stores as offline if they haven't been
// heard from in longer than timeUntilStoreDead.
func (sp *StorePool) start(stopper *stop.Stopper) {
    stopper.RunWorker(func() {
        var timeoutTimer util.Timer
        defer timeoutTimer.Stop()
        for {
            var timeout time.Duration
            sp.mu.Lock()
            detail := sp.queue.peek()
            if detail == nil {
                // No stores yet, wait the full timeout.
                timeout = sp.timeUntilStoreDead
            } else {
                // Check to see if the store should be marked as dead.
                deadAsOf := detail.lastUpdatedTime.GoTime().Add(sp.timeUntilStoreDead)
                now := sp.clock.Now()
                if now.GoTime().After(deadAsOf) {
                    deadDetail := sp.queue.dequeue()
                    deadDetail.markDead(now)
                    // The next store might be dead as well, set the timeout to
                    // 0 to process it immediately.
                    timeout = 0
                } else {
                    // Store is still alive, schedule the next check for when
                    // it should timeout.
                    timeout = deadAsOf.Sub(now.GoTime())
                }
            }
            sp.mu.Unlock()
            timeoutTimer.Reset(timeout)
            select {
            case <-timeoutTimer.C:
                timeoutTimer.Read = true
            case <-stopper.ShouldStop():
                return
            }
        }
    })
}
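To make the timeout computation concrete: with timeUntilStoreDead set to 5 minutes and the store at the head of the queue last updated 3 minutes ago, deadAsOf lands 2 minutes in the future and the timer waits exactly that long; a store already past its deadline is dequeued and marked dead, and the zero timeout re-runs the loop immediately in case the next store is dead as well.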
Example 7: runHeartbeat
// runHeartbeat sends periodic heartbeats to client, marking the client healthy
// or unhealthy and reconnecting appropriately until either the Client or the
// supplied channel is closed.
func (c *Client) runHeartbeat(retryOpts retry.Options) {
    healthReceived := c.healthReceived
    setHealthReceived := func() {
        if healthReceived != nil {
            close(healthReceived)
            healthReceived = nil
        }
    }
    isHealthy := false
    setHealthy := func() {
        if isHealthy {
            return
        }
        isHealthy = true
        close(c.healthy.Load().(chan struct{}))
        setHealthReceived()
    }
    setUnhealthy := func() {
        if isHealthy {
            isHealthy = false
            c.healthy.Store(make(chan struct{}))
        }
        setHealthReceived()
    }

    var err = errUnstarted // initial condition
    var heartbeatTimer util.Timer
    defer heartbeatTimer.Stop()
    for {
        for r := retry.Start(retryOpts); r.Next(); {
            if c.maybeClose(retryOpts.Closer) {
                return
            }
            // Reconnect on failure.
            if err != nil {
                // If reconnects are disabled, and we already have a failed connection, return now.
                if c.disableReconnects && c.internalConn() != nil {
                    return
                }
                if err = c.connect(); err != nil {
                    setUnhealthy()
                    log.Warning(err)
                    continue
                }
            }
            // Heartbeat regardless of failure.
            if err = c.heartbeat(retryOpts.Closer); err != nil {
                setUnhealthy()
                log.Warning(err)
                if c.maybeClose(retryOpts.Closer) {
                    return
                }
                continue
            }
            setHealthy()
            break
        }
        // Wait after the heartbeat so that the first iteration gets a wait-free
        // heartbeat attempt.
        heartbeatTimer.Reset(c.heartbeatInterval)
        select {
        case <-c.closer:
            return
        case <-retryOpts.Closer:
            c.close()
            return
        case <-heartbeatTimer.C:
            heartbeatTimer.Read = true
            // TODO(tamird): Perhaps retry more aggressively when the client is unhealthy.
        }
    }
}
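The health-reporting pattern here is worth noting: the healthy state is a chan struct{} kept in an atomic.Value. setHealthy closes the current channel, which wakes every goroutine selecting on it, while setUnhealthy swaps in a fresh open channel so later waiters block again. A consumer-side sketch, assuming an accessor like the Healthy() that Example 8 below selects on (waitUntilHealthy and the timeout are illustrative):

// Blocks until the client becomes healthy or the timeout elapses.
// Receiving from a closed channel succeeds immediately, so an already
// healthy client returns true without waiting.
func waitUntilHealthy(c *Client, timeout time.Duration) bool {
    select {
    case <-c.Healthy():
        return true
    case <-time.After(timeout):
        return false
    }
}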
Example 8: send
// Send sends one or more RPCs to clients specified by the slice of
// replicas. On success, Send returns the first successful reply. Otherwise,
// Send returns an error if and as soon as the number of failed RPCs exceeds
// the available endpoints less the number of required replies.
//
// TODO(pmattis): Get rid of the getArgs function which requires the caller to
// maintain a map from address to replica. Instead, pass in the list of
// replicas instead of a list of addresses and use that to populate the
// requests.
func send(opts SendOptions, replicas ReplicaSlice,
    args roachpb.BatchRequest, context *rpc.Context) (proto.Message, error) {
    sp := opts.Trace
    if sp == nil {
        sp = tracing.NilSpan()
    }
    if len(replicas) < 1 {
        return nil, roachpb.NewSendError(
            fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d",
                len(replicas), 1), false)
    }
    done := make(chan *netrpc.Call, len(replicas))
    clients := make([]batchClient, 0, len(replicas))
    for i, replica := range replicas {
        clients = append(clients, batchClient{
            Client:  rpc.NewClient(&replica.NodeDesc.Address, context),
            replica: &replicas[i],
            args:    args,
        })
    }
    var orderedClients []batchClient
    switch opts.Ordering {
    case orderStable:
        orderedClients = clients
    case orderRandom:
        // Randomly permute order, but keep known-unhealthy clients last.
        var nHealthy int
        for i, client := range clients {
            select {
            case <-client.Healthy():
                clients[i], clients[nHealthy] = clients[nHealthy], clients[i]
                nHealthy++
            default:
            }
        }
        shuffleClients(clients[:nHealthy])
        shuffleClients(clients[nHealthy:])
        orderedClients = clients
    }
    // TODO(spencer): going to need to also sort by affinity; closest
    // ping time should win. Makes sense to have the rpc client/server
    // heartbeat measure ping times. With a bit of seasoning, each
    // node will be able to order the healthy replicas based on latency.

    // Send the first request.
    sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done)
    orderedClients = orderedClients[1:]

    var errors, retryableErrors int
    // Wait for completions.
    var sendNextTimer util.Timer
    defer sendNextTimer.Stop()
    for {
        sendNextTimer.Reset(opts.SendNextTimeout)
        select {
        case <-sendNextTimer.C:
            sendNextTimer.Read = true
            // On successive RPC timeouts, send to additional replicas if available.
            if len(orderedClients) > 0 {
                sp.LogEvent("timeout, trying next peer")
                sendOneFn(&orderedClients[0], opts.Timeout, context, sp, done)
                orderedClients = orderedClients[1:]
            }
        case call := <-done:
            if call.Error == nil {
                // Verify response data integrity if this is a proto response.
                if req, reqOk := call.Args.(roachpb.Request); reqOk {
                    if resp, respOk := call.Reply.(roachpb.Response); respOk {
                        if err := resp.Verify(req); err != nil {
                            call.Error = err
                        }
                    } else {
                        call.Error = util.Errorf("response to proto request must be a proto")
                    }
                }
            }
            err := call.Error
            if err == nil {
                if log.V(2) {
                    log.Infof("successful reply: %+v", call.Reply)
                }
                return call.Reply.(proto.Message), nil
//......... (the rest of this example is omitted) .........
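The loop above implements a send-next (hedged request) pattern: the request goes to the best replica first, and each time sendNextTimer fires without a reply, the same request fans out to one more replica; the first successful reply wins. A stripped-down sketch of just that idiom, with replies modeled as strings and issue standing in for the RPC (all names are illustrative, and the error counting done by the real example is omitted):

func sendToFirstResponder(issue []func() <-chan string, sendNextTimeout time.Duration) string {
    done := make(chan string, len(issue))
    start := func(f func() <-chan string) {
        go func() { done <- <-f() }()
    }
    start(issue[0])
    issue = issue[1:]
    var t util.Timer
    defer t.Stop()
    for {
        t.Reset(sendNextTimeout)
        select {
        case <-t.C:
            t.Read = true
            if len(issue) > 0 { // no reply yet: hedge with the next target
                start(issue[0])
                issue = issue[1:]
            }
        case reply := <-done:
            return reply
        }
    }
}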
Example 9: processQueue
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let raft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *RaftTransport) processQueue(nodeID roachpb.NodeID) {
    t.mu.Lock()
    ch, ok := t.mu.queues[nodeID]
    t.mu.Unlock()
    if !ok {
        return
    }
    // Clean-up when the loop below shuts down.
    defer func() {
        t.mu.Lock()
        delete(t.mu.queues, nodeID)
        t.mu.Unlock()
    }()
    addr, err := t.resolver(nodeID)
    if err != nil {
        if log.V(1) {
            log.Errorf("failed to get address for node %d: %s", nodeID, err)
        }
        return
    }
    if log.V(1) {
        log.Infof("dialing node %d at %s", nodeID, addr)
    }
    conn, err := t.rpcContext.GRPCDial(addr.String())
    if err != nil {
        if log.V(1) {
            log.Errorf("failed to dial: %s", err)
        }
        return
    }
    client := NewMultiRaftClient(conn)
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()
    if log.V(1) {
        log.Infof("establishing Raft transport stream to node %d at %s", nodeID, addr)
    }
    stream, err := client.RaftMessage(ctx)
    if err != nil {
        if log.V(1) {
            log.Errorf("failed to establish Raft transport stream to node %d at %s: %s", nodeID, addr, err)
        }
        return
    }
    errCh := make(chan error, 1)
    // Starting workers in a task prevents data races during shutdown.
    t.rpcContext.Stopper.RunTask(func() {
        t.rpcContext.Stopper.RunWorker(func() {
            errCh <- stream.RecvMsg(&RaftMessageResponse{})
        })
    })
    var raftIdleTimer util.Timer
    defer raftIdleTimer.Stop()
    for {
        raftIdleTimer.Reset(raftIdleTimeout)
        select {
        case <-t.rpcContext.Stopper.ShouldStop():
            return
        case <-raftIdleTimer.C:
            raftIdleTimer.Read = true
            if log.V(1) {
                log.Infof("closing Raft transport to %d at %s due to inactivity", nodeID, addr)
            }
            return
        case err := <-errCh:
            if log.V(1) {
                if err != nil {
                    log.Infof("remote node %d at %s closed Raft transport with error: %s", nodeID, addr, err)
                } else {
                    log.Infof("remote node %d at %s closed Raft transport", nodeID, addr)
                }
            }
            return
        case req := <-ch:
            if err := stream.Send(req); err != nil {
                log.Error(err)
                return
            }
        }
    }
}
Example 10: processQueue
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let raft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *rpcTransport) processQueue(nodeID roachpb.NodeID, storeID roachpb.StoreID) {
    t.mu.Lock()
    ch, ok := t.queues[storeID]
    t.mu.Unlock()
    if !ok {
        return
    }
    // Clean-up when the loop below shuts down.
    defer func() {
        t.mu.Lock()
        delete(t.queues, storeID)
        t.mu.Unlock()
    }()
    addr, err := t.gossip.GetNodeIDAddress(nodeID)
    if err != nil {
        if log.V(1) {
            log.Errorf("could not get address for node %d: %s", nodeID, err)
        }
        return
    }
    var dialOpt grpc.DialOption
    if t.rpcContext.Insecure {
        dialOpt = grpc.WithInsecure()
    } else {
        tlsConfig, err := t.rpcContext.GetClientTLSConfig()
        if err != nil {
            log.Error(err)
            return
        }
        dialOpt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
    }
    conn, err := grpc.Dial(addr.String(), dialOpt)
    if err != nil {
        log.Errorf("failed to dial: %v", err)
        return
    }
    defer func() {
        if err := conn.Close(); err != nil {
            log.Error(err)
        }
    }()
    client := storage.NewMultiRaftClient(conn)
    ctx := grpcutil.NewContextWithStopper(context.Background(), t.rpcContext.Stopper)
    stream, err := client.RaftMessage(ctx)
    if err != nil {
        log.Error(err)
        return
    }
    defer func() {
        if err := stream.CloseSend(); err != nil {
            log.Error(err)
        }
    }()
    var raftIdleTimer util.Timer
    defer raftIdleTimer.Stop()
    for {
        raftIdleTimer.Reset(raftIdleTimeout)
        select {
        case <-ctx.Done():
            return
        case <-raftIdleTimer.C:
            raftIdleTimer.Read = true
            if log.V(1) {
                log.Infof("closing Raft transport to %d due to inactivity", nodeID)
            }
            return
        case req := <-ch:
            if err := stream.Send(req); err != nil {
                log.Error(err)
                return
            }
        }
    }
}
Example 11: startStats
// startStats blocks and periodically logs transaction statistics (throughput,
// success rates, durations, ...). Note that this only captures write txns,
// since read-only txns are stateless as far as TxnCoordSender is concerned.
// TODO(mrtracy): Add this to TimeSeries.
func (tc *TxnCoordSender) startStats() {
    res := time.Millisecond // for duration logging resolution
    lastNow := tc.clock.PhysicalNow()
    var statusLogTimer util.Timer
    defer statusLogTimer.Stop()
    for {
        statusLogTimer.Reset(statusLogInterval)
        select {
        case <-statusLogTimer.C:
            statusLogTimer.Read = true
            if !log.V(1) {
                continue
            }
            tc.Lock()
            curStats := tc.txnStats
            tc.txnStats = txnCoordStats{}
            tc.Unlock()
            now := tc.clock.PhysicalNow()
            // Tests have weird clocks.
            if now-lastNow <= 0 {
                continue
            }
            num := len(curStats.durations)
            // Only compute when non-empty input.
            var dMax, dMean, dDev, rMax, rMean, rDev float64
            var err error
            if num > 0 {
                // There should never be an error in the below computations.
                dMax, err = stats.Max(curStats.durations)
                if err != nil {
                    panic(err)
                }
                dMean, err = stats.Mean(curStats.durations)
                if err != nil {
                    panic(err)
                }
                dDev, err = stats.StdDevP(curStats.durations)
                if err != nil {
                    panic(err)
                }
                rMax, err = stats.Max(curStats.restarts)
                if err != nil {
                    panic(err)
                }
                rMean, err = stats.Mean(curStats.restarts)
                if err != nil {
                    panic(err)
                }
                rDev, err = stats.StdDevP(curStats.restarts)
                if err != nil {
                    panic(err)
                }
            }
            rate := float64(int64(num)*int64(time.Second)) / float64(now-lastNow)
            var pCommitted, pAbandoned, pAborted float32
            if fNum := float32(num); fNum > 0 {
                pCommitted = 100 * float32(curStats.committed) / fNum
                pAbandoned = 100 * float32(curStats.abandoned) / fNum
                pAborted = 100 * float32(curStats.aborted) / fNum
            }
            log.Infof(
                "txn coordinator: %.2f txn/sec, %.2f/%.2f/%.2f %%cmmt/abrt/abnd, %s/%s/%s avg/σ/max duration, %.1f/%.1f/%.1f avg/σ/max restarts (%d samples)",
                rate, pCommitted, pAborted, pAbandoned,
                util.TruncateDuration(time.Duration(dMean), res),
                util.TruncateDuration(time.Duration(dDev), res),
                util.TruncateDuration(time.Duration(dMax), res),
                rMean, rDev, rMax, num,
            )
            lastNow = now
        case <-tc.stopper.ShouldStop():
            return
        }
    }
}
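The throughput expression reads more naturally as num divided by the elapsed time in seconds: since now and lastNow are in nanoseconds, multiplying num by int64(time.Second) converts the units, so 120 samples collected over a 60-second window log as 2.00 txn/sec.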
Example 12: send
// Send sends one or more RPCs to clients specified by the slice of
// replicas. On success, Send returns the first successful reply. Otherwise,
// Send returns an error if and as soon as the number of failed RPCs exceeds
// the available endpoints less the number of required replies.
func send(opts SendOptions, replicas ReplicaSlice,
    args roachpb.BatchRequest, rpcContext *rpc.Context) (*roachpb.BatchResponse, error) {
    if len(replicas) < 1 {
        return nil, roachpb.NewSendError(
            fmt.Sprintf("insufficient replicas (%d) to satisfy send request of %d",
                len(replicas), 1), false)
    }
    done := make(chan batchCall, len(replicas))
    clients := make([]batchClient, 0, len(replicas))
    for _, replica := range replicas {
        conn, err := rpcContext.GRPCDial(replica.NodeDesc.Address.String())
        if err != nil {
            return nil, err
        }
        argsCopy := args
        argsCopy.Replica = replica.ReplicaDescriptor
        clients = append(clients, batchClient{
            remoteAddr: replica.NodeDesc.Address.String(),
            conn:       conn,
            client:     roachpb.NewInternalClient(conn),
            args:       argsCopy,
        })
    }
    // Put known-unhealthy clients last.
    nHealthy, err := splitHealthy(clients)
    if err != nil {
        return nil, err
    }
    var orderedClients []batchClient
    switch opts.Ordering {
    case orderStable:
        orderedClients = clients
    case orderRandom:
        // Randomly permute order, but keep known-unhealthy clients last.
        shuffleClients(clients[:nHealthy])
        shuffleClients(clients[nHealthy:])
        orderedClients = clients
    }
    // TODO(spencer): going to need to also sort by affinity; closest
    // ping time should win. Makes sense to have the rpc client/server
    // heartbeat measure ping times. With a bit of seasoning, each
    // node will be able to order the healthy replicas based on latency.

    // Send the first request.
    sendOneFn(opts, rpcContext, orderedClients[0], done)
    orderedClients = orderedClients[1:]

    var errors, retryableErrors int
    // Wait for completions.
    var sendNextTimer util.Timer
    defer sendNextTimer.Stop()
    for {
        sendNextTimer.Reset(opts.SendNextTimeout)
        select {
        case <-sendNextTimer.C:
            sendNextTimer.Read = true
            // On successive RPC timeouts, send to additional replicas if available.
            if len(orderedClients) > 0 {
                log.Trace(opts.Context, "timeout, trying next peer")
                sendOneFn(opts, rpcContext, orderedClients[0], done)
                orderedClients = orderedClients[1:]
            }
        case call := <-done:
            err := call.err
            if err == nil {
                if log.V(2) {
                    log.Infof("successful reply: %+v", call.reply)
                }
                return call.reply, nil
            }
            // Error handling.
            if log.V(1) {
                log.Warningf("error reply: %s", err)
            }
            errors++
            // Since we have a reconnecting client here, disconnect errors are retryable.
            disconnected := err == io.ErrUnexpectedEOF
            if retryErr, ok := err.(retry.Retryable); disconnected || (ok && retryErr.CanRetry()) {
                retryableErrors++
            }
            if remainingNonErrorRPCs := len(replicas) - errors; remainingNonErrorRPCs < 1 {
                return nil, roachpb.NewSendError(
                    fmt.Sprintf("too many errors encountered (%d of %d total): %v",
//......... (the rest of this example is omitted) .........
Example 13: processQueue
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let raft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *RaftTransport) processQueue(nodeID roachpb.NodeID) {
    t.mu.Lock()
    ch, ok := t.mu.queues[nodeID]
    t.mu.Unlock()
    if !ok {
        return
    }
    // Clean-up when the loop below shuts down.
    defer func() {
        t.mu.Lock()
        delete(t.mu.queues, nodeID)
        t.mu.Unlock()
    }()
    addr, err := t.resolver(nodeID)
    if err != nil {
        if log.V(1) {
            log.Errorf("failed to get address for node %d: %s", nodeID, err)
        }
        return
    }
    if log.V(1) {
        log.Infof("dialing node %d at %s", nodeID, addr)
    }
    conn, err := t.rpcContext.GRPCDial(addr.String())
    if err != nil {
        if log.V(1) {
            log.Errorf("failed to dial: %s", err)
        }
        return
    }
    client := NewMultiRaftClient(conn)
    ctx, cancel := context.WithCancel(context.TODO())
    defer cancel()
    if log.V(1) {
        log.Infof("establishing Raft transport stream to node %d at %s", nodeID, addr)
    }
    // We start two streams; one will be used for snapshots, the other for all
    // other traffic. This is done to prevent snapshots from blocking other
    // traffic.
    streams := make([]MultiRaft_RaftMessageClient, 2)
    for i := range streams {
        stream, err := client.RaftMessage(ctx)
        if err != nil {
            if log.V(1) {
                log.Errorf("failed to establish Raft transport stream to node %d at %s: %s", nodeID, addr, err)
            }
            return
        }
        streams[i] = stream
    }
    errCh := make(chan error, len(streams))
    // Starting workers in a task prevents data races during shutdown.
    t.rpcContext.Stopper.RunTask(func() {
        for i := range streams {
            // Avoid closing over a `range` binding.
            stream := streams[i]
            t.rpcContext.Stopper.RunWorker(func() {
                // NB: only one error will ever be read from this channel. That's fine,
                // given that the channel is buffered to the maximum number of errors
                // that will be written to it.
                errCh <- stream.RecvMsg(new(RaftMessageResponse))
            })
        }
    })
    snapStream := streams[0]
    restStream := streams[1]
    var raftIdleTimer util.Timer
    defer raftIdleTimer.Stop()
    for {
        raftIdleTimer.Reset(raftIdleTimeout)
        select {
        case <-t.rpcContext.Stopper.ShouldStop():
            return
        case <-raftIdleTimer.C:
            raftIdleTimer.Read = true
            if log.V(1) {
                log.Infof("closing Raft transport to %d at %s due to inactivity", nodeID, addr)
            }
            return
        case err := <-errCh:
            if log.V(1) {
                if err != nil {
                    log.Infof("remote node %d at %s closed Raft transport with error: %s", nodeID, addr, err)
                } else {
                    log.Infof("remote node %d at %s closed Raft transport", nodeID, addr)
                }
//......... (the rest of this example is omitted) .........