This article collects typical usage examples of the Go function NewShardReplicationWatcher from github.com/youtube/vitess/go/vt/discovery. If you are wondering what NewShardReplicationWatcher does, how to call it, or what real callers look like, the curated examples below should help.
In total, 12 code examples of NewShardReplicationWatcher are shown, ordered by popularity.
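Before the full examples, here is a minimal sketch of the pattern they all share, distilled from the calls below. The helper name watchShardExample and the duration placeholders (connTimeout, retryDelay, healthCheckTimeout, refreshInterval) are illustrative, not part of the library:

// watchShardExample is a hypothetical helper distilled from the examples
// below: it wires a HealthCheck to a ShardReplicationWatcher for one shard
// and blocks until the watcher has read the topology once.
func watchShardExample(ts topo.Server, cell, keyspace, shard string) error {
	// Three-argument NewHealthCheck form, as used in most examples below;
	// other examples pass extra arguments in older/newer Vitess versions.
	hc := discovery.NewHealthCheck(connTimeout, retryDelay, healthCheckTimeout)
	defer hc.Close()
	// The watcher feeds every tablet of (cell, keyspace, shard) into hc and
	// refreshes its view of the topology every refreshInterval.
	watcher := discovery.NewShardReplicationWatcher(
		ts, hc, cell, keyspace, shard,
		refreshInterval, discovery.DefaultTopoReadConcurrency)
	defer watcher.Stop()
	// Wait until the first topology read has completed, so hc already
	// knows about all tablets in the shard.
	return watcher.WaitForInitialTopology()
}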
Example 1: startHealthWatchers
// startHealthWatchers launches the topology watchers and health checking to monitor
// all tablets on the shard. Function should be called before the start of the schema
// swap process.
func (shardSwap *shardSchemaSwap) startHealthWatchers() error {
	shardSwap.tabletHealthCheck = discovery.NewHealthCheck(
		*vtctl.HealthCheckTopologyRefresh, *vtctl.HealthcheckRetryDelay, *vtctl.HealthCheckTimeout)
	shardSwap.tabletHealthCheck.SetListener(shardSwap, true /* sendDownEvents */)

	topoServer := shardSwap.parent.topoServer
	cellList, err := topoServer.GetKnownCells(shardSwap.parent.ctx)
	if err != nil {
		return err
	}
	for _, cell := range cellList {
		watcher := discovery.NewShardReplicationWatcher(
			topoServer,
			shardSwap.tabletHealthCheck,
			cell,
			shardSwap.parent.keyspace,
			shardSwap.shardName,
			*vtctl.HealthCheckTimeout,
			discovery.DefaultTopoReadConcurrency)
		shardSwap.tabletWatchers = append(shardSwap.tabletWatchers, watcher)
	}
	for _, watcher := range shardSwap.tabletWatchers {
		if err := watcher.WaitForInitialTopology(); err != nil {
			return err
		}
	}
	shardSwap.tabletHealthCheck.WaitForInitialStatsUpdates()
	return nil
}
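Example 1 only starts the watchers. A matching teardown, mirroring the watcher.Stop() / healthCheck.Close() calls used in Examples 2, 7 and 10, could plausibly look like this (the method name stopHealthWatchers is assumed, not taken from the source):

// stopHealthWatchers is a hypothetical counterpart to startHealthWatchers,
// based on the Stop()/Close() cleanup pattern used elsewhere in this article.
func (shardSwap *shardSchemaSwap) stopHealthWatchers() {
	for _, watcher := range shardSwap.tabletWatchers {
		watcher.Stop()
	}
	shardSwap.tabletWatchers = nil
	shardSwap.tabletHealthCheck.Close()
}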
Example 2: FindHealthyRdonlyEndPoint
// FindHealthyRdonlyEndPoint returns a random healthy endpoint.
// Since we don't want to use them all, we require at least
// minHealthyEndPoints servers to be healthy.
// May block up to -wait_for_healthy_rdonly_endpoints_timeout.
func FindHealthyRdonlyEndPoint(ctx context.Context, wr *wrangler.Wrangler, cell, keyspace, shard string) (*topodatapb.TabletAlias, error) {
	busywaitCtx, busywaitCancel := context.WithTimeout(ctx, *WaitForHealthyEndPointsTimeout)
	defer busywaitCancel()

	// Create a discovery healthcheck and wait for it to have at least one
	// rdonly endpoint at this point.
	healthCheck := discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout, "" /* statsSuffix */)
	watcher := discovery.NewShardReplicationWatcher(wr.TopoServer(), healthCheck, cell, keyspace, shard, *healthCheckTopologyRefresh, 5 /* topoReadConcurrency */)
	defer watcher.Stop()
	defer healthCheck.Close()
	if err := discovery.WaitForEndPoints(ctx, healthCheck, cell, keyspace, shard, []topodatapb.TabletType{topodatapb.TabletType_RDONLY}); err != nil {
		return nil, fmt.Errorf("error waiting for rdonly endpoints for (%v,%v/%v): %v", cell, keyspace, shard, err)
	}

	var healthyEndpoints []*topodatapb.EndPoint
	for {
		select {
		case <-busywaitCtx.Done():
			return nil, fmt.Errorf("Not enough endpoints to choose from in (%v,%v/%v), have %v healthy ones, need at least %v. Context error: %v", cell, keyspace, shard, len(healthyEndpoints), *minHealthyEndPoints, busywaitCtx.Err())
		default:
		}

		addrs := healthCheck.GetEndPointStatsFromTarget(keyspace, shard, topodatapb.TabletType_RDONLY)
		healthyEndpoints = make([]*topodatapb.EndPoint, 0, len(addrs))
		for _, addr := range addrs {
			// Note we do not check the 'Serving' flag here.
			// This is mainly to avoid the case where we run a
			// Diff between a source and destination, and the source
			// is not serving (disabled by TabletControl).
			// When we switch the tablet to 'worker', it will
			// go back to serving state.
			if addr.Stats == nil || addr.Stats.HealthError != "" || addr.Stats.SecondsBehindMaster > 30 {
				continue
			}
			healthyEndpoints = append(healthyEndpoints, addr.EndPoint)
		}
		if len(healthyEndpoints) >= *minHealthyEndPoints {
			break
		}

		deadlineForLog, _ := busywaitCtx.Deadline()
		wr.Logger().Infof("Waiting for enough endpoints to become available. available: %v required: %v Waiting up to %.1f more seconds.", len(healthyEndpoints), *minHealthyEndPoints, deadlineForLog.Sub(time.Now()).Seconds())
		// Block for 1 second because 2 seconds is the -health_check_interval flag value in integration tests.
		timer := time.NewTimer(1 * time.Second)
		select {
		case <-busywaitCtx.Done():
			timer.Stop()
		case <-timer.C:
		}
	}

	// A random server in the list is what we want.
	index := rand.Intn(len(healthyEndpoints))
	return &topodatapb.TabletAlias{
		Cell: cell,
		Uid:  healthyEndpoints[index].Uid,
	}, nil
}
Example 3: init
// init phase:
// - read the destination keyspace, make sure it has 'servedFrom' values
func (scw *SplitCloneWorker) init(ctx context.Context) error {
	scw.setState(WorkerStateInit)

	// read the keyspace and validate it
	shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
	var err error
	scw.destinationKeyspaceInfo, err = scw.wr.TopoServer().GetKeyspace(shortCtx, scw.destinationKeyspace)
	cancel()
	if err != nil {
		return fmt.Errorf("cannot read (destination) keyspace %v: %v", scw.destinationKeyspace, err)
	}

	// Set source and destination shard infos.
	switch scw.cloneType {
	case horizontalResharding:
		if err := scw.initShardsForHorizontalResharding(ctx); err != nil {
			return err
		}
	case verticalSplit:
		if err := scw.initShardsForVerticalSplit(ctx); err != nil {
			return err
		}
	}
	if err := scw.sanityCheckShardInfos(); err != nil {
		return err
	}
	if scw.cloneType == horizontalResharding {
		if err := scw.loadVSchema(ctx); err != nil {
			return err
		}
	}

	// Initialize healthcheck and add destination shards to it.
	scw.healthCheck = discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout)
	scw.tsc = discovery.NewTabletStatsCacheDoNotSetListener(scw.cell)
	// We set sendDownEvents=true because it's required by TabletStatsCache.
	scw.healthCheck.SetListener(scw, true /* sendDownEvents */)

	// Start watchers to get tablets added automatically to healthCheck.
	allShards := append(scw.sourceShards, scw.destinationShards...)
	for _, si := range allShards {
		watcher := discovery.NewShardReplicationWatcher(scw.wr.TopoServer(), scw.healthCheck,
			scw.cell, si.Keyspace(), si.ShardName(),
			*healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
		scw.shardWatchers = append(scw.shardWatchers, watcher)
	}
	return nil
}
Example 4: newBinlogPlayerController
func newBinlogPlayerController(ts topo.Server, vtClientFactory func() binlogplayer.VtClient, mysqld mysqlctl.MysqlDaemon, cell string, keyRange *topodatapb.KeyRange, sourceShard *topodatapb.Shard_SourceShard, dbName string) *BinlogPlayerController {
	blc := &BinlogPlayerController{
		ts:                ts,
		vtClientFactory:   vtClientFactory,
		mysqld:            mysqld,
		cell:              cell,
		keyRange:          keyRange,
		dbName:            dbName,
		sourceShard:       sourceShard,
		binlogPlayerStats: binlogplayer.NewStats(),
		healthCheck:       discovery.NewHealthCheck(*binlogplayer.BinlogPlayerConnTimeout, *retryDelay, *healthCheckTimeout),
	}
	blc.shardReplicationWatcher = discovery.NewShardReplicationWatcher(ts, blc.healthCheck, cell, sourceShard.Keyspace, sourceShard.Shard, *healthCheckTopologyRefresh, 5)
	return blc
}
Example 5: newBinlogPlayerController
// newBinlogPlayerController instantiates a new BinlogPlayerController.
// Use Start() and Stop() to start and stop it.
// Once stopped, you should call Close() to stop and free resources e.g. the
// healthcheck instance.
func newBinlogPlayerController(ts topo.Server, vtClientFactory func() binlogplayer.VtClient, mysqld mysqlctl.MysqlDaemon, cell string, keyRange *topodatapb.KeyRange, sourceShard *topodatapb.Shard_SourceShard, dbName string) *BinlogPlayerController {
	healthCheck := discovery.NewHealthCheck(*binlogplayer.BinlogPlayerConnTimeout, *healthcheckRetryDelay, *healthCheckTimeout)
	return &BinlogPlayerController{
		ts:                ts,
		vtClientFactory:   vtClientFactory,
		mysqld:            mysqld,
		cell:              cell,
		keyRange:          keyRange,
		dbName:            dbName,
		sourceShard:       sourceShard,
		binlogPlayerStats: binlogplayer.NewStats(),
		// Note: healthCheck and shardReplicationWatcher remain active independent
		// of whether the BinlogPlayerController is Start()'d or Stop()'d.
		// Use Close() after Stop() to finally close them and free their resources.
		healthCheck:             healthCheck,
		shardReplicationWatcher: discovery.NewShardReplicationWatcher(ts, healthCheck, cell, sourceShard.Keyspace, sourceShard.Shard, *healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency),
	}
}
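The doc comment above implies a three-phase lifecycle for callers. A hedged usage sketch follows; the argument values and the exact Start signature are assumptions, only the Start/Stop/Close method names come from the doc comment:

// Hypothetical caller of Example 5's constructor; ts, vtClientFactory,
// mysqld, cell, keyRange, sourceShard and dbName are assumed to exist.
blc := newBinlogPlayerController(ts, vtClientFactory, mysqld, cell, keyRange, sourceShard, dbName)
blc.Start(ctx) // begin replaying binlogs (signature assumed)
// ... filtered replication runs ...
blc.Stop()  // pause replay; healthCheck and shardReplicationWatcher stay active
blc.Close() // finally stop the watcher and close the healthcheck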
Example 6: newBinlogPlayerController
func newBinlogPlayerController(ts topo.Server, vtClientFactory func() binlogplayer.VtClient, mysqld mysqlctl.MysqlDaemon, cell string, keyspaceIDType pb.KeyspaceIdType, keyRange *pb.KeyRange, sourceShard *pb.Shard_SourceShard, dbName string) *BinlogPlayerController {
	blc := &BinlogPlayerController{
		ts:                   ts,
		vtClientFactory:      vtClientFactory,
		mysqld:               mysqld,
		cell:                 cell,
		keyspaceIDType:       keyspaceIDType,
		keyRange:             keyRange,
		dbName:               dbName,
		sourceShard:          sourceShard,
		binlogPlayerStats:    binlogplayer.NewBinlogPlayerStats(),
		healthCheck:          discovery.NewHealthCheck(*binlogplayer.BinlogPlayerConnTimeout, *retryDelay),
		initialEndpointFound: make(chan struct{}),
	}
	blc.healthCheck.SetListener(blc)
	blc.shardReplicationWatcher = discovery.NewShardReplicationWatcher(ts, blc.healthCheck, cell, sourceShard.Keyspace, sourceShard.Shard, *healthcheckTopologyRefresh, 5)
	return blc
}
Example 7: FindHealthyRdonlyTablet
// FindHealthyRdonlyTablet returns a random healthy RDONLY tablet.
// Since we don't want to use them all, we require at least
// minHealthyRdonlyTablets servers to be healthy.
// May block up to -wait_for_healthy_rdonly_tablets_timeout.
func FindHealthyRdonlyTablet(ctx context.Context, wr *wrangler.Wrangler, healthCheck discovery.HealthCheck, cell, keyspace, shard string, minHealthyRdonlyTablets int) (*topodatapb.TabletAlias, error) {
	if healthCheck == nil {
		// No healthcheck instance provided. Create one.
		healthCheck = discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout)
		watcher := discovery.NewShardReplicationWatcher(wr.TopoServer(), healthCheck, cell, keyspace, shard, *healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
		defer watcher.Stop()
		defer healthCheck.Close()
	}

	healthyTablets, err := waitForHealthyRdonlyTablets(ctx, wr, healthCheck, cell, keyspace, shard, minHealthyRdonlyTablets, *waitForHealthyTabletsTimeout)
	if err != nil {
		return nil, err
	}

	// A random server in the list is what we want.
	index := rand.Intn(len(healthyTablets))
	return healthyTablets[index].Tablet.Alias, nil
}
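Note the ownership rule in Example 7: the deferred Stop()/Close() only run when the function created the healthcheck itself. A caller that wants the function to manage discovery can simply pass nil; in this sketch the value 2 for minHealthyRdonlyTablets is a placeholder:

// Hypothetical call letting FindHealthyRdonlyTablet create and tear down
// its own HealthCheck and ShardReplicationWatcher.
alias, err := FindHealthyRdonlyTablet(ctx, wr, nil /* healthCheck */, cell, keyspace, shard, 2 /* minHealthyRdonlyTablets */)
if err != nil {
	return err
}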
Example 8: findTargets
// findTargets phase:
// - find one rdonly in the source shard
// - mark it as 'worker' pointing back to us
// - get the aliases of all the targets
func (vscw *VerticalSplitCloneWorker) findTargets(ctx context.Context) error {
	vscw.setState(WorkerStateFindTargets)

	// find an appropriate tablet in the source shard
	var err error
	vscw.sourceAlias, err = FindWorkerTablet(ctx, vscw.wr, vscw.cleaner, nil /* tsc */, vscw.cell, vscw.sourceKeyspace, "0", vscw.minHealthyRdonlyTablets)
	if err != nil {
		return fmt.Errorf("FindWorkerTablet() failed for %v/%v/0: %v", vscw.cell, vscw.sourceKeyspace, err)
	}
	vscw.wr.Logger().Infof("Using tablet %v as the source", topoproto.TabletAliasString(vscw.sourceAlias))

	// get the tablet info for it
	shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
	ti, err := vscw.wr.TopoServer().GetTablet(shortCtx, vscw.sourceAlias)
	cancel()
	if err != nil {
		return fmt.Errorf("cannot read tablet %v: %v", topoproto.TabletAliasString(vscw.sourceAlias), err)
	}
	vscw.sourceTablet = ti.Tablet

	// stop replication on it
	shortCtx, cancel = context.WithTimeout(ctx, *remoteActionsTimeout)
	err = vscw.wr.TabletManagerClient().StopSlave(shortCtx, vscw.sourceTablet)
	cancel()
	if err != nil {
		return fmt.Errorf("cannot stop replication on tablet %v", topoproto.TabletAliasString(vscw.sourceAlias))
	}
	wrangler.RecordStartSlaveAction(vscw.cleaner, vscw.sourceTablet)

	// Initialize healthcheck and add the destination shard to it.
	vscw.healthCheck = discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout)
	vscw.tsc = discovery.NewTabletStatsCache(vscw.healthCheck, vscw.cell)
	watcher := discovery.NewShardReplicationWatcher(vscw.wr.TopoServer(), vscw.healthCheck,
		vscw.cell, vscw.destinationKeyspace, vscw.destinationShard,
		*healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
	vscw.destinationShardWatchers = append(vscw.destinationShardWatchers, watcher)

	// Make sure we find a master for each destination shard and log it.
	vscw.wr.Logger().Infof("Finding a MASTER tablet for each destination shard...")
	waitCtx, waitCancel := context.WithTimeout(ctx, *waitForHealthyTabletsTimeout)
	defer waitCancel()
	if err := vscw.tsc.WaitForTablets(waitCtx, vscw.cell, vscw.destinationKeyspace, vscw.destinationShard, []topodatapb.TabletType{topodatapb.TabletType_MASTER}); err != nil {
		return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v (in cell: %v): %v", vscw.destinationKeyspace, vscw.destinationShard, vscw.cell, err)
	}
	masters := vscw.tsc.GetHealthyTabletStats(vscw.destinationKeyspace, vscw.destinationShard, topodatapb.TabletType_MASTER)
	if len(masters) == 0 {
		return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v (in cell: %v) in HealthCheck: empty TabletStats list", vscw.destinationKeyspace, vscw.destinationShard, vscw.cell)
	}
	master := masters[0]

	// Get the MySQL database name of the tablet.
	keyspaceAndShard := topoproto.KeyspaceShardString(vscw.destinationKeyspace, vscw.destinationShard)
	vscw.destinationDbNames[keyspaceAndShard] = topoproto.TabletDbName(master.Tablet)

	// TODO(mberlin): Verify on the destination master that the
	// _vt.blp_checkpoint table has the latest schema.
	vscw.wr.Logger().Infof("Using tablet %v as destination master for %v/%v", topoproto.TabletAliasString(master.Tablet.Alias), vscw.destinationKeyspace, vscw.destinationShard)
	vscw.wr.Logger().Infof("NOTE: The master of a destination shard may change over the course of the copy, e.g. due to a reparent. The HealthCheck module will track and log master changes, and any error message will always refer to the master address that was actually used.")
	return nil
}
Example 9: findTargets
// findTargets phase:
// - find one rdonly in the source shard
// - mark it as 'worker' pointing back to us
// - get the aliases of all the targets
func (scw *LegacySplitCloneWorker) findTargets(ctx context.Context) error {
	scw.setState(WorkerStateFindTargets)
	var err error

	// find an appropriate tablet in the source shards
	scw.sourceAliases = make([]*topodatapb.TabletAlias, len(scw.sourceShards))
	for i, si := range scw.sourceShards {
		scw.sourceAliases[i], err = FindWorkerTablet(ctx, scw.wr, scw.cleaner, scw.tsc, scw.cell, si.Keyspace(), si.ShardName(), scw.minHealthyRdonlyTablets)
		if err != nil {
			return fmt.Errorf("FindWorkerTablet() failed for %v/%v/%v: %v", scw.cell, si.Keyspace(), si.ShardName(), err)
		}
		scw.wr.Logger().Infof("Using tablet %v as source for %v/%v", topoproto.TabletAliasString(scw.sourceAliases[i]), si.Keyspace(), si.ShardName())
	}

	// get the tablet info for them, and stop their replication
	scw.sourceTablets = make([]*topodatapb.Tablet, len(scw.sourceAliases))
	for i, alias := range scw.sourceAliases {
		shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
		ti, err := scw.wr.TopoServer().GetTablet(shortCtx, alias)
		cancel()
		if err != nil {
			return fmt.Errorf("cannot read tablet %v: %v", topoproto.TabletAliasString(alias), err)
		}
		scw.sourceTablets[i] = ti.Tablet

		shortCtx, cancel = context.WithTimeout(ctx, *remoteActionsTimeout)
		err = scw.wr.TabletManagerClient().StopSlave(shortCtx, scw.sourceTablets[i])
		cancel()
		if err != nil {
			return fmt.Errorf("cannot stop replication on tablet %v", topoproto.TabletAliasString(alias))
		}
		wrangler.RecordStartSlaveAction(scw.cleaner, scw.sourceTablets[i])
	}

	// Initialize healthcheck and add destination shards to it.
	scw.healthCheck = discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout)
	scw.tsc = discovery.NewTabletStatsCache(scw.healthCheck, scw.cell)
	for _, si := range scw.destinationShards {
		watcher := discovery.NewShardReplicationWatcher(scw.wr.TopoServer(), scw.healthCheck,
			scw.cell, si.Keyspace(), si.ShardName(),
			*healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
		scw.destinationShardWatchers = append(scw.destinationShardWatchers, watcher)
	}

	// Make sure we find a master for each destination shard and log it.
	scw.wr.Logger().Infof("Finding a MASTER tablet for each destination shard...")
	for _, si := range scw.destinationShards {
		waitCtx, waitCancel := context.WithTimeout(ctx, 10*time.Second)
		defer waitCancel()
		if err := scw.tsc.WaitForTablets(waitCtx, scw.cell, si.Keyspace(), si.ShardName(), []topodatapb.TabletType{topodatapb.TabletType_MASTER}); err != nil {
			return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v: %v", si.Keyspace(), si.ShardName(), err)
		}
		masters := scw.tsc.GetHealthyTabletStats(si.Keyspace(), si.ShardName(), topodatapb.TabletType_MASTER)
		if len(masters) == 0 {
			return fmt.Errorf("cannot find MASTER tablet for destination shard for %v/%v in HealthCheck: empty TabletStats list", si.Keyspace(), si.ShardName())
		}
		master := masters[0]

		// Get the MySQL database name of the tablet.
		shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
		ti, err := scw.wr.TopoServer().GetTablet(shortCtx, master.Tablet.Alias)
		cancel()
		if err != nil {
			return fmt.Errorf("cannot get the TabletInfo for destination master (%v) to find out its db name: %v", topoproto.TabletAliasString(master.Tablet.Alias), err)
		}
		keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName())
		scw.destinationDbNames[keyspaceAndShard] = ti.DbName()

		// TODO(mberlin): Verify on the destination master that the
		// _vt.blp_checkpoint table has the latest schema.
		scw.wr.Logger().Infof("Using tablet %v as destination master for %v/%v", topoproto.TabletAliasString(master.Tablet.Alias), si.Keyspace(), si.ShardName())
	}
	scw.wr.Logger().Infof("NOTE: The master of a destination shard may change over the course of the copy, e.g. due to a reparent. The HealthCheck module will track and log master changes, and any error message will always refer to the master address that was actually used.")

	// Set up the throttler for each destination shard.
	for _, si := range scw.destinationShards {
		keyspaceAndShard := topoproto.KeyspaceShardString(si.Keyspace(), si.ShardName())
		t, err := throttler.NewThrottler(
			keyspaceAndShard, "transactions", scw.destinationWriterCount, scw.maxTPS, throttler.ReplicationLagModuleDisabled)
		if err != nil {
			return fmt.Errorf("cannot instantiate throttler: %v", err)
		}
		scw.destinationThrottlers[keyspaceAndShard] = t
	}
	return nil
}
Example 10: waitForDrainInCell
func (wr *Wrangler) waitForDrainInCell(ctx context.Context, cell, keyspace, shard string, servedType topodatapb.TabletType,
	retryDelay, healthCheckTopologyRefresh, healthcheckRetryDelay, healthCheckTimeout time.Duration) error {
	hc := discovery.NewHealthCheck(healthCheckTimeout /* connectTimeout */, healthcheckRetryDelay, healthCheckTimeout)
	defer hc.Close()
	watcher := discovery.NewShardReplicationWatcher(wr.TopoServer(), hc, cell, keyspace, shard, healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
	defer watcher.Stop()

	if err := discovery.WaitForTablets(ctx, hc, cell, keyspace, shard, []topodatapb.TabletType{servedType}); err != nil {
		return fmt.Errorf("%v: error waiting for initial %v tablets for %v/%v: %v", cell, servedType, keyspace, shard, err)
	}

	wr.Logger().Infof("%v: Waiting for %.1f seconds to make sure that the discovery module retrieves healthcheck information from all tablets.",
		cell, healthCheckTimeout.Seconds())
	// Wait at least -vtctl_healthcheck_timeout to make sure that we see all
	// healthy tablets. Otherwise, we might miss some tablets. It's safe not
	// to wait longer than that because we would only miss slow tablets, and
	// vtgate would not serve from such tablets anyway.
	time.Sleep(healthCheckTimeout)

	// Now check the QPS rate of all tablets until the timeout expires.
	startTime := time.Now()
	for {
		// map key: tablet uid
		drainedHealthyTablets := make(map[uint32]*discovery.TabletStats)
		notDrainedHealthyTablets := make(map[uint32]*discovery.TabletStats)

		healthyTablets := discovery.RemoveUnhealthyTablets(
			hc.GetTabletStatsFromTarget(keyspace, shard, servedType))
		for _, ts := range healthyTablets {
			if ts.Stats.Qps == 0.0 {
				drainedHealthyTablets[ts.Tablet.Alias.Uid] = ts
			} else {
				notDrainedHealthyTablets[ts.Tablet.Alias.Uid] = ts
			}
		}

		if len(drainedHealthyTablets) == len(healthyTablets) {
			wr.Logger().Infof("%v: All %d healthy tablets were drained after %.1f seconds (not counting %.1f seconds for the initial wait).",
				cell, len(healthyTablets), time.Now().Sub(startTime).Seconds(), healthCheckTimeout.Seconds())
			break
		}

		// Continue waiting, sleep in between.
		deadlineString := ""
		if d, ok := ctx.Deadline(); ok {
			deadlineString = fmt.Sprintf(" up to %.1f more seconds", d.Sub(time.Now()).Seconds())
		}
		wr.Logger().Infof("%v: Waiting%v for all healthy tablets to be drained (%d/%d done).",
			cell, deadlineString, len(drainedHealthyTablets), len(healthyTablets))

		timer := time.NewTimer(retryDelay)
		select {
		case <-ctx.Done():
			timer.Stop()
			var l []string
			for _, ts := range notDrainedHealthyTablets {
				l = append(l, formatTabletStats(ts))
			}
			return fmt.Errorf("%v: WaitForDrain failed for %v tablets in %v/%v. Only %d/%d tablets were drained. err: %v. List of tablets which were not drained: %v",
				cell, servedType, keyspace, shard, len(drainedHealthyTablets), len(healthyTablets), ctx.Err(), strings.Join(l, ";"))
		case <-timer.C:
		}
	}
	return nil
}
Example 11: init
// init phase:
// - read the destination keyspace, make sure it has 'servedFrom' values
func (scw *SplitCloneWorker) init(ctx context.Context) error {
	scw.setState(WorkerStateInit)
	var err error

	// read the keyspace and validate it
	shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
	scw.keyspaceInfo, err = scw.wr.TopoServer().GetKeyspace(shortCtx, scw.keyspace)
	cancel()
	if err != nil {
		return fmt.Errorf("cannot read keyspace %v: %v", scw.keyspace, err)
	}

	// find the OverlappingShards in the keyspace
	shortCtx, cancel = context.WithTimeout(ctx, *remoteActionsTimeout)
	osList, err := topotools.FindOverlappingShards(shortCtx, scw.wr.TopoServer(), scw.keyspace)
	cancel()
	if err != nil {
		return fmt.Errorf("cannot FindOverlappingShards in %v: %v", scw.keyspace, err)
	}

	// find the shard we mentioned in there, if any
	os := topotools.OverlappingShardsForShard(osList, scw.shard)
	if os == nil {
		return fmt.Errorf("the specified shard %v/%v is not in any overlapping shard", scw.keyspace, scw.shard)
	}
	scw.wr.Logger().Infof("Found overlapping shards: %+v\n", os)

	// One side should have served types, the other one none;
	// figure out which is which, then double check them all.
	if len(os.Left[0].ServedTypes) > 0 {
		scw.sourceShards = os.Left
		scw.destinationShards = os.Right
	} else {
		scw.sourceShards = os.Right
		scw.destinationShards = os.Left
	}

	// Verify that filtered replication is not already enabled.
	for _, si := range scw.destinationShards {
		if len(si.SourceShards) > 0 {
			return fmt.Errorf("destination shard %v/%v has filtered replication already enabled from a previous resharding (ShardInfo is set)."+
				" This requires manual intervention e.g. use vtctl SourceShardDelete to remove it",
				si.Keyspace(), si.ShardName())
		}
	}

	// validate all serving types
	servingTypes := []topodatapb.TabletType{topodatapb.TabletType_MASTER, topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY}
	for _, st := range servingTypes {
		for _, si := range scw.sourceShards {
			if si.GetServedType(st) == nil {
				return fmt.Errorf("source shard %v/%v is not serving type %v", si.Keyspace(), si.ShardName(), st)
			}
		}
	}
	for _, si := range scw.destinationShards {
		if len(si.ServedTypes) > 0 {
			return fmt.Errorf("destination shard %v/%v is serving some types", si.Keyspace(), si.ShardName())
		}
	}

	// read the vschema if needed
	var keyspaceSchema *vindexes.KeyspaceSchema
	if *useV3ReshardingMode {
		kschema, err := scw.wr.TopoServer().GetVSchema(ctx, scw.keyspace)
		if err != nil {
			return fmt.Errorf("cannot load VSchema for keyspace %v: %v", scw.keyspace, err)
		}
		if kschema == nil {
			return fmt.Errorf("no VSchema for keyspace %v", scw.keyspace)
		}
		keyspaceSchema, err = vindexes.BuildKeyspaceSchema(kschema, scw.keyspace)
		if err != nil {
			return fmt.Errorf("cannot build vschema for keyspace %v: %v", scw.keyspace, err)
		}
		scw.keyspaceSchema = keyspaceSchema
	}

	// Initialize healthcheck and add destination shards to it.
	scw.healthCheck = discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout)
	allShards := append(scw.sourceShards, scw.destinationShards...)
	for _, si := range allShards {
		watcher := discovery.NewShardReplicationWatcher(scw.wr.TopoServer(), scw.healthCheck,
			scw.cell, si.Keyspace(), si.ShardName(),
			*healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency)
		scw.shardWatchers = append(scw.shardWatchers, watcher)
	}
	return nil
}
Example 12: waitForDrainInCell
func (wr *Wrangler) waitForDrainInCell(ctx context.Context, cell, keyspace, shard string, servedType topodatapb.TabletType,
	retryDelay, healthCheckTopologyRefresh, healthcheckRetryDelay, healthCheckTimeout time.Duration) error {
	hc := discovery.NewHealthCheck(healthCheckTimeout /* connectTimeout */, healthcheckRetryDelay, healthCheckTimeout, cell)
	defer hc.Close()
	watcher := discovery.NewShardReplicationWatcher(wr.TopoServer(), hc, cell, keyspace, shard, healthCheckTopologyRefresh, 5 /* topoReadConcurrency */)
	defer watcher.Stop()

	if err := discovery.WaitForEndPoints(ctx, hc, cell, keyspace, shard, []topodatapb.TabletType{servedType}); err != nil {
		return fmt.Errorf("%v: error waiting for initial %v endpoints for %v/%v: %v", cell, servedType, keyspace, shard, err)
	}

	wr.Logger().Infof("%v: Waiting for %.1f seconds to make sure that the discovery module retrieves healthcheck information from all tablets.",
		cell, healthCheckTimeout.Seconds())
	// Wait at least -vtctl_healthcheck_timeout to make sure that we see all
	// healthy tablets. Otherwise, we might miss some tablets. It's safe not
	// to wait longer than that because we would only miss slow tablets, and
	// vtgate would not serve from such tablets anyway.
	time.Sleep(healthCheckTimeout)

	// Now check the QPS rate of all tablets until the timeout expires.
	startTime := time.Now()
	for {
		healthyTabletsCount := 0
		// map key: tablet uid
		drainedHealthyTablets := make(map[uint32]*discovery.EndPointStats)
		notDrainedHealthyTablets := make(map[uint32]*discovery.EndPointStats)

		addrs := hc.GetEndPointStatsFromTarget(keyspace, shard, servedType)
		for _, addr := range addrs {
			// TODO(mberlin): Move this health check logic into a common function
			// because other code uses it as well e.g. go/vt/worker/topo_utils.go.
			if addr.Stats == nil || addr.Stats.HealthError != "" || addr.Stats.SecondsBehindMaster > 30 {
				// not healthy
				continue
			}

			healthyTabletsCount++
			if addr.Stats.Qps == 0.0 {
				drainedHealthyTablets[addr.EndPoint.Uid] = addr
			} else {
				notDrainedHealthyTablets[addr.EndPoint.Uid] = addr
			}
		}

		if len(drainedHealthyTablets) == healthyTabletsCount {
			wr.Logger().Infof("%v: All %d healthy tablets were drained after %.1f seconds (not counting %.1f seconds for the initial wait).",
				cell, healthyTabletsCount, time.Now().Sub(startTime).Seconds(), healthCheckTimeout.Seconds())
			break
		}

		// Continue waiting, sleep in between.
		deadlineString := ""
		if d, ok := ctx.Deadline(); ok {
			deadlineString = fmt.Sprintf(" up to %.1f more seconds", d.Sub(time.Now()).Seconds())
		}
		wr.Logger().Infof("%v: Waiting%v for all healthy tablets to be drained (%d/%d done).",
			cell, deadlineString, len(drainedHealthyTablets), healthyTabletsCount)

		timer := time.NewTimer(retryDelay)
		select {
		case <-ctx.Done():
			timer.Stop()
			var l []string
			for _, eps := range notDrainedHealthyTablets {
				l = append(l, formatEndpointStats(eps))
			}
			return fmt.Errorf("%v: WaitForDrain failed for %v tablets in %v/%v. Only %d/%d tablets were drained. err: %v. List of tablets which were not drained:\n%v",
				cell, servedType, keyspace, shard, len(drainedHealthyTablets), healthyTabletsCount, ctx.Err(), strings.Join(l, "\n"))
		case <-timer.C:
		}
	}
	return nil
}