本文整理汇总了Golang中github.com/youtube/vitess/go/vt/topo.GetTabletMapForShard函数的典型用法代码示例。如果您正苦于以下问题：Golang GetTabletMapForShard函数的具体用法？Golang GetTabletMapForShard怎么用？Golang GetTabletMapForShard使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了GetTabletMapForShard函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: SetBlacklistedTablesByShard
// SetBlacklistedTablesByShard pushes the given blacklisted table list to
// every tablet of type tabletType in keyspace/shard, starting one goroutine
// per matching tablet and waiting for all of them. It would work for the
// master, but it wouldn't be very efficient.
//
// A partial tablet map (topo.ErrPartialResult) only produces a warning, and
// per-tablet failures are logged rather than returned: once the tablet map
// has been read, the function always returns nil.
func (wr *Wrangler) SetBlacklistedTablesByShard(keyspace, shard string, tabletType topo.TabletType, tables []string) error {
	tablets, err := topo.GetTabletMapForShard(wr.ts, keyspace, shard)
	if err != nil {
		if err != topo.ErrPartialResult {
			return err
		}
		// Partial results are tolerated; carry on with what we got.
		log.Warningf("SetBlacklistedTablesByShard: got partial result, may not blacklist everything everywhere")
	}

	// Best effort from here on: failures are logged, never returned.
	var pending sync.WaitGroup
	for _, candidate := range tablets {
		if candidate.Type != tabletType {
			continue
		}
		pending.Add(1)
		go func(target *topo.TabletInfo) {
			defer pending.Done()
			if setErr := wr.ai.SetBlacklistedTables(target, tables, wr.ActionTimeout()); setErr != nil {
				log.Warningf("SetBlacklistedTablesByShard: failed to set tables for %v: %v", target.Alias, setErr)
			}
		}(candidate)
	}
	pending.Wait()
	return nil
}
示例2: reparentShardLocked
// reparentShardLocked makes masterElectTabletAlias the master of
// keyspace/shard. The caller is expected to hold the shard lock (see the
// critical read below).
//
// It refuses to reparent onto the tablet that is already master unless
// forceReparentToCurrentMaster is set, and fails if the master-elect is not
// in the shard's tablet map. The graceful path is used when the shard has a
// master and no force was requested; otherwise the brutal path is used.
func (wr *Wrangler) reparentShardLocked(keyspace, shard string, masterElectTabletAlias topo.TabletAlias, leaveMasterReadOnly, forceReparentToCurrentMaster bool) error {
	// Critical read: we want up to date info (and the shard is locked).
	si, err := wr.ts.GetShardCritical(keyspace, shard)
	if err != nil {
		return err
	}

	tablets, err := topo.GetTabletMapForShard(wr.ts, keyspace, shard)
	if err != nil {
		return err
	}
	slaves, masters := sortedTabletMap(tablets)

	if !forceReparentToCurrentMaster && si.MasterAlias == masterElectTabletAlias {
		return fmt.Errorf("master-elect tablet %v is already master - specify -force to override", masterElectTabletAlias)
	}

	elect, found := tablets[masterElectTabletAlias]
	if !found {
		return fmt.Errorf("master-elect tablet %v not found in replication graph %v/%v %v", masterElectTabletAlias, keyspace, shard, mapKeys(tablets))
	}

	graceful := !si.MasterAlias.IsZero() && !forceReparentToCurrentMaster
	if graceful {
		err = wr.reparentShardGraceful(si, slaves, masters, elect, leaveMasterReadOnly)
	} else {
		err = wr.reparentShardBrutal(si, slaves, masters, elect, leaveMasterReadOnly, forceReparentToCurrentMaster)
	}
	if err != nil {
		return err
	}

	// Only log on success; on failure the returned error is what gets shown.
	log.Infof("reparentShard finished")
	return nil
}
示例3: shardExternallyReparentedLocked
// shardExternallyReparentedLocked brings the topology in line with a
// reparent that happened outside of this code: it fixes up the tablets via
// reparentShardExternal, stores masterElectTabletAlias as the shard's master
// and rebuilds the shard serving graph for the old and new master cells.
// The caller is expected to hold the shard lock.
func (wr *Wrangler) shardExternallyReparentedLocked(keyspace, shard string, masterElectTabletAlias topo.TabletAlias) error {
	// Read the shard and make sure the master is not already good.
	// Critical read: we want up to date info (and the shard is locked).
	si, err := wr.ts.GetShardCritical(keyspace, shard)
	if err != nil {
		return err
	}
	if si.MasterAlias == masterElectTabletAlias {
		return fmt.Errorf("master-elect tablet %v is already master", masterElectTabletAlias)
	}

	// Read the tablets and make sure the master elect is known to us.
	// We deliberately keep going with a partial tablet map, which usually
	// happens when a cell is not reachable. After these checks, the
	// guarantees we have are:
	// - the global cell is reachable (we just locked and read the shard)
	// - the local cell that contains the new master is reachable
	//   (we check below that the new master is in the list)
	// That should be enough.
	tablets, err := topo.GetTabletMapForShard(wr.ts, keyspace, shard)
	if err != nil {
		if err != topo.ErrPartialResult {
			return err
		}
		log.Warningf("Got topo.ErrPartialResult from GetTabletMapForShard, may need to re-init some tablets")
	}

	elect, known := tablets[masterElectTabletAlias]
	if !known {
		return fmt.Errorf("master-elect tablet %v not found in replication graph %v/%v %v", masterElectTabletAlias, keyspace, shard, mapKeys(tablets))
	}

	// Sort the tablets, and handle them.
	slaves, masters := sortedTabletMap(tablets)
	if err = wr.reparentShardExternal(slaves, masters, elect); err != nil {
		log.Infof("Skipping shard rebuild with failed reparent")
		return err
	}

	// Cells to rebuild: the old master's cell, plus the new master's cell
	// when it differs. This must be computed before MasterAlias is
	// overwritten below.
	oldCell, newCell := si.MasterAlias.Cell, masterElectTabletAlias.Cell
	cells := []string{oldCell}
	if oldCell != newCell {
		cells = append(cells, newCell)
	}

	// Now update the master record in the shard object.
	log.Infof("Updating Shard's MasterAlias record")
	si.MasterAlias = masterElectTabletAlias
	if err = wr.ts.UpdateShard(si); err != nil {
		return err
	}

	// And rebuild the shard serving graph.
	log.Infof("Rebuilding shard serving graph data")
	return topotools.RebuildShard(wr.ts, elect.Keyspace, elect.Shard, cells, wr.lockTimeout, interrupted)
}
示例4: shardReplicationStatuses
// shardReplicationStatuses reads the tablet map of the given shard, flattens
// it into a slice of tablets and returns that slice together with whatever
// tabletReplicationStatuses reports for it. If the tablet map cannot be read,
// both slices are nil and the error is returned.
func (wr *Wrangler) shardReplicationStatuses(shardInfo *topo.ShardInfo) ([]*topo.TabletInfo, []*myproto.ReplicationStatus, error) {
	// FIXME(msolomon) this assumes no hierarchical replication, which is currently the case.
	keyspace, shard := shardInfo.Keyspace(), shardInfo.ShardName()
	tabletMap, err := topo.GetTabletMapForShard(wr.ts, keyspace, shard)
	if err != nil {
		return nil, nil, err
	}

	// CopyMapValues returns an untyped value; assert it back to the slice type.
	flattened := topotools.CopyMapValues(tabletMap, []*topo.TabletInfo{})
	tablets := flattened.([]*topo.TabletInfo)

	statuses, err := wr.tabletReplicationStatuses(tablets)
	return tablets, statuses, err
}
示例5: DeleteShard
// DeleteShard will do all the necessary changes in the topology server
// to entirely remove a shard.
//
// With recursive set, every tablet record in the shard is deleted first;
// without it, the call fails if the shard still has tablets. Failures while
// cleaning the per-cell replication and serving graph data are only logged
// as warnings; the result of the final shard deletion is returned.
func (wr *Wrangler) DeleteShard(ctx context.Context, keyspace, shard string, recursive bool) error {
	si, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return err
	}
	tablets, err := topo.GetTabletMapForShard(ctx, wr.ts, keyspace, shard)
	if err != nil {
		return err
	}

	switch {
	case recursive:
		wr.Logger().Infof("Deleting all tablets in shard %v/%v", keyspace, shard)
		for alias := range tablets {
			// We don't care about scrapping or updating the replication graph,
			// because we're about to delete the entire replication graph.
			wr.Logger().Infof("Deleting tablet %v", alias)
			delErr := wr.TopoServer().DeleteTablet(ctx, alias)
			if delErr == nil || delErr == topo.ErrNoNode {
				continue
			}
			// Unlike the errors in the non-recursive steps further down, we
			// don't want to continue if a DeleteTablet fails. If we continued
			// and deleted the replication graph, the tablet record would be
			// orphaned, since we'd no longer know it belongs to this shard.
			//
			// If the problem is temporary, or resolved externally, re-running
			// DeleteShard will skip over tablets that were already deleted.
			return fmt.Errorf("can't delete tablet %v: %v", alias, delErr)
		}
	case len(tablets) > 0:
		return fmt.Errorf("shard %v/%v still has %v tablets; use -recursive or remove them manually", keyspace, shard, len(tablets))
	}

	// Remove the replication graph and serving graph in each cell.
	for _, cell := range si.Cells {
		replErr := wr.ts.DeleteShardReplication(ctx, cell, keyspace, shard)
		if replErr != nil && replErr != topo.ErrNoNode {
			wr.Logger().Warningf("Cannot delete ShardReplication in cell %v for %v/%v: %v", cell, keyspace, shard, replErr)
		}

		for _, tabletType := range topo.AllTabletTypes {
			if topo.IsInServingGraph(tabletType) {
				epErr := wr.ts.DeleteEndPoints(ctx, cell, keyspace, shard, tabletType, -1)
				if epErr != nil && epErr != topo.ErrNoNode {
					wr.Logger().Warningf("Cannot delete EndPoints in cell %v for %v/%v/%v: %v", cell, keyspace, shard, tabletType, epErr)
				}
			}
		}

		srvErr := wr.ts.DeleteSrvShard(ctx, cell, keyspace, shard)
		if srvErr != nil && srvErr != topo.ErrNoNode {
			wr.Logger().Warningf("Cannot delete SrvShard in cell %v for %v/%v: %v", cell, keyspace, shard, srvErr)
		}
	}

	return wr.ts.DeleteShard(ctx, keyspace, shard)
}
示例6: reparentShardLocked
// reparentShardLocked makes masterElectTabletAlias the master of
// keyspace/shard; the name indicates the caller holds the shard lock.
//
// It refuses to reparent onto the tablet that is already master unless
// forceReparentToCurrentMaster is set, and fails if the master-elect is not
// in the shard's tablet map. A Reparent event describing the operation is
// built and handed to the graceful path (shard has a master, no force) or
// the brutal path (otherwise).
func (wr *Wrangler) reparentShardLocked(keyspace, shard string, masterElectTabletAlias topo.TabletAlias, leaveMasterReadOnly, forceReparentToCurrentMaster bool) error {
	si, err := wr.ts.GetShard(keyspace, shard)
	if err != nil {
		return err
	}

	tablets, err := topo.GetTabletMapForShard(wr.ts, keyspace, shard)
	if err != nil {
		return err
	}
	slaves, masters := topotools.SortedTabletMap(tablets)

	if !forceReparentToCurrentMaster && si.MasterAlias == masterElectTabletAlias {
		return fmt.Errorf("master-elect tablet %v is already master - specify -force to override", masterElectTabletAlias)
	}

	elect, found := tablets[masterElectTabletAlias]
	if !found {
		return fmt.Errorf("master-elect tablet %v not found in replication graph %v/%v %v", masterElectTabletAlias, keyspace, shard, topotools.MapKeys(tablets))
	}

	// Create a reusable Reparent event with the info available so far; the
	// old master is filled in only when it is present in the tablet map.
	ev := &events.Reparent{
		ShardInfo: *si,
		NewMaster: *elect.Tablet,
	}
	if previous, ok := tablets[si.MasterAlias]; ok {
		ev.OldMaster = *previous.Tablet
	}

	graceful := !si.MasterAlias.IsZero() && !forceReparentToCurrentMaster
	if graceful {
		err = wr.reparentShardGraceful(ev, si, slaves, masters, elect, leaveMasterReadOnly)
	} else {
		err = wr.reparentShardBrutal(ev, si, slaves, masters, elect, leaveMasterReadOnly, forceReparentToCurrentMaster)
	}
	if err != nil {
		return err
	}

	// Only log on success; on failure the returned error is what gets shown.
	wr.Logger().Infof("reparentShard finished")
	return nil
}
示例7: DeleteShard
// DeleteShard will do all the necessary changes in the topology server
// to entirely remove a shard. It can only work if there are no tablets
// in that shard.
//
// The per-cell cleanup of the replication graph and serving graph is best
// effort: failures are logged as warnings and do not abort the deletion. A
// topo.ErrNoNode from any of those deletes means the data is already gone
// and is silently ignored, so re-running DeleteShard after a partial cleanup
// does not produce spurious warnings. The result of the final shard record
// deletion is returned.
func (wr *Wrangler) DeleteShard(keyspace, shard string) error {
	shardInfo, err := wr.ts.GetShard(keyspace, shard)
	if err != nil {
		return err
	}

	tabletMap, err := topo.GetTabletMapForShard(wr.ts, keyspace, shard)
	if err != nil {
		return err
	}
	if len(tabletMap) > 0 {
		return fmt.Errorf("shard %v/%v still has %v tablets", keyspace, shard, len(tabletMap))
	}

	// remove the replication graph and serving graph in each cell
	for _, cell := range shardInfo.Cells {
		// An absent ShardReplication node is not worth a warning, same as
		// for EndPoints and SrvShard below.
		if err := wr.ts.DeleteShardReplication(cell, keyspace, shard); err != nil && err != topo.ErrNoNode {
			log.Warningf("Cannot delete ShardReplication in cell %v for %v/%v: %v", cell, keyspace, shard, err)
		}

		for _, t := range topo.AllTabletTypes {
			if !topo.IsInServingGraph(t) {
				continue
			}
			if err := wr.ts.DeleteEndPoints(cell, keyspace, shard, t); err != nil && err != topo.ErrNoNode {
				log.Warningf("Cannot delete EndPoints in cell %v for %v/%v/%v: %v", cell, keyspace, shard, t, err)
			}
		}

		if err := wr.ts.DeleteSrvShard(cell, keyspace, shard); err != nil && err != topo.ErrNoNode {
			log.Warningf("Cannot delete SrvShard in cell %v for %v/%v: %v", cell, keyspace, shard, err)
		}
	}

	return wr.ts.DeleteShard(keyspace, shard)
}
示例8: TestShardExternallyReparented
func TestShardExternallyReparented(t *testing.T) {
ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
wr := wrangler.New(logutil.NewConsoleLogger(), ts, time.Minute, time.Second)
wr.UseRPCs = false
// Create an old master, a new master, two good slaves, one bad slave
oldMaster := NewFakeTablet(t, wr, "cell1", 0, topo.TYPE_MASTER)
newMaster := NewFakeTablet(t, wr, "cell1", 1, topo.TYPE_REPLICA,
TabletParent(oldMaster.Tablet.Alias))
goodSlave1 := NewFakeTablet(t, wr, "cell1", 2, topo.TYPE_REPLICA,
TabletParent(oldMaster.Tablet.Alias))
goodSlave2 := NewFakeTablet(t, wr, "cell2", 3, topo.TYPE_REPLICA,
TabletParent(oldMaster.Tablet.Alias))
badSlave := NewFakeTablet(t, wr, "cell1", 4, topo.TYPE_REPLICA,
TabletParent(oldMaster.Tablet.Alias))
// Add a new Cell to the Shard, that doesn't map to any read topo cell,
// to simulate a data center being unreachable.
si, err := ts.GetShard("test_keyspace", "0")
if err != nil {
t.Fatalf("GetShard failed: %v", err)
}
si.Cells = append(si.Cells, "cell666")
if err := topo.UpdateShard(ts, si); err != nil {
t.Fatalf("UpdateShard failed: %v", err)
}
// Slightly unrelated test: make sure we can find the tablets
// even with a datacenter being down.
tabletMap, err := topo.GetTabletMapForShardByCell(ts, "test_keyspace", "0", []string{"cell1"})
if err != nil {
t.Fatalf("GetTabletMapForShardByCell should have worked but got: %v", err)
}
master, err := topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.IPAddr, "vt", oldMaster.Tablet.Portmap["vt"])
if err != nil || master != oldMaster.Tablet.Alias {
t.Fatalf("FindTabletByIPAddrAndPort(master) failed: %v %v", err, master)
}
slave1, err := topotools.FindTabletByIPAddrAndPort(tabletMap, goodSlave1.Tablet.IPAddr, "vt", goodSlave1.Tablet.Portmap["vt"])
if err != nil || slave1 != goodSlave1.Tablet.Alias {
t.Fatalf("FindTabletByIPAddrAndPort(slave1) failed: %v %v", err, master)
}
slave2, err := topotools.FindTabletByIPAddrAndPort(tabletMap, goodSlave2.Tablet.IPAddr, "vt", goodSlave2.Tablet.Portmap["vt"])
if err != topo.ErrNoNode {
t.Fatalf("FindTabletByIPAddrAndPort(slave2) worked: %v %v", err, slave2)
}
// Make sure the master is not exported in other cells
tabletMap, err = topo.GetTabletMapForShardByCell(ts, "test_keyspace", "0", []string{"cell2"})
master, err = topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.IPAddr, "vt", oldMaster.Tablet.Portmap["vt"])
if err != topo.ErrNoNode {
t.Fatalf("FindTabletByIPAddrAndPort(master) worked in cell2: %v %v", err, master)
}
tabletMap, err = topo.GetTabletMapForShard(ts, "test_keyspace", "0")
if err != topo.ErrPartialResult {
t.Fatalf("GetTabletMapForShard should have returned ErrPartialResult but got: %v", err)
}
master, err = topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.IPAddr, "vt", oldMaster.Tablet.Portmap["vt"])
if err != nil || master != oldMaster.Tablet.Alias {
t.Fatalf("FindTabletByIPAddrAndPort(master) failed: %v %v", err, master)
}
// First test: reparent to the same master, make sure it works
// as expected.
if err := wr.ShardExternallyReparented("test_keyspace", "0", oldMaster.Tablet.Alias); err == nil {
t.Fatalf("ShardExternallyReparented(same master) should have failed")
} else {
if !strings.Contains(err.Error(), "already master") {
t.Fatalf("ShardExternallyReparented(same master) should have failed with an error that contains 'already master' but got: %v", err)
}
}
// Second test: reparent to the replica, and pretend the old
// master is still good to go.
// On the elected master, we will respond to
// TABLET_ACTION_SLAVE_WAS_PROMOTED
newMaster.FakeMysqlDaemon.MasterAddr = ""
newMaster.StartActionLoop(t, wr)
defer newMaster.StopActionLoop(t)
// On the old master, we will only respond to
// TABLET_ACTION_SLAVE_WAS_RESTARTED.
oldMaster.FakeMysqlDaemon.MasterAddr = newMaster.Tablet.MysqlIpAddr()
oldMaster.StartActionLoop(t, wr)
defer oldMaster.StopActionLoop(t)
// On the good slaves, we will respond to
// TABLET_ACTION_SLAVE_WAS_RESTARTED.
goodSlave1.FakeMysqlDaemon.MasterAddr = newMaster.Tablet.MysqlIpAddr()
goodSlave1.StartActionLoop(t, wr)
defer goodSlave1.StopActionLoop(t)
goodSlave2.FakeMysqlDaemon.MasterAddr = newMaster.Tablet.MysqlIpAddr()
goodSlave2.StartActionLoop(t, wr)
defer goodSlave2.StopActionLoop(t)
// On the bad slave, we will respond to
// TABLET_ACTION_SLAVE_WAS_RESTARTED with bad data.
badSlave.FakeMysqlDaemon.MasterAddr = "234.0.0.1:3301"
//.........这里部分代码省略.........
示例9: applySchemaShardComplex
// applySchemaShardComplex applies the schema change to each tablet in
// statusArray, one after the other. A tablet that is in the serving graph is
// switched to topo.TYPE_SCHEMA_UPGRADE while the change is applied and
// switched back to its previous type afterwards.
//
// Tablets whose schema already matches preflight.AfterSchema are skipped.
// A tablet whose schema differs from preflight.BeforeSchema aborts the
// operation unless force is set, in which case only a warning is logged.
//
// If newParentTabletAlias is non-zero, the shard is then gracefully
// reparented to that tablet; the old master (masterTabletAlias) is not
// touched (see the note at the end of the function).
//
// On success the result carries the preflight before/after schemas. Any
// error stops processing immediately; a tablet already moved to
// TYPE_SCHEMA_UPGRADE is not restored in that case.
func (wr *Wrangler) applySchemaShardComplex(statusArray []*TabletStatus, shardInfo *topo.ShardInfo, preflight *myproto.SchemaChangeResult, masterTabletAlias topo.TabletAlias, change string, newParentTabletAlias topo.TabletAlias, force bool) (*myproto.SchemaChangeResult, error) {
	// Apply the schema change to all replica / slave tablets.
	for _, status := range statusArray {
		// If already applied, skip this tablet.
		afterDiffs := myproto.DiffSchemaToArray("after", preflight.AfterSchema, status.ti.Alias.String(), status.beforeSchema)
		if len(afterDiffs) == 0 {
			log.Infof("Tablet %v already has the AfterSchema, skipping", status.ti.Alias)
			continue
		}

		// Make sure the before schema matches.
		beforeDiffs := myproto.DiffSchemaToArray("master", preflight.BeforeSchema, status.ti.Alias.String(), status.beforeSchema)
		if len(beforeDiffs) > 0 {
			if !force {
				return nil, fmt.Errorf("Tablet %v has inconsistent schema: %v", status.ti.Alias, strings.Join(beforeDiffs, "\n"))
			}
			log.Warningf("Tablet %v has inconsistent schema, ignoring: %v", status.ti.Alias, strings.Join(beforeDiffs, "\n"))
		}

		// Take this tablet out of the serving graph if necessary. The
		// record is re-read so we know its current type for the restore.
		current, err := wr.ts.GetTablet(status.ti.Alias)
		if err != nil {
			return nil, err
		}
		wasServing := current.Tablet.IsInServingGraph()
		if wasServing {
			// Note we want to update the serving graph there.
			if err = wr.changeTypeInternal(current.Alias, topo.TYPE_SCHEMA_UPGRADE); err != nil {
				return nil, err
			}
		}

		// Apply the schema change.
		log.Infof("Applying schema change to slave %v in complex mode", status.ti.Alias)
		request := &myproto.SchemaChange{
			Sql:              change,
			Force:            force,
			AllowReplication: false,
			BeforeSchema:     preflight.BeforeSchema,
			AfterSchema:      preflight.AfterSchema,
		}
		if _, err = wr.ApplySchema(status.ti.Alias, request); err != nil {
			return nil, err
		}

		// Put this tablet back into the serving graph.
		if wasServing {
			if err = wr.changeTypeInternal(current.Alias, current.Tablet.Type); err != nil {
				return nil, err
			}
		}
	}

	// If newParentTabletAlias is passed in, use that as the new master.
	if !newParentTabletAlias.IsZero() {
		log.Infof("Reparenting with new master set to %v", newParentTabletAlias)
		tablets, err := topo.GetTabletMapForShard(wr.ts, shardInfo.Keyspace(), shardInfo.ShardName())
		if err != nil {
			return nil, err
		}
		slaves, masters := sortedTabletMap(tablets)

		newMaster, err := wr.ts.GetTablet(newParentTabletAlias)
		if err != nil {
			return nil, err
		}

		const leaveMasterReadOnly = false
		if err = wr.reparentShardGraceful(shardInfo, slaves, masters, newMaster, leaveMasterReadOnly); err != nil {
			return nil, err
		}

		// Here we would apply the schema change to the old
		// master, but after a reparent it's in Scrap state,
		// so no need to. When/if reparent leaves the
		// original master in a different state (like replica
		// or rdonly), then we should apply the schema there
		// too.
		log.Infof("Skipping schema change on old master %v in complex mode, it's been Scrapped", masterTabletAlias)
	}

	result := &myproto.SchemaChangeResult{
		BeforeSchema: preflight.BeforeSchema,
		AfterSchema:  preflight.AfterSchema,
	}
	return result, nil
}
示例10: tabletExternallyReparentedLocked
func tabletExternallyReparentedLocked(ts topo.Server, tablet *topo.TabletInfo, actionTimeout, lockTimeout time.Duration, interrupted chan struct{}) (err error) {
// read the shard, make sure again the master is not already good.
// critical read, we want up to date info (and the shard is locked).
shardInfo, err := ts.GetShardCritical(tablet.Keyspace, tablet.Shard)
if err != nil {
return err
}
if shardInfo.MasterAlias == tablet.Alias {
return fmt.Errorf("this tablet is already the master")
}
// Read the tablets, make sure the master elect is known to the shard
// (it's this tablet, so it better be!).
// Note we will keep going with a partial tablet map, which usually
// happens when a cell is not reachable. After these checks, the
// guarantees we'll have are:
// - global cell is reachable (we just locked and read the shard)
// - the local cell that contains the new master is reachable
// (as we're going to check the new master is in the list)
// That should be enough.
tabletMap, err := topo.GetTabletMapForShard(ts, tablet.Keyspace, tablet.Shard)
switch err {
case nil:
// keep going
case topo.ErrPartialResult:
log.Warningf("Got topo.ErrPartialResult from GetTabletMapForShard, may need to re-init some tablets")
default:
return err
}
masterElectTablet, ok := tabletMap[tablet.Alias]
if !ok {
return fmt.Errorf("this master-elect tablet %v not found in replication graph %v/%v %v", tablet.Alias, tablet.Keyspace, tablet.Shard, topotools.MapKeys(tabletMap))
}
// Create reusable Reparent event with available info
ev := &events.Reparent{
ShardInfo: *shardInfo,
NewMaster: *tablet.Tablet,
}
if oldMasterTablet, ok := tabletMap[shardInfo.MasterAlias]; ok {
ev.OldMaster = *oldMasterTablet.Tablet
}
defer func() {
if err != nil {
event.DispatchUpdate(ev, "failed: "+err.Error())
}
}()
// sort the tablets, and handle them
slaveTabletMap, masterTabletMap := topotools.SortedTabletMap(tabletMap)
event.DispatchUpdate(ev, "starting external from tablet")
// we fix the new master in the replication graph
event.DispatchUpdate(ev, "mark ourself as new master")
err = updateReplicationGraphForPromotedSlave(ts, tablet)
if err != nil {
// This suggests we can't talk to topo server. This is bad.
return fmt.Errorf("updateReplicationGraphForPromotedSlave failed: %v", err)
}
// Once this tablet is promoted, remove it from our maps
delete(slaveTabletMap, tablet.Alias)
delete(masterTabletMap, tablet.Alias)
// Then fix all the slaves, including the old master. This
// last step is very likely to time out for some tablets (one
// random guy is dead, the old master is dead, ...). We
// execute them all in parallel until we get to
// wr.ActionTimeout(). After this, no other action with a
// timeout is executed, so even if we got to the timeout,
// we're still good.
event.DispatchUpdate(ev, "restarting slaves")
logger := logutil.NewConsoleLogger()
ai := initiator.NewActionInitiator(ts)
topotools.RestartSlavesExternal(ts, logger, slaveTabletMap, masterTabletMap, masterElectTablet.Alias, func(ti *topo.TabletInfo, swrd *actionnode.SlaveWasRestartedArgs) error {
return ai.RpcSlaveWasRestarted(ti, swrd, actionTimeout)
})
// Compute the list of Cells we need to rebuild: old master and
// all other cells if reparenting to another cell.
cells := []string{shardInfo.MasterAlias.Cell}
if shardInfo.MasterAlias.Cell != tablet.Alias.Cell {
cells = nil
}
// now update the master record in the shard object
event.DispatchUpdate(ev, "updating shard record")
log.Infof("Updating Shard's MasterAlias record")
shardInfo.MasterAlias = tablet.Alias
if err = topo.UpdateShard(ts, shardInfo); err != nil {
return err
}
// and rebuild the shard serving graph
event.DispatchUpdate(ev, "rebuilding shard serving graph")
log.Infof("Rebuilding shard serving graph data")
if err = topotools.RebuildShard(logger, ts, tablet.Keyspace, tablet.Shard, cells, lockTimeout, interrupted); err != nil {
return err
//.........这里部分代码省略.........
示例11: shardExternallyReparentedLocked
// shardExternallyReparentedLocked brings the topology in line with a
// reparent that happened outside of this code: it fixes up the tablets via
// reparentShardExternal, stores masterElectTabletAlias as the shard's master
// and rebuilds the shard serving graph. The name indicates the caller holds
// the shard lock.
//
// Progress is published on a Reparent event. The result is a named return
// value so that the deferred function can publish a "failed: ..." update for
// any error returned after the event has been created.
func (wr *Wrangler) shardExternallyReparentedLocked(keyspace, shard string, masterElectTabletAlias topo.TabletAlias) (err error) {
	// Read the shard and make sure the master is not already good.
	si, err := wr.ts.GetShard(keyspace, shard)
	if err != nil {
		return err
	}
	if si.MasterAlias == masterElectTabletAlias {
		return fmt.Errorf("master-elect tablet %v is already master", masterElectTabletAlias)
	}

	// Read the tablets and make sure the master elect is known to us.
	// We deliberately keep going with a partial tablet map, which usually
	// happens when a cell is not reachable. After these checks, the
	// guarantees we have are:
	// - the global cell is reachable (we just locked and read the shard)
	// - the local cell that contains the new master is reachable
	//   (we check below that the new master is in the list)
	// That should be enough.
	tablets, err := topo.GetTabletMapForShard(wr.ts, keyspace, shard)
	if err != nil {
		if err != topo.ErrPartialResult {
			return err
		}
		wr.logger.Warningf("Got topo.ErrPartialResult from GetTabletMapForShard, may need to re-init some tablets")
	}

	elect, known := tablets[masterElectTabletAlias]
	if !known {
		return fmt.Errorf("master-elect tablet %v not found in replication graph %v/%v %v", masterElectTabletAlias, keyspace, shard, topotools.MapKeys(tablets))
	}

	// Create a reusable Reparent event with the info available so far; the
	// old master is filled in only when it is present in the tablet map.
	ev := &events.Reparent{
		ShardInfo: *si,
		NewMaster: *elect.Tablet,
	}
	if previous, ok := tablets[si.MasterAlias]; ok {
		ev.OldMaster = *previous.Tablet
	}
	defer func() {
		if err == nil {
			return
		}
		event.DispatchUpdate(ev, "failed: "+err.Error())
	}()

	// Sort the tablets, and handle them.
	slaves, masters := topotools.SortedTabletMap(tablets)
	if err = wr.reparentShardExternal(ev, slaves, masters, elect); err != nil {
		wr.logger.Infof("Skipping shard rebuild with failed reparent")
		return err
	}

	// Cells to rebuild: only the old master's cell when the new master is
	// in the same cell; a nil list is passed when reparenting to another
	// cell. This must be computed before MasterAlias is overwritten below.
	var cells []string
	if si.MasterAlias.Cell == masterElectTabletAlias.Cell {
		cells = []string{si.MasterAlias.Cell}
	}

	// Now update the master record in the shard object.
	event.DispatchUpdate(ev, "updating shard record")
	wr.logger.Infof("Updating Shard's MasterAlias record")
	si.MasterAlias = masterElectTabletAlias
	if err = topo.UpdateShard(wr.ts, si); err != nil {
		return err
	}

	// And rebuild the shard serving graph.
	event.DispatchUpdate(ev, "rebuilding shard serving graph")
	wr.logger.Infof("Rebuilding shard serving graph data")
	_, err = topotools.RebuildShard(wr.logger, wr.ts, elect.Keyspace, elect.Shard, cells, wr.lockTimeout, interrupted)
	if err != nil {
		return err
	}

	event.DispatchUpdate(ev, "finished")
	return nil
}
示例12: TestTabletExternallyReparented
func TestTabletExternallyReparented(t *testing.T) {
tabletmanager.SetReparentFlags(time.Minute /* finalizeTimeout */)
ctx := context.Background()
ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient(), time.Second)
vp := NewVtctlPipe(t, ts)
defer vp.Close()
// Create an old master, a new master, two good slaves, one bad slave
oldMaster := NewFakeTablet(t, wr, "cell1", 0, topo.TYPE_MASTER)
newMaster := NewFakeTablet(t, wr, "cell1", 1, topo.TYPE_REPLICA)
goodSlave1 := NewFakeTablet(t, wr, "cell1", 2, topo.TYPE_REPLICA)
goodSlave2 := NewFakeTablet(t, wr, "cell2", 3, topo.TYPE_REPLICA)
badSlave := NewFakeTablet(t, wr, "cell1", 4, topo.TYPE_REPLICA)
// Add a new Cell to the Shard, that doesn't map to any read topo cell,
// to simulate a data center being unreachable.
si, err := ts.GetShard(ctx, "test_keyspace", "0")
if err != nil {
t.Fatalf("GetShard failed: %v", err)
}
si.Cells = append(si.Cells, "cell666")
if err := topo.UpdateShard(ctx, ts, si); err != nil {
t.Fatalf("UpdateShard failed: %v", err)
}
// Slightly unrelated test: make sure we can find the tablets
// even with a datacenter being down.
tabletMap, err := topo.GetTabletMapForShardByCell(ctx, ts, "test_keyspace", "0", []string{"cell1"})
if err != nil {
t.Fatalf("GetTabletMapForShardByCell should have worked but got: %v", err)
}
master, err := topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.IPAddr, "vt", oldMaster.Tablet.Portmap["vt"])
if err != nil || master != oldMaster.Tablet.Alias {
t.Fatalf("FindTabletByIPAddrAndPort(master) failed: %v %v", err, master)
}
slave1, err := topotools.FindTabletByIPAddrAndPort(tabletMap, goodSlave1.Tablet.IPAddr, "vt", goodSlave1.Tablet.Portmap["vt"])
if err != nil || slave1 != goodSlave1.Tablet.Alias {
t.Fatalf("FindTabletByIPAddrAndPort(slave1) failed: %v %v", err, master)
}
slave2, err := topotools.FindTabletByIPAddrAndPort(tabletMap, goodSlave2.Tablet.IPAddr, "vt", goodSlave2.Tablet.Portmap["vt"])
if err != topo.ErrNoNode {
t.Fatalf("FindTabletByIPAddrAndPort(slave2) worked: %v %v", err, slave2)
}
// Make sure the master is not exported in other cells
tabletMap, err = topo.GetTabletMapForShardByCell(ctx, ts, "test_keyspace", "0", []string{"cell2"})
master, err = topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.IPAddr, "vt", oldMaster.Tablet.Portmap["vt"])
if err != topo.ErrNoNode {
t.Fatalf("FindTabletByIPAddrAndPort(master) worked in cell2: %v %v", err, master)
}
tabletMap, err = topo.GetTabletMapForShard(ctx, ts, "test_keyspace", "0")
if err != topo.ErrPartialResult {
t.Fatalf("GetTabletMapForShard should have returned ErrPartialResult but got: %v", err)
}
master, err = topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.IPAddr, "vt", oldMaster.Tablet.Portmap["vt"])
if err != nil || master != oldMaster.Tablet.Alias {
t.Fatalf("FindTabletByIPAddrAndPort(master) failed: %v %v", err, master)
}
// On the elected master, we will respond to
// TabletActionSlaveWasPromoted
newMaster.StartActionLoop(t, wr)
defer newMaster.StopActionLoop(t)
// On the old master, we will only respond to
// TabletActionSlaveWasRestarted.
oldMaster.StartActionLoop(t, wr)
defer oldMaster.StopActionLoop(t)
// On the good slaves, we will respond to
// TabletActionSlaveWasRestarted.
goodSlave1.StartActionLoop(t, wr)
defer goodSlave1.StopActionLoop(t)
goodSlave2.StartActionLoop(t, wr)
defer goodSlave2.StopActionLoop(t)
// On the bad slave, we will respond to
// TabletActionSlaveWasRestarted with bad data.
badSlave.StartActionLoop(t, wr)
defer badSlave.StopActionLoop(t)
// First test: reparent to the same master, make sure it works
// as expected.
tmc := tmclient.NewTabletManagerClient()
ti, err := ts.GetTablet(ctx, oldMaster.Tablet.Alias)
if err != nil {
t.Fatalf("GetTablet failed: %v", err)
}
if err := vp.Run([]string{"TabletExternallyReparented", oldMaster.Tablet.Alias.String()}); err != nil {
t.Fatalf("TabletExternallyReparented(same master) should have worked")
}
// Second test: reparent to a replica, and pretend the old
// master is still good to go.
// This tests a bad case; the new designated master is a slave,
//.........这里部分代码省略.........
示例13: emergencyReparentShardLocked
func (wr *Wrangler) emergencyReparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, waitSlaveTimeout time.Duration) error {
shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard)
if err != nil {
return err
}
ev.ShardInfo = *shardInfo
event.DispatchUpdate(ev, "reading all tablets")
tabletMap, err := topo.GetTabletMapForShard(ctx, wr.ts, keyspace, shard)
if err != nil {
return err
}
// Check corner cases we're going to depend on
masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias]
if !ok {
return fmt.Errorf("master-elect tablet %v is not in the shard", masterElectTabletAlias)
}
ev.NewMaster = *masterElectTabletInfo.Tablet
if topo.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) {
return fmt.Errorf("master-elect tablet %v is already the master", masterElectTabletAlias)
}
// Deal with the old master: try to remote-scrap it, if it's
// truely dead we force-scrap it. Remove it from our map in any case.
if !topo.TabletAliasIsZero(shardInfo.MasterAlias) {
scrapOldMaster := true
oldMasterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias]
if ok {
delete(tabletMap, *shardInfo.MasterAlias)
} else {
oldMasterTabletInfo, err = wr.ts.GetTablet(ctx, shardInfo.MasterAlias)
if err != nil {
wr.logger.Warningf("cannot read old master tablet %v, won't touch it: %v", shardInfo.MasterAlias, err)
scrapOldMaster = false
}
}
if scrapOldMaster {
ev.OldMaster = *oldMasterTabletInfo.Tablet
wr.logger.Infof("scrapping old master %v", shardInfo.MasterAlias)
ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout)
defer cancel()
if err := wr.tmc.Scrap(ctx, oldMasterTabletInfo); err != nil {
wr.logger.Warningf("remote scrapping failed master failed, will force the scrap: %v", err)
if err := topotools.Scrap(ctx, wr.ts, shardInfo.MasterAlias, true); err != nil {
wr.logger.Warningf("old master topo scrapping failed, continuing anyway: %v", err)
}
}
}
}
// Stop replication on all slaves, get their current
// replication position
event.DispatchUpdate(ev, "stop replication on all slaves")
wg := sync.WaitGroup{}
mu := sync.Mutex{}
statusMap := make(map[pb.TabletAlias]myproto.ReplicationStatus)
for alias, tabletInfo := range tabletMap {
wg.Add(1)
go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
defer wg.Done()
wr.logger.Infof("getting replication position from %v", alias)
ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout)
defer cancel()
rp, err := wr.TabletManagerClient().StopReplicationAndGetStatus(ctx, tabletInfo)
if err != nil {
wr.logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", alias, err)
return
}
mu.Lock()
statusMap[alias] = rp
mu.Unlock()
}(alias, tabletInfo)
}
wg.Wait()
// Verify masterElect is alive and has the most advanced position
masterElectStatus, ok := statusMap[*masterElectTabletAlias]
if !ok {
return fmt.Errorf("couldn't get master elect %v replication position", masterElectTabletAlias)
}
for alias, status := range statusMap {
if topo.TabletAliasEqual(&alias, masterElectTabletAlias) {
continue
}
if !masterElectStatus.Position.AtLeast(status.Position) {
return fmt.Errorf("tablet %v is more advanced than master elect tablet %v: %v > %v", alias, masterElectTabletAlias, status.Position, masterElectStatus)
}
}
// Promote the masterElect
wr.logger.Infof("promote slave %v", masterElectTabletAlias)
event.DispatchUpdate(ev, "promoting slave")
rp, err := wr.tmc.PromoteSlave(ctx, masterElectTabletInfo)
if err != nil {
return fmt.Errorf("master-elect tablet %v failed to be upgraded to master: %v", masterElectTabletAlias, err)
//.........这里部分代码省略.........
示例14: plannedReparentShardLocked
// plannedReparentShardLocked gracefully moves mastership of keyspace/shard
// from the current master to masterElectTabletAlias.
//
// Steps visible in this excerpt:
//  1. read the shard record and the shard's tablet map;
//  2. verify that the master-elect is in the shard and is not already the
//     master, and that the shard has a current master that is in the map;
//  3. demote the current master and capture its replication position;
//  4. promote the master-elect once it has caught up to that position;
//  5. concurrently populate the reparent journal on the new master and point
//     every other tablet at it;
//  6. update the shard record as soon as the new master is done, then wait
//     for the remaining tablets.
//
// ev is filled in with the shard info and the old/new master tablet records
// and is used to dispatch progress updates. An error is returned as soon as
// a step fails; errors from individual slaves are aggregated and returned
// after all of them have finished.
// NOTE(review): the "Locked" suffix suggests the caller already holds the
// shard lock — confirm against the caller.
func (wr *Wrangler) plannedReparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, waitSlaveTimeout time.Duration) error {
	shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return err
	}
	ev.ShardInfo = *shardInfo

	event.DispatchUpdate(ev, "reading tablet map")
	tabletMap, err := topo.GetTabletMapForShard(ctx, wr.ts, keyspace, shard)
	if err != nil {
		return err
	}

	// Check corner cases we're going to depend on
	masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias]
	if !ok {
		return fmt.Errorf("master-elect tablet %v is not in the shard", masterElectTabletAlias)
	}
	ev.NewMaster = *masterElectTabletInfo.Tablet
	if topo.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) {
		return fmt.Errorf("master-elect tablet %v is already the master", masterElectTabletAlias)
	}
	// A planned reparent needs an existing master to demote. Without this
	// guard a shard record with no master alias would make the dereference
	// below panic instead of returning an error.
	if topo.TabletAliasIsZero(shardInfo.MasterAlias) {
		return fmt.Errorf("shard %v/%v has no master, cannot perform a planned reparent", keyspace, shard)
	}
	oldMasterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias]
	if !ok {
		return fmt.Errorf("old master tablet %v is not in the shard", shardInfo.MasterAlias)
	}
	ev.OldMaster = *oldMasterTabletInfo.Tablet

	// Demote the current master, get its replication position
	wr.logger.Infof("demote current master %v", shardInfo.MasterAlias)
	event.DispatchUpdate(ev, "demoting old master")
	rp, err := wr.tmc.DemoteMaster(ctx, oldMasterTabletInfo)
	if err != nil {
		return fmt.Errorf("old master tablet %v DemoteMaster failed: %v", shardInfo.MasterAlias, err)
	}

	// Wait on the master-elect tablet until it reaches that position,
	// then promote it
	wr.logger.Infof("promote slave %v", masterElectTabletAlias)
	event.DispatchUpdate(ev, "promoting slave")
	rp, err = wr.tmc.PromoteSlaveWhenCaughtUp(ctx, masterElectTabletInfo, rp)
	if err != nil {
		return fmt.Errorf("master-elect tablet %v failed to catch up with replication or be upgraded to master: %v", masterElectTabletAlias, err)
	}

	// Go through all the tablets:
	// - new master: populate the reparent journal
	// - everybody else: reparent to new master, wait for row
	event.DispatchUpdate(ev, "reparenting all tablets")
	now := time.Now().UnixNano()
	// Two wait groups so the shard record can be updated as soon as the
	// master is done, without waiting for every slave.
	wgMaster := sync.WaitGroup{}
	wgSlaves := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	// masterErr is set by the goroutine handling the master-elect and is
	// only read after wgMaster.Wait() returns.
	var masterErr error
	for alias, tabletInfo := range tabletMap {
		// alias and tabletInfo are passed as arguments so every goroutine
		// works on its own copy of the loop variables.
		if topo.TabletAliasEqual(&alias, masterElectTabletAlias) {
			wgMaster.Add(1)
			go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
				defer wgMaster.Done()
				wr.logger.Infof("populating reparent journal on new master %v", alias)
				masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, plannedReparentShardOperation, &alias, rp)
			}(alias, tabletInfo)
		} else {
			wgSlaves.Add(1)
			go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
				defer wgSlaves.Done()
				wr.logger.Infof("setting new master on slave %v", alias)
				// also restart replication on old master
				forceStartSlave := topo.TabletAliasEqual(&alias, oldMasterTabletInfo.Alias)
				if err := wr.TabletManagerClient().SetMaster(ctx, tabletInfo, masterElectTabletAlias, now, forceStartSlave); err != nil {
					rec.RecordError(fmt.Errorf("Tablet %v SetMaster failed: %v", alias, err))
					return
				}
			}(alias, tabletInfo)
		}
	}

	// After the master is done, we can update the shard record
	// (note with semi-sync, it also means at least one slave is done)
	wgMaster.Wait()
	if masterErr != nil {
		// Let the slave goroutines finish before returning.
		wgSlaves.Wait()
		return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr)
	}
	wr.logger.Infof("updating shard record with new master %v", masterElectTabletAlias)
	shardInfo.MasterAlias = masterElectTabletAlias
	if err := topo.UpdateShard(ctx, wr.ts, shardInfo); err != nil {
		wgSlaves.Wait()
		return fmt.Errorf("failed to update shard master record: %v", err)
	}

	// Wait for the slaves to complete. If some of them fail, we
	// will rebuild the shard serving graph anyway
	wgSlaves.Wait()
	if err := rec.Error(); err != nil {
		wr.Logger().Errorf("Some slaves failed to reparent: %v", err)
		return err
	}

	// Then we rebuild the entire serving graph for the shard,
//.........这里部分代码省略.........
示例15: initShardMasterLocked
// initShardMasterLocked sets up replication for keyspace/shard with
// masterElectTabletAlias as the master.
//
// Steps visible in this excerpt:
//  1. read the shard record and the shard's tablet map;
//  2. sanity-check the master-elect; each failed check returns an error
//     unless force is set, in which case a warning is logged instead;
//  3. reset replication on every tablet and return the recorded errors if
//     any tablet failed;
//  4. call InitMaster on the master-elect to obtain its replication position;
//  5. concurrently populate the reparent journal on the new master and
//     handle every other tablet (the slave branch is cut off in this excerpt).
//
// ev is filled in with the shard info and the new master's tablet record and
// is used to dispatch progress updates. waitSlaveTimeout is not used in the
// visible part of the function.
// NOTE(review): the "Locked" suffix suggests the caller already holds the
// shard lock — confirm against the caller.
func (wr *Wrangler) initShardMasterLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, force bool, waitSlaveTimeout time.Duration) error {
shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard)
if err != nil {
return err
}
ev.ShardInfo = *shardInfo
event.DispatchUpdate(ev, "reading tablet map")
tabletMap, err := topo.GetTabletMapForShard(ctx, wr.ts, keyspace, shard)
if err != nil {
return err
}
// Check the master elect is in tabletMap
masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias]
if !ok {
return fmt.Errorf("master-elect tablet %v is not in the shard", masterElectTabletAlias)
}
ev.NewMaster = *masterElectTabletInfo.Tablet
// Check the master is the only master in the shard, or -force was used.
// Only the second result of SortedTabletMap is used here.
_, masterTabletMap := topotools.SortedTabletMap(tabletMap)
// Check 1: the shard record must already name the master-elect as master.
if !topo.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) {
if !force {
return fmt.Errorf("master-elect tablet %v is not the shard master, use -force to proceed anyway", masterElectTabletAlias)
}
wr.logger.Warningf("master-elect tablet %v is not the shard master, proceeding anyway as -force was used", masterElectTabletAlias)
}
// Check 2: the master-elect must appear in the master tablet map.
if _, ok := masterTabletMap[*masterElectTabletAlias]; !ok {
if !force {
return fmt.Errorf("master-elect tablet %v is not a master in the shard, use -force to proceed anyway", masterElectTabletAlias)
}
wr.logger.Warningf("master-elect tablet %v is not a master in the shard, proceeding anyway as -force was used", masterElectTabletAlias)
}
// Check 3: no other tablet in the master map may still be a master.
// Entries whose type is SCRAP are not counted.
haveOtherMaster := false
for alias, ti := range masterTabletMap {
if !topo.TabletAliasEqual(&alias, masterElectTabletAlias) && ti.Type != pb.TabletType_SCRAP {
haveOtherMaster = true
}
}
if haveOtherMaster {
if !force {
return fmt.Errorf("master-elect tablet %v is not the only master in the shard, use -force to proceed anyway", masterElectTabletAlias)
}
wr.logger.Warningf("master-elect tablet %v is not the only master in the shard, proceeding anyway as -force was used", masterElectTabletAlias)
}
// First phase: reset replication on all tablets. If anyone fails,
// we stop. It is probably because it is unreachable, and may leave
// an unstable database process in the mix, with a database daemon
// at a wrong replication spot.
event.DispatchUpdate(ev, "resetting replication on all tablets")
wg := sync.WaitGroup{}
rec := concurrency.AllErrorRecorder{}
for alias, tabletInfo := range tabletMap {
wg.Add(1)
// alias and tabletInfo are passed as arguments so every goroutine
// works on its own copy of the loop variables.
go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
defer wg.Done()
wr.logger.Infof("resetting replication on tablet %v", alias)
if err := wr.TabletManagerClient().ResetReplication(ctx, tabletInfo); err != nil {
rec.RecordError(fmt.Errorf("Tablet %v ResetReplication failed (either fix it, or Scrap it): %v", alias, err))
}
}(alias, tabletInfo)
}
// All resets must have finished before the recorded errors are inspected.
wg.Wait()
if err := rec.Error(); err != nil {
return err
}
// Tell the new master to break its slaves, return its replication
// position
wr.logger.Infof("initializing master on %v", masterElectTabletAlias)
event.DispatchUpdate(ev, "initializing master")
rp, err := wr.TabletManagerClient().InitMaster(ctx, masterElectTabletInfo)
if err != nil {
return err
}
// Now tell the new master to insert the reparent_journal row,
// and tell everybody else to become a slave of the new master,
// and wait for the row in the reparent_journal table.
// We start all these in parallel, to handle the semi-sync
// case: for the master to be able to commit its row in the
// reparent_journal table, it needs connected slaves.
event.DispatchUpdate(ev, "reparenting all tablets")
now := time.Now().UnixNano()
// Separate wait groups track the master goroutine and the slave goroutines.
wgMaster := sync.WaitGroup{}
wgSlaves := sync.WaitGroup{}
// masterErr is set by the goroutine handling the master-elect.
var masterErr error
for alias, tabletInfo := range tabletMap {
if topo.TabletAliasEqual(&alias, masterElectTabletAlias) {
wgMaster.Add(1)
go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
defer wgMaster.Done()
wr.logger.Infof("populating reparent journal on new master %v", alias)
masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, initShardMasterOperation, &alias, rp)
}(alias, tabletInfo)
} else {
wgSlaves.Add(1)
go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
//.........这里部分代码省略.........