本文整理匯總了Golang中github.com/outbrain/orchestrator/go/inst.AuditOperation函數的典型用法代碼示例。如果您正苦於以下問題:Golang AuditOperation函數的具體用法?Golang AuditOperation怎麽用?Golang AuditOperation使用的例子?那麽, 這裏精選的函數代碼示例或許可以為您提供幫助。
在下文中一共展示了AuditOperation函數的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Golang代碼示例。
示例1: checkAndRecoverDeadCoMaster
// checkAndRecoverDeadCoMaster inspects the given analysis entry and, when recovery is
// permitted (it is forced, or the cluster has automated master recovery), registers and
// runs a dead co-master recovery.
// The boolean result is true when a recovery was actually attempted; the returned error
// is the registration error when no recovery was registered, otherwise the error
// reported by RecoverDeadCoMaster.
func checkAndRecoverDeadCoMaster(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (bool, *TopologyRecovery, error) {
	recoveryAllowed := forceInstanceRecovery || analysisEntry.ClusterDetails.HasAutomatedMasterRecovery
	if !recoveryAllowed {
		return false, nil, nil
	}
	failedInstanceKey := &analysisEntry.AnalyzedInstanceKey

	topologyRecovery, err := AttemptRecoveryRegistration(&analysisEntry, !forceInstanceRecovery, !forceInstanceRecovery)
	if topologyRecovery == nil {
		log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadCoMaster.", analysisEntry.AnalyzedInstanceKey)
		return false, nil, err
	}

	// Registration succeeded: from here on a recovery is always attempted.
	recoverDeadCoMasterCounter.Inc(1)
	coMaster, lostSlaves, err := RecoverDeadCoMaster(topologyRecovery, skipProcesses)
	ResolveRecovery(topologyRecovery, coMaster)

	promoted := coMaster != nil
	auditMessage := "Failure: no slave promoted."
	if promoted {
		auditMessage = fmt.Sprintf("promoted co-master: %+v", coMaster.Key)
	}
	inst.AuditOperation("recover-dead-co-master", failedInstanceKey, auditMessage)
	topologyRecovery.LostSlaves.AddInstances(lostSlaves)

	if !promoted {
		recoverDeadCoMasterFailureCounter.Inc(1)
		return true, topologyRecovery, err
	}

	recoverDeadCoMasterSuccessCounter.Inc(1)
	if !skipProcesses {
		// Run the configured post master-failover hooks, pointing them at the promoted instance.
		topologyRecovery.SuccessorKey = &coMaster.Key
		executeProcesses(config.Config.PostMasterFailoverProcesses, "PostMasterFailoverProcesses", topologyRecovery, false)
	}
	return true, topologyRecovery, err
}
示例2: RecoverDeadIntermediateMaster
// RecoverDeadIntermediateMaster attempts to recover from a failed intermediate master by
// moving its slaves elsewhere. It registers the recovery, audits it, runs the configured
// PreFailoverProcesses and then tries, in order:
//   - Plan A: inst.MultiMatchSlaves of the failed instance under a candidate sibling;
//   - Plan B: inst.RegroupSlaves (its result is deliberately ignored);
//   - Plan C: inst.MatchUpSlaves from the failed instance.
// It returns whether any action was taken, the instance chosen as successor (if any) and
// the last relevant error.
func RecoverDeadIntermediateMaster(analysisEntry inst.ReplicationAnalysis) (actionTaken bool, successorInstance *inst.Instance, err error) {
failedInstanceKey := &analysisEntry.AnalyzedInstanceKey
// Bail out when registration is refused; this err is scoped to the if statement.
if ok, err := AttemptRecoveryRegistration(&analysisEntry); !ok {
log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadIntermediateMaster.", *failedInstanceKey)
return false, nil, err
}
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, "problem found; will recover")
log.Debugf("topology_recovery: RecoverDeadIntermediateMaster: will recover %+v", *failedInstanceKey)
// A failing pre-failover hook aborts the recovery before anything is moved.
if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", analysisEntry, nil, true); err != nil {
return false, nil, err
}
// Plan A: find a replacement intermediate master
if candidateSibling, err := GetCandidateSiblingOfIntermediateMaster(failedInstanceKey); err == nil {
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will attempt a candidate intermediate master: %+v", candidateSibling.Key)
// We have a candidate
// Note: candidateSibling and err are re-declared here and shadow the ones from the enclosing if.
// NOTE(review): the else branch below dereferences the candidateSibling returned by
// MultiMatchSlaves on its error path — confirm it is never nil when err != nil.
if matchedSlaves, candidateSibling, err, errs := inst.MultiMatchSlaves(failedInstanceKey, &candidateSibling.Key, ""); err == nil {
ResolveRecovery(failedInstanceKey, &candidateSibling.Key)
successorInstance = candidateSibling
actionTaken = true
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: move to candidate intermediate master (%+v) went with %d errors", candidateSibling.Key, len(errs))
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Done. Matched %d slaves under candidate sibling: %+v; %d errors: %+v", len(matchedSlaves), candidateSibling.Key, len(errs), errs))
} else {
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: move to candidate intermediate master (%+v) did not complete: %+v", candidateSibling.Key, err)
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Matched %d slaves under candidate sibling: %+v; %d errors: %+v", len(matchedSlaves), candidateSibling.Key, len(errs), errs))
}
}
if !actionTaken {
// Either no candidate or only partial match of slaves. Regroup as plan B
inst.RegroupSlaves(failedInstanceKey, nil)
// We don't care much if regroup made it or not. We prefer that it made it, in which case we only need to match up
// one slave, but the operation is still valid if regroup partially/completely failed. We just promote anything
// not regrouped.
// So, match up all that's left, plan C
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt a match up from %+v", *failedInstanceKey)
var errs []error
var matchedSlaves [](*inst.Instance)
// Plain assignment (not :=) so that the named results successorInstance and err are updated.
matchedSlaves, successorInstance, err, errs = inst.MatchUpSlaves(failedInstanceKey, "")
if len(matchedSlaves) == 0 {
log.Errorf("topology_recovery: RecoverDeadIntermediateMaster failed to match up any slave from %+v", *failedInstanceKey)
return false, successorInstance, err
}
ResolveRecovery(failedInstanceKey, &successorInstance.Key)
actionTaken = true
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: matched up to %+v", successorInstance.Key)
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Done. Matched slaves under: %+v %d errors: %+v", successorInstance.Key, len(errs), errs))
}
return actionTaken, successorInstance, err
}
示例3: auditAgentOperation
// auditAgentOperation writes a new audit entry on behalf of the given agent.
// The entry is keyed by the agent's hostname and MySQL port; when agent is nil an
// empty (zero-value) instance key is used instead.
// It returns whatever inst.AuditOperation returns.
func auditAgentOperation(auditType string, agent *Agent, message string) error {
	var agentKey inst.InstanceKey
	if agent != nil {
		agentKey.Hostname = agent.Hostname
		agentKey.Port = int(agent.MySQLPort)
	}
	return inst.AuditOperation(auditType, &agentKey, message)
}
示例4: emergentlyReadTopologyInstance
// emergentlyReadTopologyInstance forces an asynchronous re-read of a topology instance.
// This is done because we need to substantiate a suspicion that we may have a failover
// scenario, and we want to speed up reading the complete picture.
//
// The instance's display string is recorded in emergencyReadTopologyInstanceMap; when
// that registration fails, no read is issued. Once the read completes, the operation is
// audited with the analysis code as its message.
func emergentlyReadTopologyInstance(instanceKey *inst.InstanceKey, analysisCode inst.AnalysisCode) {
	// NOTE(review): assumes Add has go-cache semantics (stores the entry and returns nil
	// only when the key is not already present) — under that contract a follow-up Set of
	// the same key/value/expiration is redundant, so none is issued here. TODO confirm.
	if err := emergencyReadTopologyInstanceMap.Add(instanceKey.DisplayString(), true, 0); err != nil {
		// Registration refused: skip the emergency read.
		return
	}
	go inst.ExecuteOnTopology(func() {
		inst.ReadTopologyInstance(instanceKey)
		inst.AuditOperation("emergently-read-topology-instance", instanceKey, string(analysisCode))
	})
}
示例5: emergentlyReadTopologyInstance
// emergentlyReadTopologyInstance triggers an asynchronous re-read of a topology instance.
// It is used to substantiate a suspicion of a failover scenario by refreshing the picture
// sooner than the regular polling would.
// The instance's display string is first added to emergencyReadTopologyInstanceMap; when
// that Add reports an error (the original code treats this as "just recently attempted")
// nothing is done.
func emergentlyReadTopologyInstance(instanceKey *inst.InstanceKey, analysisCode inst.AnalysisCode) {
	cacheKey := instanceKey.DisplayString()
	if err := emergencyReadTopologyInstanceMap.Add(cacheKey, true, cache.DefaultExpiration); err == nil {
		// Newly registered: read the instance in the background and audit the read.
		go inst.ExecuteOnTopology(func() {
			inst.ReadTopologyInstance(instanceKey)
			inst.AuditOperation("emergently-read-topology-instance", instanceKey, string(analysisCode))
		})
	}
}
示例6: RecoverDeadMaster
// RecoverDeadMaster attempts to recover from a dead master described by analysisEntry.
// It registers the recovery, audits it, runs the configured PreFailoverProcesses and then
// regroups the failed master's slaves via inst.RegroupSlaves, taking the returned
// candidate slave as the new master.
//
// Returns:
//   - true, the promoted slave and the regroup error when a slave was promoted;
//   - false, nil and the relevant error when registration is refused, a pre-failover
//     process fails, or the regroup did not yield any candidate slave.
func RecoverDeadMaster(analysisEntry inst.ReplicationAnalysis) (bool, *inst.Instance, error) {
	failedInstanceKey := &analysisEntry.AnalyzedInstanceKey
	if ok, err := AttemptRecoveryRegistration(&analysisEntry); !ok {
		log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadMaster.", *failedInstanceKey)
		return false, nil, err
	}
	inst.AuditOperation("recover-dead-master", failedInstanceKey, "problem found; will recover")
	if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", analysisEntry, nil, true); err != nil {
		return false, nil, err
	}

	log.Debugf("topology_recovery: RecoverDeadMaster: will recover %+v", *failedInstanceKey)
	_, _, _, candidateSlave, err := inst.RegroupSlaves(failedInstanceKey, nil)
	if candidateSlave == nil {
		// Nothing was promoted. Dereferencing candidateSlave below would panic, so resolve
		// the recovery without a successor and report the failure instead.
		ResolveRecovery(failedInstanceKey, nil)
		log.Debugf("topology_recovery: - RecoverDeadMaster: Failure: no slave promoted.")
		inst.AuditOperation("recover-dead-master", failedInstanceKey, "Failure: no slave promoted.")
		return false, nil, err
	}

	ResolveRecovery(failedInstanceKey, &candidateSlave.Key)
	log.Debugf("topology_recovery: - RecoverDeadMaster: candidate slave is %+v", candidateSlave.Key)
	inst.AuditOperation("recover-dead-master", failedInstanceKey, fmt.Sprintf("master: %+v", candidateSlave.Key))
	return true, candidateSlave, err
}
示例7: acceptSignals
// acceptSignals registers for OS signals.
// It subscribes to SIGHUP and starts a goroutine that, on every SIGHUP received, reloads
// the configuration and writes a "reload-configuration" audit entry.
func acceptSignals() {
	hangups := make(chan os.Signal, 1)
	signal.Notify(hangups, syscall.SIGHUP)

	reloadOnHangup := func() {
		for received := range hangups {
			if received != syscall.SIGHUP {
				continue
			}
			log.Debugf("Received SIGHUP. Reloading configuration")
			config.Reload()
			inst.AuditOperation("reload-configuration", nil, "Triggered via SIGHUP")
		}
	}
	go reloadOnHangup()
}
示例8: StartDiscovery
// StartDiscovery begins a one-time discovery process for the given instance and all of
// its topology-connected instances.
// That is, the instance will be investigated for master and slaves, and the routines will
// follow on each and every such found master/slave.
// In essence, assuming all slaves in a replication topology are running, and given a
// single instance in such topology, this function will detect the entire topology.
//
// The call waits by pairing each pending token with a completed token, and stops waiting
// the first time no pending token is immediately available; it then audits the operation.
func StartDiscovery(instanceKey inst.InstanceKey) {
	log.Infof("Starting discovery at %+v", instanceKey)

	pendingTokens := make(chan bool, maxConcurrency)
	completedTokens := make(chan bool, maxConcurrency)
	accountedDiscoverInstance(instanceKey, pendingTokens, completedTokens)
	go handleDiscoveryRequests(pendingTokens, completedTokens)

	// Block until all are complete
	for drained := false; !drained; {
		select {
		case <-pendingTokens:
			<-completedTokens
		default:
			drained = true
		}
	}
	inst.AuditOperation("start-discovery", &instanceKey, "")
}
示例9: RecoverDeadIntermediateMaster
// RecoverDeadIntermediateMaster attempts to recover from a failed intermediate master by
// relocating its slaves. After registering and auditing the recovery (and running the
// PreFailoverProcesses unless skipProcesses is set) it tries, in order and only while no
// action has been taken yet:
//   - Plan A: relocate the slaves under a candidate sibling in the same data center;
//   - Plan B: inst.RegroupSlaves (a failure is only logged);
//   - Plan C: relocate the slaves under a candidate sibling in another data center;
//   - Plan D: relocate the slaves under the failed instance's own master.
// The recovery is finally resolved with the successor's key, or with nil when there is none.
// It returns whether action was taken, the successor instance (if any) and the last error.
func RecoverDeadIntermediateMaster(analysisEntry inst.ReplicationAnalysis, skipProcesses bool) (actionTaken bool, successorInstance *inst.Instance, err error) {
failedInstanceKey := &analysisEntry.AnalyzedInstanceKey
// Bail out when registration is refused; this err is scoped to the if statement.
if ok, err := AttemptRecoveryRegistration(&analysisEntry); !ok {
log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadIntermediateMaster.", *failedInstanceKey)
return false, nil, err
}
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, "problem found; will recover")
log.Debugf("topology_recovery: RecoverDeadIntermediateMaster: will recover %+v", *failedInstanceKey)
if !skipProcesses {
// A failing pre-failover hook aborts the recovery before anything is moved.
if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", analysisEntry, nil, emptySlavesList, true); err != nil {
return false, nil, err
}
}
intermediateMasterInstance, _, err := inst.ReadInstance(failedInstanceKey)
if err != nil {
return false, nil, err
}
// Plan A: find a replacement intermediate master in same Data Center
// The error of the candidate lookup is not inspected here; only a nil candidate is handled.
candidateSiblingOfIntermediateMaster, err := GetCandidateSiblingOfIntermediateMaster(intermediateMasterInstance)
// relocateSlavesToCandidateSibling is shared by plans A and C. On success it sets the
// named results actionTaken and successorInstance and resolves the recovery.
relocateSlavesToCandidateSibling := func() {
if candidateSiblingOfIntermediateMaster == nil {
return
}
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will attempt a candidate intermediate master: %+v", candidateSiblingOfIntermediateMaster.Key)
// We have a candidate
// NOTE(review): the else branch dereferences the candidateSibling returned by RelocateSlaves
// on its error path — confirm it is never nil when err != nil.
if relocatedSlaves, candidateSibling, err, errs := inst.RelocateSlaves(failedInstanceKey, &candidateSiblingOfIntermediateMaster.Key, ""); err == nil {
ResolveRecovery(failedInstanceKey, &candidateSibling.Key)
successorInstance = candidateSibling
actionTaken = true
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: move to candidate intermediate master (%+v) went with %d errors", candidateSibling.Key, len(errs))
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Done. Relocated %d slaves under candidate sibling: %+v; %d errors: %+v", len(relocatedSlaves), candidateSibling.Key, len(errs), errs))
} else {
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: move to candidate intermediate master (%+v) did not complete: %+v", candidateSibling.Key, err)
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Relocated %d slaves under candidate sibling: %+v; %d errors: %+v", len(relocatedSlaves), candidateSibling.Key, len(errs), errs))
}
}
if candidateSiblingOfIntermediateMaster != nil && candidateSiblingOfIntermediateMaster.DataCenter == intermediateMasterInstance.DataCenter {
relocateSlavesToCandidateSibling()
}
if !actionTaken {
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt regrouping of slaves")
// Plan B: regroup (we wish to reduce cross-DC replication streams)
_, _, _, _, err = inst.RegroupSlaves(failedInstanceKey, true, nil)
if err != nil {
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: regroup failed on: %+v", err)
}
// Plan C: try replacement intermediate master in other DC...
if candidateSiblingOfIntermediateMaster != nil && candidateSiblingOfIntermediateMaster.DataCenter != intermediateMasterInstance.DataCenter {
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt relocating to another DC server")
relocateSlavesToCandidateSibling()
}
}
if !actionTaken {
// Do we still have leftovers? Some slaves couldn't move? Couldn't regroup? Only left with regroup's resulting leader?
// nothing moved?
// We don't care much if regroup made it or not. We prefer that it made it, in which case we only need to relocate up
// one slave, but the operation is still valid if regroup partially/completely failed. We just promote anything
// not regrouped.
// So, match up all that's left, plan D
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt to relocate up from %+v", *failedInstanceKey)
var errs []error
var relocatedSlaves [](*inst.Instance)
// Plain assignment (not :=) so that the named results successorInstance and err are updated.
relocatedSlaves, successorInstance, err, errs = inst.RelocateSlaves(failedInstanceKey, &analysisEntry.AnalyzedInstanceMasterKey, "")
if len(relocatedSlaves) > 0 {
actionTaken = true
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: relocated up to %+v", successorInstance.Key)
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Done. Relocated slaves under: %+v %d errors: %+v", successorInstance.Key, len(errs), errs))
} else {
// The value returned by log.Errorf replaces whatever error RelocateSlaves returned.
err = log.Errorf("topology_recovery: RecoverDeadIntermediateMaster failed to match up any slave from %+v", *failedInstanceKey)
}
}
// Resolve the recovery in every case, with or without a successor.
if successorInstance != nil {
ResolveRecovery(failedInstanceKey, &successorInstance.Key)
} else {
ResolveRecovery(failedInstanceKey, nil)
}
return actionTaken, successorInstance, err
}
示例10: RecoverDeadMaster
// RecoverDeadMaster attempts to recover from a dead master described by analysisEntry.
// It registers and audits the recovery, runs the PreFailoverProcesses (unless
// skipProcesses is set), picks a recovery type from the analysis flags (GTID, binlog
// server or, by default, Pseudo-GTID) and regroups the failed master's slaves accordingly.
// Lost slaves are optionally detached in the background, and the failed master and lost
// slaves are optionally put in downtime, both depending on configuration.
// It returns the promoted slave (nil on failure), the slaves lost in the process and the
// regroup error.
func RecoverDeadMaster(analysisEntry inst.ReplicationAnalysis, skipProcesses bool) (promotedSlave *inst.Instance, lostSlaves [](*inst.Instance), err error) {
	failedInstanceKey := &analysisEntry.AnalyzedInstanceKey
	if registered, registrationErr := AttemptRecoveryRegistration(&analysisEntry); !registered {
		log.Debugf("topology_recovery: found an active or recent recovery on %+v. Will not issue another RecoverDeadMaster.", *failedInstanceKey)
		return nil, lostSlaves, registrationErr
	}

	inst.AuditOperation("recover-dead-master", failedInstanceKey, "problem found; will recover")
	if !skipProcesses {
		if hookErr := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", analysisEntry, nil, emptySlavesList, true); hookErr != nil {
			return nil, lostSlaves, hookErr
		}
	}
	log.Debugf("topology_recovery: RecoverDeadMaster: will recover %+v", *failedInstanceKey)

	// Choose the recovery strategy; Pseudo-GTID is the fallback.
	gtidTopology := analysisEntry.OracleGTIDImmediateTopology || analysisEntry.MariaDBGTIDImmediateTopology
	var masterRecoveryType MasterRecoveryType
	switch {
	case gtidTopology && !analysisEntry.PseudoGTIDImmediateTopology:
		masterRecoveryType = MasterRecoveryGTID
	case analysisEntry.BinlogServerImmediateTopology:
		masterRecoveryType = MasterRecoveryBinlogServer
	default:
		masterRecoveryType = MasterRecoveryPseudoGTID
	}
	log.Debugf("topology_recovery: RecoverDeadMaster: masterRecoveryType=%+v", masterRecoveryType)

	switch masterRecoveryType {
	case MasterRecoveryGTID:
		lostSlaves, _, promotedSlave, err = inst.RegroupSlavesGTID(failedInstanceKey, true, nil)
	case MasterRecoveryPseudoGTID:
		lostSlaves, _, _, promotedSlave, err = inst.RegroupSlavesIncludingSubSlavesOfBinlogServers(failedInstanceKey, true, nil)
	case MasterRecoveryBinlogServer:
		promotedSlave, err = inst.RegroupSlavesBinlogServers(failedInstanceKey, true, nil)
	}

	if promotedSlave != nil && len(lostSlaves) > 0 && config.Config.DetachLostSlavesAfterMasterFailover {
		log.Debugf("topology_recovery: - RecoverDeadMaster: lost %+v slaves during recovery process; detaching them", len(lostSlaves))
		// Detach in the background; the results are not waited for.
		go func() {
			for _, lostSlave := range lostSlaves {
				inst.DetachSlaveOperation(&lostSlave.Key)
			}
		}()
	}

	if config.Config.MasterFailoverLostInstancesDowntimeMinutes > 0 {
		const downtimeReason = "RecoverDeadMaster indicates this instance is lost"
		inst.BeginDowntime(failedInstanceKey, inst.GetMaintenanceOwner(), downtimeReason, config.Config.MasterFailoverLostInstancesDowntimeMinutes*60)
		for _, lostSlave := range lostSlaves {
			inst.BeginDowntime(&lostSlave.Key, inst.GetMaintenanceOwner(), downtimeReason, config.Config.MasterFailoverLostInstancesDowntimeMinutes*60)
		}
	}

	if promotedSlave != nil {
		log.Debugf("topology_recovery: - RecoverDeadMaster: promoted slave is %+v", promotedSlave.Key)
		inst.AuditOperation("recover-dead-master", failedInstanceKey, fmt.Sprintf("master: %+v", promotedSlave.Key))
	} else {
		log.Debugf("topology_recovery: - RecoverDeadMaster: Failure: no slave promoted.")
		inst.AuditOperation("recover-dead-master", failedInstanceKey, "Failure: no slave promoted.")
	}
	return promotedSlave, lostSlaves, err
}
示例11: RecoverDeadCoMaster
// RecoverDeadCoMaster recovers a dead co-master, complete logic inside
func RecoverDeadCoMaster(topologyRecovery *TopologyRecovery, skipProcesses bool) (promotedSlave *inst.Instance, lostSlaves [](*inst.Instance), err error) {
analysisEntry := &topologyRecovery.AnalysisEntry
failedInstanceKey := &analysisEntry.AnalyzedInstanceKey
otherCoMasterKey := &analysisEntry.AnalyzedInstanceMasterKey
otherCoMaster, found, _ := inst.ReadInstance(otherCoMasterKey)
if otherCoMaster == nil || !found {
return nil, lostSlaves, topologyRecovery.AddError(log.Errorf("RecoverDeadCoMaster: could not read info for co-master %+v of %+v", *otherCoMasterKey, *failedInstanceKey))
}
inst.AuditOperation("recover-dead-co-master", failedInstanceKey, "problem found; will recover")
if !skipProcesses {
if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true); err != nil {
return nil, lostSlaves, topologyRecovery.AddError(err)
}
}
log.Debugf("topology_recovery: RecoverDeadCoMaster: will recover %+v", *failedInstanceKey)
var coMasterRecoveryType MasterRecoveryType = MasterRecoveryPseudoGTID
if analysisEntry.OracleGTIDImmediateTopology || analysisEntry.MariaDBGTIDImmediateTopology {
coMasterRecoveryType = MasterRecoveryGTID
}
log.Debugf("topology_recovery: RecoverDeadCoMaster: coMasterRecoveryType=%+v", coMasterRecoveryType)
switch coMasterRecoveryType {
case MasterRecoveryGTID:
{
lostSlaves, _, promotedSlave, err = inst.RegroupSlavesGTID(failedInstanceKey, true, nil)
}
case MasterRecoveryPseudoGTID:
{
lostSlaves, _, _, promotedSlave, err = inst.RegroupSlavesPseudoGTIDIncludingSubSlavesOfBinlogServers(failedInstanceKey, true, nil, &topologyRecovery.PostponedFunctionsContainer)
}
}
topologyRecovery.AddError(err)
mustPromoteOtherCoMaster := config.Config.CoMasterRecoveryMustPromoteOtherCoMaster
if !otherCoMaster.ReadOnly {
log.Debugf("topology_recovery: RecoverDeadCoMaster: other co-master %+v is writeable hence has to be promoted", otherCoMaster.Key)
mustPromoteOtherCoMaster = true
}
log.Debugf("topology_recovery: RecoverDeadCoMaster: mustPromoteOtherCoMaster? %+v", mustPromoteOtherCoMaster)
if promotedSlave != nil {
topologyRecovery.ParticipatingInstanceKeys.AddKey(promotedSlave.Key)
if mustPromoteOtherCoMaster {
log.Debugf("topology_recovery: mustPromoteOtherCoMaster. Verifying that %+v is/can be promoted", *otherCoMasterKey)
promotedSlave, err = replacePromotedSlaveWithCandidate(failedInstanceKey, promotedSlave, otherCoMasterKey)
} else {
// We are allowed to promote any server
promotedSlave, err = replacePromotedSlaveWithCandidate(failedInstanceKey, promotedSlave, nil)
if promotedSlave.DataCenter == otherCoMaster.DataCenter &&
promotedSlave.PhysicalEnvironment == otherCoMaster.PhysicalEnvironment && false {
// and _still_ we prefer to promote the co-master! They're in same env & DC so no worries about geo issues!
promotedSlave, err = replacePromotedSlaveWithCandidate(failedInstanceKey, promotedSlave, otherCoMasterKey)
}
}
topologyRecovery.AddError(err)
}
if promotedSlave != nil {
if mustPromoteOtherCoMaster && !promotedSlave.Key.Equals(otherCoMasterKey) {
topologyRecovery.AddError(log.Errorf("RecoverDeadCoMaster: could not manage to promote other-co-master %+v; was only able to promote %+v; CoMasterRecoveryMustPromoteOtherCoMaster is true, therefore failing", *otherCoMasterKey, promotedSlave.Key))
promotedSlave = nil
}
}
if promotedSlave != nil {
topologyRecovery.ParticipatingInstanceKeys.AddKey(promotedSlave.Key)
}
// OK, we may have someone promoted. Either this was the other co-master or another slave.
// Noting down that we DO NOT attempt to set a new co-master topology. We are good with remaining with a single master.
// I tried solving the "let's promote a slave and create a new co-master setup" but this turns so complex due to various factors.
// I see this as risky and not worth the questionable benefit.
// Maybe future me is a smarter person and finds a simple solution. Unlikely. I'm getting dumber.
//
// ...
// Now that we're convinved, take a look at what we can be left with:
// Say we started with M1<->M2<-S1, with M2 failing, and we promoted S1.
// We now have M1->S1 (because S1 is promoted), S1->M2 (because that's what it remembers), M2->M1 (because that's what it remembers)
// !! This is an evil 3-node circle that must be broken.
// config.Config.ApplyMySQLPromotionAfterMasterFailover, if true, will cause it to break, because we would RESET SLAVE on S1
// but we want to make sure the circle is broken no matter what.
// So in the case we promoted not-the-other-co-master, we issue a detach-slave-master-host, which is a reversible operation
if promotedSlave != nil && !promotedSlave.Key.Equals(otherCoMasterKey) {
_, err = inst.DetachSlaveMasterHost(&promotedSlave.Key)
topologyRecovery.AddError(log.Errore(err))
}
if promotedSlave != nil && len(lostSlaves) > 0 && config.Config.DetachLostSlavesAfterMasterFailover {
postponedFunction := func() error {
log.Debugf("topology_recovery: - RecoverDeadCoMaster: lost %+v slaves during recovery process; detaching them", len(lostSlaves))
for _, slave := range lostSlaves {
slave := slave
inst.DetachSlaveOperation(&slave.Key)
}
return nil
}
topologyRecovery.AddPostponedFunction(postponedFunction)
//.........這裏部分代碼省略.........
示例12: RecoverDeadIntermediateMaster
// RecoverDeadIntermediateMaster performs intermediate master recovery; complete logic inside.
// After auditing the recovery (and running the PreFailoverProcesses unless skipProcesses
// is set) it tries, in order and only while the recovery is not yet resolved:
//   - Plan A: relocate the slaves under a candidate sibling in the same data center;
//   - Plan B: inst.RegroupSlaves (a failure is recorded and logged only);
//   - Plan C: relocate the slaves under a candidate sibling in another data center;
//   - Plan D: relocate the slaves under the failed instance's own master.
// Errors and participating instance keys are accumulated on topologyRecovery, and
// ResolveRecovery is always called at the end, with a nil successor when nothing resolved.
// It returns the successor instance (nil when unresolved) and the last error.
func RecoverDeadIntermediateMaster(topologyRecovery *TopologyRecovery, skipProcesses bool) (successorInstance *inst.Instance, err error) {
analysisEntry := &topologyRecovery.AnalysisEntry
failedInstanceKey := &analysisEntry.AnalyzedInstanceKey
recoveryResolved := false
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, "problem found; will recover")
if !skipProcesses {
// A failing pre-failover hook aborts the recovery before anything is moved.
if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true); err != nil {
return nil, topologyRecovery.AddError(err)
}
}
intermediateMasterInstance, _, err := inst.ReadInstance(failedInstanceKey)
if err != nil {
return nil, topologyRecovery.AddError(err)
}
// Find possible candidate
// The error of the candidate lookup is not inspected here; only a nil candidate is handled.
candidateSiblingOfIntermediateMaster, err := GetCandidateSiblingOfIntermediateMaster(intermediateMasterInstance)
// relocateSlavesToCandidateSibling is shared by plans A and C. Only a relocation that moved
// at least one slave and reported no error at all marks the recovery as resolved.
relocateSlavesToCandidateSibling := func() {
if candidateSiblingOfIntermediateMaster == nil {
return
}
// We have a candidate
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will attempt a candidate intermediate master: %+v", candidateSiblingOfIntermediateMaster.Key)
// err here is local to the closure; the function's named result err is not touched.
relocatedSlaves, candidateSibling, err, errs := inst.RelocateSlaves(failedInstanceKey, &candidateSiblingOfIntermediateMaster.Key, "")
topologyRecovery.AddErrors(errs)
topologyRecovery.ParticipatingInstanceKeys.AddKey(candidateSiblingOfIntermediateMaster.Key)
// NOTE(review): the two failure branches below dereference candidateSibling — confirm
// RelocateSlaves never returns it as nil when nothing was relocated or err != nil.
if len(relocatedSlaves) == 0 {
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: failed to move any slave to candidate intermediate master (%+v)", candidateSibling.Key)
return
}
if err != nil || len(errs) > 0 {
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: move to candidate intermediate master (%+v) did not complete: %+v", candidateSibling.Key, err)
return
}
// err is necessarily nil at this point (the previous check returned otherwise).
if err == nil {
recoveryResolved = true
successorInstance = candidateSibling
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Relocated %d slaves under candidate sibling: %+v; %d errors: %+v", len(relocatedSlaves), candidateSibling.Key, len(errs), errs))
}
}
// Plan A: find a replacement intermediate master in same Data Center
if candidateSiblingOfIntermediateMaster != nil && candidateSiblingOfIntermediateMaster.DataCenter == intermediateMasterInstance.DataCenter {
relocateSlavesToCandidateSibling()
}
if !recoveryResolved {
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt regrouping of slaves")
// Plan B: regroup (we wish to reduce cross-DC replication streams)
// This err is declared with := and is scoped to the enclosing if block.
_, _, _, regroupPromotedSlave, err := inst.RegroupSlaves(failedInstanceKey, true, nil, nil)
if err != nil {
topologyRecovery.AddError(err)
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: regroup failed on: %+v", err)
}
if regroupPromotedSlave != nil {
topologyRecovery.ParticipatingInstanceKeys.AddKey(regroupPromotedSlave.Key)
}
// Plan C: try replacement intermediate master in other DC...
if candidateSiblingOfIntermediateMaster != nil && candidateSiblingOfIntermediateMaster.DataCenter != intermediateMasterInstance.DataCenter {
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt relocating to another DC server")
relocateSlavesToCandidateSibling()
}
}
if !recoveryResolved {
// Do we still have leftovers? Some slaves couldn't move? Couldn't regroup? Only left with regroup's resulting leader?
// nothing moved?
// We don't care much if regroup made it or not. We prefer that it made it, in which case we only need to relocate up
// one slave, but the operation is still valid if regroup partially/completely failed. We just promote anything
// not regrouped.
// So, match up all that's left, plan D
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: will next attempt to relocate up from %+v", *failedInstanceKey)
var errs []error
var relocatedSlaves [](*inst.Instance)
// Plain assignment (not :=) so that the named results successorInstance and err are updated.
relocatedSlaves, successorInstance, err, errs = inst.RelocateSlaves(failedInstanceKey, &analysisEntry.AnalyzedInstanceMasterKey, "")
topologyRecovery.AddErrors(errs)
topologyRecovery.ParticipatingInstanceKeys.AddKey(analysisEntry.AnalyzedInstanceMasterKey)
if len(relocatedSlaves) > 0 {
recoveryResolved = true
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Relocated slaves under: %+v %d errors: %+v", successorInstance.Key, len(errs), errs))
} else {
// The value returned by log.Errorf replaces whatever error RelocateSlaves returned.
err = log.Errorf("topology_recovery: RecoverDeadIntermediateMaster failed to match up any slave from %+v", *failedInstanceKey)
topologyRecovery.AddError(err)
}
}
// Never report a successor for an unresolved recovery.
if !recoveryResolved {
successorInstance = nil
}
ResolveRecovery(topologyRecovery, successorInstance)
return successorInstance, err
}
示例13: RecoverDeadMaster
// RecoverDeadMaster recovers a dead master, complete logic inside.
// It audits the recovery, runs the PreFailoverProcesses (unless skipProcesses is set),
// picks a recovery type from the analysis flags (GTID, binlog server or, by default,
// Pseudo-GTID) and regroups the failed master's slaves accordingly. Detaching lost slaves
// and beginning downtime are registered on topologyRecovery as postponed functions,
// each depending on configuration.
// It returns the promoted slave (nil on failure), the slaves lost in the process and the
// regroup error, which is also recorded on topologyRecovery.
func RecoverDeadMaster(topologyRecovery *TopologyRecovery, skipProcesses bool) (promotedSlave *inst.Instance, lostSlaves [](*inst.Instance), err error) {
	analysisEntry := &topologyRecovery.AnalysisEntry
	failedInstanceKey := &analysisEntry.AnalyzedInstanceKey

	inst.AuditOperation("recover-dead-master", failedInstanceKey, "problem found; will recover")
	if !skipProcesses {
		if hookErr := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true); hookErr != nil {
			return nil, lostSlaves, topologyRecovery.AddError(hookErr)
		}
	}
	log.Debugf("topology_recovery: RecoverDeadMaster: will recover %+v", *failedInstanceKey)

	// Choose the recovery strategy; Pseudo-GTID is the fallback.
	var masterRecoveryType MasterRecoveryType
	switch {
	case analysisEntry.OracleGTIDImmediateTopology || analysisEntry.MariaDBGTIDImmediateTopology:
		masterRecoveryType = MasterRecoveryGTID
	case analysisEntry.BinlogServerImmediateTopology:
		masterRecoveryType = MasterRecoveryBinlogServer
	default:
		masterRecoveryType = MasterRecoveryPseudoGTID
	}
	log.Debugf("topology_recovery: RecoverDeadMaster: masterRecoveryType=%+v", masterRecoveryType)

	switch masterRecoveryType {
	case MasterRecoveryGTID:
		lostSlaves, _, promotedSlave, err = inst.RegroupSlavesGTID(failedInstanceKey, true, nil)
	case MasterRecoveryPseudoGTID:
		lostSlaves, _, _, promotedSlave, err = inst.RegroupSlavesPseudoGTIDIncludingSubSlavesOfBinlogServers(failedInstanceKey, true, nil, &topologyRecovery.PostponedFunctionsContainer)
	case MasterRecoveryBinlogServer:
		promotedSlave, err = recoverDeadMasterInBinlogServerTopology(topologyRecovery)
	}
	topologyRecovery.AddError(err)

	if promotedSlave != nil && len(lostSlaves) > 0 && config.Config.DetachLostSlavesAfterMasterFailover {
		// Detaching is deferred: it is only registered here.
		topologyRecovery.AddPostponedFunction(func() error {
			log.Debugf("topology_recovery: - RecoverDeadMaster: lost %+v slaves during recovery process; detaching them", len(lostSlaves))
			for _, lostSlave := range lostSlaves {
				inst.DetachSlaveOperation(&lostSlave.Key)
			}
			return nil
		})
	}

	if config.Config.MasterFailoverLostInstancesDowntimeMinutes > 0 {
		// Downtime for the failed master and every lost slave is deferred as well.
		topologyRecovery.AddPostponedFunction(func() error {
			const downtimeReason = "RecoverDeadMaster indicates this instance is lost"
			inst.BeginDowntime(failedInstanceKey, inst.GetMaintenanceOwner(), downtimeReason, config.Config.MasterFailoverLostInstancesDowntimeMinutes*60)
			for _, lostSlave := range lostSlaves {
				inst.BeginDowntime(&lostSlave.Key, inst.GetMaintenanceOwner(), downtimeReason, config.Config.MasterFailoverLostInstancesDowntimeMinutes*60)
			}
			return nil
		})
	}

	if promotedSlave != nil {
		inst.AuditOperation("recover-dead-master", failedInstanceKey, fmt.Sprintf("promoted slave: %+v", promotedSlave.Key))
	} else {
		inst.AuditOperation("recover-dead-master", failedInstanceKey, "Failure: no slave promoted.")
	}
	return promotedSlave, lostSlaves, err
}
示例14: DiscoverInstance
// DiscoverInstance will attempt discovering an instance (unless it is already up to date)
// and will queue its master and slaves (if any) on discoveryInstanceKeys for further
// discovery. Invalid keys are ignored; a failed topology read is logged as a warning.
func DiscoverInstance(instanceKey inst.InstanceKey) {
	instanceKey.Formalize()
	if !instanceKey.IsValid() {
		return
	}

	known, found, _ := inst.ReadInstance(&instanceKey)
	if found && known.IsUpToDate && known.IsLastCheckValid {
		// we've already discovered this one. Skip!
		return
	}

	// Not known, or not up to date. Continue investigation:
	instance, err := inst.ReadTopologyInstance(&instanceKey)
	// panic can occur (IO stuff). Therefore it may happen
	// that instance is nil. Check it.
	if instance == nil || err != nil {
		log.Warningf("instance is nil in DiscoverInstance. key=%+v, error=%+v", instanceKey, err)
		return
	}
	log.Debugf("Discovered host: %+v, master: %+v", instance.Key, instance.MasterKey)

	// Investigate slaves:
	slaveKeys := instance.SlaveHosts.GetInstanceKeys()
	for _, slaveKey := range slaveKeys {
		discoveryInstanceKeys <- slaveKey
	}
	// Investigate master:
	discoveryInstanceKeys <- instance.MasterKey
}
// StartDiscovery begins a one-time discovery process for the given instance and all of
// its topology-connected instances.
// That is, the instance will be investigated for master and slaves, and the routines will
// follow on each and every such found master/slave.
// In essence, assuming all slaves in a replication topology are running, and given a
// single instance in such topology, this function will detect the entire topology.
//
// The call waits by pairing each pending token with a completed token, and stops waiting
// the first time no pending token is immediately available; it then audits the operation.
func StartDiscovery(instanceKey inst.InstanceKey) {
	log.Infof("Starting discovery at %+v", instanceKey)

	pendingTokens := make(chan bool, maxConcurrency)
	completedTokens := make(chan bool, maxConcurrency)
	AccountedDiscoverInstance(instanceKey, pendingTokens, completedTokens)
	go handleDiscoveryRequests(pendingTokens, completedTokens)

	// Block until all are complete
waitForTokens:
	for {
		select {
		case <-pendingTokens:
			<-completedTokens
		default:
			break waitForTokens
		}
	}
	inst.AuditOperation("start-discovery", &instanceKey, "")
}
// ContinuousDiscovery starts an asynchronous infinite discovery process where instances are
// periodically investigated and their status captured, and long since unseen instances are
// purged and forgotten.
func ContinuousDiscovery() {
log.Infof("Starting continuous discovery")
inst.LoadHostnameResolveCacheFromDatabase()
go handleDiscoveryRequests(nil, nil)
tick := time.Tick(time.Duration(config.Config.DiscoveryPollSeconds) * time.Second)
forgetUnseenTick := time.Tick(time.Minute)
recoverTick := time.Tick(10 * time.Second)
var snapshotTopologiesTick <-chan time.Time
if config.Config.SnapshotTopologiesIntervalHours > 0 {
snapshotTopologiesTick = time.Tick(time.Duration(config.Config.SnapshotTopologiesIntervalHours) * time.Hour)
}
elected := false
_ = CreateElectionAnchor(false)
for {
select {
case <-tick:
if elected, _ = AttemptElection(); elected {
instanceKeys, _ := inst.ReadOutdatedInstanceKeys()
log.Debugf("outdated keys: %+v", instanceKeys)
for _, instanceKey := range instanceKeys {
discoveryInstanceKeys <- instanceKey
}
} else {
log.Debugf("Not elected as active node; polling")
}
case <-forgetUnseenTick:
// See if we should also forget objects (lower frequency)
go func() {
if elected {
inst.ForgetLongUnseenInstances()
inst.ForgetUnseenInstancesDifferentlyResolved()
inst.ForgetExpiredHostnameResolves()
inst.DeleteInvalidHostnameResolves()
inst.ReviewUnseenInstances()
//.........這裏部分代碼省略.........
示例15: RecoverDeadCoMaster
// RecoverDeadCoMaster recovers a dead co-master; the complete logic lives here.
//
// The failed instance is topologyRecovery.AnalysisEntry.AnalyzedInstanceKey and the
// surviving co-master is expected to be AnalyzedInstanceMasterKey of that same entry.
//
// Parameters:
//   - topologyRecovery: the recovery record; errors, participating instance keys and
//     postponed functions are registered on it as the recovery proceeds.
//   - skipProcesses: when true, the PreFailoverProcesses hooks are not executed.
//
// Returns:
//   - otherCoMaster: the promoted instance, set only when its key equals the expected
//     co-master key; nil otherwise.
//   - lostSlaves: the lost slaves as reported by the regroup operation.
//   - err: the most recent error encountered (pre-failover hook failure, regroup or
//     candidate replacement error, or failure to promote the other co-master).
func RecoverDeadCoMaster(topologyRecovery *TopologyRecovery, skipProcesses bool) (otherCoMaster *inst.Instance, lostSlaves [](*inst.Instance), err error) {
	analysis := &topologyRecovery.AnalysisEntry
	deadKey := &analysis.AnalyzedInstanceKey
	survivorKey := &analysis.AnalyzedInstanceMasterKey

	inst.AuditOperation("recover-dead-co-master", deadKey, "problem found; will recover")
	if !skipProcesses {
		hookErr := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true)
		if hookErr != nil {
			// A failing pre-failover hook aborts the recovery before anything is touched.
			return nil, lostSlaves, topologyRecovery.AddError(hookErr)
		}
	}

	log.Debugf("topology_recovery: RecoverDeadCoMaster: will recover %+v", *deadKey)

	// Pseudo-GTID is the default; real GTID is used only when the immediate
	// topology is Oracle/MariaDB GTID and is not flagged as Pseudo-GTID.
	var coMasterRecoveryType MasterRecoveryType = MasterRecoveryPseudoGTID
	gtidTopology := analysis.OracleGTIDImmediateTopology || analysis.MariaDBGTIDImmediateTopology
	if gtidTopology && !analysis.PseudoGTIDImmediateTopology {
		coMasterRecoveryType = MasterRecoveryGTID
	}
	log.Debugf("topology_recovery: RecoverDeadCoMaster: coMasterRecoveryType=%+v", coMasterRecoveryType)

	var promoted *inst.Instance
	switch coMasterRecoveryType {
	case MasterRecoveryGTID:
		lostSlaves, _, promoted, err = inst.RegroupSlavesGTID(deadKey, true, nil)
	case MasterRecoveryPseudoGTID:
		lostSlaves, _, _, promoted, err = inst.RegroupSlavesPseudoGTIDIncludingSubSlavesOfBinlogServers(deadKey, true, nil, &topologyRecovery.PostponedFunctionsContainer)
	}
	topologyRecovery.AddError(err)

	if promoted != nil {
		topologyRecovery.ParticipatingInstanceKeys.AddKey(promoted.Key)
		// Try to swap whatever was promoted for the surviving co-master.
		promoted, err = replacePromotedSlaveWithCandidate(deadKey, promoted, survivorKey)
		topologyRecovery.AddError(err)
	}

	switch {
	case promoted == nil:
		// Nothing was promoted (or the replacement yielded nothing); nothing to validate.
	case promoted.Key.Equals(survivorKey):
		topologyRecovery.ParticipatingInstanceKeys.AddKey(*survivorKey)
		otherCoMaster = promoted
	default:
		// Only the other co-master counts as a successful promotion here.
		err = log.Errorf("RecoverDeadCoMaster: could not manage to promote other-co-master %+v; was only able to promote %+v", *survivorKey, promoted.Key)
		promoted = nil
	}

	if promoted != nil && len(lostSlaves) > 0 && config.Config.DetachLostSlavesAfterMasterFailover {
		detachLost := func() error {
			log.Debugf("topology_recovery: - RecoverDeadCoMaster: lost %+v slaves during recovery process; detaching them", len(lostSlaves))
			for _, lost := range lostSlaves {
				inst.DetachSlaveOperation(&lost.Key)
			}
			return nil
		}
		topologyRecovery.AddPostponedFunction(detachLost)
	}

	if config.Config.MasterFailoverLostInstancesDowntimeMinutes > 0 {
		// Registered regardless of whether a promotion succeeded: the failed
		// instance and every lost slave are put into downtime.
		downtimeLost := func() error {
			inst.BeginDowntime(deadKey, inst.GetMaintenanceOwner(), "RecoverDeadCoMaster indicates this instance is lost", config.Config.MasterFailoverLostInstancesDowntimeMinutes*60)
			for _, lost := range lostSlaves {
				inst.BeginDowntime(&lost.Key, inst.GetMaintenanceOwner(), "RecoverDeadCoMaster indicates this instance is lost", config.Config.MasterFailoverLostInstancesDowntimeMinutes*60)
			}
			return nil
		}
		topologyRecovery.AddPostponedFunction(downtimeLost)
	}

	return otherCoMaster, lostSlaves, err
}