Skip to content
This repository has been archived by the owner on Feb 18, 2025. It is now read-only.

topology recovery: supporting {instanceType}, {isMaster}, {isCoMaster} placeholders #1008

Merged
merged 6 commits into from
Dec 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/configuration-recovery.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ This information is passed independently in two ways, and you may choose to use
1. Environment variables: `orchestrator` will set the following, which can be retrieved by your hooks:

- `ORC_FAILURE_TYPE`
- `ORC_INSTANCE_TYPE` ("master", "co-master", "intermediate-master")
- `ORC_IS_MASTER` (true/false)
- `ORC_IS_CO_MASTER` (true/false)
- `ORC_FAILURE_DESCRIPTION`
- `ORC_FAILED_HOST`
- `ORC_FAILED_PORT`
Expand All @@ -129,6 +132,9 @@ And, in the event a recovery was successful:
2. Command line text replacement. `orchestrator` replaces the following magic tokens in your `*Processes` commands:

- `{failureType}`
- `{instanceType}` ("master", "co-master", "intermediate-master")
- `{isMaster}` (true/false)
- `{isCoMaster}` (true/false)
- `{failureDescription}`
- `{failedHost}`
- `{failedPort}`
Expand Down
10 changes: 5 additions & 5 deletions go/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,11 @@ type Configuration struct {
RecoverMasterClusterFilters []string // Only do master recovery on clusters matching these regexp patterns (of course the ".*" pattern matches everything)
RecoverIntermediateMasterClusterFilters []string // Only do IM recovery on clusters matching these regexp patterns (of course the ".*" pattern matches everything)
ProcessesShellCommand string // Shell that executes command scripts
OnFailureDetectionProcesses []string // Processes to execute when detecting a failover scenario (before making a decision whether to failover or not). May and should use some of these placeholders: {failureType}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {autoMasterRecovery}, {autoIntermediateMasterRecovery}
PreGracefulTakeoverProcesses []string // Processes to execute before doing a failover (aborting operation should any once of them exits with non-zero code; order of execution undefined). May and should use some of these placeholders: {failureType}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {countReplicas}, {replicaHosts}, {isDowntimed}
PreFailoverProcesses []string // Processes to execute before doing a failover (aborting operation should any once of them exits with non-zero code; order of execution undefined). May and should use some of these placeholders: {failureType}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {countReplicas}, {replicaHosts}, {isDowntimed}
PostFailoverProcesses []string // Processes to execute after doing a failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {isSuccessful}, {lostReplicas}, {countLostReplicas}
PostUnsuccessfulFailoverProcesses []string // Processes to execute after a not-completely-successful failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {isSuccessful}, {lostReplicas}, {countLostReplicas}
OnFailureDetectionProcesses []string // Processes to execute when detecting a failover scenario (before making a decision whether to failover or not). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {autoMasterRecovery}, {autoIntermediateMasterRecovery}
PreGracefulTakeoverProcesses []string // Processes to execute before doing a failover (aborting operation should any one of them exit with non-zero code; order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {countReplicas}, {replicaHosts}, {isDowntimed}
PreFailoverProcesses []string // Processes to execute before doing a failover (aborting operation should any one of them exit with non-zero code; order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {countReplicas}, {replicaHosts}, {isDowntimed}
PostFailoverProcesses []string // Processes to execute after doing a failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {isSuccessful}, {lostReplicas}, {countLostReplicas}
PostUnsuccessfulFailoverProcesses []string // Processes to execute after a not-completely-successful failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {isSuccessful}, {lostReplicas}, {countLostReplicas}
PostMasterFailoverProcesses []string // Processes to execute after doing a master failover (order of execution undefined). Uses same placeholders as PostFailoverProcesses
PostIntermediateMasterFailoverProcesses []string // Processes to execute after doing an intermediate master failover (order of execution undefined). Uses same placeholders as PostFailoverProcesses
PostGracefulTakeoverProcesses []string // Processes to execute after running a graceful master takeover. Uses same placeholders as PostFailoverProcesses
Expand Down
19 changes: 19 additions & 0 deletions go/inst/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ const (
GracefulMasterTakeoverCommandHint string = "graceful-master-takeover"
)

// AnalysisInstanceType is a string label for the kind of instance an analysis
// refers to: a master, a co-master, or an intermediate master.
type AnalysisInstanceType string

// The set of AnalysisInstanceType labels defined here.
const (
// AnalysisInstanceTypeMaster labels a master instance.
AnalysisInstanceTypeMaster AnalysisInstanceType = "master"
// AnalysisInstanceTypeCoMaster labels a co-master instance.
AnalysisInstanceTypeCoMaster AnalysisInstanceType = "co-master"
// AnalysisInstanceTypeIntermediateMaster labels an intermediate master instance.
AnalysisInstanceTypeIntermediateMaster AnalysisInstanceType = "intermediate-master"
)

// ReplicationAnalysis notes analysis on replication chain status, per instance
type ReplicationAnalysis struct {
AnalyzedInstanceKey InstanceKey
Expand Down Expand Up @@ -178,6 +186,17 @@ func (this *ReplicationAnalysis) AnalysisString() string {
return strings.Join(result, ", ")
}

// GetAnalysisInstanceType classifies the analyzed instance as a co-master, a
// master, or an intermediate master. The co-master flag is checked first, so an
// instance flagged as both master and co-master is reported as a co-master; an
// instance flagged as neither is reported as an intermediate master.
func (this *ReplicationAnalysis) GetAnalysisInstanceType() AnalysisInstanceType {
	switch {
	case this.IsCoMaster:
		return AnalysisInstanceTypeCoMaster
	case this.IsMaster:
		return AnalysisInstanceTypeMaster
	default:
		return AnalysisInstanceTypeIntermediateMaster
	}
}

// ValidSecondsFromSeenToLastAttemptedCheck returns the maximum allowed elapsed time
// between last_attempted_check to last_checked before we consider the instance as invalid.
func ValidSecondsFromSeenToLastAttemptedCheck() uint {
Expand Down
37 changes: 19 additions & 18 deletions go/inst/analysis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@
package inst

import (
"testing"

"github.com/github/orchestrator/go/config"
"github.com/openark/golib/log"
// test "github.com/openark/golib/tests"
"testing"
test "github.com/openark/golib/tests"
)

func init() {
Expand All @@ -30,20 +31,20 @@ func init() {
}

// TestGetAnalysisInstanceType checks the instance type reported for each
// combination of the IsMaster and IsCoMaster flags, including the case where
// both are set (expected to report "co-master").
func TestGetAnalysisInstanceType(t *testing.T) {
	scenarios := []struct {
		analysis *ReplicationAnalysis
		expected string
	}{
		{&ReplicationAnalysis{}, "intermediate-master"},
		{&ReplicationAnalysis{IsMaster: true}, "master"},
		{&ReplicationAnalysis{IsCoMaster: true}, "co-master"},
		{&ReplicationAnalysis{IsMaster: true, IsCoMaster: true}, "co-master"},
	}
	for _, scenario := range scenarios {
		instanceType := scenario.analysis.GetAnalysisInstanceType()
		test.S(t).ExpectEquals(string(instanceType), scenario.expected)
	}
}
6 changes: 6 additions & 0 deletions go/logic/topology_recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,9 @@ func prepareCommand(command string, topologyRecovery *TopologyRecovery) (result
async = true
}
command = strings.Replace(command, "{failureType}", string(analysisEntry.Analysis), -1)
command = strings.Replace(command, "{instanceType}", string(analysisEntry.GetAnalysisInstanceType()), -1)
command = strings.Replace(command, "{isMaster}", fmt.Sprintf("%t", analysisEntry.IsMaster), -1)
command = strings.Replace(command, "{isCoMaster}", fmt.Sprintf("%t", analysisEntry.IsCoMaster), -1)
command = strings.Replace(command, "{failureDescription}", analysisEntry.Description, -1)
command = strings.Replace(command, "{command}", analysisEntry.CommandHint, -1)
command = strings.Replace(command, "{failedHost}", analysisEntry.AnalyzedInstanceKey.Hostname, -1)
Expand Down Expand Up @@ -299,6 +302,9 @@ func applyEnvironmentVariables(topologyRecovery *TopologyRecovery) []string {
analysisEntry := &topologyRecovery.AnalysisEntry
env := goos.Environ()
env = append(env, fmt.Sprintf("ORC_FAILURE_TYPE=%s", string(analysisEntry.Analysis)))
env = append(env, fmt.Sprintf("ORC_INSTANCE_TYPE=%s", string(analysisEntry.GetAnalysisInstanceType())))
env = append(env, fmt.Sprintf("ORC_IS_MASTER=%t", analysisEntry.IsMaster))
env = append(env, fmt.Sprintf("ORC_IS_CO_MASTER=%t", analysisEntry.IsCoMaster))
env = append(env, fmt.Sprintf("ORC_FAILURE_DESCRIPTION=%s", analysisEntry.Description))
env = append(env, fmt.Sprintf("ORC_COMMAND=%s", analysisEntry.CommandHint))
env = append(env, fmt.Sprintf("ORC_FAILED_HOST=%s", analysisEntry.AnalyzedInstanceKey.Hostname))
Expand Down