Skip to content

Commit 445c903

Browse files
authored
Merge branch 'master' into patch-1
2 parents 83c2c7d + 8335f13 commit 445c903

4 files changed

Lines changed: 40 additions & 2 deletions

File tree

go/base/context.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,9 @@ const (
4040
type ThrottleReasonHint string
4141

4242
const (
43-
NoThrottleReasonHint ThrottleReasonHint = "NoThrottleReasonHint"
44-
UserCommandThrottleReasonHint = "UserCommandThrottleReasonHint"
43+
NoThrottleReasonHint ThrottleReasonHint = "NoThrottleReasonHint"
44+
UserCommandThrottleReasonHint = "UserCommandThrottleReasonHint"
45+
LeavingHibernationThrottleReasonHint = "LeavingHibernationThrottleReasonHint"
4546
)
4647

4748
const (
@@ -105,9 +106,11 @@ type MigrationContext struct {
105106
throttleQuery string
106107
throttleHTTP string
107108
ThrottleCommandedByUser int64
109+
HibernateUntil int64
108110
maxLoad LoadMap
109111
criticalLoad LoadMap
110112
CriticalLoadIntervalMilliseconds int64
113+
CriticalLoadHibernateSeconds int64
111114
PostponeCutOverFlagFile string
112115
CutOverLockTimeoutSeconds int64
113116
ForceNamedCutOverCommand bool

go/cmd/gh-ost/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ func main() {
112112
maxLoad := flag.String("max-load", "", "Comma delimited status-name=threshold. e.g: 'Threads_running=100,Threads_connected=500'. When status exceeds threshold, app throttles writes")
113113
criticalLoad := flag.String("critical-load", "", "Comma delimited status-name=threshold, same format as --max-load. When status exceeds threshold, app panics and quits")
114114
flag.Int64Var(&migrationContext.CriticalLoadIntervalMilliseconds, "critical-load-interval-millis", 0, "When 0, migration immediately bails out upon meeting critical-load. When non-zero, a second check is done after given interval, and migration only bails out if 2nd check still meets critical load")
115+
flag.Int64Var(&migrationContext.CriticalLoadHibernateSeconds, "critical-load-hibernate-seconds", 0, "When nonzero, critical-load does not panic and bail out; instead, gh-ost goes into hibernate for the specified duration. It will not read/write anything to from/to any server")
115116
quiet := flag.Bool("quiet", false, "quiet")
116117
verbose := flag.Bool("verbose", false, "verbose")
117118
debug := flag.Bool("debug", false, "debug mode (very verbose)")

go/logic/applier.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,9 @@ func (this *Applier) WriteChangelogState(value string) (string, error) {
293293
func (this *Applier) InitiateHeartbeat() {
294294
var numSuccessiveFailures int64
295295
injectHeartbeat := func() error {
296+
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
297+
return nil
298+
}
296299
if _, err := this.WriteChangelog("heartbeat", time.Now().Format(time.RFC3339Nano)); err != nil {
297300
numSuccessiveFailures++
298301
if numSuccessiveFailures > this.migrationContext.MaxRetries() {

go/logic/throttler.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ func NewThrottler(applier *Applier, inspector *Inspector) *Throttler {
3838
// It merely observes the metrics collected by other components, it does not issue
3939
// its own metric collection.
4040
func (this *Throttler) shouldThrottle() (result bool, reason string, reasonHint base.ThrottleReasonHint) {
41+
if hibernateUntil := atomic.LoadInt64(&this.migrationContext.HibernateUntil); hibernateUntil > 0 {
42+
hibernateUntilTime := time.Unix(0, hibernateUntil)
43+
return true, fmt.Sprintf("critical-load-hibernate until %+v", hibernateUntilTime), base.NoThrottleReasonHint
44+
}
4145
generalCheckResult := this.migrationContext.GetThrottleGeneralCheckResult()
4246
if generalCheckResult.ShouldThrottle {
4347
return generalCheckResult.ShouldThrottle, generalCheckResult.Reason, generalCheckResult.ReasonHint
@@ -96,6 +100,9 @@ func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- boo
96100
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
97101
return nil
98102
}
103+
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
104+
return nil
105+
}
99106

100107
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
101108
// when running on replica, the heartbeat injection is also done on the replica.
@@ -128,6 +135,10 @@ func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- boo
128135
// collectControlReplicasLag polls all the control replicas to get maximum lag value
129136
func (this *Throttler) collectControlReplicasLag() {
130137

138+
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
139+
return
140+
}
141+
131142
replicationLagQuery := fmt.Sprintf(`
132143
select value from %s.%s where hint = 'heartbeat' and id <= 255
133144
`,
@@ -222,6 +233,9 @@ func (this *Throttler) criticalLoadIsMet() (met bool, variableName string, value
222233
// collectThrottleHTTPStatus collects throttle status from the configured throttle HTTP URL, if any
223234
func (this *Throttler) collectThrottleHTTPStatus(firstThrottlingCollected chan<- bool) {
224235
collectFunc := func() (sleep bool, err error) {
236+
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
237+
return true, nil
238+
}
225239
url := this.migrationContext.GetThrottleHTTP()
226240
if url == "" {
227241
return true, nil
@@ -247,6 +261,9 @@ func (this *Throttler) collectThrottleHTTPStatus(firstThrottlingCollected chan<-
247261

248262
// collectGeneralThrottleMetrics reads the once-per-sec metrics, and stores them onto this.migrationContext
249263
func (this *Throttler) collectGeneralThrottleMetrics() error {
264+
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
265+
return nil
266+
}
250267

251268
setThrottle := func(throttle bool, reason string, reasonHint base.ThrottleReasonHint) error {
252269
this.migrationContext.SetThrottleGeneralCheckResult(base.NewThrottleCheckResult(throttle, reason, reasonHint))
@@ -264,6 +281,20 @@ func (this *Throttler) collectGeneralThrottleMetrics() error {
264281
if err != nil {
265282
return setThrottle(true, fmt.Sprintf("%s %s", variableName, err), base.NoThrottleReasonHint)
266283
}
284+
285+
if criticalLoadMet && this.migrationContext.CriticalLoadHibernateSeconds > 0 {
286+
hibernateDuration := time.Duration(this.migrationContext.CriticalLoadHibernateSeconds) * time.Second
287+
hibernateUntilTime := time.Now().Add(hibernateDuration)
288+
atomic.StoreInt64(&this.migrationContext.HibernateUntil, hibernateUntilTime.UnixNano())
289+
log.Errorf("critical-load met: %s=%d, >=%d. Will hibernate for the duration of %+v, until %+v", variableName, value, threshold, hibernateDuration, hibernateUntilTime)
290+
go func() {
291+
time.Sleep(hibernateDuration)
292+
this.migrationContext.SetThrottleGeneralCheckResult(base.NewThrottleCheckResult(true, "leaving hibernation", base.LeavingHibernationThrottleReasonHint))
293+
atomic.StoreInt64(&this.migrationContext.HibernateUntil, 0)
294+
}()
295+
return nil
296+
}
297+
267298
if criticalLoadMet && this.migrationContext.CriticalLoadIntervalMilliseconds == 0 {
268299
this.migrationContext.PanicAbort <- fmt.Errorf("critical-load met: %s=%d, >=%d", variableName, value, threshold)
269300
}

0 commit comments

Comments
 (0)