Skip to content

Commit 109b82b

Browse files
author
Shlomi Noach
authored
Merge branch 'master' into master
2 parents 3511aa3 + 067c4df commit 109b82b

7 files changed

Lines changed: 102 additions & 36 deletions

File tree

doc/interactive-commands.md

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ Both interfaces may serve at the same time. Both respond to simple text command,
2626
- The `critical-load` format must be: `some_status=<numeric-threshold>[,some_status=<numeric-threshold>...]`'
2727
- For example: `Threads_running=1000,threads_connected=5000`, and you would then write/echo `critical-load=Threads_running=1000,threads_connected=5000` to the socket.
2828
- `nice-ratio=<ratio>`: change _nice_ ratio: 0 for aggressive (not nice, not sleeping), positive integer `n`:
29-
- For any `1ms` spent copying rows, spend `n*1ms` units of time sleeping.
30-
- Examples: assume a single rows chunk copy takes `100ms` to complete.
31-
- `nice-ratio=0.5` will cause `gh-ost` to sleep for `50ms` immediately following.
29+
- For any `1ms` spent copying rows, spend `n*1ms` units of time sleeping.
30+
- Examples: assume a single rows chunk copy takes `100ms` to complete.
31+
- `nice-ratio=0.5` will cause `gh-ost` to sleep for `50ms` immediately following.
3232
- `nice-ratio=1` will cause `gh-ost` to sleep for `100ms`, effectively doubling runtime
3333
- value of `2` will effectively triple the runtime; etc.
3434
- `throttle-query`: change throttle query
@@ -38,6 +38,10 @@ Both interfaces may serve at the same time. Both respond to simple text command,
3838
- `unpostpone`: at a time where `gh-ost` is postponing the [cut-over](cut-over.md) phase, instruct `gh-ost` to stop postponing and proceed immediately to cut-over.
3939
- `panic`: immediately panic and abort operation
4040

41+
### Querying for data
42+
43+
For commands that accept an argument as value, pass `?` (question mark) to _get_ the current value rather than _set_ a new one.
44+
4145
### Examples
4246

4347
While migration is running:
@@ -63,6 +67,11 @@ $ echo "chunk-size=250" | nc -U /tmp/gh-ost.test.sample_data_0.sock
6367
# Serving on TCP port: 10001
6468
```
6569

70+
```shell
71+
$ echo "chunk-size=?" | nc -U /tmp/gh-ost.test.sample_data_0.sock
72+
250
73+
```
74+
6675
```shell
6776
$ echo throttle | nc -U /tmp/gh-ost.test.sample_data_0.sock
6877

go/base/context.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,11 @@ func (this *MigrationContext) GetControlReplicasLagResult() mysql.ReplicationLag
561561
func (this *MigrationContext) SetControlReplicasLagResult(lagResult *mysql.ReplicationLagResult) {
562562
this.throttleMutex.Lock()
563563
defer this.throttleMutex.Unlock()
564-
this.controlReplicasLagResult = *lagResult
564+
if lagResult == nil {
565+
this.controlReplicasLagResult = *mysql.NewNoReplicationLagResult()
566+
} else {
567+
this.controlReplicasLagResult = *lagResult
568+
}
565569
}
566570

567571
func (this *MigrationContext) GetThrottleControlReplicaKeys() *mysql.InstanceKeyMap {

go/logic/migrator.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -761,6 +761,12 @@ func (this *Migrator) printMigrationStatusHint(writers ...io.Writer) {
761761
throttleQuery,
762762
))
763763
}
764+
if throttleControlReplicaKeys := this.migrationContext.GetThrottleControlReplicaKeys(); throttleControlReplicaKeys.Len() > 0 {
765+
fmt.Fprintln(w, fmt.Sprintf("# throttle-control-replicas count: %+v",
766+
throttleControlReplicaKeys.Len(),
767+
))
768+
}
769+
764770
if this.migrationContext.PostponeCutOverFlagFile != "" {
765771
setIndicator := ""
766772
if base.FileExists(this.migrationContext.PostponeCutOverFlagFile) {
@@ -940,7 +946,9 @@ func (this *Migrator) initiateThrottler() error {
940946

941947
go this.throttler.initiateThrottlerCollection(this.firstThrottlingCollected)
942948
log.Infof("Waiting for first throttle metrics to be collected")
943-
<-this.firstThrottlingCollected
949+
<-this.firstThrottlingCollected // replication lag
950+
<-this.firstThrottlingCollected // other metrics
951+
log.Infof("First throttle metrics collected")
944952
go this.throttler.initiateThrottlerChecks()
945953

946954
return nil

go/logic/server.go

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ func (this *Server) applyServerCommand(command string, writer *bufio.Writer) (pr
126126
if len(tokens) > 1 {
127127
arg = strings.TrimSpace(tokens[1])
128128
}
129-
129+
argIsQuestion := (arg == "?")
130130
throttleHint := "# Note: you may only throttle for as long as your binary logs are not purged\n"
131131

132132
if err := this.hooksExecutor.onInteractiveCommand(command); err != nil {
@@ -152,6 +152,7 @@ no-throttle # End forced throttling (other throttling m
152152
unpostpone # Bail out a cut-over postpone; proceed to cut-over
153153
panic # panic and quit without cleanup
154154
help # This message
155+
- use '?' (question mark) as argument to get info rather than set. e.g. "max-load=?" will just print out current max-load.
155156
`)
156157
}
157158
case "sup":
@@ -160,6 +161,10 @@ help # This message
160161
return ForcePrintStatusAndHintRule, nil
161162
case "chunk-size":
162163
{
164+
if argIsQuestion {
165+
fmt.Fprintf(writer, "%+v\n", atomic.LoadInt64(&this.migrationContext.ChunkSize))
166+
return NoPrintStatusRule, nil
167+
}
163168
if chunkSize, err := strconv.Atoi(arg); err != nil {
164169
return NoPrintStatusRule, err
165170
} else {
@@ -169,6 +174,10 @@ help # This message
169174
}
170175
case "max-lag-millis":
171176
{
177+
if argIsQuestion {
178+
fmt.Fprintf(writer, "%+v\n", atomic.LoadInt64(&this.migrationContext.MaxLagMillisecondsThrottleThreshold))
179+
return NoPrintStatusRule, nil
180+
}
172181
if maxLagMillis, err := strconv.Atoi(arg); err != nil {
173182
return NoPrintStatusRule, err
174183
} else {
@@ -182,6 +191,10 @@ help # This message
182191
}
183192
case "nice-ratio":
184193
{
194+
if argIsQuestion {
195+
fmt.Fprintf(writer, "%+v\n", this.migrationContext.GetNiceRatio())
196+
return NoPrintStatusRule, nil
197+
}
185198
if niceRatio, err := strconv.ParseFloat(arg, 64); err != nil {
186199
return NoPrintStatusRule, err
187200
} else {
@@ -191,26 +204,44 @@ help # This message
191204
}
192205
case "max-load":
193206
{
207+
if argIsQuestion {
208+
maxLoad := this.migrationContext.GetMaxLoad()
209+
fmt.Fprintf(writer, "%s\n", maxLoad.String())
210+
return NoPrintStatusRule, nil
211+
}
194212
if err := this.migrationContext.ReadMaxLoad(arg); err != nil {
195213
return NoPrintStatusRule, err
196214
}
197215
return ForcePrintStatusAndHintRule, nil
198216
}
199217
case "critical-load":
200218
{
219+
if argIsQuestion {
220+
criticalLoad := this.migrationContext.GetCriticalLoad()
221+
fmt.Fprintf(writer, "%s\n", criticalLoad.String())
222+
return NoPrintStatusRule, nil
223+
}
201224
if err := this.migrationContext.ReadCriticalLoad(arg); err != nil {
202225
return NoPrintStatusRule, err
203226
}
204227
return ForcePrintStatusAndHintRule, nil
205228
}
206229
case "throttle-query":
207230
{
231+
if argIsQuestion {
232+
fmt.Fprintf(writer, "%+v\n", this.migrationContext.GetThrottleQuery())
233+
return NoPrintStatusRule, nil
234+
}
208235
this.migrationContext.SetThrottleQuery(arg)
209236
fmt.Fprintf(writer, throttleHint)
210237
return ForcePrintStatusAndHintRule, nil
211238
}
212239
case "throttle-control-replicas":
213240
{
241+
if argIsQuestion {
242+
fmt.Fprintf(writer, "%s\n", this.migrationContext.GetThrottleControlReplicaKeys().ToCommaDelimitedList())
243+
return NoPrintStatusRule, nil
244+
}
214245
if err := this.migrationContext.ReadThrottleControlReplicaKeys(arg); err != nil {
215246
return NoPrintStatusRule, err
216247
}

go/logic/throttler.go

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -85,32 +85,37 @@ func (this *Throttler) parseChangelogHeartbeat(heartbeatValue string) (err error
8585
}
8686

8787
// collectReplicationLag reads the latest changelog heartbeat value
88-
func (this *Throttler) collectReplicationLag() {
89-
ticker := time.Tick(time.Duration(this.migrationContext.HeartbeatIntervalMilliseconds) * time.Millisecond)
90-
for range ticker {
91-
go func() error {
92-
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
93-
return nil
94-
}
88+
func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- bool) {
89+
collectFunc := func() error {
90+
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
91+
return nil
92+
}
9593

96-
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
97-
// when running on replica, the heartbeat injection is also done on the replica.
98-
// This means we will always get a good heartbeat value.
99-
// When runnign on replica, we should instead check the `SHOW SLAVE STATUS` output.
100-
if lag, err := mysql.GetReplicationLag(this.inspector.connectionConfig); err != nil {
101-
return log.Errore(err)
102-
} else {
103-
atomic.StoreInt64(&this.migrationContext.CurrentLag, int64(lag))
104-
}
94+
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
95+
// when running on replica, the heartbeat injection is also done on the replica.
96+
// This means we will always get a good heartbeat value.
97+
// When running on replica, we should instead check the `SHOW SLAVE STATUS` output.
98+
if lag, err := mysql.GetReplicationLag(this.inspector.connectionConfig); err != nil {
99+
return log.Errore(err)
105100
} else {
106-
if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil {
107-
return log.Errore(err)
108-
} else {
109-
this.parseChangelogHeartbeat(heartbeatValue)
110-
}
101+
atomic.StoreInt64(&this.migrationContext.CurrentLag, int64(lag))
111102
}
112-
return nil
113-
}()
103+
} else {
104+
if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil {
105+
return log.Errore(err)
106+
} else {
107+
this.parseChangelogHeartbeat(heartbeatValue)
108+
}
109+
}
110+
return nil
111+
}
112+
113+
collectFunc()
114+
firstThrottlingCollected <- true
115+
116+
ticker := time.Tick(time.Duration(this.migrationContext.HeartbeatIntervalMilliseconds) * time.Millisecond)
117+
for range ticker {
118+
go collectFunc()
114119
}
115120
}
116121

@@ -171,9 +176,7 @@ func (this *Throttler) collectControlReplicasLag() {
171176
// No need to read lag
172177
return
173178
}
174-
if result := readControlReplicasLag(); result != nil {
175-
this.migrationContext.SetControlReplicasLagResult(result)
176-
}
179+
this.migrationContext.SetControlReplicasLagResult(readControlReplicasLag())
177180
}
178181
aggressiveTicker := time.Tick(100 * time.Millisecond)
179182
relaxedFactor := 10
@@ -285,13 +288,14 @@ func (this *Throttler) collectGeneralThrottleMetrics() error {
285288
// that may affect throttling. There are several components, all running independently,
286289
// that collect such metrics.
287290
func (this *Throttler) initiateThrottlerCollection(firstThrottlingCollected chan<- bool) {
288-
go this.collectReplicationLag()
291+
go this.collectReplicationLag(firstThrottlingCollected)
289292
go this.collectControlReplicasLag()
290293

291294
go func() {
292-
throttlerMetricsTick := time.Tick(1 * time.Second)
293295
this.collectGeneralThrottleMetrics()
294296
firstThrottlingCollected <- true
297+
298+
throttlerMetricsTick := time.Tick(1 * time.Second)
295299
for range throttlerMetricsTick {
296300
this.collectGeneralThrottleMetrics()
297301
}

go/mysql/utils.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,14 @@ type ReplicationLagResult struct {
2222
Err error
2323
}
2424

25+
func NewNoReplicationLagResult() *ReplicationLagResult {
26+
return &ReplicationLagResult{Lag: 0, Err: nil}
27+
}
28+
29+
func (this *ReplicationLagResult) HasLag() bool {
30+
return this.Lag > 0
31+
}
32+
2533
// GetReplicationLag returns replication lag for a given connection config; either by explicit query
2634
// or via SHOW SLAVE STATUS
2735
func GetReplicationLag(connectionConfig *ConnectionConfig) (replicationLag time.Duration, err error) {
@@ -32,9 +40,11 @@ func GetReplicationLag(connectionConfig *ConnectionConfig) (replicationLag time.
3240
}
3341

3442
err = sqlutils.QueryRowsMap(db, `show slave status`, func(m sqlutils.RowMap) error {
43+
slaveIORunning := m.GetString("Slave_IO_Running")
44+
slaveSQLRunning := m.GetString("Slave_SQL_Running")
3545
secondsBehindMaster := m.GetNullInt64("Seconds_Behind_Master")
3646
if !secondsBehindMaster.Valid {
37-
return fmt.Errorf("replication not running")
47+
return fmt.Errorf("replication not running; Slave_IO_Running=%+v, Slave_SQL_Running=%+v", slaveIORunning, slaveSQLRunning)
3848
}
3949
replicationLag = time.Duration(secondsBehindMaster.Int64) * time.Second
4050
return nil

localtests/test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ test_single() {
8888
--throttle-query='select timestampdiff(second, min(last_update), now()) < 5 from _gh_ost_test_ghc' \
8989
--serve-socket-file=/tmp/gh-ost.test.sock \
9090
--initially-drop-socket-file \
91-
--postpone-cut-over-flag-file="" \
91+
--postpone-cut-over-flag-file=/tmp/gh-ost.test.postpone.flag \
9292
--test-on-replica \
9393
--default-retries=1 \
9494
--verbose \

0 commit comments

Comments
 (0)