Skip to content

Commit 6ecf7cc

Browse files
author
Shlomi Noach
authored
Auto-merged master into interactive-command-question on deployment
2 parents e98ffd1 + 3150726 commit 6ecf7cc

7 files changed

Lines changed: 49 additions & 18 deletions

File tree

doc/cheatsheet.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,12 @@ gh-ost --allow-master-master --assume-master-host=a.specific.master.com
146146

147147
Topologies using _tungsten replicator_ are peculiar in that the participating servers are not actually aware they are replicating. The _tungsten replicator_ looks just like another app issuing queries on those hosts. `gh-ost` is unable to identify that a server participates in a _tungsten_ topology.
148148

149-
If you choose to migrate directly on master (see above), there's nothing special you need to do. If you choose to migrate via replica, then you must supply the identity of the master, and indicate this is a tungsten setup, as follows:
149+
If you choose to migrate directly on master (see above), there's nothing special you need to do.
150+
151+
If you choose to migrate via replica, then you need to make sure Tungsten is configured with the `log-slave-updates` parameter (note this is different from MySQL's own `log-slave-updates` parameter); otherwise changes will not be written to the replica's binary log, causing data corruption after the table swap. You must also supply the identity of the master, and indicate this is a tungsten setup, as follows:
150152

151153
```
152154
gh-ost --tungsten --assume-master-host=the.topology.master.com
153155
```
156+
157+
Also note that `--switch-to-rbr` does not work for a Tungsten setup as the replication process is external, so you need to make sure `binlog_format` is set to ROW before Tungsten Replicator connects to the server and starts applying events from the master.

go/base/context.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,9 @@ type MigrationContext struct {
135135
OriginalBinlogFormat string
136136
OriginalBinlogRowImage string
137137
InspectorConnectionConfig *mysql.ConnectionConfig
138+
InspectorMySQLVersion string
138139
ApplierConnectionConfig *mysql.ConnectionConfig
140+
ApplierMySQLVersion string
139141
StartTime time.Time
140142
RowCopyStartTime time.Time
141143
RowCopyEndTime time.Time

go/logic/applier.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,15 @@ func (this *Applier) InitDBConnections() (err error) {
7070
if err := this.readTableColumns(); err != nil {
7171
return err
7272
}
73+
log.Infof("Applier initiated on %+v, version %+v", this.connectionConfig.ImpliedKey, this.migrationContext.ApplierMySQLVersion)
7374
return nil
7475
}
7576

7677
// validateConnection issues a simple can-connect to MySQL
7778
func (this *Applier) validateConnection(db *gosql.DB) error {
78-
query := `select @@global.port`
79+
query := `select @@global.port, @@global.version`
7980
var port int
80-
if err := db.QueryRow(query).Scan(&port); err != nil {
81+
if err := db.QueryRow(query).Scan(&port, &this.migrationContext.ApplierMySQLVersion); err != nil {
8182
return err
8283
}
8384
if port != this.connectionConfig.Key.Port {

go/logic/inspect.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ func (this *Inspector) InitDBConnections() (err error) {
6060
if err := this.applyBinlogFormat(); err != nil {
6161
return err
6262
}
63+
log.Infof("Inspector initiated on %+v, version %+v", this.connectionConfig.ImpliedKey, this.migrationContext.InspectorMySQLVersion)
6364
return nil
6465
}
6566

@@ -168,9 +169,9 @@ func (this *Inspector) inspectOriginalAndGhostTables() (err error) {
168169

169170
// validateConnection issues a simple can-connect to MySQL
170171
func (this *Inspector) validateConnection() error {
171-
query := `select @@global.port`
172+
query := `select @@global.port, @@global.version`
172173
var port int
173-
if err := this.db.QueryRow(query).Scan(&port); err != nil {
174+
if err := this.db.QueryRow(query).Scan(&port, &this.migrationContext.InspectorMySQLVersion); err != nil {
174175
return err
175176
}
176177
if port != this.connectionConfig.Key.Port {

go/logic/migrator.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -946,7 +946,9 @@ func (this *Migrator) initiateThrottler() error {
946946

947947
go this.throttler.initiateThrottlerCollection(this.firstThrottlingCollected)
948948
log.Infof("Waiting for first throttle metrics to be collected")
949-
<-this.firstThrottlingCollected
949+
<-this.firstThrottlingCollected // replication lag
950+
<-this.firstThrottlingCollected // other metrics
951+
log.Infof("First throttle metrics collected")
950952
go this.throttler.initiateThrottlerChecks()
951953

952954
return nil

go/logic/throttler.go

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -84,21 +84,38 @@ func (this *Throttler) parseChangelogHeartbeat(heartbeatValue string) (err error
8484
}
8585
}
8686

87-
// collectHeartbeat reads the latest changelog heartbeat value
88-
func (this *Throttler) collectHeartbeat() {
89-
ticker := time.Tick(time.Duration(this.migrationContext.HeartbeatIntervalMilliseconds) * time.Millisecond)
90-
for range ticker {
91-
go func() error {
92-
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
93-
return nil
87+
// collectReplicationLag reads the replication lag: via `SHOW SLAVE STATUS` when running on a replica, otherwise from the latest changelog heartbeat value
88+
func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- bool) {
89+
collectFunc := func() error {
90+
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
91+
return nil
92+
}
93+
94+
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
95+
// when running on replica, the heartbeat injection is also done on the replica.
96+
// This means we will always get a good heartbeat value.
97+
// When running on replica, we should instead check the `SHOW SLAVE STATUS` output.
98+
if lag, err := mysql.GetReplicationLag(this.inspector.connectionConfig); err != nil {
99+
return log.Errore(err)
100+
} else {
101+
atomic.StoreInt64(&this.migrationContext.CurrentLag, int64(lag))
94102
}
103+
} else {
95104
if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil {
96105
return log.Errore(err)
97106
} else {
98107
this.parseChangelogHeartbeat(heartbeatValue)
99108
}
100-
return nil
101-
}()
109+
}
110+
return nil
111+
}
112+
113+
collectFunc()
114+
firstThrottlingCollected <- true
115+
116+
ticker := time.Tick(time.Duration(this.migrationContext.HeartbeatIntervalMilliseconds) * time.Millisecond)
117+
for range ticker {
118+
go collectFunc()
102119
}
103120
}
104121

@@ -114,6 +131,7 @@ func (this *Throttler) collectControlReplicasLag() {
114131

115132
readReplicaLag := func(connectionConfig *mysql.ConnectionConfig) (lag time.Duration, err error) {
116133
dbUri := connectionConfig.GetDBUri("information_schema")
134+
117135
var heartbeatValue string
118136
if db, _, err := sqlutils.GetDB(dbUri); err != nil {
119137
return lag, err
@@ -272,13 +290,14 @@ func (this *Throttler) collectGeneralThrottleMetrics() error {
272290
// that may affect throttling. There are several components, all running independently,
273291
// that collect such metrics.
274292
func (this *Throttler) initiateThrottlerCollection(firstThrottlingCollected chan<- bool) {
275-
go this.collectHeartbeat()
293+
go this.collectReplicationLag(firstThrottlingCollected)
276294
go this.collectControlReplicasLag()
277295

278296
go func() {
279-
throttlerMetricsTick := time.Tick(1 * time.Second)
280297
this.collectGeneralThrottleMetrics()
281298
firstThrottlingCollected <- true
299+
300+
throttlerMetricsTick := time.Tick(1 * time.Second)
282301
for range throttlerMetricsTick {
283302
this.collectGeneralThrottleMetrics()
284303
}

go/mysql/utils.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,11 @@ func GetReplicationLag(connectionConfig *ConnectionConfig) (replicationLag time.
3232
}
3333

3434
err = sqlutils.QueryRowsMap(db, `show slave status`, func(m sqlutils.RowMap) error {
35+
slaveIORunning := m.GetString("Slave_IO_Running")
36+
slaveSQLRunning := m.GetString("Slave_SQL_Running")
3537
secondsBehindMaster := m.GetNullInt64("Seconds_Behind_Master")
3638
if !secondsBehindMaster.Valid {
37-
return fmt.Errorf("replication not running")
39+
return fmt.Errorf("replication not running; Slave_IO_Running=%+v, Slave_SQL_Running=%+v", slaveIORunning, slaveSQLRunning)
3840
}
3941
replicationLag = time.Duration(secondsBehindMaster.Int64) * time.Second
4042
return nil

0 commit comments

Comments
 (0)