Skip to content

Commit f95b4ff

Browse files
authored
metis: implement Metis SQLite store initialization (#1009)
* metis: implement Metis SQLite store initialization * address comments * remove tests around the sidelock file * go mod tidy && address new comments from Ivy * actually ensure that all the goroutines start at the same time * update comments in the test * add docstring to the concurrency test * argh, last edit * add MaxOpenConns test * make DefaultDBPath a well known metis location * address qzhao's comments * fix test
1 parent eb2e9ea commit f95b4ff

58 files changed

Lines changed: 287990 additions & 1 deletion

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

metis/go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ module k8s.io/metis
33
go 1.24.13
44

55
require (
6+
github.com/go-logr/logr v1.4.3
7+
github.com/mattn/go-sqlite3 v1.14.37
68
github.com/spf13/cobra v1.10.0
79
github.com/spf13/pflag v1.0.10
810
k8s.io/component-base v0.34.2
@@ -15,7 +17,6 @@ require (
1517
github.com/cespare/xxhash/v2 v2.3.0 // indirect
1618
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
1719
github.com/fxamacker/cbor/v2 v2.9.0 // indirect
18-
github.com/go-logr/logr v1.4.3 // indirect
1920
github.com/gogo/protobuf v1.3.2 // indirect
2021
github.com/google/go-cmp v0.7.0 // indirect
2122
github.com/inconshreveable/mousetrap v1.1.0 // indirect

metis/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
3434
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
3535
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
3636
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
37+
github.com/mattn/go-sqlite3 v1.14.37 h1:3DOZp4cXis1cUIpCfXLtmlGolNLp2VEqhiB/PARNBIg=
38+
github.com/mattn/go-sqlite3 v1.14.37/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
3739
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
3840
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
3941
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=

metis/ipam/consts.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
Copyright 2026 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package ipam
18+
19+
const DefaultDBPath = "/var/lib/cni/metis/metis.sqlite"

metis/ipam/schema.sql

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
-- cidr_blocks tracks the lifecycle and utilization of all IP alias ranges
2+
-- provisioned to this node by GCE.
3+
CREATE TABLE IF NOT EXISTS cidr_blocks (
4+
-- Unique identifier for the CIDR block.
5+
-- Note: SQLite INTEGER uses variable length encoding with a max of 8 bytes.
6+
-- This provides a theoretical max of 2^63, which is sufficient for
7+
-- 1 pod allocation per second for ~6 billion years.
8+
id INTEGER PRIMARY KEY AUTOINCREMENT,
9+
10+
-- The actual IP range allocated from GCE.
11+
-- Example: '10.0.1.0/28'
12+
cidr TEXT UNIQUE NOT NULL,
13+
14+
-- The logical network this block belongs to, matching the CNI networkName.
15+
-- Example: 'gke-pod-network'
16+
network TEXT NOT NULL,
17+
18+
-- The protocol family of the IP range.
19+
-- Example: 'ipv4' or 'ipv6'
20+
ip_family TEXT NOT NULL,
21+
22+
-- The total number of IP addresses contained within this block.
23+
-- Note: SQLite does not support unsigned integers, so standard INTEGER is used.
24+
-- Example: 16
25+
total_ips INTEGER NOT NULL,
26+
27+
-- The current count of IPs within this block that are actively assigned to pods.
28+
-- Example: 5
29+
allocated_ips INTEGER DEFAULT 0,
30+
31+
-- The current operational state of the block.
32+
-- Expected values: 'Ready', 'Draining', 'Deleting'
33+
state TEXT NOT NULL DEFAULT 'Ready',
34+
35+
-- Timestamp of when this block was successfully pulled from the CRD.
36+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
37+
38+
-- Timestamp of the last mutation to this block's state or capacity.
39+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
40+
);
41+
42+
-- ip_addresses tracks the assignment state, ownership, and cooldown periods
43+
-- of every individual IP address managed by the daemon.
44+
CREATE TABLE IF NOT EXISTS ip_addresses (
45+
-- Unique identifier for the individual IP record.
46+
id INTEGER PRIMARY KEY AUTOINCREMENT,
47+
48+
-- The specific IP address string.
49+
-- Example: '10.0.1.2'
50+
address TEXT UNIQUE NOT NULL,
51+
52+
-- The parent CIDR block this IP belongs to.
53+
-- Enforces cascading deletes if the parent block is removed by the daemon.
54+
cidr_block_id INTEGER NOT NULL,
55+
56+
-- The CNI_CONTAINER_ID of the pod currently holding this IP.
57+
-- Example: 'f093u09jfioj...'
58+
container_id TEXT,
59+
60+
-- The Kubernetes Pod Name holding this IP.
61+
pod_name TEXT,
62+
63+
-- The Kubernetes Pod Namespace holding this IP.
64+
pod_namespace TEXT,
65+
66+
-- The CNI_IFNAME inside the container holding this IP.
67+
-- Example: 'eth0'
68+
interface_name TEXT,
69+
70+
-- Represents whether the IP is currently held by an active pod.
71+
is_allocated BOOLEAN DEFAULT FALSE,
72+
73+
-- Timestamp indicating when a released IP has finished its "cool-down"
74+
-- period and is safe to be reassigned without risking NEG routing collisions.
75+
release_at TIMESTAMP,
76+
77+
-- Timestamp of when the IP was assigned to its current container_id.
78+
allocated_at TIMESTAMP,
79+
80+
-- Timestamp of the last mutation to this IP record.
81+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
82+
83+
FOREIGN KEY (cidr_block_id) REFERENCES cidr_blocks(id) ON DELETE CASCADE
84+
);
85+
86+
-- Index to optimize the daemon's search for the next available IP address.
87+
CREATE INDEX IF NOT EXISTS idx_available_ips
88+
ON ip_addresses(cidr_block_id, id)
89+
WHERE is_allocated = FALSE;
90+
91+
-- Composite index to guarantee fast, idempotent lookups during CNI cmdAdd retries.
92+
CREATE INDEX IF NOT EXISTS idx_ip_idempotency
93+
ON ip_addresses(container_id, interface_name);
94+
95+
-- Automatically update the updated_at timestamp on cidr_blocks mutations.
96+
CREATE TRIGGER IF NOT EXISTS update_cidr_blocks_updated_at
97+
AFTER UPDATE ON cidr_blocks FOR EACH ROW BEGIN
98+
UPDATE cidr_blocks SET updated_at = CURRENT_TIMESTAMP WHERE id = OLD.id;
99+
END;
100+
101+
-- Automatically update the updated_at timestamp on ip_addresses mutations.
102+
CREATE TRIGGER IF NOT EXISTS update_ip_addresses_updated_at
103+
AFTER UPDATE ON ip_addresses FOR EACH ROW BEGIN
104+
UPDATE ip_addresses SET updated_at = CURRENT_TIMESTAMP WHERE id = OLD.id;
105+
END;

metis/ipam/store.go

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
/*
2+
Copyright 2026 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package ipam
18+
19+
import (
20+
"database/sql"
21+
_ "embed"
22+
"fmt"
23+
"os"
24+
"path/filepath"
25+
26+
"github.com/go-logr/logr"
27+
_ "github.com/mattn/go-sqlite3" // SQLite driver
28+
)
29+
30+
//go:embed schema.sql
31+
var schemaSQL string
32+
33+
const (
34+
// dbSchemaVersion tracks the SQLite schema version to allow safe local
35+
// migrations and prevent state corruption across daemon restarts.
36+
dbSchemaVersion = 1
37+
maxOpenConns = 10
38+
maxIdleConns = 10
39+
)
40+
41+
// Store manages database operations for IPAM.
42+
type Store struct {
43+
db *sql.DB
44+
log logr.Logger
45+
}
46+
47+
// NewStore creates a new Store instance and initializes the database.
48+
func NewStore(log logr.Logger, dbPath string) (*Store, error) {
49+
if dbPath == "" {
50+
return nil, fmt.Errorf("dbPath cannot be empty: an absolute path must be explicitly provided")
51+
}
52+
53+
log.Info("Opening or creating database", "path", dbPath)
54+
55+
if err := os.MkdirAll(filepath.Dir(dbPath), 0755); err != nil {
56+
return nil, fmt.Errorf("failed to create db directory: %w", err)
57+
}
58+
59+
// SQLite is configured directly through the DSN string. This approach
60+
// guarantees every new connection spawned by the sql.DB pool inherits these
61+
// exact configurations natively.
62+
dsn := dbPath +
63+
// Enables Write-Ahead Logging (WAL) mode. This significantly improves
64+
// concurrency by allowing multiple readers to access the database
65+
// simultaneously without blocking a writer, which is critical for burst
66+
// requests.
67+
// See: https://www.sqlite.org/pragma.html#pragma_journal_mode
68+
"?_journal_mode=WAL" +
69+
// Enforces foreign key constraints. SQLite ignores these by default.
70+
// This is required to ensure ON DELETE CASCADE functions correctly on the
71+
// ip_addresses table when a draining CIDR block is officially removed.
72+
// See: https://www.sqlite.org/pragma.html#pragma_foreign_keys
73+
"&_foreign_keys=on" +
74+
// Sets the busy timeout to 5000 milliseconds. If the database is locked
75+
// by another transaction, this tells the SQLite driver to wait for up
76+
// to 5 seconds before giving up and returning a locked error.
77+
// See: https://www.sqlite.org/pragma.html#pragma_busy_timeout
78+
"&_busy_timeout=5000" +
79+
// Instructs the Go driver to send "BEGIN IMMEDIATE" instead of standard
80+
// "BEGIN" when starting a transaction. This grabs a write lock instantly,
81+
// preventing deadlocks when concurrent requests try to upgrade their
82+
// read locks to write locks simultaneously. Note: This is a go-sqlite3
83+
// driver feature, not a native SQLite PRAGMA.
84+
// See: https://github.com/mattn/go-sqlite3#connection-string
85+
"&_txlock=immediate" +
86+
// Maps to PRAGMA synchronous = NORMAL. In WAL mode, this is the optimal
87+
// setting for high-concurrency daemons. It prevents database corruption
88+
// during power loss or hard crashes while offering much faster write
89+
// performance than FULL mode, sacrificing only a few milliseconds of
90+
// un-checkpointed durability.
91+
// See: https://www.sqlite.org/pragma.html#pragma_synchronous
92+
"&_synchronous=1"
93+
94+
db, err := sql.Open("sqlite3", dsn)
95+
if err != nil {
96+
return nil, fmt.Errorf("failed to open database: %w", err)
97+
}
98+
99+
db.SetMaxOpenConns(maxOpenConns)
100+
db.SetMaxIdleConns(maxIdleConns)
101+
// Sets the maximum amount of time a connection may be reused to infinity
102+
// (0). This guarantees the single connection never expires.
103+
db.SetConnMaxLifetime(0)
104+
105+
store := &Store{
106+
db: db,
107+
log: log,
108+
}
109+
110+
// Only a single process enters this execution block at a time.
111+
if err := store.initSchema(); err != nil {
112+
db.Close()
113+
return nil, fmt.Errorf("failed to initialize schema: %w", err)
114+
}
115+
116+
log.Info("Initialized or updated database schema", "path", dbPath)
117+
118+
return store, nil
119+
}
120+
121+
// initSchema creates the necessary tables if they don't exist.
122+
func (s *Store) initSchema() error {
123+
var currentVersion int
124+
err := s.db.QueryRow("PRAGMA user_version").Scan(&currentVersion)
125+
if err != nil {
126+
return fmt.Errorf("failed to check schema version: %w", err)
127+
}
128+
129+
if currentVersion == dbSchemaVersion {
130+
s.log.V(4).Info("Database schema already initialized", "version", currentVersion)
131+
return nil
132+
}
133+
134+
s.log.Info("Initializing DB schema", "currentVersion", currentVersion, "expectedVersion", dbSchemaVersion)
135+
136+
// 1. Begin an atomic transaction
137+
tx, err := s.db.Begin()
138+
if err != nil {
139+
return fmt.Errorf("failed to begin transaction: %w", err)
140+
}
141+
// Safe to defer; Rollback does nothing if Commit() is successful
142+
defer tx.Rollback()
143+
144+
// 2. Execute the embedded schema.sql file
145+
if _, err := tx.Exec(schemaSQL); err != nil {
146+
return fmt.Errorf("failed to execute schema.sql: %w", err)
147+
}
148+
149+
// 3. Set User Version
150+
setVersion := fmt.Sprintf("PRAGMA user_version = %d;", dbSchemaVersion)
151+
if _, err := tx.Exec(setVersion); err != nil {
152+
return fmt.Errorf("failed to set user_version: %w", err)
153+
}
154+
155+
// 4. Commit everything atomically
156+
if err := tx.Commit(); err != nil {
157+
return fmt.Errorf("failed to commit schema transaction: %w", err)
158+
}
159+
160+
s.log.Info("Database schema initialized or updated successfully")
161+
return nil
162+
}
163+
164+
// Close safely closes the database connection and releases any file locks.
165+
// This should be called during the daemon's graceful shutdown sequence.
166+
func (s *Store) Close() error {
167+
s.log.Info("Closing IPAM database connection")
168+
169+
if err := s.db.Close(); err != nil {
170+
return fmt.Errorf("failed to close database connection: %w", err)
171+
}
172+
173+
return nil
174+
}

0 commit comments

Comments
 (0)