Skip to content

Commit a865f82

Browse files
wesm and claude authored
Fix import of old WhatsApp msgstore.db backups (#272)
## Summary

- Detect missing `chat.group_type` and `message_media.media_caption` columns in old WhatsApp schemas (pre-2022) via `PRAGMA table_info`, defaulting to `0`/`NULL` when absent.
- Scope the column cache per `(db pointer, table)` so that multiple databases with different schemas in the same process get independent detection.

Fixes #271

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent ef9b86a commit a865f82

File tree

2 files changed

+303
-5
lines changed

2 files changed

+303
-5
lines changed

internal/whatsapp/queries.go

Lines changed: 73 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,27 +4,87 @@ import (
44
"database/sql"
55
"fmt"
66
"strings"
7+
"sync"
78
)
89

10+
// columnCache memoizes the column names reported by PRAGMA table_info, keyed
// by (db pointer, table) so that several databases with different schemas can
// be inspected in the same process without sharing results. It is guarded by
// columnCacheMu.
//
// Entries keep the *sql.DB as part of the map key and are only dropped by
// resetColumnCache.
var (
	columnCache   = make(map[columnCacheKey]map[string]bool)
	columnCacheMu sync.Mutex
)

// columnCacheKey identifies one table of one database handle.
type columnCacheKey struct {
	db    *sql.DB
	table string
}

// hasColumn reports whether table has a column named column, using
// PRAGMA table_info.
//
// The first complete read of a table's columns is cached per (db pointer,
// table); later calls for the same pair answer from the cache without touching
// the database. A read that fails (query error, scan error, or iteration
// error) is NOT cached: the call answers from whatever rows were read, which
// is false when the query itself failed, and the next call retries. This keeps
// a transient failure from permanently marking a column as missing.
//
// columnCacheMu is held for the whole call, including the PRAGMA query, so
// concurrent lookups are serialized.
func hasColumn(db *sql.DB, table, column string) bool {
	columnCacheMu.Lock()
	defer columnCacheMu.Unlock()

	key := columnCacheKey{db: db, table: table}
	if cols, ok := columnCache[key]; ok {
		return cols[column]
	}

	cols, err := readTableColumns(db, table)
	if err == nil {
		// Only a complete column list is safe to reuse.
		columnCache[key] = cols
	}
	// The signature has no error result, so a failed read degrades to
	// "answer from the rows we did manage to read".
	return cols[column]
}

// readTableColumns runs PRAGMA table_info(table) and returns the set of column
// names it reports. The returned map is never nil. On error it holds the names
// read before or around the failure, and the error is the first one
// encountered, so callers can tell a complete read from a partial one.
//
// table is interpolated into the statement unquoted, so it must be a trusted
// identifier.
func readTableColumns(db *sql.DB, table string) (map[string]bool, error) {
	cols := make(map[string]bool)

	rows, err := db.Query(fmt.Sprintf("PRAGMA table_info(%s)", table))
	if err != nil {
		return cols, fmt.Errorf("table_info %s: %w", table, err)
	}
	defer func() { _ = rows.Close() }()

	var firstErr error
	for rows.Next() {
		// Scan targets mirror the six values hasColumn has always read per
		// row; only the second one (the column name) is used.
		var (
			cid, notNull, pk int
			name, colType    string
			dfltValue        sql.NullString
		)
		if err := rows.Scan(&cid, &name, &colType, &notNull, &dfltValue, &pk); err != nil {
			// Keep reading so a single bad row does not hide later columns,
			// but remember that the result is incomplete.
			if firstErr == nil {
				firstErr = fmt.Errorf("table_info %s: scan: %w", table, err)
			}
			continue
		}
		cols[name] = true
	}
	if err := rows.Err(); err != nil && firstErr == nil {
		firstErr = fmt.Errorf("table_info %s: %w", table, err)
	}
	return cols, firstErr
}
53+
54+
// resetColumnCache clears the cached column info (for testing).
55+
func resetColumnCache() {
56+
columnCacheMu.Lock()
57+
defer columnCacheMu.Unlock()
58+
columnCache = make(map[columnCacheKey]map[string]bool)
59+
}
60+
961
// fetchChats returns all non-hidden chats from the WhatsApp database.
1062
// Joins with the jid table to get JID details for each chat.
63+
// Handles old WhatsApp schemas that lack the group_type column.
1164
func fetchChats(db *sql.DB) ([]waChat, error) {
12-
rows, err := db.Query(`
65+
hasGroupType := hasColumn(db, "chat", "group_type")
66+
67+
groupTypeExpr := "0"
68+
if hasGroupType {
69+
groupTypeExpr = "COALESCE(c.group_type, 0)"
70+
}
71+
72+
rows, err := db.Query(fmt.Sprintf(`
1373
SELECT
1474
c._id,
1575
c.jid_row_id,
1676
j.raw_string,
1777
COALESCE(j.user, ''),
1878
COALESCE(j.server, ''),
1979
c.subject,
20-
COALESCE(c.group_type, 0),
80+
%s,
2181
COALESCE(c.hidden, 0),
2282
COALESCE(c.sort_timestamp, 0)
2383
FROM chat c
2484
JOIN jid j ON c.jid_row_id = j._id
2585
WHERE COALESCE(c.hidden, 0) = 0
2686
ORDER BY c.sort_timestamp DESC
27-
`)
87+
`, groupTypeExpr))
2888
if err != nil {
2989
return nil, fmt.Errorf("fetch chats: %w", err)
3090
}
@@ -94,11 +154,14 @@ func fetchMessages(db *sql.DB, chatRowID int64, afterID int64, limit int) ([]waM
94154

95155
// fetchMedia returns media metadata for a batch of message row IDs.
96156
// Returns a map of message_row_id → waMedia.
157+
// Handles old WhatsApp schemas that lack the media_caption column.
97158
func fetchMedia(db *sql.DB, messageRowIDs []int64) (map[int64]waMedia, error) {
98159
if len(messageRowIDs) == 0 {
99160
return make(map[int64]waMedia), nil
100161
}
101162

163+
hasCaption := hasColumn(db, "message_media", "media_caption")
164+
102165
result := make(map[int64]waMedia)
103166

104167
// Process in chunks to stay within SQLite's parameter limit.
@@ -117,19 +180,24 @@ func fetchMedia(db *sql.DB, messageRowIDs []int64) (map[int64]waMedia, error) {
117180
args[j] = id
118181
}
119182

183+
captionExpr := "NULL"
184+
if hasCaption {
185+
captionExpr = "mm.media_caption"
186+
}
187+
120188
query := fmt.Sprintf(`
121189
SELECT
122190
mm.message_row_id,
123191
mm.mime_type,
124-
mm.media_caption,
192+
%s,
125193
mm.file_size,
126194
mm.file_path,
127195
mm.width,
128196
mm.height,
129197
mm.media_duration
130198
FROM message_media mm
131199
WHERE mm.message_row_id IN (%s)
132-
`, strings.Join(placeholders, ","))
200+
`, captionExpr, strings.Join(placeholders, ","))
133201

134202
rows, err := db.Query(query, args...)
135203
if err != nil {

internal/whatsapp/queries_test.go

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,236 @@ import (
77
_ "github.com/mattn/go-sqlite3"
88
)
99

10+
func TestFetchChatsOldSchema(t *testing.T) {
11+
// Old WhatsApp schemas (pre-2022) lack the group_type column on chat.
12+
// fetchChats should handle this gracefully, defaulting group_type to 0.
13+
db, err := sql.Open("sqlite3", ":memory:")
14+
if err != nil {
15+
t.Fatal(err)
16+
}
17+
defer func() { _ = db.Close() }()
18+
resetColumnCache()
19+
20+
// Create old-style chat table without group_type.
21+
_, err = db.Exec(`
22+
CREATE TABLE jid (
23+
_id INTEGER PRIMARY KEY,
24+
user TEXT,
25+
server TEXT,
26+
raw_string TEXT
27+
);
28+
CREATE TABLE chat (
29+
_id INTEGER PRIMARY KEY,
30+
jid_row_id INTEGER UNIQUE,
31+
hidden INTEGER,
32+
subject TEXT,
33+
sort_timestamp INTEGER
34+
);
35+
36+
INSERT INTO jid (_id, user, server, raw_string)
37+
VALUES (1, '447700900000', 's.whatsapp.net', '447700900000@s.whatsapp.net');
38+
INSERT INTO jid (_id, user, server, raw_string)
39+
VALUES (2, '120363001234567890', 'g.us', '120363001234567890@g.us');
40+
41+
INSERT INTO chat (_id, jid_row_id, hidden, subject, sort_timestamp)
42+
VALUES (10, 1, 0, NULL, 1609459200000);
43+
INSERT INTO chat (_id, jid_row_id, hidden, subject, sort_timestamp)
44+
VALUES (20, 2, 0, 'Family Group', 1609459300000);
45+
`)
46+
if err != nil {
47+
t.Fatal(err)
48+
}
49+
50+
chats, err := fetchChats(db)
51+
if err != nil {
52+
t.Fatalf("fetchChats with old schema: %v", err)
53+
}
54+
55+
if len(chats) != 2 {
56+
t.Fatalf("expected 2 chats, got %d", len(chats))
57+
}
58+
59+
// All chats should have GroupType=0 since column is missing.
60+
for _, c := range chats {
61+
if c.GroupType != 0 {
62+
t.Errorf("chat %d: GroupType = %d, want 0", c.RowID, c.GroupType)
63+
}
64+
}
65+
66+
// Group chat (g.us) should still be detected via server.
67+
group := chats[0] // sorted by sort_timestamp DESC
68+
if group.Server != "g.us" {
69+
t.Errorf("expected first chat to be group (g.us), got server=%q", group.Server)
70+
}
71+
if !isGroupChat(group) {
72+
t.Error("g.us chat should be detected as group even without group_type column")
73+
}
74+
}
75+
76+
func TestFetchChatsNewSchema(t *testing.T) {
77+
// New WhatsApp schemas have group_type on chat.
78+
db, err := sql.Open("sqlite3", ":memory:")
79+
if err != nil {
80+
t.Fatal(err)
81+
}
82+
defer func() { _ = db.Close() }()
83+
resetColumnCache()
84+
85+
_, err = db.Exec(`
86+
CREATE TABLE jid (
87+
_id INTEGER PRIMARY KEY,
88+
user TEXT,
89+
server TEXT,
90+
raw_string TEXT
91+
);
92+
CREATE TABLE chat (
93+
_id INTEGER PRIMARY KEY,
94+
jid_row_id INTEGER UNIQUE,
95+
hidden INTEGER,
96+
subject TEXT,
97+
sort_timestamp INTEGER,
98+
group_type INTEGER
99+
);
100+
101+
INSERT INTO jid (_id, user, server, raw_string)
102+
VALUES (1, '120363009999', 'g.us', '120363009999@g.us');
103+
INSERT INTO chat (_id, jid_row_id, hidden, subject, sort_timestamp, group_type)
104+
VALUES (10, 1, 0, 'Work Chat', 1609459200000, 1);
105+
`)
106+
if err != nil {
107+
t.Fatal(err)
108+
}
109+
110+
chats, err := fetchChats(db)
111+
if err != nil {
112+
t.Fatalf("fetchChats with new schema: %v", err)
113+
}
114+
115+
if len(chats) != 1 {
116+
t.Fatalf("expected 1 chat, got %d", len(chats))
117+
}
118+
if chats[0].GroupType != 1 {
119+
t.Errorf("GroupType = %d, want 1", chats[0].GroupType)
120+
}
121+
}
122+
123+
func TestFetchMediaOldSchema(t *testing.T) {
124+
// Old WhatsApp schemas lack media_caption on message_media.
125+
db, err := sql.Open("sqlite3", ":memory:")
126+
if err != nil {
127+
t.Fatal(err)
128+
}
129+
defer func() { _ = db.Close() }()
130+
resetColumnCache()
131+
132+
_, err = db.Exec(`
133+
CREATE TABLE message_media (
134+
message_row_id INTEGER PRIMARY KEY,
135+
mime_type TEXT,
136+
file_size INTEGER,
137+
file_path TEXT,
138+
width INTEGER,
139+
height INTEGER,
140+
media_duration INTEGER
141+
);
142+
143+
INSERT INTO message_media (message_row_id, mime_type, file_size, file_path, width, height, media_duration)
144+
VALUES (100, 'image/jpeg', 54321, 'Media/IMG-20200101.jpg', 1920, 1080, 0);
145+
`)
146+
if err != nil {
147+
t.Fatal(err)
148+
}
149+
150+
mediaMap, err := fetchMedia(db, []int64{100})
151+
if err != nil {
152+
t.Fatalf("fetchMedia with old schema: %v", err)
153+
}
154+
155+
m, ok := mediaMap[100]
156+
if !ok {
157+
t.Fatal("expected media for message 100")
158+
}
159+
if m.MediaCaption.Valid {
160+
t.Error("MediaCaption should be NULL for old schema")
161+
}
162+
if !m.MimeType.Valid || m.MimeType.String != "image/jpeg" {
163+
t.Errorf("MimeType = %v, want image/jpeg", m.MimeType)
164+
}
165+
}
166+
167+
func TestColumnCacheScopedPerDB(t *testing.T) {
168+
// Verify that inspecting an old-schema DB then a new-schema DB
169+
// (and vice versa) produces correct results without resetColumnCache.
170+
resetColumnCache()
171+
172+
// DB 1: old schema, no group_type.
173+
oldDB, err := sql.Open("sqlite3", ":memory:")
174+
if err != nil {
175+
t.Fatal(err)
176+
}
177+
defer func() { _ = oldDB.Close() }()
178+
_, err = oldDB.Exec(`
179+
CREATE TABLE jid (_id INTEGER PRIMARY KEY, user TEXT, server TEXT, raw_string TEXT);
180+
CREATE TABLE chat (_id INTEGER PRIMARY KEY, jid_row_id INTEGER UNIQUE, hidden INTEGER, subject TEXT, sort_timestamp INTEGER);
181+
INSERT INTO jid VALUES (1, '441234567890', 's.whatsapp.net', '441234567890@s.whatsapp.net');
182+
INSERT INTO chat VALUES (1, 1, 0, NULL, 1000);
183+
`)
184+
if err != nil {
185+
t.Fatal(err)
186+
}
187+
188+
// DB 2: new schema, has group_type.
189+
newDB, err := sql.Open("sqlite3", ":memory:")
190+
if err != nil {
191+
t.Fatal(err)
192+
}
193+
defer func() { _ = newDB.Close() }()
194+
_, err = newDB.Exec(`
195+
CREATE TABLE jid (_id INTEGER PRIMARY KEY, user TEXT, server TEXT, raw_string TEXT);
196+
CREATE TABLE chat (_id INTEGER PRIMARY KEY, jid_row_id INTEGER UNIQUE, hidden INTEGER, subject TEXT, sort_timestamp INTEGER, group_type INTEGER);
197+
INSERT INTO jid VALUES (1, '120363009999', 'g.us', '120363009999@g.us');
198+
INSERT INTO chat VALUES (1, 1, 0, 'Test Group', 2000, 3);
199+
`)
200+
if err != nil {
201+
t.Fatal(err)
202+
}
203+
204+
// Query old DB first — should NOT cache "no group_type" for new DB.
205+
oldChats, err := fetchChats(oldDB)
206+
if err != nil {
207+
t.Fatalf("old DB: %v", err)
208+
}
209+
if oldChats[0].GroupType != 0 {
210+
t.Errorf("old DB: GroupType = %d, want 0", oldChats[0].GroupType)
211+
}
212+
213+
// Query new DB — must see group_type despite old DB being queried first.
214+
newChats, err := fetchChats(newDB)
215+
if err != nil {
216+
t.Fatalf("new DB: %v", err)
217+
}
218+
if newChats[0].GroupType != 3 {
219+
t.Errorf("new DB: GroupType = %d, want 3", newChats[0].GroupType)
220+
}
221+
222+
// Reverse: query new DB again then old DB again — still correct.
223+
newChats2, err := fetchChats(newDB)
224+
if err != nil {
225+
t.Fatalf("new DB (2nd): %v", err)
226+
}
227+
if newChats2[0].GroupType != 3 {
228+
t.Errorf("new DB (2nd): GroupType = %d, want 3", newChats2[0].GroupType)
229+
}
230+
231+
oldChats2, err := fetchChats(oldDB)
232+
if err != nil {
233+
t.Fatalf("old DB (2nd): %v", err)
234+
}
235+
if oldChats2[0].GroupType != 0 {
236+
t.Errorf("old DB (2nd): GroupType = %d, want 0", oldChats2[0].GroupType)
237+
}
238+
}
239+
10240
func TestFetchLidMap(t *testing.T) {
11241
db, err := sql.Open("sqlite3", ":memory:")
12242
if err != nil {

0 commit comments

Comments
 (0)