Language_Learning_Bot/scripts.py at main · Preston-Robertson/Language_Learning_Bot · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
"""
Writing system / script data for languages with non-Latin alphabets.

Japanese gets the most detailed treatment with full kana tables and
structured kanji sets by JLPT level. Other languages have their core
alphabet/syllabary grouped into progressive sets.
"""

# ─── Japanese ─────────────────────────────────────────────────────────────────

# Hiragana practice table.
#
# Top-level keys: "name" (display name), "description", and "sets".
# Each entry in "sets" carries:
#   "set"   - 1-based ordinal used to look the set up by number
#   "name"  - display name of the group
#   "chars" - list of "<kana> (<romaji>)" strings
#   "tip"   - one short study hint for the group
# Sets 1-10 hold the basic rows, set 11 the dakuten (voiced) forms, and set 12
# the handakuten forms plus small-や/ゆ/よ combinations.
# NOTE(review): set 12 lists only the き/し/ち/に combinations; confirm whether
# the remaining ones (e.g. ひゃ, みゃ, りゃ and the voiced combos) are left out
# on purpose.
HIRAGANA = {
    "name": "Hiragana",
    "description": "The foundational phonetic script used for native Japanese words, grammar particles, and verb endings.",
    "sets": [
        {
            "set": 1,
            "name": "Vowels",
            "chars": ["あ (a)", "い (i)", "う (u)", "え (e)", "お (o)"],
            "tip": "These five vowels are the building blocks of all Japanese pronunciation.",
        },
        {
            "set": 2,
            "name": "K-row",
            "chars": ["か (ka)", "き (ki)", "く (ku)", "け (ke)", "こ (ko)"],
            "tip": "か is used in the question particle か at the end of sentences.",
        },
        {
            "set": 3,
            "name": "S-row",
            "chars": ["さ (sa)", "し (shi)", "す (su)", "せ (se)", "そ (so)"],
            "tip": "し is 'shi', not 'si' — this is a common stumbling point.",
        },
        {
            "set": 4,
            "name": "T-row",
            "chars": ["た (ta)", "ち (chi)", "つ (tsu)", "て (te)", "と (to)"],
            "tip": "ち is 'chi' and つ is 'tsu' — both are irregular readings in this row.",
        },
        {
            "set": 5,
            "name": "N-row",
            "chars": ["な (na)", "に (ni)", "ぬ (nu)", "ね (ne)", "の (no)"],
            "tip": "の is one of the most common particles — it shows possession (like 's in English).",
        },
        {
            "set": 6,
            "name": "H-row",
            "chars": ["は (ha)", "ひ (hi)", "ふ (fu)", "へ (he)", "ほ (ho)"],
            "tip": "は is read 'wa' when used as the topic particle. ふ is 'fu', not 'hu'.",
        },
        {
            "set": 7,
            "name": "M-row",
            "chars": ["ま (ma)", "み (mi)", "む (mu)", "め (me)", "も (mo)"],
            "tip": "も means 'also/too' and is a very common particle.",
        },
        {
            "set": 8,
            "name": "Y-row",
            "chars": ["や (ya)", "ゆ (yu)", "よ (yo)"],
            "tip": "Only three characters in this row. よ at sentence-end adds emphasis.",
        },
        {
            "set": 9,
            "name": "R-row",
            "chars": ["ら (ra)", "り (ri)", "る (ru)", "れ (re)", "ろ (ro)"],
            "tip": "The Japanese 'r' is a light tap — between English 'r' and 'l'.",
        },
        {
            "set": 10,
            "name": "W-row & N",
            "chars": ["わ (wa)", "を (wo)", "ん (n)"],
            "tip": "を is only used as the object particle. ん is the only consonant that stands alone.",
        },
        {
            "set": 11,
            "name": "Dakuten (voiced)",
            "chars": [
                "が (ga)", "ぎ (gi)", "ぐ (gu)", "げ (ge)", "ご (go)",
                "ざ (za)", "じ (ji)", "ず (zu)", "ぜ (ze)", "ぞ (zo)",
                "だ (da)", "ぢ (di/ji)", "づ (du/zu)", "で (de)", "ど (do)",
                "ば (ba)", "び (bi)", "ぶ (bu)", "べ (be)", "ぼ (bo)",
            ],
            "tip": "Dakuten (゛) voicing: k→g, s→z, t→d, h→b. じ and ぢ both sound like 'ji'.",
        },
        {
            "set": 12,
            "name": "Handakuten & Combos",
            "chars": [
                "ぱ (pa)", "ぴ (pi)", "ぷ (pu)", "ぺ (pe)", "ぽ (po)",
                "きゃ (kya)", "きゅ (kyu)", "きょ (kyo)",
                "しゃ (sha)", "しゅ (shu)", "しょ (sho)",
                "ちゃ (cha)", "ちゅ (chu)", "ちょ (cho)",
                "にゃ (nya)", "にゅ (nyu)", "にょ (nyo)",
            ],
            "tip": "Handakuten (゜) only applies to the H-row: h→p. Combos use small や/ゆ/よ.",
        },
    ],
}

# Katakana practice table, laid out in the same shape as the hiragana table:
# "name", "description", and a "sets" list whose entries each hold a 1-based
# "set" number, a "name", a "chars" list of "<kana> (<romaji>)" strings, and a
# "tip".
# Sets 1-10 hold the basic rows (set 10 also includes the long-vowel mark ー),
# set 11 the dakuten forms, and set 12 the handakuten forms plus a handful of
# extended combinations used for foreign sounds.
# NOTE(review): the tips for sets 3 and 4 both cover the シ/ツ confusion;
# presumably intentional reinforcement.
KATAKANA = {
    "name": "Katakana",
    "description": "Used for foreign loanwords, onomatopoeia, emphasis, and technical/scientific terms.",
    "sets": [
        {
            "set": 1,
            "name": "Vowels",
            "chars": ["ア (a)", "イ (i)", "ウ (u)", "エ (e)", "オ (o)"],
            "tip": "Same sounds as hiragana — katakana is just a different visual style.",
        },
        {
            "set": 2,
            "name": "K-row",
            "chars": ["カ (ka)", "キ (ki)", "ク (ku)", "ケ (ke)", "コ (ko)"],
            "tip": "コーヒー (koohii) = coffee — one of the first katakana words you'll see.",
        },
        {
            "set": 3,
            "name": "S-row",
            "chars": ["サ (sa)", "シ (shi)", "ス (su)", "セ (se)", "ソ (so)"],
            "tip": "シ (shi) and ツ (tsu) look similar — シ has more horizontal strokes.",
        },
        {
            "set": 4,
            "name": "T-row",
            "chars": ["タ (ta)", "チ (chi)", "ツ (tsu)", "テ (te)", "ト (to)"],
            "tip": "ツ and シ are the most commonly confused pair. Remember: ツ strokes go more vertical.",
        },
        {
            "set": 5,
            "name": "N-row",
            "chars": ["ナ (na)", "ニ (ni)", "ヌ (nu)", "ネ (ne)", "ノ (no)"],
            "tip": "ノ is just a single diagonal stroke — the simplest katakana character.",
        },
        {
            "set": 6,
            "name": "H-row",
            "chars": ["ハ (ha)", "ヒ (hi)", "フ (fu)", "ヘ (he)", "ホ (ho)"],
            "tip": "ヘ looks identical to the hiragana へ — one of the few that match exactly.",
        },
        {
            "set": 7,
            "name": "M-row",
            "chars": ["マ (ma)", "ミ (mi)", "ム (mu)", "メ (me)", "モ (mo)"],
            "tip": "マクドナルド (makudonarudo) = McDonald's — katakana adapts foreign words to Japanese sounds.",
        },
        {
            "set": 8,
            "name": "Y-row",
            "chars": ["ヤ (ya)", "ユ (yu)", "ヨ (yo)"],
            "tip": "Small versions ャュョ are used for combination sounds like キャ (kya).",
        },
        {
            "set": 9,
            "name": "R-row",
            "chars": ["ラ (ra)", "リ (ri)", "ル (ru)", "レ (re)", "ロ (ro)"],
            "tip": "Foreign 'l' sounds become 'r' in katakana: ラーメン (raamen) = ramen.",
        },
        {
            "set": 10,
            "name": "W-row, N & Long vowel",
            "chars": ["ワ (wa)", "ヲ (wo)", "ン (n)", "ー (long vowel mark)"],
            "tip": "ー extends the previous vowel: ケーキ (keeki) = cake.",
        },
        {
            "set": 11,
            "name": "Dakuten (voiced)",
            "chars": [
                "ガ (ga)", "ギ (gi)", "グ (gu)", "ゲ (ge)", "ゴ (go)",
                "ザ (za)", "ジ (ji)", "ズ (zu)", "ゼ (ze)", "ゾ (zo)",
                "ダ (da)", "ヂ (di/ji)", "ヅ (du/zu)", "デ (de)", "ド (do)",
                "バ (ba)", "ビ (bi)", "ブ (bu)", "ベ (be)", "ボ (bo)",
            ],
            "tip": "Same voicing rules as hiragana. ビール (biiru) = beer.",
        },
        {
            "set": 12,
            "name": "Handakuten & Modern extensions",
            "chars": [
                "パ (pa)", "ピ (pi)", "プ (pu)", "ペ (pe)", "ポ (po)",
                "ティ (ti)", "ディ (di)", "ファ (fa)", "フィ (fi)", "フェ (fe)",
                "ヴァ (va)", "ヴィ (vi)", "ヴ (vu)",
            ],
            "tip": "Modern katakana extensions like ティ and ファ handle sounds that don't exist in native Japanese.",
        },
    ],
}

# Kanji practice table: "name", "description", and six thematic "sets".
# Each set holds a 1-based "set" number, a "name", a "chars" list, and a "tip".
# Entries in "chars" have the form
#   "<kanji> (<kana>/<romaji>) <English gloss>"
# where set 5 gives two readings per entry where available, joined by " · ".
# NOTE(review): the description says "Grouped by JLPT N5 order" while the sets
# themselves are named by theme; confirm which ordering is intended.
KANJI_SETS = {
    "name": "Kanji",
    "description": "Chinese-origin characters. Each has multiple readings (on'yomi / kun'yomi) and meanings. Grouped by JLPT N5 order.",
    "sets": [
        {
            "set": 1,
            "name": "Numbers & Basic Concepts",
            "chars": [
                "一 (いち/ichi) one", "二 (に/ni) two", "三 (さん/san) three",
                "四 (よん/yon) four", "五 (ご/go) five", "六 (ろく/roku) six",
                "七 (なな/nana) seven", "八 (はち/hachi) eight",
                "九 (きゅう/kyuu) nine", "十 (じゅう/juu) ten",
            ],
            "tip": "Number kanji are among the simplest stroke-wise. 一二三 are literally 1, 2, 3 horizontal strokes.",
        },
        {
            "set": 2,
            "name": "People & Body",
            "chars": [
                "人 (ひと/hito) person", "子 (こ/ko) child",
                "女 (おんな/onna) woman", "男 (おとこ/otoko) man",
                "目 (め/me) eye", "口 (くち/kuchi) mouth",
                "手 (て/te) hand", "足 (あし/ashi) foot/leg",
                "耳 (みみ/mimi) ear",
            ],
            "tip": "人 appears in many compounds: 日本人 (nihonjin) = Japanese person.",
        },
        {
            "set": 3,
            "name": "Nature & Time",
            "chars": [
                "日 (ひ/hi) day/sun", "月 (つき/tsuki) moon/month",
                "火 (ひ/hi) fire", "水 (みず/mizu) water",
                "木 (き/ki) tree", "金 (かね/kane) gold/money",
                "土 (つち/tsuchi) earth", "山 (やま/yama) mountain",
                "川 (かわ/kawa) river", "天 (てん/ten) heaven/sky",
            ],
            "tip": "日月火水木金土 are the days of the week: Sunday through Saturday.",
        },
        {
            "set": 4,
            "name": "Direction & Position",
            "chars": [
                "上 (うえ/ue) up/above", "下 (した/shita) down/below",
                "中 (なか/naka) middle/inside", "右 (みぎ/migi) right",
                "左 (ひだり/hidari) left", "前 (まえ/mae) before/front",
                "後 (うしろ/ushiro) after/behind", "外 (そと/soto) outside",
                "北 (きた/kita) north", "南 (みなみ/minami) south",
            ],
            "tip": "上 and 下 are used constantly: 上手 (jouzu) = skillful, 下手 (heta) = unskillful.",
        },
        {
            "set": 5,
            "name": "Actions & Basic Verbs",
            "chars": [
                "見 (み/mi) see", "行 (い/i · こう/kou) go",
                "来 (く/ku · らい/rai) come", "食 (た/ta · しょく/shoku) eat",
                "飲 (の/no) drink", "書 (か/ka · しょ/sho) write",
                "読 (よ/yo · どく/doku) read", "話 (はな/hana · わ/wa) speak",
                "聞 (き/ki · ぶん/bun) hear/ask", "学 (まな/mana · がく/gaku) learn",
            ],
            "tip": "Most verb kanji have a kun'yomi (native) reading for verbs and an on'yomi (Chinese) reading for compounds.",
        },
        {
            "set": 6,
            "name": "Everyday Life",
            "chars": [
                "大 (おお/oo) big", "小 (ちい/chii) small",
                "新 (あたら/atara) new", "古 (ふる/furu) old",
                "高 (たか/taka) tall/expensive", "安 (やす/yasu) cheap/peaceful",
                "長 (なが/naga) long", "白 (しろ/shiro) white",
                "赤 (あか/aka) red", "青 (あお/ao) blue/green",
            ],
            "tip": "大きい (ookii) and 小さい (chiisai) are among the first adjectives you learn.",
        },
    ],
}

# ─── Korean ───────────────────────────────────────────────────────────────────

# Hangul practice table: "name", "description", and five "sets".
# Each set holds a 1-based "set" number, a "name", a "chars" list of
# "<jamo or syllable> (<romanization>)" strings, and a "tip".
# Sets 1-4 list individual vowels and consonants; set 5 lists complete
# syllable blocks as worked examples.
# NOTE(review): w-type compound vowels (e.g. ㅘ, ㅝ, ㅢ) do not appear in any
# set; confirm whether that omission is intentional.
KOREAN_SCRIPT = {
    "name": "Hangul",
    "description": "Korean alphabet — a scientific writing system where consonants and vowels combine into syllable blocks.",
    "sets": [
        {
            "set": 1,
            "name": "Basic Vowels",
            "chars": ["ㅏ (a)", "ㅓ (eo)", "ㅗ (o)", "ㅜ (u)", "ㅡ (eu)", "ㅣ (i)"],
            "tip": "Korean vowels are built from three elements: a dot (now a short stroke), a horizontal line, and a vertical line.",
        },
        {
            "set": 2,
            "name": "Compound Vowels",
            "chars": ["ㅐ (ae)", "ㅔ (e)", "ㅑ (ya)", "ㅕ (yeo)", "ㅛ (yo)", "ㅠ (yu)", "ㅒ (yae)", "ㅖ (ye)"],
            "tip": "Adding an extra stroke to a basic vowel creates the 'y-' version: ㅏ→ㅑ, ㅓ→ㅕ.",
        },
        {
            "set": 3,
            "name": "Basic Consonants",
            "chars": ["ㄱ (g/k)", "ㄴ (n)", "ㄷ (d/t)", "ㄹ (r/l)", "ㅁ (m)", "ㅂ (b/p)", "ㅅ (s)", "ㅇ (ng/silent)", "ㅈ (j)", "ㅎ (h)"],
            "tip": "ㅇ is silent at the start of a syllable but pronounced 'ng' at the end.",
        },
        {
            "set": 4,
            "name": "Tense & Aspirated Consonants",
            "chars": ["ㄲ (kk)", "ㄸ (tt)", "ㅃ (pp)", "ㅆ (ss)", "ㅉ (jj)", "ㅋ (k)", "ㅌ (t)", "ㅍ (p)", "ㅊ (ch)"],
            "tip": "Double consonants (ㄲ, ㄸ) are tense — said with a tight throat. Aspirated (ㅋ, ㅌ) have a puff of air.",
        },
        {
            "set": 5,
            "name": "Building Syllable Blocks",
            "chars": ["가 (ga)", "나 (na)", "다 (da)", "한 (han)", "글 (geul)", "말 (mal)"],
            "tip": "Every Korean syllable block must start with a consonant (use ㅇ as a silent placeholder for vowel-initial syllables).",
        },
    ],
}

# ─── Arabic ───────────────────────────────────────────────────────────────────

# Arabic alphabet practice table: "name", "description", and four "sets".
# Each set holds a 1-based "set" number, a "name", a "chars" list of
# "<letter> (<letter name>)" strings, and a "tip".
# NOTE(review): the four sets list 29 entries against the description's
# "28 letters" because set 4 also includes ة (taa marbuuta); presumably an
# intentional extra — confirm.
# NOTE(review): some romanized names repeat across letters (ح and ه are both
# "haa"; ت and ط are both "taa"). If callers use the parenthesised name as a
# quiz answer this is ambiguous — verify against the caller.
ARABIC_SCRIPT = {
    "name": "Arabic Alphabet",
    "description": "28 letters written right-to-left. Most letters change shape depending on position (initial, medial, final, isolated).",
    "sets": [
        {
            "set": 1,
            "name": "Group 1 — Non-connecting",
            "chars": ["ا (alif)", "د (daal)", "ذ (dhaal)", "ر (raa)", "ز (zaay)", "و (waaw)"],
            "tip": "These letters only connect to the letter before them, never after. This is the simplest group.",
        },
        {
            "set": 2,
            "name": "Group 2 — Dots below/above",
            "chars": ["ب (baa)", "ت (taa)", "ث (thaa)", "ن (nuun)", "ي (yaa)"],
            "tip": "ب ت ث share the same base shape — only the dots differ (1 below, 2 above, 3 above).",
        },
        {
            "set": 3,
            "name": "Group 3 — Common letters",
            "chars": ["ج (jiim)", "ح (haa)", "خ (khaa)", "س (siin)", "ش (shiin)", "ص (saad)", "ض (daad)"],
            "tip": "ج ح خ share a base shape. Arabic is called 'the language of ض' because this sound is unique to it.",
        },
        {
            "set": 4,
            "name": "Group 4 — Remaining letters",
            "chars": ["ط (taa)", "ظ (dhaa)", "ع (ayn)", "غ (ghayn)", "ف (faa)", "ق (qaaf)", "ك (kaaf)", "ل (laam)", "م (miim)", "ه (haa)", "ة (taa marbuuta)"],
            "tip": "ع (ayn) is one of the hardest sounds for English speakers — a deep throat sound with no English equivalent.",
        },
    ],
}

# ─── Mandarin ─────────────────────────────────────────────────────────────────

# Mandarin practice table: "name", "description", and three "sets".
# Each set holds a 1-based "set" number, a "name", a "chars" list, and a "tip".
# Unlike the other tables, the "chars" entries are not uniform:
#   set 1    - pinyin syllables with a tone label, e.g. "mā (1st: flat)"
#   sets 2-3 - "<hanzi> (<pinyin>) <English gloss>"
# Callers that parse "chars" should not assume a leading native-script glyph.
MANDARIN_SCRIPT = {
    "name": "Pinyin & Basic Characters",
    "description": "Mandarin uses Chinese characters (hanzi). Pinyin is the romanization system with tone marks used for pronunciation.",
    "sets": [
        {
            "set": 1,
            "name": "Tones & Pinyin Basics",
            "chars": ["mā (1st: flat)", "má (2nd: rising)", "mǎ (3rd: dip)", "mà (4th: falling)", "ma (neutral)"],
            "tip": "Tones change meaning completely: 妈 mā = mother, 马 mǎ = horse, 骂 mà = scold.",
        },
        {
            "set": 2,
            "name": "Basic Radicals",
            "chars": ["人 (rén) person", "口 (kǒu) mouth", "日 (rì) sun", "月 (yuè) moon", "水 (shuǐ) water", "火 (huǒ) fire", "山 (shān) mountain"],
            "tip": "Radicals are building blocks. Learning the ~214 radicals helps you decode unfamiliar characters.",
        },
        {
            "set": 3,
            "name": "Numbers & Essentials",
            "chars": ["一 (yī) one", "二 (èr) two", "三 (sān) three", "大 (dà) big", "小 (xiǎo) small", "好 (hǎo) good", "不 (bù) not"],
            "tip": "好 combines 女 (woman) + 子 (child) — a nice example of how meaning is built from components.",
        },
    ],
}

# ─── Russian ──────────────────────────────────────────────────────────────────

# Cyrillic (Russian) practice table: "name", "description", and four "sets".
# Each set holds a 1-based "set" number, a "name", a "chars" list of
# "<letter> (<romanization or sign name>)" strings, and a "tip".
# Together the four sets cover all 33 letters promised by the description;
# Й sits in set 3 next to И, the letter it is derived from. It is labelled
# "short i" rather than "y" so it does not collide with Ы (y) in set 4.
RUSSIAN_SCRIPT = {
    "name": "Cyrillic",
    "description": "33-letter alphabet. Some letters look like Latin but sound different (false friends).",
    "sets": [
        {
            "set": 1,
            "name": "Familiar-looking letters",
            "chars": ["А (a)", "Е (ye)", "К (k)", "М (m)", "О (o)", "Т (t)"],
            "tip": "These look like English letters and sound similar — the easy ones.",
        },
        {
            "set": 2,
            "name": "False friends — look familiar, sound different",
            "chars": ["В (v)", "Н (n)", "Р (r)", "С (s)", "У (u)", "Х (kh)"],
            "tip": "В looks like 'B' but sounds like 'v'. Р looks like 'P' but sounds like 'r'. Watch out!",
        },
        {
            "set": 3,
            "name": "New shapes",
            "chars": [
                "Б (b)", "Г (g)", "Д (d)", "Ж (zh)", "З (z)", "И (i)", "Й (short i)",
                "Л (l)", "П (p)", "Ф (f)", "Ц (ts)", "Ч (ch)", "Ш (sh)", "Щ (shch)",
            ],
            "tip": "Ж, Ц, Ч, Ш, Щ are uniquely Cyrillic — spend extra time on these.",
        },
        {
            "set": 4,
            "name": "Vowels & Special signs",
            "chars": ["Э (e)", "Ю (yu)", "Я (ya)", "Ё (yo)", "Ы (y)", "Ъ (hard sign)", "Ь (soft sign)"],
            "tip": "Ъ and Ь don't have sounds — they modify the preceding consonant (hard or soft).",
        },
    ],
}

# ─── Hindi ────────────────────────────────────────────────────────────────────

# Devanagari (Hindi) practice table: "name", "description", and four "sets".
# Each set holds a 1-based "set" number, a "name", a "chars" list of
# "<letter> (<romanization>)" strings, and a "tip".
# NOTE(review): the nasal consonants ङ, ञ, ण and the vowel ऋ do not appear in
# any set; confirm whether that omission is intentional.
# NOTE(review): set 4 is named "... & Common marks" but its "chars" hold only
# consonants — the halant is mentioned in the tip only.
HINDI_SCRIPT = {
    "name": "Devanagari",
    "description": "An abugida script where each consonant carries an inherent 'a' vowel. Vowel marks modify the consonant.",
    "sets": [
        {
            "set": 1,
            "name": "Vowels (स्वर)",
            "chars": ["अ (a)", "आ (aa)", "इ (i)", "ई (ii)", "उ (u)", "ऊ (uu)", "ए (e)", "ऐ (ai)", "ओ (o)", "औ (au)"],
            "tip": "Short and long vowel pairs: अ/आ, इ/ई, उ/ऊ. The long version simply holds the sound longer.",
        },
        {
            "set": 2,
            "name": "Consonants Group 1 (velar to palatal)",
            "chars": ["क (ka)", "ख (kha)", "ग (ga)", "घ (gha)", "च (cha)", "छ (chha)", "ज (ja)", "झ (jha)"],
            "tip": "Hindi consonants are organized by where in the mouth they're produced — velar (throat) to palatal (roof).",
        },
        {
            "set": 3,
            "name": "Consonants Group 2 (retroflex to dental)",
            "chars": ["ट (ṭa)", "ठ (ṭha)", "ड (ḍa)", "ढ (ḍha)", "त (ta)", "थ (tha)", "द (da)", "ध (dha)", "न (na)"],
            "tip": "Retroflex ट ठ ड ढ are pronounced with the tongue curled back — a key Hindi sound not in English.",
        },
        {
            "set": 4,
            "name": "Consonants Group 3 & Common marks",
            "chars": ["प (pa)", "फ (pha)", "ब (ba)", "भ (bha)", "म (ma)", "य (ya)", "र (ra)", "ल (la)", "व (va)", "श (sha)", "ष (ṣha)", "स (sa)", "ह (ha)"],
            "tip": "The halant (्) removes the inherent 'a' vowel: क = ka, क् = k.",
        },
    ],
}

# ─── Registry ─────────────────────────────────────────────────────────────────

# Maps a language name to the list of script tables available for practice.
# Keys are matched exactly (plain dict lookup) by the helper functions in this
# module, so "Japanese" is found but "japanese" is not. Japanese is the only
# language with more than one table; its list order is hiragana, katakana,
# kanji.
SCRIPT_REGISTRY: dict[str, list[dict]] = {
    "Japanese": [HIRAGANA, KATAKANA, KANJI_SETS],
    "Korean": [KOREAN_SCRIPT],
    "Arabic": [ARABIC_SCRIPT],
    "Mandarin": [MANDARIN_SCRIPT],
    "Russian": [RUSSIAN_SCRIPT],
    "Hindi": [HINDI_SCRIPT],
}

# Languages written in the Latin script, for which no script practice is
# offered. None of them has an entry in the script registry.
LATIN_SCRIPT_LANGUAGES = [
    "Spanish",
    "French",
    "German",
    "Italian",
    "Portuguese",
    "Dutch",
    "Swedish",
    "Polish",
    "Turkish",
]


def get_scripts_for_language(language: str) -> list[dict] | None:
    """Look up the script tables registered for a language.

    Args:
        language: Language name, matched exactly against the registry keys
            (case-sensitive).

    Returns:
        The registered list of script dicts, or None when the language has no
        registry entry (for example a Latin-script language).
    """
    try:
        return SCRIPT_REGISTRY[language]
    except KeyError:
        # Unregistered language: signal "no script practice" with None.
        return None


def get_script_by_name(language: str, script_name: str) -> dict | None:
    """Return one script table of a language, selected by its display name.

    Args:
        language: Language name, matched exactly against the registry keys.
        script_name: Script name such as 'Hiragana'; compared against each
            table's "name" without regard to letter case.

    Returns:
        The first matching script dict, or None when the language is not
        registered or none of its scripts has that name.
    """
    candidates = SCRIPT_REGISTRY.get(language, [])
    matches = (
        entry
        for entry in candidates
        # Lower-case both sides per comparison so lookups ignore case.
        if entry["name"].lower() == script_name.lower()
    )
    return next(matches, None)


def get_set_from_script(script: dict, set_number: int) -> dict | None:
    """Return a specific set from a script by 1-based number."""
    for s in script["sets"]:
        if s["set"] == set_number:
            return s
    return None