1010
1111"""
1212
13+ import os
1314import sys
1415import encodings
1516import encodings .aliases
@@ -468,6 +469,30 @@ def normalize(localename):
468469
469470 return localename
470471
def _conv_to_windows(locale):
    """Convert a POSIX-style locale name to the Windows locale-name form.

    The input is expected to look like
    ``language[_territory][.encoding][@modifier]``.  Underscores in the
    language/territory part are replaced by hyphens.  When an
    ``@modifier`` is present it is folded into the name:

    * ``valencia`` is appended after the territory
      (``ca_ES@valencia`` -> ``ca-ES-valencia``);
    * any other non-empty modifier is inserted between the language and
      the territory, using the entry from ``_modifier_to_script`` when
      there is one and the title-cased modifier otherwise
      (``zh_TW@hant`` -> ``zh-Hant-TW``).

    The encoding, if any, is re-attached unchanged at the end.  Names
    without a modifier only get their language/territory separator
    converted.

    Returns the converted locale name as a string.
    """
    name, has_modifier, modifier = locale.partition('@')
    name, dot, encoding = name.partition('.')
    # Only the language/territory part uses '_' as a separator; the
    # encoding and the modifier must not be rewritten.
    name = name.replace('_', '-')
    if not has_modifier:
        return name + dot + encoding

    language, _, territory = name.partition('-')
    # Modifiers are matched case-insensitively, ignoring stray whitespace
    # around them so that e.g. 'latin ' still selects the 'Latn' script.
    modifier = modifier.strip().lower()
    is_valencia = modifier == 'valencia'

    parts = [language]
    if modifier and not is_valencia:
        # Known Unix modifier names map to a script code; anything else
        # is passed through title-cased (e.g. 'hans' -> 'Hans').
        parts.append(_modifier_to_script.get(modifier, modifier.title()))
    if territory:
        parts.append(territory)
    if is_valencia:
        # 'valencia' goes after the territory rather than before it.
        parts.append(modifier)

    result = '-'.join(parts)
    if encoding:
        result += '.' + encoding
    return result
495+
471496def _parse_localename (localename ):
472497
473498 """ Parses the locale code for localename and returns the
@@ -621,6 +646,8 @@ def setlocale(category, locale=None):
621646 if locale and not isinstance (locale , _builtin_str ):
622647 # convert to string
623648 locale = normalize (_build_localename (locale ))
649+ if os .name == 'nt' :
650+ locale = _conv_to_windows (locale )
624651 return _setlocale (category , locale )
625652
626653
@@ -1546,9 +1573,9 @@ def getpreferredencoding(do_setlocale=True):
15461573 0x004d : "as" , # Assamese
15471574 0x044d : "as_IN" , # Assamese - India
15481575 0x002c : "az" , # Azerbaijani (Latin)
1549- 0x742c : "az" , # Azerbaijani (Cyrillic)
1550- 0x782c : "az" , # Azerbaijani (Latin)
1551- 0x042c : "az_AZ" , # Azerbaijani (Latin) - Azerbaijan
1576+ 0x742c : "az@cyrillic " , # Azerbaijani (Cyrillic)
1577+ 0x782c : "az@latin " , # Azerbaijani (Latin)
1578+ 0x042c : "az_AZ@latin " , # Azerbaijani (Latin) - Azerbaijan
15521579 0x0045 : "bn" , # Bangla
15531580 0x0445 : "bn_IN" , # Bangla - India
15541581 0x0845 : "bn_BD" , # Bangla - Bangladesh
@@ -1558,10 +1585,10 @@ def getpreferredencoding(do_setlocale=True):
15581585 0x042d : "eu_ES" , # Basque - Spain
15591586 0x0023 : "be" , # Belarusian
15601587 0x0423 : "be_BY" , # Belarusian - Belarus
1561- 0x641a : "bs" , # Bosnian (Cyrillic)
1562- 0x681a : "bs" , # Bosnian (Latin)
1563- 0x141a : "bs_BA" , # Bosnian (Latin) - Bosnia and Herzegovina
1564- 0x201a : "bs_BA" , # Bosnian (Cyrillic) - Bosnia and Herzegovina
1588+ 0x641a : "bs@cyrillic " , # Bosnian (Cyrillic)
1589+ 0x681a : "bs@latin " , # Bosnian (Latin)
1590+ 0x141a : "bs_BA@latin " , # Bosnian (Latin) - Bosnia and Herzegovina
1591+ 0x201a : "bs_BA@cyrillic " , # Bosnian (Cyrillic) - Bosnia and Herzegovina
15651592 0x781a : "bs" , # Bosnian (Latin)
15661593 0x007e : "br" , # Breton
15671594 0x047e : "br_FR" , # Breton - France
@@ -1571,16 +1598,16 @@ def getpreferredencoding(do_setlocale=True):
15711598 0x0455 : "my_MM" , # Burmese - Myanmar
15721599 0x0003 : "ca" , # Catalan
15731600 0x0403 : "ca_ES" , # Catalan - Spain
1574- 0x0803 : "ca_ES" , # Valencian - Spain
1601+ 0x0803 : "ca_ES@valencia " , # Valencian - Spain
15751602 0x0092 : "ku" , # Central Kurdish
1576- 0x7c92 : "ku" , # Central Kurdish
1577- 0x0492 : "ku_IQ" , # Central Kurdish - Iraq
1603+ 0x7c92 : "ku@arabic " , # Central Kurdish
1604+ 0x0492 : "ku_IQ@arabic " , # Central Kurdish - Iraq
15781605 0x005c : "chr" , # Cherokee
1579- 0x7c5c : "chr" , # Cherokee
1580- 0x045c : "chr_US" , # Cherokee - United States
1581- 0x0004 : "zh" , # Chinese (Simplified)
1606+ 0x7c5c : "chr@Cher " , # Cherokee
1607+ 0x045c : "chr_US@Cher " , # Cherokee - United States
1608+ 0x0004 : "zh@Hans " , # Chinese (Simplified)
15821609 0x7804 : "zh" , # Chinese (Simplified)
1583- 0x7c04 : "zh" , # Chinese (Traditional)
1610+ 0x7c04 : "zh@Hant " , # Chinese (Traditional)
15841611 0x0404 : "zh_TW" , # Chinese (Traditional) - Taiwan
15851612 0x0804 : "zh_CN" , # Chinese (Simplified) - People's Republic of China
15861613 0x0c04 : "zh_HK" , # Chinese (Traditional) - Hong Kong S.A.R.
@@ -1648,9 +1675,9 @@ def getpreferredencoding(do_setlocale=True):
16481675 0x0062 : "fy" , # Frisian
16491676 0x0462 : "fy_NL" , # Frisian - Netherlands
16501677 0x0067 : "ff" , # Fulah
1651- 0x7c67 : "ff" , # Fulah (Latin)
1652- 0x0467 : "ff_NG" ,
1653- 0x0867 : "ff_SN" , # Fulah - Senegal
1678+ 0x7c67 : "ff@latin " , # Fulah (Latin)
1679+ 0x0467 : "ff_NG@latin " ,
1680+ 0x0867 : "ff_SN@latin " , # Fulah - Senegal
16541681 0x0056 : "gl" , # Galician
16551682 0x0456 : "gl_ES" , # Galician - Spain
16561683 0x0037 : "ka" , # Georgian
@@ -1670,8 +1697,8 @@ def getpreferredencoding(do_setlocale=True):
16701697 0x0047 : "gu" , # Gujarati
16711698 0x0447 : "gu_IN" , # Gujarati - India
16721699 0x0068 : "ha" , # Hausa (Latin)
1673- 0x7c68 : "ha" , # Hausa (Latin)
1674- 0x0468 : "ha_NG" , # Hausa (Latin) - Nigeria
1700+ 0x7c68 : "ha@latin " , # Hausa (Latin)
1701+ 0x0468 : "ha_NG@latin " , # Hausa (Latin) - Nigeria
16751702 0x0075 : "haw" , # Hawaiian
16761703 0x0475 : "haw_US" , # Hawaiian - United States
16771704 0x000d : "he" , # Hebrew
@@ -1687,10 +1714,10 @@ def getpreferredencoding(do_setlocale=True):
16871714 0x0021 : "id" , # Indonesian
16881715 0x0421 : "id_ID" , # Indonesian - Indonesia
16891716 0x005d : "iu" , # Inuktitut (Latin)
1690- 0x785d : "iu" , # Inuktitut (Syllabics)
1691- 0x7c5d : "iu" , # Inuktitut (Latin)
1692- 0x045d : "iu_CA" , # Inuktitut (Syllabics) - Canada
1693- 0x085d : "iu_CA" , # Inuktitut (Latin) - Canada
1717+ 0x785d : "iu@Cans " , # Inuktitut (Syllabics)
1718+ 0x7c5d : "iu@latin " , # Inuktitut (Latin)
1719+ 0x045d : "iu_CA@Cans " , # Inuktitut (Syllabics) - Canada
1720+ 0x085d : "iu_CA@latin " , # Inuktitut (Latin) - Canada
16941721 0x003c : "ga" , # Irish
16951722 0x083c : "ga_IE" , # Irish - Ireland
16961723 0x0010 : "it" , # Italian
@@ -1700,10 +1727,10 @@ def getpreferredencoding(do_setlocale=True):
17001727 0x0411 : "ja_JP" , # Japanese - Japan
17011728 0x004b : "kn" , # Kannada
17021729 0x044b : "kn_IN" , # Kannada - India
1703- 0x0471 : "kr_NG" , # Kanuri (Latin) - Nigeria
1730+ 0x0471 : "kr_NG@latin " , # Kanuri (Latin) - Nigeria
17041731 0x0060 : "ks" , # Kashmiri
1705- 0x0460 : "ks" , # Kashmiri - Perso_Arabic
1706- 0x0860 : "ks_IN" , # Kashmiri (Devanagari) - India
1732+ 0x0460 : "ks@arabic " , # Kashmiri - Perso_Arabic
1733+ 0x0860 : "ks_IN@devanagari " , # Kashmiri (Devanagari) - India
17071734 0x003f : "kk" , # Kazakh
17081735 0x043f : "kk_KZ" , # Kazakh - Kazakhstan
17091736 0x0053 : "km" , # Khmer
@@ -1747,10 +1774,10 @@ def getpreferredencoding(do_setlocale=True):
17471774 0x007c : "moh" , # Mohawk
17481775 0x047c : "moh_CA" , # Mohawk - Canada
17491776 0x0050 : "mn" , # Mongolian (Cyrillic)
1750- 0x7850 : "mn" , # Mongolian (Cyrillic)
1751- 0x7c50 : "mn" , # Mongolian (Traditional Mongolian)
1777+ 0x7850 : "mn@cyrillic " , # Mongolian (Cyrillic)
1778+ 0x7c50 : "mn@Mong " , # Mongolian (Traditional Mongolian)
17521779 0x0450 : "mn_MN" , # Mongolian (Cyrillic) - Mongolia
1753- 0x0c50 : "mn_MN" , # Mongolian (Traditional Mongolian) - Mongolia
1780+ 0x0c50 : "mn_MN@Mong " , # Mongolian (Traditional Mongolian) - Mongolia
17541781 0x0061 : "ne" , # Nepali
17551782 0x0461 : "ne_NP" , # Nepali - Nepal
17561783 0x0861 : "ne_IN" , # Nepali - India
@@ -1775,9 +1802,9 @@ def getpreferredencoding(do_setlocale=True):
17751802 0x0416 : "pt_BR" , # Portuguese - Brazil
17761803 0x0816 : "pt_PT" , # Portuguese - Portugal
17771804 0x0046 : "pa" , # Punjabi
1778- 0x7c46 : "pa" , # Punjabi
1805+ 0x7c46 : "pa@arabic " , # Punjabi
17791806 0x0446 : "pa_IN" , # Punjabi - India
1780- 0x0846 : "pa_PK" , # Punjabi - Islamic Republic of Pakistan
1807+ 0x0846 : "pa_PK@arabic " , # Punjabi - Islamic Republic of Pakistan
17811808 0x006b : "quz" , # Quechua
17821809 0x046b : "quz_BO" , # Quechua - Bolivia
17831810 0x086b : "quz_EC" , # Quechua - Ecuador
@@ -1810,25 +1837,25 @@ def getpreferredencoding(do_setlocale=True):
18101837 0x044f : "sa_IN" , # Sanskrit - India
18111838 0x0091 : "gd" , # Scottish Gaelic
18121839 0x0491 : "gd_GB" , # Scottish Gaelic - United Kingdom
1813- 0x6c1a : "sr" , # Serbian (Cyrillic)
1814- 0x701a : "sr" , # Serbian (Latin)
1840+ 0x6c1a : "sr@cyrillic " , # Serbian (Cyrillic)
1841+ 0x701a : "sr@latin " , # Serbian (Latin)
18151842 0x7c1a : "sr" , # Serbian (Latin)
1816- 0x081a : "sr_CS" , # Serbian (Latin) - Serbia and Montenegro (Former)
1817- 0x0c1a : "sr_CS" , # Serbian (Cyrillic) - Serbia and Montenegro (Former)
1818- 0x181a : "sr_BA" , # Serbian (Latin) - Bosnia and Herzegovina
1819- 0x1c1a : "sr_BA" , # Serbian (Cyrillic) - Bosnia and Herzegovina
1820- 0x241a : "sr_RS" , # Serbian (Latin) - Serbia
1821- 0x281a : "sr_RS" , # Serbian (Cyrillic) - Serbia
1822- 0x2c1a : "sr_ME" , # Serbian (Latin) - Montenegro
1823- 0x301a : "sr_ME" , # Serbian (Cyrillic) - Montenegro
1843+ 0x081a : "sr_CS@latin " , # Serbian (Latin) - Serbia and Montenegro (Former)
1844+ 0x0c1a : "sr_CS@cyrillic " , # Serbian (Cyrillic) - Serbia and Montenegro (Former)
1845+ 0x181a : "sr_BA@latin " , # Serbian (Latin) - Bosnia and Herzegovina
1846+ 0x1c1a : "sr_BA@cyrillic " , # Serbian (Cyrillic) - Bosnia and Herzegovina
1847+ 0x241a : "sr_RS@latin " , # Serbian (Latin) - Serbia
1848+ 0x281a : "sr_RS@cyrillic " , # Serbian (Cyrillic) - Serbia
1849+ 0x2c1a : "sr_ME@latin " , # Serbian (Latin) - Montenegro
1850+ 0x301a : "sr_ME@cyrillic " , # Serbian (Cyrillic) - Montenegro
18241851 0x006c : "nso" , # Sesotho sa Leboa
18251852 0x046c : "nso_ZA" , # Sesotho sa Leboa - South Africa
18261853 0x0032 : "tn" , # Setswana
18271854 0x0432 : "tn_ZA" , # Setswana - South Africa
18281855 0x0832 : "tn_BW" , # Setswana - Botswana
18291856 0x0059 : "sd" , # Sindhi
1830- 0x7c59 : "sd" , # Sindhi
1831- 0x0859 : "sd_PK" , # Sindhi - Islamic Republic of Pakistan
1857+ 0x7c59 : "sd@arabic " , # Sindhi
1858+ 0x0859 : "sd_PK@arabic " , # Sindhi - Islamic Republic of Pakistan
18321859 0x005b : "si" , # Sinhala
18331860 0x045b : "si_LK" , # Sinhala - Sri Lanka
18341861 0x001b : "sk" , # Slovak
@@ -1867,14 +1894,14 @@ def getpreferredencoding(do_setlocale=True):
18671894 0x005a : "syr" , # Syriac
18681895 0x045a : "syr_SY" , # Syriac - Syria
18691896 0x0028 : "tg" , # Tajik (Cyrillic)
1870- 0x7c28 : "tg" , # Tajik (Cyrillic)
1871- 0x0428 : "tg_TJ" , # Tajik (Cyrillic) - Tajikistan
1897+ 0x7c28 : "tg@cyrillic " , # Tajik (Cyrillic)
1898+ 0x0428 : "tg_TJ@cyrillic " , # Tajik (Cyrillic) - Tajikistan
18721899 0x005f : "tzm" , # Tamazight (Latin)
1873- 0x785f : "tzm" ,
1874- 0x7c5f : "tzm" , # Tamazight (Latin)
1875- 0x085f : "tzm_DZ" , # Tamazight (Latin) - Algeria
1876- 0x045f : "tzm_MA" , # Central Atlas Tamazight (Arabic) - Morocco
1877- 0x105f : "tzm_MA" ,
1900+ 0x785f : "tzm@Tfng " ,
1901+ 0x7c5f : "tzm@latin " , # Tamazight (Latin)
1902+ 0x085f : "tzm_DZ@latin " , # Tamazight (Latin) - Algeria
1903+ 0x045f : "tzm_MA@arabic " , # Central Atlas Tamazight (Arabic) - Morocco
1904+ 0x105f : "tzm_MA@Tfng " ,
18781905 0x0049 : "ta" , # Tamil
18791906 0x0449 : "ta_IN" , # Tamil - India
18801907 0x0849 : "ta_LK" , # Tamil - Sri Lanka
@@ -1905,9 +1932,9 @@ def getpreferredencoding(do_setlocale=True):
19051932 0x0080 : "ug" , # Uyghur
19061933 0x0480 : "ug_CN" , # Uyghur - People's Republic of China
19071934 0x0043 : "uz" , # Uzbek (Latin)
1908- 0x7843 : "uz" , # Uzbek (Cyrillic)
1909- 0x7c43 : "uz" , # Uzbek (Latin)
1910- 0x0443 : "uz_UZ" , # Uzbek (Latin) - Uzbekistan
1935+ 0x7843 : "uz@cyrillic " , # Uzbek (Cyrillic)
1936+ 0x7c43 : "uz@latin " , # Uzbek (Latin)
1937+ 0x0443 : "uz_UZ@latin " , # Uzbek (Latin) - Uzbekistan
19111938 0x0033 : "ve" , # Venda
19121939 0x0433 : "ve_ZA" , # Venda - South Africa
19131940 0x002a : "vi" , # Vietnamese
@@ -1943,6 +1970,16 @@ def getpreferredencoding(do_setlocale=True):
19431970 0x00051004 : "zh_SG" ,
19441971}
19451972
# Lookup table from lower-case Unix-style locale modifiers to the
# corresponding four-letter ISO 15924 script codes.
# https://www.unicode.org/iso15924/iso15924.txt

_modifier_to_script = dict(
    arabic='Arab',
    cyrillic='Cyrl',
    devanagari='Deva',
    latin='Latn',
)
1982+
19461983def _print_locale ():
19471984
19481985 """ Test function.
0 commit comments