Skip to content

Commit bbf2fb6

Browse files
authored
bpo-44582: Accelerate mimetypes.init on Windows with a native accelerator (pythonGH-27059)
1 parent af4a2dc commit bbf2fb6

5 files changed

Lines changed: 189 additions & 7 deletions

File tree

Lib/mimetypes.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@
2727
import sys
2828
import posixpath
2929
import urllib.parse
30+
31+
try:
32+
from _winapi import _mimetypes_read_windows_registry
33+
except ImportError:
34+
_mimetypes_read_windows_registry = None
35+
3036
try:
3137
import winreg as _winreg
3238
except ImportError:
@@ -237,10 +243,21 @@ def read_windows_registry(self, strict=True):
237243
types.
238244
"""
239245

240-
# Windows only
241-
if not _winreg:
246+
if not _mimetypes_read_windows_registry and not _winreg:
242247
return
243248

249+
add_type = self.add_type
250+
if strict:
251+
add_type = lambda type, ext: self.add_type(type, ext, True)
252+
253+
# Accelerated function if it is available
254+
if _mimetypes_read_windows_registry:
255+
_mimetypes_read_windows_registry(add_type)
256+
elif _winreg:
257+
self._read_windows_registry(add_type)
258+
259+
@classmethod
260+
def _read_windows_registry(cls, add_type):
244261
def enum_types(mimedb):
245262
i = 0
246263
while True:
@@ -265,7 +282,7 @@ def enum_types(mimedb):
265282
subkey, 'Content Type')
266283
if datatype != _winreg.REG_SZ:
267284
continue
268-
self.add_type(mimetype, subkeyname, strict)
285+
add_type(mimetype, subkeyname)
269286
except OSError:
270287
continue
271288

@@ -349,8 +366,8 @@ def init(files=None):
349366

350367
if files is None or _db is None:
351368
db = MimeTypes()
352-
if _winreg:
353-
db.read_windows_registry()
369+
# Quick return if not supported
370+
db.read_windows_registry()
354371

355372
if files is None:
356373
files = knownfiles

Lib/test/test_mimetypes.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
from test.support import os_helper
1010
from platform import win32_edition
1111

12+
try:
13+
import _winapi
14+
except ImportError:
15+
_winapi = None
16+
1217

1318
def setUpModule():
1419
global knownfiles
@@ -235,6 +240,21 @@ def test_registry_parsing(self):
235240
eq(self.db.guess_type("image.jpg"), ("image/jpeg", None))
236241
eq(self.db.guess_type("image.png"), ("image/png", None))
237242

243+
@unittest.skipIf(not hasattr(_winapi, "_mimetypes_read_windows_registry"),
                 "read_windows_registry accelerator unavailable")
def test_registry_accelerator(self):
    """Check the native registry reader against the winreg-based reader.

    Both readers are driven with a callback that groups the reported MIME
    types by extension; the two resulting mappings must list the same
    extensions in the same order and hold the same set of types for each.
    """
    def collect(reader):
        # Run *reader* with a recording callback and return what it
        # reported as {extension: {mime types}}.
        seen = {}

        def record(mimetype, ext):
            seen.setdefault(ext, set()).add(mimetype)

        reader(record)
        return seen

    from_accel = collect(_winapi._mimetypes_read_windows_registry)
    from_reg = collect(mimetypes.MimeTypes._read_windows_registry)

    self.assertEqual(list(from_reg), list(from_accel))
    for ext, types in from_reg.items():
        self.assertEqual(types, from_accel[ext])
257+
238258

239259
class MiscTestCase(unittest.TestCase):
240260
def test__all__(self):
@@ -288,6 +308,5 @@ def test_guess_type(self):
288308
type_info = self.mimetypes_cmd("foo.pic")
289309
eq(type_info, "I don't know anything about type foo.pic")
290310

291-
292311
if __name__ == "__main__":
293312
unittest.main()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Speed up :mod:`mimetypes` initialization on Windows by using a native
2+
implementation of the registry scan.

Modules/_winapi.c

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1894,6 +1894,113 @@ _winapi_GetFileType_impl(PyObject *module, HANDLE handle)
18941894
return result;
18951895
}
18961896

1897+
/*[clinic input]
1898+
_winapi._mimetypes_read_windows_registry
1899+
1900+
on_type_read: object
1901+
1902+
Optimized function for reading all known MIME types from the registry.
1903+
1904+
*on_type_read* is a callable taking *type* and *ext* arguments, as for
1905+
MimeTypes.add_type.
1906+
[clinic start generated code]*/
1907+
1908+
static PyObject *
_winapi__mimetypes_read_windows_registry_impl(PyObject *module,
                                              PyObject *on_type_read)
/*[clinic end generated code: output=20829f00bebce55b input=cd357896d6501f68]*/
{
    /* Enumerate the subkeys of HKEY_CLASSES_ROOT whose name starts with
       '.', read each one's REG_SZ "Content Type" value, and report every
       pair found by calling on_type_read(type, ext).

       Registry calls are made with the GIL released. Results are buffered
       in a fixed-size array and handed to the callback in batches, so the
       GIL is only re-acquired once per full batch and once at the end.

       Returns None on success. Returns NULL with an exception set if the
       callback raises, or if enumeration stops on a Windows error other
       than ERROR_NO_MORE_ITEMS. */
#define CCH_EXT 128
#define CB_TYPE 510
    struct {
        wchar_t ext[CCH_EXT];
        /* One extra element so a terminator can always be appended. */
        wchar_t type[CB_TYPE / sizeof(wchar_t) + 1];
    } entries[64];
    int entry = 0;
    HKEY hkcr = NULL;
    LRESULT err;

    Py_BEGIN_ALLOW_THREADS
    err = RegOpenKeyExW(HKEY_CLASSES_ROOT, NULL, 0, KEY_READ, &hkcr);
    /* The loop keeps running while the last result was "fine" or "buffer
       too small"; any other value left in err on `continue` ends it. */
    for (DWORD i = 0; err == ERROR_SUCCESS || err == ERROR_MORE_DATA; ++i) {
        LPWSTR ext = entries[entry].ext;
        LPWSTR type = entries[entry].type;
        DWORD cchExt = CCH_EXT;
        DWORD cbType = CB_TYPE;
        HKEY subkey;
        DWORD regType;

        err = RegEnumKeyExW(hkcr, i, ext, &cchExt, NULL, NULL, NULL, NULL);
        if (err != ERROR_SUCCESS || (cchExt && ext[0] != L'.')) {
            /* Not an extension key, or enumeration failed/finished. */
            continue;
        }

        err = RegOpenKeyExW(hkcr, ext, 0, KEY_READ, &subkey);
        if (err == ERROR_FILE_NOT_FOUND || err == ERROR_ACCESS_DENIED) {
            /* The key vanished or is not readable by this user: skip it
               and keep scanning rather than failing the whole call. The
               winreg-based reader likewise skips a subkey on OSError. */
            err = ERROR_SUCCESS;
            continue;
        } else if (err != ERROR_SUCCESS) {
            continue;
        }

        err = RegQueryValueExW(subkey, L"Content Type", NULL,
                               &regType, (LPBYTE)type, &cbType);
        RegCloseKey(subkey);
        if (err == ERROR_FILE_NOT_FOUND) {
            /* No "Content Type" value for this extension. */
            err = ERROR_SUCCESS;
            continue;
        } else if (err != ERROR_SUCCESS) {
            continue;
        } else if (regType != REG_SZ || !cbType) {
            continue;
        }
        /* Terminate explicitly instead of relying on the stored data
           ending in a null character. */
        type[cbType / sizeof(wchar_t)] = L'\0';

        entry += 1;

        /* Flush our cached entries if we are full */
        if (entry == sizeof(entries) / sizeof(entries[0])) {
            Py_BLOCK_THREADS
            for (int j = 0; j < entry; ++j) {
                PyObject *r = PyObject_CallFunction(
                    on_type_read, "uu", entries[j].type, entries[j].ext
                );
                if (!r) {
                    /* We blocked threads, so safe to return from here */
                    RegCloseKey(hkcr);
                    return NULL;
                }
                Py_DECREF(r);
            }
            Py_UNBLOCK_THREADS
            entry = 0;
        }
    }
    if (hkcr) {
        RegCloseKey(hkcr);
    }
    Py_END_ALLOW_THREADS

    if (err != ERROR_SUCCESS && err != ERROR_NO_MORE_ITEMS) {
        PyErr_SetFromWindowsErr((int)err);
        return NULL;
    }

    /* Report whatever is left from the last, partially filled batch. */
    for (int j = 0; j < entry; ++j) {
        PyObject *r = PyObject_CallFunction(
            on_type_read, "uu", entries[j].type, entries[j].ext
        );
        if (!r) {
            return NULL;
        }
        Py_DECREF(r);
    }

    Py_RETURN_NONE;
#undef CCH_EXT
#undef CB_TYPE
}
2003+
18972004

18982005
static PyMethodDef winapi_functions[] = {
18992006
_WINAPI_CLOSEHANDLE_METHODDEF
@@ -1926,6 +2033,7 @@ static PyMethodDef winapi_functions[] = {
19262033
_WINAPI_WRITEFILE_METHODDEF
19272034
_WINAPI_GETACP_METHODDEF
19282035
_WINAPI_GETFILETYPE_METHODDEF
2036+
_WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF
19292037
{NULL, NULL}
19302038
};
19312039

Modules/clinic/_winapi.c.h

Lines changed: 37 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)