Skip to content

Commit 4c85ff0

Browse files
authored
Prepare fixup for lazy deserialization (#21170)
Together with #21158, this gets us 95% of the way to lazy deserialization. The implementation is generally straightforward. At some point I wanted to unify the new global state with (parts of) the checker state, and potentially get rid of a bunch of `named_type` callbacks we pass around, but decided to do this later in a separate PR, since this is not strictly necessary for parallel checking. Although this PR itself only makes the fixup phase (which is the smaller part of deserialization) lazy, it already shows small performance improvements: * ~net zero on self-check * a few percent faster cold `torch` with 4 workers * ~15% faster warm `torch`
1 parent 801c0e5 commit 4c85ff0

7 files changed

Lines changed: 163 additions & 93 deletions

File tree

mypy/build.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@
143143

144144
from mypy import errorcodes as codes
145145
from mypy.config_parser import get_config_module_names, parse_mypy_comments
146-
from mypy.fixup import fixup_module
146+
from mypy.fixer_state import fixer_state
147+
from mypy.fixup import NodeFixer
147148
from mypy.freetree import free_tree
148149
from mypy.fscache import FileSystemCache
149150
from mypy.known_modules import get_known_modules, reset_known_modules_cache
@@ -812,6 +813,10 @@ def __init__(
812813
self.options = options
813814
self.version_id = version_id
814815
self.modules: dict[str, MypyFile] = {}
816+
# Share same modules dictionary with the global fixer state.
817+
# We need to set allow_missing when doing a fine-grained cache
818+
# load because we need to gracefully handle missing modules.
819+
fixer_state.node_fixer = NodeFixer(self.modules, self.options.use_fine_grained_cache)
815820
self.import_map: dict[str, set[str]] = {}
816821
self.missing_modules: dict[str, int] = {}
817822
self.fg_deps_meta: dict[str, FgDepMeta] = {}
@@ -2810,9 +2815,21 @@ def load_tree(self, temporary: bool = False) -> None:
28102815

28112816
def fix_cross_refs(self) -> None:
28122817
assert self.tree is not None, "Internal error: method must be called on parsed file only"
2813-
# We need to set allow_missing when doing a fine-grained cache
2814-
# load because we need to gracefully handle missing modules.
2815-
fixup_module(self.tree, self.manager.modules, self.options.use_fine_grained_cache)
2818+
# Do initial lightweight pass fixing TypeInfos and module cross-references.
2819+
assert fixer_state.node_fixer is not None
2820+
fixer_state.node_fixer.visit_symbol_table(self.tree.names)
2821+
type_fixer = fixer_state.node_fixer.type_fixer
2822+
# Eagerly fix shared instances, before they are used by named_type() calls.
2823+
if instance_cache.str_type is not None:
2824+
instance_cache.str_type.accept(type_fixer)
2825+
if instance_cache.function_type is not None:
2826+
instance_cache.function_type.accept(type_fixer)
2827+
if instance_cache.int_type is not None:
2828+
instance_cache.int_type.accept(type_fixer)
2829+
if instance_cache.bool_type is not None:
2830+
instance_cache.bool_type.accept(type_fixer)
2831+
if instance_cache.object_type is not None:
2832+
instance_cache.object_type.accept(type_fixer)
28162833

28172834
# Methods for processing modules from source code.
28182835

mypy/fixer_state.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, Final
4+
5+
if TYPE_CHECKING:
6+
from mypy.fixup import NodeFixer
7+
8+
# This is global mutable state. Don't add anything here unless there's a very
9+
# good reason. This exists as a separate file to avoid method-level import in
10+
# hot code in SymbolTableNode.node().
11+
12+
13+
class FixerState:
14+
def __init__(self) -> None:
15+
self.node_fixer: NodeFixer | None = None
16+
17+
18+
fixer_state: Final = FixerState()

mypy/fixup.py

Lines changed: 53 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
OverloadedFuncDef,
1515
ParamSpecExpr,
1616
SymbolTable,
17+
SymbolTableNode,
1718
TypeAlias,
1819
TypeInfo,
1920
TypeVarExpr,
@@ -45,20 +46,14 @@
4546
from mypy.visitor import NodeVisitor
4647

4748

48-
# N.B: we do a allow_missing fixup when fixing up a fine-grained
49-
# incremental cache load (since there may be cross-refs into deleted
50-
# modules)
51-
def fixup_module(tree: MypyFile, modules: dict[str, MypyFile], allow_missing: bool) -> None:
52-
node_fixer = NodeFixer(modules, allow_missing)
53-
node_fixer.visit_symbol_table(tree.names, tree.fullname)
54-
55-
56-
# TODO: Fix up .info when deserializing, i.e. much earlier.
5749
class NodeFixer(NodeVisitor[None]):
5850
current_info: TypeInfo | None = None
5951

6052
def __init__(self, modules: dict[str, MypyFile], allow_missing: bool) -> None:
6153
self.modules = modules
54+
# N.B: we do an allow_missing fixup when fixing up a fine-grained
55+
# incremental cache load (since there may be cross-refs into deleted
56+
# modules)
6257
self.allow_missing = allow_missing
6358
self.type_fixer = TypeFixer(self.modules, allow_missing)
6459

@@ -70,7 +65,7 @@ def visit_type_info(self, info: TypeInfo) -> None:
7065
if info.defn:
7166
info.defn.accept(self)
7267
if info.names:
73-
self.visit_symbol_table(info.names, info.fullname)
68+
self.visit_symbol_table(info.names)
7469
if info.bases:
7570
for base in info.bases:
7671
base.accept(self.type_fixer)
@@ -118,62 +113,66 @@ def visit_type_info(self, info: TypeInfo) -> None:
118113
self.current_info = save_info
119114

120115
# NOTE: This method *definitely* isn't part of the NodeVisitor API.
121-
def visit_symbol_table(self, symtab: SymbolTable, table_fullname: str) -> None:
122-
# Copy the items because we may mutate symtab.
123-
for key in list(symtab):
116+
def visit_symbol_table(self, symtab: SymbolTable) -> None:
117+
for key in symtab:
124118
value = symtab[key]
125119
cross_ref = value.cross_ref
126-
if cross_ref is not None: # Fix up cross-reference.
127-
value.cross_ref = None
120+
# Fix up module cross-reference eagerly because it is very cheap.
121+
if cross_ref is not None:
128122
if cross_ref in self.modules:
129-
value.node = self.modules[cross_ref]
130-
else:
131-
stnode = lookup_fully_qualified(
132-
cross_ref, self.modules, raise_on_missing=not self.allow_missing
133-
)
134-
if stnode is not None:
135-
if stnode is value:
136-
# The node seems to refer to itself, which can mean that
137-
# the target is a deleted submodule of the current module,
138-
# and thus lookup falls back to the symbol table of the parent
139-
# package. Here's how this may happen:
140-
#
141-
# pkg/__init__.py:
142-
# from pkg import sub
143-
#
144-
# Now if pkg.sub is deleted, the pkg.sub symbol table entry
145-
# appears to refer to itself. Replace the entry with a
146-
# placeholder to avoid a crash. We can't delete the entry,
147-
# as it would stop dependency propagation.
148-
value.node = Var(key + "@deleted")
149-
else:
150-
assert stnode.node is not None, (table_fullname + "." + key, cross_ref)
151-
value.node = stnode.node
152-
elif not self.allow_missing:
153-
assert False, f"Could not find cross-ref {cross_ref}"
154-
else:
155-
# We have a missing crossref in allow missing mode, need to put something
156-
value.node = missing_info(self.modules)
123+
value.cross_ref = None
124+
value.unfixed = False
125+
value._node = self.modules[cross_ref]
126+
# TODO: this should not be needed, looks like a daemon bug.
127+
elif self.allow_missing:
128+
self.resolve_cross_ref(value)
129+
# Look at private attribute to avoid triggering fixup eagerly.
130+
elif isinstance(value._node, TypeInfo):
131+
self.visit_type_info(value._node)
157132
else:
158-
if isinstance(value.node, TypeInfo):
159-
# TypeInfo has no accept(). TODO: Add it?
160-
self.visit_type_info(value.node)
161-
elif value.node is not None:
162-
value.node.accept(self)
163-
else:
164-
assert False, f"Unexpected empty node {key!r}: {value}"
133+
value.stored_info = self.current_info
134+
135+
def resolve_cross_ref(self, value: SymbolTableNode) -> None:
136+
"""Replace cross-reference with an actual referred node."""
137+
assert value.cross_ref is not None
138+
cross_ref = value.cross_ref
139+
value.cross_ref = None
140+
value.unfixed = False
141+
stnode = lookup_fully_qualified(
142+
cross_ref, self.modules, raise_on_missing=not self.allow_missing
143+
)
144+
if stnode is not None:
145+
if stnode is value:
146+
# The node seems to refer to itself, which can mean that
147+
# the target is a deleted submodule of the current module,
148+
# and thus lookup falls back to the symbol table of the parent
149+
# package. Here's how this may happen:
150+
#
151+
# pkg/__init__.py:
152+
# from pkg import sub
153+
#
154+
# Now if pkg.sub is deleted, the pkg.sub symbol table entry
155+
# appears to refer to itself. Replace the entry with a
156+
# placeholder to avoid a crash. We can't delete the entry,
157+
# as it would stop dependency propagation.
158+
short_name = cross_ref.rsplit(".", maxsplit=1)[-1]
159+
value._node = Var(short_name + "@deleted")
160+
else:
161+
assert stnode.node is not None, cross_ref
162+
value._node = stnode.node
163+
elif not self.allow_missing:
164+
assert False, f"Could not find cross-ref {cross_ref}"
165+
else:
166+
# We have a missing crossref in allow missing mode, need to put something
167+
value._node = missing_info(self.modules)
165168

166169
def visit_func_def(self, func: FuncDef) -> None:
167-
if self.current_info is not None:
168-
func.info = self.current_info
169170
if func.type is not None:
170171
func.type.accept(self.type_fixer)
171172
if isinstance(func.type, CallableType):
172173
func.type.definition = func
173174

174175
def visit_overloaded_func_def(self, o: OverloadedFuncDef) -> None:
175-
if self.current_info is not None:
176-
o.info = self.current_info
177176
if o.type:
178177
o.type.accept(self.type_fixer)
179178
for item in o.items:
@@ -186,14 +185,10 @@ def visit_overloaded_func_def(self, o: OverloadedFuncDef) -> None:
186185
typ.definition = item
187186

188187
def visit_decorator(self, d: Decorator) -> None:
189-
if self.current_info is not None:
190-
d.var.info = self.current_info
191188
if d.func:
192189
d.func.accept(self)
193190
if d.var:
194191
d.var.accept(self)
195-
for node in d.decorators:
196-
node.accept(self)
197192
typ = d.var.type
198193
if isinstance(typ, ProperType) and isinstance(typ, CallableType):
199194
typ.definition = d.func
@@ -218,8 +213,6 @@ def visit_type_var_tuple_expr(self, tv: TypeVarTupleExpr) -> None:
218213
tv.default.accept(self.type_fixer)
219214

220215
def visit_var(self, v: Var) -> None:
221-
if self.current_info is not None:
222-
v.info = self.current_info
223216
if v.type is not None:
224217
v.type.accept(self.type_fixer)
225218
if v.setter_type is not None:
@@ -237,7 +230,6 @@ def __init__(self, modules: dict[str, MypyFile], allow_missing: bool) -> None:
237230
self.allow_missing = allow_missing
238231

239232
def visit_instance(self, inst: Instance) -> None:
240-
# TODO: Combine Instances that are exactly the same?
241233
type_ref = inst.type_ref
242234
if type_ref is None:
243235
return # We've already been here.

0 commit comments

Comments
 (0)