Disable GC freeze hack in parallel workers (#21207)

ilevkivskyi · web-flow · commit c22629d027f2 · 2026-04-13T02:07:07.000+01:00
Unfortunately, this hack causes a severe performance regression on Python
3.14, where the GC works differently. A single freeze still works, but
repeated freezes are very slow.

This PR makes parallel self-check ~5% slower on Python 3.12, but doesn't
have any effect on `torch` (which, however, is not very parallelizable).
But I think it is a safer bet not to do any aggressive GC manipulation
(beyond a single freeze, which is kind of an established pattern).

We can tune it more later, when we are done with more "algorithmic"
improvements.
diff --git a/mypy/build.py b/mypy/build.py
@@ -4383,9 +4383,7 @@ def process_stale_scc(
         if (
             not manager.options.test_env
             and platform.python_implementation() == "CPython"
-            # Parallel workers perform loading in many smaller "pieces", so we
-            # should repeat the GC hack multiple times to actually benefit from it.
-            and (manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES or manager.parallel_worker)
+            and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES
         ):
             # When deserializing cache we create huge amount of new objects, so even
             # with our generous GC thresholds, GC is still doing a lot of pointless
@@ -4394,16 +4392,14 @@ def process_stale_scc(
             # generation with the freeze()/unfreeze() trick below. This is arguably
             # a hack, but it gives huge performance wins for large third-party
             # libraries, like torch.
-            gc.collect(generation=1)
-            gc.collect(generation=0)
             gc.disable()
         for prev_scc in fresh_sccs_to_load:
             manager.done_sccs.add(prev_scc.id)
             process_fresh_modules(graph, sorted(prev_scc.mod_ids), manager)
         if (
             not manager.options.test_env
             and platform.python_implementation() == "CPython"
-            and (manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES or manager.parallel_worker)
+            and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES
         ):
             manager.gc_freeze_cycles += 1
             gc.freeze()
diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py
@@ -179,22 +179,17 @@ def serve(server: IPCServer, ctx: ServerContext) -> None:
         manager.add_stats(scc_wait_time=t1 - t0, scc_receive_time=time.time() - t1)
         scc_id = scc_message.scc_id
         if scc_id is None:
+            gc_stats = gc.get_stats()
+            manager.add_stats(
+                gc_collections_gen0=gc_stats[0]["collections"],
+                gc_collections_gen1=gc_stats[1]["collections"],
+            )
             manager.dump_stats()
             break
         scc = manager.scc_by_id[scc_id]
         t0 = time.time()
         try:
-            if platform.python_implementation() == "CPython":
-                # Since we are splitting the GC freeze hack into multiple smaller freezes,
-                # we should collect young generations to not accumulate accidental garbage.
-                gc.collect(generation=1)
-                gc.collect(generation=0)
-                gc.disable()
             load_states(scc, graph, manager, scc_message.import_errors, scc_message.mod_data)
-            if platform.python_implementation() == "CPython":
-                gc.freeze()
-                gc.unfreeze()
-                gc.enable()
             result = process_stale_scc(graph, scc, manager, from_cache=graph_data.from_cache)
             # We must commit after each SCC, otherwise we break --sqlite-cache.
             manager.commit()