Skip to content

Commit 61dc0bb

Browse files
WIP
1 parent 2a2e197 commit 61dc0bb

1 file changed

Lines changed: 75 additions & 34 deletions

File tree

Lib/profiling/sampling/collector.py

Lines changed: 75 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -41,73 +41,114 @@ def _iter_async_frames(self, awaited_info_list):
4141
"""
4242
# First, index all tasks by their IDs so we can look up parents easily
4343
all_tasks = {}
44+
tasks_by_name = {}
4445
for awaited_info in awaited_info_list:
4546
for task_info in awaited_info.awaited_by:
4647
all_tasks[task_info.task_id] = (task_info, awaited_info.thread_id)
48+
display_name = task_info.task_name or f"Task-{task_info.task_id}"
49+
tasks_by_name.setdefault(display_name, []).append(
50+
(task_info, awaited_info.thread_id)
51+
)
52+
fallback_name = f"Task-{task_info.task_id}"
53+
if fallback_name != display_name:
54+
tasks_by_name.setdefault(fallback_name, []).append(
55+
(task_info, awaited_info.thread_id)
56+
)
4757

4858
# Use a cache for memoizing parent chains so we don't recompute them repeatedly
4959
cache = {}
60+
root_frame = FrameInfo(("<root>", 0, "<all tasks>"))
5061

51-
def build_parent_chain(task_id, parent_id):
62+
def build_parent_chain(task_id, parent_name, thread_id, await_frames):
5263
"""
5364
Recursively build the parent chain for a given task by:
5465
- Finding the parent's await-site frames
5566
- Recursing up the parent chain until reaching Program Root
5667
- Adding Program Root at the top of the chain
5768
- Caching results along the way to avoid redundant work
5869
"""
59-
if parent_id in cache:
60-
return cache[parent_id]
61-
62-
if parent_id not in all_tasks:
63-
return []
70+
def frame_signature(frame):
71+
func = getattr(frame, "function", None)
72+
if func is None:
73+
func = getattr(frame, "funcname", None)
74+
return (
75+
getattr(frame, "filename", None),
76+
getattr(frame, "lineno", None),
77+
func,
78+
)
79+
80+
frames_signature = tuple(
81+
frame_signature(frame) for frame in await_frames or []
82+
)
83+
cache_key = (task_id, parent_name, thread_id, frames_signature)
84+
if cache_key in cache:
85+
return cache[cache_key]
86+
87+
if not parent_name:
88+
chain = list(await_frames or []) + [root_frame]
89+
cache[cache_key] = chain
90+
return chain
91+
92+
parent_entry = None
93+
for candidate_info, candidate_tid in tasks_by_name.get(parent_name, []):
94+
if candidate_tid == thread_id:
95+
parent_entry = (candidate_info, candidate_tid)
96+
break
6497

65-
parent_info, _ = all_tasks[parent_id]
98+
if parent_entry is None:
99+
chain = list(await_frames or []) + [root_frame]
100+
cache[cache_key] = chain
101+
return chain
66102

67-
# Find the await-site frames for this parent relationship
68-
await_frames = []
69-
for coro_info in all_tasks[task_id][0].awaited_by:
70-
if coro_info.task_name == parent_id:
71-
await_frames = list(coro_info.call_stack or [])
72-
break
103+
parent_info, parent_thread = parent_entry
73104

74105
# Recursively build grandparent chain, or terminate with Program Root
75-
if (parent_info.awaited_by and parent_info.awaited_by[0].task_name and
76-
parent_info.awaited_by[0].task_name in all_tasks):
77-
grandparent_id = parent_info.awaited_by[0].task_name
78-
chain = await_frames + build_parent_chain(parent_id, grandparent_id)
79-
else:
80-
# Parent is root or grandparent not tracked
81-
root_frame = FrameInfo(("<root>", 0, "<all tasks>"))
82-
chain = await_frames + [root_frame]
106+
grandparent_chain = resolve_parent_chain(
107+
parent_info.task_id, parent_thread, parent_info.awaited_by
108+
)
109+
chain = list(await_frames or []) + grandparent_chain
83110

84-
cache[parent_id] = chain
111+
cache[cache_key] = chain
85112
return chain
86113

114+
def resolve_parent_chain(task_id, thread_id, awaited_by_list):
115+
"""Find the best available parent chain for the given task.
116+
Best is the first parent chain holding more than the root frame (the search stops there); otherwise only the root frame is returned."""
117+
best_chain = [root_frame]
118+
for coro_info in awaited_by_list or []:
119+
parent_name = coro_info.task_name
120+
await_frames = list(coro_info.call_stack or [])
121+
candidate = build_parent_chain(
122+
task_id,
123+
parent_name,
124+
thread_id,
125+
await_frames,
126+
)
127+
if len(candidate) > len(best_chain):
128+
best_chain = candidate
129+
if len(best_chain) > 1:
130+
break
131+
return best_chain
132+
87133
# Yield one complete stack per task in LEAF→ROOT order
88-
for task_id, (task_info, _) in all_tasks.items():
134+
for task_id, (task_info, thread_id) in all_tasks.items():
89135
# Start with the task's own body frames (deepest frames first)
90136
body_frames = [
91137
frame
92138
for coro in (task_info.coroutine_stack or [])
93139
for frame in (coro.call_stack or [])
94140
]
95141

96-
if task_info.awaited_by and task_info.awaited_by[0].task_name:
142+
if task_info.awaited_by:
97143
# Add synthetic frame for the task itself
98144
task_name = task_info.task_name or f"Task-{task_id}"
99145
synthetic = FrameInfo(("<task>", 0, f"running {task_name}"))
100146

101147
# Append parent chain (await-site frames + parents recursively)
102-
parent_id = task_info.awaited_by[0].task_name
103-
if parent_id in all_tasks:
104-
parent_chain = build_parent_chain(task_id, parent_id)
105-
yield body_frames + [synthetic] + parent_chain, task_id
106-
else:
107-
# No tracked parent, just add root marker
108-
root = FrameInfo(("<root>", 0, "<all tasks>"))
109-
yield body_frames + [synthetic, root], task_id
148+
parent_chain = resolve_parent_chain(
149+
task_id, thread_id, task_info.awaited_by
150+
)
151+
yield body_frames + [synthetic] + parent_chain, task_id
110152
else:
111153
# Root task: no synthetic marker needed, just add root marker
112-
root = FrameInfo(("<root>", 0, "<all tasks>"))
113-
yield body_frames + [root], task_id
154+
yield body_frames + [root_frame], task_id

0 commit comments

Comments
 (0)