-
Notifications
You must be signed in to change notification settings - Fork 0
fix(jitdump): support 64-bit tid for macos jitdumps #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
not-matthias
wants to merge
2
commits into
main
Choose a base branch
from
cod-2645-investigate-python_perf_jit_support-not-working-on-macos
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,6 @@ | ||
| use byteorder::{BigEndian, ByteOrder, LittleEndian}; | ||
| use linux_perf_event_reader::{Endianness, RawData}; | ||
|
|
||
| use super::record::JitDumpRecordHeader; | ||
|
|
||
| /// A parsed `JIT_CODE_LOAD` record, for a single jitted function. | ||
| /// | ||
| /// This carries the function name and the code bytes. | ||
|
|
@@ -11,7 +9,11 @@ pub struct JitCodeLoadRecord<'a> { | |
| /// The process ID of the runtime generating the jitted code. | ||
| pub pid: u32, | ||
| /// The thread ID of the runtime thread generating the jitted code. | ||
| pub tid: u32, | ||
| /// | ||
| /// This is a `u64` because some runtimes (e.g. CPython on macOS, which uses | ||
| /// `pthread_threadid_np`) write a 64-bit thread id, in a wider record layout | ||
| /// that is detected during parsing (see `parse_impl`). | ||
| pub tid: u64, | ||
| /// The virtual address where `code_bytes` starts in the memory of the process. | ||
| pub vma: u64, | ||
| /// The code start address for the jitted code. It is unclear in what cases this would differ from `vma`. | ||
|
|
@@ -26,11 +28,6 @@ pub struct JitCodeLoadRecord<'a> { | |
| } | ||
|
|
||
| impl<'a> JitCodeLoadRecord<'a> { | ||
| /// The offset, in bytes, between the start of the record header and | ||
| /// the start of the function name. | ||
| pub const NAME_OFFSET_FROM_RECORD_START: usize = | ||
| JitDumpRecordHeader::SIZE + 4 + 4 + 8 + 8 + 8 + 8; | ||
|
|
||
| pub fn parse(endian: Endianness, data: RawData<'a>) -> Result<Self, std::io::Error> { | ||
| match endian { | ||
| Endianness::LittleEndian => Self::parse_impl::<LittleEndian>(data), | ||
|
|
@@ -39,16 +36,44 @@ impl<'a> JitCodeLoadRecord<'a> { | |
| } | ||
|
|
||
| pub fn parse_impl<O: ByteOrder>(data: RawData<'a>) -> Result<Self, std::io::Error> { | ||
| // Try the standard layout first (the common case); fall back to the wider | ||
| // macOS layout if it isn't self-consistent with the record body length. | ||
| // The two layouts differ only in where the name starts, so exactly one of | ||
| // them makes `prefix + name_len + 1 + code_size == body_len` hold. | ||
| if let Some(record) = Self::try_parse::<O>(data, false) { | ||
| return Ok(record); | ||
| } | ||
| if let Some(record) = Self::try_parse::<O>(data, true) { | ||
| return Ok(record); | ||
| } | ||
| Err(std::io::ErrorKind::InvalidData.into()) | ||
| } | ||
|
|
||
| fn try_parse<O: ByteOrder>(data: RawData<'a>, macos_wide_layout: bool) -> Option<Self> { | ||
| let body_len = data.len(); | ||
| let mut cur = data; | ||
| let pid = cur.read_u32::<O>()?; | ||
| let tid = cur.read_u32::<O>()?; | ||
| let vma = cur.read_u64::<O>()?; | ||
| let code_addr = cur.read_u64::<O>()?; | ||
| let code_size = cur.read_u64::<O>()?; | ||
| let code_index = cur.read_u64::<O>()?; | ||
| let function_name = cur.read_string().ok_or(std::io::ErrorKind::UnexpectedEof)?; | ||
| let code_bytes = cur.split_off_prefix(code_size as usize)?; | ||
| Ok(Self { | ||
| let pid = cur.read_u32::<O>().ok()?; | ||
| let tid = if macos_wide_layout { | ||
| let _pad = cur.read_u32::<O>().ok()?; | ||
| cur.read_u64::<O>().ok()? | ||
| } else { | ||
| u64::from(cur.read_u32::<O>().ok()?) | ||
| }; | ||
|
Comment on lines
+55
to
+61
|
||
| let vma = cur.read_u64::<O>().ok()?; | ||
| let code_addr = cur.read_u64::<O>().ok()?; | ||
| let code_size = cur.read_u64::<O>().ok()?; | ||
| let code_index = cur.read_u64::<O>().ok()?; | ||
| let function_name = cur.read_string()?; | ||
|
|
||
| // Validate this layout against the known body length before trusting it: | ||
| // `cur` now points just past the name's NUL, so the bytes consumed so far | ||
| // plus the code must exactly fill the body. | ||
| let consumed = body_len - cur.len(); | ||
| if consumed.checked_add(code_size as usize)? != body_len { | ||
| return None; | ||
| } | ||
| let code_bytes = cur.split_off_prefix(code_size as usize).ok()?; | ||
| Some(Self { | ||
| pid, | ||
| tid, | ||
| vma, | ||
|
|
@@ -58,16 +83,6 @@ impl<'a> JitCodeLoadRecord<'a> { | |
| code_bytes, | ||
| }) | ||
| } | ||
|
|
||
| /// The offset, in bytes, between the start of the record header and | ||
| /// the start of the code bytes. | ||
| /// | ||
| /// This can be different for each record because the code bytes are after | ||
| /// the function name, so this offset depends on the length of the function | ||
| /// name. | ||
| pub fn code_bytes_offset_from_record_header_start(&self) -> usize { | ||
| JitDumpRecordHeader::SIZE + 4 + 4 + 8 + 8 + 8 + 8 + self.function_name.len() + 1 | ||
| } | ||
| } | ||
|
|
||
| /// A parsed `JIT_CODE_MOVE` record. | ||
|
|
@@ -76,7 +91,9 @@ pub struct JitCodeMoveRecord { | |
| /// The process ID of the runtime generating the jitted code. | ||
| pub pid: u32, | ||
| /// The thread ID of the runtime thread generating the jitted code. | ||
| pub tid: u32, | ||
| /// | ||
| /// This is a `u64` for the same reason as [`JitCodeLoadRecord::tid`]. | ||
| pub tid: u64, | ||
| /// The new address where the jitted code starts in the virtual memory of the process. | ||
| pub vma: u64, | ||
| /// The old address of this function's code bytes. | ||
|
|
@@ -98,15 +115,38 @@ impl JitCodeMoveRecord { | |
| } | ||
|
|
||
| pub fn parse_impl<O: ByteOrder>(data: RawData) -> Result<Self, std::io::Error> { | ||
| // This record is fixed-size with no trailing data, so the correct layout | ||
| // is the one whose fields exactly consume the body (48 bytes for the | ||
| // standard u32 tid, 56 for the wider macOS u64 tid + padding). | ||
|
Comment on lines
+118
to
+120
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than double trying parse, isnt it better to check data.len() here? Or better even directly within try_parse |
||
| if let Some(record) = Self::try_parse::<O>(data, false) { | ||
| return Ok(record); | ||
| } | ||
| if let Some(record) = Self::try_parse::<O>(data, true) { | ||
| return Ok(record); | ||
| } | ||
| Err(std::io::ErrorKind::InvalidData.into()) | ||
| } | ||
|
|
||
| fn try_parse<O: ByteOrder>(data: RawData, macos_wide_layout: bool) -> Option<Self> { | ||
| let mut cur = data; | ||
| let pid = cur.read_u32::<O>()?; | ||
| let tid = cur.read_u32::<O>()?; | ||
| let vma = cur.read_u64::<O>()?; | ||
| let old_code_addr = cur.read_u64::<O>()?; | ||
| let new_code_addr = cur.read_u64::<O>()?; | ||
| let code_size = cur.read_u64::<O>()?; | ||
| let code_index = cur.read_u64::<O>()?; | ||
| Ok(Self { | ||
| let pid = cur.read_u32::<O>().ok()?; | ||
| let tid = if macos_wide_layout { | ||
| let _pad = cur.read_u32::<O>().ok()?; | ||
| cur.read_u64::<O>().ok()? | ||
| } else { | ||
| u64::from(cur.read_u32::<O>().ok()?) | ||
| }; | ||
|
Comment on lines
+132
to
+138
|
||
| let vma = cur.read_u64::<O>().ok()?; | ||
| let old_code_addr = cur.read_u64::<O>().ok()?; | ||
| let new_code_addr = cur.read_u64::<O>().ok()?; | ||
| let code_size = cur.read_u64::<O>().ok()?; | ||
| let code_index = cur.read_u64::<O>().ok()?; | ||
|
|
||
| // The record must be fully consumed; otherwise we picked the wrong layout. | ||
| if !cur.is_empty() { | ||
| return None; | ||
| } | ||
| Some(Self { | ||
| pid, | ||
| tid, | ||
| vma, | ||
|
|
@@ -222,3 +262,49 @@ impl<'a> JitCodeUnwindingInfoRecord<'a> { | |
| }) | ||
| } | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use crate::jitdump::{JitDumpReader, JitDumpRecord}; | ||
| use std::fs::File; | ||
|
|
||
| fn parse_jitdump(path: &str) -> (usize, usize) { | ||
| let mut reader = JitDumpReader::new(File::open(path).unwrap()).unwrap(); | ||
| let (mut loads, mut unwinds) = (0, 0); | ||
| while let Some(raw) = reader.next_record().unwrap() { | ||
| match raw.parse().unwrap() { | ||
| JitDumpRecord::CodeLoad(_) => loads += 1, | ||
| JitDumpRecord::CodeUnwindingInfo(_) => unwinds += 1, | ||
| _ => {} | ||
| } | ||
| } | ||
| (loads, unwinds) | ||
| } | ||
|
|
||
| /// A real cpython-on-Linux x86_64 jitdump capture in the **standard** perf | ||
| /// layout (u32 tid, no alignment padding). | ||
| #[test] | ||
| fn parses_standard_layout_python_jitdump() { | ||
| let (loads, unwinds) = parse_jitdump(concat!( | ||
| env!("CARGO_MANIFEST_DIR"), | ||
| "/testdata/jit-python-linux-x86_64.dump" | ||
| )); | ||
| assert_eq!(loads, 195); | ||
| assert_eq!(unwinds, 195); | ||
| } | ||
|
|
||
| /// A real jitdump from cpython-3.15.0a7 on **macOS arm64**, where CPython | ||
| /// declares `thread_id` as `uint64_t`, so every `CODE_LOAD` uses the wider | ||
| /// layout (u64 tid + 4 bytes of alignment padding), shifting the name and | ||
| /// code bytes by 8 vs the perf jitdump spec. The previous u32-only parser | ||
| /// misread `code_size` as the (4 GB) code address and failed on this file. | ||
| #[test] | ||
| fn parses_macos_wide_layout_python_jitdump() { | ||
| let (loads, unwinds) = parse_jitdump(concat!( | ||
| env!("CARGO_MANIFEST_DIR"), | ||
| "/testdata/jit-python-macos-arm64.dump" | ||
| )); | ||
| assert_eq!(loads, 195); | ||
| assert_eq!(unwinds, 195); | ||
| } | ||
| } | ||
Binary file not shown.
Binary file not shown.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.