From 55700c491f09726f947e196b0fbaa4d98bb3d7a1 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Mon, 1 Jun 2026 12:26:55 +0200 Subject: [PATCH 1/2] test(jitdump): add real cpython fixtures for both record layouts Add end-to-end parse tests over real cpython perf jitdump captures: a Linux x86_64 dump in the standard layout (u32 tid) and a macOS arm64 dump in the wider layout (u64 tid + alignment padding). Each must yield 195 CODE_LOAD + 195 CODE_UNWINDING_INFO records, exercising the per-record layout detection. Refs COD-2645 Co-Authored-By: Claude --- src/jitdump/records.rs | 46 ++++++++++++++++++++++++++ testdata/jit-python-linux-x86_64.dump | Bin 0 -> 45543 bytes testdata/jit-python-macos-arm64.dump | Bin 0 -> 49431 bytes 3 files changed, 46 insertions(+) create mode 100644 testdata/jit-python-linux-x86_64.dump create mode 100644 testdata/jit-python-macos-arm64.dump diff --git a/src/jitdump/records.rs b/src/jitdump/records.rs index 26f86ef..c1b4fe2 100644 --- a/src/jitdump/records.rs +++ b/src/jitdump/records.rs @@ -222,3 +222,49 @@ impl<'a> JitCodeUnwindingInfoRecord<'a> { }) } } + +#[cfg(test)] +mod tests { + use crate::jitdump::{JitDumpReader, JitDumpRecord}; + use std::fs::File; + + fn parse_jitdump(path: &str) -> (usize, usize) { + let mut reader = JitDumpReader::new(File::open(path).unwrap()).unwrap(); + let (mut loads, mut unwinds) = (0, 0); + while let Some(raw) = reader.next_record().unwrap() { + match raw.parse().unwrap() { + JitDumpRecord::CodeLoad(_) => loads += 1, + JitDumpRecord::CodeUnwindingInfo(_) => unwinds += 1, + _ => {} + } + } + (loads, unwinds) + } + + /// A real cpython-on-Linux x86_64 jitdump capture in the **standard** perf + /// layout (u32 tid, no alignment padding). + #[test] + fn parses_standard_layout_python_jitdump() { + let (loads, unwinds) = parse_jitdump(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/jit-python-linux-x86_64.dump" + )); + assert_eq!(loads, 195); + assert_eq!(unwinds, 195); + } + + /// A real jitdump from cpython-3.15.0a7 on **macOS arm64**, where CPython + /// declares `thread_id` as `uint64_t`, so every `CODE_LOAD` uses the wider + /// layout (u64 tid + 4 bytes of alignment padding), shifting the name and + /// code bytes by 8 vs the perf jitdump spec. The previous u32-only parser + /// misread `code_size` as the (4 GB) code address and failed on this file. + #[test] + fn parses_macos_wide_layout_python_jitdump() { + let (loads, unwinds) = parse_jitdump(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/jit-python-macos-arm64.dump" + )); + assert_eq!(loads, 195); + assert_eq!(unwinds, 195); + } +} diff --git a/testdata/jit-python-linux-x86_64.dump b/testdata/jit-python-linux-x86_64.dump new file mode 100644 index 0000000000000000000000000000000000000000..2e219c9b613f96b7ed7c0e386d99297e29c2be64 GIT binary patch literal 45543 zcmc)T36vaFxd-qLD4^`1VN+=W%4TLj1To~r&H^Nm2r>|a_n^9_r)H)|Z*+A}W&%Pd zC_6%gCqWS!5PgcWDVvD2EP^2WzG)*wRv$YeD0%m*`v1Fc&lKHLJ@4GqIdeXfjC}RG z_tw2tw{BIiX{+_tUPt|U>@(`$<>&ELt5&TezdLh-jZR!+_WEnh zR==SBIBBaj_qt8}o9s(f#hUt&Uz1JpzKXoEZ}Jnn^5`3OE~tM%^#7ackMjSQEk81C z+N@H4zf*Af=4Dd_C*x#`c6yGJ%Jn&gF3WPWZqc%)P46${mN?nC{B_Ek5j$k=DJxe! zaKx?j90QIu2HuG{Gu^AlSwUVpt8g;Xxn{U??`*{S{BW<; zuvMSqt$+|)7$JU!^ueJ$9-ks zwEEp_pJl6`bk6Rp)7(rxS16|4-Y%;*ms9S7ogaIE_B-aG$ElrPfxN8H8la4XvI~_8 zwHD<1N@-_)F12WZowWzTI->EZZyqqOK3=I`Kwhd7R|n0Qk?OVe>J3w?-qn2o}w&6Ua~7-Gm_2dSN4EYv@(8$t9G4Q6tP{OoiR^ctf)^SFR8I#VW34M@e|=S zJa%LqU(KHAndc5E!sWA_Gu(6~IX?B(rdVw@uuvn$6jF3*$E0=3; zH!sd8#hb`WaTQ`liZ>l2_VP|D;td}m|+19zKeAFJ5^L|(GX;4+f^X{~N&mE3gE%?5aNN$NN54yyXO`^=lA zBVYIaATP}|m>FrFteLIQs_vvCKj|U!>SL7T3go3aaXm(KM5;H_)cg2zofEF;BOftO zmd1S|@{(Kum67DlHL|5n+tSuzw>W6^7wnAVS;6QOwXQUfMCm#O!qIvBJO0e@w zL0)RB&@xiHm8Kna{Hmtx<vzuW(EL*q^QKNaM3w485zkL+SsXiS#W8xm z(Dkgmom!-pvqVH+;F?F5732@dOLV-7;SiDNFSbBWk%x-j%a z{a()$g+3d;yF%q`B1{-_jtj_nxUY6as>ty4k0kxZdM_!U@ATg4(M=O$* z%B9neze3EjLThZ}Mdo=jeYXI4$xGaZF%XfwuV_3=`bYovpv-2aJ#`5)+_-*eZ>=3| z-YQkONg>EfYXwF|TKCkf)6;gQx6f|O>$K-RQ8dr%Q@cE=)Xa;dY1|K7RGc@09)t8ji(x@oy^5Qqm`=mp87$O9P7rtkn zIHGp@4&)^#BMeJ#nofH7 zB8_uGjm2D}&uL$e(-)XGN@L9;FO6l`7-`&lv~gc2t12=Y^3fslNSQm_d1;WB$QqE0 zMDFvE$cgkrx14HTEhnpQBQMp7yD(}ZQoXOH?%?P=?xNMhy9#;9tANEw-rUi=Xl?e1 z-ng)1UugCw;bf$DKh1lPqwZyRZnTT5 zjVEnxe8e17J3SkD*pb`k(IA?-$YvM zG2Q0DGKF&q@)BGIl#$>CT7~wwOSEUH+MgD@KF_>O8tat(g1ofVU}L21>zYlTE3(xE zWt7vN{RYhYq_;T{dFe~sgRv2jz5|+koEENMOSZ4TgO-~I$^xZtBQJp!z!(YSyN%>% z4Bjae0k`5(^EjCYuOcsTv3oH#A`R}nV@bLQ_H#IZu+EFgFF1pRURnFr6 zh`h8WVPvHBAk9i@gsS}DS@S%Z+S&H6L0%r0&qMx`nQ@ID+uX zyUYuvFWvFMATNa#$QUX7rY2M`#qc8X&RB0t<2}xx+U0AIm%P{m7#0!9lQk!@$?>Ny z_%l&`-|RN;lm6vY?|8>`%Tv$V*-sEJpH< z(0Gk`jEM~UyWMMEyjUq5$V+7sIz}qx+DtZ4TC#>r z9&^=hF}JVB!=L+%@p??EJ^nfJQdotMk;0Vl)F#3)%Np}J&mGR&#lf*_8@D*>v&LPr z6nuZ=r7Qjj#!W=JWNK12zvk4MzeRYnKxdD2-t?h)t+c(B$V+V*N=9mBu1mJiS{#sj zlx`}#9v}Yn=Zx2*Ttxm0c`2(w#z>j`u52QkIe)%2>yU%3`7?VCu8$xsb(eRU*U8yk z4SA_cJc`i~kvjQZ*`Pb%P;0@wo}PJM-*v%=@IDSdP4`urgeX z^vPAfY`zZ_rp}p1y=*j0R{pzrnKZ?#k(aU>WQ>%_DYb0Atl#s$LD90~J31CO-toyC z!+FR{P~r)UScnAWd=5bJB(#-EtH&r?ny%U0yh$qQ8RVs@0t+Ke@-+_Gq>>I)XS?&= zMXf=TpEVDX6WToT5)^w9qZ1-Q@(M251T^NP-jdgR;4$}*w__Z>(!5PBLY5&fZAq9I zX_GI)$R@ULl-#1esJRVo=6mL8Qf)5sl2!$cku-UhSvJ43I8aRnMX+^kx{dLEL#i!_ zytKui!Vrc?o7}LL&9}{U()tS&+Em-~3G+0$yEA!ake9SFNQ|VFd{{ussHGqEzA*fK zIv>n7&ym@hZRa#|YTz)EBUjn7NloERnx(v4JT}g7!(p%NV;&~=05;vX8J2h&Ll+`p zGD|C)z%uqCM@((%Dm~9UNk-#yke5xb0tzEZvP4ogAsv;|nqG^Un;-3O-jr1Pcq8)C z6nh4P7a~m~0#cjbQ^-Z5=j7|v)WWs=0P`-H-M#^N=}N-GNSE9ekxg9caj~Y>*Mhai zA?8_0wVN}Lm#ivSjAR`nCbbDGn=52e`imA?xaJ;X-X*>I66B>T{$~tWh;$w6s{+@M zy4b2W!SK#+gR*&!w7xx%mz**fjO56xfMkT?TC?~-j%OCy(NS&U_f)O~xbu2&zX!|QD5 z0rM&u(k?(=)>#=EMykG}spQ_QRN6RL|9HqeONO+qA8xL!8d!{Eov5*}o~cXe>b-vZ zd(P7zH7}GqkbTI@ic9s^8COx7WjFd=GovJWejNmLe~46`&Z2 zJ6Xqt&($|hQq*cL^;P0})rNWBdfvQO<_|x=vbnj&p2I+gNbf1SS33$DNVzbTW3$c- z?`|G^**sKkQoN76tk@)gjD()5LDh++`j~AKs>Mn4xo?=4$z0;i$V*ujGDgZy(`4*% zEY;2U4>Eb$@z}oWx!@n>sd9AQkG!PDpT|guNb2bt)oOQis_%ujJ342qImP(2t4t7m z7kTL|!^ueR8M;>;o$5WVZ7035YnwO8x!UQ-OH&OdMw-4m+N55Jlq4I3iMarlwOD4uTlxMZ?~ktRC52n+16(t3bp^*;$%QHN^7Mh}N3qQWT zku{H!M;*>UUZSc%F%osQhEkusRj>VSoLYyA4l9@!$%>Dq$V*ZD1&nEk6rH1qa7T3P z;)yo**cJ|(H_8!qGV;<`hK-TN|JIDE)>66rNWmQ#Y*%#$9%G&*i&=(|m#i9CjAVUZ zV_CT48@cZ8Twxw5=OFJRFOi8CF`^+7d9H?3bvM|2^EdpMyZ?OiCb?C=#*c!$G*w_> zq-lj_3g#^f=~Pb6L-c$0;kHQ^t{&Tt$V*%7C5&c>w4JBh)R%BXkt%t`Tt>aNp+2&O z+nn3Y+sbNx&qZF^k}xsS_5;m!pgI-a$IbSQ`-7-z`Kqejteqaa9NxQg^|+TKFWptR z8R`CExZ8g%Zrg9rwQz58w|Tc*u1-Tuzg&_D=Tu z738I{3L_(pmuN<<>-JlnBkeA#tn2h9J8p4$)86B%QlJEG;Wz}T!>f@DU`^|#9 zRM((pr1~eCy3wS^9^nxy`|2&svt=snN#rFv@xK_#5y}2(Yj)$HAG^dUVoAKbjro$0 z<(;3I(%b+lC;>*6#Q$g|5gg^C7fwb5AF-`@uykwRL0(pU>@^JMhy?#^4A}J)xzk>J zt#0#NIeXa?dC5(J$w=(5Bhq_?=GFT!x!FFEnYG_u zY~C%aAm2k?)^i13M!K)m+~QPU*UDuqsp;{3Fuui%+DTWLr^}kOE0CA;*qa!o5lO#F zqgz4OHp}+hl-D)>Z^pLZue`}TU)EB7_wMEvp9Gtc{Hs5bKi*Hrw!mL@k9oYT)qVka zS@%`o8HxXuhVRLh3Mr0la=bS!7m7HxrS3BimpjL&Aur+aw=kq568>uqj_ z5$!L>^@;2jjbN*X%$J4qeY265Wl=^MFtRMJ(aJ(Qy^$s$9bnVLH4&S^bB~y(%dDOv>|F%ic?|c5C(CKx`rH`3M%VpU;$V+tMZH)1VL|><&gL4`$pVrom zM6Bc56XvZ)-pTl-i%d7A zk(cZ$*o+O>EC|`$w$&NUGC*ZN(JxV0p~Aio68J-^G}YNbqlUa49S6Os$f7 z?Zj?h>QsE@j^@QOa$kqM6qg}or1&OXJY=U`)t=QPx|?y-0iDTDq_TJ2)jVBB?!%Cm z^cv`lr2kH*%UUJ5RO?mmot?;8>yfs3w48$c0C|Z{`~xF5BGET%=vFf_UH)gGyVW;p z3r=@ws2ejesUS|>t!vlayLPd6pPE*WvJ6ih-g6IYk1tOh$d_$*YAqDaWNF;%k<)2!8`*!wi_L13{Vcu-A zn^?Reh44=j{s>n2j}ky&(TV{THWBGstBqFE*aF?QQn9u+wc5sNr02YMelvIO+uhmO zH@%Vj`7DRs?0(Ms{?57Q-GA@RigUvE%u-4%psz2{7q_|NO5J_y@Er@gUK)ST|CvKy zL-aK=|JGlOzi*aW%|E2}y2PkXvG{)0#zMvfRt}5ip!9Y-``1uty zlhpoAPu#nIw%XdQPTswL(~fPUv%THWVE$tF8eB>&@XH!-g9chOfQ1GJCBGJH&?@$! zt^>mkzc#ztA@LUr_SqA&pIXQe_stvKv;Qe^z%R4UrSGe87iMj-bDnbJ-1qvzLa*o3 zSH9@?R?|EjC>6_SAu)8o%JLov6b%uLQ2hr4rXqp@bF%V4(zxcboyo!#UaJzB*g`n zOp+8|lcW#}DPCqK#oD0a4aA8-eImCmL!ZE9xXgsBroh*N4GVlO8r-Z*NZ_=NX;B9jD#i0P`n&jUGEvS< zsggC{YW|Kh_DT0LalM7#R5JaAB$HUk^eXxrZm~=g)!SgGZsz&oP~rh18o4EjKq_HG z0c|PK7V9vxgrNz0aebECGcML;CH}Z267yryrp6>)#3Ef@ZHiPkY4ZjPe76p=%-(Lk z!+oduC=tQ0-D$MY!$O7Eq*RDXK`z6sM}NjWIFCu-*9KSLknoEI_-jq@Rq^b6H3fZn zk>+Sa6}r1*ACvUo>t2d2gayC06j~&Jg%q${IF}8E`AS$w1fl7B+%YnyMCSwUb>JHa z{MtIGzm${^3;x3zrbN~o$l{0HD{(DR!LO}^76o9T#Onm~2_Y6T9B#MH(Do;3vy^T_i#)7IQ|Qbg#fo zL<7I90B`(YtfYk;77Dz_aB?%!G zavW*M5qE=Dj2LkdE{h@;1NgP!wLrrHo;MY_@n}I-b>ni7agfmcJ@>KaaU=ZN`0Cpd zd$GVjDuz$+t*$_uoqi>drS1&f0GH%46E2xT3M~@ALW(yTQpAffNjhX8U%AJ95?nzP z@S944zm_Br3-V-(T_)J^0*mwnzfvhj83b7Lw0i+IlK}9`0?_@Sc0&+wZ3!@+{)QVD z0ppff^2;=BdXe$-;=yG6YL_H{SR}wPF@1`^-;a2U%jd=13@XeUcb^4Yi3on}ZlHx8 z7Am~iP{FE&!4M4-GuMM{J8yG-_@zit@M{a9z9Z=%7DC9;n7BJENvKb%#y<)x!PHy* z{af#JjyXV>@M~jg!Gi_nTa0+8FpCWi@yt8*F6WR1x?uRVA=P&!mSO?<)~O)&x8}ucxbrUz=D9DJ+QjgdsP+M|7Fh=mdh z4JEwwLAj?G(PTb|qHLXFj_%R0&H zz3`h#guj(U5Q{|Um{^JSa*z-FC`*CVbMAIN297sJ#V;#Rcc59wwWYu!QvvJvX6;^@ z>&bAh=seiH6g=mFU%OkVJ(3J!kqqxJq*%Ia`I?~OSE(Bgp0>Q*J!D8148Jy{7C2Zy z9$ODNeZZj(_ke?RvGB`)+^~V6ejrg43&7)&UyFx662;U3vWC%Y`;mLtB3(HA+OS$6 zVFCM212#2H=Q+>RQ`mjae%5)B+C-NOzc!}&JBg`SV7_ZAOfOf==Y#lEW#l!C?4CpJ zagNN(uJCIkYe9tt^1BUWkLz;070oM-`G}@=L$!HbbNt&5bI;EqhhLjt{k?=-EclN% z_?P7Ufu3H!w$9cZ@|GjqBlGio@M|M$L4^hKVgs27%f)JOZjN{8DEGX)Y5~7CuX_5_1=r62YCJ^9-Bvbg}mwad|UWeQ~Vj09LL zkWVy_u`Jc>65Qq$?!)_Gx^DR8@Yc<#gi%|pOAOYf8~m`6A-?0@fnkPUn^`?8@fC~s z{*l4Foa)se9VeV=PA}f%o|%i*{EsAlQ{7ltwZ(jr!HnT$IanJ;^t@lSf;Ak4z^lt1 z{1J=R%xd)y5@E53@W~Ub>_e_ac0hfc9KDai*(riM?2eu+$L2OG2F#@7k${=D@dI=N?=W z1pKDDxBzPle3=2hG#W1CR#D&1P|oMwcVM{T*UozNj}maP2>9{|xhut5U287hn|HY9 z=0u0zRJ?^(Tih#b?$d(;1$^e<<4?N>=XGHCwZYXtNx;P-;HORs-l|$RHtt>t9-PCk zt%Mc@V4=imh7uWjd;i`8gWUP^#BZv%|FZ;NERavPk!POoduMNRzNT_DT|WG#qAaM| zLS8uma;q|Z<UC-#srUI{ey+uKq=WE*89NjG*)LjXq7tXF7cAyvRK>S2XywnYFOOg89rk zW^4si=irn zDsVA}Ut0w&^srFj97BaQgYhEz%tOEOh(Adib@8wUEMs_`Ednd10_&hSVb;a8J!O9DZ$T^%IG+SWusDP}9qeq72A; zE_RR115)_4k+q=00{H?1nHE+CvM_gF=bo8s82sAI>fa>HV!`}AgE@ZZR~G2)54eZs zMO65;p|!xm0{Z<1v=r_&nM&FCF8AQvso~cKSN|^277OqX7~oy>FhWqs&2$fR{$BUU zJXC>S8(9k~ERg@$K(4JJwQ8uc=U47kxQd8IUr7AgDyW}I;Kf3P4;m_XD}quvpr=8D zWy6)At5T+0c-{HY=Ekgx4!_U&z=bQ_ydx98EWs@L9%EK5=&+FBLxu!3@EP94;T&r= zaDtg1zHjC0&-uhO{HEgm9};)5@b4MWlf|9-Lj5ZXn~VQ~W$xq8uLXc#TM8`_z(R^{ zLyGEXJyQuB{jhs-9>Bt{o%w37B!O6v`v$qk=QH_r0LD>>=Hk8O;`^QV_X-ifZz|qG zt1aptgF1EGxKxb7p^{(epZd)4;&Jz^{AdXLrXu{G5@E53aL!PyL9zxZ`rx$ zc&nFoom0J=O`G`n>)jLc$Q6EVVlAYwAnvOtKI?pM&8n`hRcD^wJo4C1_sE>m@SEz? z|0O{d3*@y1a(t>j7~&6WpjUa&0lq4BqxTz(o&13F4xDFN@tX>;P-=^|-=w8y`l3p0 zmAnz-$a43L{5%u<+KlSw5>c^;DE|$&_+fCrxAr=NMu3YhaSzB(rNOVAvRc4k0eE4I zCxE9G)`w-|g)@yH+a7ig$;(9Wn~LHuB#L4Ixz2=?=dy!@)f=#u5gIX%Z+FkkwFrJ~ zW-Y9+VCEjpEzVpCEX^CNmv}d#z4=M^w4BE9YtyR#mI#XlE&mO-n6@v9KMQKw1-EmT zd&m-9Ec~YWs=#RrvJfK*B)@IK%jN0O@@aE!`-yu_4h;O-oa%ohm|_vkqQTioBmULl zy6Hm>l^%3{_vEu9@M~wR7C2ZyasqJUj7<%HJ$}`bymF+$+mN~|?s+%Rb;EBesQVUA^t1fkq%i)Dz8&?Y^EO5&PuGdKmb~EJNeyw|64ln%Lyy}+{ zUa{bfOx~42-h2^G6X9*U-#spm4)#5m__cAhV8Q~oV&D$YF{o+{Zv2imxN^A=$xph& zFC%ip>Z|%+iKtj0^51ad0wr xU;Myf(^j3_JdL_wg*!MZ>QRs|6Akusp)z#;^na zx_~*Gi0RlP?lJiW0>5_Js$WS=#R8LiHaEt+uxO<1G*{Q)&t>j%SA*YFIE7DJoN>h# z&Uhjo1*K{@r^!0`_2$RjqjFf`*GAQX z2n$qRc;m*Xg<^RiXTC&dnyhnoyJzJ)4E)-xYQKb4ELeHTjT^JBr^#wHeH#~DxN_M` z`8`?i%ZPRRdxEAdM4rlXV?^1_rx|K4IanOcc)Mfo58OwXJ0bk0BK#YPuvkQxw?w%y zzE_)}({2w9{s#>FMw9eC&$tJ^nl2oEZD1{+umI+rFm5sM#LF zVB2%Z^J?7Kaf#AQ%nkEEEu^p@zBKu@ILIEq-np5ujvRkr z!V14Oq&i5#Di)CZ;Vs-alj#PWA*}H1a`45AP4H_IYaxXN@ntb@9dVC2oNnA@;n5{& zJVBD2vPso!iLF?W^3aqU$Cf999NZ>RZ$8m|8uKj|er;4Oh_FEAN1wPcDn`^qT)hJ| zcW$WjbobQU5#iUSR&yl4VnKaH%$yEzZNR~69US(m5RJ2Y@fq&nc@+wNZFnuvuz=^o zN8C8%G^X+k@ja;uI?XZV+%EUDJcWQ?rZuOG>Ln6tv7qIXwA|vp+-M5X8#FbA*z0z}SPkZXnU2u=eWea{&9r>lX&lWloWU4+W%Dx@~lYD~HBt?nr` zT`l~k!ufIur&v&4&xq8RQg-gs2Z@^4;~jsSdv31A@SBRR@M??uhM2k<-CVIWTn^Xv zHxpvGba|`g1*oxwiNvW8zd}MR79qaTK*ec=Ofg=3s{0u8SPFhqF&1ELfq%jPryks2 ze{i93AD+L;JtuE&!mrJ#UMT?ZqzQ@0M=&@#VS$zp3~hEb$c!-cK65oirUdEiCi~<@P=@Uc=AR^@1VF zf`Xr)`q7%9-UFir{MyA#iw3aJ;?E}5A~^>@pSN8N|3n|q^;@jPZNxR17U~em3$f7R zQo)l`5_f6-_A<^3V*7Ew@J zLo|Na3UQv_Tru!o8q!jcW^924ZZzx_X_ZMZ}U}&Ut0k! z+3AQo!e!BS9TH8zGN+(Xi{ zh3Y>}J*sHC&3zU;Lf60V&PlRBc(uiSXH1=N8@G(OM{p9<3PtrtUSF~7)qb}XRRdS; zGD~>P9ez_;@@mNvvB;9U47HMuQoo*=p`+kZTuQ(TJ@9KwpamTk65Kt31pIV&;8li8 zEg5Z%Jm|jb@L~}BvJ!QhHm{Ln5DO*ZdoukUmyB9>NLP#uNr6jpnG~P3q@ZtWhbcq? zZArm@!)+2NYDFY{L@I+i`Vseu!n-%{Yj+v-T1gbKNR)eGZ4z~os$m~!&LBenW9~)Z z(h0w|2wLD_A;Pwl2w_ArwBqiuc&B>_c*HgTsl=}>fqI?ffmldzuOWe1F#5uLzIBt; z1vftIe3yaO?%>x}LyHQqP~$#B4gMfyKFBbWwXEBJ9h)xjp!z^APhL5`JxDEvT?S{+xl#3BS7N^YUHlaqSG{Z{CjW z&ZUUw3SUqBvJ`cLh1W~;#X^b)3@NJ5#de&_CrUf$I0r3e2EEn)X&;~$BlXkknL7sj zTrpZ$8m{yg3kxp{D;<4dkJm$o$otz%!)pJgC+^)pTW#%DC-2_BX~(wF+1~CsmO$D(fBgwIfdktSCYM7=-kyP9TMI23z(R`=LknKwr;l(g4tSgq{64ArBQhE4hSu&=gIBHL*Oo)g zlhhCkIUY9T@RItu%#Xrc)Go)wXzDfCn%=VbYWI@x1`qtQBxWhqA_XiY`TV4k@GxRJ zZyKBCZU`>SWtZVCYxqs&$PtnwVj;*E3_-ezgXJ7 z+{z%w+#B4d3m?#e-&DGY4BC=|*G#$b%0;|inA~roxEr-%a*7LbSwwh#4!^b#>PX28 zvB--r#WI+S6j3Q}9Q?|V3wv*Jp9{SC1;4fgTF_x3!S;zIsFjYEu3l}s%e^W*0)by! z74=3*46#tUZozUs4t09}KCkqYzfa-Rx3vVh-ID!fTjK`c_?tELL; z{d`D$fp&$$fq)9cK&e$pq3&@nM||{K{IVQByyt_rRg*$w(3TvJn{x1>M4rO-(2MR{ zH6m~OwtFGIK~Vd?Q|lddCE{qw3$e(HCk!DbUHWS0$$h>f>i5e*$Kd*oWXY=|Cre+) zwZG$C?Th^57bXj0zF*Guzx$mF3&ldvkq>+1*ZH?PJg;xCGFT2guhA0O`1cQtfbeT~ zEG@#pLcmc&K<~7`XSx&YwZj7;pnlH1fZruC;MW#V&6f-m3judbAYg43`nN&JJ?e4i zw|hPz2fwzGS`>tZk~ Date: Mon, 1 Jun 2026 12:29:39 +0200 Subject: [PATCH 2/2] fix(jitdump): detect wider macOS CODE_LOAD/CODE_MOVE record layout CPython on macOS declares the jitdump thread_id as uint64_t (filled via pthread_threadid_np) instead of the perf spec's uint32_t, adding 8 bytes (wider field + alignment padding) that shift the name and code bytes in every CODE_LOAD/CODE_MOVE record. The u32-only parser misread code_size as the code address and failed to parse these files at all. Widen tid to u64 and detect the layout per record from the body length: try the standard layout first, fall back to the wider macOS layout when the field offsets are not self-consistent with the record size. Fixes COD-2645 Co-Authored-By: Claude --- src/jitdump/records.rs | 112 ++++++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 36 deletions(-) diff --git a/src/jitdump/records.rs b/src/jitdump/records.rs index c1b4fe2..64d2daa 100644 --- a/src/jitdump/records.rs +++ b/src/jitdump/records.rs @@ -1,8 +1,6 @@ use byteorder::{BigEndian, ByteOrder, LittleEndian}; use linux_perf_event_reader::{Endianness, RawData}; -use super::record::JitDumpRecordHeader; - /// A parsed `JIT_CODE_LOAD` record, for a single jitted function. /// /// This carries the function name and the code bytes. @@ -11,7 +9,11 @@ pub struct JitCodeLoadRecord<'a> { /// The process ID of the runtime generating the jitted code. pub pid: u32, /// The thread ID of the runtime thread generating the jitted code. - pub tid: u32, + /// + /// This is a `u64` because some runtimes (e.g. CPython on macOS, which uses + /// `pthread_threadid_np`) write a 64-bit thread id, in a wider record layout + /// that is detected during parsing (see `parse_impl`). + pub tid: u64, /// The virtual address where `code_bytes` starts in the memory of the process. pub vma: u64, /// The code start address for the jitted code. It is unclear in what cases this would differ from `vma`. @@ -26,11 +28,6 @@ pub struct JitCodeLoadRecord<'a> { } impl<'a> JitCodeLoadRecord<'a> { - /// The offset, in bytes, between the start of the record header and - /// the start of the function name. - pub const NAME_OFFSET_FROM_RECORD_START: usize = - JitDumpRecordHeader::SIZE + 4 + 4 + 8 + 8 + 8 + 8; - pub fn parse(endian: Endianness, data: RawData<'a>) -> Result { match endian { Endianness::LittleEndian => Self::parse_impl::(data), @@ -39,16 +36,44 @@ impl<'a> JitCodeLoadRecord<'a> { } pub fn parse_impl(data: RawData<'a>) -> Result { + // Try the standard layout first (the common case); fall back to the wider + // macOS layout if it isn't self-consistent with the record body length. + // The two layouts differ only in where the name starts, so exactly one of + // them makes `prefix + name_len + 1 + code_size == body_len` hold. + if let Some(record) = Self::try_parse::(data, false) { + return Ok(record); + } + if let Some(record) = Self::try_parse::(data, true) { + return Ok(record); + } + Err(std::io::ErrorKind::InvalidData.into()) + } + + fn try_parse(data: RawData<'a>, macos_wide_layout: bool) -> Option { + let body_len = data.len(); let mut cur = data; - let pid = cur.read_u32::()?; - let tid = cur.read_u32::()?; - let vma = cur.read_u64::()?; - let code_addr = cur.read_u64::()?; - let code_size = cur.read_u64::()?; - let code_index = cur.read_u64::()?; - let function_name = cur.read_string().ok_or(std::io::ErrorKind::UnexpectedEof)?; - let code_bytes = cur.split_off_prefix(code_size as usize)?; - Ok(Self { + let pid = cur.read_u32::().ok()?; + let tid = if macos_wide_layout { + let _pad = cur.read_u32::().ok()?; + cur.read_u64::().ok()? + } else { + u64::from(cur.read_u32::().ok()?) + }; + let vma = cur.read_u64::().ok()?; + let code_addr = cur.read_u64::().ok()?; + let code_size = cur.read_u64::().ok()?; + let code_index = cur.read_u64::().ok()?; + let function_name = cur.read_string()?; + + // Validate this layout against the known body length before trusting it: + // `cur` now points just past the name's NUL, so the bytes consumed so far + // plus the code must exactly fill the body. + let consumed = body_len - cur.len(); + if consumed.checked_add(code_size as usize)? != body_len { + return None; + } + let code_bytes = cur.split_off_prefix(code_size as usize).ok()?; + Some(Self { pid, tid, vma, @@ -58,16 +83,6 @@ impl<'a> JitCodeLoadRecord<'a> { code_bytes, }) } - - /// The offset, in bytes, between the start of the record header and - /// the start of the code bytes. - /// - /// This can be different for each record because the code bytes are after - /// the function name, so this offset depends on the length of the function - /// name. - pub fn code_bytes_offset_from_record_header_start(&self) -> usize { - JitDumpRecordHeader::SIZE + 4 + 4 + 8 + 8 + 8 + 8 + self.function_name.len() + 1 - } } /// A parsed `JIT_CODE_MOVE` record. @@ -76,7 +91,9 @@ pub struct JitCodeMoveRecord { /// The process ID of the runtime generating the jitted code. pub pid: u32, /// The thread ID of the runtime thread generating the jitted code. - pub tid: u32, + /// + /// This is a `u64` for the same reason as [`JitCodeLoadRecord::tid`]. + pub tid: u64, /// The new address where the jitted code starts in the virtual memory of the process. pub vma: u64, /// The old address of this function's code bytes. @@ -98,15 +115,38 @@ impl JitCodeMoveRecord { } pub fn parse_impl(data: RawData) -> Result { + // This record is fixed-size with no trailing data, so the correct layout + // is the one whose fields exactly consume the body (48 bytes for the + // standard u32 tid, 56 for the wider macOS u64 tid + padding). + if let Some(record) = Self::try_parse::(data, false) { + return Ok(record); + } + if let Some(record) = Self::try_parse::(data, true) { + return Ok(record); + } + Err(std::io::ErrorKind::InvalidData.into()) + } + + fn try_parse(data: RawData, macos_wide_layout: bool) -> Option { let mut cur = data; - let pid = cur.read_u32::()?; - let tid = cur.read_u32::()?; - let vma = cur.read_u64::()?; - let old_code_addr = cur.read_u64::()?; - let new_code_addr = cur.read_u64::()?; - let code_size = cur.read_u64::()?; - let code_index = cur.read_u64::()?; - Ok(Self { + let pid = cur.read_u32::().ok()?; + let tid = if macos_wide_layout { + let _pad = cur.read_u32::().ok()?; + cur.read_u64::().ok()? + } else { + u64::from(cur.read_u32::().ok()?) + }; + let vma = cur.read_u64::().ok()?; + let old_code_addr = cur.read_u64::().ok()?; + let new_code_addr = cur.read_u64::().ok()?; + let code_size = cur.read_u64::().ok()?; + let code_index = cur.read_u64::().ok()?; + + // The record must be fully consumed; otherwise we picked the wrong layout. + if !cur.is_empty() { + return None; + } + Some(Self { pid, tid, vma,