@@ -204,7 +204,17 @@ static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) {
204204// DWARF EH FRAME GENERATION
205205// =============================================================================
206206
207- static void elf_init_ehframe (ELFObjectContext * ctx , int absolute_addr );
207+ static void elf_init_ehframe_perf (ELFObjectContext * ctx );
208+ static void elf_init_ehframe_gdb (ELFObjectContext * ctx );
209+
210+ static inline void elf_init_ehframe (ELFObjectContext * ctx , int absolute_addr ) {
211+ if (absolute_addr ) {
212+ elf_init_ehframe_gdb (ctx );
213+ }
214+ else {
215+ elf_init_ehframe_perf (ctx );
216+ }
217+ }
208218
209219size_t
210220_PyJitUnwind_EhFrameSize (int absolute_addr )
@@ -261,14 +271,26 @@ _PyJitUnwind_BuildEhFrame(uint8_t *buffer, size_t buffer_size,
261271 * 1. A CIE (Common Information Entry) describing the calling convention.
262272 * 2. An FDE (Frame Description Entry) describing how to unwind the JIT frame.
263273 *
264- * The caller selects the FDE address encoding through absolute_addr:
265- * - 0: emit PC-relative addresses for perf's synthesized DSO layout.
266- * - 1: emit absolute addresses for the GDB JIT in-memory ELF.
274+ * Two flavors are emitted, dispatched on the absolute_addr flag:
275+ *
276+ * - absolute_addr == 0 (elf_init_ehframe_perf): PC-relative FDE address
277+ * encoding for perf's synthesized DSO layout. The CIE describes the
278+ * trampoline's entry state and the FDE walks through the prologue and
279+ * epilogue with advance_loc instructions. This matches the pre-existing
280+ * perf_jit_trampoline behavior byte-for-byte.
281+ *
282+ * - absolute_addr == 1 (elf_init_ehframe_gdb): absolute FDE address
283+ * encoding for the GDB JIT in-memory ELF. The CIE describes the
284+ * steady-state frame layout (CFA = %rbp+16 / x29+16, with saved fp and
285+ * return-address column at fixed offsets) and the FDE emits no further
286+ * CFI. The same rule applies at every PC in the registered region,
287+ * which is correct for executor stencils (they pin the frame pointer
288+ * across the region). This is the GDB-side fix; see elf_init_ehframe_gdb
289+ * for details.
267290 */
268- static void elf_init_ehframe (ELFObjectContext * ctx , int absolute_addr ) {
269- int fde_ptr_enc = absolute_addr
270- ? DWRF_EH_PE_absptr
271- : (DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4 );
291+ static void elf_init_ehframe_perf (ELFObjectContext * ctx ) {
292+ const int absolute_addr = 0 ;
293+ int fde_ptr_enc = DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4 ;
272294 uint8_t * p = ctx -> p ;
273295 uint8_t * framep = p ; // Remember start of frame data
274296
@@ -619,6 +641,90 @@ static void elf_init_ehframe(ELFObjectContext* ctx, int absolute_addr) {
619641 }
620642}
621643
644+ /*
645+ * Build .eh_frame data for the GDB JIT interface.
646+ *
647+ * The FDE PC field is encoded with DWRF_EH_PE_absptr so GDB resolves the
648+ * covered address range directly from the generated ELF, without needing
649+ * the perf-style synthesized DSO layout.
650+ *
651+ * The CIE's initial instructions describe the STEADY-STATE frame layout
652+ * directly:
653+ *
654+ * x86_64: CFA = %rbp + 16, saved %rbp at cfa-16, RA at cfa-8
655+ * AArch64: CFA = x29 + 16, saved x29 at cfa-16, x30 at cfa-8
656+ *
657+ * The FDE emits no further CFI, so that rule applies uniformly at every
658+ * PC in the registered JIT region. This is correct for executor stencils:
659+ * they are compiled with -mframe-pointer=reserved and the optimizer
660+ * asserts they never touch the frame pointer (see Tools/jit/_optimizers.py
661+ * _validate()), so %rbp / x29 stays pinned across the whole region.
662+ *
663+ * The compiled shim does execute a real push/mov prologue, so within its
664+ * first few bytes the steady-state rule is slightly off (there the
665+ * shim's %rbp is still the caller's). This is a small, well-bounded
666+ * tradeoff accepted because GDB rarely stops inside a 4-byte prologue
667+ * window and backtraces remain correctly structured (they just skip one
668+ * frame) rather than corrupt (wrong RA).
669+ *
670+ * If stencil code generation changes so that executors start touching
671+ * the frame pointer, this helper must be updated together with the
672+ * stencil generator and tests.
673+ */
674+ static void elf_init_ehframe_gdb (ELFObjectContext * ctx ) {
675+ int fde_ptr_enc = DWRF_EH_PE_absptr ; // FDE PC fields are written as absolute pointers
676+ uint8_t * p = ctx -> p ; // Local write cursor; stored back into ctx->p at the end
677+ uint8_t * framep = p ; // Remember start of frame data
678+
679+ DWRF_SECTION (CIE ,
680+ DWRF_U32 (0 ); // CIE ID (0 indicates this is a CIE)
681+ DWRF_U8 (DWRF_CIE_VERSION ); // CIE version (1)
682+ DWRF_STR ("zR" ); // Augmentation string "zR": z = augmentation data present, R = FDE pointer encoding (no LSDA entry)
683+ #ifdef __x86_64__
684+ DWRF_UV (1 ); // Code alignment factor (x86_64: 1 byte)
685+ #elif defined(__aarch64__ ) && defined(__AARCH64EL__ ) && !defined(__ILP32__ )
686+ DWRF_UV (4 ); // Code alignment factor (AArch64: 4 bytes per instruction)
687+ #endif
688+ DWRF_SV (- (int64_t )sizeof (uintptr_t )); // Data alignment factor (negative); the offsets below are multiples of it
689+ DWRF_U8 (DWRF_REG_RA ); // Return address register number
690+ DWRF_UV (1 ); // Augmentation data length (1 byte: the pointer encoding below)
691+ DWRF_U8 (fde_ptr_enc ); // FDE pointer encoding (absptr)
692+
693+ /* Initial CFI: steady-state frame layout. */
694+ #ifdef __x86_64__
695+ DWRF_U8 (DWRF_CFA_def_cfa ); // CFA = %rbp + 16
696+ DWRF_UV (DWRF_REG_BP );
697+ DWRF_UV (16 );
698+ DWRF_U8 (DWRF_CFA_offset | DWRF_REG_RA );
699+ DWRF_UV (1 ); // RA at cfa-8
700+ DWRF_U8 (DWRF_CFA_offset | DWRF_REG_BP );
701+ DWRF_UV (2 ); // saved %rbp at cfa-16
702+ #elif defined(__aarch64__ ) && defined(__AARCH64EL__ ) && !defined(__ILP32__ )
703+ DWRF_U8 (DWRF_CFA_def_cfa ); // CFA = x29 + 16
704+ DWRF_UV (DWRF_REG_FP );
705+ DWRF_UV (16 );
706+ DWRF_U8 (DWRF_CFA_offset | DWRF_REG_FP );
707+ DWRF_UV (2 ); // saved x29 at cfa-16
708+ DWRF_U8 (DWRF_CFA_offset | DWRF_REG_RA );
709+ DWRF_UV (1 ); // x30 at cfa-8
710+ #else
711+ # error "Unsupported target architecture"
712+ #endif
713+ DWRF_ALIGNNOP (sizeof (uintptr_t )); // Align to pointer boundary
714+ )
715+
716+ DWRF_SECTION (FDE ,
717+ DWRF_U32 ((uint32_t )(p - framep )); // Offset to CIE (backwards reference)
718+ DWRF_ADDR (ctx -> code_addr ); // Absolute code start
719+ DWRF_ADDR ((uintptr_t )ctx -> code_size ); // Code range covered
720+ DWRF_U8 (0 ); // Augmentation data length (none)
721+ /* No per-PC CFI: the CIE's initial state covers the whole region. */
722+ DWRF_ALIGNNOP (sizeof (uintptr_t )); // Align to pointer boundary
723+ )
724+
725+ ctx -> p = p ; // Publish the advanced cursor so the caller sees what was written
726+ }
727+
622728#if defined(__linux__ ) && defined(__ELF__ )
623729enum {
624730 JIT_NOACTION = 0 ,
0 commit comments