Skip to content

Commit 0b07c57

Browse files
committed
Fix CFI prologue mismatch in GDB JIT unwind info
The FDE described a push/mov prologue that executor stencils (-mframe-pointer=reserved) never execute, corrupting unwind at the first few bytes of every region. Move the steady-state CFI into the CIE and split the emitter into perf (unchanged) and gdb helpers.
1 parent 6357698 commit 0b07c57

File tree

1 file changed

+114
-8
lines changed

1 file changed

+114
-8
lines changed

Python/jit_unwind.c

Lines changed: 114 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,17 @@ static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) {
204204
// DWARF EH FRAME GENERATION
205205
// =============================================================================
206206

207-
static void elf_init_ehframe(ELFObjectContext* ctx, int absolute_addr);
207+
static void elf_init_ehframe_perf(ELFObjectContext* ctx);
208+
static void elf_init_ehframe_gdb(ELFObjectContext* ctx);
209+
210+
/*
 * Dispatch to the .eh_frame emitter selected by absolute_addr:
 * a non-zero value selects elf_init_ehframe_gdb(), zero selects
 * elf_init_ehframe_perf(). Keeps the original two-argument entry
 * point so existing callers need no change.
 */
static inline void elf_init_ehframe(ELFObjectContext* ctx, int absolute_addr) {
211+
if (absolute_addr) {
212+
elf_init_ehframe_gdb(ctx);
213+
}
214+
else {
215+
elf_init_ehframe_perf(ctx);
216+
}
217+
}
208218

209219
size_t
210220
_PyJitUnwind_EhFrameSize(int absolute_addr)
@@ -261,14 +271,26 @@ _PyJitUnwind_BuildEhFrame(uint8_t *buffer, size_t buffer_size,
261271
* 1. A CIE (Common Information Entry) describing the calling convention.
262272
* 2. An FDE (Frame Description Entry) describing how to unwind the JIT frame.
263273
*
264-
* The caller selects the FDE address encoding through absolute_addr:
265-
* - 0: emit PC-relative addresses for perf's synthesized DSO layout.
266-
* - 1: emit absolute addresses for the GDB JIT in-memory ELF.
274+
* Two flavors are emitted, dispatched on the absolute_addr flag:
275+
*
276+
* - absolute_addr == 0 (elf_init_ehframe_perf): PC-relative FDE address
277+
* encoding for perf's synthesized DSO layout. The CIE describes the
278+
* trampoline's entry state and the FDE walks through the prologue and
279+
* epilogue with advance_loc instructions. This matches the pre-existing
280+
* perf_jit_trampoline behavior byte-for-byte.
281+
*
282+
* - absolute_addr == 1 (elf_init_ehframe_gdb): absolute FDE address
283+
* encoding for the GDB JIT in-memory ELF. The CIE describes the
284+
* steady-state frame layout (CFA = %rbp+16 / x29+16, with saved fp and
285+
* return-address column at fixed offsets) and the FDE emits no further
286+
* CFI. The same rule applies at every PC in the registered region,
287+
* which is correct for executor stencils (they pin the frame pointer
288+
* across the region). This is the GDB-side fix; see elf_init_ehframe_gdb
289+
* for details.
267290
*/
268-
static void elf_init_ehframe(ELFObjectContext* ctx, int absolute_addr) {
269-
int fde_ptr_enc = absolute_addr
270-
? DWRF_EH_PE_absptr
271-
: (DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4);
291+
static void elf_init_ehframe_perf(ELFObjectContext* ctx) {
292+
const int absolute_addr = 0;
293+
int fde_ptr_enc = DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4;
272294
uint8_t* p = ctx->p;
273295
uint8_t* framep = p; // Remember start of frame data
274296

@@ -619,6 +641,90 @@ static void elf_init_ehframe(ELFObjectContext* ctx, int absolute_addr) {
619641
}
620642
}
621643

644+
/*
645+
* Build .eh_frame data for the GDB JIT interface.
646+
*
647+
* The FDE PC field is encoded with DWRF_EH_PE_absptr so GDB resolves the
648+
* covered address range directly from the generated ELF, without needing
649+
* the perf-style synthesized DSO layout.
650+
*
651+
* The CIE's initial instructions describe the STEADY-STATE frame layout
652+
* directly:
653+
*
654+
* x86_64: CFA = %rbp + 16, saved %rbp at cfa-16, RA at cfa-8
655+
* AArch64: CFA = x29 + 16, saved x29 at cfa-16, x30 at cfa-8
656+
*
657+
* The FDE emits no further CFI, so that rule applies uniformly at every
658+
* PC in the registered JIT region. This is correct for executor stencils:
659+
* they are compiled with -mframe-pointer=reserved and the optimizer
660+
* asserts they never touch the frame pointer (see Tools/jit/_optimizers.py
661+
* _validate()), so %rbp / x29 stays pinned across the whole region.
662+
*
663+
* The compiled shim does execute a real push/mov prologue, so within its
664+
* first few bytes the steady-state rule is slightly off (there the
665+
* shim's %rbp is still the caller's). This is a small, well-bounded
666+
* tradeoff accepted because GDB rarely stops inside a 4-byte prologue
667+
* window and backtraces remain correctly structured (they just skip one
668+
* frame) rather than corrupt (wrong RA).
669+
*
670+
* If stencil code generation changes so that executors start touching
671+
* the frame pointer, this helper must be updated together with the
672+
* stencil generator and tests.
673+
*/
674+
static void elf_init_ehframe_gdb(ELFObjectContext* ctx) {
675+
int fde_ptr_enc = DWRF_EH_PE_absptr;
676+
uint8_t* p = ctx->p;
677+
uint8_t* framep = p; // Start of the frame data; the FDE's CIE offset below is measured from here
678+
679+
DWRF_SECTION(CIE,
680+
DWRF_U32(0); // CIE ID (0 indicates this is a CIE)
681+
DWRF_U8(DWRF_CIE_VERSION); // CIE version (1)
682+
DWRF_STR("zR"); // Augmentation string: 'z' = augmentation data present, 'R' = FDE pointer encoding byte follows (no LSDA)
683+
#ifdef __x86_64__
684+
DWRF_UV(1); // Code alignment factor (x86_64: 1 byte)
685+
#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
686+
DWRF_UV(4); // Code alignment factor (AArch64: 4 bytes per instruction)
687+
#endif
688+
DWRF_SV(-(int64_t)sizeof(uintptr_t)); // Data alignment factor (negative pointer size); scales the DWRF_CFA_offset operands below
689+
DWRF_U8(DWRF_REG_RA); // Return address register number
690+
DWRF_UV(1); // Augmentation data length (one byte: the 'R' pointer encoding)
691+
DWRF_U8(fde_ptr_enc); // FDE pointer encoding (absptr)
692+
693+
/*
 * Initial CFI: steady-state frame layout.
 * Each DWRF_CFA_offset operand N is a factored offset, i.e. the
 * register is saved at CFA + N * (data alignment factor), so with a
 * factor of -8, N=1 means cfa-8 and N=2 means cfa-16.
 */
694+
#ifdef __x86_64__
695+
DWRF_U8(DWRF_CFA_def_cfa); // CFA = %rbp + 16
696+
DWRF_UV(DWRF_REG_BP);
697+
DWRF_UV(16);
698+
DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA);
699+
DWRF_UV(1); // RA at cfa-8
700+
DWRF_U8(DWRF_CFA_offset | DWRF_REG_BP);
701+
DWRF_UV(2); // saved %rbp at cfa-16
702+
#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
703+
DWRF_U8(DWRF_CFA_def_cfa); // CFA = x29 + 16
704+
DWRF_UV(DWRF_REG_FP);
705+
DWRF_UV(16);
706+
DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP);
707+
DWRF_UV(2); // saved x29 at cfa-16
708+
DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA);
709+
DWRF_UV(1); // x30 at cfa-8
710+
#else
711+
# error "Unsupported target architecture"
712+
#endif
713+
DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer boundary
714+
)
715+
716+
DWRF_SECTION(FDE,
717+
DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (backwards reference)
718+
DWRF_ADDR(ctx->code_addr); // Absolute code start (matches the absptr encoding declared in the CIE)
719+
DWRF_ADDR((uintptr_t)ctx->code_size); // Code range covered, in bytes from the code start
720+
DWRF_U8(0); // Augmentation data length (none)
721+
/* No per-PC CFI: the CIE's initial state covers the whole region. */
722+
DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer boundary
723+
)
724+
725+
ctx->p = p; // Publish the advanced write cursor back to the context
726+
}
727+
622728
#if defined(__linux__) && defined(__ELF__)
623729
enum {
624730
JIT_NOACTION = 0,

0 commit comments

Comments
 (0)