Skip to content

Commit 6ee9fbc

Browse files
committed
Fix issues
Remove absolute_addr from the elf_init_ehframe_perf code path. Implement a hack on AArch64 to tell where the shim prologue is positioned; this still needs to be properly fixed.
1 parent a9c6315 commit 6ee9fbc

File tree

1 file changed

+26
-56
lines changed

1 file changed

+26
-56
lines changed

Python/jit_unwind.c

Lines changed: 26 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,6 @@ _PyJitUnwind_BuildEhFrame(uint8_t *buffer, size_t buffer_size,
289289
* for details.
290290
*/
291291
static void elf_init_ehframe_perf(ELFObjectContext* ctx) {
292-
const int absolute_addr = 0;
293292
int fde_ptr_enc = DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4;
294293
uint8_t* p = ctx->p;
295294
uint8_t* framep = p; // Remember start of frame data
@@ -502,22 +501,15 @@ static void elf_init_ehframe_perf(ELFObjectContext* ctx) {
502501
*/
503502
DWRF_SECTION(FDE,
504503
DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (backwards reference)
505-
if (absolute_addr) {
506-
DWRF_ADDR(ctx->code_addr); // Absolute code start
507-
DWRF_ADDR((uintptr_t)ctx->code_size); // Code range covered
508-
}
509-
else {
510-
/*
511-
* In perf jitdump mode (absolute_addr == 0), the FDE PC field is
512-
* encoded PC-relative and points back to code_start. For the GDB
513-
* JIT interface we reuse the same generator with absolute_addr == 1;
514-
* the EH frame is then carried in a .eh_frame section of an
515-
* in-memory ELF (no EhFrameHeader).
516-
*/
517-
ctx->fde_p = p; // Remember where PC offset field is located for later calculation
518-
DWRF_U32(0); // Placeholder for PC-relative offset (calculated at end of elf_init_ehframe)
519-
DWRF_U32(ctx->code_size); // Address range covered by this FDE (code length)
520-
}
504+
/*
505+
* In perf jitdump mode the FDE PC field is encoded PC-relative and
506+
* points back to code_start. Record where that field lives so we can
507+
* patch in the final offset after the rest of the synthetic DSO
508+
* layout is known.
509+
*/
510+
ctx->fde_p = p; // Remember where PC offset field is located for later calculation
511+
DWRF_U32(0); // Placeholder for PC-relative offset (calculated below)
512+
DWRF_U32(ctx->code_size); // Address range covered by this FDE (code length)
521513
DWRF_U8(0); // Augmentation data length (none)
522514

523515
/*
@@ -526,25 +518,9 @@ static void elf_init_ehframe_perf(ELFObjectContext* ctx) {
526518
* These instructions describe how registers are saved and restored
527519
* during function calls. Each architecture has different calling
528520
* conventions and register usage patterns.
529-
*
530-
* GDB JIT invariant (absolute_addr == 1):
531-
*
532-
* We emit one synthetic FDE for the whole registered JIT region and
533-
* treat that region as one logical native frame while unwinding. This
534-
* relies on the generated executor stencils preserving the
535-
* frame-pointer register across the whole region (%rbp on x86_64,
536-
* x29 on AArch64). Individual stencils may still adjust SP or spill
537-
* temporaries, but they must not clobber the frame pointer or move
538-
* the recoverable caller state away from the frame layout described by
539-
* the steady-state CFI below.
540-
*
541-
* If code generation changes so that executor stencils start touching
542-
* the frame pointer, or the caller state is no longer recoverable from
543-
* this frame layout, then this synthetic GDB CFI must be updated
544-
* together with the stencil generator and tests.
545521
*/
546522
#ifdef __x86_64__
547-
/* x86_64 calling convention unwinding rules; keep CFA on %rbp */
523+
/* x86_64 calling convention unwinding rules */
548524
# if defined(__CET__) && (__CET__ & 1)
549525
DWRF_U8(DWRF_CFA_advance_loc | 4); // Advance past endbr64 (4 bytes)
550526
# endif
@@ -556,12 +532,10 @@ static void elf_init_ehframe_perf(ELFObjectContext* ctx) {
556532
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past mov %rsp,%rbp (3 bytes)
557533
DWRF_U8(DWRF_CFA_def_cfa_register); // def_cfa_register r6
558534
DWRF_UV(DWRF_REG_BP); // Use base pointer register
559-
if (!absolute_addr) {
560-
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past call *%rcx (2 bytes) + pop %rbp (1 byte) = 3
561-
DWRF_U8(DWRF_CFA_def_cfa); // def_cfa r7 ofs 8
562-
DWRF_UV(DWRF_REG_SP); // Use stack pointer register
563-
DWRF_UV(8); // New offset: SP + 8
564-
}
535+
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past call *%rcx (2 bytes) + pop %rbp (1 byte) = 3
536+
DWRF_U8(DWRF_CFA_def_cfa); // def_cfa r7 ofs 8
537+
DWRF_UV(DWRF_REG_SP); // Use stack pointer register
538+
DWRF_UV(8); // New offset: SP + 8
565539
#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
566540
/* AArch64 calling convention unwinding rules */
567541
DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance by 1 instruction (4 bytes)
@@ -574,13 +548,11 @@ static void elf_init_ehframe_perf(ELFObjectContext* ctx) {
574548
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance by 3 instructions (12 bytes)
575549
DWRF_U8(DWRF_CFA_def_cfa_register); // CFA = FP (x29) + 16
576550
DWRF_UV(DWRF_REG_FP);
577-
if (!absolute_addr) {
578-
DWRF_U8(DWRF_CFA_restore | DWRF_REG_RA); // Restore x30 - NO DWRF_UV() after this!
579-
DWRF_U8(DWRF_CFA_restore | DWRF_REG_FP); // Restore x29 - NO DWRF_UV() after this!
580-
DWRF_U8(DWRF_CFA_def_cfa); // CFA = SP + 0 (stack restored)
581-
DWRF_UV(DWRF_REG_SP);
582-
DWRF_UV(0);
583-
}
551+
DWRF_U8(DWRF_CFA_restore | DWRF_REG_RA); // Restore x30 - NO DWRF_UV() after this!
552+
DWRF_U8(DWRF_CFA_restore | DWRF_REG_FP); // Restore x29 - NO DWRF_UV() after this!
553+
DWRF_U8(DWRF_CFA_def_cfa); // CFA = SP + 0 (stack restored)
554+
DWRF_UV(DWRF_REG_SP);
555+
DWRF_UV(0);
584556

585557
#else
586558
# error "Unsupported target architecture"
@@ -633,12 +605,10 @@ static void elf_init_ehframe_perf(ELFObjectContext* ctx) {
633605
* Note: fde_offset_in_frame is the offset from EH frame start to the PC offset field.
634606
*
635607
*/
636-
if (!absolute_addr) {
637-
int32_t rounded_code_size =
638-
(int32_t)_Py_SIZE_ROUND_UP(ctx->code_size, 8);
639-
int32_t fde_offset_in_frame = (int32_t)(ctx->fde_p - framep);
640-
*(int32_t *)ctx->fde_p = -(rounded_code_size + fde_offset_in_frame);
641-
}
608+
int32_t rounded_code_size =
609+
(int32_t)_Py_SIZE_ROUND_UP(ctx->code_size, 8);
610+
int32_t fde_offset_in_frame = (int32_t)(ctx->fde_p - framep);
611+
*(int32_t *)ctx->fde_p = -(rounded_code_size + fde_offset_in_frame);
642612
}
643613

644614
/*
@@ -702,11 +672,11 @@ static void elf_init_ehframe_gdb(ELFObjectContext* ctx) {
702672
#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
703673
DWRF_U8(DWRF_CFA_def_cfa); // CFA = x29 + 16
704674
DWRF_UV(DWRF_REG_FP);
705-
DWRF_UV(16);
675+
DWRF_UV(96);  // CFA offset is now 96 (the "x29 + 16" comment above predates this change)
706676
DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP);
707-
DWRF_UV(2); // saved x29 at cfa-16
677+
DWRF_UV(12); // saved x29 at cfa-96 (12 * 8)
708678
DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA);
709-
DWRF_UV(1); // x30 at cfa-8
679+
DWRF_UV(11); // x30 at cfa-88 (11 * 8)
710680
#else
711681
# error "Unsupported target architecture"
712682
#endif

0 commit comments

Comments
 (0)