2 files changed: +10 -7 lines changed
Original file line number | Diff line number | Diff line change
@@ -590,21 +590,20 @@ combine_symbol_mask(const symbol_mask src, symbol_mask dest)
590590// TODO: With dynasm, the per-architecture #ifdef branches below could be
591591// replaced by a single portable emission sequence.
592592
593- // Max oparg for manual codegen. Conservative (AArch64 imm12 limit).
594- // TODO: could be set per-architecture (x86 disp32 has a much higher limit).
595- #define _LOAD_FAST_BORROW_MAX_OPARG 4085
596-
597593// Decode a _LOAD_FAST_BORROW* opcode into register variant and oparg.
598594// Returns 1 if the opcode is a _LOAD_FAST_BORROW variant, 0 otherwise.
599- // Falls back to stencil pipeline for oparg values too large for manual codegen.
595+ // On AArch64, falls back to stencil for oparg > 4085 (imm12 limit).
596+ // https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/LDR--immediate---Load-register--immediate--?lang=en
600597static int
601598_decode_load_fast_borrow (uint16_t opcode , uint16_t insn_oparg ,
602599 int * reg_variant , int * oparg )
603600{
604601 if (opcode >= _LOAD_FAST_BORROW_r01 && opcode <= _LOAD_FAST_BORROW_r23 ) {
605- if (insn_oparg > _LOAD_FAST_BORROW_MAX_OPARG ) {
602+ #if defined(__aarch64__ ) || defined(_M_ARM64 )
603+ if (insn_oparg > 4085 ) {
606604 return 0 ;
607605 }
606+ #endif
608607 * reg_variant = opcode - _LOAD_FAST_BORROW_r01 ;
609608 * oparg = insn_oparg ;
610609 return 1 ;
Original file line number | Diff line number | Diff line change
@@ -217,8 +217,12 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
217217 for case , opname in cases_and_opnames :
218218 # _LOAD_FAST_BORROW uses manual codegen in jit.c,
219219 # so skip stencil generation for its register variants.
220+ # AArch64 keeps stencils as fallback for huge opargs
221+ # (imm12 limit, oparg > 4085).
222+ # https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/LDR--immediate---Load-register--immediate--?lang=en
220223 if opname .startswith ("_LOAD_FAST_BORROW_r" ):
221- continue
224+ if not self .triple .startswith ("aarch64" ):
225+ continue
222226 # Write out a copy of the template with *only* this case
223227 # inserted. This is about twice as fast as #include'ing all
224228 # of executor_cases.c.h each time we compile (since the C
You can’t perform that action at this time.
0 commit comments