Skip to content

Commit 7957e62

Browse files
committed
Generate fallback for aarch64
1 parent e0cf241 commit 7957e62

File tree

2 files changed

+10
-7
lines changed

2 files changed

+10
-7
lines changed

Python/jit.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -590,21 +590,20 @@ combine_symbol_mask(const symbol_mask src, symbol_mask dest)
590590
// TODO: With dynasm, the per-architecture #ifdef branches below could be
591591
// replaced by a single portable emission sequence.
592592

593-
// Max oparg for manual codegen. Conservative (AArch64 imm12 limit).
594-
// TODO: could be set per-architecture (x86 disp32 has a much higher limit).
595-
#define _LOAD_FAST_BORROW_MAX_OPARG 4085
596-
597593
// Decode a _LOAD_FAST_BORROW* opcode into register variant and oparg.
598594
// Returns 1 (and fills *reg_variant and *oparg) if the opcode is a _LOAD_FAST_BORROW variant that manual codegen handles, 0 otherwise.
599-
// Falls back to stencil pipeline for oparg values too large for manual codegen.
595+
// On AArch64, returns 0 when oparg > 4085 so the caller falls back to the stencil pipeline (LDR imm12 offset limit).
596+
// https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/LDR--immediate---Load-register--immediate--?lang=en
600597
static int
601598
_decode_load_fast_borrow(uint16_t opcode, uint16_t insn_oparg,
602599
int *reg_variant, int *oparg)
603600
{
604601
if (opcode >= _LOAD_FAST_BORROW_r01 && opcode <= _LOAD_FAST_BORROW_r23) {
605-
if (insn_oparg > _LOAD_FAST_BORROW_MAX_OPARG) {
602+
#if defined(__aarch64__) || defined(_M_ARM64)
603+
if (insn_oparg > 4085) {
606604
return 0;
607605
}
606+
#endif
608607
*reg_variant = opcode - _LOAD_FAST_BORROW_r01;
609608
*oparg = insn_oparg;
610609
return 1;

Tools/jit/_targets.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,12 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
217217
for case, opname in cases_and_opnames:
218218
# _LOAD_FAST_BORROW uses manual codegen in jit.c,
219219
# so skip stencil generation for its register variants.
220+
# On AArch64, stencils are still generated as a fallback for opargs
221+
# too large for manual codegen (oparg > 4085, the LDR imm12 limit).
222+
# https://developer.arm.com/documentation/ddi0602/2024-06/Base-Instructions/LDR--immediate---Load-register--immediate--?lang=en
220223
if opname.startswith("_LOAD_FAST_BORROW_r"):
221-
continue
224+
if not self.triple.startswith("aarch64"):
225+
continue
222226
# Write out a copy of the template with *only* this case
223227
# inserted. This is about twice as fast as #include'ing all
224228
# of executor_cases.c.h each time we compile (since the C

0 commit comments

Comments
 (0)