Skip to content

Commit 9dc1564

Browse files
cranelift/x64: implement cls for all integer types (#12644)
* cranelift/x64: implement cls for all integer types * refactored to use x64_sar; moved tests to runtests * move i128-cls tests into cls tests
1 parent cfd8a4d commit 9dc1564

3 files changed

Lines changed: 57 additions & 24 deletions

File tree

cranelift/codegen/src/isa/x64/lower.isle

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2332,6 +2332,41 @@
23322332
(rule 0 (do_ctz ty orig_ty src)
23332333
(bsf_or_else ty src (imm $I64 (ty_bits_u64 orig_ty))))
23342334

2335+
;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2336+
2337+
;; 32- and 64-bit inputs are handed straight to `do_cls`, which operates on
;; exactly these widths.
(rule 2 (lower (has_type (ty_32_or_64 ty) (cls src)))
2338+
(do_cls ty src))
2339+
2340+
;; 8- and 16-bit inputs: sign-extend the value to 32 bits, count with the
;; 32-bit `do_cls`, then subtract `32 - ty_bits` to discount the copies of the
;; sign bit that the extension itself introduced.
(rule 1 (lower (has_type (ty_8_or_16 ty) (cls src)))
2341+
(let ((extended Gpr (extend_to_gpr src $I32 (ExtendKind.Sign)))
2342+
(cls Gpr (do_cls $I32 extended)))
2343+
(x64_sub $I32 cls (RegMemImm.Imm (u32_wrapping_sub 32 (ty_bits ty))))))
2344+
2345+
;; 128-bit inputs are processed as two 64-bit halves (registers 0 and 1 of
;; `src`; register 1 is treated as the upper half):
;;   * `upper`     = `do_cls` of register 1.
;;   * `sign_fill` = register 1 shifted right arithmetically by 63, so every
;;                   bit is a copy of the sign bit.
;;   * `xored`     = register 0 XOR `sign_fill`; its leading zeros are the
;;                   leading bits of register 0 that equal the sign bit.
;;   * `lower`     = clz(`xored`) + 63, i.e. the 63 redundant sign bits of a
;;                   uniform upper half plus the matching run in register 0.
;; `upper` is compared against 63: when it differs (the upper half is not
;; all sign bits) the `CC.NZ` cmove keeps `upper`; otherwise `lower` is used.
;; The count always fits in the first result register, so the second result
;; register is the constant 0.
(rule 0 (lower
2346+
(has_type $I128
2347+
(cls src)))
2348+
(let ((upper Gpr (do_cls $I64 (value_regs_get_gpr src 1)))
2349+
(sign_fill Gpr (x64_sarq_mi (value_regs_get_gpr src 1) 63))
2350+
(xored Gpr (x64_xor $I64 (value_regs_get_gpr src 0) sign_fill))
2351+
(lower Gpr (x64_add $I64
2352+
(do_clz $I64 $I64 xored)
2353+
(RegMemImm.Imm 63)))
2354+
(result_lo Gpr
2355+
(with_flags_reg
2356+
(x64_cmpq_mi_sxb upper 63)
2357+
(cmove $I64 (CC.NZ) upper lower))))
2358+
(value_regs result_lo (imm $I64 0))))
2359+
2360+
;; Implementation helper for cls; operates on 32 or 64-bit units.
;; Takes the operation type and the source register, and returns a register
;; holding the count of leading sign bits (not counting the sign bit itself).
2361+
(decl do_cls (Type Gpr) Gpr)
2362+
2363+
;; cls is implemented via clz using the identity:
;;   cls(x) = clz(x ^ (x >> 1)) - 1
;; where `>>` is an arithmetic (sign-propagating) shift, which is why
;; `x64_sar` is used. XOR-ing `x` with its shifted copy leaves a 1 wherever
;; two adjacent bits differ; the top bit of the XOR is always 0 because the
;; arithmetic shift duplicates the sign bit, so the clz result is at least 1
;; and the final `- 1` removes the sign bit itself from the count.
2364+
(rule (do_cls ty src)
2365+
(let ((shifted Gpr (x64_sar ty src (Imm8Gpr.Imm8 1)))
2366+
(xored Gpr (x64_xor ty src (RegMemImm.Reg shifted)))
2367+
(clz Gpr (do_clz ty ty xored)))
2368+
(x64_sub ty clz (RegMemImm.Imm 1))))
2369+
23352370
;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
23362371

23372372
(rule 4 (lower (has_type (ty_32_or_64 ty) (popcnt src)))

cranelift/filetests/filetests/runtests/cls.clif

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
test interpret
22
test run
3+
set enable_llvm_abi_extensions=true
34
target aarch64
5+
target x86_64
46
target riscv64
57
target riscv64 has_zbb
68
target riscv64 has_c has_zcb
9+
set enable_multi_ret_implicit_sret
710
target s390x
8-
; not implemented on `x86_64`
911

1012
function %cls_i8(i8) -> i8 {
1113
block0(v0: i8):
@@ -46,3 +48,22 @@ block0(v0: i64):
4648
; run: %cls_i64(0x4000000000000000) == 0
4749
; run: %cls_i64(-1) == 63
4850
; run: %cls_i64(0) == 63
51+
52+
; 128-bit `cls` test: applies `cls` to the i128 argument and returns the
; i128 result, which the `run` directives that follow compare against
; expected counts.
function %cls_i128(i128) -> i128 {
53+
block0(v0: i128):
54+
v1 = cls v0
55+
return v1
56+
}
57+
; run: %cls_i128(1) == 126
58+
; run: %cls_i128(0x40000000_00000000_00000000_00000000) == 0
59+
; run: %cls_i128(0) == 127
60+
; run: %cls_i128(0x00000000_00000000_00000000_00000000) == 127
61+
; run: %cls_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 63
62+
; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 63
63+
; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 127
64+
; run: %cls_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 0
65+
; run: %cls_i128(0x3FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 1
66+
; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_7FFFFFFF_FFFFFFFF) == 63
67+
; run: %cls_i128(0xC0000000_00000000_80000000_00000000) == 1
68+
; run: %cls_i128(0xC0000000_00000000_00000000_00000000) == 1
69+
; run: %cls_i128(0x80000000_00000000_80000000_00000000) == 0

cranelift/filetests/filetests/runtests/i128-cls.clif

Lines changed: 0 additions & 23 deletions
This file was deleted.

0 commit comments

Comments
 (0)