|
;; Priority-0 rule for `do_ctz`: emits `bsf_or_else` on `src`, passing an
;; immediate holding `ty_bits_u64` of `orig_ty` as the "else" operand.
;; NOTE(review): presumably the "else" value is what is produced when `src` is
;; zero -- confirm against the definition of `bsf_or_else`.
2332 | 2332 | (rule 0 (do_ctz ty orig_ty src) |
2333 | 2333 | (bsf_or_else ty src (imm $I64 (ty_bits_u64 orig_ty)))) |
2334 | 2334 |
|
| 2335 | +;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 2336 | + |
;; `cls` at types matched by `ty_32_or_64`: no widening or adjustment is
;; needed, so the source is handed straight to `do_cls` at that type.
| 2337 | +(rule 2 (lower (has_type (ty_32_or_64 ty) (cls src))) |
| 2338 | + (do_cls ty src)) |
| 2339 | + |
;; `cls` at types matched by `ty_8_or_16`: the source is sign-extended to
;; `$I32` and counted with `do_cls` at `$I32`. Sign extension adds
;; `32 - ty_bits(ty)` extra copies of the sign bit, so that amount is
;; subtracted from the 32-bit count to get the count for the narrow type.
| 2340 | +(rule 1 (lower (has_type (ty_8_or_16 ty) (cls src))) |
| 2341 | + (let ((extended Gpr (extend_to_gpr src $I32 (ExtendKind.Sign))) |
| 2342 | + (cls Gpr (do_cls $I32 extended))) |
| 2343 | + (x64_sub $I32 cls (RegMemImm.Imm (u32_wrapping_sub 32 (ty_bits ty)))))) |
| 2344 | + |
;; `cls` at `$I128`, built from two 64-bit halves of `src` (register 1 is
;; used here as the upper half, register 0 as the lower half).
;;
;; Outline:
;;   * `upper` is the 64-bit `cls` of the upper half. A value other than 63
;;     means the run of sign bits ends inside the upper half, so `upper` is
;;     already the final answer.
;;   * Otherwise the whole upper half equals the sign, and the run continues
;;     into the lower half: the answer is 63 plus the number of leading bits
;;     of the lower half that equal the sign.
| 2345 | +(rule 0 (lower |
| 2346 | + (has_type $I128 |
| 2347 | + (cls src))) |
| 2348 | + (let ((upper Gpr (do_cls $I64 (value_regs_get_gpr src 1))) |
;; Upper half shifted right by 63 via `x64_sarq_mi`.
;; NOTE(review): presumably an arithmetic shift, giving all-zeros or all-ones
;; according to the sign -- confirm against `x64_sarq_mi`.
| 2349 | + (sign_fill Gpr (x64_sarq_mi (value_regs_get_gpr src 1) 63)) |
;; XOR with `sign_fill` so that lower-half bits equal to the sign become 0,
;; which lets `do_clz` count them.
| 2350 | + (xored Gpr (x64_xor $I64 (value_regs_get_gpr src 0) sign_fill)) |
;; Candidate result for the "upper half is all sign bits" case: clz + 63.
| 2351 | + (lower Gpr (x64_add $I64 |
| 2352 | + (do_clz $I64 $I64 xored) |
| 2353 | + (RegMemImm.Imm 63))) |
;; Compare `upper` against 63 and pick between `upper` and `lower` with a
;; conditional move on `CC.NZ`.
;; NOTE(review): this relies on `cmove` yielding its first value operand
;; (`upper`) when the condition holds -- confirm against `cmove`.
| 2354 | + (result_lo Gpr |
| 2355 | + (with_flags_reg |
| 2356 | + (x64_cmpq_mi_sxb upper 63) |
| 2357 | + (cmove $I64 (CC.NZ) upper lower)))) |
;; The result pair is `result_lo` plus a constant-0 second register.
| 2358 | + (value_regs result_lo (imm $I64 0)))) |
| 2359 | + |
| 2360 | +;; Implementation helper for cls; operates on 32 or 64-bit units. |
;; Signature: (operation type, source value in a Gpr) -> count in a Gpr.
| 2361 | +(decl do_cls (Type Gpr) Gpr) |
| 2362 | + |
| 2363 | +;; cls is implemented via clz using the identity (where `>>` is an arithmetic shift): cls(x) = clz(x ^ (x >> 1)) - 1 |
;; Sole rule for `do_cls`. Steps, all performed at width `ty`:
;;   1. `shifted` = `src` shifted right by 1 with `x64_sar`.
;;   2. `xored`   = `src` XOR `shifted`; a bit is 0 exactly where `src` agrees
;;                  with its next-higher neighbour.
;;   3. `clz`     = leading-zero count of `xored` via `do_clz`.
;;   4. result    = `clz` - 1.
| 2364 | +(rule (do_cls ty src) |
| 2365 | + (let ((shifted Gpr (x64_sar ty src (Imm8Gpr.Imm8 1))) |
| 2366 | + (xored Gpr (x64_xor ty src (RegMemImm.Reg shifted))) |
| 2367 | + (clz Gpr (do_clz ty ty xored))) |
| 2368 | + (x64_sub ty clz (RegMemImm.Imm 1)))) |
| 2369 | + |
2335 | 2370 | ;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2336 | 2371 |
|
2337 | 2372 | (rule 4 (lower (has_type (ty_32_or_64 ty) (popcnt src))) |
|
0 commit comments