Skip to content

Commit cd985e0

Browse files
authored
Ensure register allocation ordering for i8x16 popcnt (#12010)
Closes bytecodealliance/wasmtime#11991. This commit fixes the register allocation ordering for `i8x16.popcnt`. This change avoids having to deal with a potential spill while holding the scratch register scope, by allocating the floating-point register prior to acquiring the scratch register.
1 parent 7d41355 commit cd985e0

2 files changed

Lines changed: 119 additions & 1 deletion

File tree

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
(module
5+
(type (;0;) (func (param v128 i64)))
6+
(table (;0;) 0 265945 funcref)
7+
(global (;0;) (mut f32) f32.const -0x1.4f4f4ep-48 (;=-0.000000000000004653358;))
8+
(global (;1;) (mut f32) f32.const -0x1.cbcb4ep+76 (;=-135707280000000000000000;))
9+
(global (;2;) (mut v128) v128.const i32x4 0xff500177 0x01bbffff 0x5e010150 0x3b3b0177)
10+
(func (;0;) (type 0) (param v128 i64)
11+
local.get 0
12+
local.get 1
13+
global.get 1
14+
global.get 0
15+
global.get 1
16+
global.get 1
17+
global.get 1
18+
global.get 1
19+
global.get 0
20+
global.get 1
21+
global.get 1
22+
global.get 0
23+
global.get 1
24+
global.get 1
25+
local.get 0
26+
i8x16.popcnt
27+
global.get 0
28+
global.get 2
29+
i8x16.popcnt
30+
unreachable
31+
)
32+
)
33+
;; wasm[0]::function[0]:
34+
;; pushq %rbp
35+
;; movq %rsp, %rbp
36+
;; movq 8(%rdi), %r11
37+
;; movq 0x10(%r11), %r11
38+
;; addq $0x8c, %r11
39+
;; cmpq %rsp, %r11
40+
;; ja 0x1c5
41+
;; 1c: movq %rdi, %r14
42+
;; subq $0x30, %rsp
43+
;; movq %rdi, 0x28(%rsp)
44+
;; movq %rsi, 0x20(%rsp)
45+
;; movdqu %xmm0, 0x10(%rsp)
46+
;; movq %rdx, 8(%rsp)
47+
;; movss 0x50(%r14), %xmm0
48+
;; movss 0x40(%r14), %xmm1
49+
;; movss 0x50(%r14), %xmm2
50+
;; movss 0x50(%r14), %xmm3
51+
;; movss 0x50(%r14), %xmm4
52+
;; movss 0x50(%r14), %xmm5
53+
;; movss 0x40(%r14), %xmm6
54+
;; movss 0x50(%r14), %xmm7
55+
;; movss 0x50(%r14), %xmm8
56+
;; movss 0x40(%r14), %xmm9
57+
;; movss 0x50(%r14), %xmm10
58+
;; movss 0x50(%r14), %xmm11
59+
;; movdqu 0x10(%rsp), %xmm12
60+
;; vpand 0x13e(%rip), %xmm12, %xmm15
61+
;; vpsrlw $4, %xmm12, %xmm12
62+
;; vpand 0x130(%rip), %xmm12, %xmm12
63+
;; movdqu 0x137(%rip), %xmm13
64+
;; vpshufb %xmm12, %xmm13, %xmm12
65+
;; vpshufb %xmm15, %xmm13, %xmm15
66+
;; vpaddb %xmm15, %xmm12, %xmm12
67+
;; movss 0x40(%r14), %xmm13
68+
;; movdqu 0x60(%r14), %xmm14
69+
;; movdqu 0x10(%rsp), %xmm15
70+
;; subq $0x10, %rsp
71+
;; movdqu %xmm15, (%rsp)
72+
;; movq 0x18(%rsp), %r11
73+
;; pushq %r11
74+
;; subq $4, %rsp
75+
;; movss %xmm0, (%rsp)
76+
;; subq $4, %rsp
77+
;; movss %xmm1, (%rsp)
78+
;; subq $4, %rsp
79+
;; movss %xmm2, (%rsp)
80+
;; subq $4, %rsp
81+
;; movss %xmm3, (%rsp)
82+
;; subq $4, %rsp
83+
;; movss %xmm4, (%rsp)
84+
;; subq $4, %rsp
85+
;; movss %xmm5, (%rsp)
86+
;; subq $4, %rsp
87+
;; movss %xmm6, (%rsp)
88+
;; subq $4, %rsp
89+
;; movss %xmm7, (%rsp)
90+
;; subq $4, %rsp
91+
;; movss %xmm8, (%rsp)
92+
;; subq $4, %rsp
93+
;; movss %xmm9, (%rsp)
94+
;; subq $4, %rsp
95+
;; movss %xmm10, (%rsp)
96+
;; subq $4, %rsp
97+
;; movss %xmm11, (%rsp)
98+
;; subq $0x10, %rsp
99+
;; movdqu %xmm12, (%rsp)
100+
;; subq $4, %rsp
101+
;; movss %xmm13, (%rsp)
102+
;; vpand 0x3b(%rip), %xmm14, %xmm15
103+
;; vpsrlw $4, %xmm14, %xmm14
104+
;; vpand 0x2d(%rip), %xmm14, %xmm14
105+
;; movdqu 0x35(%rip), %xmm0
106+
;; vpshufb %xmm14, %xmm0, %xmm14
107+
;; vpshufb %xmm15, %xmm0, %xmm15
108+
;; vpaddb %xmm15, %xmm14, %xmm14
109+
;; ud2
110+
;; addq $0x30, %rsp
111+
;; popq %rbp
112+
;; retq
113+
;; 1c5: ud2
114+
;; 1c7: addb %al, (%rax)
115+
;; 1c9: addb %al, (%rax)
116+
;; 1cb: addb %al, (%rax)
117+
;; 1cd: addb %al, (%rax)
118+
;; 1cf: addb %cl, (%rdi)

winch/codegen/src/isa/x64/masm.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3059,6 +3059,7 @@ impl Masm for MacroAssembler {
30593059
self.ensure_has_avx()?;
30603060

30613061
let reg = writable!(context.pop_to_reg(self, None)?.reg);
3062+
let reg2 = writable!(context.any_fpr(self)?);
30623063

30633064
// This works by using a lookup table to determine the count of bits
30643065
// set in the upper 4 bits and lower 4 bits separately and then adding
@@ -3103,7 +3104,6 @@ impl Masm for MacroAssembler {
31033104
let address = masm.asm.add_constant(&[
31043105
0x0, 0x1, 0x1, 0x2, 0x1, 0x2, 0x2, 0x3, 0x1, 0x2, 0x2, 0x3, 0x2, 0x3, 0x3, 0x4,
31053106
]);
3106-
let reg2 = writable!(context.any_fpr(masm)?);
31073107
masm.asm
31083108
.xmm_mov_mr(&address, reg2, OperandSize::S128, MemFlags::trusted());
31093109
// Use the upper 4 bits as an index into the lookup table.

0 commit comments

Comments
 (0)