Skip to content

Commit 0db81cc

Browse files
authored
winch: Properly zero extend the CAS result (#12994)
Accoding to the spec, the result of a compare-and-swap operation must be zero extended according to the result type. Prior to this commit the implementation was skipping the 32 -> 64 bit case. When the swap happens, the value in `rax` remains untouched, however, it's possible that the higher bits are undefined, in which case the zero extension is needed to ensure that the right value is returned. This bug is only reproducible for values in which the lower bits match the expected value, like in the example included in this commit.
1 parent e70a31a commit 0db81cc

6 files changed

Lines changed: 83 additions & 23 deletions

File tree

crates/test-util/src/wast.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,7 @@ impl WastTest {
491491
"spec_testsuite/proposals/threads/exports.wast",
492492
"spec_testsuite/proposals/threads/memory.wast",
493493
"misc_testsuite/memory64/threads.wast",
494+
"misc_testsuite/winch/rmw32_cmpxchg_u_wrap.wast",
494495
];
495496

496497
if unsupported.iter().any(|part| self.path.ends_with(part)) {

tests/disas/winch/x64/atomic/rmw/cmpxchg/i64_atomic_rmw32_cmpxchgu.wat

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
;; movq 0x18(%r11), %r11
1313
;; addq $0x20, %r11
1414
;; cmpq %rsp, %r11
15-
;; ja 0x6f
15+
;; ja 0x71
1616
;; 1c: movq %rdi, %r14
1717
;; subq $0x10, %rsp
1818
;; movq %rdi, 8(%rsp)
@@ -22,7 +22,7 @@
2222
;; movl $0, %edx
2323
;; andl $3, %edx
2424
;; cmpl $0, %edx
25-
;; jne 0x71
25+
;; jne 0x73
2626
;; 4d: movl $0, %edx
2727
;; movq 0x30(%r14), %r11
2828
;; movq (%r11), %rbx
@@ -33,8 +33,9 @@
3333
;; popq %rcx
3434
;; popq %rax
3535
;; lock cmpxchgl %ecx, (%rbx)
36+
;; movl %eax, %eax
3637
;; addq $0x10, %rsp
3738
;; popq %rbp
3839
;; retq
39-
;; 6f: ud2
4040
;; 71: ud2
41+
;; 73: ud2
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
4+
(module
5+
(memory 1 1 shared)
6+
(func (export "f") (result i64)
7+
i32.const 0
8+
i64.const 0xDEADBEEF00000000
9+
i64.const 0x1234
10+
i64.atomic.rmw32.cmpxchg_u))
11+
;; wasm[0]::function[0]:
12+
;; pushq %rbp
13+
;; movq %rsp, %rbp
14+
;; movq 8(%rdi), %r11
15+
;; movq 0x18(%r11), %r11
16+
;; addq $0x20, %r11
17+
;; cmpq %rsp, %r11
18+
;; ja 0x76
19+
;; 1c: movq %rdi, %r14
20+
;; subq $0x10, %rsp
21+
;; movq %rdi, 8(%rsp)
22+
;; movq %rsi, (%rsp)
23+
;; movl $0x1234, %eax
24+
;; movabsq $16045690981097406464, %rcx
25+
;; movl $0, %edx
26+
;; andl $3, %edx
27+
;; cmpl $0, %edx
28+
;; jne 0x78
29+
;; 52: movl $0, %edx
30+
;; movq 0x30(%r14), %r11
31+
;; movq (%r11), %rbx
32+
;; movl %edx, %edx
33+
;; addq %rdx, %rbx
34+
;; pushq %rcx
35+
;; pushq %rax
36+
;; popq %rcx
37+
;; popq %rax
38+
;; lock cmpxchgl %ecx, (%rbx)
39+
;; movl %eax, %eax
40+
;; addq $0x10, %rsp
41+
;; popq %rbp
42+
;; retq
43+
;; 76: ud2
44+
;; 78: ud2
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
;;! threads = true
2+
3+
(module
4+
(memory 1 1 shared)
5+
(func (export "f") (result i64)
6+
i32.const 0
7+
i64.const 0xDEADBEEF00000000
8+
i64.const 0x1234
9+
i64.atomic.rmw32.cmpxchg_u))
10+
11+
(assert_return (invoke "f") (i64.const 0))

winch/codegen/src/codegen/mod.rs

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1416,21 +1416,27 @@ where
14161416
size: OperandSize,
14171417
extend: Option<Extend<Zero>>,
14181418
) -> Result<()> {
1419-
// Emission for this instruction is a bit trickier. The address for the CAS is the 3rd from
1420-
// the top of the stack, and we must emit instruction to compute the actual address with
1421-
// `emit_compute_heap_address_align_checked`, while we still have access to self. However,
1422-
// some ISAs have requirements with regard to the registers used for some arguments, so we
1423-
// need to pass the context to the masm. To solve this issue, we pop the two first
1424-
// arguments from the stack, compute the address, push back the arguments, and hand over
1425-
// the control to masm. The implementer of `atomic_cas` can expect to find `expected` and
1426-
// `replacement` at the top the context's stack.
1427-
1428-
// pop the args
1419+
// At this point in the stack we have:
1420+
// [ address, expected, replacement ]
1421+
//
1422+
// Therefore, emission for this instruction is a bit
1423+
// trickier. The address for the CAS is the 3rd from the top
1424+
// of the stack, and we must emit instruction to compute the
1425+
// actual address with
1426+
// `emit_compute_heap_address_align_checked`, while we still
1427+
// have access to self. However, some ISAs have requirements
1428+
// with regard to the registers used for some arguments, so we
1429+
// need to pass the context to the masm. To solve this issue,
1430+
// we pop the two first arguments from the stack, compute the
1431+
// address, push back the arguments, and hand over the control
1432+
// to masm. The implementer of `atomic_cas` can expect to find
1433+
// `expected` and `replacement` at the top the context's
1434+
// stack.
1435+
14291436
let replacement = self.context.pop_to_reg(self.masm, None)?;
14301437
let expected = self.context.pop_to_reg(self.masm, None)?;
14311438

14321439
if let Some(addr) = self.emit_compute_heap_address_align_checked(arg, size)? {
1433-
// push back the args
14341440
self.context.stack.push(expected.into());
14351441
self.context.stack.push(replacement.into());
14361442

winch/codegen/src/isa/x64/masm.rs

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1781,23 +1781,20 @@ impl Masm for MacroAssembler {
17811781
) -> Result<()> {
17821782
// `cmpxchg` expects `expected` to be in the `*a*` register.
17831783
// reserve rax for the expected argument.
1784-
let rax = context.reg(regs::rax(), self)?;
17851784

1786-
let replacement = context.pop_to_reg(self, None)?;
1785+
let replacement =
1786+
context.without::<Result<TypedReg>, _, _>(&[regs::rax()], self, |cx, masm| {
1787+
cx.pop_to_reg(masm, None)
1788+
})??;
17871789

1788-
// mark `rax` as allocatable again.
1789-
context.free_reg(rax);
17901790
let expected = context.pop_to_reg(self, Some(regs::rax()))?;
17911791

17921792
self.asm
17931793
.cmpxchg(addr, replacement.reg, writable!(expected.reg), size, flags);
17941794

17951795
if let Some(extend) = extend {
1796-
// We don't need to zero-extend from 32 to 64bits.
1797-
if !(extend.from_bits() == 32 && extend.to_bits() == 64) {
1798-
self.asm
1799-
.movzx_rr(expected.reg, writable!(expected.reg), extend);
1800-
}
1796+
self.asm
1797+
.movzx_rr(expected.reg, writable!(expected.reg), extend);
18011798
}
18021799

18031800
context.stack.push(expected.into());

0 commit comments

Comments
 (0)