Skip to content

Commit 5646ea6

Browse files
authored
Handle StructCmpxchg::expected in Heap2Local (#8491)
We previously assumed that if we were optimizing a StructCmpxchg in Heap2Local, then the flow must be from the `ref` operand. This was based on an assumption that allocations are processed in order of appearance, and because the `ref` operand appears before the `expected` operand, it must be the one getting optimized. But this neglected the fact that array allocations are processed before struct allocations, so if `expected` is an array, it could be optimized first. This faulty assumption led to assertion failures and invalid code. Fix the problem by handling flows from `expected` explicitly. When a non-escaping allocation flows into `expected`, we know it cannot possibly match the value already in the accessed struct, so we can optimize the `cmpxchg` to a `struct.atomic.get`. The replacement pattern uses a scratch local to propagate the value of the `ref` operand past the dropped `expected` expression to the new `struct.atomic.get`. In case the `ref` operand uses another allocation that will be processed later, we must update the parents map to account for the new data flow from the `ref` into the scratch local and from the get of the scratch local through to the `struct.atomic.get`, which fully consumes the value.
1 parent 0e5e24c commit 5646ea6

3 files changed

Lines changed: 285 additions & 27 deletions

File tree

src/ir/parents.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
#ifndef wasm_ir_parents_h
1818
#define wasm_ir_parents_h
1919

20-
#include "parsing.h"
20+
#include "wasm-traversal.h"
21+
#include "wasm.h"
2122

2223
namespace wasm {
2324

@@ -32,6 +33,10 @@ struct Parents {
3233
return nullptr;
3334
}
3435

36+
void setParent(Expression* child, Expression* parent) {
37+
inner.parentMap[child] = parent;
38+
}
39+
3540
private:
3641
struct Inner
3742
: public ExpressionStackWalker<Inner, UnifiedExpressionVisitor<Inner>> {

src/passes/Heap2Local.cpp

Lines changed: 70 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,11 @@
157157
#include "ir/local-graph.h"
158158
#include "ir/parents.h"
159159
#include "ir/properties.h"
160-
#include "ir/type-updating.h"
161160
#include "ir/utils.h"
162161
#include "pass.h"
163162
#include "support/unique_deferring_queue.h"
164163
#include "wasm-builder.h"
164+
#include "wasm-type.h"
165165
#include "wasm.h"
166166

167167
namespace wasm {
@@ -192,6 +192,10 @@ enum class ParentChildInteraction : int8_t {
192192
None,
193193
};
194194

195+
// When we insert scratch locals, we sometimes need to record the flow between
196+
// their set and subsequent get.
197+
using ScratchInfo = std::unordered_map<LocalSet*, LocalGet*>;
198+
195199
// Core analysis that provides an escapes() method to check if an allocation
196200
// escapes in a way that prevents optimizing it away as described above. It also
197201
// stashes information about the relevant expressions as it goes, which helps
@@ -201,21 +205,29 @@ struct EscapeAnalyzer {
201205
// parents, and via branches, and through locals.
202206
//
203207
// We use a lazy graph here because we only need this for reference locals,
204-
// and even among them, only ones we see an allocation is stored to.
208+
// and even among them, only ones we see an allocation is stored to. The
209+
// LocalGraph is augmented by ScratchInfo, since the LocalGraph does not
210+
// know about scratch locals we add. We currently only record scratch locals
211+
// that might possibly have another optimized allocation flowing through them.
212+
// If it's not possible for another optimized allocation to flow through the
213+
// scratch local, then we will never look at it again after creating it and do
214+
// not need to record it here.
205215
const LazyLocalGraph& localGraph;
206-
const Parents& parents;
216+
ScratchInfo& scratchInfo;
217+
Parents& parents;
207218
const BranchUtils::BranchTargets& branchTargets;
208219

209220
const PassOptions& passOptions;
210221
Module& wasm;
211222

212223
EscapeAnalyzer(const LazyLocalGraph& localGraph,
213-
const Parents& parents,
224+
ScratchInfo& scratchInfo,
225+
Parents& parents,
214226
const BranchUtils::BranchTargets& branchTargets,
215227
const PassOptions& passOptions,
216228
Module& wasm)
217-
: localGraph(localGraph), parents(parents), branchTargets(branchTargets),
218-
passOptions(passOptions), wasm(wasm) {}
229+
: localGraph(localGraph), scratchInfo(scratchInfo), parents(parents),
230+
branchTargets(branchTargets), passOptions(passOptions), wasm(wasm) {}
219231

220232
// We must track all the local.sets that write the allocation, to verify
221233
// exclusivity.
@@ -275,15 +287,23 @@ struct EscapeAnalyzer {
275287
}
276288

277289
if (auto* set = parent->dynCast<LocalSet>()) {
278-
// This is one of the sets we are written to, and so we must check for
279-
// exclusive use of our allocation by all the gets that read the value.
280-
// Note the set, and we will check the gets at the end once we know all
281-
// of our sets.
282-
sets.insert(set);
283-
284-
// We must also look at how the value flows from those gets.
285-
for (auto* get : localGraph.getSetInfluences(set)) {
290+
291+
// We must also look at how the value flows from those gets. Check the
292+
// scratchInfo first because it contains sets that localGraph doesn't
293+
// know about.
294+
if (auto it = scratchInfo.find(set); it != scratchInfo.end()) {
295+
auto* get = it->second;
286296
flows.push({get, parents.getParent(get)});
297+
} else {
298+
// This is one of the sets we are written to, and so we must check for
299+
// exclusive use of our allocation by all the gets that read the
300+
// value. Note the set, and we will check the gets at the end once we
301+
// know all of our sets. (For scratch locals above, we know all the
302+
// sets are already accounted for.)
303+
sets.insert(set);
304+
for (auto* get : localGraph.getSetInfluences(set)) {
305+
flows.push({get, parents.getParent(get)});
306+
}
287307
}
288308
}
289309

@@ -1110,17 +1130,42 @@ struct Struct2Local : PostWalker<Struct2Local> {
11101130
}
11111131

11121132
void visitStructCmpxchg(StructCmpxchg* curr) {
1113-
if (analyzer.getInteraction(curr->ref) != ParentChildInteraction::Flows) {
1114-
// The allocation can't flow into `replacement` if we've made it this far,
1115-
// but it might flow into `expected`, in which case we don't need to do
1116-
// anything because we would still be performing the cmpxchg on a real
1117-
// struct. We only need to replace the cmpxchg if the ref is being
1118-
// replaced with locals.
1133+
if (curr->type == Type::unreachable) {
1134+
// Leave this for DCE.
11191135
return;
11201136
}
11211137

1122-
if (curr->type == Type::unreachable) {
1123-
// As with RefGetDesc and StructGet, above.
1138+
// The allocation might flow into `ref` or `expected`, but not
1139+
// `replacement`, because then it would be considered to have escaped.
1140+
if (analyzer.getInteraction(curr->expected) ==
1141+
ParentChildInteraction::Flows) {
1142+
// Since the allocation does not escape, it cannot possibly match the
1143+
// value already in the struct. The cmpxchg will just do a read. Drop the
1144+
// other arguments and do the atomic read at the end, when the cmpxchg
1145+
// would have happened. Use a nullable scratch local in case we also
1146+
// optimize `ref` later and need to replace it with a null.
1147+
auto refType = curr->ref->type.with(Nullable);
1148+
auto refScratch = builder.addVar(func, refType);
1149+
auto* setRefScratch = builder.makeLocalSet(refScratch, curr->ref);
1150+
auto* getRefScratch = builder.makeLocalGet(refScratch, refType);
1151+
auto* structGet = builder.makeStructGet(
1152+
curr->index, getRefScratch, curr->order, curr->type);
1153+
auto* block = builder.makeBlock({setRefScratch,
1154+
builder.makeDrop(curr->expected),
1155+
builder.makeDrop(curr->replacement),
1156+
structGet});
1157+
replaceCurrent(block);
1158+
// Record the new data flow into and out of the new scratch local. This is
1159+
// necessary in case `ref` gets processed later so we can detect that it
1160+
// flows to the new struct.atomic.get, which may need to be replaced.
1161+
analyzer.parents.setParent(curr->ref, setRefScratch);
1162+
analyzer.scratchInfo.insert({setRefScratch, getRefScratch});
1163+
analyzer.parents.setParent(getRefScratch, structGet);
1164+
return;
1165+
}
1166+
if (analyzer.getInteraction(curr->ref) != ParentChildInteraction::Flows) {
1167+
// Since the allocation does not flow from `ref`, it must not flow through
1168+
// this cmpxchg at all.
11241169
return;
11251170
}
11261171

@@ -1468,6 +1513,7 @@ struct Heap2Local {
14681513
const PassOptions& passOptions;
14691514

14701515
LazyLocalGraph localGraph;
1516+
ScratchInfo scratchInfo;
14711517
Parents parents;
14721518
BranchUtils::BranchTargets branchTargets;
14731519

@@ -1539,7 +1585,7 @@ struct Heap2Local {
15391585
continue;
15401586
}
15411587
EscapeAnalyzer analyzer(
1542-
localGraph, parents, branchTargets, passOptions, wasm);
1588+
localGraph, scratchInfo, parents, branchTargets, passOptions, wasm);
15431589
if (!analyzer.escapes(allocation)) {
15441590
// Convert the allocation and all its uses into a struct. Then convert
15451591
// the struct into locals.
@@ -1559,7 +1605,7 @@ struct Heap2Local {
15591605
// Check for escaping, noting relevant information as we go. If this does
15601606
// not escape, optimize it into locals.
15611607
EscapeAnalyzer analyzer(
1562-
localGraph, parents, branchTargets, passOptions, wasm);
1608+
localGraph, scratchInfo, parents, branchTargets, passOptions, wasm);
15631609
if (!analyzer.escapes(allocation)) {
15641610
Struct2Local(allocation, analyzer, func, wasm);
15651611
optimized = true;

0 commit comments

Comments
 (0)