Skip to content

Commit 98f5798

Browse files
authored
Handle concrete values in CodeFolding (#7117)
CodeFolding previously only worked on blocks that did not produce values. It worked on Ifs that produced values, but only by accident; the logic for folding matching tails was not written to support tails producing concrete values, but it happened to work for Ifs because subsequent ReFinalize runs fixed all the incorrect types it produced. Improve the power of the optimization by explicitly handling tails that produce concrete values for both blocks and ifs. Now that the core logic handles concrete values correctly, remove the unnecessary ReFinalize run. Also remove the separate optimization of Ifs with identical arms; this optimization requires ReFinalize and is already performed by OptimizeInstructions.
1 parent 73971d7 commit 98f5798

5 files changed

Lines changed: 831 additions & 464 deletions

File tree

src/passes/CodeFolding.cpp

Lines changed: 111 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -105,19 +105,11 @@ struct CodeFolding
105105
Tail(Block* block) : expr(nullptr), block(block), pointer(nullptr) {}
106106
// For a break
107107
Tail(Expression* expr, Block* block)
108-
: expr(expr), block(block), pointer(nullptr) {
109-
validate();
110-
}
108+
: expr(expr), block(block), pointer(nullptr) {}
111109
Tail(Expression* expr, Expression** pointer)
112110
: expr(expr), block(nullptr), pointer(pointer) {}
113111

114112
bool isFallthrough() const { return expr == nullptr; }
115-
116-
void validate() const {
117-
if (expr && block) {
118-
assert(block->list.back() == expr);
119-
}
120-
}
121113
};
122114

123115
// state
@@ -152,15 +144,13 @@ struct CodeFolding
152144
}
153145

154146
void visitBreak(Break* curr) {
155-
if (curr->condition || curr->value) {
147+
if (curr->condition) {
156148
unoptimizables.insert(curr->name);
157149
} else {
158-
// we can only optimize if we are at the end of the parent block,
159-
// and if the parent block does not return a value (we can't move
160-
// elements out of it if there is a value being returned)
150+
// we can only optimize if we are at the end of the parent block.
151+
// TODO: Relax this.
161152
Block* parent = controlFlowStack.back()->dynCast<Block>();
162-
if (parent && curr == parent->list.back() &&
163-
!parent->list.back()->type.isConcrete()) {
153+
if (parent && curr == parent->list.back()) {
164154
breakTails[curr->name].push_back(Tail(curr, parent));
165155
} else {
166156
unoptimizables.insert(curr->name);
@@ -222,24 +212,19 @@ struct CodeFolding
222212
if (unoptimizables.count(curr->name) > 0) {
223213
return;
224214
}
225-
// we can't optimize a fallthrough value
226-
if (curr->list.back()->type.isConcrete()) {
227-
return;
228-
}
229215
auto iter = breakTails.find(curr->name);
230216
if (iter == breakTails.end()) {
231217
return;
232218
}
233-
// looks promising
219+
// Looks promising.
234220
auto& tails = iter->second;
235-
// see if there is a fallthrough
236-
bool hasFallthrough = true;
237-
for (auto* child : curr->list) {
238-
if (child->type == Type::unreachable) {
239-
hasFallthrough = false;
240-
}
241-
}
242-
if (hasFallthrough) {
221+
// If the end of the block cannot be reached, then we don't need to include
222+
// it in the set of folded tails.
223+
bool includeFallthrough =
224+
!std::any_of(curr->list.begin(), curr->list.end(), [&](auto* child) {
225+
return child->type == Type::unreachable;
226+
});
227+
if (includeFallthrough) {
243228
tails.push_back({Tail(curr)});
244229
}
245230
optimizeExpressionTails(tails, curr);
@@ -249,48 +234,34 @@ struct CodeFolding
249234
if (!curr->ifFalse) {
250235
return;
251236
}
252-
// if both sides are identical, this is easy to fold
253-
if (ExpressionAnalyzer::equal(curr->ifTrue, curr->ifFalse)) {
237+
// If both are blocks, look for a tail we can merge.
238+
auto* left = curr->ifTrue->dynCast<Block>();
239+
auto* right = curr->ifFalse->dynCast<Block>();
240+
// If one is a block and the other isn't, and the non-block is a tail of the
241+
// other, we can fold that - for our convenience, we just add a block and
242+
// run the rest of the optimization mormally.
243+
auto maybeAddBlock = [this](Block* block, Expression*& other) -> Block* {
244+
// If other is a suffix of the block, wrap it in a block.
245+
if (block->list.empty() ||
246+
!ExpressionAnalyzer::equal(other, block->list.back())) {
247+
return nullptr;
248+
}
249+
// Do it, assign to the out param `other`, and return the block.
254250
Builder builder(*getModule());
255-
// remove if (4 bytes), remove one arm, add drop (1), add block (3),
256-
// so this must be a net savings
257-
markAsModified(curr);
258-
auto* ret =
259-
builder.makeSequence(builder.makeDrop(curr->condition), curr->ifTrue);
260-
// we must ensure we present the same type as the if had
261-
ret->finalize(curr->type);
262-
replaceCurrent(ret);
263-
needEHFixups = true;
264-
} else {
265-
// if both are blocks, look for a tail we can merge
266-
auto* left = curr->ifTrue->dynCast<Block>();
267-
auto* right = curr->ifFalse->dynCast<Block>();
268-
// If one is a block and the other isn't, and the non-block is a tail
269-
// of the other, we can fold that - for our convenience, we just add
270-
// a block and run the rest of the optimization mormally.
271-
auto maybeAddBlock = [this](Block* block, Expression*& other) -> Block* {
272-
// if other is a suffix of the block, wrap it in a block
273-
if (block->list.empty() ||
274-
!ExpressionAnalyzer::equal(other, block->list.back())) {
275-
return nullptr;
276-
}
277-
// do it, assign to the out param `other`, and return the block
278-
Builder builder(*getModule());
279-
auto* ret = builder.makeBlock(other);
280-
other = ret;
281-
return ret;
282-
};
283-
if (left && !right) {
284-
right = maybeAddBlock(left, curr->ifFalse);
285-
} else if (!left && right) {
286-
left = maybeAddBlock(right, curr->ifTrue);
287-
}
288-
// we need nameless blocks, as if there is a name, someone might branch
289-
// to the end, skipping the code we want to merge
290-
if (left && right && !left->name.is() && !right->name.is()) {
291-
std::vector<Tail> tails = {Tail(left), Tail(right)};
292-
optimizeExpressionTails(tails, curr);
293-
}
251+
auto* ret = builder.makeBlock(other);
252+
other = ret;
253+
return ret;
254+
};
255+
if (left && !right) {
256+
right = maybeAddBlock(left, curr->ifFalse);
257+
} else if (!left && right) {
258+
left = maybeAddBlock(right, curr->ifTrue);
259+
}
260+
// We need nameless blocks, as if there is a name, someone might branch to
261+
// the end, skipping the code we want to merge.
262+
if (left && right && !left->name.is() && !right->name.is()) {
263+
std::vector<Tail> tails = {Tail(left), Tail(right)};
264+
optimizeExpressionTails(tails, curr);
294265
}
295266
}
296267

@@ -315,10 +286,6 @@ struct CodeFolding
315286
if (needEHFixups) {
316287
EHUtils::handleBlockNestedPops(func, *getModule());
317288
}
318-
// if we did any work, types may need to be propagated
319-
if (anotherPass) {
320-
ReFinalize().walkFunctionInModule(func, getModule());
321-
}
322289
}
323290
}
324291

@@ -372,6 +339,7 @@ struct CodeFolding
372339
// identical in all paths leading to the block exit can be merged.
373340
template<typename T>
374341
void optimizeExpressionTails(std::vector<Tail>& tails, T* curr) {
342+
auto oldType = curr->type;
375343
if (tails.size() < 2) {
376344
return;
377345
}
@@ -384,50 +352,49 @@ struct CodeFolding
384352
return;
385353
}
386354
// if we were not modified, then we should be valid for processing
387-
tail.validate();
355+
assert(!tail.expr || !tail.block ||
356+
(tail.expr == tail.block->list.back()));
388357
}
389-
// we can ignore the final br in a tail
390-
auto effectiveSize = [&](const Tail& tail) {
391-
auto ret = tail.block->list.size();
358+
auto getMergeable = [&](const Tail& tail, Index num) -> Expression* {
392359
if (!tail.isFallthrough()) {
393-
ret--;
360+
// If there is a branch value, it is the first mergeable item.
361+
auto* val = tail.expr->cast<Break>()->value;
362+
if (val && num == 0) {
363+
return val;
364+
}
365+
if (!val) {
366+
// Skip the branch instruction at the end; it is not part of the
367+
// merged tail.
368+
++num;
369+
}
394370
}
395-
return ret;
396-
};
397-
// the mergeable items do not include the final br in a tail
398-
auto getMergeable = [&](const Tail& tail, Index num) {
399-
return tail.block->list[effectiveSize(tail) - num - 1];
371+
if (num >= tail.block->list.size()) {
372+
return nullptr;
373+
}
374+
return tail.block->list[tail.block->list.size() - num - 1];
400375
};
401376
// we are going to remove duplicate elements and add a block.
402377
// so for this to make sense, we need the size of the duplicate
403378
// elements to be worth that extra block (although, there is
404379
// some chance the block would get merged higher up, see later)
405380
std::vector<Expression*> mergeable; // the elements we can merge
406-
Index num = 0; // how many elements back from the tail to look at
407381
Index saved = 0; // how much we can save
408-
while (1) {
409-
// check if this num is still relevant
410-
bool stop = false;
411-
for (auto& tail : tails) {
412-
assert(tail.block);
413-
if (num >= effectiveSize(tail)) {
414-
// one of the lists is too short
415-
stop = true;
416-
break;
417-
}
418-
}
419-
if (stop) {
382+
for (Index num = 0; true; ++num) {
383+
auto* item = getMergeable(tails[0], num);
384+
if (!item) {
385+
// The list is too short.
420386
break;
421387
}
422-
auto* item = getMergeable(tails[0], num);
423-
for (auto& tail : tails) {
424-
if (!ExpressionAnalyzer::equal(item, getMergeable(tail, num))) {
425-
// one of the lists has a different item
426-
stop = true;
388+
Index tail = 1;
389+
for (; tail < tails.size(); ++tail) {
390+
auto* other = getMergeable(tails[tail], num);
391+
if (!other || !ExpressionAnalyzer::equal(item, other)) {
392+
// Other tail too short or has a difference.
427393
break;
428394
}
429395
}
430-
if (stop) {
396+
if (tail != tails.size()) {
397+
// We saw a tail without a matching item.
431398
break;
432399
}
433400
// we may have found another one we can merge - can we move it?
@@ -436,7 +403,6 @@ struct CodeFolding
436403
}
437404
// we found another one we can merge
438405
mergeable.push_back(item);
439-
num++;
440406
saved += Measurer::measure(item);
441407
}
442408
if (saved == 0) {
@@ -450,7 +416,7 @@ struct CodeFolding
450416
for (auto& tail : tails) {
451417
// it is enough to zero out the block, or leave just one
452418
// element, as then the block can be replaced with that
453-
if (num >= tail.block->list.size() - 1) {
419+
if (mergeable.size() >= tail.block->list.size() - 1) {
454420
willEmptyBlock = true;
455421
break;
456422
}
@@ -483,44 +449,69 @@ struct CodeFolding
483449
}
484450
}
485451
}
452+
486453
// this is worth doing, do it!
487454
for (auto& tail : tails) {
488455
// remove the items we are merging / moving
489456
// first, mark them as modified, so we don't try to handle them
490457
// again in this pass, which might be buggy
491458
markAsModified(tail.block);
492459
// we must preserve the br if there is one
493-
Expression* last = nullptr;
460+
Break* branch = nullptr;
494461
if (!tail.isFallthrough()) {
495-
last = tail.block->list.back();
496-
tail.block->list.pop_back();
462+
branch = tail.block->list.back()->cast<Break>();
463+
if (branch->value) {
464+
branch->value = nullptr;
465+
} else {
466+
tail.block->list.pop_back();
467+
}
497468
}
498-
for (Index i = 0; i < mergeable.size(); i++) {
469+
for (Index i = 0; i < mergeable.size(); ++i) {
499470
tail.block->list.pop_back();
500471
}
501-
if (!tail.isFallthrough()) {
502-
tail.block->list.push_back(last);
472+
if (tail.isFallthrough()) {
473+
// The block now ends in an expression that was previously in the middle
474+
// of the block, meaning it must have type none.
475+
tail.block->finalize(Type::none);
476+
} else {
477+
tail.block->list.push_back(branch);
478+
// The block still ends with the same branch it previously ended with,
479+
// so its type cannot have changed.
480+
tail.block->finalize(tail.block->type);
503481
}
504-
// the block type may change if we removed unreachable stuff,
505-
// but in general it should remain the same, as if it had a
506-
// forced type it should remain, *and*, we don't have a
507-
// fallthrough value (we would never get here), so a concrete
508-
// type was not from that. I.e., any type on the block is
509-
// either forced and/or from breaks with a value, so the
510-
// type cannot be changed by moving code out.
511-
tail.block->finalize(tail.block->type);
512482
}
513483
// since we managed a merge, then it might open up more opportunities later
514484
anotherPass = true;
515485
// make a block with curr + the merged code
516486
Builder builder(*getModule());
517487
auto* block = builder.makeBlock();
518-
block->list.push_back(curr);
488+
if constexpr (T::SpecificId == Expression::IfId) {
489+
// If we've moved all the contents out of both arms of the If, then we can
490+
// simplify the output by replacing it entirely with just a drop of the
491+
// condition.
492+
auto* iff = curr->template cast<If>();
493+
if (iff->ifTrue->template cast<Block>()->list.empty() &&
494+
iff->ifFalse->template cast<Block>()->list.empty()) {
495+
block->list.push_back(builder.makeDrop(iff->condition));
496+
} else {
497+
block->list.push_back(curr);
498+
}
499+
} else {
500+
block->list.push_back(curr);
501+
}
519502
while (!mergeable.empty()) {
520503
block->list.push_back(mergeable.back());
521504
mergeable.pop_back();
522505
}
523-
auto oldType = curr->type;
506+
if constexpr (T::SpecificId == Expression::BlockId) {
507+
// If we didn't have a fallthrough tail because the end of the block was
508+
// not reachable, then we might have a concrete expression at the end of
509+
// the block even though the value produced by the block has been moved
510+
// out of it. If so, drop that expression.
511+
auto* currBlock = curr->template cast<Block>();
512+
currBlock->list.back() =
513+
builder.dropIfConcretelyTyped(currBlock->list.back());
514+
}
524515
// NB: we template-specialize so that this calls the proper finalizer for
525516
// the type
526517
curr->finalize();
@@ -553,9 +544,6 @@ struct CodeFolding
553544
if (tail.block && modifieds.count(tail.block) > 0) {
554545
return true;
555546
}
556-
// if we were not modified, then we should be valid for
557-
// processing
558-
tail.validate();
559547
return false;
560548
}),
561549
tails.end());

test/lit/passes/code-folding-eh-legacy.wast

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@
355355
;; CHECK-NEXT: (drop
356356
;; CHECK-NEXT: (local.get $0)
357357
;; CHECK-NEXT: )
358+
;; CHECK-NEXT: (nop)
358359
;; CHECK-NEXT: (drop
359360
;; CHECK-NEXT: (i32.eqz
360361
;; CHECK-NEXT: (i32.const 1)
@@ -377,13 +378,15 @@
377378
(if
378379
(pop i32)
379380
(then
381+
(nop)
380382
(drop
381383
(i32.eqz
382384
(i32.const 1)
383385
)
384386
)
385387
)
386388
(else
389+
(nop)
387390
(drop
388391
(i32.eqz
389392
(i32.const 1)

0 commit comments

Comments
 (0)