Skip to content

Commit 03f3528

Browse files
authored
Optimize if of br_if (#2216)
An if whose body is a br_if can be turned into a br_if of a combined condition (if side effects allow it). The naive size in bytes is identical between the patterns, but the select may avoid a hardware branch, and also the select may be further optimized. On the benchmark suite this helps every single benchmark, but by quite small amounts (e.g. 100 bytes on sqlite, which is 1MB). This was noticed in emscripten-core/emscripten#8941
1 parent 1a9b0e1 commit 03f3528

14 files changed

Lines changed: 250 additions & 90 deletions

src/asm2wasm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1688,6 +1688,7 @@ void Asm2WasmBuilder::processAsm(Ref ast) {
16881688
// autodrop can add some garbage
16891689
passRunner.add("vacuum");
16901690
passRunner.add("remove-unused-brs");
1691+
passRunner.add("vacuum");
16911692
passRunner.add("remove-unused-names");
16921693
passRunner.add("merge-blocks");
16931694
passRunner.add("optimize-instructions");

src/passes/RemoveUnusedBrs.cpp

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <ir/branch-utils.h>
2222
#include <ir/cost.h>
2323
#include <ir/effects.h>
24+
#include <ir/literal-utils.h>
2425
#include <ir/utils.h>
2526
#include <parsing.h>
2627
#include <pass.h>
@@ -49,6 +50,23 @@ static bool canTurnIfIntoBrIf(Expression* ifCondition,
4950
return !EffectAnalyzer(options, ifCondition).invalidates(value);
5051
}
5152

53+
// Check if it is not worth it to run code unconditionally. This
54+
// assumes we are trying to run two expressions where previously
55+
// only one of the two might have executed. We assume here that
56+
// executing both is good for code size.
57+
static bool tooCostlyToRunUnconditionally(const PassOptions& passOptions,
58+
Expression* one,
59+
Expression* two) {
60+
// If we care mostly about code size, just do it for that reason.
61+
if (passOptions.shrinkLevel) {
62+
return false;
63+
}
64+
// Consider the cost of executing all the code unconditionally.
65+
const auto TOO_MUCH = 7;
66+
auto total = CostAnalyzer(one).cost + CostAnalyzer(two).cost;
67+
return total >= TOO_MUCH;
68+
}
69+
5270
struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> {
5371
bool isFunctionParallel() override { return true; }
5472

@@ -283,12 +301,40 @@ struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> {
283301

284302
void visitIf(If* curr) {
285303
if (!curr->ifFalse) {
286-
// if without an else. try to reduce if (condition) br => br_if
287-
// (condition)
288-
Break* br = curr->ifTrue->dynCast<Break>();
289-
if (br && !br->condition) { // TODO: if there is a condition, join them
304+
// if without an else. try to reduce
305+
// if (condition) br => br_if (condition)
306+
if (Break* br = curr->ifTrue->dynCast<Break>()) {
290307
if (canTurnIfIntoBrIf(curr->condition, br->value, getPassOptions())) {
291-
br->condition = curr->condition;
308+
if (!br->condition) {
309+
br->condition = curr->condition;
310+
} else {
311+
// In this case we can replace
312+
// if (condition1) br_if (condition2)
313+
// =>
314+
// br_if select (condition1) (condition2) (i32.const 0)
315+
// In other words, we replace an if (3 bytes) with a select and a
316+
// zero (also 3 bytes). The size is unchanged, but the select may
317+
// be further optimizable, and if select does not branch we also
318+
// avoid one branch.
319+
// If running the br's condition unconditionally is too expensive,
320+
// give up.
321+
auto* zero = LiteralUtils::makeZero(i32, *getModule());
322+
if (tooCostlyToRunUnconditionally(
323+
getPassOptions(), br->condition, zero)) {
324+
return;
325+
}
326+
// Of course we can't do this if the br's condition has side
327+
// effects, as we would then execute those unconditionally.
328+
if (EffectAnalyzer(getPassOptions(), br->condition)
329+
.hasSideEffects()) {
330+
return;
331+
}
332+
Builder builder(*getModule());
333+
// Note that we use the br's condition as the select condition.
334+
// That keeps the order of the two conditions as it was originally.
335+
br->condition =
336+
builder.makeSelect(br->condition, curr->condition, zero);
337+
}
292338
br->finalize();
293339
replaceCurrent(Builder(*getModule()).dropIfConcretelyTyped(br));
294340
anotherCycle = true;
@@ -871,14 +917,9 @@ struct RemoveUnusedBrs : public WalkerPass<PostWalker<RemoveUnusedBrs>> {
871917
// This is always helpful for code size, but can be a tradeoff with
872918
// performance as we run both code paths. So when shrinking we always
873919
// try to do this, but otherwise must consider more carefully.
874-
if (!passOptions.shrinkLevel) {
875-
// Consider the cost of executing all the code unconditionally
876-
const auto MAX_COST = 7;
877-
auto total =
878-
CostAnalyzer(iff->ifTrue).cost + CostAnalyzer(iff->ifFalse).cost;
879-
if (total >= MAX_COST) {
880-
return nullptr;
881-
}
920+
if (tooCostlyToRunUnconditionally(
921+
passOptions, iff->ifTrue, iff->ifFalse)) {
922+
return nullptr;
882923
}
883924
// Check if side effects allow this.
884925
EffectAnalyzer condition(passOptions, iff->condition);

test/emcc_hello_world.fromasm

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10327,13 +10327,18 @@
1032710327
)
1032810328
)
1032910329
(block
10330-
(if
10331-
(local.tee $3
10332-
(i32.load
10333-
(i32.const 616)
10330+
(br_if $label$break$L279
10331+
(select
10332+
(i32.eqz
10333+
(i32.eqz
10334+
(local.tee $3
10335+
(i32.load
10336+
(i32.const 616)
10337+
)
10338+
)
10339+
)
1033410340
)
10335-
)
10336-
(br_if $label$break$L279
10341+
(i32.const 0)
1033710342
(i32.or
1033810343
(i32.le_u
1033910344
(local.get $12)

test/emcc_hello_world.fromasm.clamp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10377,13 +10377,18 @@
1037710377
)
1037810378
)
1037910379
(block
10380-
(if
10381-
(local.tee $3
10382-
(i32.load
10383-
(i32.const 616)
10380+
(br_if $label$break$L279
10381+
(select
10382+
(i32.eqz
10383+
(i32.eqz
10384+
(local.tee $3
10385+
(i32.load
10386+
(i32.const 616)
10387+
)
10388+
)
10389+
)
1038410390
)
10385-
)
10386-
(br_if $label$break$L279
10391+
(i32.const 0)
1038710392
(i32.or
1038810393
(i32.le_u
1038910394
(local.get $12)

test/emcc_hello_world.fromasm.imprecise

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10223,13 +10223,18 @@
1022310223
)
1022410224
)
1022510225
(block
10226-
(if
10227-
(local.tee $3
10228-
(i32.load
10229-
(i32.const 616)
10226+
(br_if $label$break$L279
10227+
(select
10228+
(i32.eqz
10229+
(i32.eqz
10230+
(local.tee $3
10231+
(i32.load
10232+
(i32.const 616)
10233+
)
10234+
)
10235+
)
1023010236
)
10231-
)
10232-
(br_if $label$break$L279
10237+
(i32.const 0)
1023310238
(i32.or
1023410239
(i32.le_u
1023510240
(local.get $12)

test/memorygrowth.fromasm

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3152,13 +3152,14 @@
31523152
)
31533153
)
31543154
(block
3155-
(if
3156-
(local.tee $12
3157-
(i32.load
3158-
(i32.const 1648)
3155+
(br_if $do-once33
3156+
(select
3157+
(local.tee $12
3158+
(i32.load
3159+
(i32.const 1648)
3160+
)
31593161
)
3160-
)
3161-
(br_if $do-once33
3162+
(i32.const 0)
31623163
(i32.or
31633164
(i32.le_u
31643165
(local.get $8)

test/memorygrowth.fromasm.clamp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3152,13 +3152,14 @@
31523152
)
31533153
)
31543154
(block
3155-
(if
3156-
(local.tee $12
3157-
(i32.load
3158-
(i32.const 1648)
3155+
(br_if $do-once33
3156+
(select
3157+
(local.tee $12
3158+
(i32.load
3159+
(i32.const 1648)
3160+
)
31593161
)
3160-
)
3161-
(br_if $do-once33
3162+
(i32.const 0)
31623163
(i32.or
31633164
(i32.le_u
31643165
(local.get $8)

test/memorygrowth.fromasm.imprecise

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3150,13 +3150,14 @@
31503150
)
31513151
)
31523152
(block
3153-
(if
3154-
(local.tee $12
3155-
(i32.load
3156-
(i32.const 1648)
3153+
(br_if $do-once33
3154+
(select
3155+
(local.tee $12
3156+
(i32.load
3157+
(i32.const 1648)
3158+
)
31573159
)
3158-
)
3159-
(br_if $do-once33
3160+
(i32.const 0)
31603161
(i32.or
31613162
(i32.le_u
31623163
(local.get $8)

test/passes/bysyncify_optimize-level=1.txt

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,11 +1244,12 @@
12441244
)
12451245
)
12461246
)
1247-
(if
1248-
(i32.eqz
1249-
(global.get $__bysyncify_state)
1250-
)
1251-
(br_if $l
1247+
(br_if $l
1248+
(select
1249+
(i32.eqz
1250+
(global.get $__bysyncify_state)
1251+
)
1252+
(i32.const 0)
12521253
(local.get $0)
12531254
)
12541255
)

test/passes/remove-unused-brs.txt

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2431,4 +2431,55 @@
24312431
)
24322432
)
24332433
)
2434+
(func $if_br_if (; 115 ;) (type $1)
2435+
(local $0 i32)
2436+
(block $label$1
2437+
(br_if $label$1
2438+
(select
2439+
(local.tee $0
2440+
(i32.const 1024)
2441+
)
2442+
(i32.const 0)
2443+
(i32.eqz
2444+
(i32.const -4)
2445+
)
2446+
)
2447+
)
2448+
(br_if $label$1
2449+
(select
2450+
(i32.const 1025)
2451+
(i32.const 0)
2452+
(i32.eqz
2453+
(i32.const -5)
2454+
)
2455+
)
2456+
)
2457+
(br_if $label$1
2458+
(select
2459+
(local.tee $0
2460+
(i32.const 1025)
2461+
)
2462+
(i32.const 0)
2463+
(i32.eqz
2464+
(i32.const -6)
2465+
)
2466+
)
2467+
)
2468+
(if
2469+
(i32.const 1026)
2470+
(br_if $label$1
2471+
(local.tee $0
2472+
(i32.const -7)
2473+
)
2474+
)
2475+
)
2476+
(i32.store
2477+
(i32.const 1024)
2478+
(i32.add
2479+
(local.get $0)
2480+
(i32.const 1)
2481+
)
2482+
)
2483+
)
2484+
)
24342485
)

0 commit comments

Comments
 (0)