Skip to content

Commit 4b223a3

Browse files
authored
wasm2js: Switch optimizations (#2141)
This pattern-matches towers of blocks + a br_table into a JS switch. This is much smaller in code size and also avoids heavy nesting that can exceed the recursion limits of JS parsers. This is not enough yet, because it pattern-matches very specifically. In reality, switches can look slightly different. Followup PRs will extend this. For now, this passes the test suite (what passed before - not including the massive-switch tests) + fuzzing so it's a good start.
1 parent 8992638 commit 4b223a3

11 files changed

Lines changed: 37818 additions & 37286 deletions

src/wasm2js.h

Lines changed: 202 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "asmjs/asmangle.h"
3131
#include "asmjs/shared-constants.h"
3232
#include "emscripten-optimizer/optimizer.h"
33+
#include "ir/branch-utils.h"
3334
#include "ir/effects.h"
3435
#include "ir/find_all.h"
3536
#include "ir/import-utils.h"
@@ -708,6 +709,126 @@ Ref Wasm2JSBuilder::processFunction(Module* m,
708709
Ref Wasm2JSBuilder::processFunctionBody(Module* m,
709710
Function* func,
710711
bool standaloneFunction) {
712+
// Switches are tricky to handle - in wasm they often come with
713+
// massively-nested "towers" of blocks, which if naively translated
714+
// to JS may exceed parse recursion limits of VMs. Therefore even when
715+
// not optimizing we work hard to emit minimal and minimally-nested
716+
// switches.
717+
// We do so by pre-scanning for br_tables and noting which of their
718+
// targets can be hoisted up into them, e.g.
719+
//
720+
// (block $a
721+
// (block $b
722+
// (block $c
723+
// (block $d
724+
// (block $e
725+
// (br_table $a $b $c $d $e (..))
726+
// )
727+
// ;; code X (for block $e)
728+
// ;; implicit fallthrough - can be done in the switch too
729+
// )
730+
// ;; code Y
731+
// (br $c) ;; branch which is identical to a fallthrough
732+
// )
733+
// ;; code Z
734+
// (br $a) ;; skip some blocks - can't do this in a switch!
735+
// )
736+
// ;; code W
737+
// )
738+
//
739+
// Every branch we see is a potential hazard - all targets must not
740+
// be optimized into the switch, since they must be reached normally,
741+
// unless they happen to be right after us, in which case it's just
742+
// a fallthrough anyhow.
743+
// Pre-pass over a function body. For each br_table it walks up the enclosing
// tower of named blocks and records which block tails can be emitted as cases
// of the JS switch rather than as code following nested labeled blocks. The
// results are read later from unneededExpressions and hoistedSwitchCases.
// Note that the walk also mutates the IR: a block whose tail is hoisted is
// truncated to just its first child.
struct SwitchProcessor : public ExpressionStackWalker<SwitchProcessor> {
744+
// A list of expressions we don't need to emit, as we are handling them
745+
// in another way.
746+
std::set<Expression*> unneededExpressions;
747+
748+
// One hoisted case: `target` is the name of the block whose exit leads to
// this code, and `code` holds the expressions to emit for it, in their
// original order.
struct SwitchCase {
749+
Name target;
750+
std::vector<Expression*> code;
751+
SwitchCase(Name target) : target(target) {}
752+
};
753+
754+
// The switch cases we found that we can hoist up.
755+
std::map<Switch*, std::vector<SwitchCase>> hoistedSwitchCases;
756+
757+
// Walks from the br_table toward the root of the expression stack, one
// parent at a time, hoisting the tail of each enclosing block into a
// SwitchCase for as long as the "block with the child in first position"
// pattern holds. Cases are appended innermost first.
void visitSwitch(Switch* brTable) {
758+
Index i = expressionStack.size() - 1;
759+
assert(expressionStack[i] == brTable);
760+
// A set of names we must stop at, since we've seen branches to them.
// It is declared outside the loop, so names accumulate across all the
// block levels visited so far.
761+
std::set<Name> namesBranchedTo;
762+
while (1) {
763+
// Stop if we are at the top level. This is checked before the decrement
// below, so i is never decremented past zero.
764+
if (i == 0) {
765+
break;
766+
}
767+
i--;
768+
auto* child = expressionStack[i + 1];
769+
auto* curr = expressionStack[i];
770+
// Stop if the current node is not a block with the child in the
771+
// first position, i.e., the classic switch pattern.
772+
auto* block = curr->dynCast<Block>();
773+
if (!block || block->list[0] != child) {
774+
break;
775+
}
776+
// Ignore the case of a name-less block for simplicity (merge-blocks
777+
// would have removed it).
778+
if (!block->name.is()) {
779+
break;
780+
}
781+
// If we have already seen this block, stop here.
782+
if (unneededExpressions.count(block)) {
783+
// XXX FIXME we should probably abort the entire optimization
784+
break;
785+
}
786+
auto& list = block->list;
787+
if (child == brTable) {
788+
// Nothing more to do here (we can in fact skip any code until
789+
// the parent block). The br_table's direct parent therefore
// contributes no case of its own.
790+
continue;
791+
}
792+
// Ok, we are a block and our child in the first position is a
793+
// block, and neither is branched to - unless maybe the child
794+
// branches to the parent, check that. Note how we treat the
795+
// final element which may be a break that is a fallthrough.
796+
Expression* unneededBr = nullptr;
797+
for (Index j = 1; j < list.size(); j++) {
798+
auto* item = list[j];
799+
auto newBranches = BranchUtils::getExitingBranches(item);
800+
if (auto* br = item->dynCast<Break>()) {
801+
if (j == list.size() - 1) {
802+
if (!br->condition && br->name == block->name) {
803+
// This is a natural, unnecessary-to-emit fallthrough.
// Leaving the loop here skips the insert below, so this
// break's target is not recorded as a name branched to.
804+
unneededBr = br;
805+
break;
806+
}
807+
}
808+
}
809+
namesBranchedTo.insert(newBranches.begin(), newBranches.end());
810+
}
811+
if (namesBranchedTo.count(block->name)) {
812+
break;
813+
}
814+
// We can move code after the child (reached by branching on the
815+
// child) into the switch.
816+
auto* childBlock = child->cast<Block>();
817+
hoistedSwitchCases[brTable].emplace_back(childBlock->name);
818+
SwitchCase& case_ = hoistedSwitchCases[brTable].back();
819+
for (Index j = 1; j < list.size(); j++) {
820+
auto* item = list[j];
821+
if (item != unneededBr) {
822+
case_.code.push_back(item);
823+
}
824+
}
// The tail now belongs to the switch case; keep only the child in the
// block so the tail is not emitted a second time.
825+
list.resize(1);
826+
// Finally, mark the block as unneeded outside the switch.
827+
unneededExpressions.insert(childBlock);
828+
}
829+
}
830+
};
831+
711832
struct ExpressionProcessor : public Visitor<ExpressionProcessor, Ref> {
712833
Wasm2JSBuilder* parent;
713834
IString result; // TODO: remove
@@ -716,13 +837,20 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
716837
bool standaloneFunction;
717838
MixedArena allocator;
718839

840+
SwitchProcessor switchProcessor;
841+
719842
// Stores the builder, function, module and standalone flag in the
// corresponding members. The constructor body is empty.
ExpressionProcessor(Wasm2JSBuilder* parent,
720843
Module* m,
721844
Function* func,
722845
bool standaloneFunction)
723846
: parent(parent), func(func), module(m),
724847
standaloneFunction(standaloneFunction) {}
725848

849+
// Entry point: runs the switch pre-scan over the function body, then
// translates the body and returns the result. The pre-scan has to run first
// because visitBlock consults switchProcessor.unneededExpressions and
// visitSwitch consults switchProcessor.hoistedSwitchCases while emitting.
Ref process() {
850+
switchProcessor.walk(func->body);
851+
return visit(func->body, NO_RESULT);
852+
}
853+
726854
// A scoped temporary variable.
727855
struct ScopedTemp {
728856
Wasm2JSBuilder* parent;
@@ -806,16 +934,18 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
806934
// Visitors
807935

808936
Ref visitBlock(Block* curr) {
937+
if (switchProcessor.unneededExpressions.count(curr)) {
938+
// We have had our tail hoisted into a switch that is nested in our
939+
// first position, so we don't need to emit that code again, or
940+
// ourselves in fact.
941+
return visit(curr->list[0], NO_RESULT);
942+
}
809943
Ref ret = ValueBuilder::makeBlock();
810944
size_t size = curr->list.size();
811-
auto noResults = result == NO_RESULT ? size : size - 1;
812-
for (size_t i = 0; i < noResults; i++) {
945+
for (size_t i = 0; i < size; i++) {
813946
flattenAppend(
814947
ret, ValueBuilder::makeStatement(visit(curr->list[i], NO_RESULT)));
815948
}
816-
if (result != NO_RESULT) {
817-
flattenAppend(ret, visitAndAssign(curr->list[size - 1], result));
818-
}
819949
if (curr->name.is()) {
820950
ret =
821951
ValueBuilder::makeLabel(fromName(curr->name, NameScope::Label), ret);
@@ -872,7 +1002,9 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
8721002
Expression* defaultBody = nullptr; // default must be last in asm.js
8731003

8741004
Ref visitSwitch(Switch* curr) {
875-
assert(!curr->value);
1005+
#if 0
1006+
// Simple switch emitting. This is valid but may lead to block nesting of a size
1007+
// that JS engines can't handle.
8761008
Ref ret = ValueBuilder::makeBlock();
8771009
Ref condition = visit(curr->condition, EXPRESSION_RESULT);
8781010
Ref theSwitch =
@@ -906,6 +1038,69 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
9061038
ValueBuilder::appendCodeToSwitch(
9071039
theSwitch, blockify(makeBreakOrContinue(curr->default_)), false);
9081040
return ret;
1041+
#else
1042+
// Even without optimizations, we work hard here to emit minimal and
1043+
// especially minimally-nested code, since otherwise we may get block
1044+
// nesting of a size that JS engines can't handle.
1045+
Ref condition = visit(curr->condition, EXPRESSION_RESULT);
1046+
Ref theSwitch =
1047+
ValueBuilder::makeSwitch(makeAsmCoercion(condition, ASM_INT));
1048+
// First, group the switch targets.
1049+
std::map<Name, std::vector<Index>> targetIndexes;
1050+
for (size_t i = 0; i < curr->targets.size(); i++) {
1051+
targetIndexes[curr->targets[i]].push_back(i);
1052+
}
1053+
// Emit first any hoisted groups.
1054+
auto& hoistedCases = switchProcessor.hoistedSwitchCases[curr];
1055+
std::set<Name> emittedTargets;
1056+
for (auto& case_ : hoistedCases) {
1057+
auto target = case_.target;
1058+
auto& code = case_.code;
1059+
emittedTargets.insert(target);
1060+
if (target != curr->default_) {
1061+
auto& indexes = targetIndexes[target];
1062+
for (auto i : indexes) {
1063+
ValueBuilder::appendCaseToSwitch(theSwitch,
1064+
ValueBuilder::makeNum(i));
1065+
}
1066+
} else {
1067+
ValueBuilder::appendDefaultToSwitch(theSwitch);
1068+
}
1069+
for (auto* c : code) {
1070+
ValueBuilder::appendCodeToSwitch(
1071+
theSwitch, blockify(visit(c, NO_RESULT)), false);
1072+
}
1073+
}
1074+
// Emit any remaining groups by just emitting branches to their code,
1075+
// which will appear outside the switch.
1076+
for (auto& pair : targetIndexes) {
1077+
auto target = pair.first;
1078+
auto& indexes = pair.second;
1079+
if (emittedTargets.count(target)) {
1080+
continue;
1081+
}
1082+
if (target != curr->default_) {
1083+
for (auto i : indexes) {
1084+
ValueBuilder::appendCaseToSwitch(theSwitch,
1085+
ValueBuilder::makeNum(i));
1086+
}
1087+
ValueBuilder::appendCodeToSwitch(
1088+
theSwitch, blockify(makeBreakOrContinue(target)), false);
1089+
} else {
1090+
// For the group going to the same place as the default, we can just
1091+
// emit the default itself, which we do at the end.
1092+
}
1093+
}
1094+
// TODO: if the group the default is in is not the largest, we can turn
1095+
// the largest into
1096+
// the default by using a local and a check on the range
1097+
if (!emittedTargets.count(curr->default_)) {
1098+
ValueBuilder::appendDefaultToSwitch(theSwitch);
1099+
ValueBuilder::appendCodeToSwitch(
1100+
theSwitch, blockify(makeBreakOrContinue(curr->default_)), false);
1101+
}
1102+
return theSwitch;
1103+
#endif
9091104
}
9101105

9111106
Ref visitCall(Call* curr) {
@@ -1618,8 +1813,7 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
16181813
}
16191814
};
16201815

1621-
return ExpressionProcessor(this, m, func, standaloneFunction)
1622-
.visit(func->body, NO_RESULT);
1816+
return ExpressionProcessor(this, m, func, standaloneFunction).process();
16231817
}
16241818

16251819
void Wasm2JSBuilder::addMemoryGrowthFuncs(Ref ast, Module* wasm) {

0 commit comments

Comments
 (0)