3030#include " asmjs/asmangle.h"
3131#include " asmjs/shared-constants.h"
3232#include " emscripten-optimizer/optimizer.h"
33+ #include " ir/branch-utils.h"
3334#include " ir/effects.h"
3435#include " ir/find_all.h"
3536#include " ir/import-utils.h"
@@ -708,6 +709,126 @@ Ref Wasm2JSBuilder::processFunction(Module* m,
708709Ref Wasm2JSBuilder::processFunctionBody (Module* m,
709710 Function* func,
710711 bool standaloneFunction) {
712+ // Switches are tricky to handle - in wasm they often come with
713+ // massively-nested "towers" of blocks, which if naively translated
714+ // to JS may exceed parse recursion limits of VMs. Therefore even when
715+ // not optimizing we work hard to emit minimal and minimally-nested
716+ // switches.
717+ // We do so by pre-scanning for br_tables and noting which of their
718+ // targets can be hoisted up into them, e.g.
719+ //
720+ // (block $a
721+ // (block $b
722+ // (block $c
723+ // (block $d
724+ // (block $e
725+ // (br_table $a $b $c $d $e (..))
726+ // )
727+ // ;; code X (for block $e)
728+ // ;; implicit fallthrough - can be done in the switch too
729+ // )
730+ // ;; code Y
731+ // (br $c) ;; branch which is identical to a fallthrough
732+ // )
733+ // ;; code Z
734+ // (br $a) ;; skip some blocks - can't do this in a switch!
735+ // )
736+ // ;; code W
737+ // )
738+ //
739+ // Every branch we see is a potential hazard - all targets must not
740+ // be optimized into the switch, since they must be reached normally,
741+ // unless they happen to be right after us, in which case it's just
742+ // a fallthrough anyhow.
743+ struct SwitchProcessor : public ExpressionStackWalker <SwitchProcessor> {
744+ // A list of expressions we don't need to emit, as we are handling them
745+ // in another way.
746+ std::set<Expression*> unneededExpressions;
747+
748+ struct SwitchCase {
749+ Name target;
750+ std::vector<Expression*> code;
751+ SwitchCase (Name target) : target(target) {}
752+ };
753+
754+ // The switch cases we found that we can hoist up.
755+ std::map<Switch*, std::vector<SwitchCase>> hoistedSwitchCases;
756+
757+ void visitSwitch (Switch* brTable) {
758+ Index i = expressionStack.size () - 1 ;
759+ assert (expressionStack[i] == brTable);
760+ // A set of names we must stop at, since we've seen branches to them.
761+ std::set<Name> namesBranchedTo;
762+ while (1 ) {
763+ // Stop if we are at the top level.
764+ if (i == 0 ) {
765+ break ;
766+ }
767+ i--;
768+ auto * child = expressionStack[i + 1 ];
769+ auto * curr = expressionStack[i];
770+ // Stop if the current node is not a block with the child in the
771+ // first position, i.e., the classic switch pattern.
772+ auto * block = curr->dynCast <Block>();
773+ if (!block || block->list [0 ] != child) {
774+ break ;
775+ }
776+ // Ignore the case of a name-less block for simplicity (merge-blocks
777+ // would have removed it).
778+ if (!block->name .is ()) {
779+ break ;
780+ }
781+ // If we have already seen this block, stop here.
782+ if (unneededExpressions.count (block)) {
783+ // XXX FIXME we should probably abort the entire optimization
784+ break ;
785+ }
786+ auto & list = block->list ;
787+ if (child == brTable) {
788+ // Nothing more to do here (we can in fact skip any code til
789+ // the parent block).
790+ continue ;
791+ }
792+ // Ok, we are a block and our child in the first position is a
793+ // block, and the neither is branched to - unless maybe the child
794+ // branches to the parent, check that. Note how we treat the
795+ // final element which may be a break that is a fallthrough.
796+ Expression* unneededBr = nullptr ;
797+ for (Index j = 1 ; j < list.size (); j++) {
798+ auto * item = list[j];
799+ auto newBranches = BranchUtils::getExitingBranches (item);
800+ if (auto * br = item->dynCast <Break>()) {
801+ if (j == list.size () - 1 ) {
802+ if (!br->condition && br->name == block->name ) {
803+ // This is a natural, unnecessary-to-emit fallthrough.
804+ unneededBr = br;
805+ break ;
806+ }
807+ }
808+ }
809+ namesBranchedTo.insert (newBranches.begin (), newBranches.end ());
810+ }
811+ if (namesBranchedTo.count (block->name )) {
812+ break ;
813+ }
814+ // We can move code after the child (reached by branching on the
815+ // child) into the switch.
816+ auto * childBlock = child->cast <Block>();
817+ hoistedSwitchCases[brTable].emplace_back (childBlock->name );
818+ SwitchCase& case_ = hoistedSwitchCases[brTable].back ();
819+ for (Index j = 1 ; j < list.size (); j++) {
820+ auto * item = list[j];
821+ if (item != unneededBr) {
822+ case_.code .push_back (item);
823+ }
824+ }
825+ list.resize (1 );
826+ // Finally, mark the block as unneeded outside the switch.
827+ unneededExpressions.insert (childBlock);
828+ }
829+ }
830+ };
831+
711832 struct ExpressionProcessor : public Visitor <ExpressionProcessor, Ref> {
712833 Wasm2JSBuilder* parent;
713834 IString result; // TODO: remove
@@ -716,13 +837,20 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
716837 bool standaloneFunction;
717838 MixedArena allocator;
718839
840+ SwitchProcessor switchProcessor;
841+
// Stores the builder (parent), the module and the function to translate,
// plus the standaloneFunction flag. The constructor body is empty: no
// translation work is done here.
719842 ExpressionProcessor (Wasm2JSBuilder* parent,
720843 Module* m,
721844 Function* func,
722845 bool standaloneFunction)
723846 : parent(parent), func(func), module (m),
724847 standaloneFunction (standaloneFunction) {}
725848
849+ Ref process () {
850+ switchProcessor.walk (func->body );
851+ return visit (func->body , NO_RESULT);
852+ }
853+
726854 // A scoped temporary variable.
727855 struct ScopedTemp {
728856 Wasm2JSBuilder* parent;
@@ -806,16 +934,18 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
806934 // Visitors
807935
808936 Ref visitBlock (Block* curr) {
937+ if (switchProcessor.unneededExpressions .count (curr)) {
938+ // We have had our tail hoisted into a switch that is nested in our
939+ // first position, so we don't need to emit that code again, or
940+ // ourselves in fact.
941+ return visit (curr->list [0 ], NO_RESULT);
942+ }
809943 Ref ret = ValueBuilder::makeBlock ();
810944 size_t size = curr->list .size ();
811- auto noResults = result == NO_RESULT ? size : size - 1 ;
812- for (size_t i = 0 ; i < noResults; i++) {
945+ for (size_t i = 0 ; i < size; i++) {
813946 flattenAppend (
814947 ret, ValueBuilder::makeStatement (visit (curr->list [i], NO_RESULT)));
815948 }
816- if (result != NO_RESULT) {
817- flattenAppend (ret, visitAndAssign (curr->list [size - 1 ], result));
818- }
819949 if (curr->name .is ()) {
820950 ret =
821951 ValueBuilder::makeLabel (fromName (curr->name , NameScope::Label), ret);
@@ -872,7 +1002,9 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
8721002 Expression* defaultBody = nullptr ; // default must be last in asm.js
8731003
8741004 Ref visitSwitch (Switch* curr) {
875- assert (!curr->value );
1005+ #if 0
1006+ // Simple switch emitting. This is valid but may lead to block nesting of a size
1007+ // that JS engines can't handle.
8761008 Ref ret = ValueBuilder::makeBlock();
8771009 Ref condition = visit(curr->condition, EXPRESSION_RESULT);
8781010 Ref theSwitch =
@@ -906,6 +1038,69 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
9061038 ValueBuilder::appendCodeToSwitch(
9071039 theSwitch, blockify(makeBreakOrContinue(curr->default_)), false);
9081040 return ret;
1041+ #else
1042+ // Even without optimizations, we work hard here to emit minimal and
1043+ // especially minimally-nested code, since otherwise we may get block
1044+ // nesting of a size that JS engines can't handle.
1045+ Ref condition = visit (curr->condition , EXPRESSION_RESULT);
1046+ Ref theSwitch =
1047+ ValueBuilder::makeSwitch (makeAsmCoercion (condition, ASM_INT));
1048+ // First, group the switch targets.
1049+ std::map<Name, std::vector<Index>> targetIndexes;
1050+ for (size_t i = 0 ; i < curr->targets .size (); i++) {
1051+ targetIndexes[curr->targets [i]].push_back (i);
1052+ }
1053+ // Emit first any hoisted groups.
1054+ auto & hoistedCases = switchProcessor.hoistedSwitchCases [curr];
1055+ std::set<Name> emittedTargets;
1056+ for (auto & case_ : hoistedCases) {
1057+ auto target = case_.target ;
1058+ auto & code = case_.code ;
1059+ emittedTargets.insert (target);
1060+ if (target != curr->default_ ) {
1061+ auto & indexes = targetIndexes[target];
1062+ for (auto i : indexes) {
1063+ ValueBuilder::appendCaseToSwitch (theSwitch,
1064+ ValueBuilder::makeNum (i));
1065+ }
1066+ } else {
1067+ ValueBuilder::appendDefaultToSwitch (theSwitch);
1068+ }
1069+ for (auto * c : code) {
1070+ ValueBuilder::appendCodeToSwitch (
1071+ theSwitch, blockify (visit (c, NO_RESULT)), false );
1072+ }
1073+ }
1074+ // Emit any remaining groups by just emitting branches to their code,
1075+ // which will appear outside the switch.
1076+ for (auto & pair : targetIndexes) {
1077+ auto target = pair.first ;
1078+ auto & indexes = pair.second ;
1079+ if (emittedTargets.count (target)) {
1080+ continue ;
1081+ }
1082+ if (target != curr->default_ ) {
1083+ for (auto i : indexes) {
1084+ ValueBuilder::appendCaseToSwitch (theSwitch,
1085+ ValueBuilder::makeNum (i));
1086+ }
1087+ ValueBuilder::appendCodeToSwitch (
1088+ theSwitch, blockify (makeBreakOrContinue (target)), false );
1089+ } else {
1090+ // For the group going to the same place as the default, we can just
1091+ // emit the default itself, which we do at the end.
1092+ }
1093+ }
1094+ // TODO: if the group the default is in is not the largest, we can turn
1095+ // the largest into
1096+ // the default by using a local and a check on the range
1097+ if (!emittedTargets.count (curr->default_ )) {
1098+ ValueBuilder::appendDefaultToSwitch (theSwitch);
1099+ ValueBuilder::appendCodeToSwitch (
1100+ theSwitch, blockify (makeBreakOrContinue (curr->default_ )), false );
1101+ }
1102+ return theSwitch;
1103+ #endif
9091104 }
9101105
9111106 Ref visitCall (Call* curr) {
@@ -1618,8 +1813,7 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
16181813 }
16191814 };
16201815
1621- return ExpressionProcessor(this , m, func, standaloneFunction)
1622- .visit(func->body, NO_RESULT);
1816+ return ExpressionProcessor(this , m, func, standaloneFunction).process();
16231817}
16241818
16251819void Wasm2JSBuilder::addMemoryGrowthFuncs (Ref ast, Module* wasm) {
0 commit comments