@@ -236,9 +236,54 @@ enum class DataOffset { BStackPos = 0, BStackEnd = 4 };
236236
237237const auto STACK_ALIGN = 4 ;
238238
239+ // A helper class for managing fake global names. Creates the globals and
240+ // provides mappings for using them.
241+ class GlobalHelper {
242+ Module& module ;
243+
244+ public:
245+ GlobalHelper (Module& module ) : module (module ) {
246+ map[i32 ] = " bysyncify_fake_call_global_i32" ;
247+ map[i64 ] = " bysyncify_fake_call_global_i64" ;
248+ map[f32 ] = " bysyncify_fake_call_global_f32" ;
249+ map[f64 ] = " bysyncify_fake_call_global_f64" ;
250+ Builder builder (module );
251+ for (auto & pair : map) {
252+ auto type = pair.first ;
253+ auto name = pair.second ;
254+ rev[name] = type;
255+ module .addGlobal (builder.makeGlobal (
256+ name, type, LiteralUtils::makeZero (type, module ), Builder::Mutable));
257+ }
258+ }
259+
260+ ~GlobalHelper () {
261+ for (auto & pair : map) {
262+ auto name = pair.second ;
263+ module .removeGlobal (name);
264+ }
265+ }
266+
267+ Name getName (Type type) { return map.at (type); }
268+
269+ Type getTypeOrNone (Name name) {
270+ auto iter = rev.find (name);
271+ if (iter != rev.end ()) {
272+ return iter->second ;
273+ }
274+ return none;
275+ }
276+
277+ private:
278+ std::map<Type, Name> map;
279+ std::map<Name, Type> rev;
280+ };
281+
239282// Analyze the entire module to see which calls may change the state, that
240283// is, start an unwind or rewind), either in itself or in something called
241284// by it.
285+ // Handles global module management, needed from the various parts of this
286+ // transformation.
242287class ModuleAnalyzer {
243288 Module& module ;
244289 bool canIndirectChangeState;
@@ -268,7 +313,8 @@ class ModuleAnalyzer {
268313 ModuleAnalyzer (Module& module ,
269314 std::function<bool (Name, Name)> canImportChangeState,
270315 bool canIndirectChangeState)
271- : module (module ), canIndirectChangeState(canIndirectChangeState) {
316+ : module (module ), canIndirectChangeState(canIndirectChangeState),
317+ globals (module ) {
272318 // Scan to see which functions can directly change the state.
273319 // Also handle the bysyncify imports, removing them (as we will implement
274320 // them later), and replace calls to them with calls to the later proper
@@ -429,6 +475,8 @@ class ModuleAnalyzer {
429475 }
430476 return walker.canChangeState ;
431477 }
478+
479+ GlobalHelper globals;
432480};
433481
434482// Checks if something performs a call: either a direct or indirect call,
@@ -487,8 +535,9 @@ struct BysyncifyFlow : public Pass {
487535 BysyncifyFlow (ModuleAnalyzer* analyzer) : analyzer(analyzer) {}
488536
489537 void
490- runOnFunction (PassRunner* runner, Module* module_, Function* func ) override {
538+ runOnFunction (PassRunner* runner, Module* module_, Function* func_ ) override {
491539 module = module_;
540+ func = func_;
492541 // If the function cannot change our state, we have nothing to do -
493542 // we will never unwind or rewind the stack here.
494543 if (!analyzer->needsInstrumentation (func)) {
@@ -518,6 +567,7 @@ struct BysyncifyFlow : public Pass {
518567 std::unique_ptr<BysyncifyBuilder> builder;
519568
520569 Module* module ;
570+ Function* func;
521571
522572 // Each call in the function has an index, noted during unwind and checked
523573 // during rewind.
@@ -527,12 +577,15 @@ struct BysyncifyFlow : public Pass {
527577 if (!analyzer->canChangeState (curr)) {
528578 return makeMaybeSkip (curr);
529579 }
530- // The IR is in flat form, which makes this much simpler. We basically
531- // need to add skips to avoid code when rewinding, and checks around calls
532- // with unwinding/rewinding support.
580+ // The IR is in flat form, which makes this much simpler: there are no
581+ // unnecessarily nested side effects or control flow, so we can add
582+ // skips for rewinding in an easy manner, putting a single if around
583+ // whole chunks of code. Also calls are separated out so that it is easy
584+ // to add the necessary checks for them for unwinding/rewinding support.
533585 //
534586 // Aside from that, we must "linearize" all control flow so that we can
535- // reach the right part when unwinding. For example, for an if we do this:
587+ // reach the right part when rewinding, which is done by always skipping
588+ // forward. For example, for an if we do this:
536589 //
537590 // if (condition()) {
538591 // side1();
@@ -595,25 +648,28 @@ struct BysyncifyFlow : public Pass {
595648 // must be in one of the children.
596649 assert (!analyzer->canChangeState (iff->condition ));
597650 // We must linearize this, which means we pass through both arms if we
598- // are rewinding. This is pretty simple as in flat form the if condition
599- // is either a const or a get, so easily copyable.
600- // Start with the first arm, for which we reuse the original if.
601- auto * otherArm = iff->ifFalse ;
602- iff->ifFalse = nullptr ;
603- auto * originalCondition = iff->condition ;
651+ // are rewinding.
652+ if (!iff->ifFalse ) {
653+ iff->condition = builder->makeBinary (
654+ OrInt32, iff->condition , builder->makeStateCheck (State::Rewinding));
655+ iff->ifTrue = process (iff->ifTrue );
656+ iff->finalize ();
657+ return iff;
658+ }
659+ auto conditionTemp = builder->addVar (func, i32 );
660+ iff->condition = builder->makeLocalTee (conditionTemp, iff->condition );
604661 iff->condition = builder->makeBinary (
605- OrInt32, originalCondition , builder->makeStateCheck (State::Rewinding));
662+ OrInt32, iff-> condition , builder->makeStateCheck (State::Rewinding));
606663 iff->ifTrue = process (iff->ifTrue );
664+ auto * otherArm = iff->ifFalse ;
665+ iff->ifFalse = nullptr ;
607666 iff->finalize ();
608- if (!otherArm) {
609- return iff;
610- }
611667 // Add support for the second arm as well.
612668 auto * otherIf = builder->makeIf (
613669 builder->makeBinary (
614670 OrInt32,
615- builder->makeUnary (
616- EqZInt32, ExpressionManipulator::copy (originalCondition, * module )),
671+ builder->makeUnary (EqZInt32,
672+ builder-> makeLocalGet (conditionTemp, i32 )),
617673 builder->makeStateCheck (State::Rewinding)),
618674 process (otherArm));
619675 otherIf->finalize ();
@@ -640,27 +696,48 @@ struct BysyncifyFlow : public Pass {
640696 // change the state
641697 assert (doesCall (curr));
642698 assert (curr->type == none);
699+ // The case of a set is tricky: we must *not* execute the set when
700+ // unwinding, since at that point we have a fake value for the return,
701+ // and if we applied it to the local, it would end up saved and then
702+ // potentially used later - and with coalescing, that may interfere
703+ // with other things. Note also that at this point in the process we
704+ // have not yet written the load saving/loading code, so the optimizer
705+ // doesn't see that locals will have another use at the beginning and
706+ // end of the function. We don't do that yet because we don't want it
707+ // to force the final number of locals to be too high, but we also
708+ // must be careful to never touch a local unnecessarily to avoid bugs.
709+ // To implement this, write to a fake global; we'll fix it up after
710+ // BysyncifyLocals locals adds local saving/restoring.
711+ auto * set = curr->dynCast <LocalSet>();
712+ if (set) {
713+ auto name = analyzer->globals .getName (set->value ->type );
714+ curr = builder->makeGlobalSet (name, set->value );
715+ set->value = builder->makeGlobalGet (name, set->value ->type );
716+ }
717+ // Instrument the call itself (or, if a drop, the drop+call).
643718 auto index = callIndex++;
644719 // Execute the call, if either normal execution, or if rewinding and this
645720 // is the right call to go into.
646721 // TODO: we can read the next call index once in each function (but should
647722 // avoid saving/restoring that local later)
648- return builder->makeIf (
723+ curr = builder->makeIf (
649724 builder->makeIf (builder->makeStateCheck (State::Normal),
650725 builder->makeConst (Literal (int32_t (1 ))),
651726 makeCallIndexPeek (index)),
652- builder->makeSequence (curr, makePossibleUnwind (index)));
727+ builder->makeSequence (curr, makePossibleUnwind (index, set)));
728+ return curr;
653729 }
654730
655- Expression* makePossibleUnwind (Index index) {
731+ Expression* makePossibleUnwind (Index index, Expression* ifNotUnwinding ) {
656732 // In this pass we emit an "unwind" as a call to a fake intrinsic. We
657733 // will implement it in the later pass. (We can't create the unwind block
658734 // target here, as the optimizer would remove it later; we can only add
659735 // it when we add its contents, later.)
660736 return builder->makeIf (
661737 builder->makeStateCheck (State::Unwinding),
662738 builder->makeCall (
663- BYSYNCIFY_UNWIND, {builder->makeConst (Literal (int32_t (index)))}, none));
739+ BYSYNCIFY_UNWIND, {builder->makeConst (Literal (int32_t (index)))}, none),
740+ ifNotUnwinding);
664741 }
665742
666743 Expression* makeCallIndexPeek (Index index) {
@@ -707,6 +784,29 @@ struct BysyncifyLocals : public WalkerPass<PostWalker<BysyncifyLocals>> {
707784 }
708785 }
709786
787+ void visitGlobalSet (GlobalSet* curr) {
788+ auto type = analyzer->globals .getTypeOrNone (curr->name );
789+ if (type != none) {
790+ replaceCurrent (
791+ builder->makeLocalSet (getFakeCallLocal (type), curr->value ));
792+ }
793+ }
794+
795+ void visitGlobalGet (GlobalGet* curr) {
796+ auto type = analyzer->globals .getTypeOrNone (curr->name );
797+ if (type != none) {
798+ replaceCurrent (builder->makeLocalGet (getFakeCallLocal (type), type));
799+ }
800+ }
801+
802+ Index getFakeCallLocal (Type type) {
803+ auto iter = fakeCallLocals.find (type);
804+ if (iter != fakeCallLocals.end ()) {
805+ return iter->second ;
806+ }
807+ return fakeCallLocals[type] = builder->addVar (getFunction (), type);
808+ }
809+
710810 void doWalkFunction (Function* func) {
711811 // If the function cannot change our state, we have nothing to do -
712812 // we will never unwind or rewind the stack here.
@@ -763,6 +863,7 @@ struct BysyncifyLocals : public WalkerPass<PostWalker<BysyncifyLocals>> {
763863
764864 Index rewindIndex;
765865 Index numPreservableLocals;
866+ std::map<Type, Index> fakeCallLocals;
766867
767868 Expression* makeLocalLoading () {
768869 if (numPreservableLocals == 0 ) {
@@ -884,13 +985,22 @@ struct Bysyncify : public Pass {
884985 PassRunner runner (module );
885986 runner.add (" flatten" );
886987 // Dce is useful here, since BysyncifyFlow makes control flow conditional,
887- // which may make unreachable code look reachable. We also do some other
888- // minimal optimization here in an unconditional way here, to counteract
889- // the flattening.
988+ // which may make unreachable code look reachable. It also lets us ignore
989+ // unreachable code here.
890990 runner.add (" dce" );
891- runner.add (" simplify-locals-nonesting" );
892- runner.add (" merge-blocks" );
893- runner.add (" vacuum" );
991+ if (optimize) {
992+ // Optimizing before BsyncifyFlow is crucial, especially coalescing,
993+ // because the flow changes add many branches, break up if-elses, etc.,
994+ // all of which extend the live ranges of locals. In other words, it is
995+ // not possible to coalesce well afterwards.
996+ runner.add (" simplify-locals-nonesting" );
997+ runner.add (" reorder-locals" );
998+ runner.add (" coalesce-locals" );
999+ runner.add (" simplify-locals-nonesting" );
1000+ runner.add (" reorder-locals" );
1001+ runner.add (" merge-blocks" );
1002+ runner.add (" vacuum" );
1003+ }
8941004 runner.add <BysyncifyFlow>(&analyzer);
8951005 runner.setIsNested (true );
8961006 runner.setValidateGlobally (false );
0 commit comments