@@ -105,19 +105,11 @@ struct CodeFolding
105105 Tail (Block* block) : expr(nullptr ), block(block), pointer(nullptr ) {}
106106 // For a break
107107 Tail (Expression* expr, Block* block)
108- : expr(expr), block(block), pointer(nullptr ) {
109- validate ();
110- }
108+ : expr(expr), block(block), pointer(nullptr ) {}
111109 Tail (Expression* expr, Expression** pointer)
112110 : expr(expr), block(nullptr ), pointer(pointer) {}
113111
114112 bool isFallthrough () const { return expr == nullptr ; }
115-
116- void validate () const {
117- if (expr && block) {
118- assert (block->list .back () == expr);
119- }
120- }
121113 };
122114
123115 // state
@@ -152,15 +144,13 @@ struct CodeFolding
152144 }
153145
154146 void visitBreak (Break* curr) {
155- if (curr->condition || curr-> value ) {
147+ if (curr->condition ) {
156148 unoptimizables.insert (curr->name );
157149 } else {
158- // we can only optimize if we are at the end of the parent block,
159- // and if the parent block does not return a value (we can't move
160- // elements out of it if there is a value being returned)
150+ // we can only optimize if we are at the end of the parent block.
151+ // TODO: Relax this.
161152 Block* parent = controlFlowStack.back ()->dynCast <Block>();
162- if (parent && curr == parent->list .back () &&
163- !parent->list .back ()->type .isConcrete ()) {
153+ if (parent && curr == parent->list .back ()) {
164154 breakTails[curr->name ].push_back (Tail (curr, parent));
165155 } else {
166156 unoptimizables.insert (curr->name );
@@ -222,24 +212,19 @@ struct CodeFolding
222212 if (unoptimizables.count (curr->name ) > 0 ) {
223213 return ;
224214 }
225- // we can't optimize a fallthrough value
226- if (curr->list .back ()->type .isConcrete ()) {
227- return ;
228- }
229215 auto iter = breakTails.find (curr->name );
230216 if (iter == breakTails.end ()) {
231217 return ;
232218 }
233- // looks promising
219+ // Looks promising.
234220 auto & tails = iter->second ;
235- // see if there is a fallthrough
236- bool hasFallthrough = true ;
237- for (auto * child : curr->list ) {
238- if (child->type == Type::unreachable) {
239- hasFallthrough = false ;
240- }
241- }
242- if (hasFallthrough) {
221+ // If the end of the block cannot be reached, then we don't need to include
222+ // it in the set of folded tails.
223+ bool includeFallthrough =
224+ !std::any_of (curr->list .begin (), curr->list .end (), [&](auto * child) {
225+ return child->type == Type::unreachable;
226+ });
227+ if (includeFallthrough) {
243228 tails.push_back ({Tail (curr)});
244229 }
245230 optimizeExpressionTails (tails, curr);
@@ -249,48 +234,34 @@ struct CodeFolding
249234 if (!curr->ifFalse ) {
250235 return ;
251236 }
252- // if both sides are identical, this is easy to fold
253- if (ExpressionAnalyzer::equal (curr->ifTrue , curr->ifFalse )) {
237+ // If both are blocks, look for a tail we can merge.
238+ auto * left = curr->ifTrue ->dynCast <Block>();
239+ auto * right = curr->ifFalse ->dynCast <Block>();
240+ // If one is a block and the other isn't, and the non-block is a tail of the
241+ // other, we can fold that - for our convenience, we just add a block and
242+ // run the rest of the optimization mormally.
243+ auto maybeAddBlock = [this ](Block* block, Expression*& other) -> Block* {
244+ // If other is a suffix of the block, wrap it in a block.
245+ if (block->list .empty () ||
246+ !ExpressionAnalyzer::equal (other, block->list .back ())) {
247+ return nullptr ;
248+ }
249+ // Do it, assign to the out param `other`, and return the block.
254250 Builder builder (*getModule ());
255- // remove if (4 bytes), remove one arm, add drop (1), add block (3),
256- // so this must be a net savings
257- markAsModified (curr);
258- auto * ret =
259- builder.makeSequence (builder.makeDrop (curr->condition ), curr->ifTrue );
260- // we must ensure we present the same type as the if had
261- ret->finalize (curr->type );
262- replaceCurrent (ret);
263- needEHFixups = true ;
264- } else {
265- // if both are blocks, look for a tail we can merge
266- auto * left = curr->ifTrue ->dynCast <Block>();
267- auto * right = curr->ifFalse ->dynCast <Block>();
268- // If one is a block and the other isn't, and the non-block is a tail
269- // of the other, we can fold that - for our convenience, we just add
270- // a block and run the rest of the optimization mormally.
271- auto maybeAddBlock = [this ](Block* block, Expression*& other) -> Block* {
272- // if other is a suffix of the block, wrap it in a block
273- if (block->list .empty () ||
274- !ExpressionAnalyzer::equal (other, block->list .back ())) {
275- return nullptr ;
276- }
277- // do it, assign to the out param `other`, and return the block
278- Builder builder (*getModule ());
279- auto * ret = builder.makeBlock (other);
280- other = ret;
281- return ret;
282- };
283- if (left && !right) {
284- right = maybeAddBlock (left, curr->ifFalse );
285- } else if (!left && right) {
286- left = maybeAddBlock (right, curr->ifTrue );
287- }
288- // we need nameless blocks, as if there is a name, someone might branch
289- // to the end, skipping the code we want to merge
290- if (left && right && !left->name .is () && !right->name .is ()) {
291- std::vector<Tail> tails = {Tail (left), Tail (right)};
292- optimizeExpressionTails (tails, curr);
293- }
251+ auto * ret = builder.makeBlock (other);
252+ other = ret;
253+ return ret;
254+ };
255+ if (left && !right) {
256+ right = maybeAddBlock (left, curr->ifFalse );
257+ } else if (!left && right) {
258+ left = maybeAddBlock (right, curr->ifTrue );
259+ }
260+ // We need nameless blocks, as if there is a name, someone might branch to
261+ // the end, skipping the code we want to merge.
262+ if (left && right && !left->name .is () && !right->name .is ()) {
263+ std::vector<Tail> tails = {Tail (left), Tail (right)};
264+ optimizeExpressionTails (tails, curr);
294265 }
295266 }
296267
@@ -315,10 +286,6 @@ struct CodeFolding
315286 if (needEHFixups) {
316287 EHUtils::handleBlockNestedPops (func, *getModule ());
317288 }
318- // if we did any work, types may need to be propagated
319- if (anotherPass) {
320- ReFinalize ().walkFunctionInModule (func, getModule ());
321- }
322289 }
323290 }
324291
@@ -372,6 +339,7 @@ struct CodeFolding
372339 // identical in all paths leading to the block exit can be merged.
373340 template <typename T>
374341 void optimizeExpressionTails (std::vector<Tail>& tails, T* curr) {
342+ auto oldType = curr->type ;
375343 if (tails.size () < 2 ) {
376344 return ;
377345 }
@@ -384,50 +352,49 @@ struct CodeFolding
384352 return ;
385353 }
386354 // if we were not modified, then we should be valid for processing
387- tail.validate ();
355+ assert (!tail.expr || !tail.block ||
356+ (tail.expr == tail.block ->list .back ()));
388357 }
389- // we can ignore the final br in a tail
390- auto effectiveSize = [&](const Tail& tail) {
391- auto ret = tail.block ->list .size ();
358+ auto getMergeable = [&](const Tail& tail, Index num) -> Expression* {
392359 if (!tail.isFallthrough ()) {
393- ret--;
360+ // If there is a branch value, it is the first mergeable item.
361+ auto * val = tail.expr ->cast <Break>()->value ;
362+ if (val && num == 0 ) {
363+ return val;
364+ }
365+ if (!val) {
366+ // Skip the branch instruction at the end; it is not part of the
367+ // merged tail.
368+ ++num;
369+ }
394370 }
395- return ret;
396- };
397- // the mergeable items do not include the final br in a tail
398- auto getMergeable = [&](const Tail& tail, Index num) {
399- return tail.block ->list [effectiveSize (tail) - num - 1 ];
371+ if (num >= tail.block ->list .size ()) {
372+ return nullptr ;
373+ }
374+ return tail.block ->list [tail.block ->list .size () - num - 1 ];
400375 };
401376 // we are going to remove duplicate elements and add a block.
402377 // so for this to make sense, we need the size of the duplicate
403378 // elements to be worth that extra block (although, there is
404379 // some chance the block would get merged higher up, see later)
405380 std::vector<Expression*> mergeable; // the elements we can merge
406- Index num = 0 ; // how many elements back from the tail to look at
407381 Index saved = 0 ; // how much we can save
408- while (1 ) {
409- // check if this num is still relevant
410- bool stop = false ;
411- for (auto & tail : tails) {
412- assert (tail.block );
413- if (num >= effectiveSize (tail)) {
414- // one of the lists is too short
415- stop = true ;
416- break ;
417- }
418- }
419- if (stop) {
382+ for (Index num = 0 ; true ; ++num) {
383+ auto * item = getMergeable (tails[0 ], num);
384+ if (!item) {
385+ // The list is too short.
420386 break ;
421387 }
422- auto * item = getMergeable (tails[ 0 ], num) ;
423- for (auto & tail : tails) {
424- if (! ExpressionAnalyzer::equal (item, getMergeable (tail, num))) {
425- // one of the lists has a different item
426- stop = true ;
388+ Index tail = 1 ;
389+ for (; tail < tails. size (); ++tail ) {
390+ auto * other = getMergeable (tails[ tail] , num);
391+ if (!other || ! ExpressionAnalyzer::equal (item, other)) {
392+ // Other tail too short or has a difference.
427393 break ;
428394 }
429395 }
430- if (stop) {
396+ if (tail != tails.size ()) {
397+ // We saw a tail without a matching item.
431398 break ;
432399 }
433400 // we may have found another one we can merge - can we move it?
@@ -436,7 +403,6 @@ struct CodeFolding
436403 }
437404 // we found another one we can merge
438405 mergeable.push_back (item);
439- num++;
440406 saved += Measurer::measure (item);
441407 }
442408 if (saved == 0 ) {
@@ -450,7 +416,7 @@ struct CodeFolding
450416 for (auto & tail : tails) {
451417 // it is enough to zero out the block, or leave just one
452418 // element, as then the block can be replaced with that
453- if (num >= tail.block ->list .size () - 1 ) {
419+ if (mergeable. size () >= tail.block ->list .size () - 1 ) {
454420 willEmptyBlock = true ;
455421 break ;
456422 }
@@ -483,44 +449,69 @@ struct CodeFolding
483449 }
484450 }
485451 }
452+
486453 // this is worth doing, do it!
487454 for (auto & tail : tails) {
488455 // remove the items we are merging / moving
489456 // first, mark them as modified, so we don't try to handle them
490457 // again in this pass, which might be buggy
491458 markAsModified (tail.block );
492459 // we must preserve the br if there is one
493- Expression* last = nullptr ;
460+ Break* branch = nullptr ;
494461 if (!tail.isFallthrough ()) {
495- last = tail.block ->list .back ();
496- tail.block ->list .pop_back ();
462+ branch = tail.block ->list .back ()->cast <Break>();
463+ if (branch->value ) {
464+ branch->value = nullptr ;
465+ } else {
466+ tail.block ->list .pop_back ();
467+ }
497468 }
498- for (Index i = 0 ; i < mergeable.size (); i++ ) {
469+ for (Index i = 0 ; i < mergeable.size (); ++i ) {
499470 tail.block ->list .pop_back ();
500471 }
501- if (!tail.isFallthrough ()) {
502- tail.block ->list .push_back (last);
472+ if (tail.isFallthrough ()) {
473+ // The block now ends in an expression that was previously in the middle
474+ // of the block, meaning it must have type none.
475+ tail.block ->finalize (Type::none);
476+ } else {
477+ tail.block ->list .push_back (branch);
478+ // The block still ends with the same branch it previously ended with,
479+ // so its type cannot have changed.
480+ tail.block ->finalize (tail.block ->type );
503481 }
504- // the block type may change if we removed unreachable stuff,
505- // but in general it should remain the same, as if it had a
506- // forced type it should remain, *and*, we don't have a
507- // fallthrough value (we would never get here), so a concrete
508- // type was not from that. I.e., any type on the block is
509- // either forced and/or from breaks with a value, so the
510- // type cannot be changed by moving code out.
511- tail.block ->finalize (tail.block ->type );
512482 }
513483 // since we managed a merge, then it might open up more opportunities later
514484 anotherPass = true ;
515485 // make a block with curr + the merged code
516486 Builder builder (*getModule ());
517487 auto * block = builder.makeBlock ();
518- block->list .push_back (curr);
488+ if constexpr (T::SpecificId == Expression::IfId) {
489+ // If we've moved all the contents out of both arms of the If, then we can
490+ // simplify the output by replacing it entirely with just a drop of the
491+ // condition.
492+ auto * iff = curr->template cast <If>();
493+ if (iff->ifTrue ->template cast <Block>()->list .empty () &&
494+ iff->ifFalse ->template cast <Block>()->list .empty ()) {
495+ block->list .push_back (builder.makeDrop (iff->condition ));
496+ } else {
497+ block->list .push_back (curr);
498+ }
499+ } else {
500+ block->list .push_back (curr);
501+ }
519502 while (!mergeable.empty ()) {
520503 block->list .push_back (mergeable.back ());
521504 mergeable.pop_back ();
522505 }
523- auto oldType = curr->type ;
506+ if constexpr (T::SpecificId == Expression::BlockId) {
507+ // If we didn't have a fallthrough tail because the end of the block was
508+ // not reachable, then we might have a concrete expression at the end of
509+ // the block even though the value produced by the block has been moved
510+ // out of it. If so, drop that expression.
511+ auto * currBlock = curr->template cast <Block>();
512+ currBlock->list .back () =
513+ builder.dropIfConcretelyTyped (currBlock->list .back ());
514+ }
524515 // NB: we template-specialize so that this calls the proper finalizer for
525516 // the type
526517 curr->finalize ();
@@ -553,9 +544,6 @@ struct CodeFolding
553544 if (tail.block && modifieds.count (tail.block ) > 0 ) {
554545 return true ;
555546 }
556- // if we were not modified, then we should be valid for
557- // processing
558- tail.validate ();
559547 return false ;
560548 }),
561549 tails.end ());
0 commit comments