Skip to content

Commit 01a4bfd

Browse files
authored
Add a pass to lower unaligned loads and stores (#2078)
This replaces the wasm2js code that lowered unaligned loads and stores pessimistically, into 1-byte-aligned loads and stores. The new pass does the optimal thing, keeping 2-byte alignment where possible. It is also nicer as a standalone pass: it has the simple property that after it runs, all loads and stores are aligned, instead of that logic being scattered inside wasm2js.
1 parent 3b4d901 commit 01a4bfd

11 files changed

Lines changed: 936 additions & 148 deletions

build-js.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ echo "building shared bitcode"
9191
$BINARYEN_SRC/ir/LocalGraph.cpp \
9292
$BINARYEN_SRC/ir/ReFinalize.cpp \
9393
$BINARYEN_SRC/passes/pass.cpp \
94+
$BINARYEN_SRC/passes/AlignmentLowering.cpp \
9495
$BINARYEN_SRC/passes/CoalesceLocals.cpp \
9596
$BINARYEN_SRC/passes/DeadArgumentElimination.cpp \
9697
$BINARYEN_SRC/passes/CodeFolding.cpp \

src/passes/AlignmentLowering.cpp

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
/*
2+
* Copyright 2017 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
//
18+
// Lowers unaligned loads and stores into aligned loads and stores
19+
// that are smaller. This leaves only aligned operations.
20+
//
21+
22+
#include "ir/bits.h"
23+
#include "pass.h"
24+
#include "wasm-builder.h"
25+
#include "wasm.h"
26+
27+
namespace wasm {
28+
29+
// Walker pass that rewrites each load or store whose declared alignment
// differs from its access width into a sequence of narrower accesses, each of
// which is aligned to its own width. Only i32 accesses are handled so far.
struct AlignmentLowering : public WalkerPass<PostWalker<AlignmentLowering>> {
  // Replaces an under-aligned i32 load with a block that first saves the
  // pointer in a fresh i32 local and then ORs together narrower unsigned
  // loads. The piece at the lowest offset supplies the lowest bits; every
  // following piece is shifted left by its byte position times eight.
  void visitLoad(Load* curr) {
    const bool alreadyAligned =
      curr->align == 0 || curr->align == curr->bytes;
    if (alreadyAligned) {
      return;
    }
    Builder builder(*getModule());
    if (curr->type == unreachable) {
      // Only the pointer expression is kept in this case.
      replaceCurrent(curr->ptr);
      return;
    }
    assert(curr->type == i32); // TODO: i64, f32, f64
    auto ptrLocal = builder.addVar(getFunction(), i32);

    // Unsigned load of |width| bytes, aligned to |width|, located |delta|
    // bytes past the original offset and addressed through the saved pointer.
    auto loadPiece = [&](unsigned width, unsigned delta) -> Expression* {
      return builder.makeLoad(width,
                              false,
                              curr->offset + delta,
                              width,
                              builder.makeGetLocal(ptrLocal, i32),
                              i32);
    };
    // The same piece, moved |shift| bits up into its final position.
    auto shiftedPiece =
      [&](unsigned width, unsigned delta, int32_t shift) -> Expression* {
      return builder.makeBinary(ShlInt32,
                                loadPiece(width, delta),
                                builder.makeConst(Literal(int32_t(shift))));
    };

    Expression* combined;
    switch (curr->bytes) {
      case 2: {
        // Two single-byte loads.
        combined = builder.makeBinary(
          OrInt32, loadPiece(1, 0), shiftedPiece(1, 1, 8));
        if (curr->signed_) {
          // The pieces were loaded unsigned, so re-apply the sign extension
          // of the original 2-byte load.
          combined = Bits::makeSignExt(combined, 2, *getModule());
        }
        break;
      }
      case 4: {
        if (curr->align == 1) {
          // Four single-byte loads, combined pairwise.
          auto* lowHalf = builder.makeBinary(
            OrInt32, loadPiece(1, 0), shiftedPiece(1, 1, 8));
          auto* highHalf = builder.makeBinary(
            OrInt32, shiftedPiece(1, 2, 16), shiftedPiece(1, 3, 24));
          combined = builder.makeBinary(OrInt32, lowHalf, highHalf);
        } else if (curr->align == 2) {
          // Two 2-byte loads keep the 2-byte alignment.
          combined = builder.makeBinary(
            OrInt32, loadPiece(2, 0), shiftedPiece(2, 2, 16));
        } else {
          WASM_UNREACHABLE();
        }
        break;
      }
      default: {
        WASM_UNREACHABLE();
      }
    }
    replaceCurrent(
      builder.makeBlock({builder.makeSetLocal(ptrLocal, curr->ptr), combined}));
  }

  // Replaces an under-aligned i32 store with a block that saves the pointer
  // and the value in fresh i32 locals and then emits narrower stores. The
  // store at the lowest offset receives the value as-is; each later one
  // receives the value logically shifted right by its byte position times
  // eight. No masking is applied here.
  void visitStore(Store* curr) {
    const bool alreadyAligned =
      curr->align == 0 || curr->align == curr->bytes;
    if (alreadyAligned) {
      return;
    }
    Builder builder(*getModule());
    if (curr->type == unreachable) {
      // Keep both operands, in their original order, as dropped expressions.
      replaceCurrent(builder.makeBlock(
        {builder.makeDrop(curr->ptr), builder.makeDrop(curr->value)}));
      return;
    }
    assert(curr->value->type == i32); // TODO: i64, f32, f64
    // Allocate the pointer local before the value local.
    auto ptrLocal = builder.addVar(getFunction(), i32);
    auto valueLocal = builder.addVar(getFunction(), i32);
    auto* seq =
      builder.makeBlock({builder.makeSetLocal(ptrLocal, curr->ptr),
                         builder.makeSetLocal(valueLocal, curr->value)});

    // Appends a store of |width| bytes, aligned to |width|, at |delta| bytes
    // past the original offset. The stored operand is the saved value,
    // shifted right by |shift| bits when |shift| is non-zero.
    auto emitPiece = [&](unsigned width, unsigned delta, int32_t shift) {
      Expression* piece = builder.makeGetLocal(valueLocal, i32);
      if (shift != 0) {
        piece = builder.makeBinary(
          ShrUInt32, piece, builder.makeConst(Literal(int32_t(shift))));
      }
      seq->list.push_back(builder.makeStore(width,
                                            curr->offset + delta,
                                            width,
                                            builder.makeGetLocal(ptrLocal, i32),
                                            piece,
                                            i32));
    };

    switch (curr->bytes) {
      case 2: {
        emitPiece(1, 0, 0);
        emitPiece(1, 1, 8);
        break;
      }
      case 4: {
        if (curr->align == 1) {
          emitPiece(1, 0, 0);
          emitPiece(1, 1, 8);
          emitPiece(1, 2, 16);
          emitPiece(1, 3, 24);
        } else if (curr->align == 2) {
          emitPiece(2, 0, 0);
          emitPiece(2, 2, 16);
        } else {
          WASM_UNREACHABLE();
        }
        break;
      }
      default: {
        WASM_UNREACHABLE();
      }
    }
    seq->finalize();
    replaceCurrent(seq);
  }
};
211+
212+
// Factory for the alignment-lowering pass: returns a newly allocated
// AlignmentLowering instance through the generic Pass interface.
Pass* createAlignmentLoweringPass() {
  Pass* pass = new AlignmentLowering();
  return pass;
}
213+
214+
} // namespace wasm

src/passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ add_custom_command(
55

66
SET(passes_SOURCES
77
pass.cpp
8+
AlignmentLowering.cpp
89
CoalesceLocals.cpp
910
CodePushing.cpp
1011
CodeFolding.cpp

src/passes/pass.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ std::string PassRegistry::getPassDescription(std::string name) {
6969
void PassRegistry::registerPasses() {
7070
registerPass(
7171
"dae", "removes arguments to calls in an lto-like manner", createDAEPass);
72+
registerPass("alignment-lowering",
73+
"lower unaligned loads and stores to smaller aligned ones",
74+
createAlignmentLoweringPass);
7275
registerPass("dae-optimizing",
7376
"removes arguments to calls in an lto-like manner, and "
7477
"optimizes where we removed",

src/passes/passes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ namespace wasm {
2222
class Pass;
2323

2424
// All passes:
25+
Pass* createAlignmentLoweringPass();
2526
Pass* createCoalesceLocalsPass();
2627
Pass* createCoalesceLocalsWithLearningPass();
2728
Pass* createCodeFoldingPass();

src/wasm2js.h

Lines changed: 5 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ Ref Wasm2JSBuilder::processWasm(Module* wasm, Name funcName) {
284284
// #1480
285285
runner.add("flatten");
286286
runner.add("i64-to-i32-lowering");
287+
runner.add("alignment-lowering");
287288
// Next, optimize that as best we can. This should not generate
288289
// non-JS-friendly things.
289290
if (options.optimizeLevel > 0) {
@@ -982,41 +983,8 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
982983
}
983984

984985
Ref visitLoad(Load* curr) {
985-
if (curr->align != 0 && curr->align < curr->bytes) {
986-
// set the pointer to a local
987-
ScopedTemp temp(i32, parent, func);
988-
SetLocal set(allocator);
989-
set.index = func->getLocalIndex(temp.getName());
990-
set.value = curr->ptr;
991-
Ref ptrSet = visit(&set, NO_RESULT);
992-
GetLocal get(allocator);
993-
get.index = func->getLocalIndex(temp.getName());
994-
// fake loads
995-
Load load = *curr;
996-
load.ptr = &get;
997-
load.bytes = 1; // do the worst
998-
load.signed_ = false;
999-
Ref rest;
1000-
switch (curr->type) {
1001-
case i32: {
1002-
rest = makeAsmCoercion(visit(&load, EXPRESSION_RESULT), ASM_INT);
1003-
for (size_t i = 1; i < curr->bytes; i++) {
1004-
++load.offset;
1005-
Ref add =
1006-
makeAsmCoercion(visit(&load, EXPRESSION_RESULT), ASM_INT);
1007-
add = ValueBuilder::makeBinary(
1008-
add, LSHIFT, ValueBuilder::makeNum(8 * i));
1009-
rest = ValueBuilder::makeBinary(rest, OR, add);
1010-
}
1011-
break;
1012-
}
1013-
default: {
1014-
std::cerr << "Unhandled type in load: " << curr->type << std::endl;
1015-
abort();
1016-
}
1017-
}
1018-
return ValueBuilder::makeSeq(ptrSet, rest);
1019-
}
986+
// Unaligned loads and stores must have been fixed up already.
987+
assert(curr->align == 0 || curr->align == curr->bytes);
1020988
// normal load
1021989
Ref ptr = makePointer(curr->ptr, curr->offset);
1022990
Ref ret;
@@ -1110,68 +1078,8 @@ Ref Wasm2JSBuilder::processFunctionBody(Module* m,
11101078
}
11111079
// FIXME if memory growth, store ptr cannot contain a function call
11121080
// also other stores to memory, check them, all makeSub's
1113-
if (curr->align != 0 && curr->align < curr->bytes) {
1114-
// set the pointer to a local
1115-
ScopedTemp temp(i32, parent, func);
1116-
SetLocal set(allocator);
1117-
set.index = func->getLocalIndex(temp.getName());
1118-
set.value = curr->ptr;
1119-
Ref ptrSet = visit(&set, NO_RESULT);
1120-
GetLocal get(allocator);
1121-
get.index = func->getLocalIndex(temp.getName());
1122-
// set the value to a local
1123-
ScopedTemp tempValue(curr->value->type, parent, func);
1124-
SetLocal setValue(allocator);
1125-
setValue.index = func->getLocalIndex(tempValue.getName());
1126-
setValue.value = curr->value;
1127-
Ref valueSet = visit(&setValue, NO_RESULT);
1128-
GetLocal getValue(allocator);
1129-
getValue.index = func->getLocalIndex(tempValue.getName());
1130-
// fake stores
1131-
Store store = *curr;
1132-
store.ptr = &get;
1133-
store.bytes = 1; // do the worst
1134-
Ref rest;
1135-
switch (curr->valueType) {
1136-
case i32: {
1137-
Const _255(allocator);
1138-
_255.value = Literal(int32_t(255));
1139-
_255.type = i32;
1140-
for (size_t i = 0; i < curr->bytes; i++) {
1141-
Const shift(allocator);
1142-
shift.value = Literal(int32_t(8 * i));
1143-
shift.type = i32;
1144-
Binary shifted(allocator);
1145-
shifted.op = ShrUInt32;
1146-
shifted.left = &getValue;
1147-
shifted.right = &shift;
1148-
shifted.type = i32;
1149-
Binary anded(allocator);
1150-
anded.op = AndInt32;
1151-
anded.left = i > 0 ? static_cast<Expression*>(&shifted)
1152-
: static_cast<Expression*>(&getValue);
1153-
anded.right = &_255;
1154-
anded.type = i32;
1155-
store.value = &anded;
1156-
Ref part = visit(&store, NO_RESULT);
1157-
if (i == 0) {
1158-
rest = part;
1159-
} else {
1160-
rest = ValueBuilder::makeSeq(rest, part);
1161-
}
1162-
++store.offset;
1163-
}
1164-
break;
1165-
}
1166-
default: {
1167-
std::cerr << "Unhandled type in store: " << curr->valueType
1168-
<< std::endl;
1169-
abort();
1170-
}
1171-
}
1172-
return ValueBuilder::makeSeq(ValueBuilder::makeSeq(ptrSet, valueSet),
1173-
rest);
1174-
}
1081+
// Unaligned loads and stores must have been fixed up already.
1082+
assert(curr->align == 0 || curr->align == curr->bytes);
11751083
// normal store
11761084
Ref ptr = makePointer(curr->ptr, curr->offset);
11771085
Ref value = visit(curr->value, EXPRESSION_RESULT);

0 commit comments

Comments
 (0)