Skip to content

Commit f8e1622

Browse files
authored
Use IRBuilder in the binary parser (#6963)
IRBuilder is a utility for turning arbitrary valid streams of Wasm instructions into valid Binaryen IR. It is already used in the text parser, so now use it in the binary parser as well. Since the IRBuilder API for building each instruction requires only the information that the binary and text formats include as immediates to that instruction, the parser is now much simpler than before. In particular, it does not need to manage a stack of instructions to figure out what the children of each expression should be; IRBuilder handles this instead. There are some differences between the IR constructed by IRBuilder and the IR the binary parser constructed before this change. Most importantly, IRBuilder generates better multivalue code because it avoids eagerly breaking up multivalue results into individual components that might need to be immediately reassembled into a tuple. It also parses try-delegate more correctly, allowing the delegate to target arbitrary labels, not just other `try`s. There are also a couple of superficial differences in the generated label and scratch local names. As part of this change, add support for recording binary source locations in IRBuilder.
1 parent 6f0f2e0 commit f8e1622

70 files changed

Lines changed: 5003 additions & 7938 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

src/wasm-binary.h

Lines changed: 9 additions & 173 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "ir/module-utils.h"
3030
#include "parsing.h"
3131
#include "wasm-builder.h"
32+
#include "wasm-ir-builder.h"
3233
#include "wasm-traversal.h"
3334
#include "wasm-validator.h"
3435
#include "wasm.h"
@@ -1543,8 +1544,6 @@ class WasmBinaryReader {
15431544
Signature getSignatureByTypeIndex(Index index);
15441545
Signature getSignatureByFunctionIndex(Index index);
15451546

1546-
size_t nextLabel;
1547-
15481547
Name getNextLabel();
15491548

15501549
// We read the names section first so we know in advance what names various
@@ -1573,67 +1572,19 @@ class WasmBinaryReader {
15731572
void readVars();
15741573
void setLocalNames(Function& func, Index i);
15751574

1575+
Result<> readInst();
1576+
15761577
void readExports();
15771578

15781579
// The strings in the strings section (which are referred to by StringConst).
15791580
std::vector<Name> strings;
15801581
void readStrings();
1582+
Name getIndexedString();
15811583

15821584
Expression* readExpression();
15831585
void readGlobals();
15841586

1585-
struct BreakTarget {
1586-
Name name;
1587-
Type type;
1588-
BreakTarget(Name name, Type type) : name(name), type(type) {}
1589-
};
1590-
std::vector<BreakTarget> breakStack;
1591-
// the names that breaks target. this lets us know if a block has breaks to it
1592-
// or not.
1593-
std::unordered_set<Name> breakTargetNames;
1594-
// the names that delegates target.
1595-
std::unordered_set<Name> exceptionTargetNames;
1596-
1597-
std::vector<Expression*> expressionStack;
1598-
1599-
// Control flow structure parsing: these have not just the normal binary
1600-
// data for an instruction, but also some bytes later on like "end" or "else".
1601-
// We must be aware of the connection between those things, for debug info.
1602-
std::vector<Expression*> controlFlowStack;
1603-
1604-
// Called when we parse the beginning of a control flow structure.
1605-
void startControlFlow(Expression* curr);
1606-
1607-
// set when we know code is unreachable in the sense of the wasm spec: we are
1608-
// in a block and after an unreachable element. this helps parse stacky wasm
1609-
// code, which can be unsuitable for our IR when unreachable.
1610-
bool unreachableInTheWasmSense;
1611-
1612-
// set when the current code being processed will not be emitted in the
1613-
// output, which is the case when it is literally unreachable, for example,
1614-
// (block $a
1615-
// (unreachable)
1616-
// (block $b
1617-
// ;; code here is reachable in the wasm sense, even though $b as a whole
1618-
// ;; is not
1619-
// (unreachable)
1620-
// ;; code here is unreachable in the wasm sense
1621-
// )
1622-
// )
1623-
bool willBeIgnored;
1624-
1625-
BinaryConsts::ASTNodes lastSeparator = BinaryConsts::End;
1626-
1627-
// process a block-type scope, until an end or else marker, or the end of the
1628-
// function
1629-
void processExpressions();
1630-
void skipUnreachableCode();
1631-
1632-
void pushExpression(Expression* curr);
1633-
Expression* popExpression();
1634-
Expression* popNonVoidExpression();
1635-
Expression* popTuple(size_t numElems);
1636-
Expression* popTypedExpression(Type type);
1587+
IRBuilder builder;
16371588

16381589
// validations that cannot be performed on the Module
16391590
void validateBinary();
@@ -1663,127 +1614,12 @@ class WasmBinaryReader {
16631614
void readNextDebugLocation();
16641615
void readSourceMapHeader();
16651616

1666-
// AST reading
1667-
int depth = 0; // only for debugging
1668-
1669-
BinaryConsts::ASTNodes readExpression(Expression*& curr);
1670-
void pushBlockElements(Block* curr, Type type, size_t start);
1671-
void visitBlock(Block* curr);
1672-
1673-
// Gets a block of expressions. If it's just one, return that singleton.
1674-
Expression* getBlockOrSingleton(Type type);
1675-
1676-
BreakTarget getBreakTarget(int32_t offset);
1677-
Name getExceptionTargetName(int32_t offset);
1678-
16791617
Index readMemoryAccess(Address& alignment, Address& offset);
1618+
std::tuple<Name, Address, Address> getMemarg();
16801619

1681-
void visitIf(If* curr);
1682-
void visitLoop(Loop* curr);
1683-
void visitBreak(Break* curr, uint8_t code);
1684-
void visitSwitch(Switch* curr);
1685-
void visitCall(Call* curr);
1686-
void visitCallIndirect(CallIndirect* curr);
1687-
void visitLocalGet(LocalGet* curr);
1688-
void visitLocalSet(LocalSet* curr, uint8_t code);
1689-
void visitGlobalGet(GlobalGet* curr);
1690-
void visitGlobalSet(GlobalSet* curr);
1691-
bool maybeVisitLoad(Expression*& out,
1692-
uint8_t code,
1693-
std::optional<BinaryConsts::ASTNodes> prefix);
1694-
bool maybeVisitStore(Expression*& out,
1695-
uint8_t code,
1696-
std::optional<BinaryConsts::ASTNodes> prefix);
1697-
bool maybeVisitNontrappingTrunc(Expression*& out, uint32_t code);
1698-
bool maybeVisitAtomicRMW(Expression*& out, uint8_t code);
1699-
bool maybeVisitAtomicCmpxchg(Expression*& out, uint8_t code);
1700-
bool maybeVisitAtomicWait(Expression*& out, uint8_t code);
1701-
bool maybeVisitAtomicNotify(Expression*& out, uint8_t code);
1702-
bool maybeVisitAtomicFence(Expression*& out, uint8_t code);
1703-
bool maybeVisitConst(Expression*& out, uint8_t code);
1704-
bool maybeVisitUnary(Expression*& out, uint8_t code);
1705-
bool maybeVisitBinary(Expression*& out, uint8_t code);
1706-
bool maybeVisitTruncSat(Expression*& out, uint32_t code);
1707-
bool maybeVisitSIMDBinary(Expression*& out, uint32_t code);
1708-
bool maybeVisitSIMDUnary(Expression*& out, uint32_t code);
1709-
bool maybeVisitSIMDConst(Expression*& out, uint32_t code);
1710-
bool maybeVisitSIMDStore(Expression*& out, uint32_t code);
1711-
bool maybeVisitSIMDExtract(Expression*& out, uint32_t code);
1712-
bool maybeVisitSIMDReplace(Expression*& out, uint32_t code);
1713-
bool maybeVisitSIMDShuffle(Expression*& out, uint32_t code);
1714-
bool maybeVisitSIMDTernary(Expression*& out, uint32_t code);
1715-
bool maybeVisitSIMDShift(Expression*& out, uint32_t code);
1716-
bool maybeVisitSIMDLoad(Expression*& out, uint32_t code);
1717-
bool maybeVisitSIMDLoadStoreLane(Expression*& out, uint32_t code);
1718-
bool maybeVisitMemoryInit(Expression*& out, uint32_t code);
1719-
bool maybeVisitDataDrop(Expression*& out, uint32_t code);
1720-
bool maybeVisitMemoryCopy(Expression*& out, uint32_t code);
1721-
bool maybeVisitMemoryFill(Expression*& out, uint32_t code);
1722-
bool maybeVisitTableSize(Expression*& out, uint32_t code);
1723-
bool maybeVisitTableGrow(Expression*& out, uint32_t code);
1724-
bool maybeVisitTableFill(Expression*& out, uint32_t code);
1725-
bool maybeVisitTableCopy(Expression*& out, uint32_t code);
1726-
bool maybeVisitTableInit(Expression*& out, uint32_t code);
1727-
bool maybeVisitRefI31(Expression*& out, uint32_t code);
1728-
bool maybeVisitI31Get(Expression*& out, uint32_t code);
1729-
bool maybeVisitRefTest(Expression*& out, uint32_t code);
1730-
bool maybeVisitRefCast(Expression*& out, uint32_t code);
1731-
bool maybeVisitBrOn(Expression*& out, uint32_t code);
1732-
bool maybeVisitStructNew(Expression*& out, uint32_t code);
1733-
bool maybeVisitStructGet(Expression*& out, uint32_t code);
1734-
bool maybeVisitStructSet(Expression*& out, uint32_t code);
1735-
bool maybeVisitArrayNewData(Expression*& out, uint32_t code);
1736-
bool maybeVisitArrayNewElem(Expression*& out, uint32_t code);
1737-
bool maybeVisitArrayNewFixed(Expression*& out, uint32_t code);
1738-
bool maybeVisitArrayGet(Expression*& out, uint32_t code);
1739-
bool maybeVisitArraySet(Expression*& out, uint32_t code);
1740-
bool maybeVisitArrayLen(Expression*& out, uint32_t code);
1741-
bool maybeVisitArrayCopy(Expression*& out, uint32_t code);
1742-
bool maybeVisitArrayFill(Expression*& out, uint32_t code);
1743-
bool maybeVisitArrayInit(Expression*& out, uint32_t code);
1744-
bool maybeVisitStringNew(Expression*& out, uint32_t code);
1745-
bool maybeVisitStringAsWTF16(Expression*& out, uint32_t code);
1746-
bool maybeVisitStringConst(Expression*& out, uint32_t code);
1747-
bool maybeVisitStringMeasure(Expression*& out, uint32_t code);
1748-
bool maybeVisitStringEncode(Expression*& out, uint32_t code);
1749-
bool maybeVisitStringConcat(Expression*& out, uint32_t code);
1750-
bool maybeVisitStringEq(Expression*& out, uint32_t code);
1751-
bool maybeVisitStringWTF16Get(Expression*& out, uint32_t code);
1752-
bool maybeVisitStringSliceWTF(Expression*& out, uint32_t code);
1753-
void visitSelect(Select* curr, uint8_t code);
1754-
void visitReturn(Return* curr);
1755-
void visitMemorySize(MemorySize* curr);
1756-
void visitMemoryGrow(MemoryGrow* curr);
1757-
void visitNop(Nop* curr);
1758-
void visitUnreachable(Unreachable* curr);
1759-
void visitDrop(Drop* curr);
1760-
void visitRefNull(RefNull* curr);
1761-
void visitRefIsNull(RefIsNull* curr);
1762-
void visitRefFunc(RefFunc* curr);
1763-
void visitRefEq(RefEq* curr);
1764-
void visitTableGet(TableGet* curr);
1765-
void visitTableSet(TableSet* curr);
1766-
void visitTryOrTryInBlock(Expression*& out);
1767-
void visitTryTable(TryTable* curr);
1768-
void visitThrow(Throw* curr);
1769-
void visitRethrow(Rethrow* curr);
1770-
void visitThrowRef(ThrowRef* curr);
1771-
void visitCallRef(CallRef* curr);
1772-
void visitRefAsCast(RefCast* curr, uint32_t code);
1773-
void visitRefAs(RefAs* curr, uint8_t code);
1774-
void visitContNew(ContNew* curr);
1775-
void visitContBind(ContBind* curr);
1776-
void visitResume(Resume* curr);
1777-
void visitSuspend(Suspend* curr);
1778-
1779-
[[noreturn]] void throwError(std::string text);
1780-
1781-
// Struct/Array instructions have an unnecessary heap type that is just for
1782-
// validation (except for the case of unreachability, but that's not a problem
1783-
// anyhow, we can ignore it there). That is, we also have a reference typed
1784-
// child from which we can infer the type anyhow, and we just need to check
1785-
// that type is the same.
1786-
void validateHeapTypeUsingChild(Expression* child, HeapType heapType);
1620+
[[noreturn]] void throwError(std::string text) {
1621+
throw ParseException(text, 0, pos);
1622+
}
17871623

17881624
private:
17891625
bool hasDWARFSections();

src/wasm-ir-builder.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ class IRBuilder : public UnifiedExpressionVisitor<IRBuilder, Result<>> {
4747
// of instructions after this is called.
4848
Result<Expression*> build();
4949

50+
// If the IRBuilder is empty, then it's ready to parse a new self-contained
51+
// sequence of instructions.
52+
[[nodiscard]] bool empty() { return scopeStack.empty(); }
53+
5054
// Call visit() on an existing Expression with its non-child fields
5155
// initialized to initialize the child fields and refinalize it.
5256
Result<> visit(Expression*);
@@ -59,6 +63,15 @@ class IRBuilder : public UnifiedExpressionVisitor<IRBuilder, Result<>> {
5963
// pushed instruction.
6064
void setDebugLocation(const std::optional<Function::DebugLocation>&);
6165

66+
// Give the builder a pointer to the counter tracking the current location in
67+
// the binary. If this pointer is non-null, the builder will record the binary
68+
// locations relative to the given code section offset for all instructions
69+
// and delimiters inside functions.
70+
void setBinaryLocation(size_t* binaryPos, size_t codeSectionOffset) {
71+
this->binaryPos = binaryPos;
72+
this->codeSectionOffset = codeSectionOffset;
73+
}
74+
6275
// Set the function used to add scratch locals when constructing an isolated
6376
// sequence of IR.
6477
void setFunction(Function* func) { this->func = func; }
@@ -232,6 +245,11 @@ class IRBuilder : public UnifiedExpressionVisitor<IRBuilder, Result<>> {
232245
Function* func = nullptr;
233246
Builder builder;
234247

248+
// Used for setting DWARF expression locations.
249+
size_t* binaryPos = nullptr;
250+
size_t lastBinaryPos = 0;
251+
size_t codeSectionOffset = 0;
252+
235253
// The location lacks debug info as it was marked as not having it.
236254
struct NoDebug : public std::monostate {};
237255
// The location lacks debug info, but was not marked as not having
@@ -316,6 +334,9 @@ class IRBuilder : public UnifiedExpressionVisitor<IRBuilder, Result<>> {
316334
// stack-polymorphic unreachable mode.
317335
bool unreachable = false;
318336

337+
// The binary location of the start of the scope, used to set debug info.
338+
size_t startPos = 0;
339+
319340
ScopeCtx() : scope(NoScope{}) {}
320341
ScopeCtx(Scope scope) : scope(scope) {}
321342
ScopeCtx(Scope scope, Name label, bool labelUsed)
@@ -529,6 +550,10 @@ class IRBuilder : public UnifiedExpressionVisitor<IRBuilder, Result<>> {
529550
// Record the original label to handle references to it correctly.
530551
labelDepths[label].push_back(scopeStack.size() + 1);
531552
}
553+
if (binaryPos) {
554+
scope.startPos = lastBinaryPos;
555+
lastBinaryPos = *binaryPos;
556+
}
532557
scopeStack.push_back(scope);
533558
}
534559

0 commit comments

Comments
 (0)