Skip to content

Commit 87f9dac

Browse files
authored
[NFC] Encapsulate source map reader state (#7132)
Move all state relevant to reading source maps out of WasmBinaryReader and into a new utility, SourceMapReader. This is a prerequisite for parallelizing the parsing of function bodies, since the source map reader state is different at the beginning of each function. Also take the opportunity to simplify the way we read source maps, for example by deferring the reading of anything but the position of a debug location until it will be used and by using `std::optional` instead of singleton `std::set`s to store function prologue and epilogue debug locations.
1 parent f331120 commit 87f9dac

19 files changed

Lines changed: 383 additions & 394 deletions

src/ir/module-utils.cpp

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -26,32 +26,20 @@ namespace wasm::ModuleUtils {
2626

2727
// Update the file name index when moving a debug location (if present) from one
2828
// module to another.
29-
static void updateLocationSet(std::set<Function::DebugLocation>& locations,
30-
std::vector<Index>& fileIndexMap) {
31-
std::set<Function::DebugLocation> updatedLocations;
32-
33-
for (auto iter : locations) {
34-
iter.fileIndex = fileIndexMap[iter.fileIndex];
35-
updatedLocations.insert(iter);
29+
static void updateLocation(std::optional<Function::DebugLocation>& location,
30+
std::vector<Index>& fileIndexMap) {
31+
if (location) {
32+
location->fileIndex = fileIndexMap[location->fileIndex];
3633
}
37-
locations.clear();
38-
std::swap(locations, updatedLocations);
3934
}
4035

4136
// Update the symbol name index when moving a debug location (if present) from one
4237
// module to another.
43-
static void updateSymbolSet(std::set<Function::DebugLocation>& locations,
44-
std::vector<Index>& symbolIndexMap) {
45-
std::set<Function::DebugLocation> updatedLocations;
46-
47-
for (auto iter : locations) {
48-
if (iter.symbolNameIndex) {
49-
iter.symbolNameIndex = symbolIndexMap[*iter.symbolNameIndex];
50-
}
51-
updatedLocations.insert(iter);
38+
static void updateSymbol(std::optional<Function::DebugLocation>& location,
39+
std::vector<Index>& symbolIndexMap) {
40+
if (location && location->symbolNameIndex) {
41+
location->symbolNameIndex = symbolIndexMap[*location->symbolNameIndex];
5242
}
53-
locations.clear();
54-
std::swap(locations, updatedLocations);
5543
}
5644

5745
// Copies a function into a module. If newName is provided it is used as the
@@ -94,8 +82,8 @@ copyFunctionWithoutAdd(Function* func,
9482
iter.second->fileIndex = (*fileIndexMap)[iter.second->fileIndex];
9583
}
9684
}
97-
updateLocationSet(ret->prologLocation, *fileIndexMap);
98-
updateLocationSet(ret->epilogLocation, *fileIndexMap);
85+
updateLocation(ret->prologLocation, *fileIndexMap);
86+
updateLocation(ret->epilogLocation, *fileIndexMap);
9987
}
10088
if (symbolNameIndexMap) {
10189
for (auto& iter : ret->debugLocations) {
@@ -105,8 +93,8 @@ copyFunctionWithoutAdd(Function* func,
10593
(*symbolNameIndexMap)[*(iter.second->symbolNameIndex)];
10694
}
10795
}
108-
updateSymbolSet(ret->prologLocation, *symbolNameIndexMap);
109-
updateSymbolSet(ret->epilogLocation, *symbolNameIndexMap);
96+
updateSymbol(ret->prologLocation, *symbolNameIndexMap);
97+
updateSymbol(ret->epilogLocation, *symbolNameIndexMap);
11098
}
11199
}
112100
ret->module = func->module;

src/parsing.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,6 @@ struct ParseException {
4343
void dump(std::ostream& o) const;
4444
};
4545

46-
struct MapParseException {
47-
std::string text;
48-
49-
MapParseException() : text("unknown parse error") {}
50-
MapParseException(std::string text) : text(text) {}
51-
52-
void dump(std::ostream& o) const;
53-
};
54-
5546
// Helper for parsers that may not have unique label names. This transforms
5647
// the names into unique ones, as required by Binaryen IR.
5748
struct UniqueNameMapper {

src/passes/DebugLocationPropagation.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ struct DebugLocationPropagation
6464
if (auto it = locs.find(previous); it != locs.end()) {
6565
locs[curr] = it->second;
6666
}
67-
} else if (self->getFunction()->prologLocation.size()) {
67+
} else if (self->getFunction()->prologLocation) {
6868
// Instructions may inherit their locations from the function
6969
// prolog.
70-
locs[curr] = *self->getFunction()->prologLocation.begin();
70+
locs[curr] = *self->getFunction()->prologLocation;
7171
}
7272
}
7373
expressionStack.push_back(curr);

src/passes/Print.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3060,8 +3060,8 @@ void PrintSExpression::visitDefinedFunction(Function* curr) {
30603060
currFunction = curr;
30613061
lastPrintedLocation = std::nullopt;
30623062
lastPrintIndent = 0;
3063-
if (currFunction->prologLocation.size()) {
3064-
printDebugLocation(*currFunction->prologLocation.begin());
3063+
if (currFunction->prologLocation) {
3064+
printDebugLocation(*currFunction->prologLocation);
30653065
}
30663066
handleSignature(curr, true);
30673067
incIndent();
@@ -3095,14 +3095,14 @@ void PrintSExpression::visitDefinedFunction(Function* curr) {
30953095
}
30963096
assert(controlFlowDepth == 0);
30973097
}
3098-
if (currFunction->epilogLocation.size()) {
3098+
if (currFunction->epilogLocation) {
30993099
// Print last debug location: mix of decIndent and printDebugLocation
31003100
// logic.
31013101
doIndent(o, indent);
31023102
if (!minify) {
31033103
indent--;
31043104
}
3105-
printDebugLocation(*currFunction->epilogLocation.begin());
3105+
printDebugLocation(*currFunction->epilogLocation);
31063106
o << ')';
31073107
} else {
31083108
decIndent();

src/source-map.h

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
* Copyright 2024 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#ifndef wasm_source_map_h
18+
#define wasm_source_map_h
19+
20+
#include <optional>
21+
#include <unordered_map>
22+
23+
#include "wasm.h"
24+
25+
namespace wasm {
26+
27+
struct MapParseException {
28+
std::string text;
29+
30+
MapParseException(std::string text) : text(text) {}
31+
void dump(std::ostream& o) const;
32+
};
33+
34+
class SourceMapReader {
35+
const std::vector<char>& buffer;
36+
37+
// Current position in the source map buffer.
38+
size_t pos = 0;
39+
40+
// The location in the binary the next debug location will correspond to. 0
41+
// iff there are no more debug locations.
42+
size_t location = 0;
43+
44+
// The file index, line, column, and symbol index the next debug location will
45+
// be offset from.
46+
uint32_t file = 0;
47+
uint32_t line = 1;
48+
uint32_t col = 0;
49+
uint32_t symbol = 0;
50+
51+
// Whether the last read record had position and symbol information.
52+
bool hasInfo = false;
53+
bool hasSymbol = false;
54+
55+
public:
56+
SourceMapReader(const std::vector<char>& buffer) : buffer(buffer) {}
57+
58+
void readHeader(Module& wasm);
59+
60+
std::optional<Function::DebugLocation>
61+
readDebugLocationAt(size_t currLocation);
62+
63+
// Do not reuse debug info across function boundaries.
64+
void finishFunction() { hasInfo = false; }
65+
66+
private:
67+
char peek() {
68+
if (pos >= buffer.size()) {
69+
throw MapParseException("unexpected end of source map");
70+
}
71+
return buffer[pos];
72+
}
73+
74+
char get() {
75+
char c = peek();
76+
++pos;
77+
return c;
78+
}
79+
80+
bool maybeGet(char c) {
81+
if (pos < buffer.size() && peek() == c) {
82+
++pos;
83+
return true;
84+
}
85+
return false;
86+
}
87+
88+
void expect(char c) {
89+
using namespace std::string_literals;
90+
char got = get();
91+
if (got != c) {
92+
throw MapParseException("expected '"s + c + "', got '" + got + "'");
93+
}
94+
}
95+
96+
int32_t readBase64VLQ();
97+
};
98+
99+
} // namespace wasm
100+
101+
#endif // wasm_source_map_h

src/tools/wasm-dis.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
// wasm-dis console tool
1919
//
2020

21+
#include "source-map.h"
2122
#include "support/colors.h"
2223
#include "support/file.h"
2324
#include "wasm-io.h"

src/wasm-binary.h

Lines changed: 8 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "ir/import-utils.h"
2929
#include "ir/module-utils.h"
3030
#include "parsing.h"
31+
#include "source-map.h"
3132
#include "wasm-builder.h"
3233
#include "wasm-ir-builder.h"
3334
#include "wasm-traversal.h"
@@ -1403,41 +1404,13 @@ class WasmBinaryWriter {
14031404
void prepare();
14041405
};
14051406

1407+
extern std::vector<char> defaultEmptySourceMap;
1408+
14061409
class WasmBinaryReader {
14071410
Module& wasm;
14081411
MixedArena& allocator;
14091412
const std::vector<char>& input;
14101413

1411-
// Source map debugging support.
1412-
1413-
std::istream* sourceMap;
1414-
1415-
// The binary position that the next debug location refers to. That is, this
1416-
// is the first item in a source map entry that we have read (the "column", in
1417-
// source map terms, which for wasm means the offset in the binary). We have
1418-
// read this entry, but have not used it yet (we use it when we read the
1419-
// expression at this binary offset).
1420-
//
1421-
// This is set to 0 as an invalid value if we reach the end of the source map
1422-
// and there is nothing left to read.
1423-
size_t nextDebugPos;
1424-
1425-
// The debug location (file:line:col) corresponding to |nextDebugPos|. That
1426-
// is, this is the next 3 fields in a source map entry that we have read, but
1427-
// not used yet.
1428-
//
1429-
// If that location has no debug info (it lacks those 3 fields), then this
1430-
// contains the info from the previous one, because in a source map, these
1431-
// fields are relative to their last appearance, so we cannot forget them (we
1432-
// can't just do something like std::optional<DebugLocation> or such); for
1433-
// example, if we have line number 100, then no debug info, and then line
1434-
// number 500, then when we get to 500 we will see "+400" which is relative to
1435-
// the last existing line number (we "skip" over a place without debug info).
1436-
Function::DebugLocation nextDebugLocation;
1437-
1438-
// Whether debug info is present on |nextDebugPos| (see comment there).
1439-
bool nextDebugLocationHasDebugInfo;
1440-
14411414
// Settings.
14421415

14431416
bool debugInfo = true;
@@ -1448,17 +1421,20 @@ class WasmBinaryReader {
14481421

14491422
size_t pos = 0;
14501423
Index startIndex = -1;
1451-
std::set<Function::DebugLocation> debugLocation;
14521424
size_t codeSectionLocation;
14531425
std::unordered_set<uint8_t> seenSections;
14541426

1427+
IRBuilder builder;
1428+
SourceMapReader sourceMapReader;
1429+
14551430
// All types defined in the type section
14561431
std::vector<HeapType> types;
14571432

14581433
public:
14591434
WasmBinaryReader(Module& wasm,
14601435
FeatureSet features,
1461-
const std::vector<char>& input);
1436+
const std::vector<char>& input,
1437+
const std::vector<char>& sourceMap = defaultEmptySourceMap);
14621438

14631439
void setDebugInfo(bool value) { debugInfo = value; }
14641440
void setDWARF(bool value) { DWARF = value; }
@@ -1584,8 +1560,6 @@ class WasmBinaryReader {
15841560
Expression* readExpression();
15851561
void readGlobals();
15861562

1587-
IRBuilder builder;
1588-
15891563
// validations that cannot be performed on the Module
15901564
void validateBinary();
15911565

@@ -1607,13 +1581,6 @@ class WasmBinaryReader {
16071581
void readDylink(size_t);
16081582
void readDylink0(size_t);
16091583

1610-
// Debug information reading helpers
1611-
void setDebugLocations(std::istream* sourceMap_) { sourceMap = sourceMap_; }
1612-
std::unordered_map<std::string, Index> debugInfoFileIndices;
1613-
std::unordered_map<std::string, Index> debugInfoSymbolNameIndices;
1614-
void readNextDebugLocation();
1615-
void readSourceMapHeader();
1616-
16171584
Index readMemoryAccess(Address& alignment, Address& offset);
16181585
std::tuple<Name, Address, Address> getMemarg();
16191586

src/wasm-stack.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -456,8 +456,8 @@ class BinaryenIRToBinaryWriter
456456

457457
void emit(Expression* curr) { writer.visit(curr); }
458458
void emitHeader() {
459-
if (func->prologLocation.size()) {
460-
parent.writeDebugLocation(*func->prologLocation.begin());
459+
if (func->prologLocation) {
460+
parent.writeDebugLocation(*func->prologLocation);
461461
}
462462
writer.mapLocalsAndEmitHeader();
463463
}
@@ -469,8 +469,8 @@ class BinaryenIRToBinaryWriter
469469
void emitFunctionEnd() {
470470
// Indicate the debug location corresponding to the end opcode
471471
// that terminates the function code.
472-
if (func->epilogLocation.size()) {
473-
parent.writeDebugLocation(*func->epilogLocation.begin());
472+
if (func->epilogLocation) {
473+
parent.writeDebugLocation(*func->epilogLocation);
474474
} else {
475475
// The end opcode has no debug location.
476476
parent.writeNoDebugLocation();

src/wasm.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2095,8 +2095,8 @@ class Function : public Importable {
20952095
// One can explicitly set the debug location of an expression to
20962096
// nullopt to stop the propagation of debug locations.
20972097
std::unordered_map<Expression*, std::optional<DebugLocation>> debugLocations;
2098-
std::set<DebugLocation> prologLocation;
2099-
std::set<DebugLocation> epilogLocation;
2098+
std::optional<DebugLocation> prologLocation;
2099+
std::optional<DebugLocation> epilogLocation;
21002100

21012101
// General debugging info support: track instructions and the function itself.
21022102
std::unordered_map<Expression*, BinaryLocations::Span> expressionLocations;

src/wasm/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ file(GLOB wasm_HEADERS ../*.h)
22
set(wasm_SOURCES
33
literal.cpp
44
parsing.cpp
5+
source-map.cpp
56
wasm.cpp
67
wasm-binary.cpp
78
wasm-debug.cpp

0 commit comments

Comments
 (0)