Skip to content

Commit 05d785e

Browse files
authored
Binary format code section offset tracking (#2515)
Optionally track the binary format code section offsets, that is, when loading a binary, remember where each IR node was read from. This is necessary for DWARF debug info, as these are the offsets DWARF refers to. (Note that eventually we may want to do something else, like first read the DWARF and only then add debug info annotations into the IR in a more LLVM-like manner, but this is more straightforward and should be enough to update debug lines and ranges.) This tracking adds noticeable overhead - every single IR node adds an entry in a map - so avoid it unless actually necessary. Specifically, we need it only if the user passes in -g, there are actually DWARF sections in the binary, and we are not about to remove those sections. Print binary format code section offsets in text when printing with -g. This will help debug and test DWARF support. Each offset is printed as an annotation of the form ";; code offset: 0x7" right before the node it belongs to. Also add support for -g in wasm-opt tests (unlike a pass name, which is prefixed with "--", "g" takes just a single "-" as its prefix). Helps #2400
1 parent 02e6ba2 commit 05d785e

20 files changed

Lines changed: 891 additions & 10 deletions

auto_update_tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def update_wasm_opt_tests():
103103
passes_file = os.path.join(shared.get_test_dir('passes'), passname + '.passes')
104104
if os.path.exists(passes_file):
105105
passname = open(passes_file).read().strip()
106-
opts = [('--' + p if not p.startswith('O') else '-' + p) for p in passname.split('_')]
106+
opts = [('--' + p if not p.startswith('O') and p != 'g' else '-' + p) for p in passname.split('_')]
107107
actual = ''
108108
for module, asserts in support.split_wast(t):
109109
assert len(asserts) == 0

check.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def run_wasm_opt_tests():
9999
passes_file = os.path.join(shared.get_test_dir('passes'), passname + '.passes')
100100
if os.path.exists(passes_file):
101101
passname = open(passes_file).read().strip()
102-
opts = [('--' + p if not p.startswith('O') else '-' + p) for p in passname.split('_')]
102+
opts = [('--' + p if not p.startswith('O') and p != 'g' else '-' + p) for p in passname.split('_')]
103103
actual = ''
104104
for module, asserts in support.split_wast(t):
105105
assert len(asserts) == 0

src/passes/Print.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1392,6 +1392,7 @@ struct PrintSExpression : public OverriddenVisitor<PrintSExpression> {
13921392
Module* currModule = nullptr;
13931393
Function* currFunction = nullptr;
13941394
Function::DebugLocation lastPrintedLocation;
1395+
// Whether to print binary code-section offsets as annotations. Defaults to
// off so that a printer on which setDebugInfo() was never called does not
// read an indeterminate value.
bool debugInfo = false;
13951396

13961397
std::unordered_map<Name, Index> functionIndexes;
13971398

@@ -1421,6 +1422,16 @@ struct PrintSExpression : public OverriddenVisitor<PrintSExpression> {
14211422
if (iter != debugLocations.end()) {
14221423
printDebugLocation(iter->second);
14231424
}
1425+
// show a binary position, if there is one
1426+
if (debugInfo) {
1427+
auto iter = currFunction->binaryLocations.find(curr);
1428+
if (iter != currFunction->binaryLocations.end()) {
1429+
Colors::grey(o);
1430+
// Emit the offset in hexadecimal so the digits agree with the "0x" prefix,
// then switch the stream back to decimal so later output is unaffected.
o << ";; code offset: 0x" << std::hex << iter->second << std::dec << '\n';
1431+
restoreNormalColor(o);
1432+
doIndent(o, indent);
1433+
}
1434+
}
14241435
}
14251436
}
14261437

@@ -1437,6 +1448,10 @@ struct PrintSExpression : public OverriddenVisitor<PrintSExpression> {
14371448

14381449
void setFull(bool full_) { full = full_; }
14391450

1451+
void setPrintStackIR(bool printStackIR_) { printStackIR = printStackIR_; }
1452+
1453+
// Sets the debugInfo flag. When it is true, the printer emits a
// ";; code offset: 0x..." annotation for expressions that have an entry in
// the current function's binaryLocations map.
void setDebugInfo(bool debugInfo_) { debugInfo = debugInfo_; }
1454+
14401455
void incIndent() {
14411456
if (minify) {
14421457
return;
@@ -2321,6 +2336,7 @@ class Printer : public Pass {
23212336

23222337
void run(PassRunner* runner, Module* module) override {
23232338
PrintSExpression print(o);
2339+
print.setDebugInfo(runner->options.debugInfo);
23242340
print.visitModule(module);
23252341
}
23262342
};
@@ -2337,6 +2353,7 @@ class MinifiedPrinter : public Printer {
23372353
void run(PassRunner* runner, Module* module) override {
23382354
PrintSExpression print(o);
23392355
print.setMinify(true);
2356+
print.setDebugInfo(runner->options.debugInfo);
23402357
print.visitModule(module);
23412358
}
23422359
};
@@ -2353,6 +2370,7 @@ class FullPrinter : public Printer {
23532370
void run(PassRunner* runner, Module* module) override {
23542371
PrintSExpression print(o);
23552372
print.setFull(true);
2373+
print.setDebugInfo(runner->options.debugInfo);
23562374
print.visitModule(module);
23572375
}
23582376
};
@@ -2368,7 +2386,8 @@ class PrintStackIR : public Printer {
23682386

23692387
void run(PassRunner* runner, Module* module) override {
23702388
PrintSExpression print(o);
2371-
print.printStackIR = true;
2389+
print.setDebugInfo(runner->options.debugInfo);
2390+
print.setPrintStackIR(true);
23722391
print.visitModule(module);
23732392
}
23742393
};

src/tools/wasm-emscripten-finalize.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ int main(int argc, const char* argv[]) {
4949
std::string dataSegmentFile;
5050
bool emitBinary = true;
5151
bool debugInfo = false;
52+
bool DWARF = false;
5253
bool isSideModule = false;
5354
bool legalizeJavaScriptFFI = true;
5455
bool checkStackOverflow = false;
@@ -71,6 +72,11 @@ int main(int argc, const char* argv[]) {
7172
"Emit names section in wasm binary (or full debuginfo in wast)",
7273
Options::Arguments::Zero,
7374
[&debugInfo](Options*, const std::string&) { debugInfo = true; })
75+
.add("--dwarf",
76+
"",
77+
"Update DWARF debug info",
78+
Options::Arguments::Zero,
79+
[&DWARF](Options*, const std::string&) { DWARF = true; })
7480
.add("--emit-text",
7581
"-S",
7682
"Emit text instead of binary for the output file",
@@ -164,6 +170,7 @@ int main(int argc, const char* argv[]) {
164170

165171
Module wasm;
166172
ModuleReader reader;
173+
reader.setDWARF(DWARF);
167174
try {
168175
reader.read(infile, wasm, inputSourceMapFilename);
169176
} catch (ParseException& p) {

src/tools/wasm-metadce.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,7 @@ int main(int argc, const char* argv[]) {
498498
std::cerr << "reading...\n";
499499
}
500500
ModuleReader reader;
501+
reader.setDWARF(debugInfo);
501502
try {
502503
reader.read(options.extra["infile"], wasm);
503504
} catch (ParseException& p) {

src/tools/wasm-opt.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
using namespace wasm;
4444

4545
// runs a command and returns its output TODO: portability, return code checking
46-
std::string runCommand(std::string command) {
46+
static std::string runCommand(std::string command) {
4747
#ifdef __linux__
4848
std::string output;
4949
const int MAX_BUFFER = 1024;
@@ -59,6 +59,15 @@ std::string runCommand(std::string command) {
5959
#endif
6060
}
6161

62+
// Returns true if `passes` contains one of the pass names "strip",
// "strip-debug" or "strip-dwarf" (treated here as the passes that remove
// debug info), and false otherwise. An empty list yields false.
static bool willRemoveDebugInfo(const std::vector<std::string>& passes) {
63+
for (auto& pass : passes) {
64+
if (pass == "strip" || pass == "strip-debug" || pass == "strip-dwarf") {
65+
return true;
66+
}
67+
}
68+
return false;
69+
}
70+
6271
//
6372
// main
6473
//
@@ -210,6 +219,10 @@ int main(int argc, const char* argv[]) {
210219

211220
if (!translateToFuzz) {
212221
ModuleReader reader;
222+
// Enable DWARF parsing if we were asked for debug info, and were not
223+
// asked to remove it.
224+
reader.setDWARF(options.passOptions.debugInfo &&
225+
!willRemoveDebugInfo(options.passes));
213226
try {
214227
reader.read(options.extra["infile"], wasm, inputSourceMapFilename);
215228
} catch (ParseException& p) {

src/wasm-binary.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,10 +1053,12 @@ class WasmBinaryBuilder {
10531053
const std::vector<char>& input;
10541054
std::istream* sourceMap;
10551055
std::pair<uint32_t, Function::DebugLocation> nextDebugLocation;
1056+
bool DWARF = false;
10561057

10571058
size_t pos = 0;
10581059
Index startIndex = -1;
10591060
std::set<Function::DebugLocation> debugLocation;
1061+
// Position associated with the code section in the input. Initialized to 0,
// like `pos`, so it never holds an indeterminate value before it is assigned.
// NOTE(review): presumably set when the code section is read - confirm in
// wasm-binary.cpp.
size_t codeSectionLocation = 0;
10601062

10611063
std::set<BinaryConsts::Section> seenSections;
10621064

@@ -1068,6 +1070,7 @@ class WasmBinaryBuilder {
10681070
: wasm(wasm), allocator(wasm.allocator), input(input), sourceMap(nullptr),
10691071
nextDebugLocation(0, {0, 0, 0}), debugLocation() {}
10701072

1073+
// Stores the DWARF flag (false by default) on this builder.
// NOTE(review): presumably this must be called before read() to take
// effect - confirm against the implementation.
void setDWARF(bool value) { DWARF = value; }
10711074
void read();
10721075
void readUserSection(size_t payloadLen);
10731076

@@ -1275,6 +1278,9 @@ class WasmBinaryBuilder {
12751278
void visitBrOnExn(BrOnExn* curr);
12761279

12771280
void throwError(std::string text);
1281+
1282+
private:
1283+
bool hasDWARFSections();
12781284
};
12791285

12801286
} // namespace wasm

src/wasm-io.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ namespace wasm {
2929

3030
class ModuleReader {
3131
public:
32+
// If DWARF support is enabled, we track the locations of all IR nodes in
33+
// the binary, so that we can update DWARF sections later when writing.
34+
void setDWARF(bool DWARF_) { DWARF = DWARF_; }
35+
3236
// read text
3337
void readText(std::string filename, Module& wasm);
3438
// read binary
@@ -43,7 +47,13 @@ class ModuleReader {
4347
bool isBinaryFile(std::string filename);
4448

4549
private:
50+
bool DWARF = false;
51+
4652
void readStdin(Module& wasm, std::string sourceMapFilename);
53+
54+
void readBinaryData(std::vector<char>& input,
55+
Module& wasm,
56+
std::string sourceMapFilename);
4757
};
4858

4959
class ModuleWriter {

src/wasm.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1154,6 +1154,7 @@ class Function : public Importable {
11541154
std::map<Index, Name> localNames;
11551155
std::map<Name, Index> localIndices;
11561156

1157+
// Source maps debugging info: map expression nodes to their file, line, col.
11571158
struct DebugLocation {
11581159
uint32_t fileIndex, lineNumber, columnNumber;
11591160
bool operator==(const DebugLocation& other) const {
@@ -1175,6 +1176,10 @@ class Function : public Importable {
11751176
std::set<DebugLocation> prologLocation;
11761177
std::set<DebugLocation> epilogLocation;
11771178

1179+
// General debugging info: map every instruction to its original position in
1180+
// the binary, relative to the beginning of the code section.
1181+
std::unordered_map<Expression*, uint32_t> binaryLocations;
1182+
11781183
size_t getNumParams();
11791184
size_t getNumVars();
11801185
size_t getNumLocals();
@@ -1344,6 +1349,8 @@ class Module {
13441349
Name start;
13451350

13461351
std::vector<UserSection> userSections;
1352+
1353+
// Source maps debug info.
13471354
std::vector<std::string> debugInfoFileNames;
13481355

13491356
// `features` are the features allowed to be used in this module and should be

src/wasm/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ set(wasm_SOURCES
22
literal.cpp
33
wasm.cpp
44
wasm-binary.cpp
5+
wasm-debug.cpp
56
wasm-emscripten.cpp
67
wasm-debug.cpp
78
wasm-interpreter.cpp

0 commit comments

Comments
 (0)