Skip to content

Commit ccd95f8

Browse files
authored
Asyncify: whitelist and blacklist support (#2264)
The blacklist means "functions here are to be ignored and not instrumented, we can assume they never unwind." The whitelist means "only these functions, and no others, can unwind." I had hoped such lists would not be necessary, since Asyncify's overhead is much smaller than the old Asyncify and Emterpreter, but as projects have noticed, the overhead to size and speed is still significant. The lists give power users a way to reduce any unnecessary overhead. A slightly tricky thing is escaping of names: we escape names from the names section (see #2261 #1646). The lists arrive in human-readable format, so we escape them before comparing to the internal escaped names. To enable that I refactored wasm-binary a little bit to provide the escaping logic, cc @yurydelendik If both lists are specified, an error is shown (since that is meaningless). If a name appears in a list that is not in the module, we show a warning, which will hopefully help people debug typos etc. I had hoped to make this an error, but the problem is that due to inlining etc. a single list will not always work for both unoptimized and optimized builds (a function may vanish when optimizing, due to duplicate function elimination or inlining). Fixes #2218.
1 parent 6ca3f4c commit ccd95f8

8 files changed

Lines changed: 733 additions & 20 deletions

src/passes/Asyncify.cpp

Lines changed: 97 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,24 @@
212212
// If that is true for your codebase, then this can be extremely useful
213213
// as otherwise it looks like any indirect call can go to a lot of places.
214214
//
215+
// --pass-arg=asyncify-blacklist@name1,name2,name3
216+
//
217+
// If the blacklist is provided, then the functions in it will not be
218+
// instrumented even if it looks like they need to. This can be useful
219+
// if you know things the whole-program analysis doesn't, like if you
220+
// know certain indirect calls are safe and won't unwind. But if you
221+
// get the list wrong things will break (and in a production build user
222+
// input might reach code paths you missed during testing, so it's hard
223+
// to know you got this right), so this is not recommended unless you
224+
// really know what are doing, and need to optimize every bit of speed
225+
// and size.
226+
//
227+
// --pass-arg=asyncify-whitelist@name1,name2,name3
228+
//
229+
// If the whitelist is provided, then only the functions in the list
230+
// will be instrumented. Like the blacklist, getting this wrong will
231+
// break your application.
232+
//
215233

216234
#include "ir/effects.h"
217235
#include "ir/literal-utils.h"
@@ -327,9 +345,15 @@ class ModuleAnalyzer {
327345
public:
328346
ModuleAnalyzer(Module& module,
329347
std::function<bool(Name, Name)> canImportChangeState,
330-
bool canIndirectChangeState)
348+
bool canIndirectChangeState,
349+
const String::Split& blacklistInput,
350+
const String::Split& whitelistInput)
331351
: module(module), canIndirectChangeState(canIndirectChangeState),
332352
globals(module) {
353+
354+
blacklist.insert(blacklistInput.begin(), blacklistInput.end());
355+
whitelist.insert(whitelistInput.begin(), whitelistInput.end());
356+
333357
// Scan to see which functions can directly change the state.
334358
// Also handle the asyncify imports, removing them (as we will implement
335359
// them later), and replace calls to them with calls to the later proper
@@ -388,11 +412,13 @@ class ModuleAnalyzer {
388412
}
389413
Info* info;
390414
Module* module;
415+
ModuleAnalyzer* analyzer;
391416
bool canIndirectChangeState;
392417
};
393418
Walker walker;
394419
walker.info = &info;
395420
walker.module = &module;
421+
walker.analyzer = this;
396422
walker.canIndirectChangeState = canIndirectChangeState;
397423
walker.walk(func->body);
398424

@@ -406,6 +432,13 @@ class ModuleAnalyzer {
406432

407433
map.swap(scanner.map);
408434

435+
// Functions in the blacklist are assumed to not change the state.
436+
for (auto& name : blacklist) {
437+
if (auto* func = module.getFunctionOrNull(name)) {
438+
map[func].canChangeState = false;
439+
}
440+
}
441+
409442
// Remove the asyncify imports, if any.
410443
for (auto& pair : map) {
411444
auto* func = pair.first;
@@ -435,12 +468,22 @@ class ModuleAnalyzer {
435468
while (!work.empty()) {
436469
auto* func = work.pop();
437470
for (auto* caller : map[func].calledBy) {
438-
if (!map[caller].canChangeState && !map[caller].isBottomMostRuntime) {
471+
if (!map[caller].canChangeState && !map[caller].isBottomMostRuntime &&
472+
!blacklist.count(caller->name)) {
439473
map[caller].canChangeState = true;
440474
work.push(caller);
441475
}
442476
}
443477
}
478+
479+
if (!whitelist.empty()) {
480+
// Only the functions in the whitelist can change the state.
481+
for (auto& func : module.functions) {
482+
if (!func->imported()) {
483+
map[func.get()].canChangeState = whitelist.count(func->name) > 0;
484+
}
485+
}
486+
}
444487
}
445488

446489
bool needsInstrumentation(Function* func) {
@@ -481,13 +524,15 @@ class ModuleAnalyzer {
481524
// TODO optimize the other case, at least by type
482525
}
483526
Module* module;
527+
ModuleAnalyzer* analyzer;
484528
Map* map;
485529
bool canIndirectChangeState;
486530
bool canChangeState = false;
487531
bool isBottomMostRuntime = false;
488532
};
489533
Walker walker;
490534
walker.module = &module;
535+
walker.analyzer = this;
491536
walker.map = &map;
492537
walker.canIndirectChangeState = canIndirectChangeState;
493538
walker.walk(curr);
@@ -498,6 +543,8 @@ class ModuleAnalyzer {
498543
}
499544

500545
GlobalHelper globals;
546+
std::set<Name> blacklist;
547+
std::set<Name> whitelist;
501548
};
502549

503550
// Checks if something performs a call: either a direct or indirect call,
@@ -987,23 +1034,57 @@ struct Asyncify : public Pass {
9871034
String::Split listedImports(stateChangingImports, ",");
9881035
auto ignoreIndirect =
9891036
runner->options.getArgumentOrDefault("asyncify-ignore-indirect", "");
1037+
String::Split blacklist(
1038+
runner->options.getArgumentOrDefault("asyncify-blacklist", ""), ",");
1039+
String::Split whitelist(
1040+
runner->options.getArgumentOrDefault("asyncify-whitelist", ""), ",");
1041+
1042+
// The lists contain human-readable strings. Turn them into the internal
1043+
// escaped names for later comparisons
1044+
auto processList = [module](String::Split& list, const std::string& which) {
1045+
for (auto& name : list) {
1046+
auto escaped = WasmBinaryBuilder::escape(name);
1047+
auto* func = module->getFunctionOrNull(escaped);
1048+
if (!func) {
1049+
std::cerr << "warning: Asyncify " << which
1050+
<< "list contained a non-existing function name: " << name
1051+
<< " (" << escaped << ")\n";
1052+
} else if (func->imported()) {
1053+
Fatal() << "Asyncify " << which
1054+
<< "list contained an imported function name (use the import "
1055+
"list for imports): "
1056+
<< name << '\n';
1057+
}
1058+
name = escaped.str;
1059+
}
1060+
};
1061+
processList(blacklist, "black");
1062+
processList(whitelist, "white");
1063+
1064+
if (!blacklist.empty() && !whitelist.empty()) {
1065+
Fatal() << "It makes no sense to use both a blacklist and a whitelist "
1066+
"with asyncify.";
1067+
}
1068+
1069+
auto canImportChangeState = [&](Name module, Name base) {
1070+
if (allImportsCanChangeState) {
1071+
return true;
1072+
}
1073+
std::string full = std::string(module.str) + '.' + base.str;
1074+
for (auto& listedImport : listedImports) {
1075+
if (String::wildcardMatch(listedImport, full)) {
1076+
return true;
1077+
}
1078+
}
1079+
return false;
1080+
};
9901081

9911082
// Scan the module.
9921083
ModuleAnalyzer analyzer(*module,
993-
[&](Name module, Name base) {
994-
if (allImportsCanChangeState) {
995-
return true;
996-
}
997-
std::string full =
998-
std::string(module.str) + '.' + base.str;
999-
for (auto& listedImport : listedImports) {
1000-
if (String::wildcardMatch(listedImport, full)) {
1001-
return true;
1002-
}
1003-
}
1004-
return false;
1005-
},
1006-
ignoreIndirect == "");
1084+
canImportChangeState,
1085+
ignoreIndirect == "",
1086+
blacklist,
1087+
whitelist);
10071088

10081089
// Add necessary globals before we emit code to use them.
10091090
addGlobals(module);

src/wasm-binary.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,7 @@ class WasmBinaryBuilder {
11741174

11751175
void readEvents();
11761176

1177+
static Name escape(Name name);
11771178
void readNames(size_t);
11781179
void readFeatures(size_t);
11791180

src/wasm/wasm-binary.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2054,13 +2054,13 @@ static char formatNibble(int nibble) {
20542054
return nibble < 10 ? '0' + nibble : 'a' - 10 + nibble;
20552055
}
20562056

2057-
static void escapeName(Name& name) {
2057+
Name WasmBinaryBuilder::escape(Name name) {
20582058
bool allIdChars = true;
20592059
for (const char* p = name.str; allIdChars && *p; p++) {
20602060
allIdChars = isIdChar(*p);
20612061
}
20622062
if (allIdChars) {
2063-
return;
2063+
return name;
20642064
}
20652065
// encode name, if at least one non-idchar (per WebAssembly spec) was found
20662066
std::string escaped;
@@ -2075,7 +2075,7 @@ static void escapeName(Name& name) {
20752075
escaped.push_back(formatNibble(ch >> 4));
20762076
escaped.push_back(formatNibble(ch & 15));
20772077
}
2078-
name = escaped;
2078+
return escaped;
20792079
}
20802080

20812081
void WasmBinaryBuilder::readNames(size_t payloadLen) {
@@ -2098,7 +2098,7 @@ void WasmBinaryBuilder::readNames(size_t payloadLen) {
20982098
for (size_t i = 0; i < num; i++) {
20992099
auto index = getU32LEB();
21002100
auto rawName = getInlineString();
2101-
escapeName(rawName);
2101+
rawName = escape(rawName);
21022102
auto name = rawName;
21032103
// De-duplicate names by appending .1, .2, etc.
21042104
for (int i = 1; !usedNames.insert(name).second; ++i) {

0 commit comments

Comments
 (0)