Bysyncify: Fuzzing (#2192)

kripken · web-flow · commit be3135ca8db8 · 2019-07-01T17:45:53.000-07:00
Gets fuzzing support for Bysyncify working.

* Add the Python code to run fuzzing on Bysyncify.
* Add a JS script to load and run a testcase with Bysyncify support. The script contains all the runtime support for sleep/resume etc.; it starts a sleep on calls to imports, chosen pseudo-randomly but in a deterministic manner.
* Export memory from fuzzer so JS can access it.
* Fix a tiny Builder bug: makeExport ignored its `kind` argument and always created a Function export.
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
@@ -52,6 +52,10 @@
 # utilities
 
 
+def in_binaryen(*args):
+  return os.path.join(options.binaryen_root, *args)
+
+
 def in_bin(tool):
   return os.path.join(options.binaryen_root, 'bin', tool)
 
@@ -139,8 +143,17 @@ def run_vm(cmd):
     raise
 
 
-def run_bynterp(wasm):
-  return fix_output(run_vm([in_bin('wasm-opt'), wasm, '--fuzz-exec-before'] + FEATURE_OPTS))
+def run_bynterp(wasm, args):
+  # increase the interpreter stack depth, to test more things
+  os.environ['BINARYEN_MAX_INTERPRETER_DEPTH'] = '1000'
+  try:
+    return run_vm([in_bin('wasm-opt'), wasm] + FEATURE_OPTS + args)
+  finally:
+    del os.environ['BINARYEN_MAX_INTERPRETER_DEPTH']
+
+
+def run_d8(wasm):
+  return run_vm(['d8', in_binaryen('scripts', 'fuzz_shell.js'), '--', wasm])
 
 
 # Each test case handler receives two wasm files, one before and one after some changes
@@ -166,7 +179,7 @@ def handle_pair(self, before_wasm, after_wasm, opts):
 
   def run_vms(self, js, wasm):
     results = []
-    results.append(run_bynterp(wasm))
+    results.append(fix_output(run_bynterp(wasm, ['--fuzz-exec-before'])))
     results.append(fix_output(run_vm(['d8', js] + V8_OPTS + ['--', wasm])))
 
     # append to add results from VMs
@@ -200,7 +213,7 @@ def compare_vs(self, before, after):
 class FuzzExec(TestCaseHandler):
   def handle_pair(self, before_wasm, after_wasm, opts):
     # fuzz binaryen interpreter itself. separate invocation so result is easily fuzzable
-    run([in_bin('wasm-opt'), before_wasm, '--fuzz-exec', '--fuzz-binary'] + opts)
+    run_bynterp(before_wasm, ['--fuzz-exec', '--fuzz-binary'])
 
 
 # Check for determinism - the same command must have the same output
@@ -241,19 +254,44 @@ def run(self, wasm):
 
 
 class Bysyncify(TestCaseHandler):
-  def handle(self, wasm):
-    # run normally and run in an async manner, and compare
-    before = run([in_bin('wasm-opt'), wasm, '--fuzz-exec'])
+  def handle_pair(self, before_wasm, after_wasm, opts):
+    # we must legalize in order to run in JS
+    run([in_bin('wasm-opt'), before_wasm, '--legalize-js-interface', '-o', before_wasm])
+    run([in_bin('wasm-opt'), after_wasm, '--legalize-js-interface', '-o', after_wasm])
+    before = fix_output(run_d8(before_wasm))
+    after = fix_output(run_d8(after_wasm))
+
     # TODO: also something that actually does async sleeps in the code, say
     # on the logging commands?
     # --remove-unused-module-elements removes the bysyncify intrinsics, which are not valid to call
-    cmd = [in_bin('wasm-opt'), wasm, '--bysyncify', '--remove-unused-module-elements', '-o', 'by.wasm']
-    if random.random() < 0.5:
-      cmd += ['--optimize-level=3']  # TODO: more
-    run(cmd)
-    after = run([in_bin('wasm-opt'), 'by.wasm', '--fuzz-exec'])
-    after = '\n'.join([line for line in after.splitlines() if '[fuzz-exec] calling $bysyncify' not in line])
-    compare(before, after, 'Bysyncify')
+
+    def do_bysyncify(wasm):
+      cmd = [in_bin('wasm-opt'), wasm, '--bysyncify', '-o', 't.wasm']
+      if random.random() < 0.5:
+        cmd += ['--optimize-level=%d' % random.randint(1, 3)]
+      if random.random() < 0.5:
+        cmd += ['--shrink-level=%d' % random.randint(1, 2)]
+      run(cmd)
+      out = run_d8('t.wasm')
+      # emit some status logging from bysyncify
+      print(out.splitlines()[-1])
+      # ignore the output from the new bysyncify API calls - the ones with asserts will trap, too
+      for ignore in ['[fuzz-exec] calling $bysyncify_start_unwind\nexception!\n',
+                     '[fuzz-exec] calling $bysyncify_start_unwind\n',
+                     '[fuzz-exec] calling $bysyncify_start_rewind\nexception!\n',
+                     '[fuzz-exec] calling $bysyncify_start_rewind\n',
+                     '[fuzz-exec] calling $bysyncify_stop_rewind\n',
+                     '[fuzz-exec] calling $bysyncify_stop_unwind\n']:
+        out = out.replace(ignore, '')
+      out = '\n'.join([l for l in out.splitlines() if 'bysyncify: ' not in l])
+      return fix_output(out)
+
+    before_bysyncify = do_bysyncify(before_wasm)
+    after_bysyncify = do_bysyncify(after_wasm)
+
+    compare(before, after, 'Bysyncify (before/after)')
+    compare(before, before_bysyncify, 'Bysyncify (before/before_bysyncify)')
+    compare(before, after_bysyncify, 'Bysyncify (before/after_bysyncify)')
 
 
 # The global list of all test case handlers
@@ -262,19 +300,19 @@ def handle(self, wasm):
   FuzzExec(),
   CheckDeterminism(),
   Wasm2JS(),
-  # TODO Bysyncify(),
+  Bysyncify(),
 ]
 
 
 # Do one test, given an input file for -ttf and some optimizations to run
-def test_one(infile, opts):
+def test_one(random_input, opts):
   randomize_pass_debug()
 
   bytes = 0
 
   # fuzz vms
   # gather VM outputs on input file
-  run([in_bin('wasm-opt'), infile, '-ttf', '-o', 'a.wasm'] + FUZZ_OPTS + FEATURE_OPTS)
+  run([in_bin('wasm-opt'), random_input, '-ttf', '-o', 'a.wasm'] + FUZZ_OPTS + FEATURE_OPTS)
   wasm_size = os.stat('a.wasm').st_size
   bytes += wasm_size
   print('pre js size :', os.stat('a.js').st_size, ' wasm size:', wasm_size)
diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js
@@ -0,0 +1,209 @@
+// Shell integration.
+if (typeof console === 'undefined') {
+  console = { log: print };
+}
+var tempRet0;
+var binary;
+if (typeof process === 'object' && typeof require === 'function' /* node.js detection */) {
+  var args = process.argv.slice(2);
+  binary = require('fs').readFileSync(args[0]);
+  if (!binary.buffer) binary = new Uint8Array(binary);
+} else {
+  var args;
+  if (typeof scriptArgs != 'undefined') {
+    args = scriptArgs;
+  } else if (typeof arguments != 'undefined') {
+    args = arguments;
+  }
+  if (typeof readbuffer === 'function') {
+    binary = new Uint8Array(readbuffer(args[0]));
+  } else {
+    binary = read(args[0], 'binary');
+  }
+}
+
+// Utilities.
+function assert(x, y) {
+  if (!x) throw (y || 'assertion failed');// + new Error().stack;
+}
+
+// Deterministic randomness: each call returns the next value of a repeatable sequence in [0, 1).
+var detrand = (function() {
+  var hash = 5381; // TODO DET_RAND_SEED;
+  var x = 0; // Counter mixed into the hash on every call; wraps around at 256.
+  return function() {
+    hash = (((hash << 5) + hash) ^ (x & 0xff)) >>> 0; // hash * 33 (mod 2^32), xor'ed with the counter byte; >>> 0 keeps it unsigned.
+    x = (x + 1) % 256;
+    return (hash % 256) / 256; // Low byte of the hash scaled into [0, 1), i.e. a multiple of 1/256.
+  };
+})();
+
+// Bysyncify integration.
+var Bysyncify = {
+  sleeping: false,
+  sleepingFunction: null,
+  sleeps: 0,
+  maxDepth: 0,
+  DATA_ADDR: 4,
+  DATA_MAX: 65536,
+  savedMemory: null,
+  instrumentImports: function(imports) {
+    var ret = {};
+    for (var module in imports) {
+      ret[module] = {};
+      for (var i in imports[module]) {
+        if (typeof imports[module][i] === 'function') {
+          (function(module, i) {
+            ret[module][i] = function() {
+              if (!Bysyncify.sleeping) {
+                // Sleep if bysyncify support is present, and at a certain
+                // probability.
+                if (exports.bysyncify_start_unwind && 
+                    detrand() < 0.5) {
+                  // We are called in order to start a sleep/unwind.
+                  console.log('bysyncify: sleep in ' + i + '...');
+                  Bysyncify.sleepingFunction = i;
+                  Bysyncify.sleeps++;
+                  var depth = new Error().stack.split('\n').length - 6;
+                  Bysyncify.maxDepth = Math.max(Bysyncify.maxDepth, depth);
+                  // Save the memory we use for data, so after we restore it later, the
+                  // sleep/resume appears to have had no change to memory.
+                  Bysyncify.savedMemory = new Int32Array(view.subarray(Bysyncify.DATA_ADDR >> 2, Bysyncify.DATA_MAX >> 2));
+                  // Unwinding.
+                  // Fill in the data structure. The first value has the stack location,
+                  // which for simplicity we can start right after the data structure itself.
+                  view[Bysyncify.DATA_ADDR >> 2] = Bysyncify.DATA_ADDR + 8;
+                  // The end of the stack will not be reached here anyhow.
+                  view[Bysyncify.DATA_ADDR + 4 >> 2] = Bysyncify.DATA_MAX;
+                  exports.bysyncify_start_unwind(Bysyncify.DATA_ADDR);
+                  Bysyncify.sleeping = true;
+                } else {
+                  // Don't sleep, normal execution.
+                  return imports[module][i].apply(null, arguments);
+                }
+              } else {
+                // We are called as part of a resume/rewind. Stop sleeping.
+                console.log('bysyncify: resume in ' + i + '...');
+                assert(Bysyncify.sleepingFunction === i);
+                exports.bysyncify_stop_rewind();
+                // The stack should have been all used up, and so returned to the original state.
+                assert(view[Bysyncify.DATA_ADDR >> 2] == Bysyncify.DATA_ADDR + 8);
+                assert(view[Bysyncify.DATA_ADDR + 4 >> 2] == Bysyncify.DATA_MAX);
+                Bysyncify.sleeping = false;
+                // Restore the memory to the state from before we slept.
+                view.set(Bysyncify.savedMemory, Bysyncify.DATA_ADDR >> 2);
+                return imports[module][i].apply(null, arguments);
+              }
+            };
+          })(module, i);
+        } else {
+          ret[module][i] = imports[module][i];
+        }
+      }
+    }
+    // Add ignored.print, which is ignored by bysyncify, and allows debugging of bysyncified code.
+    ret['ignored'] = { 'print': function(x, y) { console.log(x, y) } };
+    return ret;
+  },
+  instrumentExports: function(exports) { // Returns a new object where each non-bysyncify_ function export is wrapped to drive unwind/rewind.
+    var ret = {}; // The instrumented exports object that is returned.
+    for (var e in exports) {
+      if (typeof exports[e] === 'function' &&
+          !e.startsWith('bysyncify_')) {
+        (function(e) { // Capture e per iteration, since var is function-scoped.
+          ret[e] = function() {
+            while (1) { // Keep re-entering the export until it completes without sleeping.
+              var ret = exports[e].apply(null, arguments); // NOTE: this ret is local to the wrapper and shadows the outer ret object.
+              // If we are sleeping, then the stack was unwound; rewind it.
+              if (Bysyncify.sleeping) {
+                console.log('bysyncify: stop unwind; rewind');
+                assert(!ret, 'results during sleep are meaningless, just 0');
+                //console.log('bysyncify: after unwind', view[Bysyncify.DATA_ADDR >> 2], view[Bysyncify.DATA_ADDR + 4 >> 2]);
+                try {
+                  exports.bysyncify_stop_unwind();
+                  exports.bysyncify_start_rewind(Bysyncify.DATA_ADDR);
+                } catch (e) { // NOTE: this e is the caught error; it shadows the export name inside the catch block only.
+                  console.log('error in unwind/rewind switch', e);
+                }
+                continue; // Call the export again so that it rewinds.
+              }
+              return ret;
+            }
+          };
+        })(e);
+      } else {
+        ret[e] = exports[e]; // Non-functions and the bysyncify_* intrinsics are passed through unwrapped.
+      }
+    }
+    return ret;
+  },
+  check: function() {
+    assert(!Bysyncify.sleeping);
+  },
+  finish: function() {
+    if (Bysyncify.sleeps > 0) {
+      print('bysyncify:', 'sleeps:', Bysyncify.sleeps, 'max depth:', Bysyncify.maxDepth);
+    }
+  },
+};
+
+// Fuzz integration.
+function logValue(x, y) {
+  if (typeof y !== 'undefined') {
+    console.log('[LoggingExternalInterface logging ' + x + ' ' + y + ']');
+  } else {
+    console.log('[LoggingExternalInterface logging ' + x + ']');
+  }
+}
+
+// Set up the imports.
+var imports = {
+  'fuzzing-support': {
+    'log-i32': logValue,
+    'log-i64': logValue,
+    'log-f32': logValue,
+    'log-f64': logValue,
+  },
+  'env': {
+    'setTempRet0': function(x) { tempRet0 = x },
+    'getTempRet0': function() { return tempRet0 },
+  },
+};
+
+imports = Bysyncify.instrumentImports(imports);
+
+// Create the wasm.
+var instance = new WebAssembly.Instance(new WebAssembly.Module(binary), imports);
+
+// Handle the exports.
+var exports = instance.exports;
+exports = Bysyncify.instrumentExports(exports);
+var view = new Int32Array(exports.memory.buffer);
+
+// Run the wasm.
+var sortedExports = [];
+for (var e in exports) {
+  sortedExports.push(e);
+}
+sortedExports.sort();
+sortedExports = sortedExports.filter(function(e) {
+  // Filter special intrinsic functions.
+  return !e.startsWith('bysyncify_');
+});
+sortedExports.forEach(function(e) {
+  Bysyncify.check();
+  if (typeof exports[e] !== 'function') return;
+  try {
+    console.log('[fuzz-exec] calling $' + e);
+    var result = exports[e]();
+    if (typeof result !== 'undefined') {
+      console.log('[fuzz-exec] note result: $' + e + ' => ' + result);
+    }
+  } catch (e) {
+    console.log('exception!');// + [e, e.stack]);
+  }
+});
+
+// Finish up
+Bysyncify.finish();
+
diff --git a/src/tools/fuzzing.h b/src/tools/fuzzing.h
@@ -375,6 +375,8 @@ class TranslateToFuzzReader {
     hasher->type = ensureFunctionType(getSig(hasher), &wasm)->name;
     wasm.addExport(
       builder.makeExport(hasher->name, hasher->name, ExternalKind::Function));
+    // Export memory so JS fuzzing can use it
+    wasm.addExport(builder.makeExport("memory", "0", ExternalKind::Memory));
   }
 
   void setupTable() {
diff --git a/src/wasm-builder.h b/src/wasm-builder.h
@@ -84,7 +84,7 @@ class Builder {
     auto* export_ = new Export();
     export_->name = name;
     export_->value = value;
-    export_->kind = ExternalKind::Function;
+    export_->kind = kind;
     return export_;
   }
 
diff --git a/test/passes/translate-to-fuzz_all-features.txt b/test/passes/translate-to-fuzz_all-features.txt
@@ -20,6 +20,7 @@
  (global $hangLimit (mut i32) (i32.const 10))
  (event $event$0 (attr 0) (param i32 f32 i32 f64 i32))
  (export "hashMemory" (func $hashMemory))
+ (export "memory" (memory $0))
  (export "func_5" (func $func_5))
  (export "hangLimitInitializer" (func $hangLimitInitializer))
  (func $hashMemory (; 4 ;) (type $FUNCSIG$i) (result i32)
diff --git a/test/passes/translate-to-fuzz_no-fuzz-nans_all-features.txt b/test/passes/translate-to-fuzz_no-fuzz-nans_all-features.txt
@@ -19,6 +19,7 @@
  (global $hangLimit (mut i32) (i32.const 10))
  (event $event$0 (attr 0) (param i32 f32 i32 f64 i32))
  (export "hashMemory" (func $hashMemory))
+ (export "memory" (memory $0))
  (export "hangLimitInitializer" (func $hangLimitInitializer))
  (func $hashMemory (; 4 ;) (type $FUNCSIG$i) (result i32)
   (local $0 i32)
diff --git a/test/unit/input/bysyncify.js b/test/unit/input/bysyncify.js

Original file line number	Diff line number	Diff line change
`@@ -375,6 +375,8 @@ class TranslateToFuzzReader {`
`375`	`375`	`hasher->type = ensureFunctionType(getSig(hasher), &wasm)->name;`
`376`	`376`	`wasm.addExport(`
`377`	`377`	`builder.makeExport(hasher->name, hasher->name, ExternalKind::Function));`
	`378`	`+ // Export memory so JS fuzzing can use it`
	`379`	`+ wasm.addExport(builder.makeExport("memory", "0", ExternalKind::Memory));`
`378`	`380`	`}`
`379`	`381`
`380`	`382`	`void setupTable() {`
Original file line number	Diff line number	Diff line change
`@@ -84,7 +84,7 @@ class Builder {`
`84`	`84`	`auto* export_ = new Export();`
`85`	`85`	`export_->name = name;`
`86`	`86`	`export_->value = value;`
`87`		`- export_->kind = ExternalKind::Function;`
	`87`	`+ export_->kind = kind;`
`88`	`88`	`return export_;`
`89`	`89`	`}`
`90`	`90`