Skip to content

Commit 9bdf713

Browse files
authored
Auto-reduce testcases from the FuzzExec handler in the fuzzer (#2232)
When it finds a failing testcase, it reduces the list of optimizations, and then runs wasm-reduce to reduce the wasm itself. This refactors the testcase handlers into two kinds: one returns a list of commands to run (get_commands()), and we can auto-reduce them. The others get all the parameters and do whatever they want internally, and we can't auto-reduce them yet. If it is useful, auto-reducing could be added to the other handlers (CompareVMs, Wasm2JS, etc.) by modifying them to the new form. Tested manually by breaking stuff.
1 parent e4ae884 commit 9bdf713

1 file changed

Lines changed: 126 additions & 26 deletions

File tree

scripts/fuzz_opt.py

Lines changed: 126 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import random
2020
import re
2121
import shutil
22+
import sys
2223
import time
2324

2425
from test.shared import options, NODEJS, V8_OPTS
@@ -155,9 +156,13 @@ def run_vm(cmd):
155156
raise
156157

157158

159+
MAX_INTERPRETER_ENV_VAR = 'BINARYEN_MAX_INTERPRETER_DEPTH'
160+
MAX_INTERPRETER_DEPTH = 1000
161+
162+
158163
def run_bynterp(wasm, args):
159164
# increase the interpreter stack depth, to test more things
160-
os.environ['BINARYEN_MAX_INTERPRETER_DEPTH'] = '1000'
165+
os.environ[MAX_INTERPRETER_ENV_VAR] = str(MAX_INTERPRETER_DEPTH)
161166
try:
162167
return run_vm([in_bin('wasm-opt'), wasm] + FEATURE_OPTS + args)
163168
finally:
@@ -168,14 +173,18 @@ def run_d8(wasm):
168173
return run_vm(['d8'] + V8_OPTS + [in_binaryen('scripts', 'fuzz_shell.js'), '--', wasm])
169174

170175

171-
# Each test case handler receives two wasm files, one before and one after some changes
172-
# that should have kept it equivalent. It also receives the optimizations that the
173-
# fuzzer chose to run.
176+
# There are two types of test case handlers:
177+
# * get_commands() users: these return a list of commands to run (for example, "run this wasm-opt
178+
# command, then that one"). The calling code gets and runs those commands on the test wasm
179+
# file, and has enough information and control to be able to perform auto-reduction of any
180+
# bugs found.
181+
# * Totally generic: These receive the input pattern, a wasm generated from it, and a wasm
182+
# optimized from that, and can then do anything they want with those.
174183
class TestCaseHandler:
175184
# If the core handle_pair() method is not overridden, it calls handle()
176185
# on each of the pair. That is useful if you just want the two wasms, and don't
177186
# care about their relationship
178-
def handle_pair(self, before_wasm, after_wasm, opts):
187+
def handle_pair(self, input, before_wasm, after_wasm, opts):
179188
self.handle(before_wasm)
180189
self.handle(after_wasm)
181190

@@ -185,7 +194,7 @@ def can_run_on_feature_opts(self, feature_opts):
185194

186195
# Run VMs and compare results
187196
class CompareVMs(TestCaseHandler):
188-
def handle_pair(self, before_wasm, after_wasm, opts):
197+
def handle_pair(self, input, before_wasm, after_wasm, opts):
189198
run([in_bin('wasm-opt'), before_wasm, '--emit-js-wrapper=a.js', '--emit-spec-wrapper=a.wat'] + FEATURE_OPTS)
190199
run([in_bin('wasm-opt'), after_wasm, '--emit-js-wrapper=b.js', '--emit-spec-wrapper=b.wat'] + FEATURE_OPTS)
191200
before = self.run_vms('a.js', before_wasm)
@@ -229,22 +238,40 @@ def can_run_on_feature_opts(self, feature_opts):
229238
# Fuzz the interpreter with --fuzz-exec. This tests everything in a single command (no
230239
# two separate binaries) so it's easy to reproduce.
231240
class FuzzExec(TestCaseHandler):
    # Returns the single shell command that performs this handler's testing: wasm-opt
    # with --fuzz-exec --fuzz-binary plus the given opts, run on the given wasm, with
    # the interpreter depth environment variable set as a prefix of the command.
    # random_seed is accepted as part of the get_commands() interface but is not used.
    def get_commands(self, wasm, opts, random_seed):
        # e.g. "BINARYEN_MAX_INTERPRETER_DEPTH=1000"
        depth_setting = '%s=%d' % (MAX_INTERPRETER_ENV_VAR, MAX_INTERPRETER_DEPTH)
        joined_opts = ' '.join(opts)
        command = '%s %s --fuzz-exec --fuzz-binary %s %s' % (
            depth_setting,
            in_bin('wasm-opt'),
            joined_opts,
            wasm,
        )
        return [command]
251+
252+
253+
# As FuzzExec, but without a separate invocation. This can find internal bugs with generating
254+
# the IR (which might be worked around by writing it and then reading it).
255+
class FuzzExecImmediately(TestCaseHandler):
    # Runs the interpreter fuzzing flags, followed by the chosen opts, on the first wasm
    # of the pair through run_bynterp(). The input and after_wasm arguments are not used.
    def handle_pair(self, input, before_wasm, after_wasm, opts):
        interpreter_args = ['--fuzz-exec', '--fuzz-binary'] + opts
        run_bynterp(before_wasm, interpreter_args)
235259

236260

237-
# Check for determinism - the same command must have the same output
261+
# Check for determinism - the same command must have the same output.
262+
# Note that this doesn't use get_commands() intentionally, since we are testing
263+
# for something that autoreduction won't help with anyhow (nondeterminism is very
264+
# hard to reduce).
238265
class CheckDeterminism(TestCaseHandler):
    # Runs the same wasm-opt command twice on before_wasm and asserts that the two
    # output files are identical. The input and after_wasm arguments are not used.
    # Raises AssertionError if the two outputs differ.
    def handle_pair(self, input, before_wasm, after_wasm, opts):
        # same command, two separate output files
        run([in_bin('wasm-opt'), before_wasm, '-o', 'b1.wasm'] + opts)
        run([in_bin('wasm-opt'), before_wasm, '-o', 'b2.wasm'] + opts)
        # the outputs are binary files: compare raw bytes (text mode may fail to decode
        # them or translate newlines), and close both files when done
        with open('b1.wasm', 'rb') as first, open('b2.wasm', 'rb') as second:
            assert first.read() == second.read(), 'output must be deterministic'
244271

245272

246273
class Wasm2JS(TestCaseHandler):
247-
def handle_pair(self, before_wasm, after_wasm, opts):
274+
def handle_pair(self, input, before_wasm, after_wasm, opts):
248275
compare(self.run(before_wasm), self.run(after_wasm), 'Wasm2JS')
249276

250277
def run(self, wasm):
@@ -275,7 +302,7 @@ def can_run_on_feature_opts(self, feature_opts):
275302

276303

277304
class Asyncify(TestCaseHandler):
278-
def handle_pair(self, before_wasm, after_wasm, opts):
305+
def handle_pair(self, input, before_wasm, after_wasm, opts):
279306
# we must legalize in order to run in JS
280307
run([in_bin('wasm-opt'), before_wasm, '--legalize-js-interface', '-o', before_wasm] + FEATURE_OPTS)
281308
run([in_bin('wasm-opt'), after_wasm, '--legalize-js-interface', '-o', after_wasm] + FEATURE_OPTS)
@@ -321,11 +348,12 @@ def can_run_on_feature_opts(self, feature_opts):
321348

322349
# The global list of all test case handlers
323350
testcase_handlers = [
324-
CompareVMs(),
325351
FuzzExec(),
352+
CompareVMs(),
326353
CheckDeterminism(),
327354
Wasm2JS(),
328355
Asyncify(),
356+
FuzzExecImmediately(),
329357
]
330358

331359

@@ -334,31 +362,103 @@ def test_one(random_input, opts):
334362
randomize_pass_debug()
335363
randomize_feature_opts()
336364

337-
bytes = 0
338-
339-
# fuzz vms
340-
# gather VM outputs on input file
341365
run([in_bin('wasm-opt'), random_input, '-ttf', '-o', 'a.wasm'] + FUZZ_OPTS + FEATURE_OPTS)
342366
wasm_size = os.stat('a.wasm').st_size
343-
bytes += wasm_size
344-
print('pre js size :', os.stat('a.js').st_size, ' wasm size:', wasm_size)
345-
print('----------------')
346-
347-
# gather VM outputs on processed file
367+
bytes = wasm_size
368+
print('pre wasm size:', wasm_size)
369+
370+
# first, run all handlers that use get_commands(). those don't need the second wasm in the
371+
# pair, since all they do is return their commands, and expect us to run them, and
372+
# those commands do the actual testing, by operating on the original input wasm file. by
373+
# fuzzing the get_commands() ones first we can find bugs in creating the second wasm (that
374+
# has the opts run on it) before we try to create it later down for the passes that
375+
# expect to get it as one of their inputs.
376+
for testcase_handler in testcase_handlers:
377+
if testcase_handler.can_run_on_feature_opts(FEATURE_OPTS):
378+
if hasattr(testcase_handler, 'get_commands'):
379+
print('running testcase handler:', testcase_handler.__class__.__name__)
380+
# if the testcase handler supports giving us a list of commands, then we can get those commands
381+
# and use them to do useful things like automatic reduction. in this case we give it the input
382+
# wasm plus opts and a random seed (if it needs any internal randomness; we want to have the same
383+
# value there if we reduce).
384+
random_seed = random.random()
385+
386+
# gets commands from the handler, for a given set of optimizations. this is all the commands
387+
# needed to run the testing that that handler wants to do.
388+
def get_commands(opts):
389+
return testcase_handler.get_commands(wasm='a.wasm', opts=opts + FUZZ_OPTS + FEATURE_OPTS, random_seed=random_seed)
390+
391+
def write_commands_and_test(opts):
392+
commands = get_commands(opts)
393+
write_commands(commands, 't.sh')
394+
subprocess.check_call(['bash', 't.sh'])
395+
396+
try:
397+
write_commands_and_test(opts)
398+
except subprocess.CalledProcessError:
399+
print('')
400+
print('====================')
401+
print('Found a problem! See "t.sh" for the commands, and "input.wasm" for the input. Auto-reducing to "reduced.wasm" and "tt.sh"...')
402+
print('====================')
403+
print('')
404+
# first, reduce the fuzz opts: keep removing until we can't
405+
while 1:
406+
reduced = False
407+
for i in range(len(opts)):
408+
# some opts can't be removed, like --flatten --dfo requires flatten
409+
if opts[i] == '--flatten':
410+
if i != len(opts) - 1 and opts[i + 1] in ('--dfo', '--local-cse', '--rereloop'):
411+
continue
412+
shorter = opts[:i] + opts[i + 1:]
413+
try:
414+
write_commands_and_test(shorter)
415+
except subprocess.CalledProcessError:
416+
# great, the shorter one is good as well
417+
opts = shorter
418+
print('reduced opts to ' + ' '.join(opts))
419+
reduced = True
420+
break
421+
if not reduced:
422+
break
423+
# second, reduce the wasm
424+
# copy a.wasm to a safe place as the reducer will use the commands on new inputs, and the commands work on a.wasm
425+
shutil.copyfile('a.wasm', 'input.wasm')
426+
# add a command to verify the input. this lets the reducer see that it is indeed working on the input correctly
427+
commands = [in_bin('wasm-opt') + ' -all a.wasm'] + get_commands(opts)
428+
write_commands(commands, 'tt.sh')
429+
# reduce the input to something smaller with the same behavior on the script
430+
subprocess.check_call([in_bin('wasm-reduce'), 'input.wasm', '--command=bash tt.sh', '-t', 'a.wasm', '-w', 'reduced.wasm'])
431+
print('Finished reduction. See "tt.sh" and "reduced.wasm".')
432+
sys.exit(1)
433+
print('')
434+
435+
# create a second wasm for handlers that want to look at pairs.
348436
run([in_bin('wasm-opt'), 'a.wasm', '-o', 'b.wasm'] + opts + FUZZ_OPTS + FEATURE_OPTS)
349437
wasm_size = os.stat('b.wasm').st_size
350438
bytes += wasm_size
351-
print('post js size:', os.stat('a.js').st_size, ' wasm size:', wasm_size)
352-
shutil.copyfile('a.js', 'b.js')
439+
print('post wasm size:', wasm_size)
353440

354441
for testcase_handler in testcase_handlers:
355-
print('running testcase handler:', testcase_handler.__class__.__name__)
356442
if testcase_handler.can_run_on_feature_opts(FEATURE_OPTS):
357-
testcase_handler.handle_pair(before_wasm='a.wasm', after_wasm='b.wasm', opts=opts + FUZZ_OPTS + FEATURE_OPTS)
443+
if not hasattr(testcase_handler, 'get_commands'):
444+
print('running testcase handler:', testcase_handler.__class__.__name__)
445+
# let the testcase handler handle this testcase however it wants. in this case we give it
446+
# the input and both wasms.
447+
testcase_handler.handle_pair(input=random_input, before_wasm='a.wasm', after_wasm='b.wasm', opts=opts + FUZZ_OPTS + FEATURE_OPTS)
448+
print('')
358449

359450
return bytes
360451

361452

453+
def write_commands(commands, filename):
    # Writes a bash script to `filename` that runs each of `commands` in order.
    #
    # The script starts with "set -e", so it stops at the first failing command. Each
    # command is echoed before it runs, with its own output sent to /dev/null, and a
    # final "ok" is echoed once all the commands have been emitted.
    #
    #  commands: iterable of shell command strings
    #  filename: path of the script to (over)write
    try:
        from shlex import quote  # python 3
    except ImportError:
        from pipes import quote  # python 2

    lines = ['set -e\n']
    for command in commands:
        # quote the echoed text so that quotes, $ and backticks inside the command are
        # printed literally rather than being interpreted by the shell a second time
        lines.append('echo %s\n' % quote(command))
        lines.append(command + ' &> /dev/null\n')
    lines.append('echo "ok"\n')

    with open(filename, 'w') as f:
        f.write(''.join(lines))
460+
461+
362462
# main
363463

364464
opt_choices = [

0 commit comments

Comments
 (0)