[Fuzzing] Use initial contents in ClusterFuzz (#7192)

kripken · web-flow · commit 8d0f662a054b · 2025-01-07T14:36:28.000-08:00
The ClusterFuzz bundler now looks through all of our test suites and
packages all testcases that are suitable for ClusterFuzz to use. This adds
more variety to the wasm files we fuzz there, as the test suite has
corner cases that the main fuzzer is unlikely to generate.

This adds a comment in the JS whenever it uses initial content, to
make debugging easier, something like

[10, 20, 30] /* using initial content 17.wasm */

(this is the reason for the change to extract_wasms.py)
diff --git a/scripts/bundle_clusterfuzz.py b/scripts/bundle_clusterfuzz.py
@@ -71,6 +71,7 @@
 '''
 
 import os
+import subprocess
 import sys
 import tarfile
 
@@ -87,7 +88,9 @@
     # Delete the argument, as importing |shared| scans it.
     sys.argv.pop()
 
+from test import fuzzing # noqa
 from test import shared # noqa
+from test import support # noqa
 
 # Pick where to get the builds
 if build_dir:
@@ -97,6 +100,14 @@
     binaryen_bin = shared.options.binaryen_bin
     binaryen_lib = shared.options.binaryen_lib
 
+# ClusterFuzz's run.py uses these features. Keep this in sync with that, so that
+# we only bundle initial content that makes sense for it.
+features = [
+    '-all',
+    '--disable-shared-everything',
+    '--disable-fp16',
+]
+
 with tarfile.open(output_file, "w:gz") as tar:
     # run.py
     run = os.path.join(shared.options.binaryen_root, 'scripts', 'clusterfuzz', 'run.py')
@@ -128,6 +139,40 @@
                     print(f'  ......... : {path}')
                     tar.add(path, arcname=f'lib/{name}')
 
+    # Add tests we will use as initial content under initial/. We put all the
+    # fuzzable tests from the test suite that are valid for our features there.
+    print('  .. initial content: ')
+    temp_wasm = 'temp.wasm'
+    index = 0
+    all_tests = shared.get_all_tests()
+    for i, test in enumerate(all_tests):
+        if not fuzzing.is_fuzzable(test):
+            continue
+        for wast, asserts in support.split_wast(test):
+            if not wast:
+                continue
+            support.write_wast(temp_wasm, wast)
+            # If the file is not valid for our features, skip it. In the same
+            # operation, also convert to binary if this was text (binary is more
+            # compact).
+            cmd = shared.WASM_OPT + ['-q', temp_wasm, '-o', temp_wasm] + features
+            if subprocess.run(cmd, stderr=subprocess.PIPE).returncode:
+                continue
+
+            # Looks good.
+            tar.add(temp_wasm, arcname=f'initial/{index}.wasm')
+            index += 1
+        print(f'\r        {100 * i / len(all_tests):.2f}%', end='', flush=True)
+    print(f'        (num: {index})')
+
+    # Write initial/num.txt which contains the number of testcases in that
+    # directory (saves run.py from needing to listdir each time).
+    num_txt = 'num.txt'
+    with open(num_txt, 'w') as f:
+        f.write(f'{index}')
+    tar.add(num_txt, arcname='initial/num.txt')
+
+
 print('Done.')
 print('To run the tests on this bundle, do:')
 print()
diff --git a/scripts/clusterfuzz/extract_wasms.py b/scripts/clusterfuzz/extract_wasms.py
@@ -67,7 +67,7 @@ def repl(text):
 
 
 # Replace the wasm files and write them out.
-js = re.sub(r'var \w+ = new Uint8Array\(\[([\d,]+)\]\);', repl, js)
+js = re.sub(r'var \w+ = new Uint8Array\(\[([\d,]+)\]\)', repl, js)
 
 # Write out the new JS.
 with open(f'{out}.js', 'w') as f:
diff --git a/scripts/clusterfuzz/run.py b/scripts/clusterfuzz/run.py
@@ -68,6 +68,12 @@
 # testcase.
 JS_SHELL_PATH = os.path.join(ROOT_DIR, 'scripts', 'fuzz_shell.js')
 
+# The path to the directory with initial contents.
+INITIAL_CONTENT_PATH = os.path.join(ROOT_DIR, 'initial')
+
+# The file that contains the number of initial contents.
+INITIAL_CONTENT_NUM_PATH = os.path.join(ROOT_DIR, 'initial', 'num.txt')
+
 # The arguments we provide to wasm-opt to generate wasm files.
 FUZZER_ARGS = [
     # Generate a wasm from random data.
@@ -76,7 +82,8 @@
     '--fuzz-passes',
     # Enable all features but disable ones not yet ready for fuzzing. This may
     # be a smaller set than fuzz_opt.py, as that enables a few experimental
-    # flags, while here we just fuzz with d8's --wasm-staging.
+    # flags, while here we just fuzz with d8's --wasm-staging. This should be
+    # synchronized with bundle_clusterfuzz.
     '-all',
     '--disable-shared-everything',
     '--disable-fp16',
@@ -92,6 +99,17 @@ def get_file_name(prefix, index):
 # (We also use urandom below, which uses this under the hood.)
 system_random = random.SystemRandom()
 
+# The number of initial content testcases that were bundled for us, in the
+# "initial/" subdir.
+with open(INITIAL_CONTENT_NUM_PATH) as f:
+    num_initial_contents = int(f.read())
+
+
+def get_random_initial_content():
+    index = system_random.randint(0, num_initial_contents - 1)
+    return os.path.join(INITIAL_CONTENT_PATH, f'{index}.wasm')
+
+
 # In production ClusterFuzz we retry whenever we see a wasm-opt error. We are
 # not looking for wasm-opt issues there, and just use it to generate testcases
 # for VMs. For local testing, however, we may want to disable retrying, which
@@ -117,9 +135,19 @@ def get_wasm_contents(i, output_dir):
         with open(input_data_file_path, 'wb') as file:
             file.write(os.urandom(random_size))
 
-        # Generate wasm from the random data.
+        # Generate a command to use wasm-opt with the proper args to generate
+        # wasm content from the input data.
         cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS
         cmd += ['-o', wasm_file_path, input_data_file_path]
+
+        # Sometimes use a file from the initial content testcases.
+        if system_random.random() < 0.5:
+            initial_content = get_random_initial_content()
+            cmd += ['--initial-fuzz=' + initial_content]
+        else:
+            initial_content = None
+
+        # Generate wasm from the random data.
         try:
             subprocess.check_call(cmd)
         except subprocess.CalledProcessError:
@@ -148,7 +176,10 @@ def get_wasm_contents(i, output_dir):
 
     # Convert to a string, and wrap into a typed array.
     wasm_contents = ','.join([str(c) for c in wasm_contents])
-    return f'new Uint8Array([{wasm_contents}])'
+    js = f'new Uint8Array([{wasm_contents}])'
+    if initial_content:
+        js = f'{js} /* using initial content {os.path.basename(initial_content)} */'
+    return js
 
 
 # Returns the contents of a .js fuzz file, given the index of the testcase and
diff --git a/test/unit/test_cluster_fuzz.py b/test/unit/test_cluster_fuzz.py
@@ -282,6 +282,15 @@ def test_file_contents(self):
         seen_calls = []
         seen_second_builds = []
         seen_JSPIs = []
+        seen_initial_contents = []
+
+        # Initial contents are noted in comments like this:
+        #
+        # /* using initial content 42.wasm */
+        #
+        # Note that we may see more than one in a file, as we may have more than
+        # one wasm in each testcase: each wasm has a chance.
+        initial_content_regex = re.compile(r'[/][*] using initial content ([^ ]+) [*][/]')
 
         for i in range(1, N + 1):
             fuzz_file = os.path.join(temp_dir.name, f'fuzz-binaryen-{i}.js')
@@ -302,6 +311,8 @@ def test_file_contents(self):
                 assert '/* async */' in js
                 assert '/* await */' in js
 
+            seen_initial_contents.append(re.findall(initial_content_regex, js))
+
         # There is always one build and one call (those are in the default
         # fuzz_shell.js), and we add a couple of operations, each with equal
         # probability to be a build or a call, so over the 100 testcases here we
@@ -346,6 +357,55 @@ def test_file_contents(self):
 
         print()
 
+        # Flatten the data to help some of the below, from
+        #  [['a.wasm', 'b.wasm'], ['c.wasm']]
+        # into
+        #  ['a.wasm', 'b.wasm', 'c.wasm']
+        flat_initial_contents = [item for items in seen_initial_contents for item in items]
+
+        # Initial contents appear 50% of the time for each wasm file. Each
+        # testcase has 1.333 wasm files on average.
+        print('Initial contents are distributed as ~ mean 0.68')
+        print(f'mean initial contents: {len(flat_initial_contents) / N}')
+        # Initial contents should be mostly unique (we have many, many testcases
+        # and we pick just 100 or so). And we must see more than one unique one.
+        unique_initial_contents = set(flat_initial_contents)
+        print(f'unique initial contents: {len(unique_initial_contents)} should be almost equal to {len(flat_initial_contents)}')
+        self.assertGreater(len(unique_initial_contents), 1)
+        # Not all testcases have initial contents.
+        num_initial_contents = [len(items) for items in seen_initial_contents]
+        self.assertEqual(min(num_initial_contents), 0)
+        # Some do (this is redundant given that the set of unique initial
+        # contents was asserted on before, so this just confirms/checks that).
+        self.assertGreaterEqual(max(num_initial_contents), 1)
+
+        print()
+
+        # Execute the files in V8. Almost all should execute properly (some
+        # small number may trap during startup, say on a segment out of bounds).
+        if shared.V8:
+            valid_executions = 0
+            for i in range(1, N + 1):
+                fuzz_file = os.path.join(temp_dir.name, f'fuzz-binaryen-{i}.js')
+
+                cmd = [shared.V8, '--wasm-staging', fuzz_file]
+                proc = subprocess.run(cmd, stdout=subprocess.PIPE)
+
+                # An execution is valid if we exited without error, and if we
+                # managed to run some code before exiting (modules with no
+                # exports will be considered "invalid" here, but that is very
+                # rare, and in a sense they are actually not useful).
+                if proc.returncode == 0 and b'[fuzz-exec] calling ' in proc.stdout:
+                    valid_executions += 1
+
+            print('Valid executions are distributed as ~ mean 0.99')
+            print(f'mean valid executions: {valid_executions / N}')
+            # Assert on having at least half execute properly. Given the true mean
+            # is 0.99, for half of 100 to fail is incredibly unlikely.
+            self.assertGreater(valid_executions, N / 2)
+
+        print()
+
     # "zzz" in test name so that this runs last. If it runs first, it can be
     # confusing as it appears next to the logging of which bundle we use (see
     # setUpClass).