From 6d413654892e89aad4962138655ccbda78d48fa3 Mon Sep 17 00:00:00 2001 From: David Lakin Date: Thu, 16 May 2024 03:40:10 -0400 Subject: [PATCH] Refactor `fuzz_repo` to Improve Effectiveness & Efficiency Prior to the changes introduced here, the implementation of `fuzz_repo` had several issues: 1. `repo.stage()` was not called before the first `repo.do_commit()` call 2. When `repo.stage()` was eventually called, the argument it was passed was incorrect (a list of absolute paths instead of relative to the repo dir) causing a `ValueError` that broke the fuzzer runs. This was hidden during the initial local testing because of point 3 below. 3. Inefficient consumption of the fuzzer provided data resulted in the input bytes being exhausted early in the `TestOneInput` execution until the fuzzer was able to generate a corpus large enough to satisfy all of the `Consume*` calls. The issues listed above are addressed here by simplifying the test harness implementation. The result being significanly improved fuzzing coverage and slightly improved execution speed. Other changes: - `EnhancedFuzzedDataProvider.ConsumeRandomString` now accepts an argument to optionally exclude unicode surrogates from the returned string (useful for places where they will always raise an exception like most filesystem operations.) - Adds new fuzzing engine recommended dictionary entries (now that the test is effective enough to generate recommendations.) Closes: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=69054 --- fuzzing/dictionaries/fuzz_repo.dict | 3 +++ fuzzing/fuzz-targets/fuzz_repo.py | 32 ++++++++++++++--------------- fuzzing/fuzz-targets/test_utils.py | 10 +++++++-- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/fuzzing/dictionaries/fuzz_repo.dict b/fuzzing/dictionaries/fuzz_repo.dict index 77916edb6..99da684ca 100644 --- a/fuzzing/dictionaries/fuzz_repo.dict +++ b/fuzzing/dictionaries/fuzz_repo.dict @@ -1,2 +1,5 @@ "{self.size}" "win32" +"{self.extended_flags}" +"P\\303\\000\\000\\000\\000\\000\\000" +"\\377\\377\\377\\377" diff --git a/fuzzing/fuzz-targets/fuzz_repo.py b/fuzzing/fuzz-targets/fuzz_repo.py index dbc1e36ce..61db56486 100644 --- a/fuzzing/fuzz-targets/fuzz_repo.py +++ b/fuzzing/fuzz-targets/fuzz_repo.py @@ -21,16 +21,21 @@ def TestOneInput(data): repo.set_description(fdp.ConsumeRandomBytes()) repo.get_description() - # Generate a minimal set of files based on fuzz data to minimize I/O operations. - file_paths = [ - os.path.join(temp_dir, f"File{i}") - for i in range(min(3, fdp.ConsumeIntInRange(1, 3))) - ] - for file_path in file_paths: - with open(file_path, "wb") as f: - f.write(fdp.ConsumeRandomBytes()) + try: + # Generate a minimal set of files based on fuzz data to minimize I/O operations. + file_names = [ + f"File{i}{fdp.ConsumeRandomString(without_surrogates=True)}" + for i in range(min(3, fdp.ConsumeIntInRange(1, 3))) + ] + for file in file_names: + with open(os.path.join(temp_dir, file), "wb") as f: + f.write(fdp.ConsumeRandomBytes()) + except (ValueError, OSError): + # Exit early if the fuzzer generates an invalid filename. + return -1 try: + repo.stage(file_names) repo.do_commit( message=fdp.ConsumeRandomBytes(), committer=fdp.ConsumeRandomBytes(), @@ -45,15 +50,8 @@ def TestOneInput(data): except ValueError as e: if is_expected_exception(["Unable to handle non-minute offset"], e): return -1 - - for file_path in file_paths: - with open(file_path, "wb") as f: - f.write(fdp.ConsumeRandomBytes()) - - repo.stage(file_paths) - repo.do_commit( - message=fdp.ConsumeRandomBytes(), - ) + else: + raise e def main(): diff --git a/fuzzing/fuzz-targets/test_utils.py b/fuzzing/fuzz-targets/test_utils.py index 90342080f..b10d3a10d 100644 --- a/fuzzing/fuzz-targets/test_utils.py +++ b/fuzzing/fuzz-targets/test_utils.py @@ -57,11 +57,12 @@ def ConsumeRandomBytes(self, max_length=None) -> bytes: return self.ConsumeBytes(self.ConsumeIntInRange(0, max_length)) - def ConsumeRandomString(self, max_length=None) -> str: + def ConsumeRandomString(self, max_length=None, without_surrogates=False) -> str: """Consume bytes to produce a Unicode string. Args: max_length (int, optional): The maximum length of the string. Defaults to the number of remaining bytes. + without_surrogates (bool, optional): If True, never generate surrogate pair characters. Defaults to False. Returns: str: A Unicode string. @@ -71,7 +72,12 @@ def ConsumeRandomString(self, max_length=None) -> str: else: max_length = min(max_length, self.remaining_bytes()) - return self.ConsumeUnicode(self.ConsumeIntInRange(0, max_length)) + count = self.ConsumeIntInRange(0, max_length) + + if without_surrogates: + return self.ConsumeUnicodeNoSurrogates(count) + else: + return self.ConsumeUnicode(count) def ConsumeRandomInt(self, minimum=0, maximum=1234567890) -> int: """Consume bytes to produce an integer.