From 4556c51cd0664a807d1d99b0f85f59c2abf927a8 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 23:42:45 +0000 Subject: [PATCH 01/12] Add unit tests to check the behaviour of mutators matches expectations. The mutators weren't being tested so we only had to assume that they were doing the right thing. In my own tests I saw a lot of input that had NUL bytes in it, so I'm pretty sure that some of the mutations were not doing the right thing. With these tests, the copy method was found to have poorly named parameters. The pattern it's using is copy(a, b, posa, posb, lena, lenb) (where lena and lenb can be omitted). The 'a' parameter is where the update will take place, and the 'b' parameter is where the copy originates. However, the first parameter was called 'src' and the second parameter 'dst', which is the precise opposite of the expectation. The naming of the parameters was kept consistent (a named as src, despite being the destination) with the other named parameters and within the function. This has been corrected, and with the correct naming, it became obvious that the insert, remove and duplicate functions were not working as intended. It is unclear what the difference is intended to be for Duplicate and Copy bytes - I have 'fixed' Duplicate, but this means that it now works identically to Copy, so it's not clear to me what's meant to be done there. 
--- pythonfuzz/corpus.py | 43 ++++---- tests/unittest_mutators.py | 198 +++++++++++++++++++++++++++++++++++++ 2 files changed, 224 insertions(+), 17 deletions(-) create mode 100644 tests/unittest_mutators.py diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index 051adb3..bde80c6 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -49,22 +49,27 @@ def _rand(n): return 0 return random.randint(0, n-1) - @staticmethod - def _choose_len(n): - x = Corpus._rand(100) + @classmethod + def _choose_len(cls, n): + x = cls._rand(100) if x < 90: - return Corpus._rand(min(8, n)) + 1 + return cls._rand(min(8, n)) + 1 elif x < 99: - return Corpus._rand(min(32, n)) + 1 + return cls._rand(min(32, n)) + 1 else: - return Corpus._rand(n) + 1 + return cls._rand(n) + 1 @staticmethod - def copy(src, dst, start_source, start_dst, end_source=None, end_dst=None): - end_source = len(src) if end_source is None else end_source + def copy(dst, src, start_dst, start_src, end_dst=None, end_src=None): + """ + Copy of content from one slice of a source object to a destination object. + + dst and src may be the same object. 
+ """ + end_src = len(src) if end_src is None else end_src end_dst = len(dst) if end_dst is None else end_dst - byte_to_copy = min(end_source-start_source, end_dst-start_dst) - src[start_source:start_source+byte_to_copy] = dst[start_dst:start_dst+byte_to_copy] + byte_to_copy = min(end_src-start_src, end_dst-start_dst) + dst[start_dst:start_dst+byte_to_copy] = src[start_src:start_src+byte_to_copy] def mutate(self, res): """ @@ -87,9 +92,10 @@ def mutate(self, res): return None pos0 = self._rand(len(res)) - pos1 = pos0 + self._choose_len(len(res) - pos0) - self.copy(res, res, pos1, pos0) - return res[:len(res) - (pos1-pos0)] + num_to_remove = self._choose_len(len(res) - pos0) + pos1 = pos0 + num_to_remove + self.copy(res, res, pos0, pos1) + return res[:len(res) - num_to_remove] @register_mutator @@ -102,7 +108,7 @@ def mutate(self, res): n = self._choose_len(10) for k in range(n): res.append(0) - self.copy(res, res, pos, pos+n) + self.copy(res, res, pos+n, pos) for k in range(n): res[pos+k] = self._rand(256) return res @@ -121,11 +127,10 @@ def mutate(self, res): while src == dst: dst = self._rand(len(res)) n = self._choose_len(len(res) - src) - tmp = bytearray(n) - self.copy(res, tmp, src, 0) + tmp = bytearray(res[src:src+n]) for k in range(n): res.append(0) - self.copy(res, res, dst, dst+n) + self.copy(res, res, dst+n, dst) for k in range(n): res[dst+k] = tmp[k] return res @@ -133,6 +138,7 @@ def mutate(self, res): @register_mutator class MutatorCopyBytes(Mutator): + # FIXME: Check how this diffs from DuplicateBytes name = 'Copy a range of bytes' types = set(['byte', 'copy']) @@ -170,6 +176,7 @@ def mutate(self, res): if len(res) == 0: return None pos = self._rand(len(res)) + # We use rand(255) + 1 so that there is no `^ 0` applied to the byte; it always changes. 
res[pos] ^= self._rand(255) + 1 return res @@ -492,6 +499,7 @@ def generate_input(self): def mutate(self, buf): res = buf[:] nm = self._rand_exp() + #print("Start with {}".format(res)) for i in range(nm): # Select a mutator from those we can apply @@ -501,6 +509,7 @@ def mutate(self, buf): x = self._rand(len(self.mutators)) mutator = self.mutators[x] + #print("Mutate with {}".format(mutator.__class__.__name__)) newres = mutator.mutate(res) if newres is not None: break diff --git a/tests/unittest_mutators.py b/tests/unittest_mutators.py new file mode 100644 index 0000000..3a185d1 --- /dev/null +++ b/tests/unittest_mutators.py @@ -0,0 +1,198 @@ +import unittest + +try: + from unittest.mock import patch +except ImportError: + # Python 2 backport of mock + from mock import patch + +import pythonfuzz.corpus as corpus + + +class FakeCorpus(object): + pass + + +class BaseTestMutators(unittest.TestCase): + """ + Test that the mutators objects are doing what we want them to do. + """ + # Subclasses should set this - 'mutator' will be created as part of setup. + mutator_class = None + + def setUp(self): + self.corpus = FakeCorpus() + self.patch_rand = patch('pythonfuzz.corpus.Mutator._rand') + self.mock_rand = self.patch_rand.start() + self.mock_rand.side_effect = [] + # Update the side effects in your subclass + + self.addCleanup(self.patch_rand.stop) + + self.mutator = self.mutator_class(self.corpus) + + +class TestMutatorRemoveRange(BaseTestMutators): + mutator_class = corpus.MutatorRemoveRange + + def test01_empty(self): + # You cannot remove values from an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_remove_section(self): + # Check that it removes a sensible range + + # Check that removing at the 2nd position, removing 4 characters leaves the right string. 
+ self.mock_rand.side_effect = [2, 0, 3] + + res = self.mutator.mutate(bytearray(b'1234567890')) + self.assertEqual(res, bytearray(b'127890')) + + +class TestMutatorInsertBytes(BaseTestMutators): + mutator_class = corpus.MutatorInsertBytes + + def test02_insert_bytes(self): + # Check that it inserts sensibly + + # Check that inserting at the 2nd position, adding 4 characters gives us the right string + self.mock_rand.side_effect = [2, 0, 3, 65, 66, 67, 68] + + res = self.mutator.mutate(bytearray(b'123456789')) + self.assertEqual(res, bytearray(b'12ABCD3456789')) + + +class TestMutatorDuplicateBytes(BaseTestMutators): + mutator_class = corpus.MutatorDuplicateBytes + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_duplicate_bytes(self): + # Check that it duplicates + + # Duplicate from offset 2 to offset 5, length 2 + self.mock_rand.side_effect = [2, 5, 0, 1] + + res = self.mutator.mutate(bytearray(b'123456789')) + self.assertEqual(res, bytearray(b'12345346789')) + + +class TestMutatorCopyBytes(BaseTestMutators): + mutator_class = corpus.MutatorDuplicateBytes + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_duplicate_bytes(self): + # Check that it duplicates + + # Duplicate from offset 2 to offset 5, length 2 + self.mock_rand.side_effect = [2, 5, 0, 1] + + res = self.mutator.mutate(bytearray(b'123456789')) + self.assertEqual(res, bytearray(b'12345346789')) + + +class TestMutatorBitFlip(BaseTestMutators): + mutator_class = corpus.MutatorBitFlip + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_flip_bit(self): + # Check that it flips + + # At offset 4, flip bit 3 + self.mock_rand.side_effect = [4, 3] + + res = self.mutator.mutate(bytearray(b'123456789')) + self.assertEqual(res, 
bytearray(b'1234=6789')) + + +class TestMutatorRandomiseByte(BaseTestMutators): + mutator_class = corpus.MutatorRandomiseByte + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_randomise_byte(self): + # Check that it changes a byte + + # At offset 4, EOR with 65+1 + self.mock_rand.side_effect = [4, 65] + + res = self.mutator.mutate(bytearray(b'123456789')) + self.assertEqual(res, bytearray(b'1234w6789')) + + +class TestMutatorSwapBytes(BaseTestMutators): + mutator_class = corpus.MutatorSwapBytes + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_swap_bytes(self): + # Check that it swaps bytes + + # Swap bytes at 1 and 6 + self.mock_rand.side_effect = [1, 6] + + res = self.mutator.mutate(bytearray(b'123456789')) + self.assertEqual(res, bytearray(b'173456289')) + + +class TestMutatorAddSubByte(BaseTestMutators): + mutator_class = corpus.MutatorAddSubByte + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_add_bytes(self): + # Check that it adds/subs + # FIXME: Not yet implemented - uses a randomised bit for the add/sub + pass + +# FIXME: Also not implemented AddSubShort, AddSubLong, AddSubLongLong +# FIXME: Not yet implemented ReplaceByte, ReplaceShort, ReplaceLong + + +class TestMutatorReplaceDigit(BaseTestMutators): + mutator_class = corpus.MutatorReplaceDigit + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_no_digits(self): + # Cannot work with a string that has no digits + res = self.mutator.mutate(bytearray(b'wibble')) + self.assertIsNone(res) + + def test03_replace_digit(self): + # Check that it replaces a digit + self.mock_rand.side_effect = [0, 5] + + res = 
self.mutator.mutate(bytearray(b'there are 4 lights')) + self.assertEqual(res, bytearray(b'there are 5 lights')) + + +# FIXME: Not yet implemented: Dictionary insert, Dictionary Append + + +if __name__ == '__main__': + unittest.main() From 24f7a6516d3ef472b92fed498fa1b2dea12dd725 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 23:54:43 +0000 Subject: [PATCH 02/12] Update the Makefile to run unit tests. Now that we have unit tests, these can be invoked by the Makefile. --- Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index d4c1b9b..c44c4c0 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,8 @@ ACTIVATE = source venv/${PYTHON_TOOL}/bin/activate .PHONY: tests venv -TEST_MODULES = ${patsubst tests/%.py,%,$(wildcard tests/test_*.py)} +UNITTEST_MODULES = ${patsubst tests/%.py,%,$(wildcard tests/unittest_*.py)} +INTTEST_MODULES = ${patsubst tests/%.py,%,$(wildcard tests/test_*.py)} ifeq (${NOCOLOUR},) COL_NOTICE = "\\e[35m" @@ -51,14 +52,17 @@ test_level_system: test_level_integration systemtests # Unit tests test individual parse of a small unit. unittests: test_testable ${NOTICE} "Running unit tests" - ${GOOD} "Unit tests passed (we don't have any yet)" + @# Note: We cd into the tests directory, so that we are testing the installed version, not + @# the version in the repository. + ${ACTIVATE} && cd tests && python -munittest -v ${UNITTEST_MODULES} + ${GOOD} "Unit tests passed" # Integration tests check the integration of those units. integrationtests: test_testable ${NOTICE} "Running integration tests" @# Note: We cd into the tests directory, so that we are testing the installed version, not @# the version in the repository. - ${ACTIVATE} && cd tests && python -munittest -v ${TEST_MODULES} + ${ACTIVATE} && cd tests && python -munittest -v ${INTTEST_MODULES} ${GOOD} "Integration tests passed" # System tests check that the way that a user might use it works. 
From 410fa8ac13aba8fb37a285221fc7a16dac0925c0 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 23:55:21 +0000 Subject: [PATCH 03/12] Add annotations to each of the test systems to describe themselves. The test files have now been updated to include annotations, in the file prologue comment, which describe the test and its place in the testing environment. These are just a convention that I've used previously, but they help to focus anyone doing testing on describing where they fit into the testing of the system. Such descriptions make it obvious when reviewed where there are gaps in testing. --- examples/run_all_examples.py | 5 +++++ tests/test_crash.py | 9 +++++++++ tests/test_nocrash.py | 9 +++++++++ tests/unittest_mutators.py | 9 +++++++++ 4 files changed, 32 insertions(+) diff --git a/examples/run_all_examples.py b/examples/run_all_examples.py index 600d6cc..79f28ef 100755 --- a/examples/run_all_examples.py +++ b/examples/run_all_examples.py @@ -1,6 +1,11 @@ #!/usr/bin/env python """ Run all the examples and collect the timings and results. + +SUT: Invocation +Area: Examples run +Class: Functional +Type: System test """ import argparse diff --git a/tests/test_crash.py b/tests/test_crash.py index a48e1c1..e19e550 100644 --- a/tests/test_crash.py +++ b/tests/test_crash.py @@ -1,3 +1,12 @@ +""" +Test the fuzzing terminates when a fault is found. + +SUT: Fuzzer +Area: Fault finding +Class: Functional +Type: Integration test +""" + import io import os import unittest diff --git a/tests/test_nocrash.py b/tests/test_nocrash.py index 22fe36a..210c14f 100644 --- a/tests/test_nocrash.py +++ b/tests/test_nocrash.py @@ -1,3 +1,12 @@ +""" +Test the fuzzing terminates when no faults found, at a run limit. 
+ +SUT: Fuzzer +Area: Non-fault operation +Class: Functional +Type: Integration test +""" + import unittest try: diff --git a/tests/unittest_mutators.py b/tests/unittest_mutators.py index 3a185d1..8ed5ae1 100644 --- a/tests/unittest_mutators.py +++ b/tests/unittest_mutators.py @@ -1,3 +1,12 @@ +""" +Test the mutators operate as desired. + +SUT: Corpus +Area: Mutators +Class: Functional +Type: Unit test +""" + import unittest try: From 8f6c8a3d917108c30cff8f8d18fe7f233dfc95a3 Mon Sep 17 00:00:00 2001 From: Florian Pigorsch Date: Thu, 23 Jan 2020 22:00:38 +0100 Subject: [PATCH 04/12] Add 'CleverCSV' bug/trophy n/a --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 85c88f0..0b1a3e6 100644 --- a/README.md +++ b/README.md @@ -110,5 +110,6 @@ any unnecessary work is done. ## Trophies * [python built-in HTMLParser - unhandled exception](https://bugs.python.org/msg355287) +* [CleverCSV - unhandled exceptions](https://github.com/alan-turing-institute/CleverCSV/issues/7) **Feel free to add bugs that you found with pythonfuzz to this list via pull-request** From f9eb3d5143c967a6c2e0470afa1e5910caefa17b Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sat, 8 Feb 2020 14:12:29 +0100 Subject: [PATCH 05/12] Get rid of coverage.py --- README.md | 2 -- pythonfuzz/fuzzer.py | 35 +++-------------------------------- pythonfuzz/tracer.py | 36 ++++++++++++++++++++++++++++++++++++ requirements.txt | 1 - setup.py | 1 - 5 files changed, 39 insertions(+), 36 deletions(-) create mode 100644 pythonfuzz/tracer.py diff --git a/README.md b/README.md index 0b1a3e6..f471bd8 100644 --- a/README.md +++ b/README.md @@ -98,8 +98,6 @@ PythonFuzz is a port of [fuzzitdev/jsfuzz](https://github.com/fuzzitdev/jsfuzz) which is in turn heavily based on [go-fuzz](https://github.com/dvyukov/go-fuzz) originally developed by [Dmitry Vyukov's](https://twitter.com/dvyukov). 
Which is in turn heavily based on [Michal Zalewski](https://twitter.com/lcamtuf) [AFL](http://lcamtuf.coredump.cx/afl/). -For coverage PythonFuzz is using [coverage](https://coverage.readthedocs.io/en/v4.5.x/) instrumentation and coverage library. - ## Contributions Contributions are welcome!:) There are still a lot of things to improve, and tests and features to add. We will slowly post those in the diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 3e3c886..1e27391 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -5,11 +5,10 @@ import psutil import hashlib import logging -import coverage import functools import multiprocessing as mp -from pythonfuzz import corpus +from pythonfuzz import corpus, tracer logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) logging.getLogger().setLevel(logging.DEBUG) @@ -23,29 +22,6 @@ lru_cache = functools32.lru_cache -if coverage.version.version_info <= (5, ): - # Since we're using an old version of coverage.py, - # we're monkey patching it a bit to improve the performances. - - # Using memoization here gives +50% in performances, since this - # function triggers a lot of syscalls. 
- # See the benchmarks here: - # - https://github.com/fuzzitdev/pythonfuzz/issues/9 - @lru_cache(None) - def abs_file_cache(path): - """Return the absolute normalized form of `path`.""" - try: - path = os.path.realpath(path) - except UnicodeError: - pass - path = os.path.abspath(path) - path = coverage.files.actual_path(path) - path = coverage.files.unicode_filename(path) - return path - - coverage.files.abs_file = abs_file_cache - - def worker(target, child_conn, close_fd_mask): # Silence the fuzzee's noise class DummyFile: @@ -59,8 +35,7 @@ def write(self, x): if close_fd_mask & 2: sys.stderr = DummyFile() - cov = coverage.Coverage(branch=True, cover_pylib=True) - cov.start() + sys.settrace(tracer.trace) while True: buf = child_conn.recv_bytes() try: @@ -71,11 +46,7 @@ def write(self, x): child_conn.send(e) break else: - total_coverage = 0 - cov_data = cov.get_data() - for filename in cov_data._arcs: - total_coverage += len(cov_data._arcs[filename]) - child_conn.send(total_coverage) + child_conn.send(tracer.get_coverage()) class Fuzzer(object): diff --git a/pythonfuzz/tracer.py b/pythonfuzz/tracer.py new file mode 100644 index 0000000..26c7cad --- /dev/null +++ b/pythonfuzz/tracer.py @@ -0,0 +1,36 @@ +import collections +import sys + +prev_line = 0 +prev_filename = '' +data = collections.defaultdict(set) + +def trace(frame, event, arg): + if event != 'line': + return trace + + global prev_line + global prev_filename + + func_filename = frame.f_code.co_filename + func_line_no = frame.f_lineno + + if func_filename != prev_filename: + # We need a way to keep track of inter-files transferts, + # and since we don't really care about the details of the coverage, + # concatenating the two filenames in enough. 
+ data[func_filename + prev_filename].add((prev_line, func_line_no)) + else: + data[func_filename].add((prev_line, func_line_no)) + + prev_line = func_line_no + prev_filename = func_filename + + return trace + + +def get_coverage(): + ret = 0 + for value in data.values(): + ret += len(value) + return ret \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e6af3b9..c278d1e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -coverage==4.5.4 psutil==5.6.3 numpy==1.16.6; python_version < '3' numpy==1.17.3; python_version >= '3' diff --git a/setup.py b/setup.py index c1a82a2..ae25ec9 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,6 @@ url="https://github.com/fuzzitdev/pythonfuzz", install_requires=[ # WARNING: Keep these values in line with those in requirements.txt - "coverage==4.5.4", "psutil==5.6.3", "numpy==1.16.6; python_version < '3'", "numpy==1.17.3; python_version >= '3'", From 80acd38e14ace64417400d45c68e1d06630b514c Mon Sep 17 00:00:00 2001 From: jvoisin Date: Mon, 2 Mar 2020 22:11:20 +0100 Subject: [PATCH 06/12] Use (recv/send)_bytes instead of recv/send This commit slightly improves performance, in the same spirit as b91ad83 --- pythonfuzz/fuzzer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 1e27391..bd88854 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -46,7 +46,7 @@ def write(self, x): child_conn.send(e) break else: - child_conn.send(tracer.get_coverage()) + child_conn.send_bytes(b'%d' % tracer.get_coverage()) class Fuzzer(object): @@ -135,8 +135,9 @@ def start(self): self.write_sample(buf, prefix='timeout-') break - total_coverage = parent_conn.recv() - if type(total_coverage) != int: + try: + total_coverage = int(parent_conn.recv_bytes()) + except ValueError: self.write_sample(buf) break From e438c4cbe850c357a11465c00bbfe60b91aba995 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 6 Mar 2020 20:47:45 
+0100 Subject: [PATCH 07/12] Significantly speed up coverage collection This reduces the time spent in get_coverage from ~30% to ~2% in my local tests on Python3. --- pythonfuzz/tracer.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pythonfuzz/tracer.py b/pythonfuzz/tracer.py index 26c7cad..708979f 100644 --- a/pythonfuzz/tracer.py +++ b/pythonfuzz/tracer.py @@ -30,7 +30,4 @@ def trace(frame, event, arg): def get_coverage(): - ret = 0 - for value in data.values(): - ret += len(value) - return ret \ No newline at end of file + return sum(map(len, data.values())) From 4f3a68b53e82ee958d35b51a56e696e11a391126 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Mar 2020 18:21:03 +0000 Subject: [PATCH 08/12] Bump psutil from 5.6.3 to 5.6.6 Bumps [psutil](https://github.com/giampaolo/psutil) from 5.6.3 to 5.6.6. - [Release notes](https://github.com/giampaolo/psutil/releases) - [Changelog](https://github.com/giampaolo/psutil/blob/master/HISTORY.rst) - [Commits](https://github.com/giampaolo/psutil/compare/release-5.6.3...release-5.6.6) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index c278d1e..797772e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -psutil==5.6.3 +psutil==5.6.6 numpy==1.16.6; python_version < '3' numpy==1.17.3; python_version >= '3' functools32==3.2.3.post2; python_version < '3' diff --git a/setup.py b/setup.py index ae25ec9..69bc416 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ url="https://github.com/fuzzitdev/pythonfuzz", install_requires=[ # WARNING: Keep these values in line with those in requirements.txt - "psutil==5.6.3", + "psutil==5.6.6", "numpy==1.16.6; python_version < '3'", "numpy==1.17.3; python_version >= '3'", "functools32==3.2.3.post2; python_version < '3'", From 
ca4960380cb3e6439a1c3730b92c9798c00abdee Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 10 Jun 2020 18:27:53 +0200 Subject: [PATCH 09/12] Get rid of numpy Numpy's types aren't doing much beside eating CPU and doing a simple wraparound, which can be done via a simple modulo. This commit also unroll a couple of loops. --- pythonfuzz/corpus.py | 60 ++++++++++++++++++++++---------------------- requirements.txt | 2 -- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index bde80c6..fbbe844 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -1,6 +1,5 @@ import os import math -import numpy import random import struct import hashlib @@ -9,8 +8,8 @@ INTERESTING8 = [-128, -1, 0, 1, 16, 32, 64, 100, 127] -INTERESTING16 = [-32768, -129, 128, 255, 256, 512, 1000, 1024, 4096, 32767] -INTERESTING32 = [-2147483648, -100663046, -32769, 32768, 65535, 65536, 100663045, 2147483647] +INTERESTING16 = [0, 128, 255, 256, 512, 1000, 1024, 4096, 32767, 65535] +INTERESTING32 = [0, 1, 32768, 65535, 65536, 100663045, 2147483647, 4294967295] # A list of all the mutator clases we have available @@ -206,11 +205,8 @@ def mutate(self, res): if len(res) == 0: return None pos = self._rand(len(res)) - v = self._rand(35) + 1 - if bool(random.getrandbits(1)): - res[pos] = numpy.uint8(res[pos]) + numpy.uint8(v) - else: - res[pos] = numpy.uint8(res[pos]) - numpy.uint8(v) + v = self._rand(2**8) + res[pos] = (res[pos] + v) % 256 return res @@ -223,16 +219,14 @@ def mutate(self, res): if len(res) < 2: return None pos = self._rand(len(res) - 1) - v = numpy.uint16(self._rand(35) + 1) - if bool(random.getrandbits(1)): - v = numpy.uint16(0) - v + v = self._rand(2**16) if bool(random.getrandbits(1)): v = struct.pack('>H', v) else: v = struct.pack('I', v) else: v = struct.pack('Q', v) else: v = struct.pack('H', v) else: v = struct.pack('I', v) else: v = struct.pack('= '3' functools32==3.2.3.post2; python_version < '3' From 
e806f3b7346e37315c8e6698b13a18659a767e18 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 10 Jun 2020 18:53:11 +0200 Subject: [PATCH 10/12] Move the corpus initialization out of a hot path This initial corpus seeding can be done in the constructor, instead of checking if it has been initialized every single time the fuzzer generates an input. --- pythonfuzz/corpus.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index bde80c6..75e3f58 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -412,6 +412,7 @@ def __init__(self, dirs=None, max_input_size=4096, mutators_filter=None, dict_pa self._seed_run_finished = not self._inputs self._seed_idx = 0 self._save_corpus = dirs and os.path.isdir(dirs[0]) + self._inputs.append(bytearray(0)) # Work out what we'll filter filters = mutators_filter.split(' ') if mutators_filter else [] @@ -488,13 +489,8 @@ def generate_input(self): self._seed_run_finished = True return next_input - if len(self._inputs) == 0: - zero_test_case = bytearray(0) - self.put(zero_test_case) - return zero_test_case - else: - buf = self._inputs[self._rand(len(self._inputs))] - return self.mutate(buf) + buf = self._inputs[self._rand(len(self._inputs))] + return self.mutate(buf) def mutate(self, buf): res = buf[:] From 846e69bdb885717be65f36254445aa70e93f74f3 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 12 Jun 2020 16:12:48 +0200 Subject: [PATCH 11/12] Add two html-related issues to the trophies --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f471bd8..0c19cd7 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,8 @@ any unnecessary work is done. 
## Trophies -* [python built-in HTMLParser - unhandled exception](https://bugs.python.org/msg355287) +* [python built-in HTMLParser - unhandled exception](https://bugs.python.org/msg355287), [twice](https://bugs.launchpad.net/beautifulsoup/+bug/1883104) * [CleverCSV - unhandled exceptions](https://github.com/alan-turing-institute/CleverCSV/issues/7) +* [beautifulsoup](https://bugs.launchpad.net/beautifulsoup/+bug/1883264) **Feel free to add bugs that you found with pythonfuzz to this list via pull-request** From 2434a92c69fdb2d0f83e88194ffafd32f70d2f3e Mon Sep 17 00:00:00 2001 From: Yevgeny Pats Date: Sun, 12 Jul 2020 10:15:38 +0300 Subject: [PATCH 12/12] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 0c19cd7..abfab08 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +fuzzit.dev was [acquired](https://about.gitlab.com/press/releases/2020-06-11-gitlab-acquires-peach-tech-and-fuzzit-to-expand-devsecops-offering.html) by GitLab and the new home for this repo is [here](https://gitlab.com/gitlab-org/security-products/analyzers/fuzzers/pythonfuzz) + # pythonfuzz: coverage-guided fuzz testing for python PythonFuzz is coverage-guided [fuzzer](https://developer.mozilla.org/en-US/docs/Glossary/Fuzzing) for testing python packages.