From 560c937e8e7ed8a71ebb1c5965e232f13d8fa3d2 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sat, 14 Dec 2019 16:09:56 +0100 Subject: [PATCH 01/49] Dump samples resulting in a timeout This might be useful for catching zip-bombs or infinite loops. --- pythonfuzz/fuzzer.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 9c45ae3..acbe9d6 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -67,18 +67,18 @@ def log_stats(self, log_type): self._total_executions, log_type, self._total_coverage, self._corpus.length, execs_per_second, rss)) return rss - def write_crash(self, buf): + def write_sample(self, prefix='crash-', buf): m = hashlib.sha256() m.update(buf) if self._exact_artifact_path: crash_path = self._exact_artifact_path else: - crash_path = 'crash-' + m.hexdigest() + crash_path = prefix + m.hexdigest() with open(crash_path, 'wb') as f: f.write(buf) - logging.info('crash was written to {}'.format(crash_path)) + logging.info('sample was written to {}'.format(crash_path)) if len(buf) < 200: - logging.info('crash = {}'.format(buf.hex())) + logging.info('sample = {}'.format(buf.hex())) def start(self): logging.info("#0 READ units: {}".format(self._corpus.length)) @@ -94,11 +94,12 @@ def start(self): self._p.kill() logging.info("=================================================================") logging.info("timeout reached. testcase took: {}".format(self._timeout)) + self.write_sample(prefix='timeout-', buf) break total_coverage = parent_conn.recv() if type(total_coverage) != int: - self.write_crash(buf) + self.write_sample(buf) break self._total_executions += 1 @@ -114,7 +115,7 @@ def start(self): if rss > self._rss_limit_mb: logging.info('MEMORY OOM: exceeded {} MB. 
Killing worker'.format(self._rss_limit_mb)) - self.write_crash(buf) + self.write_sample(buf) self._p.kill() break From 3fa39989fec78e85d54b90609b858fa8b491faf0 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Mon, 16 Dec 2019 21:53:20 +0100 Subject: [PATCH 02/49] Simplify a bit the code for the Corpus' constructor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Ternaries are simplified - `len(…) > 0` isn't pythonic and can be simplified as well - Check on a member that has been initialised right before can be constified --- pythonfuzz/corpus.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index 26df845..7008cc4 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -15,7 +15,7 @@ class Corpus(object): def __init__(self, dirs=None, max_input_size=4096): self._inputs = [] self._max_input_size = max_input_size - self._dirs = [] if dirs is None else dirs + self._dirs = dirs if dirs else [] for i, path in enumerate(dirs): if i == 0 and not os.path.exists(path): os.mkdir(path) @@ -27,9 +27,9 @@ def __init__(self, dirs=None, max_input_size=4096): fname = os.path.join(path, i) if os.path.isfile(fname): self._add_file(fname) - self._seed_run_finished = True if len(self._inputs) == 0 else False + self._seed_run_finished = not self._inputs self._seed_idx = 0 - self._save_corpus = True if len(dirs) > 0 and os.path.isdir(dirs[0]) else False + self._save_corpus = dirs and os.path.isdir(dirs[0]) def _add_file(self, path): with open(path, 'rb') as f: From f0442da535bd89f21e6ccda9f93b1bb2fe3dc9c1 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 20 Dec 2019 19:45:49 +0100 Subject: [PATCH 03/49] Silence the fuzzee's output --- pythonfuzz/fuzzer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 9c45ae3..1b26cea 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -16,6 +16,10 @@ def worker(target, 
child_conn): + # Silence the fuzzee's noise + logging.captureWarnings(True) + logging.getLogger().setLevel(logging.CRITICAL) + cov = coverage.Coverage(branch=True, cover_pylib=True) cov.start() while True: From 2412385c69774233ea3c4d5e8bdba3f71450041c Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 20 Dec 2019 19:50:05 +0100 Subject: [PATCH 04/49] Silence stdout as well --- pythonfuzz/fuzzer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 1b26cea..c8e3b7d 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -1,6 +1,7 @@ import os import sys import time +import sys import psutil import hashlib import logging @@ -17,8 +18,13 @@ def worker(target, child_conn): # Silence the fuzzee's noise + class DummyFile: + """No-op to trash stdout away.""" + def write(self, x): + pass logging.captureWarnings(True) logging.getLogger().setLevel(logging.CRITICAL) + sys.stdout = DummyFile() cov = coverage.Coverage(branch=True, cover_pylib=True) cov.start() From 0fdd5182f719d2defc6105f1bde77691a55b9328 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 20 Dec 2019 23:13:57 +0100 Subject: [PATCH 05/49] Use some monkey-patching voodoo to increase the performances Since we're using an old fixed version of coverage.py, we can monkey-patch it to significantly increase its performances. This commit adds memoization around a syscall-intensive function, giving around +50% in performances on my benchmark (https://github.com/fuzzitdev/pythonfuzz/issues/9). 
--- pythonfuzz/fuzzer.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index acbe9d6..6960955 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -5,6 +5,7 @@ import hashlib import logging import coverage +import functools import multiprocessing as mp from pythonfuzz import corpus @@ -14,6 +15,28 @@ SAMPLING_WINDOW = 5 # IN SECONDS +if coverage.version.version_info <= (5, ): + # Since we're using an old version of coverage.py, + # we're monkey patching it a bit to improve the performances. + + # Using memoization here gives +50% in performances, since this + # function triggers a lot of syscalls. + # See the benchmarks here: + # - https://github.com/fuzzitdev/pythonfuzz/issues/9 + @functools.lru_cache(None) + def abs_file(path): + """Return the absolute normalized form of `path`.""" + try: + path = os.path.realpath(path) + except UnicodeError: + pass + path = os.path.abspath(path) + path = coverage.files.actual_path(path) + path = coverage.files.unicode_filename(path) + return path + + coverage.files.abs_file = abs_file_cache + def worker(target, child_conn): cov = coverage.Coverage(branch=True, cover_pylib=True) From b1ab03723eb7eaa30cf997180819b920551d9549 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Tue, 24 Dec 2019 16:00:37 +0100 Subject: [PATCH 06/49] Add an option to control the fuzzee's output silencing --- pythonfuzz/fuzzer.py | 13 +++++++++---- pythonfuzz/main.py | 4 +++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index c8e3b7d..bbeced9 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -16,7 +16,7 @@ SAMPLING_WINDOW = 5 # IN SECONDS -def worker(target, child_conn, close_fd_mask): # Silence the fuzzee's noise class DummyFile: """No-op to trash stdout away.""" @@ -24,7 +24,10 @@ def write(self, x): pass logging.captureWarnings(True) 
logging.getLogger().setLevel(logging.CRITICAL) - sys.stdout = DummyFile() + if close_fd_mask & 1: + sys.stdout = DummyFile() + if close_fd_mask & 2: + sys.stderr = DummyFile() cov = coverage.Coverage(branch=True, cover_pylib=True) cov.start() @@ -52,13 +55,15 @@ def __init__(self, rss_limit_mb=2048, timeout=120, regression=False, - max_input_size=4096): + max_input_size=4096, + close_fd_mask=0): self._target = target self._dirs = [] if dirs is None else dirs self._exact_artifact_path = exact_artifact_path self._rss_limit_mb = rss_limit_mb self._timeout = timeout self._regression = regression + self._close_fd_mask = close_fd_mask self._corpus = corpus.Corpus(self._dirs, max_input_size) self._total_executions = 0 self._executions_in_sample = 0 @@ -94,7 +99,7 @@ def start(self): logging.info("#0 READ units: {}".format(self._corpus.length)) parent_conn, child_conn = mp.Pipe() - self._p = mp.Process(target=worker, args=(self._target, child_conn)) + self._p = mp.Process(target=worker, args=(self._target, child_conn, self._close_fd_mask)) self._p.start() while True: diff --git a/pythonfuzz/main.py b/pythonfuzz/main.py index 35a954c..386b03c 100644 --- a/pythonfuzz/main.py +++ b/pythonfuzz/main.py @@ -20,11 +20,13 @@ def __call__(self, *args, **kwargs): help='run the fuzzer through set of files for regression or reproduction') parser.add_argument('--rss-limit-mb', type=int, default=2048, help='Memory usage in MB') parser.add_argument('--max-input-size', type=int, default=4096, help='Max input size in bytes') + parser.add_argument('--close-fd-mask', type=int, default=0, help='Indicate output streams to close at startup') parser.add_argument('--timeout', type=int, default=30, help='If input takes longer then this timeout the process is treated as failure case') args = parser.parse_args() f = fuzzer.Fuzzer(self.function, args.dirs, args.exact_artifact_path, - args.rss_limit_mb, args.timeout, args.regression, args.max_input_size) + args.rss_limit_mb, args.timeout, 
args.regression, args.max_input_size, + args.close_fd_mask) f.start() From 19a1792f1caee9b4c2a6a18333f20a2fe2296371 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Tue, 24 Dec 2019 16:02:50 +0100 Subject: [PATCH 07/49] Cleanup the main.py file - Remove duplicate code - Warning filtering is handled in #12 - The __main__ test is correct, uncomment it --- pythonfuzz/main.py | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/pythonfuzz/main.py b/pythonfuzz/main.py index 35a954c..d44a405 100644 --- a/pythonfuzz/main.py +++ b/pythonfuzz/main.py @@ -1,9 +1,6 @@ import argparse -import warnings from pythonfuzz import fuzzer -warnings.filterwarnings('ignore') - class PythonFuzz(object): def __init__(self, func): @@ -28,24 +25,5 @@ def __call__(self, *args, **kwargs): f.start() -def main(): - parser = argparse.ArgumentParser(description='Coverage-guided fuzzer for python packages') - parser.add_argument('target', type=str, help='path to fuzz target') - parser.add_argument('dirs', type=str, nargs='*', - help="one or more directories/files to use as seed corpus. 
the first directory will be used to save the generated test-cases") - parser.add_argument('--exact-artifact-path', type=str, help='set exact artifact path for crashes/ooms') - parser.add_argument('--regression', - type=bool, - default=False, - help='run the fuzzer through set of files for regression or reproduction') - parser.add_argument('--rss-limit-mb', type=int, default=2048, help='Memory usage in MB') - parser.add_argument('--timeout', type=int, default=120, - help='If input takes longer then this timeout the process is treated as failure case') - args = parser.parse_args() - f = fuzzer.Fuzzer(args.target, args.dirs, args.exact_artifact_path, - args.rss_limit_mb, args.timeout, args.regression) - f.start() - -# -# if __name__ == '__main__': -# main() +if __name__ == '__main__': + PythonFuzz() From f41dd5ab0fa82436d23d5ad12f3e4b9dabfcf7e1 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Tue, 24 Dec 2019 17:01:52 +0100 Subject: [PATCH 08/49] Fix the argument order This was broken by 560c937. This is what happens when you write your code in a virtual-env, and port it to your development copy without testing it. --- pythonfuzz/fuzzer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index acbe9d6..cfebca5 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -67,7 +67,7 @@ def log_stats(self, log_type): self._total_executions, log_type, self._total_coverage, self._corpus.length, execs_per_second, rss)) return rss - def write_sample(self, prefix='crash-', buf): + def write_sample(self, buf, prefix='crash-'): m = hashlib.sha256() m.update(buf) if self._exact_artifact_path: @@ -94,7 +94,7 @@ def start(self): self._p.kill() logging.info("=================================================================") logging.info("timeout reached. 
testcase took: {}".format(self._timeout)) - self.write_sample(prefix='timeout-', buf) + self.write_sample(buf, prefix='timeout-') break total_coverage = parent_conn.recv() From b91ad83d432f58513299dc061cff7d6392d50047 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Tue, 24 Dec 2019 17:03:51 +0100 Subject: [PATCH 09/49] Minor speedup Use recv_bytes/send_bytes instead of recv/send where possible. This yields a bit less than 10% on my local benchmark, since the fuzzee and the fuzzers are saving one call to pickle's serialize/unserialize per cycle. I'm sure more performance could be gained by changing the remaining recv/send calls; I might give it a try at some point. --- pythonfuzz/fuzzer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index acbe9d6..8cb8d8b 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -19,7 +19,7 @@ def worker(target, child_conn): cov = coverage.Coverage(branch=True, cover_pylib=True) cov.start() while True: - buf = child_conn.recv() + buf = child_conn.recv_bytes() try: target(buf) except Exception as e: @@ -87,9 +87,10 @@ def start(self): self._p = mp.Process(target=worker, args=(self._target, child_conn)) self._p.start() + while True: buf = self._corpus.generate_input() - parent_conn.send(buf) + parent_conn.send_bytes(buf) if not parent_conn.poll(self._timeout): self._p.kill() logging.info("=================================================================") From 9825bee72cfd094da8eaacbb31ee2e3ae7b166a3 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Thu, 26 Dec 2019 18:51:44 +0100 Subject: [PATCH 10/49] Add support for --runs This option allows limiting the number of fuzzing rounds. 
--- pythonfuzz/fuzzer.py | 10 ++++++++-- pythonfuzz/main.py | 3 ++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 943cc13..1b53622 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -79,7 +79,8 @@ def __init__(self, timeout=120, regression=False, max_input_size=4096, - close_fd_mask=0): + close_fd_mask=0, + runs=-1): self._target = target self._dirs = [] if dirs is None else dirs self._exact_artifact_path = exact_artifact_path @@ -93,6 +94,7 @@ def __init__(self, self._last_sample_time = time.time() self._total_coverage = 0 self._p = None + self.runs = runs def log_stats(self, log_type): rss = (psutil.Process(self._p.pid).memory_info().rss + psutil.Process(os.getpid()).memory_info().rss) / 1024 / 1024 @@ -125,8 +127,12 @@ def start(self): self._p = mp.Process(target=worker, args=(self._target, child_conn, self._close_fd_mask)) self._p.start() - while True: + if self.runs != -1 and self._total_executions >= self.runs: + self._p.terminate() + logging.info('did %d runs, stopping now.', self.runs) + break + buf = self._corpus.generate_input() parent_conn.send_bytes(buf) if not parent_conn.poll(self._timeout): diff --git a/pythonfuzz/main.py b/pythonfuzz/main.py index 386b03c..943996c 100644 --- a/pythonfuzz/main.py +++ b/pythonfuzz/main.py @@ -21,12 +21,13 @@ def __call__(self, *args, **kwargs): parser.add_argument('--rss-limit-mb', type=int, default=2048, help='Memory usage in MB') parser.add_argument('--max-input-size', type=int, default=4096, help='Max input size in bytes') parser.add_argument('--close-fd-mask', type=int, default=0, help='Indicate output streams to close at startup') + parser.add_argument('--runs', type=int, default=-1, help='Number of individual test runs, -1 (the default) to run indefinitely.') parser.add_argument('--timeout', type=int, default=30, help='If input takes longer then this timeout the process is treated as failure case') args = parser.parse_args() f = 
fuzzer.Fuzzer(self.function, args.dirs, args.exact_artifact_path, args.rss_limit_mb, args.timeout, args.regression, args.max_input_size, - args.close_fd_mask) + args.close_fd_mask, args.runs) f.start() From 5795b0d9de7e2642924938a64301df8c33222f58 Mon Sep 17 00:00:00 2001 From: Yevgeny Pats Date: Sat, 28 Dec 2019 19:11:27 +0200 Subject: [PATCH 11/49] update licensing: Standard usage: APACHE 2.0 resell/SaaS-resell: AGPL or commercial license via a contract with fuzzit.dev. --- LICENSE | 205 +-------------- LICENSE.AGPL | 661 ++++++++++++++++++++++++++++++++++++++++++++++++ LICENSE.APACHE2 | 201 +++++++++++++++ 3 files changed, 869 insertions(+), 198 deletions(-) create mode 100644 LICENSE.AGPL create mode 100644 LICENSE.APACHE2 diff --git a/LICENSE b/LICENSE index f49a4e1..2864cca 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,10 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ +Licensing Information - pythonfuzz - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +This library has dual license. - 1. Definitions. +For non-commercial usage - APACHE 2.0 as described at LICENSE.APACHE2 +For commercial usage - either APACHE 2.0 or AGPL v3.0 depending on the following: +1) If this is used just as testing framework for either commercial software or non-commercial software the licensing is APACHE 2.0 +(which is essentially what this library is intended for). +2) If this library is resold/used as software as a service, for example by a CI provider it will be either AGPL v3.0 or will have to get a commercial license via contract with fuzzit.dev - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/LICENSE.AGPL b/LICENSE.AGPL new file mode 100644 index 0000000..dba13ed --- /dev/null +++ b/LICENSE.AGPL @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. 
This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. 
+ + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. 
However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. 
+ + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. 
+ + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code.
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +<https://www.gnu.org/licenses/>. diff --git a/LICENSE.APACHE2 b/LICENSE.APACHE2 new file mode 100644 index 0000000..f49a4e1 --- /dev/null +++ b/LICENSE.APACHE2 @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types.
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file From 3d5158de2dfd3d0b66ca6f819c02d9e6ee97ce85 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Mon, 30 Dec 2019 14:47:22 +0100 Subject: [PATCH 12/49] Fix a typo introduced by 7943d81 --- pythonfuzz/fuzzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 1b53622..ec9fc0e 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -25,7 +25,7 @@ # See the benchmarks here: # - https://github.com/fuzzitdev/pythonfuzz/issues/9 @functools.lru_cache(None) - def abs_file(path): + def abs_file_cache(path): """Return the absolute normalized form of `path`.""" try: path = os.path.realpath(path) From 63ec8e5a79adb13e8745fe14b92437e8863a6c47 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Mon, 30 Dec 2019 20:14:05 +0100 Subject: [PATCH 13/49] Add some tests This commit adds a test to check if the fuzzer can find a crash, and to make sure that it doesn't find anything when the target isn't crashing. We should probably refactor the fuzzer.py file a bit to make it expose a better API, instead of relying on `logging`.
--- tests/__init__.py | 0 tests/test_crash.py | 17 +++++++++++++++++ tests/test_nocrash.py | 15 +++++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 tests/__init__.py create mode 100644 tests/test_crash.py create mode 100644 tests/test_nocrash.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_crash.py b/tests/test_crash.py new file mode 100644 index 0000000..167e0bc --- /dev/null +++ b/tests/test_crash.py @@ -0,0 +1,17 @@ +import unittest +import zipfile +import io +from unittest.mock import patch + +import pythonfuzz + +class TestFindCrash(unittest.TestCase): + def test_find_crash(self): + def fuzz(buf): + f = io.BytesIO(buf) + z = zipfile.ZipFile(f) + z.testzip() + + with patch('logging.Logger.info') as mock: + pythonfuzz.fuzzer.Fuzzer(fuzz).start() + self.assertTrue(mock.called_once) diff --git a/tests/test_nocrash.py b/tests/test_nocrash.py new file mode 100644 index 0000000..ec01e37 --- /dev/null +++ b/tests/test_nocrash.py @@ -0,0 +1,15 @@ +import unittest +import zipfile +import io +from unittest.mock import patch + +import pythonfuzz + +class TestFindCrash(unittest.TestCase): + def test_find_crash(self): + def fuzz(buf): + return True + + with patch('logging.Logger.info') as mock: + pythonfuzz.fuzzer.Fuzzer(fuzz, runs=100).start() + mock.assert_called_with('did %d runs, stopping now.', 100) From 9aa183f81671bdbb18a1b3232448c851c5018846 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Mon, 6 Jan 2020 20:52:24 +0000 Subject: [PATCH 14/49] Updates for running on Python 2. The tool is now able to run on Python 2 with a suitable LRU Cache. Obviously Python 2 isn't as useful to many people, but when working with libraries that are Python 2-only, it's necessary to make the tools work with it. 
--- pythonfuzz/corpus.py | 5 +++++ pythonfuzz/fuzzer.py | 21 ++++++++++++++++----- setup.py | 3 +-- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index 7008cc4..fcdf23a 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -199,6 +199,7 @@ def mutate(self, buf): v = struct.pack('>H', v) else: v = struct.pack('I', v) else: v = struct.pack('Q', v) else: v = struct.pack('H', v) else: v = struct.pack('I', v) else: v = struct.pack(' Date: Mon, 6 Jan 2020 23:43:32 +0000 Subject: [PATCH 15/49] Ensure that the 'NEW' state reports the coverage. Because the 'NEW' state was being logged before the coverage count was updated, the log line would not include the new coverage count. This change ensures that the log line reports the new coverage count. --- pythonfuzz/fuzzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 1d241c2..2b403a1 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -162,9 +162,9 @@ def start(self): self._executions_in_sample += 1 rss = 0 if total_coverage > self._total_coverage: - rss = self.log_stats("NEW") self._total_coverage = total_coverage self._corpus.put(buf) + rss = self.log_stats("NEW") else: if (time.time() - self._last_sample_time) > SAMPLING_WINDOW: rss = self.log_stats('PULSE') From d685d2de0be43c5b1745807be4350a371e4421ca Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Mon, 6 Jan 2020 23:44:40 +0000 Subject: [PATCH 16/49] Rework the mutators into separate classes. The mutators were supplied inline with the corpus mutation loop. This makes it tedious to extend, and difficult to filter out mutations which are not interesting. The code has been reworked here so that... * Each mutator is its own class. * Each class can provide information about what it does, such as its name and the types of mutations it performs. * Each class is registered into a list of classes that are available. 
* The Corpus instantiates these classes when it is initialised, and could (but does not at this time) filter the list as necessary. The name isn't even used yet. * Mutators can return None to say that they're not appropriate. This means that adding a new mutator is a matter of creating a new class, in the same style as the existing ones, and giving information on what the mutator does. Mutators could be based on one another - so for example the 'swap' mutator could be reworked to exchange variable lengths of values, rather than only bytes, and then subclassed to produce short, long and longlong variants. This has not been done here. Previously, the code attempted to retry applying mutators if they were not deemed appropriate; this was ineffective because they merely tried to decrement the iteration count, which did not affect the iterations at all - it looks like the code was originally using C-style for loops where the variable controls the termination, whilst in Python the range controls the iteration of this loop. This has been replaced by the mutator returning None to signal that it is inappropriate, and a loop in the caller repeats the selection of a new mutator. --- pythonfuzz/corpus.py | 556 ++++++++++++++++++++++++++++--------------- 1 file changed, 359 insertions(+), 197 deletions(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index fcdf23a..a98a3a6 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -11,7 +11,346 @@ INTERESTING32 = [-2147483648, -100663046, -32769, 32768, 65535, 65536, 100663045, 2147483647] +# A list of all the mutator clases we have available +mutator_classes = [] + + +def register_mutator(cls): + mutator_classes.append(cls) + return cls + + +class Mutator(object): + """ + Base class for all mutators. + + All mutators are based on this class, and must provide a `mutate` method, which performs + some form of mutation on the input resource. Subclasses can be created which share some + properties with others.
+ + Each mutator has a number of properties which can be used to select whether the mutator + is of interest to the user or not. + + `name` - describes the mutator + `types` - provides a set of named types of mutations that the class performs. + these types can be used to filter out uninteresting mutations. + """ + name = None + types = set([]) + + def __init__(self, corpus): + self.corpus = corpus + + @staticmethod + def _rand(n): + if n == 1 or n == 0: + return 0 + return random.randint(0, n-1) + + @staticmethod + def _choose_len(n): + x = Corpus._rand(100) + if x < 90: + return Corpus._rand(min(8, n)) + 1 + elif x < 99: + return Corpus._rand(min(32, n)) + 1 + else: + return Corpus._rand(n) + 1 + + @staticmethod + def copy(src, dst, start_source, start_dst, end_source=None, end_dst=None): + end_source = len(src) if end_source is None else end_source + end_dst = len(dst) if end_dst is None else end_dst + byte_to_copy = min(end_source-start_source, end_dst-start_dst) + src[start_source:start_source+byte_to_copy] = dst[start_dst:start_dst+byte_to_copy] + + def mutate(self, res): + """ + Function to mutate a given resource into another one. + + @return: new resource, or None if this mutator is not appropriate. + """ + raise NotImplementedError('mutate not implemented in {}'.format(self.__class__.__name__)) + + +@register_mutator +class MutatorRemoveRange(Mutator): + name = 'Remove a range of bytes' + types = set(['byte']) + + def mutate(self, res): + if len(res) < 2: + # Originally this checked the size of the corpus; we merely check whether the + # resource is long. If not, we give up. 
+ return None + + pos0 = self._rand(len(res)) + pos1 = pos0 + self._choose_len(len(res) - pos0) + self.copy(res, res, pos1, pos0) + return res[:len(res) - (pos1-pos0)] + + +@register_mutator +class MutatorInsertBytes(Mutator): + name = 'Insert a range of random bytes' + types = set(['byte']) + + def mutate(self, res): + pos = self._rand(len(res) + 1) + n = self._choose_len(10) + for k in range(n): + res.append(0) + self.copy(res, res, pos, pos+n) + for k in range(n): + res[pos+k] = self._rand(256) + return res + + +@register_mutator +class MutatorDuplicateBytes(Mutator): + name = 'Duplicate a range of bytes' + types = set(['byte']) + + def mutate(self, res): + if len(res) <= 1: + return None + src = self._rand(len(res)) + dst = self._rand(len(res)) + while src == dst: + dst = self._rand(len(res)) + n = self._choose_len(len(res) - src) + tmp = bytearray(n) + self.copy(res, tmp, src, 0) + for k in range(n): + res.append(0) + self.copy(res, res, dst, dst+n) + for k in range(n): + res[dst+k] = tmp[k] + return res + + +@register_mutator +class MutatorCopyBytes(Mutator): + name = 'Copy a range of bytes' + types = set(['byte']) + + def mutate(self, res): + if len(res) <= 1: + return None + src = self._rand(len(res)) + dst = self._rand(len(res)) + while src == dst: + dst = self._rand(len(res)) + n = self._choose_len(len(res) - src) + self.copy(res, res, src, dst, src+n) + return res + + +@register_mutator +class MutatorBitFlip(Mutator): + name = 'Bit flip' + types = set(['bit']) + + def mutate(self, res): + if len(res) == 0: + return None + pos = self._rand(len(res)) + res[pos] ^= 1 << self._rand(8) + return res + + +@register_mutator +class MutatorRandomiseByte(Mutator): + name = 'Set a byte to a random value.' 
+ types = set(['byte']) + + def mutate(self, res): + if len(res) == 0: + return None + pos = self._rand(len(res)) + res[pos] ^= self._rand(255) + 1 + return res + + +@register_mutator +class MutatorSwapBytes(Mutator): + name = 'Swap 2 bytes' + types = set(['byte']) + + def mutate(self, res): + if len(res) <= 1: + return None + src = self._rand(len(res)) + dst = self._rand(len(res)) + while src == dst: + dst = self._rand(len(res)) + res[src], res[dst] = res[dst], res[src] + return res + + +@register_mutator +class MutatorAddSubByte(Mutator): + name = 'Add/subtract from a byte' + types = set(['byte']) + + def mutate(self, res): + if len(res) == 0: + return None + pos = self._rand(len(res)) + v = self._rand(35) + 1 + if bool(random.getrandbits(1)): + res[pos] = numpy.uint8(res[pos]) + numpy.uint8(v) + else: + res[pos] = numpy.uint8(res[pos]) - numpy.uint8(v) + return res + + +@register_mutator +class MutatorAddSubShort(Mutator): + name = 'Add/subtract from a uint16' + types = set(['short']) + + def mutate(self, res): + if len(res) < 2: + return None + pos = self._rand(len(res) - 1) + v = numpy.uint16(self._rand(35) + 1) + if bool(random.getrandbits(1)): + v = numpy.uint16(0) - v + if bool(random.getrandbits(1)): + v = struct.pack('>H', v) + else: + v = struct.pack('I', v) + else: + v = struct.pack('Q', v) + else: + v = struct.pack('H', v) + else: + v = struct.pack('I', v) + else: + v = struct.pack('".format(self.__class__.__name__, + len(self._inputs), + len(self._mutators)) + def _add_file(self, path): with open(path, 'rb') as f: self._inputs.append(bytearray(f.read())) @@ -58,23 +407,6 @@ def _rand_exp(): break return count - @staticmethod - def _choose_len(n): - x = Corpus._rand(100) - if x < 90: - return Corpus._rand(min(8, n)) + 1 - elif x < 99: - return Corpus._rand(min(32, n)) + 1 - else: - return Corpus._rand(n) + 1 - - @staticmethod - def copy(src, dst, start_source, start_dst, end_source=None, end_dst=None): - end_source = len(src) if end_source is None else 
end_source - end_dst = len(dst) if end_dst is None else end_dst - byte_to_copy = min(end_source-start_source, end_dst-start_dst) - src[start_source:start_source+byte_to_copy] = dst[start_dst:start_dst+byte_to_copy] - def put(self, buf): self._inputs.append(buf) if self._save_corpus: @@ -104,186 +436,16 @@ def mutate(self, buf): res = buf[:] nm = self._rand_exp() for i in range(nm): - # Remove a range of bytes. - x = self._rand(15) - if x == 0: - if len(self._inputs) <= 1: - i -= 1 - continue - pos0 = self._rand(len(res)) - pos1 = pos0 + self._choose_len(len(res) - pos0) - self.copy(res, res, pos1, pos0) - res = res[:len(res) - (pos1-pos0)] - elif x == 1: - # Insert a range of random bytes. - pos = self._rand(len(res) + 1) - n = self._choose_len(10) - for k in range(n): - res.append(0) - self.copy(res, res, pos, pos+n) - for k in range(n): - res[pos+k] = self._rand(256) - elif x == 2: - # Duplicate a range of bytes. - if len(res) <= 1: - i -= 1 - continue - src = self._rand(len(res)) - dst = self._rand(len(res)) - while src == dst: - dst = self._rand(len(res)) - n = self._choose_len(len(res) - src) - tmp = bytearray(n) - self.copy(res, tmp, src, 0) - for k in range(n): - res.append(0) - self.copy(res, res, dst, dst+n) - for k in range(n): - res[dst+k] = tmp[k] - elif x == 3: - # Copy a range of bytes. - if len(res) <= 1: - i -= 1 - continue - src = self._rand(len(res)) - dst = self._rand(len(res)) - while src == dst: - dst = self._rand(len(res)) - n = self._choose_len(len(res) - src) - self.copy(res, res, src, dst, src+n) - elif x == 4: - # Bit flip. Spooky! - if len(res) == 0: - i -= 1 - continue - pos = self._rand(len(res)) - res[pos] ^= 1 << self._rand(8) - elif x == 5: - # Set a byte to a random value. - if len(res) == 0: - i -= 1 - continue - pos = self._rand(len(res)) - res[pos] ^= self._rand(255) + 1 - elif x == 6: - # Swap 2 bytes. 
- if len(res) <= 1: - i -= 1 - continue - src = self._rand(len(res)) - dst = self._rand(len(res)) - while src == dst: - dst = self._rand(len(res)) - res[src], res[dst] = res[dst], res[src] - elif x == 7: - # Add/subtract from a byte. - if len(res) == 0: - i -= 1 - continue - pos = self._rand(len(res)) - v = self._rand(35) + 1 - if bool(random.getrandbits(1)): - res[pos] = numpy.uint8(res[pos]) + numpy.uint8(v) - else: - res[pos] = numpy.uint8(res[pos]) - numpy.uint8(v) - elif x == 8: - # Add/subtract from a uint16. - if len(res) < 2: - i -= 1 - continue - pos = self._rand(len(res) - 1) - v = numpy.uint16(self._rand(35) + 1) - if bool(random.getrandbits(1)): - v = numpy.uint16(0) - v - if bool(random.getrandbits(1)): - v = struct.pack('>H', v) - else: - v = struct.pack('I', v) - else: - v = struct.pack('Q', v) - else: - v = struct.pack('H', v) - else: - v = struct.pack('I', v) - else: - v = struct.pack(' self._max_input_size: res = res[:self._max_input_size] From 895ee2ff7fdf47611426e42bb3025b29bff3ec89 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Tue, 7 Jan 2020 00:13:58 +0000 Subject: [PATCH 17/49] Add help message describing the mutators available and filtering. Mutators can now be listed as part of a help command, and may then be filtered by the user supplying a filter specification to disable or enable only certain mutators. 
--- pythonfuzz/corpus.py | 33 +++++++++++++++++++++++++++------ pythonfuzz/fuzzer.py | 14 ++++++++++++-- pythonfuzz/main.py | 10 ++++++++-- 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index a98a3a6..d724df7 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -351,7 +351,7 @@ class CorpusError(Exception): class Corpus(object): - def __init__(self, dirs=None, max_input_size=4096): + def __init__(self, dirs=None, max_input_size=4096, mutators_filter=None): self._inputs = [] self._max_input_size = max_input_size self._dirs = dirs if dirs else [] @@ -370,15 +370,36 @@ def __init__(self, dirs=None, max_input_size=4096): self._seed_idx = 0 self._save_corpus = dirs and os.path.isdir(dirs[0]) + # Work out what we'll filter + filters = mutators_filter.split(' ') + negative_filters = [f[1:] for f in filters if f and f[0] == '!'] + required_filters = [f for f in filters if f and f[0] != '!'] + + def acceptable(cls): + # No filters => everything's fine! 
+ if mutators_filter is None: + return True + + # First check that the required mutator types are set + for f in required_filters: + if f not in cls.types: + return False + # Now remove any that are not allowed + for f in negative_filters: + if f in cls.types: + return False + + return True + # Construct an object for each mutator we can use - self._mutators = [cls(self) for cls in mutator_classes] - if not self._mutators: + self.mutators = [cls(self) for cls in mutator_classes if acceptable(cls)] + if not self.mutators: raise CorpusError("No mutators are available") def __repr__(self): return "<{}(corpus of {}, %i mutators)>".format(self.__class__.__name__, len(self._inputs), - len(self._mutators)) + len(self.mutators)) def _add_file(self, path): with open(path, 'rb') as f: @@ -439,8 +460,8 @@ def mutate(self, buf): # Select a mutator from those we can apply while True: - x = self._rand(len(self._mutators)) - mutator = self._mutators[x] + x = self._rand(len(self.mutators)) + mutator = self.mutators[x] newres = mutator.mutate(res) if newres is not None: diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 2b403a1..08daae8 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -88,7 +88,8 @@ def __init__(self, regression=False, max_input_size=4096, close_fd_mask=0, - runs=-1): + runs=-1, + mutators_filter=None): self._target = target self._dirs = [] if dirs is None else dirs self._exact_artifact_path = exact_artifact_path @@ -96,7 +97,7 @@ def __init__(self, self._timeout = timeout self._regression = regression self._close_fd_mask = close_fd_mask - self._corpus = corpus.Corpus(self._dirs, max_input_size) + self._corpus = corpus.Corpus(self._dirs, max_input_size, mutators_filter) self._total_executions = 0 self._executions_in_sample = 0 self._last_sample_time = time.time() @@ -104,6 +105,15 @@ def __init__(self, self._p = None self.runs = runs + def help_mutators(self): + print("Mutators currently available (and their types):") + active_mutators = 
[mutator.__class__ for mutator in self._corpus.mutators] + for mutator in corpus.mutator_classes: + active = mutator in active_mutators + indicator = '-' if not active else ' ' + print(" {}{:<60s} [{}]".format(indicator, mutator.name, ', '.join(sorted(mutator.types)))) + print("\nMutators prefixed by '-' are currently disabled.") + def log_stats(self, log_type): rss = (psutil.Process(self._p.pid).memory_info().rss + psutil.Process(os.getpid()).memory_info().rss) / 1024 / 1024 diff --git a/pythonfuzz/main.py b/pythonfuzz/main.py index 10ff0b1..40b5de8 100644 --- a/pythonfuzz/main.py +++ b/pythonfuzz/main.py @@ -19,13 +19,19 @@ def __call__(self, *args, **kwargs): parser.add_argument('--max-input-size', type=int, default=4096, help='Max input size in bytes') parser.add_argument('--close-fd-mask', type=int, default=0, help='Indicate output streams to close at startup') parser.add_argument('--runs', type=int, default=-1, help='Number of individual test runs, -1 (the default) to run indefinitely.') + parser.add_argument('--help-mutators', action='store_true', help='Display help on the mutators') + parser.add_argument('--mutator-filter', type=str, default=None, help='Filter for mutator types to use; prefix with ! to disable') parser.add_argument('--timeout', type=int, default=30, help='If input takes longer then this timeout the process is treated as failure case') args = parser.parse_args() f = fuzzer.Fuzzer(self.function, args.dirs, args.exact_artifact_path, args.rss_limit_mb, args.timeout, args.regression, args.max_input_size, - args.close_fd_mask, args.runs) - f.start() + args.close_fd_mask, args.runs, args.mutator_filter) + + if args.help_mutators: + f.help_mutators() + else: + f.start() if __name__ == '__main__': From ef2e1cf0e4ab5052efd7a92993b955ba57437241 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Tue, 7 Jan 2020 00:23:09 +0000 Subject: [PATCH 18/49] Replaced infinite mutation loop with bounded loop. 
Previously there was always a likelihood that we would terminate our retries if the mutator said that it was unable to be used, because we had a number of mutators that were unconditional. However, now that the mutators are able to be filtered, it is possible to select a set of mutators which may always claim they are inappropriate. In such a case, we would loop forever. This change bounds the retries on looking for a mutator to 20 attempts - an arbitrary number I picked from the air as seeming reasonable. --- pythonfuzz/corpus.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index d724df7..794df61 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -459,7 +459,9 @@ def mutate(self, buf): for i in range(nm): # Select a mutator from those we can apply - while True: + # We'll try up to 20 times, but if we don't find a + # suitable mutator after that, we'll just give up. + for n in range(20): x = self._rand(len(self.mutators)) mutator = self.mutators[x] From acc996f11379e8209ea944b0074663114dba6e61 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Mon, 16 Dec 2019 23:30:23 +0100 Subject: [PATCH 19/49] Add support for dictionary files This commit adds support for dictionaries (https://llvm.org/docs/LibFuzzer.html#dictionaries), to help fuzzers increase their coverage faster. It seems that there is a bug in the _copy function, because the word is correctly inserted, but it seems that the padding after it is wrong, and I couldn't understand why. Although to be honest, I didn't spend much time on it, since I'd like to have feedback on this PR before investing more debug time. The implementation is pretty crude: it silently ignores invalid lines in the dictionary file, and is likely using words in the corpus a bit too often.
--- pythonfuzz/corpus.py | 6 ++++-- pythonfuzz/dictionnary.py | 27 +++++++++++++++++++++++++++ pythonfuzz/fuzzer.py | 5 +++-- pythonfuzz/main.py | 4 ++-- 4 files changed, 36 insertions(+), 6 deletions(-) create mode 100644 pythonfuzz/dictionnary.py diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index 794df61..a392347 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -5,6 +5,8 @@ import struct import hashlib +from . import dictionnary + INTERESTING8 = [-128, -1, 0, 1, 16, 32, 64, 100, 127] INTERESTING16 = [-32768, -129, 128, 255, 256, 512, 1000, 1024, 4096, 32767] @@ -351,8 +353,9 @@ class CorpusError(Exception): class Corpus(object): - def __init__(self, dirs=None, max_input_size=4096, mutators_filter=None): + def __init__(self, dirs=None, max_input_size=4096, mutators_filter=None, dict_path=None): self._inputs = [] + self._dict = dictionnary.Dictionary(dict_path) self._max_input_size = max_input_size self._dirs = dirs if dirs else [] for i, path in enumerate(dirs): @@ -457,7 +460,6 @@ def mutate(self, buf): res = buf[:] nm = self._rand_exp() for i in range(nm): - # Select a mutator from those we can apply # We'll try up to 20 times, but if we don't find a # suitable mutator after that, we'll just give up. 
diff --git a/pythonfuzz/dictionnary.py b/pythonfuzz/dictionnary.py new file mode 100644 index 0000000..c771df7 --- /dev/null +++ b/pythonfuzz/dictionnary.py @@ -0,0 +1,27 @@ +import random +import re +import os + +class Dictionary: + line_re = re.compile('"(.+)"$') + + def __init__(self, dict_path=None): + if not dict_path or not os.path.exists(dict_path): + self._dict = list() + return + + _dict = set() + with open(dict_path) as f: + for line in f: + line = line.lstrip() + if line.startswith('#'): + continue + word = self.line_re.search(line) + if word: + _dict.add(word.group(1)) + self._dict = list(_dict) + + def get_word(self): + if not self._dict: + return None + return random.choice(self._dict) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 08daae8..9775735 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -89,7 +89,8 @@ def __init__(self, max_input_size=4096, close_fd_mask=0, runs=-1, - mutators_filter=None): + mutators_filter=None, + dict_path=None): self._target = target self._dirs = [] if dirs is None else dirs self._exact_artifact_path = exact_artifact_path @@ -97,7 +98,7 @@ def __init__(self, self._timeout = timeout self._regression = regression self._close_fd_mask = close_fd_mask - self._corpus = corpus.Corpus(self._dirs, max_input_size, mutators_filter) + self._corpus = corpus.Corpus(self._dirs, max_input_size, mutators_filter, dict_path) self._total_executions = 0 self._executions_in_sample = 0 self._last_sample_time = time.time() diff --git a/pythonfuzz/main.py b/pythonfuzz/main.py index 40b5de8..f65ad51 100644 --- a/pythonfuzz/main.py +++ b/pythonfuzz/main.py @@ -17,16 +17,16 @@ def __call__(self, *args, **kwargs): help='run the fuzzer through set of files for regression or reproduction') parser.add_argument('--rss-limit-mb', type=int, default=2048, help='Memory usage in MB') parser.add_argument('--max-input-size', type=int, default=4096, help='Max input size in bytes') + parser.add_argument('--dict', type=str, 
help='dictionary file') parser.add_argument('--close-fd-mask', type=int, default=0, help='Indicate output streams to close at startup') parser.add_argument('--runs', type=int, default=-1, help='Number of individual test runs, -1 (the default) to run indefinitely.') - parser.add_argument('--help-mutators', action='store_true', help='Display help on the mutators') parser.add_argument('--mutator-filter', type=str, default=None, help='Filter for mutator types to use; prefix with ! to disable') parser.add_argument('--timeout', type=int, default=30, help='If input takes longer then this timeout the process is treated as failure case') args = parser.parse_args() f = fuzzer.Fuzzer(self.function, args.dirs, args.exact_artifact_path, args.rss_limit_mb, args.timeout, args.regression, args.max_input_size, - args.close_fd_mask, args.runs, args.mutator_filter) + args.close_fd_mask, args.runs, args.mutator_filter, args.dict) if args.help_mutators: f.help_mutators() From 3a6fac53f944bbfc7a6524b88a468bf2d653e203 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Tue, 7 Jan 2020 08:18:07 +0000 Subject: [PATCH 20/49] Fix dictionary typo. A small typo in the word dictionary, fixed. --- pythonfuzz/corpus.py | 4 ++-- pythonfuzz/{dictionnary.py => dictionary.py} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename pythonfuzz/{dictionnary.py => dictionary.py} (100%) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index a392347..9a9e7f7 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -5,7 +5,7 @@ import struct import hashlib -from . import dictionnary +from . 
import dictionary INTERESTING8 = [-128, -1, 0, 1, 16, 32, 64, 100, 127] @@ -355,7 +355,7 @@ class Corpus(object): def __init__(self, dirs=None, max_input_size=4096, mutators_filter=None, dict_path=None): self._inputs = [] - self._dict = dictionnary.Dictionary(dict_path) + self._dict = dictionary.Dictionary(dict_path) self._max_input_size = max_input_size self._dirs = dirs if dirs else [] for i, path in enumerate(dirs): diff --git a/pythonfuzz/dictionnary.py b/pythonfuzz/dictionary.py similarity index 100% rename from pythonfuzz/dictionnary.py rename to pythonfuzz/dictionary.py From b0cd504146f6cc9c8a51202f2dd6d0e0b463fa55 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Tue, 7 Jan 2020 09:07:30 +0000 Subject: [PATCH 21/49] Add support for escaped strings to the dictionary. The dictionary reader can now handle escaped strings in its values, as given in the AFL examples. --- pythonfuzz/dictionary.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pythonfuzz/dictionary.py b/pythonfuzz/dictionary.py index c771df7..b8b7711 100644 --- a/pythonfuzz/dictionary.py +++ b/pythonfuzz/dictionary.py @@ -1,3 +1,14 @@ +""" +Basic reader for libfuzzer/AFL style dictionaries. + +See documentation at: + https://llvm.org/docs/LibFuzzer.html#dictionaries + https://github.com/google/AFL/blob/master/dictionaries/README.dictionaries + +For our use, we only support reading the content of the dictionary values.
+""" + +import codecs import random import re import os @@ -18,7 +29,10 @@ def __init__(self, dict_path=None): continue word = self.line_re.search(line) if word: - _dict.add(word.group(1)) + # Decode any escaped characters, giving us a bytes object + value = word.group(1) + (value, _) = codecs.escape_decode(value) + _dict.add(value) self._dict = list(_dict) def get_word(self): From ae27ac95935be24734f5fef397b58b0d8b3bfa6e Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Tue, 7 Jan 2020 09:27:21 +0000 Subject: [PATCH 22/49] Add dictionary support for a binary load of directories of files. Raw binary files are supported by the AFL dictionaries, and matter most for the cases where we're dealing with binary chunks that would otherwise be tedious to insert into the token file. --- pythonfuzz/corpus.py | 25 +++++++++++++++++++++++-- pythonfuzz/dictionary.py | 29 ++++++++++++++++++++++++++--- pythonfuzz/main.py | 1 + 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index 9a9e7f7..f06450e 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -347,6 +347,23 @@ def mutate(self, res): return res +@register_mutator +class MutatorDictionaryWordInsert(Mutator): + name = 'Insert a word at a random position' + types = set(['text', 'dictionary']) + + def mutate(self, res): + word = self.corpus._dict.get_word() + if not word: + return None + pos = self._rand(len(res) + 1) + for _ in word: + res.append(0) + self.copy(res, res, pos, pos+len(word)) + for k in range(len(word)): + res[pos+k] = ord(word[k]) + + class CorpusError(Exception): pass @@ -355,7 +372,9 @@ class Corpus(object): def __init__(self, dirs=None, max_input_size=4096, mutators_filter=None, dict_path=None): self._inputs = [] - self._dict = dictionary.Dictionary(dict_path) + self._dict = dictionary.Dictionary() + if dict_path: + self._dict.load(dict_path) self._max_input_size = max_input_size self._dirs = dirs if dirs else [] for i, path in 
enumerate(dirs): @@ -460,6 +479,7 @@ def mutate(self, buf): res = buf[:] nm = self._rand_exp() for i in range(nm): + # Select a mutator from those we can apply # We'll try up to 20 times, but if we don't find a # suitable mutator after that, we'll just give up. @@ -470,7 +490,8 @@ def mutate(self, buf): newres = mutator.mutate(res) if newres is not None: break - res = newres + if newres is not None: + res = newres if len(res) > self._max_input_size: res = res[:self._max_input_size] diff --git a/pythonfuzz/dictionary.py b/pythonfuzz/dictionary.py index b8b7711..a33bc86 100644 --- a/pythonfuzz/dictionary.py +++ b/pythonfuzz/dictionary.py @@ -16,16 +16,39 @@ class Dictionary: line_re = re.compile('"(.+)"$') - def __init__(self, dict_path=None): + def __init__(self): + self._dict = list() + + def load(self, dict_path): + if os.path.isfile(dict_path): + self.load_file(dict_path) + else: + self.load_directory(dict_path) + + def load_directory(self, dict_path): + """ + Read a directory of files, which are loaded raw. + """ + for bin_file in os.listdir(dict_path): + filename = os.path.join(dict_path, bin_file) + if os.path.isfile(filename): + with open(filename, 'rb') as fh: + self._dict.append(fh.read()) + + def load_file(self, dict_path): + """ + Read a dictionary file containing tokens. 
+ """ + # Token names are discarded, as per the AFL documentation + if not dict_path or not os.path.exists(dict_path): - self._dict = list() return _dict = set() with open(dict_path) as f: for line in f: line = line.lstrip() - if line.startswith('#'): + if not line or line.startswith('#'): continue word = self.line_re.search(line) if word: diff --git a/pythonfuzz/main.py b/pythonfuzz/main.py index f65ad51..391c852 100644 --- a/pythonfuzz/main.py +++ b/pythonfuzz/main.py @@ -20,6 +20,7 @@ def __call__(self, *args, **kwargs): parser.add_argument('--dict', type=str, help='dictionary file') parser.add_argument('--close-fd-mask', type=int, default=0, help='Indicate output streams to close at startup') parser.add_argument('--runs', type=int, default=-1, help='Number of individual test runs, -1 (the default) to run indefinitely.') + parser.add_argument('--help-mutators', action='store_true', help='Display help on the mutators') parser.add_argument('--mutator-filter', type=str, default=None, help='Filter for mutator types to use; prefix with ! to disable') parser.add_argument('--timeout', type=int, default=30, help='If input takes longer then this timeout the process is treated as failure case') From faa8e5f9467efd3d94c242a7374ffe58b214cd82 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Tue, 7 Jan 2020 09:37:56 +0000 Subject: [PATCH 23/49] Fix for dictionary mutator failing to return correct values. The dictionary mutator wasn't actually returning the correct values, so was always claiming to fail. 
--- pythonfuzz/corpus.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index f06450e..93d1cab 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -361,7 +361,8 @@ def mutate(self, res): res.append(0) self.copy(res, res, pos, pos+len(word)) for k in range(len(word)): - res[pos+k] = ord(word[k]) + res[pos+k] = word[k] + return res class CorpusError(Exception): pass From beb0ce3b78032c838f884a304c4e2b6df5497b4b Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Tue, 7 Jan 2020 23:34:48 +0000 Subject: [PATCH 24/49] Add the operation type to the mutator types; add a dictionary append. Knowing which type of mutators are in use helps to favour certain kinds of operations. In particular, if you start with a small dictionary of words and wish to generate longer strings, using just an option that appends to the dictionary is useful. The dictionary append operation has been added which allows the values from the dictionary to be strung together in increasingly longer sequences. It doesn't offer the option of appending multiple values from the dictionary so the operation may end up in a local minimum, but such mutators could be added in the future.
--- pythonfuzz/corpus.py | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index 93d1cab..13f5540 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -78,7 +78,7 @@ def mutate(self, res): @register_mutator class MutatorRemoveRange(Mutator): name = 'Remove a range of bytes' - types = set(['byte']) + types = set(['byte', 'remove']) def mutate(self, res): if len(res) < 2: @@ -95,7 +95,7 @@ def mutate(self, res): @register_mutator class MutatorInsertBytes(Mutator): name = 'Insert a range of random bytes' - types = set(['byte']) + types = set(['byte', 'insert']) def mutate(self, res): pos = self._rand(len(res) + 1) @@ -111,7 +111,7 @@ def mutate(self, res): @register_mutator class MutatorDuplicateBytes(Mutator): name = 'Duplicate a range of bytes' - types = set(['byte']) + types = set(['byte', 'duplicate']) def mutate(self, res): if len(res) <= 1: @@ -134,7 +134,7 @@ def mutate(self, res): @register_mutator class MutatorCopyBytes(Mutator): name = 'Copy a range of bytes' - types = set(['byte']) + types = set(['byte', 'copy']) def mutate(self, res): if len(res) <= 1: @@ -151,7 +151,7 @@ def mutate(self, res): @register_mutator class MutatorBitFlip(Mutator): name = 'Bit flip' - types = set(['bit']) + types = set(['bit', 'replace']) def mutate(self, res): if len(res) == 0: @@ -164,7 +164,7 @@ def mutate(self, res): @register_mutator class MutatorRandomiseByte(Mutator): name = 'Set a byte to a random value.' 
- types = set(['byte']) + types = set(['byte', 'replace']) def mutate(self, res): if len(res) == 0: @@ -177,7 +177,7 @@ def mutate(self, res): @register_mutator class MutatorSwapBytes(Mutator): name = 'Swap 2 bytes' - types = set(['byte']) + types = set(['byte', 'swap']) def mutate(self, res): if len(res) <= 1: @@ -193,7 +193,7 @@ def mutate(self, res): @register_mutator class MutatorAddSubByte(Mutator): name = 'Add/subtract from a byte' - types = set(['byte']) + types = set(['byte', 'addsub']) def mutate(self, res): if len(res) == 0: @@ -210,7 +210,7 @@ def mutate(self, res): @register_mutator class MutatorAddSubShort(Mutator): name = 'Add/subtract from a uint16' - types = set(['short']) + types = set(['short', 'addsub']) def mutate(self, res): if len(res) < 2: @@ -232,7 +232,7 @@ def mutate(self, res): @register_mutator class MutatorAddSubLong(Mutator): name = 'Add/subtract from a uint32' - types = set(['long']) + types = set(['long', 'addsub']) def mutate(self, res): if len(res) < 4: @@ -254,7 +254,7 @@ def mutate(self, res): @register_mutator class MutatorAddSubLongLong(Mutator): name = 'Add/subtract from a uint64' - types = set(['longlong']) + types = set(['longlong', 'addsub']) def mutate(self, res): if len(res) < 8: @@ -276,7 +276,7 @@ def mutate(self, res): @register_mutator class MutatorReplaceByte(Mutator): name = 'Replace a byte with an interesting value' - types = set(['byte']) + types = set(['byte', 'replace']) def mutate(self, res): if len(res) == 0: @@ -289,7 +289,7 @@ def mutate(self, res): @register_mutator class MutatorReplaceShort(Mutator): name = 'Replace an uint16 with an interesting value' - types = set(['short']) + types = set(['short', 'replace']) def mutate(self, res): if len(res) < 2: @@ -309,7 +309,7 @@ def mutate(self, res): @register_mutator class MutatorReplaceLong(Mutator): name = 'Replace an uint32 with an interesting value' - types = set(['long']) + types = set(['long', 'replace']) def mutate(self, res): if len(res) < 4: @@ -329,7 
+329,7 @@ def mutate(self, res): @register_mutator class MutatorReplaceDigit(Mutator): name = 'Replace an ascii digit with another digit' - types = set(['byte', 'ascii']) + types = set(['byte', 'ascii', 'replace']) def mutate(self, res): digits = [] @@ -365,6 +365,19 @@ def mutate(self, res): return res +@register_mutator +class MutatorDictionaryWordAppend(Mutator): + name = 'Append a word' + types = set(['dictionary', 'append']) + + def mutate(self, res): + word = self.corpus._dict.get_word() + if not word: + return None + res.extend(word) + return res + + class CorpusError(Exception): pass From ecebfffca7d36f8ca36e0fa615c756e1c1365083 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Tue, 7 Jan 2020 23:38:53 +0000 Subject: [PATCH 25/49] Replace the timeout process kill with a terminate. The kill operation is never a good choice for stopping a subprocess - it does not give the subprocess any chance to clean up. It's more usual to try a terminate and later kill if the process did not stop. More importantly to me, the kill method isn't present in the python 2 multiprocessing module. --- pythonfuzz/fuzzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 9775735..3e3c886 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -158,7 +158,7 @@ def start(self): buf = self._corpus.generate_input() parent_conn.send_bytes(bytes(buf)) if not parent_conn.poll(self._timeout): - self._p.kill() + self._p.terminate() logging.info("=================================================================") logging.info("timeout reached. testcase took: {}".format(self._timeout)) self.write_sample(buf, prefix='timeout-') From 26d1e0ef4758930dea208b22a1ff4e79b90a530a Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 14:57:28 +0000 Subject: [PATCH 26/49] Fix for corpus crashing if no mutator_filters supplied. 
The mutator filters were being treated as a string, even when they were using the request for the defaults. --- pythonfuzz/corpus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index 13f5540..26c41ee 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -407,7 +407,7 @@ def __init__(self, dirs=None, max_input_size=4096, mutators_filter=None, dict_pa self._save_corpus = dirs and os.path.isdir(dirs[0]) # Work out what we'll filter - filters = mutators_filter.split(' ') + filters = mutators_filter.split(' ') if mutators_filter else [] negative_filters = [f[1:] for f in filters if f and f[0] == '!'] required_filters = [f for f in filters if f and f[0] != '!'] From fedb3052717fd5b8f7429df9968d99f1d39fa6c0 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 20:02:58 +0000 Subject: [PATCH 27/49] Fix the zipfile example for Python 2; and rename it. The examples/csv/fuzz.py is actually testing the zipfile module, so it has been renamed to reflect this. There are slight differences in the Python 2 usage, which I've harmonised in the module. 
--- examples/csv/fuzz.py | 24 ------------------------ examples/zipfile/fuzz.py | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 24 deletions(-) delete mode 100644 examples/csv/fuzz.py create mode 100644 examples/zipfile/fuzz.py diff --git a/examples/csv/fuzz.py b/examples/csv/fuzz.py deleted file mode 100644 index b556165..0000000 --- a/examples/csv/fuzz.py +++ /dev/null @@ -1,24 +0,0 @@ -import io -import zipfile -# from html.parser import HTMLParser -from pythonfuzz.main import PythonFuzz - - -@PythonFuzz -def fuzz(buf): - # try: - # string = buf.decode("utf-8") - # parser = HTMLParser() - # parser.feed(string) - # except UnicodeDecodeError: - # pass - f = io.BytesIO(buf) - try: - z = zipfile.ZipFile(f) - z.testzip() - except (zipfile.BadZipFile, zipfile.LargeZipFile): - pass - - -if __name__ == '__main__': - fuzz() diff --git a/examples/zipfile/fuzz.py b/examples/zipfile/fuzz.py new file mode 100644 index 0000000..92d8554 --- /dev/null +++ b/examples/zipfile/fuzz.py @@ -0,0 +1,23 @@ +import io +import zipfile +from pythonfuzz.main import PythonFuzz + +try: + allowed_exceptions = (zipfile.BadZipFile, zipfile.LargeZipFile) +except AttributeError: + # In Python2, one of these had an inconsistent capitalisation + allowed_exceptions = (zipfile.BadZipfile, zipfile.LargeZipFile) + + +@PythonFuzz +def fuzz(buf): + f = io.BytesIO(buf) + try: + z = zipfile.ZipFile(f) + z.testzip() + except : + pass + + +if __name__ == '__main__': + fuzz() From 5e2a05e0ba0f3b7f4e574b09271edfc96ddd9d6b Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 20:14:57 +0000 Subject: [PATCH 28/49] Update the xml and htmlparser examples to work on Python 2. Some simple checks for the behaviour of the modules under Python 2 compared to Python 3. 
--- examples/htmlparser/fuzz.py | 6 +++++- examples/xml/fuzz.py | 9 ++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/examples/htmlparser/fuzz.py b/examples/htmlparser/fuzz.py index 5d6d0e2..dfb67c2 100644 --- a/examples/htmlparser/fuzz.py +++ b/examples/htmlparser/fuzz.py @@ -1,4 +1,8 @@ -from html.parser import HTMLParser +try: + from html.parser import HTMLParser +except ImportError: + from HTMLParser import HTMLParser + from pythonfuzz.main import PythonFuzz diff --git a/examples/xml/fuzz.py b/examples/xml/fuzz.py index 5395897..6369457 100644 --- a/examples/xml/fuzz.py +++ b/examples/xml/fuzz.py @@ -1,12 +1,19 @@ import xml.etree.ElementTree as ET from xml.etree.ElementTree import ParseError +import sys + from pythonfuzz.main import PythonFuzz @PythonFuzz def fuzz(buf): try: - string = buf.decode("utf-8") + # In Python 2, the ElementTree only consumes bytes, not unicode strings, + # so we need to supply the correct format depending on the python version. + if sys.version_info[0] == 2: + string = bytes(buf) + else: + string = buf.decode("utf-8") ET.fromstring(string) except (UnicodeDecodeError, ParseError): pass From c5a5ea8abb60ecd9df311664491ef5c5432dfb86 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 20:34:30 +0000 Subject: [PATCH 29/49] Add a harness to run all the example fuzzers. The example fuzzers provide a useful way to see how you might use the pythonfuzz package. However, they're no use if they don't work. Adding a tool that is able to run them all, and report on their status will ensure that these do not get broken in the future. The tool is able to be configured with the number of runs that it expects to execute, and whether it will keep any crash or timeout files that are generated. At the end of the run of each example, it will report the collected information from the run. This should be useful for checking the performance in the future, as well. 
If any of the examples fails to function - ie it reports an error itself, then the runner will exit with a non-0 return code. This will allow it to be used as a gate for the examples being functional. --- examples/run_all_examples.py | 184 +++++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100755 examples/run_all_examples.py diff --git a/examples/run_all_examples.py b/examples/run_all_examples.py new file mode 100755 index 0000000..4276db6 --- /dev/null +++ b/examples/run_all_examples.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python +""" +Run all the examples and collect the timings and results. +""" + +import argparse +import os.path +import re +import subprocess +import sys +import time + +here = os.path.dirname(__file__) + + +class Result(object): + """ + Use the output from the tool to collect the information about its execution. + + Very sensitive to the format of the output as to whether it collects information or not. + """ + coverage_re = re.compile('cov: (\d+)') + corpus_re = re.compile('corp: (\d+)') + speed_re = re.compile('exec/s: (\d+)') + memory_re = re.compile('rss: (\d+\.\d+)') + count_re = re.compile('^#(\d+)') + count2_re = re.compile('^did (\d+) runs, stopping now') + exception_re = re.compile('^Exception: (.*)') + failfile_re = re.compile('^sample written to (.*)') + + def __init__(self): + self.coverage = None + self.corpus = None + self.speed = None + self.memory = None + self.count = None + self.time_start = None + self.time_end = None + self.fail_file = None + self.exception = None + self.lines = [] + self.rc = None + + def record_start(self): + self.time_start = time.time() + + def record_end(self): + self.time_end = time.time() + + @property + def time_duration(self): + """ + Number of seconds the execution took, or None if not known + """ + if self.time_start and self.time_end: + return self.time_end - self.time_start + if self.time_start: + return time.time() - self.time_start + + return None + + def 
process_output(self, line): + match = self.coverage_re.search(line) + if match: + self.coverage = int(match.group(1)) + + match = self.corpus_re.search(line) + if match: + self.corpus = int(match.group(1)) + + match = self.speed_re.search(line) + if match: + self.speed = int(match.group(1)) + + match = self.memory_re.search(line) + if match: + self.memory = float(match.group(1)) + + match = self.count_re.search(line) or self.count2_re.search(line) + if match: + self.count = int(match.group(1)) + + match = self.exception_re.search(line) + if match: + self.exception = match.group(1) + + match = self.failfile_re.search(line) + if match: + self.fail_file = match.group(1) + + self.lines.append(line) + + def show(self, show_lines=False, indent=''): + """ + Show the status of this result. + """ + print("{}Executions : {}".format(indent, self.count)) + print("{}Corpus : {}".format(indent, self.corpus)) + print("{}Coverage : {}".format(indent, self.coverage)) + print("{}Final speed : {}/s".format(indent, self.speed)) + if self.memory: + print("{}Memory : {:.2f} MB".format(indent, self.memory)) + print("{}Runtime : {:.2f} s".format(indent, self.time_duration)) + if self.time_duration and self.count: + print("{}Overall speed : {:.2f}/s".format(indent, self.count / self.time_duration)) + print("{}Return code : {}".format(indent, self.rc)) + if self.exception: + print("{}Exception : {}".format(indent, self.exception)) + if self.fail_file: + print("{}Failed filename : {}".format(indent, self.fail_file)) + + if show_lines or self.rc: + print("{}Lines:".format(indent)) + for line in self.lines: + print("{} {}".format(indent, line.strip('\n'))) + + +class Example(object): + + def __init__(self, name, path, script): + self.name = name + self.path = path + self.script = script + + def run(self, python='python', runs=100, log='/dev/null'): + """ + Run the example script, capturing the output and maybe processing it. 
+ """ + cmd = [python, self.script, '--runs', str(runs)] + + result = Result() + with open(log, 'w') as log_fh: + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + result.record_start() + for line in proc.stdout: + line = line.decode('utf-8', 'replace') + result.process_output(line) + result.record_end() + + proc.wait() + result.rc = proc.returncode + + return result + + +def find_examples(): + examples = [] + print("Looking in {} for examples".format(here)) + for obj in os.listdir(here): + path = os.path.join(here, obj) + if os.path.isdir(path): + # At a later date we might actually provide multiple fuzz's per example. + fuzz = os.path.join(path, 'fuzz.py') + if os.path.isfile(fuzz): + examples.append(Example(obj, path, fuzz)) + + return examples + + +def main(): + parser = argparse.ArgumentParser(description='Exercise the example code') + parser.add_argument('--runs', type=int, default=1000, help='Number of individual test runs.') + parser.add_argument('--keep', action='store_true', help='Keep the crash/timeout files') + + args = parser.parse_args() + + # We remember whether the example itself failed (not the underlying module being fuzz'd) + # so that we can fail this run. + any_failed = False + for example in find_examples(): + print("Example: {}".format(example.name)) + result = example.run(runs=args.runs) + result.show(indent=' ') + if not args.keep: + if result.fail_file: + os.remove(result.fail_file) + if result.rc != 0: + any_failed = True + + sys.exit(1 if any_failed else 0) + + +if __name__ == '__main__': + main() From 11078d02ed3c0947b060ee8b8ab76f3b9075a8db Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sun, 12 Jan 2020 10:26:04 +0000 Subject: [PATCH 30/49] Update the executable used to run the examples to be the same as harness. In order to have an easy way to control which python runs the examples, make the python version used into the one used by the harness itself. 
That way if you invoke it with Python 3, it'll test with Python 3. --- examples/run_all_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/run_all_examples.py b/examples/run_all_examples.py index 4276db6..600d6cc 100755 --- a/examples/run_all_examples.py +++ b/examples/run_all_examples.py @@ -169,7 +169,7 @@ def main(): any_failed = False for example in find_examples(): print("Example: {}".format(example.name)) - result = example.run(runs=args.runs) + result = example.run(python=sys.executable, runs=args.runs) result.show(indent=' ') if not args.keep: if result.fail_file: From 6e469da56a52196a5b820d6c876ed94de92ee6f5 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 15:00:11 +0000 Subject: [PATCH 31/49] Update .gitignore to ignore pyc files; describe what they're for. The addition of Python 2 means that we need to ignore the compiled files that Python 2 creates which are not in the __pycache__ directory. This is added to the .gitignore file to make things consistent. To make the rest of the file clearer, the sections have been annotated to show why they're being ignored. A timeout-* rule has also been added, as this can happen when there are hangs triggered by the fuzzer. The .idea exclusion has been removed as it's not generated by nor anything to do with the product. Users who use editors or tools which create files should use the global configuration of their local system, rather than include them in the project-specific .gitignore files.
--- .gitignore | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9f55976..26ac81e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,13 @@ -.idea +# Files created by tests venv +*.pyc __pycache__ + +# Files created by general usage crash-* +timeout-* + +# Distribution files dist/ build/ pythonfuzz.egg-info/ From 0f78a24f543ceb783156ca2700bdf21490dd00ff Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 15:40:47 +0000 Subject: [PATCH 32/49] Update the requirements in setup.py and requirements.txt. The setup.py and requirements.txt had been stripped to make them work on both python2 and 3, without regard for keeping explicit statements of what it works with. This has been updated to specify the environment checks, so that we only install the requirements if they're needed on a given python version. Similarly, the python_requires has been updated to say "I'll take any 2.x version over 2.7, OR any 3.x version over 3.5.3" which better matches with the original requirements.
--- requirements.txt | 4 +++- setup.py | 10 +++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8866bf1..e6af3b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ coverage==4.5.4 psutil==5.6.3 -numpy==1.17.3 +numpy==1.16.6; python_version < '3' +numpy==1.17.3; python_version >= '3' +functools32==3.2.3.post2; python_version < '3' diff --git a/setup.py b/setup.py index 1383565..c1a82a2 100644 --- a/setup.py +++ b/setup.py @@ -13,9 +13,12 @@ long_description_content_type="text/markdown", url="https://github.com/fuzzitdev/pythonfuzz", install_requires=[ - 'coverage==4.5.4', - 'psutil==5.6.3', - 'numpy<1.17' + # WARNING: Keep these values in line with those in requirements.txt + "coverage==4.5.4", + "psutil==5.6.3", + "numpy==1.16.6; python_version < '3'", + "numpy==1.17.3; python_version >= '3'", + "functools32==3.2.3.post2; python_version < '3'", ], classifiers=[ "Programming Language :: Python :: 3", @@ -23,5 +26,6 @@ "Operating System :: OS Independent", "Topic :: Software Development :: Testing" ], + python_requires='~=2.7, ~=3.5.3', packages=setuptools.find_packages('.', exclude=("examples",)) ) From 3d05025f6fa8220f7a917ea7528dcb38f41ddc49 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 15:41:30 +0000 Subject: [PATCH 33/49] Create a requirements-test.txt; update the tests to work on Python 2. The requirements-test.txt file describes what the tests need in order to run. That's distinct from the requirements to install - and it's needed because the unittest.mock interface isn't present on Python 2.7, so we need to use a backport. This module is then used conditionally in the two tests we currently have, so that we can run in either version. The test classes have also had comments added to describe what they're trying to test and the invariants they assume. 
--- requirements-test.txt | 2 ++ tests/test_crash.py | 17 +++++++++++++++-- tests/test_nocrash.py | 18 ++++++++++++++---- 3 files changed, 31 insertions(+), 6 deletions(-) create mode 100644 requirements-test.txt diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..6815990 --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,2 @@ +# unittest.mock is present in 3.3 and higher; the mock library provides a backport +mock==3.0.5 ; python_version < '3.3' diff --git a/tests/test_crash.py b/tests/test_crash.py index 167e0bc..8988b0c 100644 --- a/tests/test_crash.py +++ b/tests/test_crash.py @@ -1,12 +1,25 @@ import unittest import zipfile import io -from unittest.mock import patch -import pythonfuzz +try: + from unittest.mock import patch +except ImportError: + # Python 2 backport of mock + from mock import patch + +import pythonfuzz.fuzzer + class TestFindCrash(unittest.TestCase): def test_find_crash(self): + """ + Tests that when an Exception occurs in the fuzz function, we detect this. + + Requires that the Fuzzer's configuration causes it to stop when an exception + is detected, and that the fuzzer will generate an invalid zip file. + Detects the exception implicitly by the fact that a logger call was made. + """ def fuzz(buf): f = io.BytesIO(buf) z = zipfile.ZipFile(f) diff --git a/tests/test_nocrash.py b/tests/test_nocrash.py index ec01e37..22fe36a 100644 --- a/tests/test_nocrash.py +++ b/tests/test_nocrash.py @@ -1,12 +1,22 @@ import unittest -import zipfile -import io -from unittest.mock import patch -import pythonfuzz +try: + from unittest.mock import patch +except ImportError: + # Python 2 backport of mock + from mock import patch + +import pythonfuzz.fuzzer + class TestFindCrash(unittest.TestCase): def test_find_crash(self): + """ + Tests that when no Exception occurs in the fuzz function, we exit without error. + + Detects the exception implicitly by the fact that a logger call was made with + particular text. 
+ """ def fuzz(buf): return True From 6c7dae6d12fe2282f5d626aa556229638b36ba3e Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 16:02:10 +0000 Subject: [PATCH 34/49] Ensure that test_crash checks the file and cleans up after itself. The test for a crash now checks that the file was actually created and that we clean that file up afterward, so that we don't affect other runs. --- tests/test_crash.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/test_crash.py b/tests/test_crash.py index 8988b0c..a48e1c1 100644 --- a/tests/test_crash.py +++ b/tests/test_crash.py @@ -1,6 +1,7 @@ +import io +import os import unittest import zipfile -import io try: from unittest.mock import patch @@ -28,3 +29,10 @@ def fuzz(buf): with patch('logging.Logger.info') as mock: pythonfuzz.fuzzer.Fuzzer(fuzz).start() self.assertTrue(mock.called_once) + + # Check that we created a crash file + # (this is the hash of an empty string, because we know that the first call is with an empty string) + self.assertTrue(os.path.exists('crash-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855')) + + # Clean up after ourselves + os.remove('crash-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855') From 19e09e45397a8b3c19a926caae267bb4d5277e5a Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 16:12:52 +0000 Subject: [PATCH 35/49] Add a Makefile to allow testing of the existing code. A small makefile allows the invocation of the test modules, so that we can exercise the system and make sure that it does sensible things when it's run - or at least that it doesn't regress. 
--- Makefile | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2bdc7d5 --- /dev/null +++ b/Makefile @@ -0,0 +1,77 @@ +# Common User Targets: +# +# make tests +# - Sxecute the tests +# +# make shell +# - Set up the environment and drop to a shell (re-uses existing shells if they +# have already been set up). +# Use ctrl-d or `exit` to leave the shell. +# +# PYTHON_TOOL=python2 make +# - Build the target requested, using python 2 +# +# +# Assumptions: +# * Python 3 is installed as `python3`. Use the PYTHON_TOOL variable to test using a +# specific python binary. +# * The 'virtualenv' tool is installed. +# + + +PYTHON_TOOL ?= python3 +ACTIVATE = source venv/${PYTHON_TOOL}/bin/activate + +.PHONY: tests venv + +TEST_MODULES = ${patsubst tests/%.py,%,$(wildcard tests/test_*.py)} + +ifeq (${NOCOLOUR},) +COL_NOTICE = "\\e[35m" +COL_GOOD = "\\e[32m" +COL_RESET = "\\e[0m" +else +COL_NOTICE = "" +COL_GOOD = "" +COL_RESET = "" +endif + +NOTICE = @notice() { printf "\n${COL_NOTICE}+++ %s${COL_RESET}\n" "$$@"; } && notice +GOOD = @notice() { printf "\n${COL_GOOD}+++ %s${COL_RESET}\n" "$$@"; } && notice + + +tests: test_testable + ${NOTICE} "Running tests" + @# Note: We cd into the tests directory, so that we are testing the installed version, not + @# the version in the repository. 
+ ${ACTIVATE} && cd tests && python -munittest -v ${TEST_MODULES} + ${GOOD} "Tests passed" + +venv: venv/successful-${PYTHON_TOOL} + +venv/successful-${PYTHON_TOOL}: + ${NOTICE} "Build the virtualenv we will test within (for ${PYTHON_TOOL})" + -rm -rf venv + mkdir -p venv + virtualenv -p ${PYTHON_TOOL} venv/${PYTHON_TOOL} + touch venv/successful-${PYTHON_TOOL} + +test_installable: venv + ${NOTICE} "Check that we can install the product" + ${ACTIVATE} && python setup.py install + +test_testable: test_installable + ${NOTICE} "Install the test requirements" + ${ACTIVATE} && pip install -r requirements-test.txt + +clean: + ${NOTICE} "Cleaning temporary files" + -rm -rf venv dist build + -find . -name '*.pyc' -delete + -find . -name '__pycache__' -delete + ${GOOD} "Cleaned" + +shell: venv + ${NOTICE} "Running shell; use ctrl-d or `exit` to leave" + bash -i <<<"${ACTIVATE} && exec < /dev/tty" + ${GOOD} "Returned to user shell" From 6daa2b534add813e4acd83be67ee1b402b0b5ae2 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 22:13:30 +0000 Subject: [PATCH 36/49] Fix for the dictionary text insert leaving NUL characters in string. The dictionary text insert wasn't working properly, despite looking just fine. I think the reason is that the parameters on 'copy' are transposed. The parameter called 'src' is actually the destination, (and vice-versa). This caused much confusion when trying to work out what was happening there. This change fixes the behaviour of the dictionary insert, but defers fixing the parameter names until it can be confirmed what was meant and that the strings are being manipulated correctly. The change also corrects a mistaken format character in the mutator __repr__ method. 
--- pythonfuzz/corpus.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index 26c41ee..051adb3 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -359,7 +359,7 @@ def mutate(self, res): pos = self._rand(len(res) + 1) for _ in word: res.append(0) - self.copy(res, res, pos, pos+len(word)) + self.copy(res, res, pos+len(word), pos) for k in range(len(word)): res[pos+k] = word[k] return res @@ -433,7 +433,7 @@ def acceptable(cls): raise CorpusError("No mutators are available") def __repr__(self): - return "<{}(corpus of {}, %i mutators)>".format(self.__class__.__name__, + return "<{}(corpus of {}, {} mutators)>".format(self.__class__.__name__, len(self._inputs), len(self.mutators)) From da4bb4f3e1ba4e8214b4e41c6e415f8d559ff6d6 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 20:39:06 +0000 Subject: [PATCH 37/49] Update the Makefile to invoke the examples; add a levels concept to tests. The Makefile's default target has been updated so that it now requires that the 'system' level tests build. This is one of 3 new targets that have been added: - test_level_unittests, which should cause the unit tests to run. We don't have any yet, but some will be added shortly. - test_level_integration, which causes the integration tests to run. These are the tests in the 'tests' directory, which exercise the external interface in a test-like environment to see that it functions as desired. This level is dependant on the unittests passing before it will be run. - test_level_system, which causes the examples to be run. These examples are the sorts of programs that a user might write, and which we expect to be invoked from the command and to do their job. This level is dependant on the integration tests passing before it will be run. 
In CI, the individual test targets (rather than the levels) would probably be invoked, with each one still dependant on the earlier ones in stages (or maybe just run them all in parallel, as they're all very fast). --- Makefile | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 2bdc7d5..d4c1b9b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Common User Targets: # # make tests -# - Sxecute the tests +# - Execute the tests # # make shell # - Set up the environment and drop to a shell (re-uses existing shells if they @@ -40,12 +40,34 @@ NOTICE = @notice() { printf "\n${COL_NOTICE}+++ %s${COL_RESET}\n" "$$@"; } && no GOOD = @notice() { printf "\n${COL_GOOD}+++ %s${COL_RESET}\n" "$$@"; } && notice -tests: test_testable - ${NOTICE} "Running tests" +tests: test_level_system + ${GOOD} "All tests passed" + +# 'test_level_*' targets run all the tests up to that level +test_level_unittests: unittests +test_level_integration: test_level_unittests integrationtests +test_level_system: test_level_integration systemtests + +# Unit tests test individual parse of a small unit. +unittests: test_testable + ${NOTICE} "Running unit tests" + ${GOOD} "Unit tests passed (we don't have any yet)" + +# Integration tests check the integration of those units. +integrationtests: test_testable + ${NOTICE} "Running integration tests" @# Note: We cd into the tests directory, so that we are testing the installed version, not @# the version in the repository. ${ACTIVATE} && cd tests && python -munittest -v ${TEST_MODULES} - ${GOOD} "Tests passed" + ${GOOD} "Integration tests passed" + +# System tests check that the way that a user might use it works. +systemtests: test_testable + ${NOTICE} "Running system tests" + @# We only run 1000 runs; just enough that we get to see that it's running the tests. 
+ ${ACTIVATE} && examples/run_all_examples.py --runs 1000 + ${GOOD} "System tests passed" + venv: venv/successful-${PYTHON_TOOL} From 4556c51cd0664a807d1d99b0f85f59c2abf927a8 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 23:42:45 +0000 Subject: [PATCH 38/49] Add unit tests to check the behaviour of mutators matches expectations. The mutators weren't being tested so we only had to assume that they were doing the right thing. In my own tests I saw a lot of input that had NUL bytes in it, so I'm pretty sure that some of the mutations were not doing the right thing. With these tests, the copy method was found to have poorly named parameters. The pattern it's using is copy(a, b, posa, posb, lena, lenb) (where lena and lenb can be omitted). The 'a' parameter is where the update will take place, and the 'b' parameter is where the copy originates. However, the first parameter was called 'src' and the second parameter 'dst', which is the precise opposite of the expectation. The naming of the parameters was kept consistent (a named as src, despite being the destination) with the other named parameters and within the function. This has been corrected, and with the correct naming, it became obvious that the insert, remove and duplicate functions were not working as intended. It is unclear what the difference is intended to be for Duplicate and Copy bytes - I have 'fixed' Duplicate, but this means that it now works identically to Copy, so it's not clear to me what's meant to be done there. 
--- pythonfuzz/corpus.py | 43 ++++---- tests/unittest_mutators.py | 198 +++++++++++++++++++++++++++++++++++++ 2 files changed, 224 insertions(+), 17 deletions(-) create mode 100644 tests/unittest_mutators.py diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index 051adb3..bde80c6 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -49,22 +49,27 @@ def _rand(n): return 0 return random.randint(0, n-1) - @staticmethod - def _choose_len(n): - x = Corpus._rand(100) + @classmethod + def _choose_len(cls, n): + x = cls._rand(100) if x < 90: - return Corpus._rand(min(8, n)) + 1 + return cls._rand(min(8, n)) + 1 elif x < 99: - return Corpus._rand(min(32, n)) + 1 + return cls._rand(min(32, n)) + 1 else: - return Corpus._rand(n) + 1 + return cls._rand(n) + 1 @staticmethod - def copy(src, dst, start_source, start_dst, end_source=None, end_dst=None): - end_source = len(src) if end_source is None else end_source + def copy(dst, src, start_dst, start_src, end_dst=None, end_src=None): + """ + Copy of content from one slice of a source object to a destination object. + + dst and src may be the same object. 
+ """ + end_src = len(src) if end_src is None else end_src end_dst = len(dst) if end_dst is None else end_dst - byte_to_copy = min(end_source-start_source, end_dst-start_dst) - src[start_source:start_source+byte_to_copy] = dst[start_dst:start_dst+byte_to_copy] + byte_to_copy = min(end_src-start_src, end_dst-start_dst) + dst[start_dst:start_dst+byte_to_copy] = src[start_src:start_src+byte_to_copy] def mutate(self, res): """ @@ -87,9 +92,10 @@ def mutate(self, res): return None pos0 = self._rand(len(res)) - pos1 = pos0 + self._choose_len(len(res) - pos0) - self.copy(res, res, pos1, pos0) - return res[:len(res) - (pos1-pos0)] + num_to_remove = self._choose_len(len(res) - pos0) + pos1 = pos0 + num_to_remove + self.copy(res, res, pos0, pos1) + return res[:len(res) - num_to_remove] @register_mutator @@ -102,7 +108,7 @@ def mutate(self, res): n = self._choose_len(10) for k in range(n): res.append(0) - self.copy(res, res, pos, pos+n) + self.copy(res, res, pos+n, pos) for k in range(n): res[pos+k] = self._rand(256) return res @@ -121,11 +127,10 @@ def mutate(self, res): while src == dst: dst = self._rand(len(res)) n = self._choose_len(len(res) - src) - tmp = bytearray(n) - self.copy(res, tmp, src, 0) + tmp = bytearray(res[src:src+n]) for k in range(n): res.append(0) - self.copy(res, res, dst, dst+n) + self.copy(res, res, dst+n, dst) for k in range(n): res[dst+k] = tmp[k] return res @@ -133,6 +138,7 @@ def mutate(self, res): @register_mutator class MutatorCopyBytes(Mutator): + # FIXME: Check how this diffs from DuplicateBytes name = 'Copy a range of bytes' types = set(['byte', 'copy']) @@ -170,6 +176,7 @@ def mutate(self, res): if len(res) == 0: return None pos = self._rand(len(res)) + # We use rand(255) + 1 so that there is no `^ 0` applied to the byte; it always changes. 
res[pos] ^= self._rand(255) + 1 return res @@ -492,6 +499,7 @@ def generate_input(self): def mutate(self, buf): res = buf[:] nm = self._rand_exp() + #print("Start with {}".format(res)) for i in range(nm): # Select a mutator from those we can apply @@ -501,6 +509,7 @@ def mutate(self, buf): x = self._rand(len(self.mutators)) mutator = self.mutators[x] + #print("Mutate with {}".format(mutator.__class__.__name__)) newres = mutator.mutate(res) if newres is not None: break diff --git a/tests/unittest_mutators.py b/tests/unittest_mutators.py new file mode 100644 index 0000000..3a185d1 --- /dev/null +++ b/tests/unittest_mutators.py @@ -0,0 +1,198 @@ +import unittest + +try: + from unittest.mock import patch +except ImportError: + # Python 2 backport of mock + from mock import patch + +import pythonfuzz.corpus as corpus + + +class FakeCorpus(object): + pass + + +class BaseTestMutators(unittest.TestCase): + """ + Test that the mutators objects are doing what we want them to do. + """ + # Subclasses should set this - 'mutator' will be created as part of setup. + mutator_class = None + + def setUp(self): + self.corpus = FakeCorpus() + self.patch_rand = patch('pythonfuzz.corpus.Mutator._rand') + self.mock_rand = self.patch_rand.start() + self.mock_rand.side_effect = [] + # Update the side effects in your subclass + + self.addCleanup(self.patch_rand.stop) + + self.mutator = self.mutator_class(self.corpus) + + +class TestMutatorRemoveRange(BaseTestMutators): + mutator_class = corpus.MutatorRemoveRange + + def test01_empty(self): + # You cannot remove values from an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_remove_section(self): + # Check that it removes a sensible range + + # Check that removing at the 2nd position, removing 4 characters leaves the right string. 
+ self.mock_rand.side_effect = [2, 0, 3] + + res = self.mutator.mutate(bytearray(b'1234567890')) + self.assertEqual(res, bytearray(b'127890')) + + +class TestMutatorInsertBytes(BaseTestMutators): + mutator_class = corpus.MutatorInsertBytes + + def test02_insert_bytes(self): + # Check that it inserts sensibly + + # Check that inserting at the 2nd position, adding 4 characters gives us the right string + self.mock_rand.side_effect = [2, 0, 3, 65, 66, 67, 68] + + res = self.mutator.mutate(bytearray(b'123456789')) + self.assertEqual(res, bytearray(b'12ABCD3456789')) + + +class TestMutatorDuplicateBytes(BaseTestMutators): + mutator_class = corpus.MutatorDuplicateBytes + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_duplicate_bytes(self): + # Check that it duplicates + + # Duplicate from offset 2 to offset 5, length 2 + self.mock_rand.side_effect = [2, 5, 0, 1] + + res = self.mutator.mutate(bytearray(b'123456789')) + self.assertEqual(res, bytearray(b'12345346789')) + + +class TestMutatorCopyBytes(BaseTestMutators): + mutator_class = corpus.MutatorDuplicateBytes + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_duplicate_bytes(self): + # Check that it duplicates + + # Duplicate from offset 2 to offset 5, length 2 + self.mock_rand.side_effect = [2, 5, 0, 1] + + res = self.mutator.mutate(bytearray(b'123456789')) + self.assertEqual(res, bytearray(b'12345346789')) + + +class TestMutatorBitFlip(BaseTestMutators): + mutator_class = corpus.MutatorBitFlip + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_flip_bit(self): + # Check that it flips + + # At offset 4, flip bit 3 + self.mock_rand.side_effect = [4, 3] + + res = self.mutator.mutate(bytearray(b'123456789')) + self.assertEqual(res, 
bytearray(b'1234=6789')) + + +class TestMutatorRandomiseByte(BaseTestMutators): + mutator_class = corpus.MutatorRandomiseByte + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_randomise_byte(self): + # Check that it changes a byte + + # At offset 4, EOR with 65+1 + self.mock_rand.side_effect = [4, 65] + + res = self.mutator.mutate(bytearray(b'123456789')) + self.assertEqual(res, bytearray(b'1234w6789')) + + +class TestMutatorSwapBytes(BaseTestMutators): + mutator_class = corpus.MutatorSwapBytes + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_swap_bytes(self): + # Check that it swaps bytes + + # Swap bytes at 1 and 6 + self.mock_rand.side_effect = [1, 6] + + res = self.mutator.mutate(bytearray(b'123456789')) + self.assertEqual(res, bytearray(b'173456289')) + + +class TestMutatorAddSubByte(BaseTestMutators): + mutator_class = corpus.MutatorAddSubByte + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_add_bytes(self): + # Check that it adds/subs + # FIXME: Not yet implemented - uses a randomised bit for the add/sub + pass + +# FIXME: Also not implemented AddSubShort, AddSubLong, AddSubLongLong +# FIXME: Not yet implemented ReplaceByte, ReplaceShort, ReplaceLong + + +class TestMutatorReplaceDigit(BaseTestMutators): + mutator_class = corpus.MutatorReplaceDigit + + def test01_empty(self): + # Cannot work with an empty input + res = self.mutator.mutate(bytearray(b'')) + self.assertIsNone(res) + + def test02_no_digits(self): + # Cannot work with a string that has no digits + res = self.mutator.mutate(bytearray(b'wibble')) + self.assertIsNone(res) + + def test03_replace_digit(self): + # Check that it replaces a digit + self.mock_rand.side_effect = [0, 5] + + res = 
self.mutator.mutate(bytearray(b'there are 4 lights')) + self.assertEqual(res, bytearray(b'there are 5 lights')) + + +# FIXME: Not yet implemented: Dictionary insert, Dictionary Append + + +if __name__ == '__main__': + unittest.main() From 24f7a6516d3ef472b92fed498fa1b2dea12dd725 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 23:54:43 +0000 Subject: [PATCH 39/49] Update the Makefile to run unit tests. Now that we have unit tests, these can be invoked by the Makefile. --- Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index d4c1b9b..c44c4c0 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,8 @@ ACTIVATE = source venv/${PYTHON_TOOL}/bin/activate .PHONY: tests venv -TEST_MODULES = ${patsubst tests/%.py,%,$(wildcard tests/test_*.py)} +UNITTEST_MODULES = ${patsubst tests/%.py,%,$(wildcard tests/unittest_*.py)} +INTTEST_MODULES = ${patsubst tests/%.py,%,$(wildcard tests/test_*.py)} ifeq (${NOCOLOUR},) COL_NOTICE = "\\e[35m" @@ -51,14 +52,17 @@ test_level_system: test_level_integration systemtests # Unit tests test individual parse of a small unit. unittests: test_testable ${NOTICE} "Running unit tests" - ${GOOD} "Unit tests passed (we don't have any yet)" + @# Note: We cd into the tests directory, so that we are testing the installed version, not + @# the version in the repository. + ${ACTIVATE} && cd tests && python -munittest -v ${UNITTEST_MODULES} + ${GOOD} "Unit tests passed" # Integration tests check the integration of those units. integrationtests: test_testable ${NOTICE} "Running integration tests" @# Note: We cd into the tests directory, so that we are testing the installed version, not @# the version in the repository. - ${ACTIVATE} && cd tests && python -munittest -v ${TEST_MODULES} + ${ACTIVATE} && cd tests && python -munittest -v ${INTTEST_MODULES} ${GOOD} "Integration tests passed" # System tests check that the way that a user might use it works. 
From 410fa8ac13aba8fb37a285221fc7a16dac0925c0 Mon Sep 17 00:00:00 2001 From: Charles Ferguson Date: Sat, 11 Jan 2020 23:55:21 +0000 Subject: [PATCH 40/49] Add annotations to each of the test systems to describe themselves. The test files have now been updated to include annotations, in the file prologue comment, which describe the test and its place in the testing environment. These are just a convention that I've used previously, but they help to focus anyone doing testing on describing where they fit into the testing of the system. Such descriptions make it obvious when reviewed where there are gaps in testing. --- examples/run_all_examples.py | 5 +++++ tests/test_crash.py | 9 +++++++++ tests/test_nocrash.py | 9 +++++++++ tests/unittest_mutators.py | 9 +++++++++ 4 files changed, 32 insertions(+) diff --git a/examples/run_all_examples.py b/examples/run_all_examples.py index 600d6cc..79f28ef 100755 --- a/examples/run_all_examples.py +++ b/examples/run_all_examples.py @@ -1,6 +1,11 @@ #!/usr/bin/env python """ Run all the examples and collect the timings and results. + +SUT: Invocation +Area: Examples run +Class: Functional +Type: System test """ import argparse diff --git a/tests/test_crash.py b/tests/test_crash.py index a48e1c1..e19e550 100644 --- a/tests/test_crash.py +++ b/tests/test_crash.py @@ -1,3 +1,12 @@ +""" +Test the fuzzing terminates when a fault is found. + +SUT: Fuzzer +Area: Fault finding +Class: Functional +Type: Integration test +""" + import io import os import unittest diff --git a/tests/test_nocrash.py b/tests/test_nocrash.py index 22fe36a..210c14f 100644 --- a/tests/test_nocrash.py +++ b/tests/test_nocrash.py @@ -1,3 +1,12 @@ +""" +Test the fuzzing terminates when no faults found, at a run limit. 
+ +SUT: Fuzzer +Area: Non-fault operation +Class: Functional +Type: Integration test +""" + import unittest try: diff --git a/tests/unittest_mutators.py b/tests/unittest_mutators.py index 3a185d1..8ed5ae1 100644 --- a/tests/unittest_mutators.py +++ b/tests/unittest_mutators.py @@ -1,3 +1,12 @@ +""" +Test the mutators operate as desired. + +SUT: Corpus +Area: Mutators +Class: Functional +Type: Unit test +""" + import unittest try: From 8f6c8a3d917108c30cff8f8d18fe7f233dfc95a3 Mon Sep 17 00:00:00 2001 From: Florian Pigorsch Date: Thu, 23 Jan 2020 22:00:38 +0100 Subject: [PATCH 41/49] Add 'CleverCSV' bug/trophy n/a --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 85c88f0..0b1a3e6 100644 --- a/README.md +++ b/README.md @@ -110,5 +110,6 @@ any unnecessary work is done. ## Trophies * [python built-in HTMLParser - unhandled exception](https://bugs.python.org/msg355287) +* [CleverCSV - unhandled exceptions](https://github.com/alan-turing-institute/CleverCSV/issues/7) **Feel free to add bugs that you found with pythonfuzz to this list via pull-request** From f9eb3d5143c967a6c2e0470afa1e5910caefa17b Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sat, 8 Feb 2020 14:12:29 +0100 Subject: [PATCH 42/49] Get rid of coverage.py --- README.md | 2 -- pythonfuzz/fuzzer.py | 35 +++-------------------------------- pythonfuzz/tracer.py | 36 ++++++++++++++++++++++++++++++++++++ requirements.txt | 1 - setup.py | 1 - 5 files changed, 39 insertions(+), 36 deletions(-) create mode 100644 pythonfuzz/tracer.py diff --git a/README.md b/README.md index 0b1a3e6..f471bd8 100644 --- a/README.md +++ b/README.md @@ -98,8 +98,6 @@ PythonFuzz is a port of [fuzzitdev/jsfuzz](https://github.com/fuzzitdev/jsfuzz) which is in turn heavily based on [go-fuzz](https://github.com/dvyukov/go-fuzz) originally developed by [Dmitry Vyukov's](https://twitter.com/dvyukov). 
Which is in turn heavily based on [Michal Zalewski](https://twitter.com/lcamtuf) [AFL](http://lcamtuf.coredump.cx/afl/). -For coverage PythonFuzz is using [coverage](https://coverage.readthedocs.io/en/v4.5.x/) instrumentation and coverage library. - ## Contributions Contributions are welcome!:) There are still a lot of things to improve, and tests and features to add. We will slowly post those in the diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 3e3c886..1e27391 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -5,11 +5,10 @@ import psutil import hashlib import logging -import coverage import functools import multiprocessing as mp -from pythonfuzz import corpus +from pythonfuzz import corpus, tracer logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) logging.getLogger().setLevel(logging.DEBUG) @@ -23,29 +22,6 @@ lru_cache = functools32.lru_cache -if coverage.version.version_info <= (5, ): - # Since we're using an old version of coverage.py, - # we're monkey patching it a bit to improve the performances. - - # Using memoization here gives +50% in performances, since this - # function triggers a lot of syscalls. 
- # See the benchmarks here: - # - https://github.com/fuzzitdev/pythonfuzz/issues/9 - @lru_cache(None) - def abs_file_cache(path): - """Return the absolute normalized form of `path`.""" - try: - path = os.path.realpath(path) - except UnicodeError: - pass - path = os.path.abspath(path) - path = coverage.files.actual_path(path) - path = coverage.files.unicode_filename(path) - return path - - coverage.files.abs_file = abs_file_cache - - def worker(target, child_conn, close_fd_mask): # Silence the fuzzee's noise class DummyFile: @@ -59,8 +35,7 @@ def write(self, x): if close_fd_mask & 2: sys.stderr = DummyFile() - cov = coverage.Coverage(branch=True, cover_pylib=True) - cov.start() + sys.settrace(tracer.trace) while True: buf = child_conn.recv_bytes() try: @@ -71,11 +46,7 @@ def write(self, x): child_conn.send(e) break else: - total_coverage = 0 - cov_data = cov.get_data() - for filename in cov_data._arcs: - total_coverage += len(cov_data._arcs[filename]) - child_conn.send(total_coverage) + child_conn.send(tracer.get_coverage()) class Fuzzer(object): diff --git a/pythonfuzz/tracer.py b/pythonfuzz/tracer.py new file mode 100644 index 0000000..26c7cad --- /dev/null +++ b/pythonfuzz/tracer.py @@ -0,0 +1,36 @@ +import collections +import sys + +prev_line = 0 +prev_filename = '' +data = collections.defaultdict(set) + +def trace(frame, event, arg): + if event != 'line': + return trace + + global prev_line + global prev_filename + + func_filename = frame.f_code.co_filename + func_line_no = frame.f_lineno + + if func_filename != prev_filename: + # We need a way to keep track of inter-files transferts, + # and since we don't really care about the details of the coverage, + # concatenating the two filenames in enough. 
+ data[func_filename + prev_filename].add((prev_line, func_line_no)) + else: + data[func_filename].add((prev_line, func_line_no)) + + prev_line = func_line_no + prev_filename = func_filename + + return trace + + +def get_coverage(): + ret = 0 + for value in data.values(): + ret += len(value) + return ret \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e6af3b9..c278d1e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -coverage==4.5.4 psutil==5.6.3 numpy==1.16.6; python_version < '3' numpy==1.17.3; python_version >= '3' diff --git a/setup.py b/setup.py index c1a82a2..ae25ec9 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,6 @@ url="https://github.com/fuzzitdev/pythonfuzz", install_requires=[ # WARNING: Keep these values in line with those in requirements.txt - "coverage==4.5.4", "psutil==5.6.3", "numpy==1.16.6; python_version < '3'", "numpy==1.17.3; python_version >= '3'", From 80acd38e14ace64417400d45c68e1d06630b514c Mon Sep 17 00:00:00 2001 From: jvoisin Date: Mon, 2 Mar 2020 22:11:20 +0100 Subject: [PATCH 43/49] Use (recv/send)_bytes instead of recv/send This commit improves a bit the performances in the same spirit as b91ad83 --- pythonfuzz/fuzzer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pythonfuzz/fuzzer.py b/pythonfuzz/fuzzer.py index 1e27391..bd88854 100644 --- a/pythonfuzz/fuzzer.py +++ b/pythonfuzz/fuzzer.py @@ -46,7 +46,7 @@ def write(self, x): child_conn.send(e) break else: - child_conn.send(tracer.get_coverage()) + child_conn.send_bytes(b'%d' % tracer.get_coverage()) class Fuzzer(object): @@ -135,8 +135,9 @@ def start(self): self.write_sample(buf, prefix='timeout-') break - total_coverage = parent_conn.recv() - if type(total_coverage) != int: + try: + total_coverage = int(parent_conn.recv_bytes()) + except ValueError: self.write_sample(buf) break From e438c4cbe850c357a11465c00bbfe60b91aba995 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 6 Mar 2020 20:47:45
+0100 Subject: [PATCH 44/49] Significantly speed up coverage collection This reduces the time spent in get_coverage from ~30% to ~2% in my local tests on Python3. --- pythonfuzz/tracer.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pythonfuzz/tracer.py b/pythonfuzz/tracer.py index 26c7cad..708979f 100644 --- a/pythonfuzz/tracer.py +++ b/pythonfuzz/tracer.py @@ -30,7 +30,4 @@ def trace(frame, event, arg): def get_coverage(): - ret = 0 - for value in data.values(): - ret += len(value) - return ret \ No newline at end of file + return sum(map(len, data.values())) From 4f3a68b53e82ee958d35b51a56e696e11a391126 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Mar 2020 18:21:03 +0000 Subject: [PATCH 45/49] Bump psutil from 5.6.3 to 5.6.6 Bumps [psutil](https://github.com/giampaolo/psutil) from 5.6.3 to 5.6.6. - [Release notes](https://github.com/giampaolo/psutil/releases) - [Changelog](https://github.com/giampaolo/psutil/blob/master/HISTORY.rst) - [Commits](https://github.com/giampaolo/psutil/compare/release-5.6.3...release-5.6.6) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index c278d1e..797772e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -psutil==5.6.3 +psutil==5.6.6 numpy==1.16.6; python_version < '3' numpy==1.17.3; python_version >= '3' functools32==3.2.3.post2; python_version < '3' diff --git a/setup.py b/setup.py index ae25ec9..69bc416 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ url="https://github.com/fuzzitdev/pythonfuzz", install_requires=[ # WARNING: Keep these values in line with those in requirements.txt - "psutil==5.6.3", + "psutil==5.6.6", "numpy==1.16.6; python_version < '3'", "numpy==1.17.3; python_version >= '3'", "functools32==3.2.3.post2; python_version < '3'", From 
ca4960380cb3e6439a1c3730b92c9798c00abdee Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 10 Jun 2020 18:27:53 +0200 Subject: [PATCH 46/49] Get rid of numpy Numpy's types aren't doing much beside eating CPU and doing a simple wraparound, which can be done via a simple modulo. This commit also unroll a couple of loops. --- pythonfuzz/corpus.py | 60 ++++++++++++++++++++++---------------------- requirements.txt | 2 -- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index bde80c6..fbbe844 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -1,6 +1,5 @@ import os import math -import numpy import random import struct import hashlib @@ -9,8 +8,8 @@ INTERESTING8 = [-128, -1, 0, 1, 16, 32, 64, 100, 127] -INTERESTING16 = [-32768, -129, 128, 255, 256, 512, 1000, 1024, 4096, 32767] -INTERESTING32 = [-2147483648, -100663046, -32769, 32768, 65535, 65536, 100663045, 2147483647] +INTERESTING16 = [0, 128, 255, 256, 512, 1000, 1024, 4096, 32767, 65535] +INTERESTING32 = [0, 1, 32768, 65535, 65536, 100663045, 2147483647, 4294967295] # A list of all the mutator clases we have available @@ -206,11 +205,8 @@ def mutate(self, res): if len(res) == 0: return None pos = self._rand(len(res)) - v = self._rand(35) + 1 - if bool(random.getrandbits(1)): - res[pos] = numpy.uint8(res[pos]) + numpy.uint8(v) - else: - res[pos] = numpy.uint8(res[pos]) - numpy.uint8(v) + v = self._rand(2**8) + res[pos] = (res[pos] + v) % 256 return res @@ -223,16 +219,14 @@ def mutate(self, res): if len(res) < 2: return None pos = self._rand(len(res) - 1) - v = numpy.uint16(self._rand(35) + 1) - if bool(random.getrandbits(1)): - v = numpy.uint16(0) - v + v = self._rand(2**16) if bool(random.getrandbits(1)): v = struct.pack('>H', v) else: v = struct.pack('I', v) else: v = struct.pack('Q', v) else: v = struct.pack('H', v) else: v = struct.pack('I', v) else: v = struct.pack('= '3' functools32==3.2.3.post2; python_version < '3' From 
e806f3b7346e37315c8e6698b13a18659a767e18 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 10 Jun 2020 18:53:11 +0200 Subject: [PATCH 47/49] Move the corpus initialization out of a hot path This initial corpus seeding can be done in the constructor, instead of checking if it has been initialized every single time the fuzzer generates an input. --- pythonfuzz/corpus.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pythonfuzz/corpus.py b/pythonfuzz/corpus.py index bde80c6..75e3f58 100644 --- a/pythonfuzz/corpus.py +++ b/pythonfuzz/corpus.py @@ -412,6 +412,7 @@ def __init__(self, dirs=None, max_input_size=4096, mutators_filter=None, dict_pa self._seed_run_finished = not self._inputs self._seed_idx = 0 self._save_corpus = dirs and os.path.isdir(dirs[0]) + self._inputs.append(bytearray(0)) # Work out what we'll filter filters = mutators_filter.split(' ') if mutators_filter else [] @@ -488,13 +489,8 @@ def generate_input(self): self._seed_run_finished = True return next_input - if len(self._inputs) == 0: - zero_test_case = bytearray(0) - self.put(zero_test_case) - return zero_test_case - else: - buf = self._inputs[self._rand(len(self._inputs))] - return self.mutate(buf) + buf = self._inputs[self._rand(len(self._inputs))] + return self.mutate(buf) def mutate(self, buf): res = buf[:] From 846e69bdb885717be65f36254445aa70e93f74f3 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 12 Jun 2020 16:12:48 +0200 Subject: [PATCH 48/49] Add two html-related issues to the trophies --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f471bd8..0c19cd7 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,8 @@ any unnecessary work is done.
## Trophies -* [python built-in HTMLParser - unhandled exception](https://bugs.python.org/msg355287) +* [python built-in HTMLParser - unhandled exception](https://bugs.python.org/msg355287), [twice](https://bugs.launchpad.net/beautifulsoup/+bug/1883104) * [CleverCSV - unhandled exceptions](https://github.com/alan-turing-institute/CleverCSV/issues/7) +* [beautifulsoup](https://bugs.launchpad.net/beautifulsoup/+bug/1883264) **Feel free to add bugs that you found with pythonfuzz to this list via pull-request** From 2434a92c69fdb2d0f83e88194ffafd32f70d2f3e Mon Sep 17 00:00:00 2001 From: Yevgeny Pats Date: Sun, 12 Jul 2020 10:15:38 +0300 Subject: [PATCH 49/49] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 0c19cd7..abfab08 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +fuzzit.dev was [acquired](https://about.gitlab.com/press/releases/2020-06-11-gitlab-acquires-peach-tech-and-fuzzit-to-expand-devsecops-offering.html) by GitLab and the new home for this repo is [here](https://gitlab.com/gitlab-org/security-products/analyzers/fuzzers/pythonfuzz) + # pythonfuzz: coverage-guided fuzz testing for python PythonFuzz is coverage-guided [fuzzer](https://developer.mozilla.org/en-US/docs/Glossary/Fuzzing) for testing python packages.