diff mbox series

[1/2] commit-graph, fuzz: Add fuzzer for commit-graph

Message ID 53e62baaa8769bf8e90991e32e0d123cc6629559.1544048946.git.steadmon@google.com (mailing list archive)
State New, archived
Headers show
Series Add commit-graph fuzzer and fix buffer overflow | expand

Commit Message

Josh Steadmon Dec. 5, 2018, 10:32 p.m. UTC
Breaks load_commit_graph_one() into a new function,
parse_commit_graph(). The latter function operates on arbitrary buffers,
which makes it suitable as a fuzzing target.

Adds fuzz-commit-graph.c, which provides a fuzzing entry point
compatible with libFuzzer (and possibly other fuzzing engines).

Signed-off-by: Josh Steadmon <steadmon@google.com>
---
 .gitignore          |  1 +
 Makefile            |  1 +
 commit-graph.c      | 63 +++++++++++++++++++++++++++++++++------------
 fuzz-commit-graph.c | 18 +++++++++++++
 4 files changed, 66 insertions(+), 17 deletions(-)
 create mode 100644 fuzz-commit-graph.c

Comments

Ævar Arnfjörð Bjarmason Dec. 5, 2018, 10:48 p.m. UTC | #1
On Wed, Dec 05 2018, Josh Steadmon wrote:

> Breaks load_commit_graph_one() into a new function,
> parse_commit_graph(). The latter function operates on arbitrary buffers,
> which makes it suitable as a fuzzing target.
>
> Adds fuzz-commit-graph.c, which provides a fuzzing entry point
> compatible with libFuzzer (and possibly other fuzzing engines).
>
> Signed-off-by: Josh Steadmon <steadmon@google.com>
> ---
>  .gitignore          |  1 +
>  Makefile            |  1 +
>  commit-graph.c      | 63 +++++++++++++++++++++++++++++++++------------
>  fuzz-commit-graph.c | 18 +++++++++++++
>  4 files changed, 66 insertions(+), 17 deletions(-)
>  create mode 100644 fuzz-commit-graph.c
>
> diff --git a/.gitignore b/.gitignore
> index 0d77ea5894..8bcf153ed9 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -1,3 +1,4 @@
> +/fuzz-commit-graph
>  /fuzz_corpora
>  /fuzz-pack-headers
>  /fuzz-pack-idx
> diff --git a/Makefile b/Makefile
> index 1a44c811aa..6b72f37c29 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -684,6 +684,7 @@ SCRIPTS = $(SCRIPT_SH_INS) \
>
>  ETAGS_TARGET = TAGS
>
> +FUZZ_OBJS += fuzz-commit-graph.o
>  FUZZ_OBJS += fuzz-pack-headers.o
>  FUZZ_OBJS += fuzz-pack-idx.o
>
> diff --git a/commit-graph.c b/commit-graph.c
> index 40c855f185..0755359b1a 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -46,6 +46,10 @@
>  #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
>  			+ GRAPH_FANOUT_SIZE + GRAPH_OID_LEN)
>
> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size);
> +
> +
>  char *get_commit_graph_filename(const char *obj_dir)
>  {
>  	return xstrfmt("%s/info/commit-graph", obj_dir);
> @@ -84,16 +88,10 @@ static int commit_graph_compatible(struct repository *r)
>  struct commit_graph *load_commit_graph_one(const char *graph_file)
>  {
>  	void *graph_map;
> -	const unsigned char *data, *chunk_lookup;
>  	size_t graph_size;
>  	struct stat st;
> -	uint32_t i;
> -	struct commit_graph *graph;
> +	struct commit_graph *ret;
>  	int fd = git_open(graph_file);
> -	uint64_t last_chunk_offset;
> -	uint32_t last_chunk_id;
> -	uint32_t graph_signature;
> -	unsigned char graph_version, hash_version;
>
>  	if (fd < 0)
>  		return NULL;
> @@ -108,27 +106,61 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
>  		die(_("graph file %s is too small"), graph_file);
>  	}
>  	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
> +	ret = parse_commit_graph(graph_map, fd, graph_size);
> +
> +	if (ret == NULL) {

Code in git usually uses just !ret.

> +		munmap(graph_map, graph_size);
> +		close(fd);
> +		exit(1);

Ouch, exit(1) from load_commit_graph_one()? Can't we return NULL here
instead? Nasty to exit from a library routine, but then I see later...

> @@ -201,11 +235,6 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
>  	}
>
>  	return graph;
> -
> -cleanup_fail:
> -	munmap(graph_map, graph_size);
> -	close(fd);
> -	exit(1);
>  }

... ah, I see this is where you got the exit(1) from. So it was there
already.

>  static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
> diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
> new file mode 100644
> index 0000000000..420851d0d2
> --- /dev/null
> +++ b/fuzz-commit-graph.c
> @@ -0,0 +1,18 @@
> +#include "object-store.h"
> +#include "commit-graph.h"
> +
> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
> +{
> +	struct commit_graph *g;
> +
> +	g = parse_commit_graph((void *) data, -1, size);
> +	if (g)
> +		free(g);
> +
> +	return 0;
> +}

I hadn't looked at this before, but see your 5e47215080 ("fuzz: add
basic fuzz testing target.", 2018-10-12) for some prior art.

There's instructions there for a very long "make" invocation. Would be
nice if this were friendlier and we could just do "make test-fuzz" or
something...
Josh Steadmon Dec. 6, 2018, 1 a.m. UTC | #2
On 2018.12.05 23:48, Ævar Arnfjörð Bjarmason wrote:
> 
> On Wed, Dec 05 2018, Josh Steadmon wrote:
> 
> > Breaks load_commit_graph_one() into a new function,
> > parse_commit_graph(). The latter function operates on arbitrary buffers,
> > which makes it suitable as a fuzzing target.
> >
> > Adds fuzz-commit-graph.c, which provides a fuzzing entry point
> > compatible with libFuzzer (and possibly other fuzzing engines).
> >
> > Signed-off-by: Josh Steadmon <steadmon@google.com>
> > ---
> >  .gitignore          |  1 +
> >  Makefile            |  1 +
> >  commit-graph.c      | 63 +++++++++++++++++++++++++++++++++------------
> >  fuzz-commit-graph.c | 18 +++++++++++++
> >  4 files changed, 66 insertions(+), 17 deletions(-)
> >  create mode 100644 fuzz-commit-graph.c
> >
> > diff --git a/.gitignore b/.gitignore
> > index 0d77ea5894..8bcf153ed9 100644
> > --- a/.gitignore
> > +++ b/.gitignore
> > @@ -1,3 +1,4 @@
> > +/fuzz-commit-graph
> >  /fuzz_corpora
> >  /fuzz-pack-headers
> >  /fuzz-pack-idx
> > diff --git a/Makefile b/Makefile
> > index 1a44c811aa..6b72f37c29 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -684,6 +684,7 @@ SCRIPTS = $(SCRIPT_SH_INS) \
> >
> >  ETAGS_TARGET = TAGS
> >
> > +FUZZ_OBJS += fuzz-commit-graph.o
> >  FUZZ_OBJS += fuzz-pack-headers.o
> >  FUZZ_OBJS += fuzz-pack-idx.o
> >
> > diff --git a/commit-graph.c b/commit-graph.c
> > index 40c855f185..0755359b1a 100644
> > --- a/commit-graph.c
> > +++ b/commit-graph.c
> > @@ -46,6 +46,10 @@
> >  #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
> >  			+ GRAPH_FANOUT_SIZE + GRAPH_OID_LEN)
> >
> > +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> > +					size_t graph_size);
> > +
> > +
> >  char *get_commit_graph_filename(const char *obj_dir)
> >  {
> >  	return xstrfmt("%s/info/commit-graph", obj_dir);
> > @@ -84,16 +88,10 @@ static int commit_graph_compatible(struct repository *r)
> >  struct commit_graph *load_commit_graph_one(const char *graph_file)
> >  {
> >  	void *graph_map;
> > -	const unsigned char *data, *chunk_lookup;
> >  	size_t graph_size;
> >  	struct stat st;
> > -	uint32_t i;
> > -	struct commit_graph *graph;
> > +	struct commit_graph *ret;
> >  	int fd = git_open(graph_file);
> > -	uint64_t last_chunk_offset;
> > -	uint32_t last_chunk_id;
> > -	uint32_t graph_signature;
> > -	unsigned char graph_version, hash_version;
> >
> >  	if (fd < 0)
> >  		return NULL;
> > @@ -108,27 +106,61 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
> >  		die(_("graph file %s is too small"), graph_file);
> >  	}
> >  	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
> > +	ret = parse_commit_graph(graph_map, fd, graph_size);
> > +
> > +	if (ret == NULL) {
> 
> Code in git usually uses just !ret.

Will fix in V2, thanks.


> > +		munmap(graph_map, graph_size);
> > +		close(fd);
> > +		exit(1);
> 
> Ouch, exit(1) from load_commit_graph_one()? Can't we return NULL here
> instead? Nasty to exit from a library routine, but then I see later...
> 
> > @@ -201,11 +235,6 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
> >  	}
> >
> >  	return graph;
> > -
> > -cleanup_fail:
> > -	munmap(graph_map, graph_size);
> > -	close(fd);
> > -	exit(1);
> >  }
> 
> ... ah, I see this is where you got the exit(1) from. So it was there
> already.
> 
> >  static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
> > diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
> > new file mode 100644
> > index 0000000000..420851d0d2
> > --- /dev/null
> > +++ b/fuzz-commit-graph.c
> > @@ -0,0 +1,18 @@
> > +#include "object-store.h"
> > +#include "commit-graph.h"
> > +
> > +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> > +					size_t graph_size);
> > +
> > +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
> > +
> > +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
> > +{
> > +	struct commit_graph *g;
> > +
> > +	g = parse_commit_graph((void *) data, -1, size);
> > +	if (g)
> > +		free(g);
> > +
> > +	return 0;
> > +}
> 
> I hadn't looked at this before, but see your 5e47215080 ("fuzz: add
> basic fuzz testing target.", 2018-10-12) for some prior art.
> 
> There's instructions there for a very long "make" invocation. Would be
> nice if this were friendlier and we could just do "make test-fuzz" or
> something...

Yeah, the problem is that there are too many combinations of fuzzing
engine, sanitizer, and compiler to make any reasonable default here.
Even if you just stick with libFuzzer, address sanitizer, and clang, the
flags change radically depending on which version of clang you're using.
Junio C Hamano Dec. 6, 2018, 1:32 a.m. UTC | #3
Josh Steadmon <steadmon@google.com> writes:

> @@ -108,27 +106,61 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
>  		die(_("graph file %s is too small"), graph_file);
>  	}
>  	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
> +	ret = parse_commit_graph(graph_map, fd, graph_size);

OK, assuming that the new helper returns NULL from all places in the
original that would have jumped to the cleanup-fail label, this would
do the same thing (it may not be the right thing to exit, but that
is OK for the purpose of this change).

> +	if (ret == NULL) {
> +		munmap(graph_map, graph_size);
> +		close(fd);
> +		exit(1);
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * This function is intended to be used only from load_commit_graph_one() or in
> + * fuzz tests.
> + */

Hmph, is that necessary to say?  

"Right now, it has only these two callers" is sometimes handy for
those without good static analysis tools, like "git grep" ;-), but I
do not think of a reason why a new caller we'll add in the future,
who happens to have the data of the graph file in-core, should not
be allowed to call this function.


> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size)
> +{
> +	const unsigned char *data, *chunk_lookup;
> +	uint32_t i;
> +	struct commit_graph *graph;
> +	uint64_t last_chunk_offset;
> +	uint32_t last_chunk_id;
> +	uint32_t graph_signature;
> +	unsigned char graph_version, hash_version;
> +
> +	/*
> +	 * This should already be checked in load_commit_graph_one, but we still
> +	 * need a check here for when we're calling parse_commit_graph directly
> +	 * from fuzz tests. We can omit the error message in that case.
> +	 */

In the same spirit, I am dubious of the longer-term value of this
comment.  As an explanation of why this conversion is correct in the
proposed log message for this change, it perfectly makes sense,
though.

> +	if (graph_size < GRAPH_MIN_SIZE)
> +		return NULL;
> +

The load_commit_graph_one() grabbed graph_map out of xmmap() so it
is guaranteed to be non-NULL, but we need to check graph_map != NULL
when we're calling this directly from the fuzz tests, exactly in the
same spirit that we check graph_size above.  Besides, these are to
make sure future callers won't misuse the API.

>  	data = (const unsigned char *)graph_map;

And the reset of the function is the same as the original modulo
jumping to the cleanup_fail label has been replaced with returning
NULL.

Looks good.

> ...
> -
> -cleanup_fail:
> -	munmap(graph_map, graph_size);
> -	close(fd);
> -	exit(1);
>  }
>  
>  static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
> diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
> new file mode 100644
> index 0000000000..420851d0d2
> --- /dev/null
> +++ b/fuzz-commit-graph.c
> @@ -0,0 +1,18 @@
> +#include "object-store.h"
> +#include "commit-graph.h"
> +
> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
> +{
> +	struct commit_graph *g;
> +
> +	g = parse_commit_graph((void *) data, -1, size);
> +	if (g)
> +		free(g);
> +
> +	return 0;
> +}
Junio C Hamano Dec. 6, 2018, 1:41 a.m. UTC | #4
Junio C Hamano <gitster@pobox.com> writes:

>> +	if (graph_size < GRAPH_MIN_SIZE)
>> +		return NULL;
>> +
>
> The load_commit_graph_one() grabbed graph_map out of xmmap() so it
> is guaranteed to be non-NULL, but we need to check graph_map != NULL
> when we're calling this directly from the fuzz tests, exactly in the
> same spirit that we check graph_size above.  Besides, these are to
> make sure future callers won't misuse the API.

Insert "Please check graph_map != NULL here, too." before the above
paragraph.

>>  	data = (const unsigned char *)graph_map;
>
> And the reset of the function is the same as the original modulo
> jumping to the cleanup_fail label has been replaced with returning
> NULL.
>
> Looks good.

Of course, s/reset/rest/ is what I meant.
Junio C Hamano Dec. 6, 2018, 4:47 a.m. UTC | #5
Josh Steadmon <steadmon@google.com> writes:

> diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
> new file mode 100644
> index 0000000000..420851d0d2
> --- /dev/null
> +++ b/fuzz-commit-graph.c
> @@ -0,0 +1,18 @@
> +#include "object-store.h"
> +#include "commit-graph.h"
> +
> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
> +{
> +	struct commit_graph *g;
> +
> +	g = parse_commit_graph((void *) data, -1, size);
> +	if (g)
> +		free(g);

As it is perfectly OK to free(NULL), please lose "if (g)" and a
level of indentation; otherwise, "make coccicheck" would complain.

Thanks.

> +	return 0;
> +}
diff mbox series

Patch

diff --git a/.gitignore b/.gitignore
index 0d77ea5894..8bcf153ed9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@ 
+/fuzz-commit-graph
 /fuzz_corpora
 /fuzz-pack-headers
 /fuzz-pack-idx
diff --git a/Makefile b/Makefile
index 1a44c811aa..6b72f37c29 100644
--- a/Makefile
+++ b/Makefile
@@ -684,6 +684,7 @@  SCRIPTS = $(SCRIPT_SH_INS) \
 
 ETAGS_TARGET = TAGS
 
+FUZZ_OBJS += fuzz-commit-graph.o
 FUZZ_OBJS += fuzz-pack-headers.o
 FUZZ_OBJS += fuzz-pack-idx.o
 
diff --git a/commit-graph.c b/commit-graph.c
index 40c855f185..0755359b1a 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -46,6 +46,10 @@ 
 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
 			+ GRAPH_FANOUT_SIZE + GRAPH_OID_LEN)
 
+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+					size_t graph_size);
+
+
 char *get_commit_graph_filename(const char *obj_dir)
 {
 	return xstrfmt("%s/info/commit-graph", obj_dir);
@@ -84,16 +88,10 @@  static int commit_graph_compatible(struct repository *r)
 struct commit_graph *load_commit_graph_one(const char *graph_file)
 {
 	void *graph_map;
-	const unsigned char *data, *chunk_lookup;
 	size_t graph_size;
 	struct stat st;
-	uint32_t i;
-	struct commit_graph *graph;
+	struct commit_graph *ret;
 	int fd = git_open(graph_file);
-	uint64_t last_chunk_offset;
-	uint32_t last_chunk_id;
-	uint32_t graph_signature;
-	unsigned char graph_version, hash_version;
 
 	if (fd < 0)
 		return NULL;
@@ -108,27 +106,61 @@  struct commit_graph *load_commit_graph_one(const char *graph_file)
 		die(_("graph file %s is too small"), graph_file);
 	}
 	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	ret = parse_commit_graph(graph_map, fd, graph_size);
+
+	if (ret == NULL) {
+		munmap(graph_map, graph_size);
+		close(fd);
+		exit(1);
+	}
+
+	return ret;
+}
+
+/*
+ * This function is intended to be used only from load_commit_graph_one() or in
+ * fuzz tests.
+ */
+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+					size_t graph_size)
+{
+	const unsigned char *data, *chunk_lookup;
+	uint32_t i;
+	struct commit_graph *graph;
+	uint64_t last_chunk_offset;
+	uint32_t last_chunk_id;
+	uint32_t graph_signature;
+	unsigned char graph_version, hash_version;
+
+	/*
+	 * This should already be checked in load_commit_graph_one, but we still
+	 * need a check here for when we're calling parse_commit_graph directly
+	 * from fuzz tests. We can omit the error message in that case.
+	 */
+	if (graph_size < GRAPH_MIN_SIZE)
+		return NULL;
+
 	data = (const unsigned char *)graph_map;
 
 	graph_signature = get_be32(data);
 	if (graph_signature != GRAPH_SIGNATURE) {
 		error(_("graph signature %X does not match signature %X"),
 		      graph_signature, GRAPH_SIGNATURE);
-		goto cleanup_fail;
+		return NULL;
 	}
 
 	graph_version = *(unsigned char*)(data + 4);
 	if (graph_version != GRAPH_VERSION) {
 		error(_("graph version %X does not match version %X"),
 		      graph_version, GRAPH_VERSION);
-		goto cleanup_fail;
+		return NULL;
 	}
 
 	hash_version = *(unsigned char*)(data + 5);
 	if (hash_version != GRAPH_OID_VERSION) {
 		error(_("hash version %X does not match version %X"),
 		      hash_version, GRAPH_OID_VERSION);
-		goto cleanup_fail;
+		return NULL;
 	}
 
 	graph = alloc_commit_graph();
@@ -152,7 +184,8 @@  struct commit_graph *load_commit_graph_one(const char *graph_file)
 		if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
 			error(_("improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
 			      (uint32_t)chunk_offset);
-			goto cleanup_fail;
+			free(graph);
+			return NULL;
 		}
 
 		switch (chunk_id) {
@@ -187,7 +220,8 @@  struct commit_graph *load_commit_graph_one(const char *graph_file)
 
 		if (chunk_repeated) {
 			error(_("chunk id %08x appears multiple times"), chunk_id);
-			goto cleanup_fail;
+			free(graph);
+			return NULL;
 		}
 
 		if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
@@ -201,11 +235,6 @@  struct commit_graph *load_commit_graph_one(const char *graph_file)
 	}
 
 	return graph;
-
-cleanup_fail:
-	munmap(graph_map, graph_size);
-	close(fd);
-	exit(1);
 }
 
 static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
new file mode 100644
index 0000000000..420851d0d2
--- /dev/null
+++ b/fuzz-commit-graph.c
@@ -0,0 +1,18 @@ 
+#include "object-store.h"
+#include "commit-graph.h"
+
+struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+					size_t graph_size);
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+	struct commit_graph *g;
+
+	g = parse_commit_graph((void *) data, -1, size);
+	if (g)
+		free(g);
+
+	return 0;
+}