diff mbox series

[2/4] fsck: avoid using an uninitialized variable

Message ID 575b4b2c2b8a5e800bb65b99f1fcdd6aaae63f94.1743079429.git.gitgitgadget@gmail.com (mailing list archive)
State New
Headers show
Series Initialize a few uninitialized variables | expand

Commit Message

Johannes Schindelin March 27, 2025, 12:43 p.m. UTC
From: Johannes Schindelin <johannes.schindelin@gmx.de>

In `fsck_commit()`, after counting the authors of a commit, we set the
`err` variable either when there was no author, or when there were more
than two authors recorded. Then we access the `err` variable to figure
out whether we should return early. But if there was exactly one author,
that variable is still uninitialized.

Let's just initialize the variable.

This issue was pointed out by CodeQL.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 fsck.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Comments

Jeff King March 28, 2025, 4:07 a.m. UTC | #1
On Thu, Mar 27, 2025 at 12:43:47PM +0000, Johannes Schindelin via GitGitGadget wrote:

> From: Johannes Schindelin <johannes.schindelin@gmx.de>
> 
> In `fsck_commit()`, after counting the authors of a commit, we set the
> `err` variable either when there was no author, or when there were more
> than two authors recorded. Then we access the `err` variable to figure
> out whether we should return early. But if there was exactly one author,
> that variable is still uninitialized.
> 
> Let's just initialize the variable.
> 
> This issue was pointed out by CodeQL.

Hmm, I'd think we would hit this case all the time, since commits
generally have one author. But I think it's another false positive.

The code in question is this:

          author_count = 0;
          while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
                  author_count++;
                  err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
                  if (err)
                          return err;
          }
          if (author_count < 1)
                  err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
          else if (author_count > 1)
                  err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
          if (err)
                  return err;

So we set "err" as soon as we find _any_ author (when we check whether
it is properly formatted via fsck_ident). And author_count will not be
incremented if we did not find one. So either we must have assigned
the result of fsck_ident(), or we will hit the "author_count < 1" case
and assign there.

It's certainly confusing, though, since "err" gets used in so many
spots. I think the whole thing would be easier to understand if we had
tighter-scoped single use variables like this:

diff --git a/fsck.c b/fsck.c
index 9fc4c25ffd..ea72b3247d 100644
--- a/fsck.c
+++ b/fsck.c
@@ -925,7 +925,6 @@ static int fsck_commit(const struct object_id *oid,
 {
 	struct object_id tree_oid, parent_oid;
 	unsigned author_count;
-	int err;
 	const char *buffer_begin = buffer;
 	const char *buffer_end = buffer + size;
 	const char *p;
@@ -941,39 +940,44 @@ static int fsck_commit(const struct object_id *oid,
 	if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer))
 		return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
 	if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
-		err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
+		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
 		if (err)
 			return err;
 	}
 	buffer = p + 1;
 	while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) {
 		if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
-			err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
+			int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
 			if (err)
 				return err;
 		}
 		buffer = p + 1;
 	}
 	author_count = 0;
 	while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
+		int err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
+		if (err)
+			return err;
 		author_count++;
-		err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
+	}
+	if (author_count < 1) {
+		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
+		if (err)
+			return err;
+	} else if (author_count > 1) {
+		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
 		if (err)
 			return err;
 	}
-	if (author_count < 1)
-		err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
-	else if (author_count > 1)
-		err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
-	if (err)
-		return err;
 	if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer))
 		return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
-	err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
-	if (err)
-		return err;
+	else {
+		int err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
+		if (err)
+			return err;
+	}
 	if (memchr(buffer_begin, '\0', size)) {
-		err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
+		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
 			     "NUL byte in the commit object body");
 		if (err)
 			return err;

And then it is obvious that the general pattern is to propagate "err"
from individual calls (and the ones that do not stick out like sore
thumbs; are those bugs where we should keep going if the user set those
message types to warn/ignore?).

You could even wrap the pattern in a macro, though perhaps that is
getting too magical. The resulting logic is easier to follow, though, if
you can look past the macro:

diff --git a/fsck.c b/fsck.c
index ea72b3247d..8c7ac3c448 100644
--- a/fsck.c
+++ b/fsck.c
@@ -919,6 +919,12 @@ static int fsck_ident(const char **ident,
 	return 0;
 }
 
+#define MAYBE_RETURN(x) do { \
+	int err = (x); \
+	if (err) \
+		return err; \
+} while (0)
+
 static int fsck_commit(const struct object_id *oid,
 		       const char *buffer, unsigned long size,
 		       struct fsck_options *options)
@@ -939,49 +945,30 @@ static int fsck_commit(const struct object_id *oid,
 
 	if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer))
 		return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
-	if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
-		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
-		if (err)
-			return err;
-	}
+	if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n')
+		MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1"));
 	buffer = p + 1;
 	while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) {
-		if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
-			int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
-			if (err)
-				return err;
-		}
+		if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n')
+			MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1"));
 		buffer = p + 1;
 	}
 	author_count = 0;
 	while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
-		int err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
-		if (err)
-			return err;
+		MAYBE_RETURN(fsck_ident(&buffer, oid, OBJ_COMMIT, options));
 		author_count++;
 	}
-	if (author_count < 1) {
-		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
-		if (err)
-			return err;
-	} else if (author_count > 1) {
-		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
-		if (err)
-			return err;
-	}
+	if (author_count < 1)
+		MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line"));
+	else if (author_count > 1)
+		MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines"));
 	if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer))
 		return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
-	else {
-		int err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
-		if (err)
-			return err;
-	}
-	if (memchr(buffer_begin, '\0', size)) {
-		int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
-			     "NUL byte in the commit object body");
-		if (err)
-			return err;
-	}
+	else
+		MAYBE_RETURN(fsck_ident(&buffer, oid, OBJ_COMMIT, options));
+
+	if (memchr(buffer_begin, '\0', size))
+		MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT, "NUL byte in the commit object body"));
 	return 0;
 }
 

I'd suspect that just the first patch above would fix the CodeQL issue.
It's certainly a larger diff, but IMHO the result is less confusing for
humans, too.

-Peff
diff mbox series

Patch

diff --git a/fsck.c b/fsck.c
index 9fc4c25ffd5..ad04b24ff13 100644
--- a/fsck.c
+++ b/fsck.c
@@ -925,7 +925,7 @@  static int fsck_commit(const struct object_id *oid,
 {
 	struct object_id tree_oid, parent_oid;
 	unsigned author_count;
-	int err;
+	int err = 0;
 	const char *buffer_begin = buffer;
 	const char *buffer_end = buffer + size;
 	const char *p;