Message ID | 575b4b2c2b8a5e800bb65b99f1fcdd6aaae63f94.1743079429.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Initialize a few uninitialized variables | expand |
On Thu, Mar 27, 2025 at 12:43:47PM +0000, Johannes Schindelin via GitGitGadget wrote: > From: Johannes Schindelin <johannes.schindelin@gmx.de> > > In `fsck_commit()`, after counting the authors of a commit, we set the > `err` variable either when there was no author, or when there were more > than two authors recorded. Then we access the `err` variable to figure > out whether we should return early. But if there was exactly one author, > that variable is still uninitialized. > > Let's just initialize the variable. > > This issue was pointed out by CodeQL. Hmm, I'd think we would hit this case all the time, since commits generally have one author. But I think it's another false positive. The code in question is this: author_count = 0; while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) { author_count++; err = fsck_ident(&buffer, oid, OBJ_COMMIT, options); if (err) return err; } if (author_count < 1) err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line"); else if (author_count > 1) err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines"); if (err) return err; So we set "err" as soon as we find _any_ author (when we check whether it is properly formatted via fsck_ident). And author_count will not be incremented if we did not find one. So either we must have assigned the result of fsck_ident(), or we will hit the "author_count < 1" case and assign there. It's certainly confusing, though, since "err" gets used in so many spots. I think the whole thing would be easier to understand if we had tighter-scoped single use variables like this: diff --git a/fsck.c b/fsck.c index 9fc4c25ffd..ea72b3247d 100644 --- a/fsck.c +++ b/fsck.c @@ -925,7 +925,6 @@ static int fsck_commit(const struct object_id *oid, { struct object_id tree_oid, parent_oid; unsigned author_count; - int err; const char *buffer_begin = buffer; const char *buffer_end = buffer + size; const char *p; @@ -941,39 +940,44 @@ static int fsck_commit(const struct object_id *oid, if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer)) return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line"); if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') { - err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1"); + int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1"); if (err) return err; } buffer = p + 1; while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) { if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') { - err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1"); + int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1"); if (err) return err; } buffer = p + 1; } author_count = 0; while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) { + int err = fsck_ident(&buffer, oid, OBJ_COMMIT, options); + if (err) + return err; author_count++; - err = fsck_ident(&buffer, oid, OBJ_COMMIT, options); + } + if (author_count < 1) { + int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line"); + if (err) + return err; + } else if (author_count > 1) { + int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines"); if (err) return err; } - if (author_count < 1) - err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line"); - else if (author_count > 1) - err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines"); - if (err) - return err; if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer)) return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line"); - err = fsck_ident(&buffer, oid, OBJ_COMMIT, options); - if (err) - return err; + else { + int err = fsck_ident(&buffer, oid, OBJ_COMMIT, options); + if (err) + return err; + } if (memchr(buffer_begin, '\0', size)) { - err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT, + int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT, "NUL byte in the commit object body"); if (err) return err; And then it is obvious that the general pattern is to propagate "err" from individual calls (and the ones that do not stick out like sore thumbs; are those bugs where we should keep going if the user set those message types to warn/ignore?). You could even wrap the pattern in a macro, though perhaps that is getting too magical. The resulting logic is easier to follow, though, if you can look past the macro: diff --git a/fsck.c b/fsck.c index ea72b3247d..8c7ac3c448 100644 --- a/fsck.c +++ b/fsck.c @@ -919,6 +919,12 @@ static int fsck_ident(const char **ident, return 0; } +#define MAYBE_RETURN(x) do { \ + int err = (x); \ + if (err) \ + return err; \ +} while (0) + static int fsck_commit(const struct object_id *oid, const char *buffer, unsigned long size, struct fsck_options *options) @@ -939,49 +945,30 @@ static int fsck_commit(const struct object_id *oid, if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer)) return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line"); - if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') { - int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1"); - if (err) - return err; - } + if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') + MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1")); buffer = p + 1; while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) { - if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') { - int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1"); - if (err) - return err; - } + if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') + MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1")); buffer = p + 1; } author_count = 0; while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) { - int err = fsck_ident(&buffer, oid, OBJ_COMMIT, options); - if (err) - return err; + MAYBE_RETURN(fsck_ident(&buffer, oid, OBJ_COMMIT, options)); author_count++; } - if (author_count < 1) { - int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line"); - if (err) - return err; - } else if (author_count > 1) { - int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines"); - if (err) - return err; - } + if (author_count < 1) + MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line")); + else if (author_count > 1) + MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines")); if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer)) return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line"); - else { - int err = fsck_ident(&buffer, oid, OBJ_COMMIT, options); - if (err) - return err; - } - if (memchr(buffer_begin, '\0', size)) { - int err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT, - "NUL byte in the commit object body"); - if (err) - return err; - } + else + MAYBE_RETURN(fsck_ident(&buffer, oid, OBJ_COMMIT, options)); + + if (memchr(buffer_begin, '\0', size)) + MAYBE_RETURN(report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT, "NUL byte in the commit object body")); return 0; } I'd suspect that just the first patch above would fix the CodeQL issue. It's certainly a larger diff, but IMHO the result is less confusing for humans, too. -Peff
diff --git a/fsck.c b/fsck.c index 9fc4c25ffd5..ad04b24ff13 100644 --- a/fsck.c +++ b/fsck.c @@ -925,7 +925,7 @@ static int fsck_commit(const struct object_id *oid, { struct object_id tree_oid, parent_oid; unsigned author_count; - int err; + int err = 0; const char *buffer_begin = buffer; const char *buffer_end = buffer + size; const char *p;