diff mbox series

[v2,1/2] cocci: add and apply a rule to find "unused" variables

Message ID patch-v2-1.2-d14036521ab-20220621T223954Z-avarab@gmail.com (mailing list archive)
State Superseded
Headers show
Series add and apply a rule to find "unused" init+free | expand

Commit Message

Ævar Arnfjörð Bjarmason June 21, 2022, 10:44 p.m. UTC
Add a coccinelle rule to remove variable initialization followed by
calling a "release" function. See extensive commentary in the new
"unused.cocci" for how it works, and what it's intended to find and
replace.

The inclusion of "contrib/scalar/scalar.c" is because "spatch" was
manually run on it (we don't usually run spatch on contrib).

The use of "when strict" here will be explained and amended in the
following commit.

1. https://lore.kernel.org/git/042d624b8159364229e95d35e9309f12b67f8173.1652977582.git.gitgitgadget@gmail.com/

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 builtin/fetch.c                 |  3 +-
 builtin/merge.c                 |  2 -
 contrib/coccinelle/unused.cocci | 66 +++++++++++++++++++++++++++++++++
 contrib/scalar/scalar.c         |  3 +-
 diff.c                          |  2 -
 5 files changed, 68 insertions(+), 8 deletions(-)
 create mode 100644 contrib/coccinelle/unused.cocci

Comments

Junio C Hamano June 22, 2022, 4:02 p.m. UTC | #1
Ævar Arnfjörð Bjarmason  <avarab@gmail.com> writes:

> +identifier INIT_ASSIGN1 =~ "^get_worktrees$";
> +// strbuf_init(&I, ...) etc.
> +identifier INIT_CALL1 =~ "^[a-z_]*_init$";
> +// stbuf_release(), string_list_clear() etc.

strbuf?

> +identifier REL1 =~ "^[a-z_]*_(release|clear|free)$";
> +// release_patch(), clear_pathspec() etc.
> +identifier REL2 =~ "^(release|clear|free)_[a-z_]*$";
> +@@

I am hesitant to see this broad set of patterns that could match
init/release functions (and possible false positive matches).

Especially given that it ended up finding only 4 instances, all of
the same "STRBUF_INIT" followed by "strbuf_release()", which means
that all other possible matches, when they actually are found, will
be seen by developers who are not necessarily familiar with these
rules before they are inspected by those who are for correctness.

It would be nice to have a step that catches only strbuf_init(),
STRBUF_INIT, strbuf_release(), and nothing else, possibly with
another step with concrete function names, with other "presumably
functions whose name match this loose pattern are all release
functions" patterns in a separate follow-up patch so that the last
one can easily be reverted.

> +// .. A declaration like "struct strbuf buf;"...
> +(
> +- T I;
> +// ... or "struct STRBUF buf = STRBUF_INIT;" ...
> +|
> +- T I = INIT;
> +)

Presumably, if either of the above followed by foo_release(I) should
be caught, then we should catch "T I = { 0 };" followed by a release
as well.  Initialization "T I = { 1, };" for a type without _INIT
macro is also the same story.

Given that, do we even need to limit the forms of declaration?  The
only thing we care about is that I is new in this scope, and I is
not used otherwise, in a way other than (1) calling _init() function
on it, or (2) calling _release() function on it, before leaving the
scope, right?

Thanks.
diff mbox series

Patch

diff --git a/builtin/fetch.c b/builtin/fetch.c
index ac29c2b1ae3..8a3ae71fed0 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1113,7 +1113,7 @@  static int store_updated_refs(const char *raw_url, const char *remote_name,
 			      struct fetch_head *fetch_head, struct worktree **worktrees)
 {
 	int url_len, i, rc = 0;
-	struct strbuf note = STRBUF_INIT, err = STRBUF_INIT;
+	struct strbuf note = STRBUF_INIT;
 	const char *what, *kind;
 	struct ref *rm;
 	char *url;
@@ -1281,7 +1281,6 @@  static int store_updated_refs(const char *raw_url, const char *remote_name,
 
  abort:
 	strbuf_release(&note);
-	strbuf_release(&err);
 	free(url);
 	return rc;
 }
diff --git a/builtin/merge.c b/builtin/merge.c
index d9784d4891c..bbd70b17bc6 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -502,7 +502,6 @@  static void merge_name(const char *remote, struct strbuf *msg)
 {
 	struct commit *remote_head;
 	struct object_id branch_head;
-	struct strbuf buf = STRBUF_INIT;
 	struct strbuf bname = STRBUF_INIT;
 	struct merge_remote_desc *desc;
 	const char *ptr;
@@ -590,7 +589,6 @@  static void merge_name(const char *remote, struct strbuf *msg)
 		oid_to_hex(&remote_head->object.oid), remote);
 cleanup:
 	free(found_ref);
-	strbuf_release(&buf);
 	strbuf_release(&bname);
 }
 
diff --git a/contrib/coccinelle/unused.cocci b/contrib/coccinelle/unused.cocci
new file mode 100644
index 00000000000..45452f8979a
--- /dev/null
+++ b/contrib/coccinelle/unused.cocci
@@ -0,0 +1,66 @@ 
+// This rule finds sequences of "unused" declarations, init and
+// release(). E.g.:
+//
+//	struct strbuf buf = STRBUF_INIT;
+//      [.. no other use of "buf" in the function ..]
+//	strbuf_release(&buf)
+//
+// To do this we find (continued below)...
+@@
+type T;
+identifier I;
+// STRBUF_INIT, but also e.g. STRING_LIST_INIT_DUP (so no anchoring)
+constant INIT =~ "_INIT";
+// I = get_worktrees() etc.
+identifier INIT_ASSIGN1 =~ "^get_worktrees$";
+// strbuf_init(&I, ...) etc.
+identifier INIT_CALL1 =~ "^[a-z_]*_init$";
+// stbuf_release(), string_list_clear() etc.
+identifier REL1 =~ "^[a-z_]*_(release|clear|free)$";
+// release_patch(), clear_pathspec() etc.
+identifier REL2 =~ "^(release|clear|free)_[a-z_]*$";
+@@
+
+// .. A declaration like "struct strbuf buf;"...
+(
+- T I;
+// ... or "struct STRBUF buf = STRBUF_INIT;" ...
+|
+- T I = INIT;
+)
+
+// ... Optionally followed by lines that make no use of "buf", "&buf"
+// etc., but which ...
+<... when != \( I \| &I \)
+     when strict
+// .. (only) make use of "buf" or "&buf" to call something like
+// "strbuf_init(&buf, ...)" ...
+(
+- \( INIT_CALL1 \)( \( I \| &I \), ...);
+|
+// .. or e.g. "worktrees = get_worktrees();", i.e. a known "assignment
+// init" ...
+- I = \( INIT_ASSIGN1 \)(...);
+)
+...>
+
+// ... and then no mention of "buf" or "&buf" until we get to a
+// strbuf_release(&buf) at the end ...
+(
+- \( REL1 \| REL2 \)( \( I \| &I \), ...);
+|
+- \( REL1 \| REL2 \)( \( &I \| I \) );
+)
+// ... and no use *after* either, e.g. we don't want to delete
+// init/strbuf_release() patterns, where "&buf" could be used
+// afterwards.
+  ... when != \( I \| &I \)
+      when strict
+// Note that we're intentionally loose in accepting e.g. a
+// "strbuf_init(&buf)" followed by a "string_list_clear(&buf,
+// 0)". It's assumed that the compiler will catch any such invalid
+// code, i.e. that our constructors/destructors don't take a "void *".
+//
+// This rule also isn't capable of finding cases where &buf is used,
+// but only to e.g. pass that variable to a static function which
+// doesn't use it. The analysis is only function-local.
diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c
index 28176914e57..97e71fe19cd 100644
--- a/contrib/scalar/scalar.c
+++ b/contrib/scalar/scalar.c
@@ -687,7 +687,7 @@  static int cmd_diagnose(int argc, const char **argv)
 	int stdout_fd = -1, archiver_fd = -1;
 	time_t now = time(NULL);
 	struct tm tm;
-	struct strbuf path = STRBUF_INIT, buf = STRBUF_INIT;
+	struct strbuf buf = STRBUF_INIT;
 	int res = 0;
 
 	argc = parse_options(argc, argv, NULL, options,
@@ -779,7 +779,6 @@  static int cmd_diagnose(int argc, const char **argv)
 	free(argv_copy);
 	strvec_clear(&archiver_args);
 	strbuf_release(&zip_path);
-	strbuf_release(&path);
 	strbuf_release(&buf);
 
 	return res;
diff --git a/diff.c b/diff.c
index e71cf758861..d4290615aaa 100644
--- a/diff.c
+++ b/diff.c
@@ -1289,7 +1289,6 @@  static void emit_diff_symbol_from_struct(struct diff_options *o,
 {
 	static const char *nneof = " No newline at end of file\n";
 	const char *context, *reset, *set, *set_sign, *meta, *fraginfo;
-	struct strbuf sb = STRBUF_INIT;
 
 	enum diff_symbol s = eds->s;
 	const char *line = eds->line;
@@ -1521,7 +1520,6 @@  static void emit_diff_symbol_from_struct(struct diff_options *o,
 	default:
 		BUG("unknown diff symbol");
 	}
-	strbuf_release(&sb);
 }
 
 static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,