diff mbox series

[v3,4/7] shortlog: support arbitrary commit format `--group`s

Message ID f75a952421ee1ab531d9977252d6594346a94583.1666391136.git.me@ttaylorr.com (mailing list archive)
State New, archived
Headers show
Series shortlog: introduce `--group=<format>` | expand

Commit Message

Taylor Blau Oct. 21, 2022, 10:25 p.m. UTC
In addition to generating a shortlog based on committer, author, or the
identity in one or more specified trailers, it can be useful to generate
a shortlog based on an arbitrary commit format.

This can be used, for example, to generate a distribution of commit
activity over time, like so:

    $ git shortlog --group='%cd' --date='format:%Y-%m' -s v2.37.0..
       117  2022-06
       274  2022-07
       324  2022-08
       263  2022-09
         7  2022-10

Arbitrary commit formats can be used. In fact, `git shortlog`'s default
behavior (to count by commit authors) can be emulated as follows:

    $ git shortlog --group='%aN <%aE>' ...

and future patches will make the default behavior (as well as
`--committer`, and `--group=trailer:<trailer>`) special cases of the
more flexible `--group` option.

Note also that the SHORTLOG_GROUP_FORMAT enum value is used only to
designate that `--group:<format>` is in use when in stdin mode to
declare that the combination is invalid.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 Documentation/git-shortlog.txt |  5 ++++-
 builtin/shortlog.c             | 41 +++++++++++++++++++++++++++++++++-
 shortlog.h                     |  2 ++
 t/t4201-shortlog.sh            | 32 ++++++++++++++++++++++++++
 4 files changed, 78 insertions(+), 2 deletions(-)

Comments

Jeff King Oct. 22, 2022, 12:28 a.m. UTC | #1
On Fri, Oct 21, 2022 at 06:25:49PM -0400, Taylor Blau wrote:

> @@ -203,6 +205,32 @@ static void insert_records_from_trailers(struct shortlog *log,
>  	unuse_commit_buffer(commit, commit_buffer);
>  }
>  
> +static int shortlog_needs_dedup(const struct shortlog *log)
> +{
> +	return log->format.nr > 1 || log->trailers.nr;
> +}

So this is obviously much nicer to read. But I think there might be a
weird logic error during the series. Since we haven't yet folded
SHORTLOG_GROUP_AUTHOR, etc, into the "format" code, I think at this
point it still needs:

  HAS_MULTI_BITS(log->groups)

at the beginning of the OR-chain. Otherwise "shortlog --group=author
--format=%an" would fail to dedup, I think.

It's hard to care too much, since the end result of the series is
correct, and you'd end up just removing that part of the line in the
final patch. So I could go either way on re-rolling.

Sorry for not catching this in the last round.

-Peff
diff mbox series

Patch

diff --git a/Documentation/git-shortlog.txt b/Documentation/git-shortlog.txt
index 9ed9d6a9e7..7d0277d033 100644
--- a/Documentation/git-shortlog.txt
+++ b/Documentation/git-shortlog.txt
@@ -50,7 +50,7 @@  OPTIONS
 --date=<format>::
 	Show dates formatted according to the given date string. (See
 	the `--date` option in the "Commit Formatting" section of
-	linkgit:git-log[1]).
+	linkgit:git-log[1]). Useful with `--group=format:<format>`.
 
 --group=<type>::
 	Group commits based on `<type>`. If no `--group` option is
@@ -64,6 +64,9 @@  OPTIONS
    example, if your project uses `Reviewed-by` trailers, you might want
    to see who has been reviewing with
    `git shortlog -ns --group=trailer:reviewed-by`.
+ - `format:<format>`, any string accepted by the `--format` option of
+   'git log'. (See the "PRETTY FORMATS" section of
+   linkgit:git-log[1].)
 +
 Note that commits that do not include the trailer will not be counted.
 Likewise, commits with multiple trailers (e.g., multiple signoffs) may
diff --git a/builtin/shortlog.c b/builtin/shortlog.c
index d0645769d7..a52288acab 100644
--- a/builtin/shortlog.c
+++ b/builtin/shortlog.c
@@ -133,6 +133,8 @@  static void read_from_stdin(struct shortlog *log)
 		break;
 	case SHORTLOG_GROUP_TRAILER:
 		die(_("using %s with stdin is not supported"), "--group=trailer");
+	case SHORTLOG_GROUP_FORMAT:
+		die(_("using %s with stdin is not supported"), "--group=format");
 	default:
 		BUG("unhandled shortlog group");
 	}
@@ -203,6 +205,32 @@  static void insert_records_from_trailers(struct shortlog *log,
 	unuse_commit_buffer(commit, commit_buffer);
 }
 
+static int shortlog_needs_dedup(const struct shortlog *log)
+{
+	return log->format.nr > 1 || log->trailers.nr;
+}
+
+static void insert_records_from_format(struct shortlog *log,
+				       struct strset *dups,
+				       struct commit *commit,
+				       struct pretty_print_context *ctx,
+				       const char *oneline)
+{
+	struct strbuf buf = STRBUF_INIT;
+	struct string_list_item *item;
+
+	for_each_string_list_item(item, &log->format) {
+		strbuf_reset(&buf);
+
+		format_commit_message(commit, item->string, &buf, ctx);
+
+		if (!shortlog_needs_dedup(log) || strset_add(dups, buf.buf))
+			insert_one_record(log, buf.buf, oneline);
+	}
+
+	strbuf_release(&buf);
+}
+
 void shortlog_add_commit(struct shortlog *log, struct commit *commit)
 {
 	struct strbuf ident = STRBUF_INIT;
@@ -244,6 +272,7 @@  void shortlog_add_commit(struct shortlog *log, struct commit *commit)
 			insert_one_record(log, ident.buf, oneline_str);
 	}
 	insert_records_from_trailers(log, &dups, commit, &ctx, oneline_str);
+	insert_records_from_format(log, &dups, commit, &ctx, oneline_str);
 
 	strset_clear(&dups);
 	strbuf_release(&ident);
@@ -315,6 +344,7 @@  static int parse_group_option(const struct option *opt, const char *arg, int uns
 	if (unset) {
 		log->groups = 0;
 		string_list_clear(&log->trailers, 0);
+		string_list_clear(&log->format, 0);
 	} else if (!strcasecmp(arg, "author"))
 		log->groups |= SHORTLOG_GROUP_AUTHOR;
 	else if (!strcasecmp(arg, "committer"))
@@ -322,8 +352,15 @@  static int parse_group_option(const struct option *opt, const char *arg, int uns
 	else if (skip_prefix(arg, "trailer:", &field)) {
 		log->groups |= SHORTLOG_GROUP_TRAILER;
 		string_list_append(&log->trailers, field);
-	} else
+	} else if (skip_prefix(arg, "format:", &field)) {
+		log->groups |= SHORTLOG_GROUP_FORMAT;
+		string_list_append(&log->format, field);
+	} else if (strchr(arg, '%')) {
+		log->groups |= SHORTLOG_GROUP_FORMAT;
+		string_list_append(&log->format, arg);
+	} else {
 		return error(_("unknown group type: %s"), arg);
+	}
 
 	return 0;
 }
@@ -341,6 +378,7 @@  void shortlog_init(struct shortlog *log)
 	log->in2 = DEFAULT_INDENT2;
 	log->trailers.strdup_strings = 1;
 	log->trailers.cmp = strcasecmp;
+	log->format.strdup_strings = 1;
 }
 
 int cmd_shortlog(int argc, const char **argv, const char *prefix)
@@ -481,4 +519,5 @@  void shortlog_output(struct shortlog *log)
 	log->list.strdup_strings = 1;
 	string_list_clear(&log->list, 1);
 	clear_mailmap(&log->mailmap);
+	string_list_clear(&log->format, 0);
 }
diff --git a/shortlog.h b/shortlog.h
index dc388dd459..4850a8c30f 100644
--- a/shortlog.h
+++ b/shortlog.h
@@ -22,8 +22,10 @@  struct shortlog {
 		SHORTLOG_GROUP_AUTHOR = (1 << 0),
 		SHORTLOG_GROUP_COMMITTER = (1 << 1),
 		SHORTLOG_GROUP_TRAILER = (1 << 2),
+		SHORTLOG_GROUP_FORMAT = (1 << 3),
 	} groups;
 	struct string_list trailers;
+	struct string_list format;
 
 	int email;
 	struct string_list mailmap;
diff --git a/t/t4201-shortlog.sh b/t/t4201-shortlog.sh
index 7547da539d..8e4effebdb 100755
--- a/t/t4201-shortlog.sh
+++ b/t/t4201-shortlog.sh
@@ -244,6 +244,26 @@  test_expect_success 'shortlog --group=trailer:signed-off-by' '
 	test_cmp expect actual
 '
 
+test_expect_success 'shortlog --group=format' '
+	git shortlog -s --date="format:%Y" --group="format:%cN (%cd)" \
+		HEAD >actual &&
+	cat >expect <<-\EOF &&
+	     4	C O Mitter (2005)
+	     1	Sin Nombre (2005)
+	EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'shortlog --group=<format> DWIM' '
+	git shortlog -s --date="format:%Y" --group="%cN (%cd)" HEAD >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'shortlog bogus --group' '
+	test_must_fail git shortlog --group=bogus HEAD 2>err &&
+	grep "unknown group type" err
+'
+
 test_expect_success 'trailer idents are split' '
 	cat >expect <<-\EOF &&
 	     2	C O Mitter
@@ -326,6 +346,18 @@  test_expect_success 'shortlog can match multiple groups' '
 	test_cmp expect actual
 '
 
+test_expect_success 'shortlog can match multiple format groups' '
+	GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME" \
+		git commit --allow-empty -m "identical names" &&
+	test_tick &&
+	cat >expect <<-\EOF &&
+	     2	A U Thor
+	     1	C O Mitter
+	EOF
+	git shortlog -ns --group="%cn" --group="%an" -2 HEAD >actual &&
+	test_cmp expect actual
+'
+
 test_expect_success 'set up option selection tests' '
 	git commit --allow-empty -F - <<-\EOF
 	subject