diff mbox series

[v3] rev-list: support human-readable output for `--disk-usage`

Message ID pull.1313.v3.git.1660111276934.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series [v3] rev-list: support human-readable output for `--disk-usage` | expand

Commit Message

Li Linchao Aug. 10, 2022, 6:01 a.m. UTC
From: Li Linchao <lilinchao@oschina.cn>

The '--disk-usage' option for git-rev-list was introduced in 16950f8384
(rev-list: add --disk-usage option for calculating disk usage, 2021-02-09).
This is very useful for people inspecting their git repo's object usage
information, but the resulting number is quite hard for a human to read.

Teach git rev-list to output a human readable result when using
'--disk-usage'.

Signed-off-by: Li Linchao <lilinchao@oschina.cn>
---
    rev-list: support human-readable output for disk-usage
    
    The '--disk-usage' option for git-rev-list was introduced in 16950f8384
    (rev-list: add --disk-usage option for calculating disk usage,
    2021-02-09). This is very useful for people inspecting their git repo's
    object usage information, but the resulting number is quite hard for a
    human to read.
    
    Teach git rev-list to output a more human-readable result when using
    '--disk-usage' to calculate object disk usage.
    
    Signed-off-by: Li Linchao <lilinchao@oschina.cn>

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1313%2FCactusinhand%2Fllc%2Fadd-human-readable-option-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1313/Cactusinhand/llc/add-human-readable-option-v3
Pull-Request: https://github.com/gitgitgadget/git/pull/1313

Range-diff vs v2:

 1:  7e34d16efe4 ! 1:  000a6b37ec9 rev-list: support human-readable output for `--disk-usage`
     @@ builtin/rev-list.c: static int arg_show_object_names = 1;
       
       static off_t get_object_disk_usage(struct object *obj)
       {
     +@@ builtin/rev-list.c: static int show_object_fast(
     + 	return 1;
     + }
     + 
     ++static void print_disk_usage(off_t size)
     ++{
     ++	struct strbuf sb = STRBUF_INIT;
     ++	if (human_readable)
     ++		strbuf_humanise_bytes(&sb, size);
     ++	else
     ++		strbuf_addf(&sb, "%"PRIuMAX, (uintmax_t)size);
     ++	puts(sb.buf);
     ++	strbuf_release(&sb);
     ++}
     ++
     + static inline int parse_missing_action_value(const char *value)
     + {
     + 	if (!strcmp(value, "error")) {
      @@ builtin/rev-list.c: static int try_bitmap_disk_usage(struct rev_info *revs,
       				 int filter_provided_objects)
       {
       	struct bitmap_index *bitmap_git;
     -+	struct strbuf disk_buf = STRBUF_INIT;
      +	off_t size_from_bitmap;
       
       	if (!show_disk_usage)
     @@ builtin/rev-list.c: static int try_bitmap_disk_usage(struct rev_info *revs,
      -	printf("%"PRIuMAX"\n",
      -	       (uintmax_t)get_disk_usage_from_bitmap(bitmap_git, revs));
      +	size_from_bitmap = get_disk_usage_from_bitmap(bitmap_git, revs);
     -+	if (human_readable)
     -+		strbuf_humanise_bytes(&disk_buf, size_from_bitmap);
     -+	else
     -+		strbuf_addf(&disk_buf, "%"PRIuMAX"", (uintmax_t)size_from_bitmap);
     -+	puts(disk_buf.buf);
     -+	strbuf_release(&disk_buf);
     ++	print_disk_usage(size_from_bitmap);
       	return 0;
       }
       
     @@ builtin/rev-list.c: int cmd_rev_list(int argc, const char **argv, const char *pr
       		}
       
      -		if (!strcmp(arg, "--disk-usage")) {
     --			show_disk_usage = 1;
     --			info.flags |= REV_LIST_QUIET;
     --			continue;
      +		if (skip_prefix(arg, "--disk-usage", &arg)) {
      +			if (*arg == '=') {
      +				if (!strcmp(++arg, "human")) {
      +					human_readable = 1;
     -+					show_disk_usage = 1;
     -+					info.flags |= REV_LIST_QUIET;
     -+					continue;
      +				} else
      +					die(_("invalid value for '%s': '%s', try --disk-usage=human"), "--disk-usage", arg);
     -+			} else {
     -+				show_disk_usage = 1;
     -+				info.flags |= REV_LIST_QUIET;
     -+				continue;
     ++			} else if (*arg) {
     ++				/*
     ++				* Arguably should goto a label to continue chain of ifs?
     ++				* Doesn't matter unless we try to add --disk-usage-foo
     ++				* afterwards
     ++				*/
     ++				usage(rev_list_usage);
      +			}
     - 		}
     - 
     - 		usage(rev_list_usage);
     + 			show_disk_usage = 1;
     + 			info.flags |= REV_LIST_QUIET;
     + 			continue;
      @@ builtin/rev-list.c: int cmd_rev_list(int argc, const char **argv, const char *prefix)
     - 			printf("%d\n", revs.count_left + revs.count_right);
       	}
       
     --	if (show_disk_usage)
     + 	if (show_disk_usage)
      -		printf("%"PRIuMAX"\n", (uintmax_t)total_disk_usage);
     -+	if (show_disk_usage) {
     -+		struct strbuf disk_buf = STRBUF_INIT;
     -+		if (human_readable)
     -+			strbuf_humanise_bytes(&disk_buf, total_disk_usage);
     -+		else
     -+			strbuf_addf(&disk_buf, "%"PRIuMAX"", (uintmax_t)total_disk_usage);
     -+		puts(disk_buf.buf);
     -+		strbuf_release(&disk_buf);
     -+	}
     ++		print_disk_usage(total_disk_usage);
       
       cleanup:
       	release_revisions(&revs);


 Documentation/rev-list-options.txt |  5 ++++-
 builtin/rev-list.c                 | 35 ++++++++++++++++++++++++++----
 t/t6115-rev-list-du.sh             | 22 +++++++++++++++++++
 3 files changed, 57 insertions(+), 5 deletions(-)


base-commit: 679aad9e82d0dfd8ef3d1f98fa4629665496cec9

Comments

Johannes Sixt Aug. 10, 2022, 7:18 a.m. UTC | #1
Am 10.08.22 um 08:01 schrieb Li Linchao via GitGitGadget:
>  --disk-usage::
> +--disk-usage=human::
>  	Suppress normal output; instead, print the sum of the bytes used
> -	for on-disk storage by the selected commits or objects. This is
> +	for on-disk storage by the selected commits or objects.
> +	When it accepts a value `human`, like: `--disk-usage=human`, this
> +	means to print objects size in human readable format. This is
>  	equivalent to piping the output into `git cat-file
>  	--batch-check='%(objectsize:disk)'`, except that it runs much
>  	faster (especially with `--use-bitmap-index`). See the `CAVEATS`

The original paragraph flows very well and explains what the option does
and how it computes the result. Please do not interrupt the flow of the
text with a whole sentence that should be just a parenthetical remark,
but add a sentence at the end, not in the middle.

You added a new feature, and I understand that it is important *to you*.
But do make it a habit to ask yourself if it is also important for the
general audience. Generally, a new feature is not as important as
existing features, otherwise, it would have been added earlier, wouldn't it?

-- Hannes
diff mbox series

Patch

diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 195e74eec63..9966ce4ef91 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -242,8 +242,11 @@  ifdef::git-rev-list[]
 	to `/dev/null` as the output does not have to be formatted.
 
 --disk-usage::
+--disk-usage=human::
 	Suppress normal output; instead, print the sum of the bytes used
-	for on-disk storage by the selected commits or objects. This is
+	for on-disk storage by the selected commits or objects.
+	When it accepts a value `human`, like: `--disk-usage=human`, this
+	means to print objects size in human readable format. This is
 	equivalent to piping the output into `git cat-file
 	--batch-check='%(objectsize:disk)'`, except that it runs much
 	faster (especially with `--use-bitmap-index`). See the `CAVEATS`
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 30fd8e83eaf..df42e1b667e 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -46,6 +46,7 @@  static const char rev_list_usage[] =
 "    --parents\n"
 "    --children\n"
 "    --objects | --objects-edge\n"
+"    --disk-usage | --disk-usage=human\n"
 "    --unpacked\n"
 "    --header | --pretty\n"
 "    --[no-]object-names\n"
@@ -81,6 +82,7 @@  static int arg_show_object_names = 1;
 
 static int show_disk_usage;
 static off_t total_disk_usage;
+static int human_readable;
 
 static off_t get_object_disk_usage(struct object *obj)
 {
@@ -368,6 +370,17 @@  static int show_object_fast(
 	return 1;
 }
 
+static void print_disk_usage(off_t size)
+{
+	struct strbuf sb = STRBUF_INIT;
+	if (human_readable)
+		strbuf_humanise_bytes(&sb, size);
+	else
+		strbuf_addf(&sb, "%"PRIuMAX, (uintmax_t)size);
+	puts(sb.buf);
+	strbuf_release(&sb);
+}
+
 static inline int parse_missing_action_value(const char *value)
 {
 	if (!strcmp(value, "error")) {
@@ -473,6 +486,7 @@  static int try_bitmap_disk_usage(struct rev_info *revs,
 				 int filter_provided_objects)
 {
 	struct bitmap_index *bitmap_git;
+	off_t size_from_bitmap;
 
 	if (!show_disk_usage)
 		return -1;
@@ -481,8 +495,8 @@  static int try_bitmap_disk_usage(struct rev_info *revs,
 	if (!bitmap_git)
 		return -1;
 
-	printf("%"PRIuMAX"\n",
-	       (uintmax_t)get_disk_usage_from_bitmap(bitmap_git, revs));
+	size_from_bitmap = get_disk_usage_from_bitmap(bitmap_git, revs);
+	print_disk_usage(size_from_bitmap);
 	return 0;
 }
 
@@ -624,7 +638,20 @@  int cmd_rev_list(int argc, const char **argv, const char *prefix)
 			continue;
 		}
 
-		if (!strcmp(arg, "--disk-usage")) {
+		if (skip_prefix(arg, "--disk-usage", &arg)) {
+			if (*arg == '=') {
+				if (!strcmp(++arg, "human")) {
+					human_readable = 1;
+				} else
+					die(_("invalid value for '%s': '%s', try --disk-usage=human"), "--disk-usage", arg);
+			} else if (*arg) {
+				/*
+				* Arguably should goto a label to continue chain of ifs?
+				* Doesn't matter unless we try to add --disk-usage-foo
+				* afterwards
+				*/
+				usage(rev_list_usage);
+			}
 			show_disk_usage = 1;
 			info.flags |= REV_LIST_QUIET;
 			continue;
@@ -753,7 +780,7 @@  int cmd_rev_list(int argc, const char **argv, const char *prefix)
 	}
 
 	if (show_disk_usage)
-		printf("%"PRIuMAX"\n", (uintmax_t)total_disk_usage);
+		print_disk_usage(total_disk_usage);
 
 cleanup:
 	release_revisions(&revs);
diff --git a/t/t6115-rev-list-du.sh b/t/t6115-rev-list-du.sh
index b4aef32b713..b34841a4ba8 100755
--- a/t/t6115-rev-list-du.sh
+++ b/t/t6115-rev-list-du.sh
@@ -48,4 +48,26 @@  check_du HEAD
 check_du --objects HEAD
 check_du --objects HEAD^..HEAD
 
+# As mentioned above, don't use hard-coded sizes as the actual size; use the
+# output from git cat-file instead.
+test_expect_success 'rev-list --disk-usage=human' '
+	git rev-list --objects HEAD --disk-usage=human >actual &&
+	disk_usage_slow --objects HEAD >actual_size &&
+	grep "$(cat actual_size) bytes" actual
+'
+
+test_expect_success 'rev-list --disk-usage=human with bitmaps' '
+	git rev-list --objects HEAD --use-bitmap-index --disk-usage=human >actual &&
+	disk_usage_slow --objects HEAD >actual_size &&
+	grep "$(cat actual_size) bytes" actual
+'
+
+test_expect_success 'rev-list use --disk-usage unproperly' '
+	test_must_fail git rev-list --objects HEAD --disk-usage=typo 2>err &&
+	cat >expect <<-\EOF &&
+	fatal: invalid value for '\''--disk-usage'\'': '\''typo'\'', try --disk-usage=human
+	EOF
+	test_cmp err expect
+'
+
 test_done