diff mbox series

[v2] ls-files.c: add --object-only option

Message ID pull.1250.v2.git.1654778272871.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2] ls-files.c: add --object-only option | expand

Commit Message

ZheNing Hu June 9, 2022, 12:37 p.m. UTC
From: ZheNing Hu <adlternative@gmail.com>

`git ls-files --stage` default output format is:

[<tag> ]<mode> <object> <stage> <file>

sometime we want to find a path's corresponding objectname,
we will parse the output and extract objectname from it
again and again.

So introduce a new option `--object-only` which can only
output objectname when giving `--stage` or `--resolve-undo`.

Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
    ls-files.c: add --object-only option
    
    v1 -> v2: rename option '--only-object-name' to '--object-only'.

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1250%2Fadlternative%2Fzh%2Fls-file-only-objectname-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1250/adlternative/zh/ls-file-only-objectname-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/1250

Range-diff vs v1:

 1:  10efe3bd9ca ! 1:  aed0bd2c791 ls-files.c: add --only-object-name option
     @@ Metadata
      Author: ZheNing Hu <adlternative@gmail.com>
      
       ## Commit message ##
     -    ls-files.c: add --only-object-name option
     +    ls-files.c: add --object-only option
      
          `git ls-files --stage` default output format is:
      
     @@ Commit message
          we will parse the output and extract objectname from it
          again and again.
      
     -    So introduce a new option `--only-object-name` which can only
     +    So introduce a new option `--object-only` which can only
          output objectname when giving `--stage` or `--resolve-undo`.
      
          Signed-off-by: ZheNing Hu <adlternative@gmail.com>
     @@ Documentation/git-ls-files.txt: SYNOPSIS
       		[-s|--stage] [-u|--unmerged] [-k|--|killed] [-m|--modified]
       		[--directory [--no-empty-directory]] [--eol]
      -		[--deduplicate]
     -+		[--deduplicate] [--only-object-name]
     ++		[--deduplicate] [--object-only]
       		[-x <pattern>|--exclude=<pattern>]
       		[-X <file>|--exclude-from=<file>]
       		[--exclude-per-directory=<file>]
     @@ Documentation/git-ls-files.txt: OPTIONS
       	When any of the `-t`, `--unmerged`, or `--stage` option is
       	in use, this option has no effect.
       
     -+--only-object-name:
     ++--object-only:
      +	When giving `--stage` or `--resolve-undo` , only output `<object>`
      +	instead of `[<tag> ]<mode> <object> <stage> <file>` format.
      +
     @@ builtin/ls-files.c: static int show_deleted;
       static int show_cached;
       static int show_others;
       static int show_stage;
     -+static int only_object_name;
     ++static int object_only;
       static int show_unmerged;
       static int show_resolve_undo;
       static int show_modified;
     @@ builtin/ls-files.c: static void show_ce(struct repository *repo, struct dir_stru
       			fputs(tag, stdout);
       		} else {
      +			const char *object_name = repo_find_unique_abbrev(repo, &ce->oid, abbrev);
     -+			if (only_object_name) {
     ++			if (object_only) {
      +				printf("%s%c", object_name, line_terminator);
      +				return;
      +			}
     @@ builtin/ls-files.c: static void show_ru_info(struct index_state *istate)
       		for (i = 0; i < 3; i++) {
       			if (!ui->mode[i])
       				continue;
     -+			if (only_object_name) {
     ++			if (object_only) {
      +				printf("%s%c", find_unique_abbrev(&ui->oid[i], abbrev), line_terminator);
      +				continue;
      +			}
     @@ builtin/ls-files.c: int cmd_ls_files(int argc, const char **argv, const char *cm
       			DIR_SHOW_IGNORED),
       		OPT_BOOL('s', "stage", &show_stage,
       			N_("show staged contents' object name in the output")),
     -+		OPT_BOOL(0, "only-object-name", &only_object_name,
     ++		OPT_BOOL(0, "object-only", &object_only,
      +			N_("only show staged contents' object name in the output")),
       		OPT_BOOL('k', "killed", &show_killed,
       			N_("show files on the filesystem that need to be removed")),
     @@ builtin/ls-files.c: int cmd_ls_files(int argc, const char **argv, const char *cm
       		die("ls-files --recurse-submodules does not support "
       		    "--error-unmatch");
       
     -+	if (only_object_name && !show_stage && !show_resolve_undo)
     -+		die("ls-files --only-object-name only used with --stage "
     -+		    "or --resolve-undo");
     ++	if (object_only && !show_stage && !show_resolve_undo)
     ++		die(_("ls-files --object-only only used with --stage "
     ++		    "or --resolve-undo"));
      +
       	parse_pathspec(&pathspec, 0,
       		       PATHSPEC_PREFER_CWD,
     @@ t/t2030-unresolve-info.sh: check_resolve_undo () {
       	test_cmp "$msg.expect" "$msg.actual"
       }
       
     -+check_resolve_undo_only_object_name() {
     ++check_resolve_undo_object_only() {
      +	msg=$1
      +	shift
      +	while case $# in
      +	0)	break ;;
     -+	1|2|3)	die "Bug in check-resolve-undo test" ;;
     ++	1|2|3)	BUG "wrong arguments" ;;
      +	esac
      +	do
      +		path=$1
     @@ t/t2030-unresolve-info.sh: check_resolve_undo () {
      +			case "$sha1" in
      +			'') continue ;;
      +			esac
     -+			sha1=$(git rev-parse --verify "$sha1")
     ++			sha1=$(git rev-parse --verify "$sha1") &&
      +			printf "%s\n" $sha1
      +		done
      +	done >"$msg.expect" &&
     -+	git ls-files --resolve-undo --only-object-name >"$msg.actual" &&
     ++	git ls-files --resolve-undo --object-only >"$msg.actual" &&
      +	test_cmp "$msg.expect" "$msg.actual"
      +}
      +
     @@ t/t2030-unresolve-info.sh: test_expect_success 'rerere forget (add-add conflict)
       	test_i18ngrep "no remembered" actual
       '
       
     -+test_expect_success '--resolve-undo with --only-object-name' '
     ++test_expect_success '--resolve-undo with --object-only' '
      +	prime_resolve_undo &&
     -+	check_resolve_undo_only_object_name kept fi/le initial:fi/le second:fi/le third:fi/le &&
     ++	check_resolve_undo_object_only kept fi/le initial:fi/le second:fi/le third:fi/le &&
      +	git checkout second^0 &&
      +	echo switching clears &&
      +	check_resolve_undo cleared
     @@ t/t3004-ls-files-basic.sh: test_expect_success SYMLINKS 'ls-files with absolute
       	test_cmp expect actual
       '
       
     -+test_expect_success 'git ls-files --stage with --only-object-name' '
     ++test_expect_success 'git ls-files --stage with --object-only' '
      +	git init test &&
      +	test_when_finished "rm -rf test" &&
     -+	(
     -+		cd test &&
     -+		echo a >a.txt &&
     -+		echo b >b.txt &&
     -+		git add a.txt b.txt &&
     -+		oid1=$(git hash-object a.txt) &&
     -+		oid2=$(git hash-object b.txt) &&
     -+		git ls-files --stage --only-object-name >actual &&
     -+		cat >expect <<-EOF &&
     -+		$oid1
     -+		$oid2
     -+		EOF
     -+		test_cmp expect actual
     -+	)
     ++	echo a >test/a.txt &&
     ++	echo b >test/b.txt &&
     ++	git -C test add a.txt b.txt &&
     ++	oid1=$(git -C test hash-object a.txt) &&
     ++	oid2=$(git -C test hash-object b.txt) &&
     ++	git -C test ls-files --stage --object-only >actual &&
     ++	cat >expect <<-EOF &&
     ++	$oid1
     ++	$oid2
     ++	EOF
     ++	test_cmp expect actual
      +'
      +
     -+test_expect_success 'git ls-files --only-object-name without --stage or --resolve-undo' '
     ++test_expect_success 'git ls-files --object-only without --stage or --resolve-undo' '
      +	git init test &&
      +	test_when_finished "rm -rf test" &&
     -+	(
     -+		cd test &&
     -+		echo a >a.txt &&
     -+		echo b >b.txt &&
     -+		git add a.txt b.txt &&
     -+		test_must_fail git ls-files --only-object-name 2>stderr &&
     -+		test_i18ngrep "fatal: ls-files --only-object-name only used with --stage or --resolve-undo" stderr
     -+	)
     ++	echo a >test/a.txt &&
     ++	echo b >test/b.txt &&
     ++	git -C test add a.txt b.txt &&
     ++	test_must_fail git -C test ls-files --object-only 2>stderr &&
     ++	grep "fatal: ls-files --object-only only used with --stage or --resolve-undo" stderr
      +'
      +
       test_done


 Documentation/git-ls-files.txt |  6 +++++-
 builtin/ls-files.c             | 18 +++++++++++++++++-
 t/t2030-unresolve-info.sh      | 33 +++++++++++++++++++++++++++++++++
 t/t3004-ls-files-basic.sh      | 26 ++++++++++++++++++++++++++
 4 files changed, 81 insertions(+), 2 deletions(-)


base-commit: ab336e8f1c8009c8b1aab8deb592148e69217085

Comments

Junio C Hamano June 9, 2022, 7:50 p.m. UTC | #1
"ZheNing Hu via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: ZheNing Hu <adlternative@gmail.com>
>
> `git ls-files --stage` default output format is:
>
> [<tag> ]<mode> <object> <stage> <file>
>
> sometime we want to find a path's corresponding objectname,

"sometime" -> "When", perhaps.  If you really want to say that,
"Sometimes, " is also good, though.

By the way, I do not think you are "want to find a path's
corresponding objectname" at all with this feature.  The output from
"ls-files -s" will have many object names, one per each path if the
index is merged, and if you discard the path, you no longer can tell
which object name corresponds to which path.

> we will parse the output and extract objectname from it
> again and again.

Why is that a problem?  "again and again" is over-exaggerating;
you'd munge each line just once.

It would help readers if you say WHY you want to find object names.
Perhaps you want to find the set of objects that are registered in
the index, regardless of their paths?

In any case, the paragraph needs a rewrite.

> So introduce a new option `--object-only` which can only
> output objectname when giving `--stage` or `--resolve-undo`.

"which can only" makes it sound like you are complaining about its
limitation.

I read these two lines to mean "git ls-files -s --object-only" does
not even give me the stage information, but that would make the
command completely useless, so I am assuming that is not what you
meant to say.  The same comment applies for resolve-undo, which is
merely "what 'ls-files -s' may have given before you resolved".

If you borrowed a feature from another existing command, say that
explicitly, which will allow your commit to gain confidence by
reviewers and future readers by showing that you care about overall
consistency in the system.

	Add a new option `--object-only` that omits the mode and
	filename from the output, taking inspiration from the option
	with the same name in the `git ls-tree` command.

or something like that, perhaps.

How does/should this interact with the `--deduplicate` option?

If we are not giving stages and truly giving only object names
(which I doubt is what we want, by the way), then we can and should
deduplicate the output when the option is given.  If we have two
identical blobs at different paths, or two identical blobs at the
same path but at different stages, shouldn't we get only a single
copy of output for that blob, as we are not showing paths nor
stages, right?

How does/should this behave when --stage is not given?

I have a suspicion that this whole thing is misdesigned.  Instead of
making it piggy back on --stage, don't you want to make it an
independent option?  I.e.

	git ls-files --object-only

with no other option would behave like

	git ls-files -s | sed -e 's/^[0-6]* \([0-9a-f]*\) .*/\1/'

and it is an error to combine it with -s or --deduplicate.  If the
purpose is to learn the set of objects registered in the index, then
it might even make sense to make it an equivalent to

	git ls-files -s |
	sed -e 's/^[0-6]* \([0-9a-f]*\) .*/\1/' |
	sort -u

as duplicates or order of the entries is no use for such a use
case.  

It entirely depends on WHY you want to find object names, and that
is why I asked it much earlier in this message.

And I do not think it makes any sense to give resolve-undo
information without paths nor stages at all.  Please do not tie this
with that mode.

In short

 - this probably is better done as a separate independent mode
   "--object-only", rather than a piggy-back feature on top of
    existing other features like "-s" and "--resolve-undo".

 - the new mode should be made mutually incompatible with "-s" and
   "--resolve-undo".  There may be other options that this should be
   incompatible, like "--tag" and "--full-name".

Thanks.
ZheNing Hu June 12, 2022, 10:24 a.m. UTC | #2
Junio C Hamano <gitster@pobox.com> 于2022年6月10日周五 03:51写道:
>
> "ZheNing Hu via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
> > From: ZheNing Hu <adlternative@gmail.com>

> I read these two lines to mean "git ls-files -s --object-only" does
> not even give me the stage information, but that would make the
> command completely useless, so I am assuming that is not what you
> meant to say.  The same comment applies for resolve-undo, which is
> merely "what 'ls-files -s' may have given before you resolved".
>
> If you borrowed a feature from another existing command, say that
> explicitly, which will allow your commit to gain confidence by
> reviewers and future readers by showing that you care about overall
> consistency in the system.
>
>         Add a new option `--object-only` that omits the mode and
>         filename from the output, taking inspiration from the option
>         with the same name in the `git ls-tree` command.
>
> or something like that, perhaps.
>

Yes, this message will be better. I think it omits not only mode,
filename, but also tag, stage, eol info, debug message.

> How does/should this interact with the `--deduplicate` option?
>
> If we are not giving stages and truly giving only object names
> (which I doubt is what we want, by the way), then we can and should
> deduplicate the output when the option is given.  If we have two
> identical blobs at different paths, or two identical blobs at the
> same path but at different stages, shouldn't we get only a single
> copy of output for that blob, as we are not showing paths nor
> stages, right?
>

I have think about it for a long time, I think deduplicate is used for
removing duplicates entries which caused by one path can have
different stage.

But we now care about a output format just like %(objectname), if we
need to deduplicate it, when we use --format="%(objectname) %(path)" later,
do we need to deduplicate its output too? I think we should disable
--deduplicate
when we are using --object-only.

> How does/should this behave when --stage is not given?
>
> I have a suspicion that this whole thing is misdesigned.  Instead of
> making it piggy back on --stage, don't you want to make it an
> independent option?  I.e.
>
>         git ls-files --object-only
>
> with no other option would behave like
>
>         git ls-files -s | sed -e 's/^[0-6]* \([0-9a-f]*\) .*/\1/'
>
> and it is an error to combine it with -s or --deduplicate.  If the
> purpose is to learn the set of objects registered in the index, then
> it might even make sense to make it an equivalent to
>
>         git ls-files -s |
>         sed -e 's/^[0-6]* \([0-9a-f]*\) .*/\1/' |
>         sort -u
>
> as duplicates or order of the entries is no use for such a use
> case.
>
> It entirely depends on WHY you want to find object names, and that
> is why I asked it much earlier in this message.
>

My origin requirement is to do a app which can move one file to another
file in a bare git-repo, so I need to get first file object-name for
second file to
update-index. It can parsed by the app of course, but I think such kind of work
left to git itself can help other app programers.

Maybe you are right that --object-only or --format should not be
sub-option of --stage
or --resolve-undo, I will think about how to implement it later.

> And I do not think it makes any sense to give resolve-undo
> information without paths nor stages at all.  Please do not tie this
> with that mode.
>
> In short
>
>  - this probably is better done as a separate independent mode
>    "--object-only", rather than a piggy-back feature on top of
>     existing other features like "-s" and "--resolve-undo".
>
>  - the new mode should be made mutually incompatible with "-s" and
>    "--resolve-undo".  There may be other options that this should be
>    incompatible, like "--tag" and "--full-name".
>

By the way, if we need --format for git ls-files, which atoms should we keep?

I think those atoms are undoubtedly necessary to keep

%(tag)
%(objectmode)
%(objectname)
%(stage)
%(path)

git ls-files --stage just like

git ls-file --format="%(tag)%(obejctmode) %(objectname) %(stage)\t%(path)"

but for these follow atoms, I am not sure if we need them?

%(eofinfo)
%(debug)
%(eol)
%(ctime)
%(ctime:sec)
%(ctime:nsec)
%(mtime)
%(mtime:sec)
%(mtime:nsec)
%(dev)
%(ino)
%(uid)
%(gid)
%(size)
%(flags)

Thanks
Junio C Hamano June 13, 2022, 5:19 p.m. UTC | #3
ZheNing Hu <adlternative@gmail.com> writes:

> I think those atoms are undoubtedly necessary to keep
>
> %(tag)
> %(objectmode)
> %(objectname)
> %(stage)
> %(path)

I am not sure what you mean by "keep".  You cannot keep what you do
not have yet ;-)

If ls-files needs (that is a big if; it is a plumbing to be used by
whatever program that want to assemble the pieces, and it shouldn't
have to learn such assembly itself) to support "--format", so that
people can reinvent its "-s" output (but why?  There already is "-s"
output available), then the above would be necessary (assuming that
via the "--format" the user will be able to supply inter-field
spaces and tabs properly).  I do not know if there are other things
available in output other than the "-s" option produces offhand, but
if there are, they would need to be added for completeness.
diff mbox series

Patch

diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index 0dabf3f0ddc..9736b02b565 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -13,7 +13,7 @@  SYNOPSIS
 		[-c|--cached] [-d|--deleted] [-o|--others] [-i|--|ignored]
 		[-s|--stage] [-u|--unmerged] [-k|--|killed] [-m|--modified]
 		[--directory [--no-empty-directory]] [--eol]
-		[--deduplicate]
+		[--deduplicate] [--object-only]
 		[-x <pattern>|--exclude=<pattern>]
 		[-X <file>|--exclude-from=<file>]
 		[--exclude-per-directory=<file>]
@@ -88,6 +88,10 @@  OPTIONS
 	When any of the `-t`, `--unmerged`, or `--stage` option is
 	in use, this option has no effect.
 
+--object-only:
+	When giving `--stage` or `--resolve-undo` , only output `<object>`
+	instead of `[<tag> ]<mode> <object> <stage> <file>` format.
+
 -x <pattern>::
 --exclude=<pattern>::
 	Skip untracked files matching pattern.
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index e791b65e7e9..2fef5f40a3f 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -26,6 +26,7 @@  static int show_deleted;
 static int show_cached;
 static int show_others;
 static int show_stage;
+static int object_only;
 static int show_unmerged;
 static int show_resolve_undo;
 static int show_modified;
@@ -241,10 +242,15 @@  static void show_ce(struct repository *repo, struct dir_struct *dir,
 		if (!show_stage) {
 			fputs(tag, stdout);
 		} else {
+			const char *object_name = repo_find_unique_abbrev(repo, &ce->oid, abbrev);
+			if (object_only) {
+				printf("%s%c", object_name, line_terminator);
+				return;
+			}
 			printf("%s%06o %s %d\t",
 			       tag,
 			       ce->ce_mode,
-			       repo_find_unique_abbrev(repo, &ce->oid, abbrev),
+			       object_name,
 			       ce_stage(ce));
 		}
 		write_eolinfo(repo->index, ce, fullname);
@@ -274,6 +280,10 @@  static void show_ru_info(struct index_state *istate)
 		for (i = 0; i < 3; i++) {
 			if (!ui->mode[i])
 				continue;
+			if (object_only) {
+				printf("%s%c", find_unique_abbrev(&ui->oid[i], abbrev), line_terminator);
+				continue;
+			}
 			printf("%s%06o %s %d\t", tag_resolve_undo, ui->mode[i],
 			       find_unique_abbrev(&ui->oid[i], abbrev),
 			       i + 1);
@@ -635,6 +645,8 @@  int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 			DIR_SHOW_IGNORED),
 		OPT_BOOL('s', "stage", &show_stage,
 			N_("show staged contents' object name in the output")),
+		OPT_BOOL(0, "object-only", &object_only,
+			N_("only show staged contents' object name in the output")),
 		OPT_BOOL('k', "killed", &show_killed,
 			N_("show files on the filesystem that need to be removed")),
 		OPT_BIT(0, "directory", &dir.flags,
@@ -734,6 +746,10 @@  int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 		die("ls-files --recurse-submodules does not support "
 		    "--error-unmatch");
 
+	if (object_only && !show_stage && !show_resolve_undo)
+		die(_("ls-files --object-only only used with --stage "
+		    "or --resolve-undo"));
+
 	parse_pathspec(&pathspec, 0,
 		       PATHSPEC_PREFER_CWD,
 		       prefix, argv);
diff --git a/t/t2030-unresolve-info.sh b/t/t2030-unresolve-info.sh
index f691e6d9032..cdab953980c 100755
--- a/t/t2030-unresolve-info.sh
+++ b/t/t2030-unresolve-info.sh
@@ -32,6 +32,31 @@  check_resolve_undo () {
 	test_cmp "$msg.expect" "$msg.actual"
 }
 
+check_resolve_undo_object_only() {
+	msg=$1
+	shift
+	while case $# in
+	0)	break ;;
+	1|2|3)	BUG "wrong arguments" ;;
+	esac
+	do
+		path=$1
+		shift
+		for stage in 1 2 3
+		do
+			sha1=$1
+			shift
+			case "$sha1" in
+			'') continue ;;
+			esac
+			sha1=$(git rev-parse --verify "$sha1") &&
+			printf "%s\n" $sha1
+		done
+	done >"$msg.expect" &&
+	git ls-files --resolve-undo --object-only >"$msg.actual" &&
+	test_cmp "$msg.expect" "$msg.actual"
+}
+
 prime_resolve_undo () {
 	git reset --hard &&
 	git checkout second^0 &&
@@ -194,4 +219,12 @@  test_expect_success 'rerere forget (add-add conflict)' '
 	test_i18ngrep "no remembered" actual
 '
 
+test_expect_success '--resolve-undo with --object-only' '
+	prime_resolve_undo &&
+	check_resolve_undo_object_only kept fi/le initial:fi/le second:fi/le third:fi/le &&
+	git checkout second^0 &&
+	echo switching clears &&
+	check_resolve_undo cleared
+'
+
 test_done
diff --git a/t/t3004-ls-files-basic.sh b/t/t3004-ls-files-basic.sh
index a16e25c79bd..6c81ead140e 100755
--- a/t/t3004-ls-files-basic.sh
+++ b/t/t3004-ls-files-basic.sh
@@ -52,4 +52,30 @@  test_expect_success SYMLINKS 'ls-files with absolute paths to symlinks' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git ls-files --stage with --object-only' '
+	git init test &&
+	test_when_finished "rm -rf test" &&
+	echo a >test/a.txt &&
+	echo b >test/b.txt &&
+	git -C test add a.txt b.txt &&
+	oid1=$(git -C test hash-object a.txt) &&
+	oid2=$(git -C test hash-object b.txt) &&
+	git -C test ls-files --stage --object-only >actual &&
+	cat >expect <<-EOF &&
+	$oid1
+	$oid2
+	EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --object-only without --stage or --resolve-undo' '
+	git init test &&
+	test_when_finished "rm -rf test" &&
+	echo a >test/a.txt &&
+	echo b >test/b.txt &&
+	git -C test add a.txt b.txt &&
+	test_must_fail git -C test ls-files --object-only 2>stderr &&
+	grep "fatal: ls-files --object-only only used with --stage or --resolve-undo" stderr
+'
+
 test_done