diff mbox series

[v3] ls-files: introduce "--format" option

Message ID pull.1262.v3.git.1655777140231.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series [v3] ls-files: introduce "--format" option | expand

Commit Message

ZheNing Hu June 21, 2022, 2:05 a.m. UTC
From: ZheNing Hu <adlternative@gmail.com>

Add a new option --format that output index enties
informations with custom format, taking inspiration
from the option with the same name in the `git ls-tree`
command.

--format cannot used with -s, -o, -k, --resolve-undo,
--deduplicate and --debug.

Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
    ls-files: introduce "--format" options
    
    v2->v3:
    
     1. remove %(tag) because -t is deprecated, suggested by Phillip.
     2. fix some description of atoms in document, suggested by Phillip..

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1262%2Fadlternative%2Fzh%2Fls-file-format-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1262/adlternative/zh/ls-file-format-v3
Pull-Request: https://github.com/gitgitgadget/git/pull/1262

Range-diff vs v2:

 1:  67f2c3b8ebe ! 1:  aaafa35ffcd ls-files: introduce "--format" option
     @@ Documentation/git-ls-files.txt: quoted as explained for the configuration variab
      +into the resulting output. For each outputting line, the following
      +names can be used:
      +
     -+tag::
     -+	The tag of file status.
      +objectmode::
     -+	The mode of the object.
     ++	The mode of the file which is in the index.
      +objectname::
     -+	The name of the object.
     ++	The name of the file which is in the index.
      +stage::
     -+	The stage of the file.
     ++	The stage of the file which is in the index.
      +eol::
     -+	The line endings of files.
     ++	The <eolinfo> and <eolattr> of files both in the
     ++	index and the work-tree.
      +path::
     -+	The pathname of the object.
     ++	The pathname of the file which is in the index.
      +ctime::
     -+	The create time of file.
     ++	The create time of file which is in the index.
      +mtime::
     -+	The modify time of file.
     ++	The modified time of file which is in the index.
      +dev::
     -+	The ID of device containing file.
     ++	The ID of device containing file which is in the index.
      +ino::
     -+	The inode number of file.
     ++	The inode number of file which is in the index.
      +uid::
     -+	The user id of file owner.
     ++	The user id of file owner which is in the index.
      +gid::
     -+	The group id of file owner.
     ++	The group id of file owner which is in the index.
      +size::
     -+	The size of the file.
     ++	The size of the file which is in the index.
      +flags::
     -+	The flags of the file.
     ++	The flags of the file in the index which include
     ++	in-memory only flags and some extended on-disk flags.
       
       EXCLUDE PATTERNS
       ----------------
     @@ builtin/ls-files.c: static void show_submodule(struct repository *superproject,
      +	struct show_index_data *data = context;
      +	const char *end;
      +	const char *p;
     -+	unsigned int errlen;
      +	const struct stat_data *sd = &data->ce->ce_stat_data;
      +	size_t len = strbuf_expand_literal_cb(sb, start, NULL);
      +	if (len)
     @@ builtin/ls-files.c: static void show_submodule(struct repository *superproject,
      +		      "does not end in ')'"), start);
      +
      +	len = end - start + 1;
     -+	if (skip_prefix(start, "(tag)", &p))
     -+		strbuf_addstr(sb, get_tag(data->ce, data->tag));
     -+	else if (skip_prefix(start, "(objectmode)", &p))
     ++	if (skip_prefix(start, "(objectmode)", &p))
      +		strbuf_addf(sb, "%06o", data->ce->ce_mode);
      +	else if (skip_prefix(start, "(objectname)", &p))
      +		strbuf_add_unique_abbrev(sb, &data->ce->oid, abbrev);
     @@ builtin/ls-files.c: static void show_submodule(struct repository *superproject,
      +		strbuf_addf(sb, "size: %u", sd->sd_size);
      +	else if (skip_prefix(start, "(flags)", &p))
      +		strbuf_addf(sb, "flags: %x", data->ce->ce_flags);
     -+	else {
     -+		errlen = (unsigned long)len;
     -+		die(_("bad ls-files format: %%%.*s"), errlen, start);
     -+	}
     ++	else
     ++		die(_("bad ls-files format: %%%.*s"), (int)len, start);
      +
      +	return len;
      +}
      +
      +static void show_ce_fmt(struct repository *repo, const struct cache_entry *ce,
     -+			const char *format, const char *fullname, const char *tag) {
     ++			const char *format, const char *fullname) {
      +
      +	struct show_index_data data = {
     -+		.tag = tag,
      +		.pathname = fullname,
      +		.istate = repo->index,
      +		.ce = ce,
     @@ builtin/ls-files.c: static void show_ce(struct repository *repo, struct dir_stru
       				  S_ISDIR(ce->ce_mode) ||
       				  S_ISGITLINK(ce->ce_mode))) {
      +		if (format) {
     -+			show_ce_fmt(repo, ce, format, fullname, tag);
     ++			show_ce_fmt(repo, ce, format, fullname);
      +			return;
      +		}
      +
     @@ t/t3013-ls-files-format.sh (new)
      +	git commit -m base
      +'
      +
     -+test_expect_success 'git ls-files --format tag' '
     -+	printf "H \nH \n" >expect &&
     -+	git ls-files --format="%(tag)" -t >actual &&
     -+	test_cmp expect actual
     -+'
     -+
      +test_expect_success 'git ls-files --format objectmode' '
      +	cat >expect <<-\EOF &&
      +	100755


 Documentation/git-ls-files.txt |  51 +++++++++++++-
 builtin/ls-files.c             | 124 ++++++++++++++++++++++++++++++++-
 t/t3013-ls-files-format.sh     | 124 +++++++++++++++++++++++++++++++++
 3 files changed, 295 insertions(+), 4 deletions(-)
 create mode 100755 t/t3013-ls-files-format.sh


base-commit: ab336e8f1c8009c8b1aab8deb592148e69217085

Comments

Phillip Wood June 23, 2022, 2:06 p.m. UTC | #1
Hi ZheNing

On 21/06/2022 03:05, ZheNing Hu via GitGitGadget wrote:
> From: ZheNing Hu <adlternative@gmail.com>
> 
> Add a new option --format that output index enties
> informations with custom format, taking inspiration
> from the option with the same name in the `git ls-tree`
> command.
> 
> --format cannot used with -s, -o, -k, --resolve-undo,
> --deduplicate and --debug.
> 
> Signed-off-by: ZheNing Hu <adlternative@gmail.com>
> ---
>      ls-files: introduce "--format" options
>      
>      v2->v3:
>      
>       1. remove %(tag) because -t is deprecated, suggested by Phillip.
>       2. fix some description of atoms in document, suggested by Phillip..

Thanks for re-rolling, having taken a look a closer look at the tests 
I'm concerned about the output format for some of the specifiers, see below.

> [...]  
> +It is possible to print in a custom format by using the `--format`
> +option, which is able to interpolate different fields using
> +a `%(fieldname)` notation. For example, if you only care about the
> +"objectname" and "path" fields, you can execute with a specific
> +"--format" like
> +
> +	git ls-files --format='%(objectname) %(path)'
> +
> +FIELD NAMES
> +-----------
> +Various values from structured fields can be used to interpolate
> +into the resulting output. For each outputting line, the following
> +names can be used:
> +
> +objectmode::
> +	The mode of the file which is in the index.
> +objectname::
> +	The name of the file which is in the index.
> +stage::
> +	The stage of the file which is in the index.
> +eol::
> +	The <eolinfo> and <eolattr> of files both in the
> +	index and the work-tree.

Looking at the test for this option I think it needs more work, why 
should --format arbitrarily append a tab to the end of the output? - the 
user should be able to specify a separator if they want one as part of 
the format string. Also I'm not sure why there is so much whitespace in 
the output.

> +path::
> +	The pathname of the file which is in the index.

I think that for all these it might be clearer to say "recorded in the 
index" rather than "of the file which is in the index"

> +ctime::
> +	The create time of file which is in the index.

This is printed with a prefix 'ctime:' (the same applies to the format 
specifiers below) I think we should omit that and just print the data so 
the user can choose the format they want.

> +mtime::
> +	The modified time of file which is in the index.
> +dev::
> +	The ID of device containing file which is in the index.
> +ino::
> +	The inode number of file which is in the index.
> +uid::
> +	The user id of file owner which is in the index.
> +gid::
> +	The group id of file owner which is in the index.
> +size::
> +	The size of the file which is in the index.
> +flags::
> +	The flags of the file in the index which include
> +	in-memory only flags and some extended on-disk flags.

If %(flags) is going to be useful then I think we need to think about 
how they are printed and document that. At the moment they are printed 
as a hexadecimal number which is fine for debugging but probably not 
going to be useful for something like --format. I think printing 
documented symbolic names with some kind of separator (a comma maybe) 
between them is probably more useful

 > [...]
> +test_expect_success 'git ls-files --format eol' '
> +	printf "i/lf    w/lf    attr/                 \t\n" >expect &&
> +	printf "i/lf    w/lf    attr/                 \t\n" >>expect &&
> +	git ls-files --format="%(eol)" --eol >actual &&

I'm not sure why this is passing --eol as well as --format='%(eol)' - 
shouldn't that combination of flags be an error?

Best Wishes

Phillip
Junio C Hamano June 23, 2022, 3:57 p.m. UTC | #2
Phillip Wood <phillip.wood123@gmail.com> writes:

> Thanks for re-rolling, having taken a look a closer look at the tests
> I'm concerned about the output format for some of the specifiers, see
> below.

Thanks for raising these issues.  I agree with you on many of them.
In addition to what you covered ....

>> +path::
>> +	The pathname of the file which is in the index.
> I think that for all these it might be clearer to say "recorded in the
> index" rather than "of the file which is in the index"

I think we would call this "name".  The name of the existing option
that controls how they are shown is "--full-name", not "--full-path",
for example.

>> +ctime::
>> +	The create time of file which is in the index.
>
> This is printed with a prefix 'ctime:' (the same applies to the format
> specifiers below) I think we should omit that and just print the data
> so the user can choose the format they want.
>
>> +mtime::
>> +	The modified time of file which is in the index.

These are only the low-bits of the full timestamp, not ctime/mtime
themselves.

But stepping back a bit, why do we need to include them in the
output?  What workflow and use case are we trying to help?  Dump
output from "stat <path>" equivalent from ls-files and compare with
"stat ." output to see which ones are stale?  Or is there any value
to see the value of, say, ctime as an individual data item?

>> +dev::
>> +	The ID of device containing file which is in the index.
>> +ino::
>> +	The inode number of file which is in the index.
>> +uid::
>> +	The user id of file owner which is in the index.
>> +gid::
>> +	The group id of file owner which is in the index.

Again, why do we need to include these in the output?

Wouldn't it be sufficient, as well as a lot more useful, to show a
single bit "the cached stat info matches what is in the working tree
(yes/no)"?

>> +size::
>> +	The size of the file which is in the index.

This needs to explain what kind of size this is.  Is it the size of
the blob object?  Is it the size of the file in the working tree
(i.e. not cleaned)?  Is it _always_ the size, or can it become a
number that is very different from size in certain circumstances?

IOW, I do not think giving this to unsuspecting users and call it
"size of the file" hurts them more than it helps them, especially
because it is not always the size of the file.

I'd suggest getting rid of everything from ctime down to size and if
we really care about the freshness of the cached stat info, replace
them with a single bit "up-to-date".

>> +flags::
>> +	The flags of the file in the index which include
>> +	in-memory only flags and some extended on-disk flags.
>
> If %(flags) is going to be useful then I think we need to think about
> how they are printed and document that. At the moment they are printed 
> as a hexadecimal number which is fine for debugging but probably not
> going to be useful for something like --format. I think printing 
> documented symbolic names with some kind of separator (a comma maybe)
> between them is probably more useful

I am guessing that most of the above are only useful for curious
geeks and those who are debugging their new tweak to the code that
touches the index, i.e. a debugging feature.  But these folks can
run "git" under a debugger, and they probably have to do so when
they are seeing an unexpected value in the flags member of a cache
entry anyway.  So I am not sure whom this field is intended to help.

>> [...]
>> +test_expect_success 'git ls-files --format eol' '
>> +	printf "i/lf    w/lf    attr/                 \t\n" >expect &&
>> +	printf "i/lf    w/lf    attr/                 \t\n" >>expect &&
>> +	git ls-files --format="%(eol)" --eol >actual &&
>
> I'm not sure why this is passing --eol as well as --format='%(eol)' -
> shouldn't that combination of flags be an error?

Good eyes.

Thanks.
Phillip Wood June 24, 2022, 10:16 a.m. UTC | #3
On 23/06/2022 16:57, Junio C Hamano wrote:
> Phillip Wood <phillip.wood123@gmail.com> writes:
> 
>> Thanks for re-rolling, having taken a look a closer look at the tests
>> I'm concerned about the output format for some of the specifiers, see
>> below.
> 
> Thanks for raising these issues.  I agree with you on many of them.
> In addition to what you covered ....
> 
>>> +path::
>>> +	The pathname of the file which is in the index.
>> I think that for all these it might be clearer to say "recorded in the
>> index" rather than "of the file which is in the index"
> 
> I think we would call this "name".  The name of the existing option
> that controls how they are shown is "--full-name", not "--full-path",
> for example.

That's a good point, also I've just noticed that this is another case 
where there is a separator character is printed automatically when the 
format string is expanded. I think it is probably right to format the 
name based on whether or not -z was passed but we should leave it up to 
the user to supply a delimiter in the format string.

>>> +ctime::
>>> +	The create time of file which is in the index.
>>
>> This is printed with a prefix 'ctime:' (the same applies to the format
>> specifiers below) I think we should omit that and just print the data
>> so the user can choose the format they want.
>>
>>> +mtime::
>>> +	The modified time of file which is in the index.
> 
> These are only the low-bits of the full timestamp, not ctime/mtime
> themselves.
> 
> But stepping back a bit, why do we need to include them in the
> output?  What workflow and use case are we trying to help?  Dump
> output from "stat <path>" equivalent from ls-files and compare with
> "stat ." output to see which ones are stale?  Or is there any value
> to see the value of, say, ctime as an individual data item?
> 
>>> +dev::
>>> +	The ID of device containing file which is in the index.
>>> +ino::
>>> +	The inode number of file which is in the index.
>>> +uid::
>>> +	The user id of file owner which is in the index.
>>> +gid::
>>> +	The group id of file owner which is in the index.
> 
> Again, why do we need to include these in the output?
> 
> Wouldn't it be sufficient, as well as a lot more useful, to show a
> single bit "the cached stat info matches what is in the working tree
> (yes/no)"?

That does sound useful

>>> +flags::
>>> +	The flags of the file in the index which include
>>> +	in-memory only flags and some extended on-disk flags.
>>
>> If %(flags) is going to be useful then I think we need to think about
>> how they are printed and document that. At the moment they are printed
>> as a hexadecimal number which is fine for debugging but probably not
>> going to be useful for something like --format. I think printing
>> documented symbolic names with some kind of separator (a comma maybe)
>> between them is probably more useful
> 
> I am guessing that most of the above are only useful for curious
> geeks and those who are debugging their new tweak to the code that
> touches the index, i.e. a debugging feature.  But these folks can
> run "git" under a debugger, and they probably have to do so when
> they are seeing an unexpected value in the flags member of a cache
> entry anyway.  So I am not sure whom this field is intended to help.

I wondered about that as well, but thought there might be a plausible 
use if someone wants to check if an entry is marked intent-to-add, or 
has the skip-worktree/spare-index bits set (are there other ways to 
inspect those?)

Best Wishes

Phillip
Ævar Arnfjörð Bjarmason June 24, 2022, 1:20 p.m. UTC | #4
On Thu, Jun 23 2022, Junio C Hamano wrote:

> Phillip Wood <phillip.wood123@gmail.com> writes:
>
>> Thanks for re-rolling, having taken a look a closer look at the tests
>> I'm concerned about the output format for some of the specifiers, see
>> below.
>
> Thanks for raising these issues.  I agree with you on many of them.
> In addition to what you covered ....
>
>>> +path::
>>> +	The pathname of the file which is in the index.
>> I think that for all these it might be clearer to say "recorded in the
>> index" rather than "of the file which is in the index"
>
> I think we would call this "name".  The name of the existing option
> that controls how they are shown is "--full-name", not "--full-path",
> for example.

To the extent that we got this wrong it was me in 455923e0a15 (ls-tree:
introduce "--format" option, 2022-03-23), but given that we have that I
think it makes sense to have this be consistent with ls-tree.

FWIW ls-tree also uses "name" options, but its docs talked about
"<path>", so I thought it was more helpful to pick that.

We also say that we will "show the full path names" in that
documentation.
Ævar Arnfjörð Bjarmason June 24, 2022, 1:25 p.m. UTC | #5
On Tue, Jun 21 2022, ZheNing Hu via GitGitGadget wrote:

> From: ZheNing Hu <adlternative@gmail.com>
> [...]
> +	if (skip_prefix(start, "(objectmode)", &p))
> +		strbuf_addf(sb, "%06o", data->ce->ce_mode);
> +	else if (skip_prefix(start, "(objectname)", &p))
> +		strbuf_add_unique_abbrev(sb, &data->ce->oid, abbrev);
> +	else if (skip_prefix(start, "(stage)", &p))
> +		strbuf_addf(sb, "%d", ce_stage(data->ce));
> +	else if (skip_prefix(start, "(eol)", &p))
> +		write_eolinfo_to_buf(sb, data->istate,
> +				     data->ce, data->pathname);
> +	else if (skip_prefix(start, "(path)", &p))
> +		write_name_to_buf(sb, data->pathname);
> +	else if (skip_prefix(start, "(ctime)", &p))
> +		strbuf_addf(sb, "ctime: %u:%u",
> +			    sd->sd_ctime.sec, sd->sd_ctime.nsec);
> +	else if (skip_prefix(start, "(mtime)", &p))
> +		strbuf_addf(sb, "mtime: %u:%u",
> +			    sd->sd_mtime.sec, sd->sd_mtime.nsec);
> +	else if (skip_prefix(start, "(dev)", &p))
> +		strbuf_addf(sb, "dev: %u", sd->sd_dev);
> +	else if (skip_prefix(start, "(ino)", &p))
> +		strbuf_addf(sb, "ino: %u", sd->sd_ino);
> +	else if (skip_prefix(start, "(uid)", &p))
> +		strbuf_addf(sb, "uid: %u", sd->sd_uid);
> +	else if (skip_prefix(start, "(gid)", &p))
> +		strbuf_addf(sb, "gid: %u", sd->sd_gid);
> +	else if (skip_prefix(start, "(size)", &p))
> +		strbuf_addf(sb, "size: %u", sd->sd_size);
> +	else if (skip_prefix(start, "(flags)", &p))
> +		strbuf_addf(sb, "flags: %x", data->ce->ce_flags);


In my mind almost the entire point of a --format is that you can
e.g. \0-delimit it, and don't need to do other parsing games.

So this really should be adding just e.g. "%x", not "flags: %x", 

Similarly, let's no have :-delimited fields. First, for a formatted
number "1656077225:850723245" is just bizarre for %(ctime), let's use
".", not ":", so: "1656077225.850723245".

And let's call that %(ctime), then have (which is trivial to add) a
%(ctime:sec) and %(ctime:nsec), so someone who wants to format this can
parse it as they please, ditto for mtime.

Looking at your tests it seemed you went down the route of aligning the
output with the --debug output, which is already pre-formatted. I.e. to
make what you have here match:

                printf("  ctime: %u:%u\n", sd->sd_ctime.sec, sd->sd_ctime.nsec);
                printf("  mtime: %u:%u\n", sd->sd_mtime.sec, sd->sd_mtime.nsec);
                printf("  dev: %u\tino: %u\n", sd->sd_dev, sd->sd_ino);
                printf("  uid: %u\tgid: %u\n", sd->sd_uid, sd->sd_gid);
                printf("  size: %u\tflags: %x\n", sd->sd_size, ce->ce_flags);

I think that's a mistake, we should be able to emit those individual
%-specifiers instead, not that line as-is without the " " prefix and
"\n" suffix.

> +
> +	if (format && (show_stage || show_others || show_killed ||
> +		show_resolve_undo || skipping_duplicates || debug_mode))
> +			die(_("ls-files --format cannot used with -s, -o, -k, --resolve-undo, --deduplicate, --debug"));

Use usage_msg_opt() or usage_msg_optf() here instead of die(), and no
need to include "ls-files " in the message.

See die_for_incompatible_opt4, maybe you can just use that instead? A
bit painful, but:

    die_for_incompatible_opt4(format, "--format", show_stage, "-s", show_others, "-o", show_killed, "-k");
    die_for_incompatible_opt4(format, "--format", show_resolve_undo, "--resolve-undo", skipping_duplicates, "--deduplicate", debug_mode, "--debug");

But urgh, that helper really should use usage_msg_opt() instead, but
using it for now as-is probably sucks less.

I also think we should not forbid combining this wtih --debug, it's
helpful to construct a format. This seems to work:
		
	diff --git a/builtin/ls-files.c b/builtin/ls-files.c
	index 387641b32df..82f13edef7e 100644
	--- a/builtin/ls-files.c
	+++ b/builtin/ls-files.c
	@@ -343,12 +343,17 @@ static void show_ce(struct repository *repo, struct dir_struct *dir,
	 				  S_ISGITLINK(ce->ce_mode))) {
	 		if (format) {
	 			show_ce_fmt(repo, ce, format, fullname);
	-			return;
	+			if (!debug_mode)
	+				return;
	 		}
	 
	 		tag = get_tag(ce, tag);
	 
	-		if (!show_stage) {
	+		if (format) {
	+			if (!debug_mode)
	+				BUG("unreachable");
	+			; /* for --debug */
	+		} else if (!show_stage) {
	 			fputs(tag, stdout);
	 		} else {
	 			printf("%s%06o %s %d\t",
	@@ -814,7 +819,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
	 	}
	 
	 	if (format && (show_stage || show_others || show_killed ||
	-		show_resolve_undo || skipping_duplicates || debug_mode))
	+		show_resolve_undo || skipping_duplicates))
	 			die(_("ls-files --format cannot used with -s, -o, -k, --resolve-undo, --deduplicate, --debug"));
	 
	 	if (show_tag || show_valid_bit || show_fsmonitor_bit) {
	
I.e. we'll get:
	
	$ ./git ls-files --debug --format='<%(flags) %(path)>'  -- po/is.po
	<flags: 0 po/is.po>
	po/is.po
	  ctime: 1654300098:369653868
	  mtime: 1654300098:369653868
	  dev: 2306     ino: 10487322
	  uid: 1001     gid: 1001
	  size: 3370    flags: 0

Which I think is quite useful when poking around in this an coming up
with a format.

> +
>  	if (show_tag || show_valid_bit || show_fsmonitor_bit) {
>  		tag_cached = "H ";
>  		tag_unmerged = "M ";
> diff --git a/t/t3013-ls-files-format.sh b/t/t3013-ls-files-format.sh
> new file mode 100755
> index 00000000000..8c3ef2df138
> --- /dev/null
> +++ b/t/t3013-ls-files-format.sh
> @@ -0,0 +1,124 @@
> +#!/bin/sh
> +
> +test_description='git ls-files --format test'
> +

Add this line here:

TEST_PASSES_SANITIZE_LEAK=true

I.e. just before test-lib.sh, see other test examples. Then we'll test
this under SANITIZE=leak in CI, to ensure it doesn't leak memory.

> +. ./test-lib.sh
> +
> +test_expect_success 'setup' '
> +	echo o1 >o1 &&
> +	echo o2 >o2 &&
> +	git add o1 o2 &&
> +	git add --chmod +x o1 &&
> +	git commit -m base
> +'
> +
> [...]

> +for flag in -s -o -k --resolve-undo --deduplicate --debug
> +do
> +	test_expect_success "git ls-files --format is incompatible with $flag" '
> +		test_must_fail git ls-files --format="%(objectname)" $flag
> +	'
> +done

Nit: I think it's good to move these sotrs of tests before "setup", and
give them a "usage: " prefix, see some other existing examples.

We usually use test_expect_code 129 for those, depending on if you'll
end up with die() or not...

nit: missing \n before this line:

> +test_done
>
> base-commit: ab336e8f1c8009c8b1aab8deb592148e69217085
Junio C Hamano June 24, 2022, 3:28 p.m. UTC | #6
Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> We also say that we will "show the full path names" in that
> documentation.

The primary issue is not the presence of "name" there, but the lack
of "path" in the word chosen.

Many things can have "name" (including "object name"), and "path",
not "name", in "path name" is what clarifies what kind of name it
is.  Given that --format placeholders include "objectname", it does
not make a good design to use "name" alone without saying what kind
of "name" it is.

Calling it "pathname", not just "path", is perfectly OK.  But if
there is no other things the word "path" could refer to in this
context, which I think is the case here, "path" would be acceptable.
Junio C Hamano June 24, 2022, 3:33 p.m. UTC | #7
Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> On Tue, Jun 21 2022, ZheNing Hu via GitGitGadget wrote:
>> +	if (skip_prefix(start, "(objectmode)", &p))
>> +		strbuf_addf(sb, "%06o", data->ce->ce_mode);
>> +	else if (skip_prefix(start, "(objectname)", &p))
>> +		strbuf_add_unique_abbrev(sb, &data->ce->oid, abbrev);
>> +	else if (skip_prefix(start, "(stage)", &p))
>> +		strbuf_addf(sb, "%d", ce_stage(data->ce));
>> +	else if (skip_prefix(start, "(path)", &p))
>> +		write_name_to_buf(sb, data->pathname);

These are just "values".

>> +	else if (skip_prefix(start, "(ctime)", &p))
>> +		strbuf_addf(sb, "ctime: %u:%u",
>> +			    sd->sd_ctime.sec, sd->sd_ctime.nsec);
>> +	else if (skip_prefix(start, "(mtime)", &p))
>> +		strbuf_addf(sb, "mtime: %u:%u",
>> +			    sd->sd_mtime.sec, sd->sd_mtime.nsec);
>> +	else if (skip_prefix(start, "(dev)", &p))
>> +		strbuf_addf(sb, "dev: %u", sd->sd_dev);
>> +	else if (skip_prefix(start, "(ino)", &p))
>> +		strbuf_addf(sb, "ino: %u", sd->sd_ino);
>> +	else if (skip_prefix(start, "(uid)", &p))
>> +		strbuf_addf(sb, "uid: %u", sd->sd_uid);
>> +	else if (skip_prefix(start, "(gid)", &p))
>> +		strbuf_addf(sb, "gid: %u", sd->sd_gid);
>> +	else if (skip_prefix(start, "(size)", &p))
>> +		strbuf_addf(sb, "size: %u", sd->sd_size);
>> +	else if (skip_prefix(start, "(flags)", &p))
>> +		strbuf_addf(sb, "flags: %x", data->ce->ce_flags);

These are not.

> In my mind almost the entire point of a --format is that you can
> e.g. \0-delimit it, and don't need to do other parsing games.
>
> So this really should be adding just e.g. "%x", not "flags: %x", 

Yes.  A very good point, if we were showing these fields (I already
said I doubt it is useful), they should also show just "values"
After all, people can do "--format=mode: %(objectmode)" if they want
an identifying tag before the value.

Thanks.
ZheNing Hu June 26, 2022, 1:01 p.m. UTC | #8
Phillip Wood <phillip.wood123@gmail.com> 于2022年6月23日周四 22:06写道:
>
> Hi ZheNing
> > [...]
> > +It is possible to print in a custom format by using the `--format`
> > +option, which is able to interpolate different fields using
> > +a `%(fieldname)` notation. For example, if you only care about the
> > +"objectname" and "path" fields, you can execute with a specific
> > +"--format" like
> > +
> > +     git ls-files --format='%(objectname) %(path)'
> > +
> > +FIELD NAMES
> > +-----------
> > +Various values from structured fields can be used to interpolate
> > +into the resulting output. For each outputting line, the following
> > +names can be used:
> > +
> > +objectmode::
> > +     The mode of the file which is in the index.
> > +objectname::
> > +     The name of the file which is in the index.
> > +stage::
> > +     The stage of the file which is in the index.
> > +eol::
> > +     The <eolinfo> and <eolattr> of files both in the
> > +     index and the work-tree.
>
> Looking at the test for this option I think it needs more work, why
> should --format arbitrarily append a tab to the end of the output? - the
> user should be able to specify a separator if they want one as part of
> the format string. Also I'm not sure why there is so much whitespace in
> the output.
>

Because I used old output format in write_eolinfo(), now I think it's wrong,
I will separate it to three parts: %(eolinfo:index), %(eolinfo:worktree),
%(eolattr).

> If %(flags) is going to be useful then I think we need to think about
> how they are printed and document that. At the moment they are printed
> as a hexadecimal number which is fine for debugging but probably not
> going to be useful for something like --format. I think printing
> documented symbolic names with some kind of separator (a comma maybe)
> between them is probably more useful
>

Agree.

>  > [...]
> > +test_expect_success 'git ls-files --format eol' '
> > +     printf "i/lf    w/lf    attr/                 \t\n" >expect &&
> > +     printf "i/lf    w/lf    attr/                 \t\n" >>expect &&
> > +     git ls-files --format="%(eol)" --eol >actual &&
>
> I'm not sure why this is passing --eol as well as --format='%(eol)' -
> shouldn't that combination of flags be an error?
>

Thank you for reminding, will be corrected.

> Best Wishes
>
> Phillip

ZheNing Hu
ZheNing Hu June 26, 2022, 1:05 p.m. UTC | #9
Phillip Wood <phillip.wood123@gmail.com> 于2022年6月24日周五 18:16写道:

> >>> +ctime::
> >>> +   The create time of file which is in the index.
> >>
> >> This is printed with a prefix 'ctime:' (the same applies to the format
> >> specifiers below) I think we should omit that and just print the data
> >> so the user can choose the format they want.
> >>
> >>> +mtime::
> >>> +   The modified time of file which is in the index.
> >
> > These are only the low-bits of the full timestamp, not ctime/mtime
> > themselves.
> >
> > But stepping back a bit, why do we need to include them in the
> > output?  What workflow and use case are we trying to help?  Dump
> > output from "stat <path>" equivalent from ls-files and compare with
> > "stat ." output to see which ones are stale?  Or is there any value
> > to see the value of, say, ctime as an individual data item?
> >
> >>> +dev::
> >>> +   The ID of device containing file which is in the index.
> >>> +ino::
> >>> +   The inode number of file which is in the index.
> >>> +uid::
> >>> +   The user id of file owner which is in the index.
> >>> +gid::
> >>> +   The group id of file owner which is in the index.
> >
> > Again, why do we need to include these in the output?
> >
> > Wouldn't it be sufficient, as well as a lot more useful, to show a
> > single bit "the cached stat info matches what is in the working tree
> > (yes/no)"?
>
> That does sound useful
>
> >>> +flags::
> >>> +   The flags of the file in the index which include
> >>> +   in-memory only flags and some extended on-disk flags.
> >>
> >> If %(flags) is going to be useful then I think we need to think about
> >> how they are printed and document that. At the moment they are printed
> >> as a hexadecimal number which is fine for debugging but probably not
> >> going to be useful for something like --format. I think printing
> >> documented symbolic names with some kind of separator (a comma maybe)
> >> between them is probably more useful
> >
> > I am guessing that most of the above are only useful for curious
> > geeks and those who are debugging their new tweak to the code that
> > touches the index, i.e. a debugging feature.  But these folks can
> > run "git" under a debugger, and they probably have to do so when
> > they are seeing an unexpected value in the flags member of a cache
> > entry anyway.  So I am not sure whom this field is intended to help.
>
> I wondered about that as well, but thought there might be a plausible
> use if someone wants to check if an entry is marked intent-to-add, or
> has the skip-worktree/spare-index bits set (are there other ways to
> inspect those?)
>

I think this feature will be useful too, but it may not belong to this patch.
We can discuss how to implement it later.

> Best Wishes
>
> Phillip

ZheNing Hu
ZheNing Hu June 26, 2022, 1:34 p.m. UTC | #10
Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2022年6月24日周五 21:46写道:
>
>
> On Tue, Jun 21 2022, ZheNing Hu via GitGitGadget wrote:
>
> > From: ZheNing Hu <adlternative@gmail.com>
> > [...]
>
> In my mind almost the entire point of a --format is that you can
> e.g. \0-delimit it, and don't need to do other parsing games.
>
> So this really should be adding just e.g. "%x", not "flags: %x",
>

Yeah, I admit that there really shouldn't use extra formatting here.

> Similarly, let's no have :-delimited fields. First, for a formatted
> number "1656077225:850723245" is just bizarre for %(ctime), let's use
> ".", not ":", so: "1656077225.850723245".
>
> And let's call that %(ctime), then have (which is trivial to add) a
> %(ctime:sec) and %(ctime:nsec), so someone who wants to format this can
> parse it as they please, ditto for mtime.
>
> Looking at your tests it seemed you went down the route of aligning the
> output with the --debug output, which is already pre-formatted. I.e. to
> make what you have here match:
>
>                 printf("  ctime: %u:%u\n", sd->sd_ctime.sec, sd->sd_ctime.nsec);
>                 printf("  mtime: %u:%u\n", sd->sd_mtime.sec, sd->sd_mtime.nsec);
>                 printf("  dev: %u\tino: %u\n", sd->sd_dev, sd->sd_ino);
>                 printf("  uid: %u\tgid: %u\n", sd->sd_uid, sd->sd_gid);
>                 printf("  size: %u\tflags: %x\n", sd->sd_size, ce->ce_flags);
>
> I think that's a mistake, we should be able to emit those individual
> %-specifiers instead, not that line as-is without the " " prefix and
> "\n" suffix.
>

Yeah, agree. But now I just want to delete all atoms from %(ctime) to %(flags),
and let --debug can work with --format.

> > +
> > +     if (format && (show_stage || show_others || show_killed ||
> > +             show_resolve_undo || skipping_duplicates || debug_mode))
> > +                     die(_("ls-files --format cannot used with -s, -o, -k, --resolve-undo, --deduplicate, --debug"));
>
> Use usage_msg_opt() or usage_msg_optf() here instead of die(), and no
> need to include "ls-files " in the message.
>
> See die_for_incompatible_opt4, maybe you can just use that instead? A
> bit painful, but:
>
>     die_for_incompatible_opt4(format, "--format", show_stage, "-s", show_others, "-o", show_killed, "-k");
>     die_for_incompatible_opt4(format, "--format", show_resolve_undo, "--resolve-undo", skipping_duplicates, "--deduplicate", debug_mode, "--debug");
>

Good suggestion. I am curious about why there is no function like
die_for_incompatible_opt4() with variable parameters?

> But urgh, that helper really should use usage_msg_opt() instead, but
> using it for now as-is probably sucks less.
>
> I also think we should not forbid combining this wtih --debug, it's
> helpful to construct a format. This seems to work:
>
>         diff --git a/builtin/ls-files.c b/builtin/ls-files.c
>         index 387641b32df..82f13edef7e 100644
>         --- a/builtin/ls-files.c
>         +++ b/builtin/ls-files.c
>         @@ -343,12 +343,17 @@ static void show_ce(struct repository *repo, struct dir_struct *dir,
>                                           S_ISGITLINK(ce->ce_mode))) {
>                         if (format) {
>                                 show_ce_fmt(repo, ce, format, fullname);
>         -                       return;
>         +                       if (!debug_mode)
>         +                               return;
>                         }
>
>                         tag = get_tag(ce, tag);
>
>         -               if (!show_stage) {
>         +               if (format) {
>         +                       if (!debug_mode)
>         +                               BUG("unreachable");
>         +                       ; /* for --debug */
>         +               } else if (!show_stage) {
>                                 fputs(tag, stdout);
>                         } else {
>                                 printf("%s%06o %s %d\t",
>         @@ -814,7 +819,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
>                 }
>
>                 if (format && (show_stage || show_others || show_killed ||
>         -               show_resolve_undo || skipping_duplicates || debug_mode))
>         +               show_resolve_undo || skipping_duplicates))
>                                 die(_("ls-files --format cannot used with -s, -o, -k, --resolve-undo, --deduplicate, --debug"));
>
>                 if (show_tag || show_valid_bit || show_fsmonitor_bit) {
>
> I.e. we'll get:
>
>         $ ./git ls-files --debug --format='<%(flags) %(path)>'  -- po/is.po
>         <flags: 0 po/is.po>
>         po/is.po
>           ctime: 1654300098:369653868
>           mtime: 1654300098:369653868
>           dev: 2306     ino: 10487322
>           uid: 1001     gid: 1001
>           size: 3370    flags: 0
>
> Which I think is quite useful when poking around in this an coming up
> with a format.
>

Maybe something like this will be easier?


@@ -343,6 +335,7 @@ static void show_ce(struct repository *repo,
struct dir_struct *dir,
                                  S_ISGITLINK(ce->ce_mode))) {
                if (format) {
                        show_ce_fmt(repo, ce, format, fullname);
+                       print_debug(ce);
                        return;
                }


> > +
> >       if (show_tag || show_valid_bit || show_fsmonitor_bit) {
> >               tag_cached = "H ";
> >               tag_unmerged = "M ";
> > diff --git a/t/t3013-ls-files-format.sh b/t/t3013-ls-files-format.sh
> > new file mode 100755
> > index 00000000000..8c3ef2df138
> > --- /dev/null
> > +++ b/t/t3013-ls-files-format.sh
> > @@ -0,0 +1,124 @@
> > +#!/bin/sh
> > +
> > +test_description='git ls-files --format test'
> > +
>
> Add this line here:
>
> TEST_PASSES_SANITIZE_LEAK=true
>
> I.e. just before test-lib.sh, see other test examples. Then we'll test
> this under SANITIZE=leak in CI, to ensure it doesn't leak memory.
>
> > +. ./test-lib.sh
> > +
> > +test_expect_success 'setup' '
> > +     echo o1 >o1 &&
> > +     echo o2 >o2 &&
> > +     git add o1 o2 &&
> > +     git add --chmod +x o1 &&
> > +     git commit -m base
> > +'
> > +
> > [...]
>
> > +for flag in -s -o -k --resolve-undo --deduplicate --debug
> > +do
> > +     test_expect_success "git ls-files --format is incompatible with $flag" '
> > +             test_must_fail git ls-files --format="%(objectname)" $flag
> > +     '
> > +done
>
> Nit: I think it's good to move these sotrs of tests before "setup", and
> give them a "usage: " prefix, see some other existing examples.
>

Agree.

> We usually use test_expect_code 129 for those, depending on if you'll
> end up with die() or not...
>
> nit: missing \n before this line:
>
> > +test_done
> >
> > base-commit: ab336e8f1c8009c8b1aab8deb592148e69217085
>

ZheNing Hu
ZheNing Hu June 26, 2022, 1:35 p.m. UTC | #11
Junio C Hamano <gitster@pobox.com> 于2022年6月24日周五 23:33写道:
>
> Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:
>
> > On Tue, Jun 21 2022, ZheNing Hu via GitGitGadget wrote:
> >> +    if (skip_prefix(start, "(objectmode)", &p))
> >> +            strbuf_addf(sb, "%06o", data->ce->ce_mode);
> >> +    else if (skip_prefix(start, "(objectname)", &p))
> >> +            strbuf_add_unique_abbrev(sb, &data->ce->oid, abbrev);
> >> +    else if (skip_prefix(start, "(stage)", &p))
> >> +            strbuf_addf(sb, "%d", ce_stage(data->ce));
> >> +    else if (skip_prefix(start, "(path)", &p))
> >> +            write_name_to_buf(sb, data->pathname);
>
> These are just "values".
>
> >> +    else if (skip_prefix(start, "(ctime)", &p))
> >> +            strbuf_addf(sb, "ctime: %u:%u",
> >> +                        sd->sd_ctime.sec, sd->sd_ctime.nsec);
> >> +    else if (skip_prefix(start, "(mtime)", &p))
> >> +            strbuf_addf(sb, "mtime: %u:%u",
> >> +                        sd->sd_mtime.sec, sd->sd_mtime.nsec);
> >> +    else if (skip_prefix(start, "(dev)", &p))
> >> +            strbuf_addf(sb, "dev: %u", sd->sd_dev);
> >> +    else if (skip_prefix(start, "(ino)", &p))
> >> +            strbuf_addf(sb, "ino: %u", sd->sd_ino);
> >> +    else if (skip_prefix(start, "(uid)", &p))
> >> +            strbuf_addf(sb, "uid: %u", sd->sd_uid);
> >> +    else if (skip_prefix(start, "(gid)", &p))
> >> +            strbuf_addf(sb, "gid: %u", sd->sd_gid);
> >> +    else if (skip_prefix(start, "(size)", &p))
> >> +            strbuf_addf(sb, "size: %u", sd->sd_size);
> >> +    else if (skip_prefix(start, "(flags)", &p))
> >> +            strbuf_addf(sb, "flags: %x", data->ce->ce_flags);
>
> These are not.
>
Agree. So I just remove them as you see. If someone else
need them for some reason, we can add them back.

ZheNing Hu
Junio C Hamano June 27, 2022, 8:22 a.m. UTC | #12
ZheNing Hu <adlternative@gmail.com> writes:

>> >> +    else if (skip_prefix(start, "(path)", &p))
>> >> +            write_name_to_buf(sb, data->pathname);
>>
>> These are just "values".
>> ...
>> >> +    else if (skip_prefix(start, "(size)", &p))
>> >> +            strbuf_addf(sb, "size: %u", sd->sd_size);
>> >> +    else if (skip_prefix(start, "(flags)", &p))
>> >> +            strbuf_addf(sb, "flags: %x", data->ce->ce_flags);
>>
>> These are not.
>>
> ... If someone else
> need them for some reason, we can add them back.

If someone else needs to see "size:" printed in front of the value
of sd_size member, we DO NOT HAVE TO DO ANYTHING!  That someone else
can write "--format=size: %(size)" to do so themselves.
ZheNing Hu June 27, 2022, 11:06 a.m. UTC | #13
Junio C Hamano <gitster@pobox.com> 于2022年6月27日周一 16:22写道:
>
> ZheNing Hu <adlternative@gmail.com> writes:
>
> >> >> +    else if (skip_prefix(start, "(path)", &p))
> >> >> +            write_name_to_buf(sb, data->pathname);
> >>
> >> These are just "values".
> >> ...
> >> >> +    else if (skip_prefix(start, "(size)", &p))
> >> >> +            strbuf_addf(sb, "size: %u", sd->sd_size);
> >> >> +    else if (skip_prefix(start, "(flags)", &p))
> >> >> +            strbuf_addf(sb, "flags: %x", data->ce->ce_flags);
> >>
> >> These are not.
> >>
> > ... If someone else
> > need them for some reason, we can add them back.
>
> If someone else needs to see "size:" printed in front of the value
> of sd_size member, we DO NOT HAVE TO DO ANYTHING!  That someone else
> can write "--format=size: %(size)" to do so themselves.
>
>

Oh, sorry, I mean if someone need some atoms from %(size) to %(flags), we can
add them back.
Junio C Hamano June 27, 2022, 3:41 p.m. UTC | #14
ZheNing Hu <adlternative@gmail.com> writes:

> Junio C Hamano <gitster@pobox.com> 于2022年6月27日周一 16:22写道:
>>
>> ZheNing Hu <adlternative@gmail.com> writes:
>>
>> >> >> +    else if (skip_prefix(start, "(path)", &p))
>> >> >> +            write_name_to_buf(sb, data->pathname);
>> >>
>> >> These are just "values".
>> >> ...
>> >> >> +    else if (skip_prefix(start, "(size)", &p))
>> >> >> +            strbuf_addf(sb, "size: %u", sd->sd_size);
>> >> >> +    else if (skip_prefix(start, "(flags)", &p))
>> >> >> +            strbuf_addf(sb, "flags: %x", data->ce->ce_flags);
>> >>
>> >> These are not.
>> >>
>> > ... If someone else
>> > need them for some reason, we can add them back.
>>
>> If someone else needs to see "size:" printed in front of the value
>> of sd_size member, we DO NOT HAVE TO DO ANYTHING!  That someone else
>> can write "--format=size: %(size)" to do so themselves.
>
> Oh, sorry, I mean if someone need some atoms from %(size) to %(flags), we can
> add them back.

Ah, I see.  I am not sure about the %(flags) to help the debugging
mode, but giving a single bit "is it dirty?" would be more useful
than giving the cached stat info, I would think.

Thanks.
ZheNing Hu July 1, 2022, 1:30 p.m. UTC | #15
Junio C Hamano <gitster@pobox.com> 于2022年6月27日周一 23:41写道:
>
> ZheNing Hu <adlternative@gmail.com> writes:
>
> > Oh, sorry, I mean if someone need some atoms from %(size) to %(flags), we can
> > add them back.
>
> Ah, I see.  I am not sure about the %(flags) to help the debugging
> mode, but giving a single bit "is it dirty?" would be more useful
> than giving the cached stat info, I would think.
>

diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 061a205576..ccb3fd1676 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -296,6 +296,8 @@ static size_t expand_show_index(struct strbuf *sb,
const char *start,
                write_eolattr_to_buf(sb, data->istate, data->pathname);
        else if (skip_prefix(start, "(path)", &p))
                write_name_to_buf(sb, data->pathname);
+       else if (skip_prefix(start, "(updatetodate)", &p))
+               strbuf_addstr(sb, ce_uptodate(data->ce) ? "true" : "false");
        else
                die(_("bad ls-files format: %%%.*s"), (int)len, start);


I try to use ce_uptodate(ce) to check its flags, but unfortunately,
git ls-files --format="%(updatetodate)" output all files are "false" :(
That's because we have not mark some flags in the cache entry, right?

> Thanks.
>

ZheNing Hu
diff mbox series

Patch

diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index 0dabf3f0ddc..39211bde797 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -20,7 +20,7 @@  SYNOPSIS
 		[--exclude-standard]
 		[--error-unmatch] [--with-tree=<tree-ish>]
 		[--full-name] [--recurse-submodules]
-		[--abbrev[=<n>]] [--] [<file>...]
+		[--abbrev[=<n>]] [--format=<format>] [--] [<file>...]
 
 DESCRIPTION
 -----------
@@ -192,6 +192,13 @@  followed by the  ("attr/<eolattr>").
 	to the contained files. Sparse directories will be shown with a
 	trailing slash, such as "x/" for a sparse directory "x".
 
+--format=<format>::
+	A string that interpolates `%(fieldname)` from the result being shown.
+	It also interpolates `%%` to `%`, and `%xx` where `xx` are hex digits
+	interpolates to character with hex code `xx`; for example `%00`
+	interpolates to `\0` (NUL), `%09` to `\t` (TAB) and %0a to `\n` (LF).
+	--format cannot be combined with `-s`, `-o`, `-k`, `--resolve-undo` and
+	`--debug`.
 \--::
 	Do not interpret any more arguments as options.
 
@@ -223,6 +230,48 @@  quoted as explained for the configuration variable `core.quotePath`
 (see linkgit:git-config[1]).  Using `-z` the filename is output
 verbatim and the line is terminated by a NUL byte.
 
+It is possible to print in a custom format by using the `--format`
+option, which is able to interpolate different fields using
+a `%(fieldname)` notation. For example, if you only care about the
+"objectname" and "path" fields, you can execute with a specific
+"--format" like
+
+	git ls-files --format='%(objectname) %(path)'
+
+FIELD NAMES
+-----------
+Various values from structured fields can be used to interpolate
+into the resulting output. For each outputting line, the following
+names can be used:
+
+objectmode::
+	The mode of the file which is in the index.
+objectname::
+	The name of the file which is in the index.
+stage::
+	The stage of the file which is in the index.
+eol::
+	The <eolinfo> and <eolattr> of files both in the
+	index and the work-tree.
+path::
+	The pathname of the file which is in the index.
+ctime::
+	The create time of file which is in the index.
+mtime::
+	The modified time of file which is in the index.
+dev::
+	The ID of device containing file which is in the index.
+ino::
+	The inode number of file which is in the index.
+uid::
+	The user id of file owner which is in the index.
+gid::
+	The group id of file owner which is in the index.
+size::
+	The size of the file which is in the index.
+flags::
+	The flags of the file in the index which include
+	in-memory only flags and some extended on-disk flags.
 
 EXCLUDE PATTERNS
 ----------------
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index e791b65e7e9..387641b32df 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -11,6 +11,7 @@ 
 #include "quote.h"
 #include "dir.h"
 #include "builtin.h"
+#include "strbuf.h"
 #include "tree.h"
 #include "cache-tree.h"
 #include "parse-options.h"
@@ -48,6 +49,7 @@  static char *ps_matched;
 static const char *with_tree;
 static int exc_given;
 static int exclude_args;
+static const char *format;
 
 static const char *tag_cached = "";
 static const char *tag_unmerged = "";
@@ -58,8 +60,8 @@  static const char *tag_modified = "";
 static const char *tag_skip_worktree = "";
 static const char *tag_resolve_undo = "";
 
-static void write_eolinfo(struct index_state *istate,
-			  const struct cache_entry *ce, const char *path)
+static void write_eolinfo_internal(struct strbuf *sb, struct index_state *istate,
+				   const struct cache_entry *ce, const char *path)
 {
 	if (show_eol) {
 		struct stat st;
@@ -71,10 +73,25 @@  static void write_eolinfo(struct index_state *istate,
 							       ce->name);
 		if (!lstat(path, &st) && S_ISREG(st.st_mode))
 			w_txt = get_wt_convert_stats_ascii(path);
-		printf("i/%-5s w/%-5s attr/%-17s\t", i_txt, w_txt, a_txt);
+		if (sb)
+			strbuf_addf(sb, "i/%-5s w/%-5s attr/%-17s\t", i_txt, w_txt, a_txt);
+		else
+			printf("i/%-5s w/%-5s attr/%-17s\t", i_txt, w_txt, a_txt);
 	}
 }
 
+static void write_eolinfo(struct index_state *istate,
+			  const struct cache_entry *ce, const char *path)
+{
+	write_eolinfo_internal(NULL, istate, ce, path);
+}
+
+static void write_eolinfo_to_buf(struct strbuf *sb, struct index_state *istate,
+				 const struct cache_entry *ce, const char *path)
+{
+	write_eolinfo_internal(sb, istate, ce, path);
+}
+
 static void write_name(const char *name)
 {
 	/*
@@ -85,6 +102,15 @@  static void write_name(const char *name)
 				   stdout, line_terminator);
 }
 
+static void write_name_to_buf(struct strbuf *sb, const char *name)
+{
+	const char *rel = relative_path(name, prefix_len ? prefix : NULL, sb);
+	if (line_terminator)
+		quote_c_style(rel, sb, NULL, 0);
+	else
+		strbuf_add(sb, rel, strlen(rel));
+}
+
 static const char *get_tag(const struct cache_entry *ce, const char *tag)
 {
 	static char alttag[4];
@@ -222,6 +248,85 @@  static void show_submodule(struct repository *superproject,
 	repo_clear(&subrepo);
 }
 
+struct show_index_data {
+	const char *tag;
+	const char *pathname;
+	struct index_state *istate;
+	const struct cache_entry *ce;
+};
+
+static size_t expand_show_index(struct strbuf *sb, const char *start,
+			       void *context)
+{
+	struct show_index_data *data = context;
+	const char *end;
+	const char *p;
+	const struct stat_data *sd = &data->ce->ce_stat_data;
+	size_t len = strbuf_expand_literal_cb(sb, start, NULL);
+	if (len)
+		return len;
+	if (*start != '(')
+		die(_("bad ls-files format: element '%s' "
+		      "does not start with '('"), start);
+
+	end = strchr(start + 1, ')');
+	if (!end)
+		die(_("bad ls-files format: element '%s'"
+		      "does not end in ')'"), start);
+
+	len = end - start + 1;
+	if (skip_prefix(start, "(objectmode)", &p))
+		strbuf_addf(sb, "%06o", data->ce->ce_mode);
+	else if (skip_prefix(start, "(objectname)", &p))
+		strbuf_add_unique_abbrev(sb, &data->ce->oid, abbrev);
+	else if (skip_prefix(start, "(stage)", &p))
+		strbuf_addf(sb, "%d", ce_stage(data->ce));
+	else if (skip_prefix(start, "(eol)", &p))
+		write_eolinfo_to_buf(sb, data->istate,
+				     data->ce, data->pathname);
+	else if (skip_prefix(start, "(path)", &p))
+		write_name_to_buf(sb, data->pathname);
+	else if (skip_prefix(start, "(ctime)", &p))
+		strbuf_addf(sb, "ctime: %u:%u",
+			    sd->sd_ctime.sec, sd->sd_ctime.nsec);
+	else if (skip_prefix(start, "(mtime)", &p))
+		strbuf_addf(sb, "mtime: %u:%u",
+			    sd->sd_mtime.sec, sd->sd_mtime.nsec);
+	else if (skip_prefix(start, "(dev)", &p))
+		strbuf_addf(sb, "dev: %u", sd->sd_dev);
+	else if (skip_prefix(start, "(ino)", &p))
+		strbuf_addf(sb, "ino: %u", sd->sd_ino);
+	else if (skip_prefix(start, "(uid)", &p))
+		strbuf_addf(sb, "uid: %u", sd->sd_uid);
+	else if (skip_prefix(start, "(gid)", &p))
+		strbuf_addf(sb, "gid: %u", sd->sd_gid);
+	else if (skip_prefix(start, "(size)", &p))
+		strbuf_addf(sb, "size: %u", sd->sd_size);
+	else if (skip_prefix(start, "(flags)", &p))
+		strbuf_addf(sb, "flags: %x", data->ce->ce_flags);
+	else
+		die(_("bad ls-files format: %%%.*s"), (int)len, start);
+
+	return len;
+}
+
+static void show_ce_fmt(struct repository *repo, const struct cache_entry *ce,
+			const char *format, const char *fullname) {
+
+	struct show_index_data data = {
+		.pathname = fullname,
+		.istate = repo->index,
+		.ce = ce,
+	};
+
+	struct strbuf sb = STRBUF_INIT;
+	strbuf_expand(&sb, format, expand_show_index, &data);
+	strbuf_addch(&sb, line_terminator);
+	fwrite(sb.buf, sb.len, 1, stdout);
+	strbuf_release(&sb);
+	return;
+}
+
 static void show_ce(struct repository *repo, struct dir_struct *dir,
 		    const struct cache_entry *ce, const char *fullname,
 		    const char *tag)
@@ -236,6 +341,11 @@  static void show_ce(struct repository *repo, struct dir_struct *dir,
 				  max_prefix_len, ps_matched,
 				  S_ISDIR(ce->ce_mode) ||
 				  S_ISGITLINK(ce->ce_mode))) {
+		if (format) {
+			show_ce_fmt(repo, ce, format, fullname);
+			return;
+		}
+
 		tag = get_tag(ce, tag);
 
 		if (!show_stage) {
@@ -675,6 +785,9 @@  int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 			 N_("suppress duplicate entries")),
 		OPT_BOOL(0, "sparse", &show_sparse_dirs,
 			 N_("show sparse directories in the presence of a sparse index")),
+		OPT_STRING_F(0, "format", &format, N_("format"),
+			     N_("format to use for the output"),
+			     PARSE_OPT_NONEG),
 		OPT_END()
 	};
 	int ret = 0;
@@ -699,6 +812,11 @@  int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 	for (i = 0; i < exclude_list.nr; i++) {
 		add_pattern(exclude_list.items[i].string, "", 0, pl, --exclude_args);
 	}
+
+	if (format && (show_stage || show_others || show_killed ||
+		show_resolve_undo || skipping_duplicates || debug_mode))
+			die(_("ls-files --format cannot used with -s, -o, -k, --resolve-undo, --deduplicate, --debug"));
+
 	if (show_tag || show_valid_bit || show_fsmonitor_bit) {
 		tag_cached = "H ";
 		tag_unmerged = "M ";
diff --git a/t/t3013-ls-files-format.sh b/t/t3013-ls-files-format.sh
new file mode 100755
index 00000000000..8c3ef2df138
--- /dev/null
+++ b/t/t3013-ls-files-format.sh
@@ -0,0 +1,124 @@ 
+#!/bin/sh
+
+test_description='git ls-files --format test'
+
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+	echo o1 >o1 &&
+	echo o2 >o2 &&
+	git add o1 o2 &&
+	git add --chmod +x o1 &&
+	git commit -m base
+'
+
+test_expect_success 'git ls-files --format objectmode' '
+	cat >expect <<-\EOF &&
+	100755
+	100644
+	EOF
+	git ls-files --format="%(objectmode)" -t >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --format objectname' '
+	oid1=$(git hash-object o1) &&
+	oid2=$(git hash-object o2) &&
+	cat >expect <<-EOF &&
+	$oid1
+	$oid2
+	EOF
+	git ls-files --format="%(objectname)" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --format eol' '
+	printf "i/lf    w/lf    attr/                 \t\n" >expect &&
+	printf "i/lf    w/lf    attr/                 \t\n" >>expect &&
+	git ls-files --format="%(eol)" --eol >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --format path' '
+	cat >expect <<-\EOF &&
+	o1
+	o2
+	EOF
+	git ls-files --format="%(path)" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --format ctime' '
+	git ls-files --debug >out &&
+	grep ctime out >expect &&
+	git ls-files --format="  %(ctime)" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --format mtime' '
+	git ls-files --debug >out &&
+	grep mtime out >expect &&
+	git ls-files --format="  %(mtime)" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --format dev and ino' '
+	git ls-files --debug >out &&
+	grep dev out >expect &&
+	git ls-files --format="  %(dev)%x09%(ino)" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --format uid and gid' '
+	git ls-files --debug >out &&
+	grep uid out >expect &&
+	git ls-files --format="  %(uid)%x09%(gid)" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --format with -m' '
+	echo change >o1 &&
+	cat >expect <<-\EOF &&
+	o1
+	EOF
+	git ls-files --format="%(path)" -m >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --format with -d' '
+	echo o3 >o3 &&
+	git add o3 &&
+	rm o3 &&
+	cat >expect <<-\EOF &&
+	o3
+	EOF
+	git ls-files --format="%(path)" -d >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --format size and flags' '
+	git ls-files --debug >out &&
+	grep size out >expect &&
+	git ls-files --format="  %(size)%x09%(flags)" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --format imitate --stage' '
+	git ls-files --stage >expect &&
+	git ls-files --format="%(objectmode) %(objectname) %(stage)%x09%(path)" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git ls-files --format imitate --debug' '
+	git ls-files --debug >expect &&
+	git ls-files --format="%(path)%x0a  %(ctime)%x0a  %(mtime)%x0a  %(dev)%x09%(ino)%x0a  %(uid)%x09%(gid)%x0a  %(size)%x09%(flags)" >actual &&
+	test_cmp expect actual
+'
+
+for flag in -s -o -k --resolve-undo --deduplicate --debug
+do
+	test_expect_success "git ls-files --format is incompatible with $flag" '
+		test_must_fail git ls-files --format="%(objectname)" $flag
+	'
+done
+test_done