diff mbox series

[2/2] multi-pack-index: use --object-dir real path

Message ID 0435406e2db6c5977928a2b0b0b79e66c0a078ee.1650553069.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series multi-pack-index: use real paths for --object-dir | expand

Commit Message

Derrick Stolee April 21, 2022, 2:57 p.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

The --object-dir argument to 'git multi-pack-index' allows a user to
specify an alternate to use instead of the local $GITDIR. This is used
by third-party tools like VFS for Git to maintain the pack-files in a
"shared object cache" used by multiple clones.

On Windows, the user can specify a path using a Windows-style file path
with backslashes such as "C:\Path\To\ObjectDir". This same path style is
used in the .git/objects/info/alternates file, so it already matches the
path of that alternate. However, find_odb() converts these paths to
real-paths for the comparison, which use forward slashes. As of the
previous change, lookup_multi_pack_index() uses real-paths, so it
correctly finds the target multi-pack-index when given these paths.

Some commands such as 'git multi-pack-index repack' call child processes
using the object_dir value, so it can be helpful to convert the path to
the real-path before sending it to those locations.

Adding the normalization in builtin/multi-pack-index.c is a little
complicated because of how the sub-commands were split in 60ca94769
(builtin/multi-pack-index.c: split sub-commands, 2021-03-30). The
--object-dir argument could be parsed before the sub-command name _or_
after it. Thus, create a normalize_object_dir() helper to call after all
arguments are parsed, but before any logic is run on that object dir.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 builtin/multi-pack-index.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

Comments

Victoria Dye April 21, 2022, 7:50 p.m. UTC | #1
Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <derrickstolee@github.com>
> 
> The --object-dir argument to 'git multi-pack-index' allows a user to
> specify an alternate to use instead of the local $GITDIR. This is used
> by third-party tools like VFS for Git to maintain the pack-files in a
> "shared object cache" used by multiple clones.
> 
> On Windows, the user can specify a path using a Windows-style file path
> with backslashes such as "C:\Path\To\ObjectDir". This same path style is
> used in the .git/objects/info/alternates file, so it already matches the
> path of that alternate. However, find_odb() converts these paths to
> real-paths for the comparison, which use forward slashes. As of the
> previous change, lookup_multi_pack_index() uses real-paths, so it
> correctly finds the target multi-pack-index when given these paths.
> 
> Some commands such as 'git multi-pack-index repack' call child processes
> using the object_dir value, so it can be helpful to convert the path to
> the real-path before sending it to those locations.
> 
> Adding the normalization in builtin/multi-pack-index.c is a little
> complicated because of how the sub-commands were split in 60ca94769
> (builtin/multi-pack-index.c: split sub-commands, 2021-03-30). The
> --object-dir argument could be parsed before the sub-command name _or_
> after it. Thus, create a normalize_object_dir() helper to call after all
> arguments are parsed, but before any logic is run on that object dir.
> 
> Signed-off-by: Derrick Stolee <derrickstolee@github.com>
> ---
>  builtin/multi-pack-index.c | 19 ++++++++++++++++---
>  1 file changed, 16 insertions(+), 3 deletions(-)
> 
> diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c
> index 4480ba39827..3853960f9ba 100644
> --- a/builtin/multi-pack-index.c
> +++ b/builtin/multi-pack-index.c
> @@ -90,6 +90,14 @@ static void read_packs_from_stdin(struct string_list *to)
>  	strbuf_release(&buf);
>  }
>  
> +static void normalize_object_dir(void)
> +{
> +	if (!opts.object_dir)
> +		opts.object_dir = get_object_directory();
> +	else
> +		opts.object_dir = real_pathdup(opts.object_dir, 1);
> +}
> +

Rather than copy the 'normalize_object_dir()' calls to every subcommand, you
could "centralize" this by making the 'object_dir' option an 'OPT_CALLBACK'
option, something like:

static struct option common_opts[] = {
	OPT_CALLBACK(0, "object-dir", &opts.object_dir, N_("file"),
		     N_("object directory containing set of packfile and pack-index pairs"),
		     normalize_object_dir),
	OPT_END(),
};

It would require changing the function signature of 'normalize_object_dir'
to match what's shown in 'Documentation/technical/api-parse-options.txt',
and it potentially needs prefix handling similar to what's done in
parse-options.c:get_value() (which internally calls 'fix_filename()' for
filename opts), but I think it's probably worth reducing duplication here
and avoiding the need to add 'normalize_object_dir()' to any new subcommand
in the future.

>  static int cmd_multi_pack_index_write(int argc, const char **argv)
>  {
>  	struct option *options;
> @@ -127,6 +135,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv)
>  
>  	FREE_AND_NULL(options);
>  
> +	normalize_object_dir();
> +
>  	if (opts.stdin_packs) {
>  		struct string_list packs = STRING_LIST_INIT_DUP;
>  		int ret;
> @@ -169,6 +179,8 @@ static int cmd_multi_pack_index_verify(int argc, const char **argv)
>  
>  	FREE_AND_NULL(options);
>  
> +	normalize_object_dir();
> +
>  	return verify_midx_file(the_repository, opts.object_dir, opts.flags);
>  }
>  
> @@ -195,6 +207,8 @@ static int cmd_multi_pack_index_expire(int argc, const char **argv)
>  
>  	FREE_AND_NULL(options);
>  
> +	normalize_object_dir();
> +
>  	return expire_midx_packs(the_repository, opts.object_dir, opts.flags);
>  }
>  
> @@ -225,6 +239,8 @@ static int cmd_multi_pack_index_repack(int argc, const char **argv)
>  
>  	FREE_AND_NULL(options);
>  
> +	normalize_object_dir();
> +
>  	return midx_repack(the_repository, opts.object_dir,
>  			   (size_t)opts.batch_size, opts.flags);
>  }
> @@ -241,9 +257,6 @@ int cmd_multi_pack_index(int argc, const char **argv,
>  			     builtin_multi_pack_index_usage,
>  			     PARSE_OPT_STOP_AT_NON_OPTION);
>  
> -	if (!opts.object_dir)
> -		opts.object_dir = get_object_directory();
> -
>  	if (!argc)
>  		goto usage;
>
Derrick Stolee April 21, 2022, 7:55 p.m. UTC | #2
On 4/21/2022 3:50 PM, Victoria Dye wrote:
> Derrick Stolee via GitGitGadget wrote:
>> +static void normalize_object_dir(void)
>> +{
>> +	if (!opts.object_dir)
>> +		opts.object_dir = get_object_directory();
>> +	else
>> +		opts.object_dir = real_pathdup(opts.object_dir, 1);
>> +}
>> +
> 
> Rather than copy the 'normalize_object_dir()' calls to every subcommand, you
> could "centralize" this by making the 'object_dir' option an 'OPT_CALLBACK'
> option, something like:
> 
> static struct option common_opts[] = {
> 	OPT_CALLBACK(0, "object-dir", &opts.object_dir, N_("file"),
> 		     N_("object directory containing set of packfile and pack-index pairs"),
> 		     normalize_object_dir),
> 	OPT_END(),
> };
> 
> It would require changing the function signature of 'normalize_object_dir'
> to match what's shown in 'Documentation/technical/api-parse-options.txt',
> and it potentially needs prefix handling similar to what's done in
> parse-options.c:get_value() (which internally calls 'fix_filename()' for
> filename opts), but I think it's probably worth reducing duplication here
> and avoiding the need to add 'normalize_object_dir()' to any new subcommand
> in the future.

Thanks! I agree that that would be a cleaner approach, especially if
a new subcommand is added in the future.

Thanks,
-Stolee
Junio C Hamano April 21, 2022, 8:28 p.m. UTC | #3
Victoria Dye <vdye@github.com> writes:

>> +static void normalize_object_dir(void)
>> +{
>> +	if (!opts.object_dir)
>> +		opts.object_dir = get_object_directory();
>> +	else
>> +		opts.object_dir = real_pathdup(opts.object_dir, 1);
>> +}
>> +
>
> Rather than copy the 'normalize_object_dir()' calls to every subcommand, you
> could "centralize" this by making the 'object_dir' option an 'OPT_CALLBACK'
> option, something like:
>
> static struct option common_opts[] = {
> 	OPT_CALLBACK(0, "object-dir", &opts.object_dir, N_("file"),
> 		     N_("object directory containing set of packfile and pack-index pairs"),
> 		     normalize_object_dir),
> 	OPT_END(),
> };
>
> It would require changing the function signature of 'normalize_object_dir'
> to match what's shown in 'Documentation/technical/api-parse-options.txt',
> and it potentially needs prefix handling similar to what's done in
> parse-options.c:get_value() (which internally calls 'fix_filename()' for
> filename opts), but I think it's probably worth reducing duplication here
> and avoiding the need to add 'normalize_object_dir()' to any new subcommand
> in the future.

Good suggestion.  Thanks, both, for taking care of this.

I wonder whether there are other places where we take end-user input and
treat it as a pathname without the necessary normalization.  The codepath
fixed by this series is relatively new, so I am not surprised such a bug
was still there; hopefully it was an isolated remaining bug.
diff mbox series

Patch

diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c
index 4480ba39827..3853960f9ba 100644
--- a/builtin/multi-pack-index.c
+++ b/builtin/multi-pack-index.c
@@ -90,6 +90,14 @@  static void read_packs_from_stdin(struct string_list *to)
 	strbuf_release(&buf);
 }
 
+static void normalize_object_dir(void)
+{
+	if (!opts.object_dir)
+		opts.object_dir = get_object_directory();
+	else
+		opts.object_dir = real_pathdup(opts.object_dir, 1);
+}
+
 static int cmd_multi_pack_index_write(int argc, const char **argv)
 {
 	struct option *options;
@@ -127,6 +135,8 @@  static int cmd_multi_pack_index_write(int argc, const char **argv)
 
 	FREE_AND_NULL(options);
 
+	normalize_object_dir();
+
 	if (opts.stdin_packs) {
 		struct string_list packs = STRING_LIST_INIT_DUP;
 		int ret;
@@ -169,6 +179,8 @@  static int cmd_multi_pack_index_verify(int argc, const char **argv)
 
 	FREE_AND_NULL(options);
 
+	normalize_object_dir();
+
 	return verify_midx_file(the_repository, opts.object_dir, opts.flags);
 }
 
@@ -195,6 +207,8 @@  static int cmd_multi_pack_index_expire(int argc, const char **argv)
 
 	FREE_AND_NULL(options);
 
+	normalize_object_dir();
+
 	return expire_midx_packs(the_repository, opts.object_dir, opts.flags);
 }
 
@@ -225,6 +239,8 @@  static int cmd_multi_pack_index_repack(int argc, const char **argv)
 
 	FREE_AND_NULL(options);
 
+	normalize_object_dir();
+
 	return midx_repack(the_repository, opts.object_dir,
 			   (size_t)opts.batch_size, opts.flags);
 }
@@ -241,9 +257,6 @@  int cmd_multi_pack_index(int argc, const char **argv,
 			     builtin_multi_pack_index_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 
-	if (!opts.object_dir)
-		opts.object_dir = get_object_directory();
-
 	if (!argc)
 		goto usage;