diff mbox series

[06/16] index-info.c: parse object type provided in read_index_info

Message ID f56eee0b48da907a27edc99ca135cf8f6c19af35.1718130288.git.gitgitgadget@gmail.com (mailing list archive)
State New
Headers show
Series mktree: support more flexible usage | expand

Commit Message

Victoria Dye June 11, 2024, 6:24 p.m. UTC
From: Victoria Dye <vdye@github.com>

If the object type (e.g. "blob", "tree") is identified on a stdin line read
by 'read_index_info()' (i.e. on lines formatted like the output of 'git
ls-tree'), parse it into an 'enum object_type' and provide it to the
'read_index_info()' callback as an argument. If the type is not provided,
pass 'OBJ_NONE' instead. If the object type is invalid, return an error.

The goal of this change is to allow for more thorough validation of the
provided object type (e.g. against the provided mode) in 'mktree' once
'mktree_line' is replaced with 'read_index_info()'. Note, though, that this
change also strengthens the validation done by 'update-index', since invalid
type names now trigger an error.

Signed-off-by: Victoria Dye <vdye@github.com>
---
 builtin/update-index.c        |  3 ++-
 index-info.c                  | 16 ++++++++++++----
 index-info.h                  |  3 ++-
 t/t2107-update-index-basic.sh |  5 +++++
 4 files changed, 21 insertions(+), 6 deletions(-)

Comments

Junio C Hamano June 12, 2024, 1:54 a.m. UTC | #1
"Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Victoria Dye <vdye@github.com>
>
> If the object type (e.g. "blob", "tree") is identified on a stdin line read
> by 'read_index_info()' (i.e. on lines formatted like the output of 'git
> ls-tree'), parse it into an 'enum object_type' and provide it to the
> 'read_index_info()' callback as an argument. If the type is not provided,
> pass 'OBJ_NONE' instead. If the object type is invalid, return an error.

My recollection is that, when we do not know what to expect, we tend to
use OBJ_ANY rather than OBJ_NONE as a convention to signal that fact
(e.g., object-name.c:peel_to_type()).

As long as the code path this series touches is internally
consistent, using OBJ_NONE may not hurt, but once it needs to start
interacting with existing code paths that use OBJ_ANY for that
purpose, we may need to adjust one to match the other.

> The goal of this change is to allow for more thorough validation of the
> provided object type (e.g. against the provided mode) in 'mktree' once
> 'mktree_line' is replaced with 'read_index_info()'. Note, though, that this
> change also strengthens the validation done by 'update-index', since invalid
> type names now trigger an error.

Nice.

> Signed-off-by: Victoria Dye <vdye@github.com>
> ---
>  builtin/update-index.c        |  3 ++-
>  index-info.c                  | 16 ++++++++++++----
>  index-info.h                  |  3 ++-
>  t/t2107-update-index-basic.sh |  5 +++++
>  4 files changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/builtin/update-index.c b/builtin/update-index.c
> index b1b334807f8..8882433b644 100644
> --- a/builtin/update-index.c
> +++ b/builtin/update-index.c
> @@ -510,7 +510,8 @@ static void update_one(const char *path)
>  	report("add '%s'", path);
>  }
>  
> -static int apply_index_info(unsigned int mode, struct object_id *oid, int stage,
> +static int apply_index_info(unsigned int mode, struct object_id *oid,
> +			    enum object_type obj_type UNUSED, int stage,
>  			    const char *path_name, void *cbdata UNUSED)
>  {
>  	if (!verify_path(path_name, mode)) {
> diff --git a/index-info.c b/index-info.c
> index 735cbf1f476..5d61e61e28f 100644
> --- a/index-info.c
> +++ b/index-info.c
> @@ -18,6 +18,7 @@ int read_index_info(int nul_term_line, each_index_info_fn fn, void *cbdata)
>  		char *ptr, *tab;
>  		char *path_name;
>  		struct object_id oid;
> +		enum object_type obj_type = OBJ_NONE;
>  		unsigned int mode;
>  		unsigned long ul;
>  		int stage;
> @@ -56,18 +57,17 @@ int read_index_info(int nul_term_line, each_index_info_fn fn, void *cbdata)
>  
>  		if (tab[-2] == ' ' && '0' <= tab[-1] && tab[-1] <= '3') {
>  			stage = tab[-1] - '0';
> -			ptr = tab + 1; /* point at the head of path */
> +			path_name = tab + 1; /* point at the head of path */
>  			tab = tab - 2; /* point at tail of sha1 */
>  		} else {
>  			stage = 0;
> -			ptr = tab + 1; /* point at the head of path */
> +			path_name = tab + 1; /* point at the head of path */
>  		}
>  
>  		if (get_oid_hex(tab - hexsz, &oid) ||
>  			tab[-(hexsz + 1)] != ' ')
>  			goto bad_line;
>  
> -		path_name = ptr;
>  		if (!nul_term_line && path_name[0] == '"') {
>  			strbuf_reset(&uq);
>  			if (unquote_c_style(&uq, path_name, NULL)) {
> @@ -77,7 +77,15 @@ int read_index_info(int nul_term_line, each_index_info_fn fn, void *cbdata)
>  			path_name = uq.buf;
>  		}
>  
> -		ret = fn(mode, &oid, stage, path_name, cbdata);
> +		/* Get the type, if provided */
> +		if (tab - hexsz - 1 > ptr + 1) {
> +			if (*(tab - hexsz - 1) != ' ')
> +				goto bad_line;
> +			*(tab - hexsz - 1) = '\0';
> +			obj_type = type_from_string(ptr + 1);
> +		}
> +
> +		ret = fn(mode, &oid, obj_type, stage, path_name, cbdata);
>  		if (ret) {
>  			ret = -1;
>  			break;
> diff --git a/index-info.h b/index-info.h
> index 1884972021d..767cf304213 100644
> --- a/index-info.h
> +++ b/index-info.h
> @@ -2,8 +2,9 @@
>  #define INDEX_INFO_H
>  
>  #include "hash.h"
> +#include "object.h"
>  
> -typedef int (*each_index_info_fn)(unsigned int, struct object_id *, int, const char *, void *);
> +typedef int (*each_index_info_fn)(unsigned int, struct object_id *, enum object_type, int, const char *, void *);
>  
>  #define INDEX_INFO_EMPTY_LINE 1
>  
> diff --git a/t/t2107-update-index-basic.sh b/t/t2107-update-index-basic.sh
> index 29696ade0d0..9c19d24cd4a 100755
> --- a/t/t2107-update-index-basic.sh
> +++ b/t/t2107-update-index-basic.sh
> @@ -153,6 +153,11 @@ test_expect_success '--index-info fails on malformed input' '
>  	test_must_fail git update-index --index-info 2>err &&
>  	grep "malformed input line" err &&
>  
> +	# invalid type
> +	printf "100644 bad $EMPTY_BLOB\tA" |
> +	test_must_fail git update-index --index-info 2>err &&
> +	grep "invalid object type" err &&
> +
>  	# invalid stage value
>  	printf "100644 $EMPTY_BLOB 5\tA" |
>  	test_must_fail git update-index --index-info 2>err &&
diff mbox series

Patch

diff --git a/builtin/update-index.c b/builtin/update-index.c
index b1b334807f8..8882433b644 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -510,7 +510,8 @@  static void update_one(const char *path)
 	report("add '%s'", path);
 }
 
-static int apply_index_info(unsigned int mode, struct object_id *oid, int stage,
+static int apply_index_info(unsigned int mode, struct object_id *oid,
+			    enum object_type obj_type UNUSED, int stage,
 			    const char *path_name, void *cbdata UNUSED)
 {
 	if (!verify_path(path_name, mode)) {
diff --git a/index-info.c b/index-info.c
index 735cbf1f476..5d61e61e28f 100644
--- a/index-info.c
+++ b/index-info.c
@@ -18,6 +18,7 @@  int read_index_info(int nul_term_line, each_index_info_fn fn, void *cbdata)
 		char *ptr, *tab;
 		char *path_name;
 		struct object_id oid;
+		enum object_type obj_type = OBJ_NONE;
 		unsigned int mode;
 		unsigned long ul;
 		int stage;
@@ -56,18 +57,17 @@  int read_index_info(int nul_term_line, each_index_info_fn fn, void *cbdata)
 
 		if (tab[-2] == ' ' && '0' <= tab[-1] && tab[-1] <= '3') {
 			stage = tab[-1] - '0';
-			ptr = tab + 1; /* point at the head of path */
+			path_name = tab + 1; /* point at the head of path */
 			tab = tab - 2; /* point at tail of sha1 */
 		} else {
 			stage = 0;
-			ptr = tab + 1; /* point at the head of path */
+			path_name = tab + 1; /* point at the head of path */
 		}
 
 		if (get_oid_hex(tab - hexsz, &oid) ||
 			tab[-(hexsz + 1)] != ' ')
 			goto bad_line;
 
-		path_name = ptr;
 		if (!nul_term_line && path_name[0] == '"') {
 			strbuf_reset(&uq);
 			if (unquote_c_style(&uq, path_name, NULL)) {
@@ -77,7 +77,15 @@  int read_index_info(int nul_term_line, each_index_info_fn fn, void *cbdata)
 			path_name = uq.buf;
 		}
 
-		ret = fn(mode, &oid, stage, path_name, cbdata);
+		/* Get the type, if provided */
+		if (tab - hexsz - 1 > ptr + 1) {
+			if (*(tab - hexsz - 1) != ' ')
+				goto bad_line;
+			*(tab - hexsz - 1) = '\0';
+			obj_type = type_from_string(ptr + 1);
+		}
+
+		ret = fn(mode, &oid, obj_type, stage, path_name, cbdata);
 		if (ret) {
 			ret = -1;
 			break;
diff --git a/index-info.h b/index-info.h
index 1884972021d..767cf304213 100644
--- a/index-info.h
+++ b/index-info.h
@@ -2,8 +2,9 @@ 
 #define INDEX_INFO_H
 
 #include "hash.h"
+#include "object.h"
 
-typedef int (*each_index_info_fn)(unsigned int, struct object_id *, int, const char *, void *);
+typedef int (*each_index_info_fn)(unsigned int, struct object_id *, enum object_type, int, const char *, void *);
 
 #define INDEX_INFO_EMPTY_LINE 1
 
diff --git a/t/t2107-update-index-basic.sh b/t/t2107-update-index-basic.sh
index 29696ade0d0..9c19d24cd4a 100755
--- a/t/t2107-update-index-basic.sh
+++ b/t/t2107-update-index-basic.sh
@@ -153,6 +153,11 @@  test_expect_success '--index-info fails on malformed input' '
 	test_must_fail git update-index --index-info 2>err &&
 	grep "malformed input line" err &&
 
+	# invalid type
+	printf "100644 bad $EMPTY_BLOB\tA" |
+	test_must_fail git update-index --index-info 2>err &&
+	grep "invalid object type" err &&
+
 	# invalid stage value
 	printf "100644 $EMPTY_BLOB 5\tA" |
 	test_must_fail git update-index --index-info 2>err &&