diff mbox series

[v8,5/7] revision: mark non-user-given objects instead

Message ID ca25f5914a98455cb6908a34146c10101d3529b0.1536885967.git.matvore@google.com (mailing list archive)
State New, archived
Headers show
Series filter: support for excluding all trees and blobs | expand

Commit Message

Matthew DeVore Sept. 14, 2018, 12:55 a.m. UTC
Currently, list-objects.c incorrectly treats all root trees of commits
as USER_GIVEN. Also, it would be easier to mark objects that are
non-user-given instead of user-given, since the places in the code
where we access an object through a reference are more obvious than
the places where we access an object that was given by the user.

Resolve these two problems by introducing a flag NOT_USER_GIVEN that
marks blobs and trees that are non-user-given, replacing USER_GIVEN.
(Only blobs and trees are marked because this mark is only used when
filtering objects, and filtering of other types of objects is not
supported yet.)

This fixes a bug where git rev-list behaved differently from git
pack-objects: pack-objects would *not* filter objects given explicitly
on the command line, whereas rev-list would filter them. This was
because the two commands used different functions to add objects to the
rev_info struct. This seems to have been an oversight, and pack-objects
has the correct behavior, so I added a test to make sure that rev-list
now behaves the same way.

Signed-off-by: Matthew DeVore <matvore@google.com>

fixup of 6defd70de
---
 list-objects.c                      | 31 +++++++++++++++++------------
 revision.c                          |  1 -
 revision.h                          | 11 ++++++++--
 t/t6112-rev-list-filters-objects.sh | 10 ++++++++++
 4 files changed, 37 insertions(+), 16 deletions(-)

Comments

Junio C Hamano Sept. 14, 2018, 5:23 p.m. UTC | #1
Matthew DeVore <matvore@google.com> writes:

> Currently, list-objects.c incorrectly treats all root trees of commits
> as USER_GIVEN. Also, it would be easier to mark objects that are
> non-user-given instead of user-given, since the places in the code
> where we access an object through a reference are more obvious than
> the places where we access an object that was given by the user.
>
> Resolve these two problems by introducing a flag NOT_USER_GIVEN that
> marks blobs and trees that are non-user-given, replacing USER_GIVEN.
> (Only blobs and trees are marked because this mark is only used when
> filtering objects, and filtering of other types of objects is not
> supported yet.)
>
> This fixes a bug in that git rev-list behaved differently from git
> pack-objects. pack-objects would *not* filter objects given explicitly
> on the command line and rev-list would filter. This was because the two
> commands used a different function to add objects to the rev_info
> struct. This seems to have been an oversight, and pack-objects has the
> correct behavior, so I added a test to make sure that rev-list now
> behaves properly.
>
> Signed-off-by: Matthew DeVore <matvore@google.com>
>
> fixup of 6defd70de

That's probably meant to go below "---".

> ---
>  list-objects.c                      | 31 +++++++++++++++++------------
>  revision.c                          |  1 -
>  revision.h                          | 11 ++++++++--
>  t/t6112-rev-list-filters-objects.sh | 10 ++++++++++
>  4 files changed, 37 insertions(+), 16 deletions(-)
>
> diff --git a/list-objects.c b/list-objects.c
> index 243192af5..7a1a0929d 100644
> --- a/list-objects.c
> +++ b/list-objects.c
> @@ -53,7 +53,7 @@ static void process_blob(struct traversal_context *ctx,
>  
>  	pathlen = path->len;
>  	strbuf_addstr(path, name);
> -	if (!(obj->flags & USER_GIVEN) && ctx->filter_fn)
> +	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
>  		r = ctx->filter_fn(LOFS_BLOB, obj,
>  				   path->buf, &path->buf[pathlen],
>  				   ctx->filter_data);
> @@ -120,17 +120,19 @@ static void process_tree_contents(struct traversal_context *ctx,
>  				continue;
>  		}
>  
> -		if (S_ISDIR(entry.mode))
> -			process_tree(ctx,
> -				     lookup_tree(the_repository, entry.oid),
> -				     base, entry.path);
> +		if (S_ISDIR(entry.mode)) {
> +			struct tree *t = lookup_tree(the_repository, entry.oid);
> +			t->object.flags |= NOT_USER_GIVEN;
> +			process_tree(ctx, t, base, entry.path);
> +		}
>  		else if (S_ISGITLINK(entry.mode))
>  			process_gitlink(ctx, entry.oid->hash,
>  					base, entry.path);
> -		else
> -			process_blob(ctx,
> -				     lookup_blob(the_repository, entry.oid),
> -				     base, entry.path);
> +		else {
> +			struct blob *b = lookup_blob(the_repository, entry.oid);
> +			b->object.flags |= NOT_USER_GIVEN;
> +			process_blob(ctx, b, base, entry.path);
> +		}
>  	}
>  }
>  
> @@ -171,7 +173,7 @@ static void process_tree(struct traversal_context *ctx,
>  	}
>  
>  	strbuf_addstr(base, name);
> -	if (!(obj->flags & USER_GIVEN) && ctx->filter_fn)
> +	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
>  		r = ctx->filter_fn(LOFS_BEGIN_TREE, obj,
>  				   base->buf, &base->buf[baselen],
>  				   ctx->filter_data);
> @@ -185,7 +187,7 @@ static void process_tree(struct traversal_context *ctx,
>  	if (!failed_parse)
>  		process_tree_contents(ctx, tree, base);
>  
> -	if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) {
> +	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) {
>  		r = ctx->filter_fn(LOFS_END_TREE, obj,
>  				   base->buf, &base->buf[baselen],
>  				   ctx->filter_data);
> @@ -301,8 +303,11 @@ static void do_traverse(struct traversal_context *ctx)
>  		 * an uninteresting boundary commit may not have its tree
>  		 * parsed yet, but we are not going to show them anyway
>  		 */
> -		if (get_commit_tree(commit))
> -			add_pending_tree(ctx->revs, get_commit_tree(commit));
> +		if (get_commit_tree(commit)) {
> +			struct tree *tree = get_commit_tree(commit);
> +			tree->object.flags |= NOT_USER_GIVEN;
> +			add_pending_tree(ctx->revs, tree);
> +		}
>  		ctx->show_commit(commit, ctx->show_data);
>  
>  		if (ctx->revs->tree_blobs_in_commit_order)
> diff --git a/revision.c b/revision.c
> index de4dce600..72d48a17f 100644
> --- a/revision.c
> +++ b/revision.c
> @@ -175,7 +175,6 @@ static void add_pending_object_with_path(struct rev_info *revs,
>  		strbuf_release(&buf);
>  		return; /* do not add the commit itself */
>  	}
> -	obj->flags |= USER_GIVEN;
>  	add_object_array_with_path(obj, name, &revs->pending, mode, path);
>  }
>  
> diff --git a/revision.h b/revision.h
> index 5910613cb..83e164039 100644
> --- a/revision.h
> +++ b/revision.h
> @@ -21,9 +21,16 @@
>  #define SYMMETRIC_LEFT	(1u<<8)
>  #define PATCHSAME	(1u<<9)
>  #define BOTTOM		(1u<<10)
> -#define USER_GIVEN	(1u<<25) /* given directly by the user */
> +/*
> + * Indicates object was reached by traversal. i.e. not given by user on
> + * command-line or stdin.
> + * NEEDSWORK: NOT_USER_GIVEN doesn't apply to commits because we only support
> + * filtering trees and blobs, but it may be useful to support filtering commits
> + * in the future.
> + */
> +#define NOT_USER_GIVEN	(1u<<25)
>  #define TRACK_LINEAR	(1u<<26)
> -#define ALL_REV_FLAGS	(((1u<<11)-1) | USER_GIVEN | TRACK_LINEAR)
> +#define ALL_REV_FLAGS	(((1u<<11)-1) | NOT_USER_GIVEN | TRACK_LINEAR)
>  
>  #define DECORATE_SHORT_REFS	1
>  #define DECORATE_FULL_REFS	2
> diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
> index c662c97db..2e07dadf0 100755
> --- a/t/t6112-rev-list-filters-objects.sh
> +++ b/t/t6112-rev-list-filters-objects.sh
> @@ -30,6 +30,16 @@ test_expect_success 'verify blob:none omits all 5 blobs' '
>  	test_cmp observed expected
>  '
>  
> +test_expect_success 'specify blob explicitly prevents filtering' '
> +	file_3=$(git -C r1 ls-files -s file.3 \
> +		| awk -f print_2.awk) &&
> +	file_4=$(git -C r1 ls-files -s file.4 \
> +		| awk -f print_2.awk) &&
> +	git -C r1 rev-list HEAD --objects --filter=blob:none HEAD $file_3 >observed &&
> +	grep -q "$file_3" observed &&
> +	test_must_fail grep -q "$file_4" observed
> +'
> +
>  test_expect_success 'verify emitted+omitted == all' '
>  	git -C r1 rev-list HEAD --objects \
>  		| awk -f print_1.awk \
Matthew DeVore Sept. 14, 2018, 8:08 p.m. UTC | #2
On Fri, Sep 14, 2018 at 10:23 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Matthew DeVore <matvore@google.com> writes:
>
> > Signed-off-by: Matthew DeVore <matvore@google.com>
> >
> > fixup of 6defd70de
>
> That's probably meant to go below "---".
>

That line shouldn't be there at all, sorry!

It came from me putting that text in a commit which was meant to be a
fixup of another commit when I ran rebase -i. I asked rebase to make
it a "squash" so I could edit the commit message of the earlier commit
(6defd70de). Then rebase merged the two descriptions and let me edit
them, but I didn't remember to delete the latter commit's message.

I probably should have made the earlier commit (6defd70de) a "reword",
and the later commit a "fixup", rather than "pick" followed by
"squash".
diff mbox series

Patch

diff --git a/list-objects.c b/list-objects.c
index 243192af5..7a1a0929d 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -53,7 +53,7 @@  static void process_blob(struct traversal_context *ctx,
 
 	pathlen = path->len;
 	strbuf_addstr(path, name);
-	if (!(obj->flags & USER_GIVEN) && ctx->filter_fn)
+	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
 		r = ctx->filter_fn(LOFS_BLOB, obj,
 				   path->buf, &path->buf[pathlen],
 				   ctx->filter_data);
@@ -120,17 +120,19 @@  static void process_tree_contents(struct traversal_context *ctx,
 				continue;
 		}
 
-		if (S_ISDIR(entry.mode))
-			process_tree(ctx,
-				     lookup_tree(the_repository, entry.oid),
-				     base, entry.path);
+		if (S_ISDIR(entry.mode)) {
+			struct tree *t = lookup_tree(the_repository, entry.oid);
+			t->object.flags |= NOT_USER_GIVEN;
+			process_tree(ctx, t, base, entry.path);
+		}
 		else if (S_ISGITLINK(entry.mode))
 			process_gitlink(ctx, entry.oid->hash,
 					base, entry.path);
-		else
-			process_blob(ctx,
-				     lookup_blob(the_repository, entry.oid),
-				     base, entry.path);
+		else {
+			struct blob *b = lookup_blob(the_repository, entry.oid);
+			b->object.flags |= NOT_USER_GIVEN;
+			process_blob(ctx, b, base, entry.path);
+		}
 	}
 }
 
@@ -171,7 +173,7 @@  static void process_tree(struct traversal_context *ctx,
 	}
 
 	strbuf_addstr(base, name);
-	if (!(obj->flags & USER_GIVEN) && ctx->filter_fn)
+	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
 		r = ctx->filter_fn(LOFS_BEGIN_TREE, obj,
 				   base->buf, &base->buf[baselen],
 				   ctx->filter_data);
@@ -185,7 +187,7 @@  static void process_tree(struct traversal_context *ctx,
 	if (!failed_parse)
 		process_tree_contents(ctx, tree, base);
 
-	if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) {
+	if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) {
 		r = ctx->filter_fn(LOFS_END_TREE, obj,
 				   base->buf, &base->buf[baselen],
 				   ctx->filter_data);
@@ -301,8 +303,11 @@  static void do_traverse(struct traversal_context *ctx)
 		 * an uninteresting boundary commit may not have its tree
 		 * parsed yet, but we are not going to show them anyway
 		 */
-		if (get_commit_tree(commit))
-			add_pending_tree(ctx->revs, get_commit_tree(commit));
+		if (get_commit_tree(commit)) {
+			struct tree *tree = get_commit_tree(commit);
+			tree->object.flags |= NOT_USER_GIVEN;
+			add_pending_tree(ctx->revs, tree);
+		}
 		ctx->show_commit(commit, ctx->show_data);
 
 		if (ctx->revs->tree_blobs_in_commit_order)
diff --git a/revision.c b/revision.c
index de4dce600..72d48a17f 100644
--- a/revision.c
+++ b/revision.c
@@ -175,7 +175,6 @@  static void add_pending_object_with_path(struct rev_info *revs,
 		strbuf_release(&buf);
 		return; /* do not add the commit itself */
 	}
-	obj->flags |= USER_GIVEN;
 	add_object_array_with_path(obj, name, &revs->pending, mode, path);
 }
 
diff --git a/revision.h b/revision.h
index 5910613cb..83e164039 100644
--- a/revision.h
+++ b/revision.h
@@ -21,9 +21,16 @@ 
 #define SYMMETRIC_LEFT	(1u<<8)
 #define PATCHSAME	(1u<<9)
 #define BOTTOM		(1u<<10)
-#define USER_GIVEN	(1u<<25) /* given directly by the user */
+/*
+ * Indicates the object was reached by traversal, i.e. it was not given by the
+ * user on the command line or stdin.
+ * NEEDSWORK: NOT_USER_GIVEN doesn't apply to commits because we only support
+ * filtering trees and blobs, but it may be useful to support filtering commits
+ * in the future.
+ */
+#define NOT_USER_GIVEN	(1u<<25)
 #define TRACK_LINEAR	(1u<<26)
-#define ALL_REV_FLAGS	(((1u<<11)-1) | USER_GIVEN | TRACK_LINEAR)
+#define ALL_REV_FLAGS	(((1u<<11)-1) | NOT_USER_GIVEN | TRACK_LINEAR)
 
 #define DECORATE_SHORT_REFS	1
 #define DECORATE_FULL_REFS	2
diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh
index c662c97db..2e07dadf0 100755
--- a/t/t6112-rev-list-filters-objects.sh
+++ b/t/t6112-rev-list-filters-objects.sh
@@ -30,6 +30,16 @@  test_expect_success 'verify blob:none omits all 5 blobs' '
 	test_cmp observed expected
 '
 
+test_expect_success 'specify blob explicitly prevents filtering' '
+	# A blob named on the command line must survive --filter=blob:none,
+	# while file.4, which is not named there, must still be omitted.
+	file_3=$(git -C r1 ls-files -s file.3 | awk -f print_2.awk) &&
+	file_4=$(git -C r1 ls-files -s file.4 | awk -f print_2.awk) &&
+	git -C r1 rev-list --objects --filter=blob:none HEAD $file_3 >observed &&
+	grep "$file_3" observed &&
+	! grep "$file_4" observed
+'
+
 test_expect_success 'verify emitted+omitted == all' '
 	git -C r1 rev-list HEAD --objects \
 		| awk -f print_1.awk \