diff mbox series

[RFC,3/3] refspec: add support for negative refspecs

Message ID 20200815002509.2467645-3-jacob.e.keller@intel.com
State New, archived
Headers show
Series None | expand

Commit Message

Keller, Jacob E Aug. 15, 2020, 12:25 a.m. UTC
From: Jacob Keller <jacob.keller@gmail.com>

Both fetch and push support pattern refspecs which allow fetching or
pushing references that match a specific pattern. Because these patterns
are globs, they have somewhat limited ability to express more complex
situations.

For example, suppose you wish to fetch all branches from a remote except
for a specific one. To allow this, you must setup a set of refspecs
which match only the branches you want. Because refspecs are either
explicit name matches, or simple globs, many patterns cannot be
expressed.

Add support for a new type of refspec, referred to as "negative"
refspecs. These are prefixed with a '^' and mean "exclude any ref
matching this refspec". They can only have one "side" which always
refers to the source. During a fetch, this refers to the name of the ref
on the remote. During a push, this refers to the name of the ref on the
local side.

With negative refspecs, users can express more complex patterns. For
example:

 git fetch origin refs/heads/*:refs/remotes/origin/* ^refs/heads/dontwant

will fetch all branches on origin into remotes/origin, but will exclude
fetching the branch named dontwant.

Refspecs today are commutative, meaning that order doesn't expressly
matter. Rather than forcing an implied order, negative refspecs will
always be applied last. That is, in order to match, a ref must match at
least one positive refspec, and match none of the negative refspecs.
This is similar to how negative pathspecs work.

Signed-off-by: Jacob Keller <jacob.keller@gmail.com>
---
 builtin/fetch.c |  3 +++
 refspec.c       | 30 ++++++++++++++++++++++++++++++
 refspec.h       | 14 ++++++++------
 remote.c        | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
 remote.h        |  9 ++++++++-
 5 files changed, 97 insertions(+), 8 deletions(-)

Comments

Jacob Keller Aug. 17, 2020, 6:02 p.m. UTC | #1
On Fri, Aug 14, 2020 at 5:25 PM Jacob Keller <jacob.e.keller@intel.com> wrote:
>
> From: Jacob Keller <jacob.keller@gmail.com>
>
> Both fetch and push support pattern refspecs which allow fetching or
> pushing references that match a specific pattern. Because these patterns
> are globs, they have somewhat limited ability to express more complex
> situations.
>
> For example, suppose you wish to fetch all branches from a remote except
> for a specific one. To allow this, you must setup a set of refspecs
> which match only the branches you want. Because refspecs are either
> explicit name matches, or simple globs, many patterns cannot be
> expressed.
>
> Add support for a new type of refspec, referred to as "negative"
> refspecs. These are prefixed with a '^' and mean "exclude any ref
> matching this refspec". They can only have one "side" which always
> refers to the source. During a fetch, this refers to the name of the ref
> on the remote. During a push, this refers to the name of the ref on the
> local side.
>
> With negative refspecs, users can express more complex patterns. For
> example:
>
>  git fetch origin refs/heads/*:refs/remotes/origin/* ^refs/heads/dontwant
>
> will fetch all branches on origin into remotes/origin, but will exclude
> fetching the branch named dontwant.
>
> Refspecs today are commutative, meaning that order doesn't expressly
> matter. Rather than forcing an implied order, negative refspecs will
> always be applied last. That is, in order to match, a ref must match at
> least one positive refspec, and match none of the negative refspecs.
> This is similar to how negative pathspecs work.
>
> Signed-off-by: Jacob Keller <jacob.keller@gmail.com>
> ---
>  builtin/fetch.c |  3 +++
>  refspec.c       | 30 ++++++++++++++++++++++++++++++
>  refspec.h       | 14 ++++++++------
>  remote.c        | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  remote.h        |  9 ++++++++-
>  5 files changed, 97 insertions(+), 8 deletions(-)
>
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index c49f0e975203..930214626b54 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -530,6 +530,9 @@ static struct ref *get_ref_map(struct remote *remote,
>                 tail = &rm->next;
>         }
>
> +       /* apply any negative refspecs now to prune the list of refs */
> +       ref_map = apply_negative_refspecs(ref_map, rs);
> +

So there is a slight bug here: we need to determine whether to use the
remote->fetch rs or the commandline rs. This only prunes the refs
using commandline negative refspecs, but if you're using the values
configured in the remote they won't get pruned.

I am not sure the best way to handle this, since I don't really like a
check on the lines of "if (rs->nr) { /* use rs */ } else { /* use
remote->fetch */ }..


>         ref_map = ref_remove_duplicates(ref_map);
>
>         refname_hash_init(&existing_refs);
> diff --git a/refspec.c b/refspec.c
> index f10ef284cef9..feed20aca961 100644
> --- a/refspec.c
> +++ b/refspec.c
> @@ -8,6 +8,7 @@ static struct refspec_item s_tag_refspec = {
>         1,
>         0,
>         0,
> +       0,
>         "refs/tags/*",
>         "refs/tags/*"
>  };
> @@ -32,10 +33,17 @@ static int parse_refspec(struct refspec_item *item, const char *refspec, int fet
>         if (*lhs == '+') {
>                 item->force = 1;
>                 lhs++;
> +       } else if (*lhs == '^') {
> +               item->negative = 1;
> +               lhs++;
>         }
>
>         rhs = strrchr(lhs, ':');
>
> +       /* negative refspecs only have one side */
> +       if (item->negative && rhs)
> +               return 0;
> +
>         /*
>          * Before going on, special case ":" (or "+:") as a refspec
>          * for pushing matching refs.
> @@ -66,6 +74,28 @@ static int parse_refspec(struct refspec_item *item, const char *refspec, int fet
>         item->src = xstrndup(lhs, llen);
>         flags = REFNAME_ALLOW_ONELEVEL | (is_glob ? REFNAME_REFSPEC_PATTERN : 0);
>
> +       if (item->negative) {
> +               struct object_id unused;
> +
> +               /*
> +                * Negative refspecs only have a LHS, which indicates a ref
> +                * (or pattern of refs) to exclude from other matches. This
> +                * can either be a simple ref, a glob pattern, or even an
> +                * exact sha1 match.
> +                */
> +               if (!*item->src)
> +                       return 0; /* negative refspecs must not be empty */
> +               else if (llen == the_hash_algo->hexsz && !get_oid_hex(item->src, &unused))
> +                       item->exact_sha1 = 1; /* ok */
> +               else if (!check_refname_format(item->src, flags))
> +                       ; /* valid looking ref is ok */
> +               else
> +                       return 0;
> +
> +               /* other rules for negative refspecs don't apply */
> +               return 1;
> +       }
> +
>         if (fetch) {
>                 struct object_id unused;
>
> diff --git a/refspec.h b/refspec.h
> index 8d654e3a3ac4..e5bf6d25d0f7 100644
> --- a/refspec.h
> +++ b/refspec.h
> @@ -5,12 +5,13 @@
>  extern const struct refspec_item *tag_refspec;
>
>  /**
> - * A struct refspec_item holds the parsed interpretation of a refspec.  If it will
> - * force updates (starts with a '+'), force is true.  If it is a pattern
> - * (sides end with '*') pattern is true.  src and dest are the two sides
> - * (including '*' characters if present); if there is only one side, it is src,
> - * and dst is NULL; if sides exist but are empty (i.e., the refspec either
> - * starts or ends with ':'), the corresponding side is "".
> + * A struct refspec_item holds the parsed interpretation of a refspec.  If it
> + * will force updates (starts with a '+'), force is true.  If it is a pattern
> + * (sides end with '*') pattern is true.  If it is a negative refspec, (starts
> + * with '^'), negative is true.  src and dest are the two sides (including '*'
> + * characters if present); if there is only one side, it is src, and dst is
> + * NULL; if sides exist but are empty (i.e., the refspec either starts or ends
> + * with ':'), the corresponding side is "".
>   *
>   * remote_find_tracking(), given a remote and a struct refspec_item with either src
>   * or dst filled out, will fill out the other such that the result is in the
> @@ -22,6 +23,7 @@ struct refspec_item {
>         unsigned pattern : 1;
>         unsigned matching : 1;
>         unsigned exact_sha1 : 1;
> +       unsigned negative : 1;
>
>         char *src;
>         char *dst;
> diff --git a/remote.c b/remote.c
> index c5ed74f91c63..6a41d1028221 100644
> --- a/remote.c
> +++ b/remote.c
> @@ -1058,7 +1058,7 @@ static int match_explicit(struct ref *src, struct ref *dst,
>         const char *dst_value = rs->dst;
>         char *dst_guess;
>
> -       if (rs->pattern || rs->matching)
> +       if (rs->pattern || rs->matching || rs->negative)
>                 return 0;
>
>         matched_src = matched_dst = NULL;
> @@ -1134,6 +1134,10 @@ static char *get_ref_match(const struct refspec *rs, const struct ref *ref,
>         int matching_refs = -1;
>         for (i = 0; i < rs->nr; i++) {
>                 const struct refspec_item *item = &rs->items[i];
> +
> +               if (item->negative)
> +                       continue;
> +
>                 if (item->matching &&
>                     (matching_refs == -1 || item->force)) {
>                         matching_refs = i;
> @@ -1441,6 +1445,8 @@ int match_push_refs(struct ref *src, struct ref **dst,
>                 string_list_clear(&src_ref_index, 0);
>         }
>
> +       *dst = apply_negative_refspecs(*dst, rs);
> +
>         if (errs)
>                 return -1;
>         return 0;
> @@ -1810,6 +1816,9 @@ int get_fetch_map(const struct ref *remote_refs,
>  {
>         struct ref *ref_map, **rmp;
>
> +       if (refspec->negative)
> +               return 0;
> +
>         if (refspec->pattern) {
>                 ref_map = get_expanded_map(remote_refs, refspec);
>         } else {
> @@ -1853,6 +1862,44 @@ int get_fetch_map(const struct ref *remote_refs,
>         return 0;
>  }
>
> +static int refspec_match(const struct refspec_item *refspec,
> +                        const char *name)
> +{
> +       if (refspec->pattern)
> +               return match_name_with_pattern(refspec->src, name, NULL, NULL);
> +
> +       return !strcmp(refspec->src, name);
> +}
> +
> +static int omit_name_by_refspec(const char *name, struct refspec *rs)
> +{
> +       int i;
> +
> +       for (i = 0; i < rs->nr; i++) {
> +               if (rs->items[i].negative && refspec_match(&rs->items[i], name))
> +                       return 1;
> +       }
> +       return 0;
> +}
> +
> +struct ref *apply_negative_refspecs(struct ref *ref_map, struct refspec *rs)
> +{
> +       struct ref **tail;
> +
> +       for (tail = &ref_map; *tail; ) {
> +               struct ref *ref = *tail;
> +
> +               if (omit_name_by_refspec(ref->name, rs)) {
> +                       *tail = ref->next;
> +                       free(ref->peer_ref);
> +                       free(ref);
> +               } else
> +                       tail = &ref->next;
> +       }
> +
> +       return ref_map;
> +}
> +
>  int resolve_remote_symref(struct ref *ref, struct ref *list)
>  {
>         if (!ref->symref)
> diff --git a/remote.h b/remote.h
> index 5e3ea5a26deb..104e75e0f74d 100644
> --- a/remote.h
> +++ b/remote.h
> @@ -193,6 +193,12 @@ int resolve_remote_symref(struct ref *ref, struct ref *list);
>   */
>  struct ref *ref_remove_duplicates(struct ref *ref_map);
>
> +/*
> + * Remove all entries in the input list which match any negative refspec in
> + * the refspec list.
> + */
> +struct ref *apply_negative_refspecs(struct ref *ref_map, struct refspec *rs);
> +
>  int query_refspecs(struct refspec *rs, struct refspec_item *query);
>  char *apply_refspecs(struct refspec *rs, const char *name);
>
> @@ -205,7 +211,8 @@ void set_ref_status_for_push(struct ref *remote_refs, int send_mirror,
>  /*
>   * Given a list of the remote refs and the specification of things to
>   * fetch, makes a (separate) list of the refs to fetch and the local
> - * refs to store into.
> + * refs to store into. Note that negative refspecs are ignored here, and
> + * should be handled separately.
>   *
>   * *tail is the pointer to the tail pointer of the list of results
>   * beforehand, and will be set to the tail pointer of the list of
> --
> 2.28.0.163.g6104cc2f0b60
>
Junio C Hamano Aug. 17, 2020, 11:43 p.m. UTC | #2
Jacob Keller <jacob.e.keller@intel.com> writes:

> Refspecs today are commutative, meaning that order doesn't expressly
> matter. Rather than forcing an implied order, negative refspecs will
> always be applied last. That is, in order to match, a ref must match at
> least one positive refspec, and match none of the negative refspecs.
> This is similar to how negative pathspecs work.

Yes, enumerate what positive ones match and then exclude what
negative ones match from the result is a time-tested pattern our
users know how things work.

> @@ -530,6 +530,9 @@ static struct ref *get_ref_map(struct remote *remote,
>  		tail = &rm->next;
>  	}
>  
> +	/* apply any negative refspecs now to prune the list of refs */
> +	ref_map = apply_negative_refspecs(ref_map, rs);
> +
>  	ref_map = ref_remove_duplicates(ref_map);

How was the ordering here decided?  Should it result the same set if
negative ones are excluded after duplicates are removed?

> @@ -1441,6 +1445,8 @@ int match_push_refs(struct ref *src, struct ref **dst,
>  		string_list_clear(&src_ref_index, 0);
>  	}
>  
> +	*dst = apply_negative_refspecs(*dst, rs);
> +

The block of code whose tail is shown in the pre-context has
prepared "delete these refs because we no longer have them" to the
other side under MATCH_REFS_PRUNE but that was done based on the
*dst list before we applied the negative refspec.  Is the ordering
of these two correct, or should we filter the dst list with negative
ones and use the resulting one in pruning operation?

> +	if (item->negative) {
> +		struct object_id unused;
> +
> +		/*
> +		 * Negative refspecs only have a LHS, which indicates a ref
> +		 * (or pattern of refs) to exclude from other matches. This
> +		 * can either be a simple ref, a glob pattern, or even an
> +		 * exact sha1 match.
> +		 */

"a ref (or pattern of refs)" is clarified with the next sentence
anyway, so let's not say it, e.g.

	... only have a LHS, which indicates what to exclude from
	other matches.
Jacob Keller Aug. 18, 2020, 12:04 a.m. UTC | #3
On Mon, Aug 17, 2020 at 4:43 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Jacob Keller <jacob.e.keller@intel.com> writes:
>
> > Refspecs today are commutative, meaning that order doesn't expressly
> > matter. Rather than forcing an implied order, negative refspecs will
> > always be applied last. That is, in order to match, a ref must match at
> > least one positive refspec, and match none of the negative refspecs.
> > This is similar to how negative pathspecs work.
>
> Yes, enumerate what positive ones match and then exclude what
> negative ones match from the result is a time-tested pattern our
> users know how things work.
>
> > @@ -530,6 +530,9 @@ static struct ref *get_ref_map(struct remote *remote,
> >               tail = &rm->next;
> >       }
> >
> > +     /* apply any negative refspecs now to prune the list of refs */
> > +     ref_map = apply_negative_refspecs(ref_map, rs);
> > +
> >       ref_map = ref_remove_duplicates(ref_map);
>
> How was the ordering here decided?  Should it result the same set if
> negative ones are excluded after duplicates are removed?
>

Good question. This was what was done in peff's original patch. I need
to understand a bit more about what ref_remove_duplicates does to
really figure this out.

> > @@ -1441,6 +1445,8 @@ int match_push_refs(struct ref *src, struct ref **dst,
> >               string_list_clear(&src_ref_index, 0);
> >       }
> >
> > +     *dst = apply_negative_refspecs(*dst, rs);
> > +
>
> The block of code whose tail is shown in the pre-context has
> prepared "delete these refs because we no longer have them" to the
> other side under MATCH_REFS_PRUNE but that was done based on the
> *dst list before we applied the negative refspec.  Is the ordering
> of these two correct, or should we filter the dst list with negative
> ones and use the resulting one in pruning operation?
>

I think we need to swap the order here. I'll take a closer look.

> > +     if (item->negative) {
> > +             struct object_id unused;
> > +
> > +             /*
> > +              * Negative refspecs only have a LHS, which indicates a ref
> > +              * (or pattern of refs) to exclude from other matches. This
> > +              * can either be a simple ref, a glob pattern, or even an
> > +              * exact sha1 match.
> > +              */
>
> "a ref (or pattern of refs)" is clarified with the next sentence
> anyway, so let's not say it, e.g.
>
>         ... only have a LHS, which indicates what to exclude from
>         other matches.
>

Sure. There's also a slight bug here because in "fetch" mode,
standalone LHS-only refs cannot be globs, and I need to fix that too.

Thanks,
Jake
Jeff King Aug. 18, 2020, 5:41 p.m. UTC | #4
On Mon, Aug 17, 2020 at 05:04:00PM -0700, Jacob Keller wrote:

> > > +     /* apply any negative refspecs now to prune the list of refs */
> > > +     ref_map = apply_negative_refspecs(ref_map, rs);
> > > +
> > >       ref_map = ref_remove_duplicates(ref_map);
> >
> > How was the ordering here decided?  Should it result the same set if
> > negative ones are excluded after duplicates are removed?
> 
> Good question. This was what was done in peff's original patch. I need
> to understand a bit more about what ref_remove_duplicates does to
> really figure this out.

The relevant commit is 2467a4fa03 (Remove duplicate ref matches in
fetch, 2007-10-08), I think. We may end up with multiple refspecs
requesting a particular ref. E.g.:

  git fetch origin refs/heads/master refs/heads/*

I don't think the order should matter. If we apply negative refspecs
first, then we'd either remove both copies or leave both untouched (and
if the latter, then de-dup to a single). If we apply negative refspecs
after de-duping, then we'd either remove the single or leave it in
place. But the result is the same either way.

> > > @@ -1441,6 +1445,8 @@ int match_push_refs(struct ref *src, struct ref **dst,
> > >               string_list_clear(&src_ref_index, 0);
> > >       }
> > >
> > > +     *dst = apply_negative_refspecs(*dst, rs);
> > > +
> >
> > The block of code whose tail is shown in the pre-context has
> > prepared "delete these refs because we no longer have them" to the
> > other side under MATCH_REFS_PRUNE but that was done based on the
> > *dst list before we applied the negative refspec.  Is the ordering
> > of these two correct, or should we filter the dst list with negative
> > ones and use the resulting one in pruning operation?
> 
> I think we need to swap the order here. I'll take a closer look.

Hmm. I think the behavior we'd want is something like:

  # make sure the other side has three refs
  git branch prune/one HEAD
  git branch prune/two HEAD
  git branch prune/three HEAD
  git push dst.git refs/heads/prune/*

  # now drop two of ours, which are eligible for pruning
  git branch -d prune/one
  git branch -d prune/two

  # push with pruning, omitting "two"
  git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two

  # we should leave "two" but still deleted "one"
  test_write_lines one three >expect
  git -C dst.git for-each-ref --format='%(refname:lstrip=3)' refs/heads/prune/ >actual
  test_cmp expect actual

I.e., the negative refspec shrinks the space we're considering pruning.
And we'd probably want a similar test for "fetch --prune".

I just tried that, though, and got an interesting result. The push
actually complains:

  $ git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two
  error: src refspec refs/heads/prune/two does not match any
  error: failed to push some refs to 'dst.git'

For negative refspecs, would we want to loosen the "must-exist" check?
Or really, is this getting into the "are we negative on the src or dst"
thing you brought up earlier? Especially with --prune, what I really
want to say is "do not touch the remote refs/heads/two".

We can get work around it by using a wildcard:

  $ git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two*
  To dst.git
   - [deleted]         prune/one

So it works as I'd expect already with your patch. But I do wonder if
there are corner cases around the src/dst thing that might not behave
sensibly.

-Peff
Jacob Keller Aug. 20, 2020, 11:59 p.m. UTC | #5
On Tue, Aug 18, 2020 at 10:41 AM Jeff King <peff@peff.net> wrote:
>
> On Mon, Aug 17, 2020 at 05:04:00PM -0700, Jacob Keller wrote:
>
> > > > +     /* apply any negative refspecs now to prune the list of refs */
> > > > +     ref_map = apply_negative_refspecs(ref_map, rs);
> > > > +
> > > >       ref_map = ref_remove_duplicates(ref_map);
> > >
> > > How was the ordering here decided?  Should it result the same set if
> > > negative ones are excluded after duplicates are removed?
> >
> > Good question. This was what was done in peff's original patch. I need
> > to understand a bit more about what ref_remove_duplicates does to
> > really figure this out.
>
> The relevant commit is 2467a4fa03 (Remove duplicate ref matches in
> fetch, 2007-10-08), I think. We may end up with multiple refspecs
> requesting a particular ref. E.g.:
>
>   git fetch origin refs/heads/master refs/heads/*
>
> I don't think the order should matter. If we apply negative refspecs
> first, then we'd either remove both copies or leave both untouched (and
> if the latter, then de-dup to a single). If we apply negative refspecs
> after de-duping, then we'd either remove the single or leave it in
> place. But the result is the same either way.

I'm not sure this is quite true in the case where destinations are
supplied. Suppose this case:

git fetch refs/heads/*:refs/remotes/origin/*
refs/other/mybranch:refs/remotes/origin/mybranch

This would ofcourse error out due to de-duping where we determine that
both would fetch to the same place.. however if you also added a
negative refspec:

git fetch refs/heads/*:refs/remotes/origin/*
refs/other/mybranch:refs/remotes/origin/mybranch ^refs/heads/mybranch

then shouldn't this work? meaning we should de-dupe only after we
apply negative refspecs in this case?

>
> > > > @@ -1441,6 +1445,8 @@ int match_push_refs(struct ref *src, struct ref **dst,
> > > >               string_list_clear(&src_ref_index, 0);
> > > >       }
> > > >
> > > > +     *dst = apply_negative_refspecs(*dst, rs);
> > > > +
> > >
> > > The block of code whose tail is shown in the pre-context has
> > > prepared "delete these refs because we no longer have them" to the
> > > other side under MATCH_REFS_PRUNE but that was done based on the
> > > *dst list before we applied the negative refspec.  Is the ordering
> > > of these two correct, or should we filter the dst list with negative
> > > ones and use the resulting one in pruning operation?
> >
> > I think we need to swap the order here. I'll take a closer look.
>


> Hmm. I think the behavior we'd want is something like:
>
>   # make sure the other side has three refs
>   git branch prune/one HEAD
>   git branch prune/two HEAD
>   git branch prune/three HEAD
>   git push dst.git refs/heads/prune/*
>
>   # now drop two of ours, which are eligible for pruning
>   git branch -d prune/one
>   git branch -d prune/two
>
>   # push with pruning, omitting "two"
>   git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two
>
>   # we should leave "two" but still deleted "one"
>   test_write_lines one three >expect
>   git -C dst.git for-each-ref --format='%(refname:lstrip=3)' refs/heads/prune/ >actual
>   test_cmp expect actual
>
> I.e., the negative refspec shrinks the space we're considering pruning.
> And we'd probably want a similar test for "fetch --prune".
>
> I just tried that, though, and got an interesting result. The push
> actually complains:
>
>   $ git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two
>   error: src refspec refs/heads/prune/two does not match any
>   error: failed to push some refs to 'dst.git'
>
> For negative refspecs, would we want to loosen the "must-exist" check?
> Or really, is this getting into the "are we negative on the src or dst"
> thing you brought up earlier? Especially with --prune, what I really
> want to say is "do not touch the remote refs/heads/two".
>

Hmmm..

For regular push the negative refspec applies to the source. For prune
though we only provide a destination..

> We can get work around it by using a wildcard:
>
>   $ git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two*
>   To dst.git
>    - [deleted]         prune/one
>
> So it works as I'd expect already with your patch. But I do wonder if
> there are corner cases around the src/dst thing that might not behave
> sensibly.
>

Right, there's some interesting questions here still.

> -Peff

I'll be adding this as a test!

Thanks,
Jake
Jeff King Aug. 21, 2020, 2:33 a.m. UTC | #6
On Thu, Aug 20, 2020 at 04:59:53PM -0700, Jacob Keller wrote:

> > The relevant commit is 2467a4fa03 (Remove duplicate ref matches in
> > fetch, 2007-10-08), I think. We may end up with multiple refspecs
> > requesting a particular ref. E.g.:
> >
> >   git fetch origin refs/heads/master refs/heads/*
> >
> > I don't think the order should matter. If we apply negative refspecs
> > first, then we'd either remove both copies or leave both untouched (and
> > if the latter, then de-dup to a single). If we apply negative refspecs
> > after de-duping, then we'd either remove the single or leave it in
> > place. But the result is the same either way.
> 
> I'm not sure this is quite true in the case where destinations are
> supplied. Suppose this case:

Oh, you're right. I was too focused on the de-duping of identical refs,
but this is also handling colliding destinations.

> git fetch refs/heads/*:refs/remotes/origin/*
> refs/other/mybranch:refs/remotes/origin/mybranch
> 
> This would ofcourse error out due to de-duping where we determine that
> both would fetch to the same place.. however if you also added a
> negative refspec:
> 
> git fetch refs/heads/*:refs/remotes/origin/*
> refs/other/mybranch:refs/remotes/origin/mybranch ^refs/heads/mybranch
> 
> then shouldn't this work? meaning we should de-dupe only after we
> apply negative refspecs in this case?

Yes, I'd agree we should be applying the negative refspecs first, and
then de-duping / looking for collisions. Which I think is what the patch
is doing currently.

-Peff
Junio C Hamano Aug. 21, 2020, 4:19 p.m. UTC | #7
Jeff King <peff@peff.net> writes:

> Yes, I'd agree we should be applying the negative refspecs first, and
> then de-duping / looking for collisions. Which I think is what the patch
> is doing currently.

Good to see that we thought this through.  The reasoning deserves to
be recorded somewhere (perhaps a comment just before making the call
to apply the negative refspec).

Thanks.
Jacob Keller Aug. 21, 2020, 4:28 p.m. UTC | #8
On Fri, Aug 21, 2020 at 9:19 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Jeff King <peff@peff.net> writes:
>
> > Yes, I'd agree we should be applying the negative refspecs first, and
> > then de-duping / looking for collisions. Which I think is what the patch
> > is doing currently.
>
> Good to see that we thought this through.  The reasoning deserves to
> be recorded somewhere (perhaps a comment just before making the call
> to apply the negative refspec).
>
> Thanks.

I am hoping to add a test case for this as well!
Jacob Keller Aug. 21, 2020, 5:16 p.m. UTC | #9
On Tue, Aug 18, 2020 at 10:41 AM Jeff King <peff@peff.net> wrote:
> Hmm. I think the behavior we'd want is something like:
>
>   # make sure the other side has three refs
>   git branch prune/one HEAD
>   git branch prune/two HEAD
>   git branch prune/three HEAD
>   git push dst.git refs/heads/prune/*
>
>   # now drop two of ours, which are eligible for pruning
>   git branch -d prune/one
>   git branch -d prune/two
>
>   # push with pruning, omitting "two"
>   git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two
>
>   # we should leave "two" but still deleted "one"
>   test_write_lines one three >expect
>   git -C dst.git for-each-ref --format='%(refname:lstrip=3)' refs/heads/prune/ >actual
>   test_cmp expect actual
>
> I.e., the negative refspec shrinks the space we're considering pruning.
> And we'd probably want a similar test for "fetch --prune".
>
> I just tried that, though, and got an interesting result. The push
> actually complains:
>
>   $ git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two
>   error: src refspec refs/heads/prune/two does not match any
>   error: failed to push some refs to 'dst.git'
>
> For negative refspecs, would we want to loosen the "must-exist" check?
> Or really, is this getting into the "are we negative on the src or dst"
> thing you brought up earlier? Especially with --prune, what I really
> want to say is "do not touch the remote refs/heads/two".
>
> We can get work around it by using a wildcard:
>
>   $ git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two*
>   To dst.git
>    - [deleted]         prune/one
>
> So it works as I'd expect already with your patch. But I do wonder if
> there are corner cases around the src/dst thing that might not behave
> sensibly.
>

Hmm. So this raises a good point. I added a variation of this test
where I used separate names for the source and destination. It looks
like with the current implementation, negative refspecs always apply
to the destination.
Jacob Keller Aug. 21, 2020, 5:26 p.m. UTC | #10
On Fri, Aug 21, 2020 at 10:16 AM Jacob Keller <jacob.keller@gmail.com> wrote:
>
> On Tue, Aug 18, 2020 at 10:41 AM Jeff King <peff@peff.net> wrote:
> > Hmm. I think the behavior we'd want is something like:
> >
> >   # make sure the other side has three refs
> >   git branch prune/one HEAD
> >   git branch prune/two HEAD
> >   git branch prune/three HEAD
> >   git push dst.git refs/heads/prune/*
> >
> >   # now drop two of ours, which are eligible for pruning
> >   git branch -d prune/one
> >   git branch -d prune/two
> >
> >   # push with pruning, omitting "two"
> >   git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two
> >
> >   # we should leave "two" but still deleted "one"
> >   test_write_lines one three >expect
> >   git -C dst.git for-each-ref --format='%(refname:lstrip=3)' refs/heads/prune/ >actual
> >   test_cmp expect actual
> >
> > I.e., the negative refspec shrinks the space we're considering pruning.
> > And we'd probably want a similar test for "fetch --prune".
> >
> > I just tried that, though, and got an interesting result. The push
> > actually complains:
> >
> >   $ git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two
> >   error: src refspec refs/heads/prune/two does not match any
> >   error: failed to push some refs to 'dst.git'
> >
> > For negative refspecs, would we want to loosen the "must-exist" check?
> > Or really, is this getting into the "are we negative on the src or dst"
> > thing you brought up earlier? Especially with --prune, what I really
> > want to say is "do not touch the remote refs/heads/two".
> >
> > We can get work around it by using a wildcard:
> >
> >   $ git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two*
> >   To dst.git
> >    - [deleted]         prune/one
> >
> > So it works as I'd expect already with your patch. But I do wonder if
> > there are corner cases around the src/dst thing that might not behave
> > sensibly.
> >
>
> Hmm. So this raises a good point. I added a variation of this test
> where I used separate names for the source and destination. It looks
> like with the current implementation, negative refspecs always apply
> to the destination.

I also tried adding a test for fetch --prune, but that ultimately
calls query_refspecs_multiple and query_refspecs. I need to figure out
how negative refspecs need to interact with that function still.
Jacob Keller Aug. 21, 2020, 6:21 p.m. UTC | #11
On Fri, Aug 21, 2020 at 10:26 AM Jacob Keller <jacob.keller@gmail.com> wrote:
>
> On Fri, Aug 21, 2020 at 10:16 AM Jacob Keller <jacob.keller@gmail.com> wrote:
> >
> > On Tue, Aug 18, 2020 at 10:41 AM Jeff King <peff@peff.net> wrote:
> > > Hmm. I think the behavior we'd want is something like:
> > >
> > >   # make sure the other side has three refs
> > >   git branch prune/one HEAD
> > >   git branch prune/two HEAD
> > >   git branch prune/three HEAD
> > >   git push dst.git refs/heads/prune/*
> > >
> > >   # now drop two of ours, which are eligible for pruning
> > >   git branch -d prune/one
> > >   git branch -d prune/two
> > >
> > >   # push with pruning, omitting "two"
> > >   git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two
> > >
> > >   # we should leave "two" but still deleted "one"
> > >   test_write_lines one three >expect
> > >   git -C dst.git for-each-ref --format='%(refname:lstrip=3)' refs/heads/prune/ >actual
> > >   test_cmp expect actual
> > >
> > > I.e., the negative refspec shrinks the space we're considering pruning.
> > > And we'd probably want a similar test for "fetch --prune".
> > >
> > > I just tried that, though, and got an interesting result. The push
> > > actually complains:
> > >
> > >   $ git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two
> > >   error: src refspec refs/heads/prune/two does not match any
> > >   error: failed to push some refs to 'dst.git'
> > >
> > > For negative refspecs, would we want to loosen the "must-exist" check?
> > > Or really, is this getting into the "are we negative on the src or dst"
> > > thing you brought up earlier? Especially with --prune, what I really
> > > want to say is "do not touch the remote refs/heads/two".
> > >
> > > We can get work around it by using a wildcard:
> > >
> > >   $ git push --prune dst.git refs/heads/prune/* ^refs/heads/prune/two*
> > >   To dst.git
> > >    - [deleted]         prune/one
> > >
> > > So it works as I'd expect already with your patch. But I do wonder if
> > > there are corner cases around the src/dst thing that might not behave
> > > sensibly.
> > >
> >
> > Hmm. So this raises a good point. I added a variation of this test
> > where I used separate names for the source and destination. It looks
> > like with the current implementation, negative refspecs always apply
> > to the destination.
>
> I also tried adding a test for fetch --prune, but that ultimately
> calls query_refspecs_multiple and query_refspecs. I need to figure out
> how negative refspecs need to interact with that function still.

So there's an interesting problem here... query_refspecs_multiple
takes only the destination name, which makes the "get_stale_heads" not
work properly, since for fetch we want to apply the refspec to the
remote sides "source".
Jeff King Aug. 21, 2020, 6:59 p.m. UTC | #12
On Fri, Aug 21, 2020 at 11:21:19AM -0700, Jacob Keller wrote:

> > I also tried adding a test for fetch --prune, but that ultimately
> > calls query_refspecs_multiple and query_refspecs. I need to figure out
> > how negative refspecs need to interact with that function still.
> 
> So there's an interesting problem here... query_refspecs_multiple
> takes only the destination name, which makes the "get_stale_heads" not
> work properly, since for fetch we want to apply the refspec to the
> remote sides "source".

Hmm. So if I understand it, that function is asking about _local_ refs,
and wondering "if we were to fetch using these refspecs, would we write
to this ref". We know that negative refspecs can't impact the mapping of
remote to local.

But I guess the case you are about is:

  git fetch --prune refs/heads/*:refs/remotes/origin/* ^refs/heads/foo

where we need to realize that the local refs/remotes/origin/foo needs to
be saved. I think that should be possible by reverse-applying the
transformations from any positive refspecs, and then seeing if they
match any negative ones. I don't know how much support the existing code
will give you for that, though.

-Peff
diff mbox series

Patch

diff --git a/builtin/fetch.c b/builtin/fetch.c
index c49f0e975203..930214626b54 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -530,6 +530,9 @@  static struct ref *get_ref_map(struct remote *remote,
 		tail = &rm->next;
 	}
 
+	/* apply any negative refspecs now to prune the list of refs */
+	ref_map = apply_negative_refspecs(ref_map, rs);
+
 	ref_map = ref_remove_duplicates(ref_map);
 
 	refname_hash_init(&existing_refs);
diff --git a/refspec.c b/refspec.c
index f10ef284cef9..feed20aca961 100644
--- a/refspec.c
+++ b/refspec.c
@@ -8,6 +8,7 @@  static struct refspec_item s_tag_refspec = {
 	1,
 	0,
 	0,
+	0,
 	"refs/tags/*",
 	"refs/tags/*"
 };
@@ -32,10 +33,17 @@  static int parse_refspec(struct refspec_item *item, const char *refspec, int fet
 	if (*lhs == '+') {
 		item->force = 1;
 		lhs++;
+	} else if (*lhs == '^') {
+		item->negative = 1;
+		lhs++;
 	}
 
 	rhs = strrchr(lhs, ':');
 
+	/* negative refspecs only have one side */
+	if (item->negative && rhs)
+		return 0;
+
 	/*
 	 * Before going on, special case ":" (or "+:") as a refspec
 	 * for pushing matching refs.
@@ -66,6 +74,28 @@  static int parse_refspec(struct refspec_item *item, const char *refspec, int fet
 	item->src = xstrndup(lhs, llen);
 	flags = REFNAME_ALLOW_ONELEVEL | (is_glob ? REFNAME_REFSPEC_PATTERN : 0);
 
+	if (item->negative) {
+		struct object_id unused;
+
+		/*
+		 * Negative refspecs only have a LHS, which indicates a ref
+		 * (or pattern of refs) to exclude from other matches. This
+		 * can either be a simple ref, a glob pattern, or even an
+		 * exact sha1 match.
+		 */
+		if (!*item->src)
+			return 0; /* negative refspecs must not be empty */
+		else if (llen == the_hash_algo->hexsz && !get_oid_hex(item->src, &unused))
+			item->exact_sha1 = 1; /* ok */
+		else if (!check_refname_format(item->src, flags))
+			; /* valid looking ref is ok */
+		else
+			return 0;
+
+		/* other rules for negative refspecs don't apply */
+		return 1;
+	}
+
 	if (fetch) {
 		struct object_id unused;
 
diff --git a/refspec.h b/refspec.h
index 8d654e3a3ac4..e5bf6d25d0f7 100644
--- a/refspec.h
+++ b/refspec.h
@@ -5,12 +5,13 @@ 
 extern const struct refspec_item *tag_refspec;
 
 /**
- * A struct refspec_item holds the parsed interpretation of a refspec.  If it will
- * force updates (starts with a '+'), force is true.  If it is a pattern
- * (sides end with '*') pattern is true.  src and dest are the two sides
- * (including '*' characters if present); if there is only one side, it is src,
- * and dst is NULL; if sides exist but are empty (i.e., the refspec either
- * starts or ends with ':'), the corresponding side is "".
+ * A struct refspec_item holds the parsed interpretation of a refspec.  If it
+ * will force updates (starts with a '+'), force is true.  If it is a pattern
+ * (sides end with '*') pattern is true.  If it is a negative refspec, (starts
+ * with '^'), negative is true.  src and dest are the two sides (including '*'
+ * characters if present); if there is only one side, it is src, and dst is
+ * NULL; if sides exist but are empty (i.e., the refspec either starts or ends
+ * with ':'), the corresponding side is "".
  *
  * remote_find_tracking(), given a remote and a struct refspec_item with either src
  * or dst filled out, will fill out the other such that the result is in the
@@ -22,6 +23,7 @@  struct refspec_item {
 	unsigned pattern : 1;
 	unsigned matching : 1;
 	unsigned exact_sha1 : 1;
+	unsigned negative : 1;
 
 	char *src;
 	char *dst;
diff --git a/remote.c b/remote.c
index c5ed74f91c63..6a41d1028221 100644
--- a/remote.c
+++ b/remote.c
@@ -1058,7 +1058,7 @@  static int match_explicit(struct ref *src, struct ref *dst,
 	const char *dst_value = rs->dst;
 	char *dst_guess;
 
-	if (rs->pattern || rs->matching)
+	if (rs->pattern || rs->matching || rs->negative)
 		return 0;
 
 	matched_src = matched_dst = NULL;
@@ -1134,6 +1134,10 @@  static char *get_ref_match(const struct refspec *rs, const struct ref *ref,
 	int matching_refs = -1;
 	for (i = 0; i < rs->nr; i++) {
 		const struct refspec_item *item = &rs->items[i];
+
+		if (item->negative)
+			continue;
+
 		if (item->matching &&
 		    (matching_refs == -1 || item->force)) {
 			matching_refs = i;
@@ -1441,6 +1445,8 @@  int match_push_refs(struct ref *src, struct ref **dst,
 		string_list_clear(&src_ref_index, 0);
 	}
 
+	*dst = apply_negative_refspecs(*dst, rs);
+
 	if (errs)
 		return -1;
 	return 0;
@@ -1810,6 +1816,9 @@  int get_fetch_map(const struct ref *remote_refs,
 {
 	struct ref *ref_map, **rmp;
 
+	if (refspec->negative)
+		return 0;
+
 	if (refspec->pattern) {
 		ref_map = get_expanded_map(remote_refs, refspec);
 	} else {
@@ -1853,6 +1862,44 @@  int get_fetch_map(const struct ref *remote_refs,
 	return 0;
 }
 
+static int refspec_match(const struct refspec_item *refspec,
+			 const char *name)
+{
+	if (refspec->pattern)
+		return match_name_with_pattern(refspec->src, name, NULL, NULL);
+
+	return !strcmp(refspec->src, name);
+}
+
+static int omit_name_by_refspec(const char *name, struct refspec *rs)
+{
+	int i;
+
+	for (i = 0; i < rs->nr; i++) {
+		if (rs->items[i].negative && refspec_match(&rs->items[i], name))
+			return 1;
+	}
+	return 0;
+}
+
+struct ref *apply_negative_refspecs(struct ref *ref_map, struct refspec *rs)
+{
+	struct ref **tail;
+
+	for (tail = &ref_map; *tail; ) {
+		struct ref *ref = *tail;
+
+		if (omit_name_by_refspec(ref->name, rs)) {
+			*tail = ref->next;
+			free(ref->peer_ref);
+			free(ref);
+		} else
+			tail = &ref->next;
+	}
+
+	return ref_map;
+}
+
 int resolve_remote_symref(struct ref *ref, struct ref *list)
 {
 	if (!ref->symref)
diff --git a/remote.h b/remote.h
index 5e3ea5a26deb..104e75e0f74d 100644
--- a/remote.h
+++ b/remote.h
@@ -193,6 +193,12 @@  int resolve_remote_symref(struct ref *ref, struct ref *list);
  */
 struct ref *ref_remove_duplicates(struct ref *ref_map);
 
+/*
+ * Remove all entries in the input list which match any negative refspec in
+ * the refspec list.
+ */
+struct ref *apply_negative_refspecs(struct ref *ref_map, struct refspec *rs);
+
 int query_refspecs(struct refspec *rs, struct refspec_item *query);
 char *apply_refspecs(struct refspec *rs, const char *name);
 
@@ -205,7 +211,8 @@  void set_ref_status_for_push(struct ref *remote_refs, int send_mirror,
 /*
  * Given a list of the remote refs and the specification of things to
  * fetch, makes a (separate) list of the refs to fetch and the local
- * refs to store into.
+ * refs to store into. Note that negative refspecs are ignored here, and
+ * should be handled separately.
  *
  * *tail is the pointer to the tail pointer of the list of results
  * beforehand, and will be set to the tail pointer of the list of