Message ID | 20230902090155.8978-1-five231003@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 6d79cd8474b7bb4979f2a7544fd736bed190261a |
Headers | show |
Series | [v2] ref-filter: sort numerically when ":size" is used | expand |
On Sat, Sep 02, 2023 at 02:30:39PM +0530, Kousik Sanagavarapu wrote: > Atoms like "raw" and "contents" have a ":size" option which can be used > to know the size of the data. Since these atoms have the cmp_type > FIELD_STR, they are sorted alphabetically from 'a' to 'z' and '0' to > '9'. Meaning, even when the ":size" option is used and what we > ultimatlely have is numbers, we still sort alphabetically. [...] > So, sort numerically whenever the sort is done with "contents:size" or > "raw:size" and do it the normal alphabetic way when "contents" or "raw" > are used with some other option (they are FIELD_STR anyways). > > Helped-by: Jeff King <peff@peff.net> > Signed-off-by: Kousik Sanagavarapu <five231003@gmail.com> > --- Oops, forgot the range-diff. Range-diff against v1: 1: 9b561e429b ! 1: 194dcb0b0d ref-filter: sort numerically when ":size" is used @@ Commit message "raw:size" and do it the normal alphabetic way when "contents" or "raw" are used with some other option (they are FIELD_STR anyways). + Helped-by: Jeff King <peff@peff.net> Signed-off-by: Kousik Sanagavarapu <five231003@gmail.com> ## ref-filter.c ## -@@ ref-filter.c: struct atom_value { - ssize_t s_size; - int (*handler)(struct atom_value *atomv, struct ref_formatting_state *state, - struct strbuf *err); -- uintmax_t value; /* used for sorting when not FIELD_STR */ -+ -+ /* -+ * Used for sorting when not FIELD_STR or when FIELD_STR but the -+ * sort should be numeric and not alphabetic. -+ */ -+ uintmax_t value; -+ - struct used_atom *atom; - }; - +@@ ref-filter.c: static int contents_atom_parser(struct ref_format *format, struct used_atom *ato + atom->u.contents.option = C_BARE; + else if (!strcmp(arg, "body")) + atom->u.contents.option = C_BODY; +- else if (!strcmp(arg, "size")) ++ else if (!strcmp(arg, "size")) { ++ atom->type = FIELD_ULONG; + atom->u.contents.option = C_LENGTH; +- else if (!strcmp(arg, "signature")) ++ } else if (!strcmp(arg, "signature")) + atom->u.contents.option = C_SIG; + else if (!strcmp(arg, "subject")) + atom->u.contents.option = C_SUB; +@@ ref-filter.c: static int raw_atom_parser(struct ref_format *format UNUSED, + { + if (!arg) + atom->u.raw_data.option = RAW_BARE; +- else if (!strcmp(arg, "size")) ++ else if (!strcmp(arg, "size")) { ++ atom->type = FIELD_ULONG; + atom->u.raw_data.option = RAW_LENGTH; +- else ++ } else + return err_bad_arg(err, "raw", arg); + return 0; + } @@ ref-filter.c: static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp v->s = xmemdupz(buf, buf_size); v->s_size = buf_size; v->s = xmemdupz(buf, buf_size); v->s_size = buf_size; v->s_size = buf_size; } else if (atom->u.raw_data.option == RAW_LENGTH) { - v->s = xstrfmt("%"PRIuMAX, (uintmax_t)buf_size); -+ v->value = (uintmax_t)buf_size; ++ v->value = buf_size; + v->s = xstrfmt("%"PRIuMAX, v->value); } continue; @@ ref-filter.c: static void grab_sub_body_contents(struct atom_value *val, int der v->s = xmemdupz(bodypos, bodylen); - else if (atom->u.contents.option == C_LENGTH) - v->s = xstrfmt("%"PRIuMAX, (uintmax_t)strlen(subpos)); +- else if (atom->u.contents.option == C_BODY) + else if (atom->u.contents.option == C_LENGTH) { -+ v->value = (uintmax_t)strlen(subpos); ++ v->value = strlen(subpos); + v->s = xstrfmt("%"PRIuMAX, v->value); -+ } - else if (atom->u.contents.option == C_BODY) ++ } else if (atom->u.contents.option == C_BODY) v->s = xmemdupz(bodypos, nonsiglen); else if (atom->u.contents.option == C_SIG) + v->s = xmemdupz(sigpos, siglen); @@ ref-filter.c: static int populate_value(struct ref_array_item *ref, struct strbuf *err) v->s_size = ATOM_SIZE_UNSPECIFIED; @@ ref-filter.c: static int populate_value(struct ref_array_item *ref, struct strbu v->atom = atom; if (*name == '*') { -@@ ref-filter.c: static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru - cmp_detached_head = 1; - } else if (s->sort_flags & REF_SORTING_VERSION) { - cmp = versioncmp(va->s, vb->s); -- } else if (cmp_type == FIELD_STR) { -+ } else if (cmp_type == FIELD_STR && !va->value && !vb->value) { - if (va->s_size < 0 && vb->s_size < 0) { - int (*cmp_fn)(const char *, const char *); - cmp_fn = s->sort_flags & REF_SORTING_ICASE ## t/t6300-for-each-ref.sh ## @@ t/t6300-for-each-ref.sh: test_expect_success 'Verify sorts with raw' '
Kousik Sanagavarapu <five231003@gmail.com> writes: > } else if (atom->u.raw_data.option == RAW_LENGTH) { > - v->s = xstrfmt("%"PRIuMAX, (uintmax_t)buf_size); > + v->value = buf_size; > + v->s = xstrfmt("%"PRIuMAX, v->value); > } > continue; Interesting that typeof(.value) happens to be uintmax_t, keeping this a safe change.
diff --git a/ref-filter.c b/ref-filter.c index 1bfaf20fbf..9dbc4f71bd 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -582,9 +582,10 @@ static int contents_atom_parser(struct ref_format *format, struct used_atom *ato atom->u.contents.option = C_BARE; else if (!strcmp(arg, "body")) atom->u.contents.option = C_BODY; - else if (!strcmp(arg, "size")) + else if (!strcmp(arg, "size")) { + atom->type = FIELD_ULONG; atom->u.contents.option = C_LENGTH; - else if (!strcmp(arg, "signature")) + } else if (!strcmp(arg, "signature")) atom->u.contents.option = C_SIG; else if (!strcmp(arg, "subject")) atom->u.contents.option = C_SUB; @@ -690,9 +691,10 @@ static int raw_atom_parser(struct ref_format *format UNUSED, { if (!arg) atom->u.raw_data.option = RAW_BARE; - else if (!strcmp(arg, "size")) + else if (!strcmp(arg, "size")) { + atom->type = FIELD_ULONG; atom->u.raw_data.option = RAW_LENGTH; - else + } else return err_bad_arg(err, "raw", arg); return 0; } @@ -1857,7 +1859,8 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp v->s = xmemdupz(buf, buf_size); v->s_size = buf_size; } else if (atom->u.raw_data.option == RAW_LENGTH) { - v->s = xstrfmt("%"PRIuMAX, (uintmax_t)buf_size); + v->value = buf_size; + v->s = xstrfmt("%"PRIuMAX, v->value); } continue; } @@ -1883,9 +1886,10 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp v->s = strbuf_detach(&sb, NULL); } else if (atom->u.contents.option == C_BODY_DEP) v->s = xmemdupz(bodypos, bodylen); - else if (atom->u.contents.option == C_LENGTH) - v->s = xstrfmt("%"PRIuMAX, (uintmax_t)strlen(subpos)); - else if (atom->u.contents.option == C_BODY) + else if (atom->u.contents.option == C_LENGTH) { + v->value = strlen(subpos); + v->s = xstrfmt("%"PRIuMAX, v->value); + } else if (atom->u.contents.option == C_BODY) v->s = xmemdupz(bodypos, nonsiglen); else if (atom->u.contents.option == C_SIG) v->s = xmemdupz(sigpos, siglen); @@ -2265,6 +2269,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) v->s_size = ATOM_SIZE_UNSPECIFIED; v->handler = append_atom; + v->value = 0; v->atom = atom; if (*name == '*') { diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh index aa3c7c03c4..7b943fd34c 100755 --- a/t/t6300-for-each-ref.sh +++ b/t/t6300-for-each-ref.sh @@ -1017,16 +1017,16 @@ test_expect_success 'Verify sorts with raw' ' test_expect_success 'Verify sorts with raw:size' ' cat >expected <<-EOF && refs/myblobs/blob8 - refs/myblobs/first refs/myblobs/blob7 - refs/heads/main refs/myblobs/blob4 refs/myblobs/blob1 refs/myblobs/blob2 refs/myblobs/blob3 refs/myblobs/blob5 refs/myblobs/blob6 + refs/myblobs/first refs/mytrees/first + refs/heads/main EOF git for-each-ref --format="%(refname)" --sort=raw:size \ refs/heads/main refs/myblobs/ refs/mytrees/first >actual && @@ -1138,6 +1138,17 @@ test_expect_success 'for-each-ref --format compare with cat-file --batch' ' test_cmp expected actual ' +test_expect_success 'verify sorts with contents:size' ' + cat >expect <<-\EOF && + refs/heads/main + refs/heads/newtag + refs/heads/ambiguous + EOF + git for-each-ref --format="%(refname)" \ + --sort=contents:size refs/heads/ >actual && + test_cmp expect actual +' + test_expect_success 'set up multiple-sort tags' ' for when in 100000 200000 do
Atoms like "raw" and "contents" have a ":size" option which can be used to know the size of the data. Since these atoms have the cmp_type FIELD_STR, they are sorted alphabetically from 'a' to 'z' and '0' to '9'. Meaning, even when the ":size" option is used and what we ultimatlely have is numbers, we still sort alphabetically. For example, consider the the following case in a repo refname contents:size raw:size ======= ============= ======== refs/heads/branch1 1130 1210 refs/heads/master 300 410 refs/tags/v1.0 140 260 Sorting with "--format="%(refname) %(contents:size) --sort=contents:size" would give refs/heads/branch1 1130 refs/tags/v1.0.0 140 refs/heads/master 300 which is an alphabetic sort, while what one might really expect is refs/tags/v1.0.0 140 refs/heads/master 300 refs/heads/branch1 1130 which is a numeric sort (that is, a "$ sort -n file" as opposed to a "$ sort file", where "file" contains only the "contents:size" or "raw:size" info, each of which is on a newline). Same is the case with "--sort=raw:size". So, sort numerically whenever the sort is done with "contents:size" or "raw:size" and do it the normal alphabetic way when "contents" or "raw" are used with some other option (they are FIELD_STR anyways). Helped-by: Jeff King <peff@peff.net> Signed-off-by: Kousik Sanagavarapu <five231003@gmail.com> --- ref-filter.c | 21 +++++++++++++-------- t/t6300-for-each-ref.sh | 15 +++++++++++++-- 2 files changed, 26 insertions(+), 10 deletions(-)