Message ID | 20190705170630.27500-10-t.gummerer@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2,01/14] apply: replace marc.info link with public-inbox | expand |
Hi Thomas, On Fri, 5 Jul 2019, Thomas Gummerer wrote: > Currently range-diff uses the 'strbuf_getline()' function for doing > its line by line processing. In a future patch we want to do parts of > that parsing using the 'parse_git_header()' function, which does If you like my suggestion in patch 7/14, this commit message needs to talk about the new name, too. > requires reading parts of the input from that function, which doesn't s/requires/require/ > use strbufs. > > Switch range-diff to do our own line by line parsing, so we can re-use > the parse_git_header function later. > > Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com> > --- > > Longer term it might be better to have both range-diff and apply code > use strbufs. However I didn't feel it's worth making that change for > this patch series. Makes sense. > range-diff.c | 69 +++++++++++++++++++++++++++++----------------------- > 1 file changed, 39 insertions(+), 30 deletions(-) > > diff --git a/range-diff.c b/range-diff.c > index 9242b8975f..916afa44c0 100644 > --- a/range-diff.c > +++ b/range-diff.c > @@ -24,6 +24,17 @@ struct patch_util { > struct object_id oid; > }; > > +static unsigned long linelen(const char *buffer, unsigned long size) Shouldn't this be `size_t`? > +{ > + unsigned long len = 0; Likewise. > + while (size--) { > + len++; > + if (*buffer++ == '\n') > + break; > + } > + return len; How about const char *eol = memchr(buffer, '\n', size); return !eol ? size : eol + 1 - buffer; instead? For an extra brownie point, you could even rename this function to `find_end_of_line()` and replace the LF by a NUL: if (!eol) return size; *eol = '\0'; return eol + 1 - buffer; > +} > + > /* > * Reads the patches into a string list, with the `util` field being populated > * as struct object_id (will need to be free()d). 
> @@ -31,10 +42,12 @@ struct patch_util { > static int read_patches(const char *range, struct string_list *list) > { > struct child_process cp = CHILD_PROCESS_INIT; > - FILE *in; > - struct strbuf buf = STRBUF_INIT, line = STRBUF_INIT; > + struct strbuf buf = STRBUF_INIT, file = STRBUF_INIT; This puzzled me. I'd like to suggest s/file/contents/ > struct patch_util *util = NULL; > int in_header = 1; > + char *line; > + int offset, len; > + size_t size; > > argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges", > "--reverse", "--date-order", "--decorate=no", > @@ -54,17 +67,15 @@ static int read_patches(const char *range, struct string_list *list) > > if (start_command(&cp)) > return error_errno(_("could not start `log`")); > - in = fdopen(cp.out, "r"); > - if (!in) { > - error_errno(_("could not read `log` output")); > - finish_command(&cp); > - return -1; > - } > + strbuf_read(&file, cp.out, 0); Shouldn't we handle a negative return value here, erroring out with "could not read `log` output" as before? > > - while (strbuf_getline(&line, in) != EOF) { > + line = strbuf_detach(&file, &size); I strongly suspect this to leak, given that `line` is subsequently advanced, and there is no backup copy. Maybe line = file.buf; size = file.len; would make more sense here? > + for (offset = 0; size > 0; offset += len, size -= len, line += len) { > const char *p; > > - if (skip_prefix(line.buf, "commit ", &p)) { > + len = linelen(line, size); > + line[len - 1] = '\0'; > + if (skip_prefix(line, "commit ", &p)) { > if (util) { > string_list_append(list, buf.buf)->util = util; > strbuf_reset(&buf); > @@ -75,8 +86,6 @@ static int read_patches(const char *range, struct string_list *list) > free(util); > string_list_clear(list, 1); > strbuf_release(&buf); > - strbuf_release(&line); > - fclose(in); We should release the file contents in `file` (or `contents`, if you like my suggestions) here. 
> finish_command(&cp); > return -1; > } > @@ -85,26 +94,28 @@ static int read_patches(const char *range, struct string_list *list) > continue; > } > > - if (starts_with(line.buf, "diff --git")) { > + if (starts_with(line, "diff --git")) { > in_header = 0; > strbuf_addch(&buf, '\n'); > if (!util->diff_offset) > util->diff_offset = buf.len; > strbuf_addch(&buf, ' '); > - strbuf_addbuf(&buf, &line); > + strbuf_addstr(&buf, line); > } else if (in_header) { > - if (starts_with(line.buf, "Author: ")) { > - strbuf_addbuf(&buf, &line); > + if (starts_with(line, "Author: ")) { > + strbuf_addstr(&buf, line); > strbuf_addstr(&buf, "\n\n"); > - } else if (starts_with(line.buf, " ")) { > - strbuf_rtrim(&line); > - strbuf_addbuf(&buf, &line); > + } else if (starts_with(line, " ")) { > + p = line + len - 2; > + while (isspace(*p) && p >= line) > + p--; > + strbuf_add(&buf, line, p - line + 1); > strbuf_addch(&buf, '\n'); > } > continue; > - } else if (starts_with(line.buf, "@@ ")) > + } else if (starts_with(line, "@@ ")) > strbuf_addstr(&buf, "@@"); > - else if (!line.buf[0] || starts_with(line.buf, "index ")) > + else if (!line[0] || starts_with(line, "index ")) > /* > * A completely blank (not ' \n', which is context) > * line is not valid in a diff. We skip it > @@ -117,25 +128,23 @@ static int read_patches(const char *range, struct string_list *list) > * we are not interested. 
> */ > continue; > - else if (line.buf[0] == '>') { > + else if (line[0] == '>') { > strbuf_addch(&buf, '+'); > - strbuf_add(&buf, line.buf + 1, line.len - 1); > - } else if (line.buf[0] == '<') { > + strbuf_addstr(&buf, line + 1); > + } else if (line[0] == '<') { > strbuf_addch(&buf, '-'); > - strbuf_add(&buf, line.buf + 1, line.len - 1); > - } else if (line.buf[0] == '#') { > + strbuf_addstr(&buf, line + 1); > + } else if (line[0] == '#') { > strbuf_addch(&buf, ' '); > - strbuf_add(&buf, line.buf + 1, line.len - 1); > + strbuf_addstr(&buf, line + 1); > } else { > strbuf_addch(&buf, ' '); > - strbuf_addbuf(&buf, &line); > + strbuf_addstr(&buf, line); > } > > strbuf_addch(&buf, '\n'); > util->diffsize++; > } > - fclose(in); > - strbuf_release(&line); We should release the file contents we previously read via `strbuf_read()` here. Ciao, Dscho > > if (util) > string_list_append(list, buf.buf)->util = util; > -- > 2.22.0.510.g264f2c817a > >
On 07/05, Johannes Schindelin wrote: > Hi Thomas, > > > On Fri, 5 Jul 2019, Thomas Gummerer wrote: > > > Currently range-diff uses the 'strbuf_getline()' function for doing > > its line by line processing. In a future patch we want to do parts of > > that parsing using the 'parse_git_header()' function, which does > > If you like my suggestion in patch 7/14, this commit message needs to talk > about the new name, too. Thanks for the reminder here! I do indeed like the new name, but would probably have forgotten to change it in the commit message here. > > requires reading parts of the input from that function, which doesn't > > s/requires/require/ > > > use strbufs. > > > > Switch range-diff to do our own line by line parsing, so we can re-use > > the parse_git_header function later. > > > > Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com> > > --- > > > > Longer term it might be better to have both range-diff and apply code > > use strbufs. However I didn't feel it's worth making that change for > > this patch series. > > Makes sense. > > > range-diff.c | 69 +++++++++++++++++++++++++++++----------------------- > > 1 file changed, 39 insertions(+), 30 deletions(-) > > > > diff --git a/range-diff.c b/range-diff.c > > index 9242b8975f..916afa44c0 100644 > > --- a/range-diff.c > > +++ b/range-diff.c > > @@ -24,6 +24,17 @@ struct patch_util { > > struct object_id oid; > > }; > > > > +static unsigned long linelen(const char *buffer, unsigned long size) > > Shouldn't this be `size_t`? > > > +{ > > + unsigned long len = 0; > > Likewise. > > > + while (size--) { > > + len++; > > + if (*buffer++ == '\n') > > + break; > > + } > > + return len; > > How about > > const char *eol = memchr(buffer, '\n', size); > > return !eol ? size : eol + 1 - buffer; > > instead? 
> > For an extra brownie point, you could even rename this function to > `find_end_of_line()` and replace the LF by a NUL: > > if (!eol) > return size; > > *eol = '\0'; > return eol + 1 - buffer; I like this, thank you! > > +} > > + > > /* > > * Reads the patches into a string list, with the `util` field being populated > > * as struct object_id (will need to be free()d). > > @@ -31,10 +42,12 @@ struct patch_util { > > static int read_patches(const char *range, struct string_list *list) > > { > > struct child_process cp = CHILD_PROCESS_INIT; > > - FILE *in; > > - struct strbuf buf = STRBUF_INIT, line = STRBUF_INIT; > > + struct strbuf buf = STRBUF_INIT, file = STRBUF_INIT; > > This puzzled me. I'd like to suggest s/file/contents/ Thanks, will change. > > struct patch_util *util = NULL; > > int in_header = 1; > > + char *line; > > + int offset, len; > > + size_t size; > > > > argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges", > > "--reverse", "--date-order", "--decorate=no", > > @@ -54,17 +67,15 @@ static int read_patches(const char *range, struct string_list *list) > > > > if (start_command(&cp)) > > return error_errno(_("could not start `log`")); > > - in = fdopen(cp.out, "r"); > > - if (!in) { > > - error_errno(_("could not read `log` output")); > > - finish_command(&cp); > > - return -1; > > - } > > + strbuf_read(&file, cp.out, 0); > > Shouldn't we handle a negative return value here, erroring out with "could > not read `log` output" as before? Yeah, that was an oversight, we should definitely still handle errors here. > > > > - while (strbuf_getline(&line, in) != EOF) { > > + line = strbuf_detach(&file, &size); > > I strongly suspect this to leak, given that `line` is subsequently > advanced, and there is no backup copy. > > Maybe > > line = file.buf; > size = file.len; > > would make more sense here? Hmm good point, that makes more sense indeed. 
> > + for (offset = 0; size > 0; offset += len, size -= len, line += len) { > > const char *p; > > > > - if (skip_prefix(line.buf, "commit ", &p)) { > > + len = linelen(line, size); > > + line[len - 1] = '\0'; > > + if (skip_prefix(line, "commit ", &p)) { > > if (util) { > > string_list_append(list, buf.buf)->util = util; > > strbuf_reset(&buf); > > @@ -75,8 +86,6 @@ static int read_patches(const char *range, struct string_list *list) > > free(util); > > string_list_clear(list, 1); > > strbuf_release(&buf); > > - strbuf_release(&line); > > - fclose(in); > > We should release the file contents in `file` (or `contents`, if you like > my suggestions) here. Yeah, I thought it was no longer necessary because of the 'strbuf_detach()' earlier, but that obviously leaks in a different way as you pointed out. Will release 'contents' here and below. > > finish_command(&cp); > > return -1; > > } > > @@ -85,26 +94,28 @@ static int read_patches(const char *range, struct string_list *list) > > continue; > > } > > > > - if (starts_with(line.buf, "diff --git")) { > > + if (starts_with(line, "diff --git")) { > > in_header = 0; > > strbuf_addch(&buf, '\n'); > > if (!util->diff_offset) > > util->diff_offset = buf.len; > > strbuf_addch(&buf, ' '); > > - strbuf_addbuf(&buf, &line); > > + strbuf_addstr(&buf, line); > > } else if (in_header) { > > - if (starts_with(line.buf, "Author: ")) { > > - strbuf_addbuf(&buf, &line); > > + if (starts_with(line, "Author: ")) { > > + strbuf_addstr(&buf, line); > > strbuf_addstr(&buf, "\n\n"); > > - } else if (starts_with(line.buf, " ")) { > > - strbuf_rtrim(&line); > > - strbuf_addbuf(&buf, &line); > > + } else if (starts_with(line, " ")) { > > + p = line + len - 2; > > + while (isspace(*p) && p >= line) > > + p--; > > + strbuf_add(&buf, line, p - line + 1); > > strbuf_addch(&buf, '\n'); > > } > > continue; > > - } else if (starts_with(line.buf, "@@ ")) > > + } else if (starts_with(line, "@@ ")) > > strbuf_addstr(&buf, "@@"); > > - else if 
(!line.buf[0] || starts_with(line.buf, "index ")) > > + else if (!line[0] || starts_with(line, "index ")) > > /* > > * A completely blank (not ' \n', which is context) > > * line is not valid in a diff. We skip it > > @@ -117,25 +128,23 @@ static int read_patches(const char *range, struct string_list *list) > > * we are not interested. > > */ > > continue; > > - else if (line.buf[0] == '>') { > > + else if (line[0] == '>') { > > strbuf_addch(&buf, '+'); > > - strbuf_add(&buf, line.buf + 1, line.len - 1); > > - } else if (line.buf[0] == '<') { > > + strbuf_addstr(&buf, line + 1); > > + } else if (line[0] == '<') { > > strbuf_addch(&buf, '-'); > > - strbuf_add(&buf, line.buf + 1, line.len - 1); > > - } else if (line.buf[0] == '#') { > > + strbuf_addstr(&buf, line + 1); > > + } else if (line[0] == '#') { > > strbuf_addch(&buf, ' '); > > - strbuf_add(&buf, line.buf + 1, line.len - 1); > > + strbuf_addstr(&buf, line + 1); > > } else { > > strbuf_addch(&buf, ' '); > > - strbuf_addbuf(&buf, &line); > > + strbuf_addstr(&buf, line); > > } > > > > strbuf_addch(&buf, '\n'); > > util->diffsize++; > > } > > - fclose(in); > > - strbuf_release(&line); > > We should release the file contents we previously read via `strbuf_read()` here. > > Ciao, > Dscho > > > > > if (util) > > string_list_append(list, buf.buf)->util = util; > > -- > > 2.22.0.510.g264f2c817a > > > >
diff --git a/range-diff.c b/range-diff.c index 9242b8975f..916afa44c0 100644 --- a/range-diff.c +++ b/range-diff.c @@ -24,6 +24,17 @@ struct patch_util { struct object_id oid; }; +static unsigned long linelen(const char *buffer, unsigned long size) +{ + unsigned long len = 0; + while (size--) { + len++; + if (*buffer++ == '\n') + break; + } + return len; +} + /* * Reads the patches into a string list, with the `util` field being populated * as struct object_id (will need to be free()d). @@ -31,10 +42,12 @@ struct patch_util { static int read_patches(const char *range, struct string_list *list) { struct child_process cp = CHILD_PROCESS_INIT; - FILE *in; - struct strbuf buf = STRBUF_INIT, line = STRBUF_INIT; + struct strbuf buf = STRBUF_INIT, file = STRBUF_INIT; struct patch_util *util = NULL; int in_header = 1; + char *line; + int offset, len; + size_t size; argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges", "--reverse", "--date-order", "--decorate=no", @@ -54,17 +67,15 @@ static int read_patches(const char *range, struct string_list *list) if (start_command(&cp)) return error_errno(_("could not start `log`")); - in = fdopen(cp.out, "r"); - if (!in) { - error_errno(_("could not read `log` output")); - finish_command(&cp); - return -1; - } + strbuf_read(&file, cp.out, 0); - while (strbuf_getline(&line, in) != EOF) { + line = strbuf_detach(&file, &size); + for (offset = 0; size > 0; offset += len, size -= len, line += len) { const char *p; - if (skip_prefix(line.buf, "commit ", &p)) { + len = linelen(line, size); + line[len - 1] = '\0'; + if (skip_prefix(line, "commit ", &p)) { if (util) { string_list_append(list, buf.buf)->util = util; strbuf_reset(&buf); @@ -75,8 +86,6 @@ static int read_patches(const char *range, struct string_list *list) free(util); string_list_clear(list, 1); strbuf_release(&buf); - strbuf_release(&line); - fclose(in); finish_command(&cp); return -1; } @@ -85,26 +94,28 @@ static int read_patches(const char *range, struct 
string_list *list) continue; } - if (starts_with(line.buf, "diff --git")) { + if (starts_with(line, "diff --git")) { in_header = 0; strbuf_addch(&buf, '\n'); if (!util->diff_offset) util->diff_offset = buf.len; strbuf_addch(&buf, ' '); - strbuf_addbuf(&buf, &line); + strbuf_addstr(&buf, line); } else if (in_header) { - if (starts_with(line.buf, "Author: ")) { - strbuf_addbuf(&buf, &line); + if (starts_with(line, "Author: ")) { + strbuf_addstr(&buf, line); strbuf_addstr(&buf, "\n\n"); - } else if (starts_with(line.buf, " ")) { - strbuf_rtrim(&line); - strbuf_addbuf(&buf, &line); + } else if (starts_with(line, " ")) { + p = line + len - 2; + while (isspace(*p) && p >= line) + p--; + strbuf_add(&buf, line, p - line + 1); strbuf_addch(&buf, '\n'); } continue; - } else if (starts_with(line.buf, "@@ ")) + } else if (starts_with(line, "@@ ")) strbuf_addstr(&buf, "@@"); - else if (!line.buf[0] || starts_with(line.buf, "index ")) + else if (!line[0] || starts_with(line, "index ")) /* * A completely blank (not ' \n', which is context) * line is not valid in a diff. We skip it @@ -117,25 +128,23 @@ static int read_patches(const char *range, struct string_list *list) * we are not interested. */ continue; - else if (line.buf[0] == '>') { + else if (line[0] == '>') { strbuf_addch(&buf, '+'); - strbuf_add(&buf, line.buf + 1, line.len - 1); - } else if (line.buf[0] == '<') { + strbuf_addstr(&buf, line + 1); + } else if (line[0] == '<') { strbuf_addch(&buf, '-'); - strbuf_add(&buf, line.buf + 1, line.len - 1); - } else if (line.buf[0] == '#') { + strbuf_addstr(&buf, line + 1); + } else if (line[0] == '#') { strbuf_addch(&buf, ' '); - strbuf_add(&buf, line.buf + 1, line.len - 1); + strbuf_addstr(&buf, line + 1); } else { strbuf_addch(&buf, ' '); - strbuf_addbuf(&buf, &line); + strbuf_addstr(&buf, line); } strbuf_addch(&buf, '\n'); util->diffsize++; } - fclose(in); - strbuf_release(&line); if (util) string_list_append(list, buf.buf)->util = util;
Currently range-diff uses the 'strbuf_getline()' function for doing its line by line processing. In a future patch we want to do parts of that parsing using the 'parse_git_header()' function, which does require reading parts of the input from that function, which doesn't use strbufs. Switch range-diff to do our own line by line parsing, so we can re-use the parse_git_header function later. Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com> --- Longer term it might be better to have both range-diff and apply code use strbufs. However, I didn't feel it's worth making that change for this patch series. range-diff.c | 69 +++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 30 deletions(-)