Message ID | 20240629142542.1086076-1-mjt@tls.msk.ru (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | block/curl: rewrite http header parsing function | expand |
On 29.06.24 17:25, Michael Tokarev wrote: > Existing code was long, unclear and twisty. > > Signed-off-by: Michael Tokarev <mjt@tls.msk.ru> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> > --- > block/curl.c | 44 ++++++++++++++++++-------------------------- > 1 file changed, 18 insertions(+), 26 deletions(-) > > diff --git a/block/curl.c b/block/curl.c > index 419f7c89ef..9802d0319d 100644 > --- a/block/curl.c > +++ b/block/curl.c > @@ -210,37 +210,29 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) > { > BDRVCURLState *s = opaque; > size_t realsize = size * nmemb; > - const char *header = (char *)ptr; > - const char *end = header + realsize; > - const char *accept_ranges = "accept-ranges:"; > - const char *bytes = "bytes"; > + const char *p = ptr; > + const char *end = p + realsize; > + const char *t = "accept-ranges : bytes "; /* A lowercase template */ Note: you make parser less strict: you allow "bytes" to be uppercase (was allowed only for accept-ranges", and you allow whitespaces before colon. > > - if (realsize >= strlen(accept_ranges) > - && g_ascii_strncasecmp(header, accept_ranges, > - strlen(accept_ranges)) == 0) { > - > - char *p = strchr(header, ':') + 1; > - > - /* Skip whitespace between the header name and value. */ > - while (p < end && *p && g_ascii_isspace(*p)) { > - p++; > - } > - > - if (end - p >= strlen(bytes) > - && strncmp(p, bytes, strlen(bytes)) == 0) { > - > - /* Check that there is nothing but whitespace after the value. 
*/ > - p += strlen(bytes); > - while (p < end && *p && g_ascii_isspace(*p)) { > - p++; > - } > - > - if (p == end || !*p) { > - s->accept_range = true; > + /* check if header matches the "t" template */ > + for (;;) { > + if (*t == ' ') { /* space in t matches any amount of isspace in p */ > + if (p < end && g_ascii_isspace(*p)) { > + ++p; > + } else { > + ++t; > } > + } else if (*t && p < end && *t == g_ascii_tolower(*p)) { > + ++p, ++t; > + } else { > + break; > } > } > > + if (!*t && p == end) { /* if we managed to reach ends of both strings */ > + s->accept_range = true; > + } > + > return realsize; > } >
01.07.2024 09:54, Vladimir Sementsov-Ogievskiy wrote: >> + const char *t = "accept-ranges : bytes "; /* A lowercase template */ > > Note: you make parser less strict: you allow "bytes" to be uppercase (was allowed only for accept-ranges", and you allow whitespaces before colon. Yes, exactly. I should add this to the description (wanted to do that but forgot). I'll update the patch (without re-sending) - hopefully it's okay to keep your S-o-b :) Thanks, /mjt
On 01.07.24 09:55, Michael Tokarev wrote: > 01.07.2024 09:54, Vladimir Sementsov-Ogievskiy wrote: > >>> + const char *t = "accept-ranges : bytes "; /* A lowercase template */ >> >> Note: you make parser less strict: you allow "bytes" to be uppercase (was allowed only for accept-ranges", and you allow whitespaces before colon. > > Yes, exactly. > > I should add this to the description (wanted to do that but forgot). > I'll update the patch (without re-sending) - hopefully it's okay to > keep your S-o-b :) > Of course!
Am 29.06.2024 um 16:25 hat Michael Tokarev geschrieben: > Existing code was long, unclear and twisty. > > Signed-off-by: Michael Tokarev <mjt@tls.msk.ru> > --- > block/curl.c | 44 ++++++++++++++++++-------------------------- > 1 file changed, 18 insertions(+), 26 deletions(-) > > diff --git a/block/curl.c b/block/curl.c > index 419f7c89ef..9802d0319d 100644 > --- a/block/curl.c > +++ b/block/curl.c > @@ -210,37 +210,29 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) > { > BDRVCURLState *s = opaque; > size_t realsize = size * nmemb; > - const char *header = (char *)ptr; > - const char *end = header + realsize; > - const char *accept_ranges = "accept-ranges:"; > - const char *bytes = "bytes"; > + const char *p = ptr; > + const char *end = p + realsize; > + const char *t = "accept-ranges : bytes "; /* A lowercase template */ I don't think spaces between the field name and the colon are allowed in the spec (and in the old code), only before and after the value. > - if (realsize >= strlen(accept_ranges) > - && g_ascii_strncasecmp(header, accept_ranges, > - strlen(accept_ranges)) == 0) { > - > - char *p = strchr(header, ':') + 1; > - > - /* Skip whitespace between the header name and value. */ > - while (p < end && *p && g_ascii_isspace(*p)) { > - p++; > - } > - > - if (end - p >= strlen(bytes) > - && strncmp(p, bytes, strlen(bytes)) == 0) { > - > - /* Check that there is nothing but whitespace after the value. 
*/ > - p += strlen(bytes); > - while (p < end && *p && g_ascii_isspace(*p)) { > - p++; > - } > - > - if (p == end || !*p) { > - s->accept_range = true; > + /* check if header matches the "t" template */ > + for (;;) { > + if (*t == ' ') { /* space in t matches any amount of isspace in p */ > + if (p < end && g_ascii_isspace(*p)) { > + ++p; > + } else { > + ++t; > } > + } else if (*t && p < end && *t == g_ascii_tolower(*p)) { > + ++p, ++t; > + } else { > + break; > } > } > > + if (!*t && p == end) { /* if we managed to reach ends of both strings */ > + s->accept_range = true; > + } Maybe make the generic comparison with a template a separate function (maybe even in cutils.c?) so that curl_header_cb() essentially only has something like this any more: if (!qemu_memcasecmp_space(ptr, end, "accept-ranges: bytes ")) { s->accept_range = true; } (A better name for the function would be preferable, of course. Maybe also a bool return value, but if it has a name related to memcmp() or strcmp(), then 0 must mean it matches.) Then this would really highlight the curl specific logic rather than the string parser in curl_header_cb(). Kevin
diff --git a/block/curl.c b/block/curl.c index 419f7c89ef..9802d0319d 100644 --- a/block/curl.c +++ b/block/curl.c @@ -210,37 +210,29 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) { BDRVCURLState *s = opaque; size_t realsize = size * nmemb; - const char *header = (char *)ptr; - const char *end = header + realsize; - const char *accept_ranges = "accept-ranges:"; - const char *bytes = "bytes"; + const char *p = ptr; + const char *end = p + realsize; + const char *t = "accept-ranges : bytes "; /* A lowercase template */ - if (realsize >= strlen(accept_ranges) - && g_ascii_strncasecmp(header, accept_ranges, - strlen(accept_ranges)) == 0) { - - char *p = strchr(header, ':') + 1; - - /* Skip whitespace between the header name and value. */ - while (p < end && *p && g_ascii_isspace(*p)) { - p++; - } - - if (end - p >= strlen(bytes) - && strncmp(p, bytes, strlen(bytes)) == 0) { - - /* Check that there is nothing but whitespace after the value. */ - p += strlen(bytes); - while (p < end && *p && g_ascii_isspace(*p)) { - p++; - } - - if (p == end || !*p) { - s->accept_range = true; + /* check if header matches the "t" template */ + for (;;) { + if (*t == ' ') { /* space in t matches any amount of isspace in p */ + if (p < end && g_ascii_isspace(*p)) { + ++p; + } else { + ++t; } + } else if (*t && p < end && *t == g_ascii_tolower(*p)) { + ++p, ++t; + } else { + break; } } + if (!*t && p == end) { /* if we managed to reach ends of both strings */ + s->accept_range = true; + } + return realsize; }
Existing code was long, unclear and twisty. Signed-off-by: Michael Tokarev <mjt@tls.msk.ru> --- block/curl.c | 44 ++++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 26 deletions(-)