diff mbox series

block/curl: rewrite http header parsing function

Message ID 20240629142542.1086076-1-mjt@tls.msk.ru (mailing list archive)
State New, archived
Headers show
Series block/curl: rewrite http header parsing function | expand

Commit Message

Michael Tokarev June 29, 2024, 2:25 p.m. UTC
Existing code was long, unclear and twisty.

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
---
 block/curl.c | 44 ++++++++++++++++++--------------------------
 1 file changed, 18 insertions(+), 26 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy July 1, 2024, 6:54 a.m. UTC | #1
On 29.06.24 17:25, Michael Tokarev wrote:
> Existing code was long, unclear and twisty.
> 
> Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>

Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>

> ---
>   block/curl.c | 44 ++++++++++++++++++--------------------------
>   1 file changed, 18 insertions(+), 26 deletions(-)
> 
> diff --git a/block/curl.c b/block/curl.c
> index 419f7c89ef..9802d0319d 100644
> --- a/block/curl.c
> +++ b/block/curl.c
> @@ -210,37 +210,29 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
>   {
>       BDRVCURLState *s = opaque;
>       size_t realsize = size * nmemb;
> -    const char *header = (char *)ptr;
> -    const char *end = header + realsize;
> -    const char *accept_ranges = "accept-ranges:";
> -    const char *bytes = "bytes";
> +    const char *p = ptr;
> +    const char *end = p + realsize;
> +    const char *t = "accept-ranges : bytes "; /* A lowercase template */

Note: you make the parser less strict: you allow "bytes" to be uppercase (previously this was allowed only for "accept-ranges"), and you allow whitespace before the colon.

>   
> -    if (realsize >= strlen(accept_ranges)
> -        && g_ascii_strncasecmp(header, accept_ranges,
> -                               strlen(accept_ranges)) == 0) {
> -
> -        char *p = strchr(header, ':') + 1;
> -
> -        /* Skip whitespace between the header name and value. */
> -        while (p < end && *p && g_ascii_isspace(*p)) {
> -            p++;
> -        }
> -
> -        if (end - p >= strlen(bytes)
> -            && strncmp(p, bytes, strlen(bytes)) == 0) {
> -
> -            /* Check that there is nothing but whitespace after the value. */
> -            p += strlen(bytes);
> -            while (p < end && *p && g_ascii_isspace(*p)) {
> -                p++;
> -            }
> -
> -            if (p == end || !*p) {
> -                s->accept_range = true;
> +    /* check if header matches the "t" template */
> +    for (;;) {
> +        if (*t == ' ') { /* space in t matches any amount of isspace in p */
> +            if (p < end && g_ascii_isspace(*p)) {
> +                ++p;
> +            } else {
> +                ++t;
>               }
> +        } else if (*t && p < end && *t == g_ascii_tolower(*p)) {
> +            ++p, ++t;
> +        } else {
> +            break;
>           }
>       }
>   
> +    if (!*t && p == end) { /* if we managed to reach ends of both strings */
> +        s->accept_range = true;
> +    }
> +
>       return realsize;
>   }
>
Michael Tokarev July 1, 2024, 6:55 a.m. UTC | #2
01.07.2024 09:54, Vladimir Sementsov-Ogievskiy wrote:

>> +    const char *t = "accept-ranges : bytes "; /* A lowercase template */
> 
> Note: you make the parser less strict: you allow "bytes" to be uppercase (previously this was allowed only for "accept-ranges"), and you allow whitespace before the colon.

Yes, exactly.

I should add this to the description (wanted to do that but forgot).
I'll update the patch (without re-sending) - hopefully it's okay to
keep your S-o-b :)

Thanks,

/mjt
Vladimir Sementsov-Ogievskiy July 1, 2024, 7:03 a.m. UTC | #3
On 01.07.24 09:55, Michael Tokarev wrote:
> 01.07.2024 09:54, Vladimir Sementsov-Ogievskiy wrote:
> 
>>> +    const char *t = "accept-ranges : bytes "; /* A lowercase template */
>>
>> Note: you make the parser less strict: you allow "bytes" to be uppercase (previously this was allowed only for "accept-ranges"), and you allow whitespace before the colon.
> 
> Yes, exactly.
> 
> I should add this to the description (wanted to do that but forgot).
> I'll update the patch (without re-sending) - hopefully it's okay to
> keep your S-o-b :)
> 

Of course!
Kevin Wolf July 18, 2024, 6:54 p.m. UTC | #4
Am 29.06.2024 um 16:25 hat Michael Tokarev geschrieben:
> Existing code was long, unclear and twisty.
> 
> Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
> ---
>  block/curl.c | 44 ++++++++++++++++++--------------------------
>  1 file changed, 18 insertions(+), 26 deletions(-)
> 
> diff --git a/block/curl.c b/block/curl.c
> index 419f7c89ef..9802d0319d 100644
> --- a/block/curl.c
> +++ b/block/curl.c
> @@ -210,37 +210,29 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
>  {
>      BDRVCURLState *s = opaque;
>      size_t realsize = size * nmemb;
> -    const char *header = (char *)ptr;
> -    const char *end = header + realsize;
> -    const char *accept_ranges = "accept-ranges:";
> -    const char *bytes = "bytes";
> +    const char *p = ptr;
> +    const char *end = p + realsize;
> +    const char *t = "accept-ranges : bytes "; /* A lowercase template */

I don't think spaces between the field name and the colon are allowed
in the spec (and in the old code), only before and after the value.

> -    if (realsize >= strlen(accept_ranges)
> -        && g_ascii_strncasecmp(header, accept_ranges,
> -                               strlen(accept_ranges)) == 0) {
> -
> -        char *p = strchr(header, ':') + 1;
> -
> -        /* Skip whitespace between the header name and value. */
> -        while (p < end && *p && g_ascii_isspace(*p)) {
> -            p++;
> -        }
> -
> -        if (end - p >= strlen(bytes)
> -            && strncmp(p, bytes, strlen(bytes)) == 0) {
> -
> -            /* Check that there is nothing but whitespace after the value. */
> -            p += strlen(bytes);
> -            while (p < end && *p && g_ascii_isspace(*p)) {
> -                p++;
> -            }
> -
> -            if (p == end || !*p) {
> -                s->accept_range = true;
> +    /* check if header matches the "t" template */
> +    for (;;) {
> +        if (*t == ' ') { /* space in t matches any amount of isspace in p */
> +            if (p < end && g_ascii_isspace(*p)) {
> +                ++p;
> +            } else {
> +                ++t;
>              }
> +        } else if (*t && p < end && *t == g_ascii_tolower(*p)) {
> +            ++p, ++t;
> +        } else {
> +            break;
>          }
>      }
>  
> +    if (!*t && p == end) { /* if we managed to reach ends of both strings */
> +        s->accept_range = true;
> +    }

Maybe make the generic comparison with a template a separate function
(maybe even in cutils.c?) so that curl_header_cb() essentially only has
something like this any more:

if (!qemu_memcasecmp_space(ptr, end, "accept-ranges: bytes ")) {
    s->accept_range = true;
}

(A better name for the function would be preferable, of course. Maybe
also a bool return value, but if it has a name related to memcmp() or
strcmp(), then 0 must mean it matches.)

Then this would really highlight the curl specific logic rather than the
string parser in curl_header_cb().

Kevin
diff mbox series

Patch

diff --git a/block/curl.c b/block/curl.c
index 419f7c89ef..9802d0319d 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -210,37 +210,29 @@  static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
     BDRVCURLState *s = opaque;
     size_t realsize = size * nmemb;
-    const char *header = (char *)ptr;
-    const char *end = header + realsize;
-    const char *accept_ranges = "accept-ranges:";
-    const char *bytes = "bytes";
+    const char *p = ptr;
+    const char *end = p + realsize;
+    const char *t = "accept-ranges : bytes "; /* A lowercase template */
 
-    if (realsize >= strlen(accept_ranges)
-        && g_ascii_strncasecmp(header, accept_ranges,
-                               strlen(accept_ranges)) == 0) {
-
-        char *p = strchr(header, ':') + 1;
-
-        /* Skip whitespace between the header name and value. */
-        while (p < end && *p && g_ascii_isspace(*p)) {
-            p++;
-        }
-
-        if (end - p >= strlen(bytes)
-            && strncmp(p, bytes, strlen(bytes)) == 0) {
-
-            /* Check that there is nothing but whitespace after the value. */
-            p += strlen(bytes);
-            while (p < end && *p && g_ascii_isspace(*p)) {
-                p++;
-            }
-
-            if (p == end || !*p) {
-                s->accept_range = true;
+    /* check if header matches the "t" template */
+    for (;;) {
+        if (*t == ' ') { /* space in t matches any amount of isspace in p */
+            if (p < end && g_ascii_isspace(*p)) {
+                ++p;
+            } else {
+                ++t;
             }
+        } else if (*t && p < end && *t == g_ascii_tolower(*p)) {
+            ++p, ++t;
+        } else {
+            break;
         }
     }
 
+    if (!*t && p == end) { /* if we managed to reach ends of both strings */
+        s->accept_range = true;
+    }
+
     return realsize;
 }