block/curl: rewrite http header parsing function

Message ID	20240629142542.1086076-1-mjt@tls.msk.ru (mailing list archive)
State	New, archived
Headers	show Return-Path: <qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org> From: Michael Tokarev <mjt@tls.msk.ru> To: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>, qemu-block@nongnu.org Cc: Michael Tokarev <mjt@tls.msk.ru>, qemu-devel@nongnu.org, hreitz@redhat.com, kwolf@redhat.com, qemu-trivial@nongnu.org Subject: [PATCH] block/curl: rewrite http header parsing function Date: Sat, 29 Jun 2024 17:25:42 +0300 Message-Id: <20240629142542.1086076-1-mjt@tls.msk.ru> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Received-SPF: pass client-ip=86.62.121.231; envelope-from=mjt@tls.msk.ru; helo=isrv.corpit.ru X-Spam_score_int: -68 X-Spam_score: -6.9 X-Spam_bar: ------ X-Spam_report: (-6.9 / 5.0 requ) BAYES_00=-1.9, RCVD_IN_DNSWL_HI=-5, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action Precedence: list Errors-To: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org Sender: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
Series	block/curl: rewrite http header parsing function \| expand block/curl: rewrite http header parsing function

Message ID

20240629142542.1086076-1-mjt@tls.msk.ru (mailing list archive)

State

New, archived

Headers

From: Michael Tokarev <mjt@tls.msk.ru>
To: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>,
 qemu-block@nongnu.org
Cc: Michael Tokarev <mjt@tls.msk.ru>, qemu-devel@nongnu.org,
 hreitz@redhat.com,
 kwolf@redhat.com, qemu-trivial@nongnu.org
Subject: [PATCH] block/curl: rewrite http header parsing function
Date: Sat, 29 Jun 2024 17:25:42 +0300
Message-Id: <20240629142542.1086076-1-mjt@tls.msk.ru>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Received-SPF: pass client-ip=86.62.121.231; envelope-from=mjt@tls.msk.ru;
 helo=isrv.corpit.ru
X-Spam_score_int: -68
X-Spam_score: -6.9
X-Spam_bar: ------
X-Spam_report: (-6.9 / 5.0 requ) BAYES_00=-1.9, RCVD_IN_DNSWL_HI=-5,
 SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no
X-Spam_action: no action
X-BeenThere: qemu-devel@nongnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: <qemu-devel.nongnu.org>
List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
 <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <https://lists.nongnu.org/archive/html/qemu-devel>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
 <mailto:qemu-devel-request@nongnu.org?subject=subscribe>
Errors-To: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
Sender: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org

Series

block/curl: rewrite http header parsing function | expand

Existing code was long, unclear and twisty. Signed-off-by: Michael Tokarev <mjt@tls.msk.ru> --- block/curl.c | 44 ++++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 26 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy July 1, 2024, 6:54 a.m. UTC | #1

On 29.06.24 17:25, Michael Tokarev wrote:
> Existing code was long, unclear and twisty.
> 
> Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>

Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>

> ---
>   block/curl.c | 44 ++++++++++++++++++--------------------------
>   1 file changed, 18 insertions(+), 26 deletions(-)
> 
> diff --git a/block/curl.c b/block/curl.c
> index 419f7c89ef..9802d0319d 100644
> --- a/block/curl.c
> +++ b/block/curl.c
> @@ -210,37 +210,29 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
>   {
>       BDRVCURLState *s = opaque;
>       size_t realsize = size * nmemb;
> -    const char *header = (char *)ptr;
> -    const char *end = header + realsize;
> -    const char *accept_ranges = "accept-ranges:";
> -    const char *bytes = "bytes";
> +    const char *p = ptr;
> +    const char *end = p + realsize;
> +    const char *t = "accept-ranges : bytes "; /* A lowercase template */

Note: you make parser less strict: you allow "bytes" to be uppercase (was allowed only for accept-ranges", and you allow whitespaces before colon.

>   
> -    if (realsize >= strlen(accept_ranges)
> -        && g_ascii_strncasecmp(header, accept_ranges,
> -                               strlen(accept_ranges)) == 0) {
> -
> -        char *p = strchr(header, ':') + 1;
> -
> -        /* Skip whitespace between the header name and value. */
> -        while (p < end && *p && g_ascii_isspace(*p)) {
> -            p++;
> -        }
> -
> -        if (end - p >= strlen(bytes)
> -            && strncmp(p, bytes, strlen(bytes)) == 0) {
> -
> -            /* Check that there is nothing but whitespace after the value. */
> -            p += strlen(bytes);
> -            while (p < end && *p && g_ascii_isspace(*p)) {
> -                p++;
> -            }
> -
> -            if (p == end || !*p) {
> -                s->accept_range = true;
> +    /* check if header matches the "t" template */
> +    for (;;) {
> +        if (*t == ' ') { /* space in t matches any amount of isspace in p */
> +            if (p < end && g_ascii_isspace(*p)) {
> +                ++p;
> +            } else {
> +                ++t;
>               }
> +        } else if (*t && p < end && *t == g_ascii_tolower(*p)) {
> +            ++p, ++t;
> +        } else {
> +            break;
>           }
>       }
>   
> +    if (!*t && p == end) { /* if we managed to reach ends of both strings */
> +        s->accept_range = true;
> +    }
> +
>       return realsize;
>   }
>

Michael Tokarev July 1, 2024, 6:55 a.m. UTC | #2

01.07.2024 09:54, Vladimir Sementsov-Ogievskiy wrote:

>> +    const char *t = "accept-ranges : bytes "; /* A lowercase template */
> 
> Note: you make parser less strict: you allow "bytes" to be uppercase (was allowed only for accept-ranges", and you allow whitespaces before colon.

Yes, exactly.

I should add this to the description (wanted to do that but forgot).
I'll update the patch (without re-sending) - hopefully its' okay to
keep your S-o-b :)

Thanks,

/mjt

Vladimir Sementsov-Ogievskiy July 1, 2024, 7:03 a.m. UTC | #3

On 01.07.24 09:55, Michael Tokarev wrote:
> 01.07.2024 09:54, Vladimir Sementsov-Ogievskiy wrote:
> 
>>> +    const char *t = "accept-ranges : bytes "; /* A lowercase template */
>>
>> Note: you make parser less strict: you allow "bytes" to be uppercase (was allowed only for accept-ranges", and you allow whitespaces before colon.
> 
> Yes, exactly.
> 
> I should add this to the description (wanted to do that but forgot).
> I'll update the patch (without re-sending) - hopefully its' okay to
> keep your S-o-b :)
> 

Of course!

Kevin Wolf July 18, 2024, 6:54 p.m. UTC | #4

Am 29.06.2024 um 16:25 hat Michael Tokarev geschrieben:
> Existing code was long, unclear and twisty.
> 
> Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
> ---
>  block/curl.c | 44 ++++++++++++++++++--------------------------
>  1 file changed, 18 insertions(+), 26 deletions(-)
> 
> diff --git a/block/curl.c b/block/curl.c
> index 419f7c89ef..9802d0319d 100644
> --- a/block/curl.c
> +++ b/block/curl.c
> @@ -210,37 +210,29 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
>  {
>      BDRVCURLState *s = opaque;
>      size_t realsize = size * nmemb;
> -    const char *header = (char *)ptr;
> -    const char *end = header + realsize;
> -    const char *accept_ranges = "accept-ranges:";
> -    const char *bytes = "bytes";
> +    const char *p = ptr;
> +    const char *end = p + realsize;
> +    const char *t = "accept-ranges : bytes "; /* A lowercase template */

I don't think spaces between the field name and the colon are allowed
in the spec (and in the old code), only before and after the value.

> -    if (realsize >= strlen(accept_ranges)
> -        && g_ascii_strncasecmp(header, accept_ranges,
> -                               strlen(accept_ranges)) == 0) {
> -
> -        char *p = strchr(header, ':') + 1;
> -
> -        /* Skip whitespace between the header name and value. */
> -        while (p < end && *p && g_ascii_isspace(*p)) {
> -            p++;
> -        }
> -
> -        if (end - p >= strlen(bytes)
> -            && strncmp(p, bytes, strlen(bytes)) == 0) {
> -
> -            /* Check that there is nothing but whitespace after the value. */
> -            p += strlen(bytes);
> -            while (p < end && *p && g_ascii_isspace(*p)) {
> -                p++;
> -            }
> -
> -            if (p == end || !*p) {
> -                s->accept_range = true;
> +    /* check if header matches the "t" template */
> +    for (;;) {
> +        if (*t == ' ') { /* space in t matches any amount of isspace in p */
> +            if (p < end && g_ascii_isspace(*p)) {
> +                ++p;
> +            } else {
> +                ++t;
>              }
> +        } else if (*t && p < end && *t == g_ascii_tolower(*p)) {
> +            ++p, ++t;
> +        } else {
> +            break;
>          }
>      }
>  
> +    if (!*t && p == end) { /* if we managed to reach ends of both strings */
> +        s->accept_range = true;
> +    }

Maybe make the generic comparison with a template a separate function
(maybe even in cutils.c?) so that curl_header_cb() essentially only has
something like this any more:

if (!qemu_memcasecmp_space(ptr, end, "accept-ranges: bytes ")) {
    s->accept_range = true;
}

(A better name for the function would be preferable, of course. Maybe
also a bool return value, but if it has a name related to memcmp() or
strcmp(), then 0 must mean it matches.)

Then this would really highlight the curl specific logic rather than the
string parser in curl_header_cb().

Kevin

diff --git a/block/curl.c b/block/curl.c
index 419f7c89ef..9802d0319d 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -210,37 +210,29 @@  static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
     BDRVCURLState *s = opaque;
     size_t realsize = size * nmemb;
-    const char *header = (char *)ptr;
-    const char *end = header + realsize;
-    const char *accept_ranges = "accept-ranges:";
-    const char *bytes = "bytes";
+    const char *p = ptr;
+    const char *end = p + realsize;
+    const char *t = "accept-ranges : bytes "; /* A lowercase template */
 
-    if (realsize >= strlen(accept_ranges)
-        && g_ascii_strncasecmp(header, accept_ranges,
-                               strlen(accept_ranges)) == 0) {
-
-        char *p = strchr(header, ':') + 1;
-
-        /* Skip whitespace between the header name and value. */
-        while (p < end && *p && g_ascii_isspace(*p)) {
-            p++;
-        }
-
-        if (end - p >= strlen(bytes)
-            && strncmp(p, bytes, strlen(bytes)) == 0) {
-
-            /* Check that there is nothing but whitespace after the value. */
-            p += strlen(bytes);
-            while (p < end && *p && g_ascii_isspace(*p)) {
-                p++;
-            }
-
-            if (p == end || !*p) {
-                s->accept_range = true;
+    /* check if header matches the "t" template */
+    for (;;) {
+        if (*t == ' ') { /* space in t matches any amount of isspace in p */
+            if (p < end && g_ascii_isspace(*p)) {
+                ++p;
+            } else {
+                ++t;
             }
+        } else if (*t && p < end && *t == g_ascii_tolower(*p)) {
+            ++p, ++t;
+        } else {
+            break;
         }
     }
 
+    if (!*t && p == end) { /* if we managed to reach ends of both strings */
+        s->accept_range = true;
+    }
+
     return realsize;
 }

block/curl: rewrite http header parsing function

Commit Message

Comments

Patch