diff mbox series

[XEN,2/2] x86/Dom0: Use streaming decompression for ZSTD compressed kernels

Message ID DU0P192MB1700F6DFE45A48D90B5FE679E3779@DU0P192MB1700.EURP192.PROD.OUTLOOK.COM (mailing list archive)
State New, archived
Headers show
Series [XEN,1/2] xen/decompress: Add a user pointer for book keeping in the callbacks | expand

Commit Message

Rafaël Kooi May 10, 2023, 12:18 a.m. UTC
On Arch Linux kernel decompression will fail when Xen has been unified
with the kernel and initramfs as a single binary. This change works for
both streaming and non-streaming ZSTD content.

Signed-off-by: Rafaël Kooi <rafael_andreas@hotmail.com>
---
 xen/common/decompress.c | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

Comments

Jan Beulich May 10, 2023, 8:03 a.m. UTC | #1
On 10.05.2023 02:18, Rafaël Kooi wrote:
> On Arch Linux kernel decompression will fail when Xen has been unified
> with the kernel and initramfs as a single binary. This change works for
> both streaming and non-streaming ZSTD content.

This could do with better explaining what "unified" means, and how
streaming decompression actually makes a difference.

> --- a/xen/common/decompress.c
> +++ b/xen/common/decompress.c
> @@ -3,11 +3,26 @@
>  #include <xen/string.h>
>  #include <xen/decompress.h>
>  
> +typedef struct _ZSTD_state
> +{
> +    void *write_buf;
> +    unsigned int write_pos;
> +} ZSTD_state;
> +
>  static void __init cf_check error(const char *msg)
>  {
>      printk("%s\n", msg);
>  }
>  
> +static int __init cf_check ZSTD_flush(void *buf, unsigned int pos,
> +                                      void *userptr)
> +{
> +    ZSTD_state *state = (ZSTD_state*)userptr;
> +    memcpy(state->write_buf + state->write_pos, buf, pos);
> +    state->write_pos += pos;
> +    return pos;
> +}

This doesn't really belong here, but will (I expect) go away anyway once
you drop the earlier patch.

> @@ -17,22 +32,32 @@ int __init decompress(void *inbuf, unsigned int len, void *outbuf)
>  #endif
>  
>      if ( len >= 3 && !memcmp(inbuf, "\x42\x5a\x68", 3) )
> -        return bunzip2(inbuf, len, NULL, NULL, outbuf, NULL, error);
> +        return bunzip2(inbuf, len, NULL, NULL, outbuf, NULL, error, NULL);
>  
>      if ( len >= 6 && !memcmp(inbuf, "\3757zXZ", 6) )
> -        return unxz(inbuf, len, NULL, NULL, outbuf, NULL, error);
> +        return unxz(inbuf, len, NULL, NULL, outbuf, NULL, error, NULL);
>  
>      if ( len >= 2 && !memcmp(inbuf, "\135\000", 2) )
> -        return unlzma(inbuf, len, NULL, NULL, outbuf, NULL, error);
> +        return unlzma(inbuf, len, NULL, NULL, outbuf, NULL, error, NULL);
>  
>      if ( len >= 5 && !memcmp(inbuf, "\x89LZO", 5) )
> -        return unlzo(inbuf, len, NULL, NULL, outbuf, NULL, error);
> +        return unlzo(inbuf, len, NULL, NULL, outbuf, NULL, error, NULL);
>  
>      if ( len >= 2 && !memcmp(inbuf, "\x02\x21", 2) )
> -	return unlz4(inbuf, len, NULL, NULL, outbuf, NULL, error);
> +        return unlz4(inbuf, len, NULL, NULL, outbuf, NULL, error, NULL);

This also looks wrong here - if the earlier patch was to be kept, I expect
all these adjustments would have to move there. Otherwise build would be
broken when just the 1st patch is in place.

>      if ( len >= 4 && !memcmp(inbuf, "\x28\xb5\x2f\xfd", 4) )
> -	return unzstd(inbuf, len, NULL, NULL, outbuf, NULL, error);
> +    {
> +        // NOTE (Rafaël): On Arch Linux the kernel is compressed in a way
> +        // that requires streaming ZSTD decompression. Otherwise decompression
> +        // will fail when using a unified EFI binary. Somehow decompression
> +        // works when not using a unified EFI binary, I suspect this is the
> +        // kernel self decompressing. Or there is a code path that I am not
> +        // aware of that takes care of the use case properly.

Along the lines of what I've said for the description, this wants to avoid
terms like "somehow" if at all possible.

We also don't normally put our names in such comments.

Finally please see ./CODING_STYLE for how we expect comments to be
formatted.

Jan
Jan Beulich May 10, 2023, 9:48 a.m. UTC | #2
First of all - please don't drop Cc-s when replying. I'm restoring
xen-devel@ here at least.

On 10.05.2023 10:51, Rafaël Kooi wrote:
> On 10/05/2023 10:03, Jan Beulich wrote:> On 10.05.2023 02:18, Rafaël Kooi wrote:
>>> On Arch Linux kernel decompression will fail when Xen has been unified
>>> with the kernel and initramfs as a single binary. This change works for
>>> both streaming and non-streaming ZSTD content.
>>
>> This could do with better explaining what "unified" means, and how
>> streaming decompression actually makes a difference.
>>
> 
> I don't mind explaining it further, but with the efi documentation for
> it existing on xenbits, should I just refer to that?

You may of course refer to existing documentation. Iirc that doesn't
cover any compression aspects, though.

>>> --- a/xen/common/decompress.c
>>> +++ b/xen/common/decompress.c
>>> @@ -3,11 +3,26 @@
>>>   #include <xen/string.h>
>>>   #include <xen/decompress.h>
>>>   
>>> +typedef struct _ZSTD_state
>>> +{
>>> +    void *write_buf;
>>> +    unsigned int write_pos;
>>> +} ZSTD_state;
>>> +
>>>   static void __init cf_check error(const char *msg)
>>>   {
>>>       printk("%s\n", msg);
>>>   }
>>>   
>>> +static int __init cf_check ZSTD_flush(void *buf, unsigned int pos,
>>> +                                      void *userptr)
>>> +{
>>> +    ZSTD_state *state = (ZSTD_state*)userptr;
>>> +    memcpy(state->write_buf + state->write_pos, buf, pos);
>>> +    state->write_pos += pos;
>>> +    return pos;
>>> +}
>>
>> This doesn't really belong here, but will (I expect) go away anyway once
>> you drop the earlier patch.
>>
> 
> The ZSTD_flush will have to stay, as that is how the decompressor will
> start streaming decompression. The difference will be that the book
> keeping will be "global" (to the translation unit).

But this bookkeeping should be entirely in zstd code (i.e. presumably
unzstd.c).

>>>       if ( len >= 4 && !memcmp(inbuf, "\x28\xb5\x2f\xfd", 4) )
>>> -	return unzstd(inbuf, len, NULL, NULL, outbuf, NULL, error);
>>> +    {
>>> +        // NOTE (Rafaël): On Arch Linux the kernel is compressed in a way
>>> +        // that requires streaming ZSTD decompression. Otherwise decompression
>>> +        // will fail when using a unified EFI binary. Somehow decompression
>>> +        // works when not using a unified EFI binary, I suspect this is the
>>> +        // kernel self decompressing. Or there is a code path that I am not
>>> +        // aware of that takes care of the use case properly.
>>
>> Along the lines of what I've said for the description, this wants to avoid
>> terms like "somehow" if at all possible.
> 
> I've used the term "somehow" because I don't know why decompression
> works when Xen loads the kernel from the EFI file system. I assume the
> kernel still gets unpacked by Xen, right? Or does the kernel unpack
> itself?

The handling of Dom0 kernel decompression ought to be entirely independent
of EFI vs legacy. Unless I'm wrong with that (mis-remembering), you
mentioning EFI is potentially misleading. And yes, at least on x86 the
kernel is decompressed by Xen (by peeking into the supplied bzImage). The
difference between a plain bzImage and a "unified EFI binary" is what you
will want to outline in the description (and at least mention in the
comment). What I'm wondering is whether there simply is an issue with size
determination when the kernel is taken from the .kernel section.

> When I present the v2 of this patch, do I add you as a reviewer? Or will
> that be done by the merger?

I'm afraid I don't understand the question. You will continue to Cc
respective maintainers, which will include me. In case you refer to a
Reviewed-by: tag - you can only add such tags once they were offered to
you by the respective person. For this specific one it doesn't mean "an
earlier version of this was looked at by <person>" but "this is deemed
okay by <person>".

Jan
Rafaël Kooi May 10, 2023, 5:30 p.m. UTC | #3
On 10/05/2023 11:48, Jan Beulich wrote:> First of all - please don't drop Cc-s when replying. I'm restoring
> xen-devel@ here at least.
> 

Apologies, I didn't notice that replying dropped the Cc-s. Should I send
the emails again with the proper Cc-s?

> On 10.05.2023 10:51, Rafaël Kooi wrote:
>> On 10/05/2023 10:03, Jan Beulich wrote:> On 10.05.2023 02:18, Rafaël Kooi wrote:
>>>> On Arch Linux kernel decompression will fail when Xen has been unified
>>>> with the kernel and initramfs as a single binary. This change works for
>>>> both streaming and non-streaming ZSTD content.
>>>
>>> This could do with better explaining what "unified" means, and how
>>> streaming decompression actually makes a difference.
>>>
>>
>> I don't mind explaining it further, but with the efi documentation for
>> it existing on xenbits, should I just refer to that?
> 
> You may of course refer to existing documentation. Iirc that doesn't
> cover any compression aspects, though.
> 

Right, I'll think about what ends up being the clearest explanation.

>>>> --- a/xen/common/decompress.c
>>>> +++ b/xen/common/decompress.c
>>>> @@ -3,11 +3,26 @@
>>>>    #include <xen/string.h>
>>>>    #include <xen/decompress.h>
>>>>    
>>>> +typedef struct _ZSTD_state
>>>> +{
>>>> +    void *write_buf;
>>>> +    unsigned int write_pos;
>>>> +} ZSTD_state;
>>>> +
>>>>    static void __init cf_check error(const char *msg)
>>>>    {
>>>>        printk("%s\n", msg);
>>>>    }
>>>>    
>>>> +static int __init cf_check ZSTD_flush(void *buf, unsigned int pos,
>>>> +                                      void *userptr)
>>>> +{
>>>> +    ZSTD_state *state = (ZSTD_state*)userptr;
>>>> +    memcpy(state->write_buf + state->write_pos, buf, pos);
>>>> +    state->write_pos += pos;
>>>> +    return pos;
>>>> +}
>>>
>>> This doesn't really belong here, but will (I expect) go away anyway once
>>> you drop the earlier patch.
>>>
>>
>> The ZSTD_flush will have to stay, as that is how the decompressor will
>> start streaming decompression. The difference will be that the book
>> keeping will be "global" (to the translation unit).
> 
> But this bookkeeping should be entirely in zstd code (i.e. presumably
> unzstd.c).
> 

The implementation of the decompression functions seem to indicate
otherwise. Referring to unzstd.c:`unzstd`, the function will take the
streaming decompression path if either `fill` or `flush` have been
supplied. I cross checked with unlzma.c and unxz.c, and that seems to
have similar behavior in regards to flushing the output data. The
`flush` function is passed a buffer to a chunk of decompressed data with
`pos` being the size of the chunk. For the sake of consistency I don't
think it's a good idea to deviate from this behavior in just unzstd.c.

I could wrap the decompression code in another file and function, but
in my opinion it should stay here and be renamed to something generic
like `stream_flush` or `chunk_flush`.

>>>>        if ( len >= 4 && !memcmp(inbuf, "\x28\xb5\x2f\xfd", 4) )
>>>> -	return unzstd(inbuf, len, NULL, NULL, outbuf, NULL, error);
>>>> +    {
>>>> +        // NOTE (Rafaël): On Arch Linux the kernel is compressed in a way
>>>> +        // that requires streaming ZSTD decompression. Otherwise decompression
>>>> +        // will fail when using a unified EFI binary. Somehow decompression
>>>> +        // works when not using a unified EFI binary, I suspect this is the
>>>> +        // kernel self decompressing. Or there is a code path that I am not
>>>> +        // aware of that takes care of the use case properly.
>>>
>>> Along the lines of what I've said for the description, this wants to avoid
>>> terms like "somehow" if at all possible.
>>
>> I've used the term "somehow" because I don't know why decompression
>> works when Xen loads the kernel from the EFI file system. I assume the
>> kernel still gets unpacked by Xen, right? Or does the kernel unpack
>> itself?
> 
> The handling of Dom0 kernel decompression ought to be entirely independent
> of EFI vs legacy. Unless I'm wrong with that (mis-remembering), you
> mentioning EFI is potentially misleading. And yes, at least on x86 the
> kernel is decompressed by Xen (by peeking into the supplied bzImage). The
> difference between a plain bzImage and a "unified EFI binary" is what you
> will want to outline in the description (and at least mention in the
> comment). What I'm wondering is whether there simply is an issue with size
> determination when the kernel is taken from the .kernel section.
> 

Assuming you are talking about size determination of the compressed
bzImage, I notice a discrepancy in the size of the ZSTD stream and the
reported size by the vmlinuz-* header upon further investigation. When
using the streaming decompression code I made it output how many bytes
it reads from the extracted-but-still-compressed bzImage. The code
reads 12,327,560 bytes, but the size of the compressed bzImage in the
header is 12,327,564 bytes. In xen/arch/x86/bzImage.c `decompress` is
called with `orig_image_len`, when the function `output_length`
calculates the end address and removes 4 bytes from that address. If I
remove the last 4 bytes from the compressed bzImage then `unzstd` will
unpack it with `unzstd bzImage.zst -o bzImage`, otherwise it will
complain with `zstd: /*stdin*\: unknown header`. With this new
information I think the correct solution is to try calling `decompress`
a second time with with `orig_image_len - 4` if it fails.

>> When I present the v2 of this patch, do I add you as a reviewer? Or will
>> that be done by the merger?
> 
> I'm afraid I don't understand the question. You will continue to Cc
> respective maintainers, which will include me. In case you refer to a
> Reviewed-by: tag - you can only add such tags once they were offered to
> you by the respective person. For this specific one it doesn't mean "an
> earlier version of this was looked at by <person>" but "this is deemed
> okay by <person>".
> 
> Jan

I meant the "Reviewed-by:" tag indeed. Thanks again.

Rafaël
Jan Beulich May 11, 2023, 6:08 a.m. UTC | #4
On 10.05.2023 19:30, Rafaël Kooi wrote:
> On 10/05/2023 11:48, Jan Beulich wrote:
>> On 10.05.2023 10:51, Rafaël Kooi wrote:
>>> On 10/05/2023 10:03, Jan Beulich wrote:> On 10.05.2023 02:18, Rafaël Kooi wrote:
>>>>> --- a/xen/common/decompress.c
>>>>> +++ b/xen/common/decompress.c
>>>>> @@ -3,11 +3,26 @@
>>>>>    #include <xen/string.h>
>>>>>    #include <xen/decompress.h>
>>>>>    
>>>>> +typedef struct _ZSTD_state
>>>>> +{
>>>>> +    void *write_buf;
>>>>> +    unsigned int write_pos;
>>>>> +} ZSTD_state;
>>>>> +
>>>>>    static void __init cf_check error(const char *msg)
>>>>>    {
>>>>>        printk("%s\n", msg);
>>>>>    }
>>>>>    
>>>>> +static int __init cf_check ZSTD_flush(void *buf, unsigned int pos,
>>>>> +                                      void *userptr)
>>>>> +{
>>>>> +    ZSTD_state *state = (ZSTD_state*)userptr;
>>>>> +    memcpy(state->write_buf + state->write_pos, buf, pos);
>>>>> +    state->write_pos += pos;
>>>>> +    return pos;
>>>>> +}
>>>>
>>>> This doesn't really belong here, but will (I expect) go away anyway once
>>>> you drop the earlier patch.
>>>>
>>>
>>> The ZSTD_flush will have to stay, as that is how the decompressor will
>>> start streaming decompression. The difference will be that the book
>>> keeping will be "global" (to the translation unit).
>>
>> But this bookkeeping should be entirely in zstd code (i.e. presumably
>> unzstd.c).
>>
> 
> The implementation of the decompression functions seem to indicate
> otherwise. Referring to unzstd.c:`unzstd`, the function will take the
> streaming decompression path if either `fill` or `flush` have been
> supplied. I cross checked with unlzma.c and unxz.c, and that seems to
> have similar behavior in regards to flushing the output data. The
> `flush` function is passed a buffer to a chunk of decompressed data with
> `pos` being the size of the chunk. For the sake of consistency I don't
> think it's a good idea to deviate from this behavior in just unzstd.c.

Well, so far we don't use any flush functions, do we? The question
could therefore also be put differently: In how far is the flush
function you introduce zstd-specific? If it isn't be other than the
fact that it is only passed to unzstd(), perhaps it shouldn't be
named as if zstd-specific?

>>>>>        if ( len >= 4 && !memcmp(inbuf, "\x28\xb5\x2f\xfd", 4) )
>>>>> -	return unzstd(inbuf, len, NULL, NULL, outbuf, NULL, error);
>>>>> +    {
>>>>> +        // NOTE (Rafaël): On Arch Linux the kernel is compressed in a way
>>>>> +        // that requires streaming ZSTD decompression. Otherwise decompression
>>>>> +        // will fail when using a unified EFI binary. Somehow decompression
>>>>> +        // works when not using a unified EFI binary, I suspect this is the
>>>>> +        // kernel self decompressing. Or there is a code path that I am not
>>>>> +        // aware of that takes care of the use case properly.
>>>>
>>>> Along the lines of what I've said for the description, this wants to avoid
>>>> terms like "somehow" if at all possible.
>>>
>>> I've used the term "somehow" because I don't know why decompression
>>> works when Xen loads the kernel from the EFI file system. I assume the
>>> kernel still gets unpacked by Xen, right? Or does the kernel unpack
>>> itself?
>>
>> The handling of Dom0 kernel decompression ought to be entirely independent
>> of EFI vs legacy. Unless I'm wrong with that (mis-remembering), you
>> mentioning EFI is potentially misleading. And yes, at least on x86 the
>> kernel is decompressed by Xen (by peeking into the supplied bzImage). The
>> difference between a plain bzImage and a "unified EFI binary" is what you
>> will want to outline in the description (and at least mention in the
>> comment). What I'm wondering is whether there simply is an issue with size
>> determination when the kernel is taken from the .kernel section.
>>
> 
> Assuming you are talking about size determination of the compressed
> bzImage, I notice a discrepancy in the size of the ZSTD stream and the
> reported size by the vmlinuz-* header upon further investigation. When
> using the streaming decompression code I made it output how many bytes
> it reads from the extracted-but-still-compressed bzImage. The code
> reads 12,327,560 bytes, but the size of the compressed bzImage in the
> header is 12,327,564 bytes. In xen/arch/x86/bzImage.c `decompress` is
> called with `orig_image_len`, when the function `output_length`
> calculates the end address and removes 4 bytes from that address. If I
> remove the last 4 bytes from the compressed bzImage then `unzstd` will
> unpack it with `unzstd bzImage.zst -o bzImage`, otherwise it will
> complain with `zstd: /*stdin*\: unknown header`. With this new
> information I think the correct solution is to try calling `decompress`
> a second time with with `orig_image_len - 4` if it fails.

That's not very likely to be an appropriate solution. If the sizes
diverge by 4, that difference needs explaining. Once understood, it'll
hopefully become clear under what conditions (if any) to adjust the
length right away, without any need to retry.

Jan
diff mbox series

Patch

diff --git a/xen/common/decompress.c b/xen/common/decompress.c
index 989336983f..cde754ffb1 100644
--- a/xen/common/decompress.c
+++ b/xen/common/decompress.c
@@ -3,11 +3,26 @@ 
 #include <xen/string.h>
 #include <xen/decompress.h>
 
+typedef struct _ZSTD_state
+{
+    void *write_buf;
+    unsigned int write_pos;
+} ZSTD_state;
+
 static void __init cf_check error(const char *msg)
 {
     printk("%s\n", msg);
 }
 
+static int __init cf_check ZSTD_flush(void *buf, unsigned int pos,
+                                      void *userptr)
+{
+    ZSTD_state *state = (ZSTD_state*)userptr;
+    memcpy(state->write_buf + state->write_pos, buf, pos);
+    state->write_pos += pos;
+    return pos;
+}
+
 int __init decompress(void *inbuf, unsigned int len, void *outbuf)
 {
 #if 0 /* Not needed here yet. */
@@ -17,22 +32,32 @@  int __init decompress(void *inbuf, unsigned int len, void *outbuf)
 #endif
 
     if ( len >= 3 && !memcmp(inbuf, "\x42\x5a\x68", 3) )
-        return bunzip2(inbuf, len, NULL, NULL, outbuf, NULL, error);
+        return bunzip2(inbuf, len, NULL, NULL, outbuf, NULL, error, NULL);
 
     if ( len >= 6 && !memcmp(inbuf, "\3757zXZ", 6) )
-        return unxz(inbuf, len, NULL, NULL, outbuf, NULL, error);
+        return unxz(inbuf, len, NULL, NULL, outbuf, NULL, error, NULL);
 
     if ( len >= 2 && !memcmp(inbuf, "\135\000", 2) )
-        return unlzma(inbuf, len, NULL, NULL, outbuf, NULL, error);
+        return unlzma(inbuf, len, NULL, NULL, outbuf, NULL, error, NULL);
 
     if ( len >= 5 && !memcmp(inbuf, "\x89LZO", 5) )
-        return unlzo(inbuf, len, NULL, NULL, outbuf, NULL, error);
+        return unlzo(inbuf, len, NULL, NULL, outbuf, NULL, error, NULL);
 
     if ( len >= 2 && !memcmp(inbuf, "\x02\x21", 2) )
-	return unlz4(inbuf, len, NULL, NULL, outbuf, NULL, error);
+        return unlz4(inbuf, len, NULL, NULL, outbuf, NULL, error, NULL);
 
     if ( len >= 4 && !memcmp(inbuf, "\x28\xb5\x2f\xfd", 4) )
-	return unzstd(inbuf, len, NULL, NULL, outbuf, NULL, error);
+    {
+        // NOTE (Rafaël): On Arch Linux the kernel is compressed in a way
+        // that requires streaming ZSTD decompression. Otherwise decompression
+        // will fail when using a unified EFI binary. Somehow decompression
+        // works when not using a unified EFI binary, I suspect this is the
+        // kernel self decompressing. Or there is a code path that I am not
+        // aware of that takes care of the use case properly.
+
+        ZSTD_state state = (ZSTD_state){ outbuf, 0 };
+        return unzstd(inbuf, len, NULL, ZSTD_flush, NULL, NULL, error, &state);
+    }
 
     return 1;
 }