diff mbox series

[v3] xen: simplify bitmap_to_xenctl_bitmap for little endian

Message ID 20250327233419.1119763-1-stefano.stabellini@amd.com (mailing list archive)
State Superseded
Headers show
Series [v3] xen: simplify bitmap_to_xenctl_bitmap for little endian | expand

Commit Message

Stefano Stabellini March 27, 2025, 11:34 p.m. UTC
The little endian implementation of bitmap_to_xenctl_bitmap leads to
unnecessary xmallocs and xfrees. Given that Xen only supports little
endian architectures, it is worth optimizing.

This patch removes the need for the xmalloc on little endian
architectures.

Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
---
Changes in v3:
- code style
- copy_bytes > 1 checks
- copy_bytes > 0 check for copy_to_guest_offset
---
 xen/common/bitmap.c | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 12 deletions(-)

Comments

Julien Grall March 30, 2025, 9:29 a.m. UTC | #1
Hi Stefano,

On 27/03/2025 23:34, Stefano Stabellini wrote:
> The little endian implementation of bitmap_to_xenctl_bitmap leads to
> unnecessary xmallocs and xfrees. Given that Xen only supports little
> endian architectures, it is worth optimizing.
> 
> This patch removes the need for the xmalloc on little endian
> architectures.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
> ---
> Changes in v3:
> - code style
> - copy_bytes > 1 checks
> - copy_bytes > 0 check for copy_to_guest_offset
> ---
>   xen/common/bitmap.c | 42 ++++++++++++++++++++++++++++++------------
>   1 file changed, 30 insertions(+), 12 deletions(-)
> 
> diff --git a/xen/common/bitmap.c b/xen/common/bitmap.c
> index 3da63a32a6..d3f9347e62 100644
> --- a/xen/common/bitmap.c
> +++ b/xen/common/bitmap.c
> @@ -52,7 +52,7 @@ static void clamp_last_byte(uint8_t *bp, unsigned int nbits)
>   	unsigned int remainder = nbits % 8;
>   
>   	if (remainder)
> -		bp[nbits/8] &= (1U << remainder) - 1;
> +		*bp &= (1U << remainder) - 1;

This is changing the behavior of clamp_last_byte(). Yet, it doesn't seem 
the comment on top of it has been changed. Is this intended? Also, I would 
consider renaming 'bp' so it is clearer this is meant to point to the last 
byte of the bitmap rather than the start.

Cheers,
Jan Beulich March 31, 2025, 6:39 a.m. UTC | #2
On 30.03.2025 11:29, Julien Grall wrote:
> On 27/03/2025 23:34, Stefano Stabellini wrote:
>> The little endian implementation of bitmap_to_xenctl_bitmap leads to
>> unnecessary xmallocs and xfrees. Given that Xen only supports little
>> endian architectures, it is worth optimizing.
>>
>> This patch removes the need for the xmalloc on little endian
>> architectures.
>>
>> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
>> ---
>> Changes in v3:
>> - code style
>> - copy_bytes > 1 checks
>> - copy_bytes > 0 check for copy_to_guest_offset
>> ---
>>   xen/common/bitmap.c | 42 ++++++++++++++++++++++++++++++------------
>>   1 file changed, 30 insertions(+), 12 deletions(-)
>>
>> diff --git a/xen/common/bitmap.c b/xen/common/bitmap.c
>> index 3da63a32a6..d3f9347e62 100644
>> --- a/xen/common/bitmap.c
>> +++ b/xen/common/bitmap.c
>> @@ -52,7 +52,7 @@ static void clamp_last_byte(uint8_t *bp, unsigned int nbits)
>>   	unsigned int remainder = nbits % 8;
>>   
>>   	if (remainder)
>> -		bp[nbits/8] &= (1U << remainder) - 1;
>> +		*bp &= (1U << remainder) - 1;
> 
> This is changing the behavior of clamp_last_byte(). Yet, it doesn't seem 
> the comment on top of is changed. Is this intended? Also, I would 
> consider rename 'bp' so it is clearer this is meant to point to the last 
> byte of the bitmap rather than the start.

+1

Perhaps also drop "last" from its name? (It's not clear to me what 'b' actually
is meant to stand for in the parameter name. It may have been meant to say
"base"; it could now be meant to say "byte". Nevertheless just "p" as parameter
name is likely sufficient and then no longer ambiguous.)

Jan
Jan Beulich March 31, 2025, 6:44 a.m. UTC | #3
On 28.03.2025 00:34, Stefano Stabellini wrote:
> --- a/xen/common/bitmap.c
> +++ b/xen/common/bitmap.c
> @@ -52,7 +52,7 @@ static void clamp_last_byte(uint8_t *bp, unsigned int nbits)
>  	unsigned int remainder = nbits % 8;
>  
>  	if (remainder)
> -		bp[nbits/8] &= (1U << remainder) - 1;
> +		*bp &= (1U << remainder) - 1;
>  }

Technically there's nothing wrong with dropping the if(), I think. Even more
so then than now, ...

> @@ -338,7 +338,6 @@ static void bitmap_long_to_byte(uint8_t *bp, const unsigned long *lp,
>  			nbits -= 8;
>  		}
>  	}
> -	clamp_last_byte(bp, nbits);
>  }
>  
>  static void bitmap_byte_to_long(unsigned long *lp, const uint8_t *bp,
> @@ -363,7 +362,6 @@ static void bitmap_long_to_byte(uint8_t *bp, const unsigned long *lp,
>  				unsigned int nbits)
>  {
>  	memcpy(bp, lp, DIV_ROUND_UP(nbits, BITS_PER_BYTE));
> -	clamp_last_byte(bp, nbits);
>  }

... with the two prior call sites now shrunk to ...

> @@ -384,21 +382,41 @@ int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
>      uint8_t zero = 0;
>      int err = 0;
>      unsigned int xen_bytes = DIV_ROUND_UP(nbits, BITS_PER_BYTE);
> -    uint8_t *bytemap = xmalloc_array(uint8_t, xen_bytes);
> -
> -    if ( !bytemap )
> -        return -ENOMEM;
> +    uint8_t last;
>  
>      guest_bytes = DIV_ROUND_UP(xenctl_bitmap->nr_bits, BITS_PER_BYTE);
>      copy_bytes  = min(guest_bytes, xen_bytes);
>  
> -    bitmap_long_to_byte(bytemap, bitmap, nbits);
> +    if ( IS_ENABLED(__BIG_ENDIAN) )
> +    {
> +        uint8_t *bytemap = xmalloc_array(uint8_t, xen_bytes);
>  
> -    if ( copy_bytes &&
> -         copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
> -        err = -EFAULT;
> +        if ( !bytemap )
> +            return -ENOMEM;
>  
> -    xfree(bytemap);
> +        bitmap_long_to_byte(bytemap, bitmap, nbits);
> +        last = bytemap[nbits / 8];
> +
> +        if ( copy_bytes > 1 &&
> +             copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes - 1) )
> +            err = -EFAULT;
> +
> +        xfree(bytemap);
> +    }
> +    else
> +    {
> +        const uint8_t *bytemap = (const uint8_t *)bitmap;
> +        last = bytemap[nbits / 8];
> +
> +        if ( copy_bytes > 1 &&
> +             copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes - 1) )
> +            err = -EFAULT;
> +    }
> +
> +    clamp_last_byte(&last, nbits);

... just one, I wonder if that being a separate function is actually still
necessary.

As indicated before, I think it would be nice if the two identical copy-out
operations could also be folded.

Jan
Stefano Stabellini March 31, 2025, 11:56 p.m. UTC | #4
On Mon, 31 Mar 2025, Jan Beulich wrote:

> On 28.03.2025 00:34, Stefano Stabellini wrote:
> > --- a/xen/common/bitmap.c
> > +++ b/xen/common/bitmap.c
> > @@ -52,7 +52,7 @@ static void clamp_last_byte(uint8_t *bp, unsigned int nbits)
> >  	unsigned int remainder = nbits % 8;
> >  
> >  	if (remainder)
> > -		bp[nbits/8] &= (1U << remainder) - 1;
> > +		*bp &= (1U << remainder) - 1;
> >  }
> 
> Technically there's nothing wrong with dropping the if(), I think. Even more
> so then than now, ...
> 
> > @@ -338,7 +338,6 @@ static void bitmap_long_to_byte(uint8_t *bp, const unsigned long *lp,
> >  			nbits -= 8;
> >  		}
> >  	}
> > -	clamp_last_byte(bp, nbits);
> >  }
> >  
> >  static void bitmap_byte_to_long(unsigned long *lp, const uint8_t *bp,
> > @@ -363,7 +362,6 @@ static void bitmap_long_to_byte(uint8_t *bp, const unsigned long *lp,
> >  				unsigned int nbits)
> >  {
> >  	memcpy(bp, lp, DIV_ROUND_UP(nbits, BITS_PER_BYTE));
> > -	clamp_last_byte(bp, nbits);
> >  }
> 
> ... with the two prior call sites now shrunk to ...
> 
> > @@ -384,21 +382,41 @@ int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
> >      uint8_t zero = 0;
> >      int err = 0;
> >      unsigned int xen_bytes = DIV_ROUND_UP(nbits, BITS_PER_BYTE);
> > -    uint8_t *bytemap = xmalloc_array(uint8_t, xen_bytes);
> > -
> > -    if ( !bytemap )
> > -        return -ENOMEM;
> > +    uint8_t last;
> >  
> >      guest_bytes = DIV_ROUND_UP(xenctl_bitmap->nr_bits, BITS_PER_BYTE);
> >      copy_bytes  = min(guest_bytes, xen_bytes);
> >  
> > -    bitmap_long_to_byte(bytemap, bitmap, nbits);
> > +    if ( IS_ENABLED(__BIG_ENDIAN) )
> > +    {
> > +        uint8_t *bytemap = xmalloc_array(uint8_t, xen_bytes);
> >  
> > -    if ( copy_bytes &&
> > -         copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
> > -        err = -EFAULT;
> > +        if ( !bytemap )
> > +            return -ENOMEM;
> >  
> > -    xfree(bytemap);
> > +        bitmap_long_to_byte(bytemap, bitmap, nbits);
> > +        last = bytemap[nbits / 8];
> > +
> > +        if ( copy_bytes > 1 &&
> > +             copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes - 1) )
> > +            err = -EFAULT;
> > +
> > +        xfree(bytemap);
> > +    }
> > +    else
> > +    {
> > +        const uint8_t *bytemap = (const uint8_t *)bitmap;
> > +        last = bytemap[nbits / 8];
> > +
> > +        if ( copy_bytes > 1 &&
> > +             copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes - 1) )
> > +            err = -EFAULT;
> > +    }
> > +
> > +    clamp_last_byte(&last, nbits);
> 
> ... just one, I wonder if that being a separate function is actually still
> necessary.
> 
> As indicated before, I think it would be nice if the two identical copy-out
> operations could also be folded.
 
Please see: https://marc.info/?l=xen-devel&m=174319650100975

Other than that, I have addressed all the other points.
Stefano Stabellini April 1, 2025, 12:48 a.m. UTC | #5
On Mon, 31 Mar 2025, Stefano Stabellini wrote:
> On Mon, 31 Mar 2025, Jan Beulich wrote:
> > On 28.03.2025 00:34, Stefano Stabellini wrote:
> > > --- a/xen/common/bitmap.c
> > > +++ b/xen/common/bitmap.c
> > > @@ -52,7 +52,7 @@ static void clamp_last_byte(uint8_t *bp, unsigned int nbits)
> > >  	unsigned int remainder = nbits % 8;
> > >  
> > >  	if (remainder)
> > > -		bp[nbits/8] &= (1U << remainder) - 1;
> > > +		*bp &= (1U << remainder) - 1;
> > >  }
> > 
> > Technically there's nothing wrong with dropping the if(), I think. Even more
> > so then than now, ...

We need to keep the if because otherwise, when nbits is a multiple of 8,
remainder is 0 and the mask (1U << 0) - 1 evaluates to 0, so we would end
up zeroing the last byte.


> > > @@ -338,7 +338,6 @@ static void bitmap_long_to_byte(uint8_t *bp, const unsigned long *lp,
> > >  			nbits -= 8;
> > >  		}
> > >  	}
> > > -	clamp_last_byte(bp, nbits);
> > >  }
> > >  
> > >  static void bitmap_byte_to_long(unsigned long *lp, const uint8_t *bp,
> > > @@ -363,7 +362,6 @@ static void bitmap_long_to_byte(uint8_t *bp, const unsigned long *lp,
> > >  				unsigned int nbits)
> > >  {
> > >  	memcpy(bp, lp, DIV_ROUND_UP(nbits, BITS_PER_BYTE));
> > > -	clamp_last_byte(bp, nbits);
> > >  }
> > 
> > ... with the two prior call sites now shrunk to ...
> > 
> > > @@ -384,21 +382,41 @@ int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
> > >      uint8_t zero = 0;
> > >      int err = 0;
> > >      unsigned int xen_bytes = DIV_ROUND_UP(nbits, BITS_PER_BYTE);
> > > -    uint8_t *bytemap = xmalloc_array(uint8_t, xen_bytes);
> > > -
> > > -    if ( !bytemap )
> > > -        return -ENOMEM;
> > > +    uint8_t last;
> > >  
> > >      guest_bytes = DIV_ROUND_UP(xenctl_bitmap->nr_bits, BITS_PER_BYTE);
> > >      copy_bytes  = min(guest_bytes, xen_bytes);
> > >  
> > > -    bitmap_long_to_byte(bytemap, bitmap, nbits);
> > > +    if ( IS_ENABLED(__BIG_ENDIAN) )
> > > +    {
> > > +        uint8_t *bytemap = xmalloc_array(uint8_t, xen_bytes);
> > >  
> > > -    if ( copy_bytes &&
> > > -         copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
> > > -        err = -EFAULT;
> > > +        if ( !bytemap )
> > > +            return -ENOMEM;
> > >  
> > > -    xfree(bytemap);
> > > +        bitmap_long_to_byte(bytemap, bitmap, nbits);
> > > +        last = bytemap[nbits / 8];
> > > +
> > > +        if ( copy_bytes > 1 &&
> > > +             copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes - 1) )
> > > +            err = -EFAULT;
> > > +
> > > +        xfree(bytemap);
> > > +    }
> > > +    else
> > > +    {
> > > +        const uint8_t *bytemap = (const uint8_t *)bitmap;
> > > +        last = bytemap[nbits / 8];
> > > +
> > > +        if ( copy_bytes > 1 &&
> > > +             copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes - 1) )
> > > +            err = -EFAULT;
> > > +    }
> > > +
> > > +    clamp_last_byte(&last, nbits);
> > 
> > ... just one, I wonder if that being a separate function is actually still
> > necessary.
> > 
> > As indicated before, I think it would be nice if the two identical copy-out
> > operations could also be folded.
>  
> Please see: https://marc.info/?l=xen-devel&m=174319650100975
> 
> Other than that, I addressed all the other points

Sorry, that's not true; see my one comment above.
diff mbox series

Patch

diff --git a/xen/common/bitmap.c b/xen/common/bitmap.c
index 3da63a32a6..d3f9347e62 100644
--- a/xen/common/bitmap.c
+++ b/xen/common/bitmap.c
@@ -52,7 +52,7 @@  static void clamp_last_byte(uint8_t *bp, unsigned int nbits)
 	unsigned int remainder = nbits % 8;
 
 	if (remainder)
-		bp[nbits/8] &= (1U << remainder) - 1;
+		*bp &= (1U << remainder) - 1;
 }
 
 int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
@@ -338,7 +338,6 @@  static void bitmap_long_to_byte(uint8_t *bp, const unsigned long *lp,
 			nbits -= 8;
 		}
 	}
-	clamp_last_byte(bp, nbits);
 }
 
 static void bitmap_byte_to_long(unsigned long *lp, const uint8_t *bp,
@@ -363,7 +362,6 @@  static void bitmap_long_to_byte(uint8_t *bp, const unsigned long *lp,
 				unsigned int nbits)
 {
 	memcpy(bp, lp, DIV_ROUND_UP(nbits, BITS_PER_BYTE));
-	clamp_last_byte(bp, nbits);
 }
 
 static void bitmap_byte_to_long(unsigned long *lp, const uint8_t *bp,
@@ -384,21 +382,41 @@  int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
     uint8_t zero = 0;
     int err = 0;
     unsigned int xen_bytes = DIV_ROUND_UP(nbits, BITS_PER_BYTE);
-    uint8_t *bytemap = xmalloc_array(uint8_t, xen_bytes);
-
-    if ( !bytemap )
-        return -ENOMEM;
+    uint8_t last;
 
     guest_bytes = DIV_ROUND_UP(xenctl_bitmap->nr_bits, BITS_PER_BYTE);
     copy_bytes  = min(guest_bytes, xen_bytes);
 
-    bitmap_long_to_byte(bytemap, bitmap, nbits);
+    if ( IS_ENABLED(__BIG_ENDIAN) )
+    {
+        uint8_t *bytemap = xmalloc_array(uint8_t, xen_bytes);
 
-    if ( copy_bytes &&
-         copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
-        err = -EFAULT;
+        if ( !bytemap )
+            return -ENOMEM;
 
-    xfree(bytemap);
+        bitmap_long_to_byte(bytemap, bitmap, nbits);
+        last = bytemap[nbits / 8];
+
+        if ( copy_bytes > 1 &&
+             copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes - 1) )
+            err = -EFAULT;
+
+        xfree(bytemap);
+    }
+    else
+    {
+        const uint8_t *bytemap = (const uint8_t *)bitmap;
+        last = bytemap[nbits / 8];
+
+        if ( copy_bytes > 1 &&
+             copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes - 1) )
+            err = -EFAULT;
+    }
+
+    clamp_last_byte(&last, nbits);
+    if ( copy_bytes > 0 &&
+         copy_to_guest_offset(xenctl_bitmap->bitmap, copy_bytes - 1, &last, 1) )
+        err = -EFAULT;
 
     for ( i = copy_bytes; !err && i < guest_bytes; i++ )
         if ( copy_to_guest_offset(xenctl_bitmap->bitmap, i, &zero, 1) )