[kvm-unit-tests,1/8] s390x: lib: Extend bitops

Message ID 20210813073615.32837-2-frankja@linux.ibm.com (mailing list archive)
State New, archived
Series s390x: Cleanup and maintenance

Commit Message

Janosch Frank Aug. 13, 2021, 7:36 a.m. UTC
Bit setting and clearing is never bad to have.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 lib/s390x/asm/bitops.h | 102 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)

Comments

Claudio Imbrenda Aug. 13, 2021, 8:32 a.m. UTC | #1
On Fri, 13 Aug 2021 07:36:08 +0000
Janosch Frank <frankja@linux.ibm.com> wrote:

> Bit setting and clearing is never bad to have.
> 
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> ---
>  lib/s390x/asm/bitops.h | 102 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 102 insertions(+)
> 
> diff --git a/lib/s390x/asm/bitops.h b/lib/s390x/asm/bitops.h
> index 792881ec..f5612855 100644
> --- a/lib/s390x/asm/bitops.h
> +++ b/lib/s390x/asm/bitops.h
> @@ -17,6 +17,78 @@
>  
>  #define BITS_PER_LONG	64
>  
> +static inline unsigned long *bitops_word(unsigned long nr,
> +					 const volatile unsigned long *ptr)
> +{
> +	unsigned long addr;
> +
> +	addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
> +	return (unsigned long *)addr;

why not just 

return ptr + (nr / BITS_PER_LONG);

> +}
> +
> +static inline unsigned long bitops_mask(unsigned long nr)
> +{
> +	return 1UL << (nr & (BITS_PER_LONG - 1));
> +}
> +
> +static inline uint64_t laog(volatile unsigned long *ptr, uint64_t mask)
> +{
> +	uint64_t old;
> +
> +	/* load and or 64bit concurrent and interlocked */
> +	asm volatile(
> +		"	laog	%[old],%[mask],%[ptr]\n"
> +		: [old] "=d" (old), [ptr] "+Q" (*ptr)
> +		: [mask] "d" (mask)
> +		: "memory", "cc" );
> +	return old;
> +}

do we really need the artillery (asm) here?
is there a reason why we can't do this in C?

> +static inline uint64_t lang(volatile unsigned long *ptr, uint64_t mask)
> +{
> +	uint64_t old;
> +
> +	/* load and and 64bit concurrent and interlocked */
> +	asm volatile(
> +		"	lang	%[old],%[mask],%[ptr]\n"
> +		: [old] "=d" (old), [ptr] "+Q" (*ptr)
> +		: [mask] "d" (mask)
> +		: "memory", "cc" );
> +	return old;
> +}

(same here as above)

> +
> +static inline void set_bit(unsigned long nr,
> +			   const volatile unsigned long *ptr)
> +{
> +	uint64_t mask = bitops_mask(nr);
> +	uint64_t *addr = bitops_word(nr, ptr);
> +
> +	laog(addr, mask);
> +}
> +
> +static inline void set_bit_inv(unsigned long nr,
> +			       const volatile unsigned long *ptr)
> +{
> +	return set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
> +}
> +
> +static inline void clear_bit(unsigned long nr,
> +			     const volatile unsigned long *ptr)
> +{
> +	uint64_t mask = bitops_mask(nr);
> +	uint64_t *addr = bitops_word(nr, ptr);
> +
> +	lang(addr, ~mask);
> +}
> +
> +static inline void clear_bit_inv(unsigned long nr,
> +				 const volatile unsigned long *ptr)
> +{
> +	return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
> +}
> +
> +/* non-atomic bit manipulation functions */
> +
>  static inline bool test_bit(unsigned long nr,
>  			    const volatile unsigned long *ptr)
>  {
> @@ -33,4 +105,34 @@ static inline bool test_bit_inv(unsigned long nr,
>  	return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
>  }
>  
> +static inline void __set_bit(unsigned long nr,
> +			     const volatile unsigned long *ptr)
> +{
> +	uint64_t mask = bitops_mask(nr);
> +	uint64_t *addr = bitops_word(nr, ptr);
> +
> +	*addr |= mask;
> +}
> +
> +static inline void __set_bit_inv(unsigned long nr,
> +				 const volatile unsigned long *ptr)
> +{
> +	return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
> +}
> +
> +static inline void __clear_bit(unsigned long nr,
> +			       const volatile unsigned long *ptr)
> +{
> +	uint64_t mask = bitops_mask(nr);
> +	uint64_t *addr = bitops_word(nr, ptr);
> +
> +	*addr &= ~mask;
> +}
> +
> +static inline void __clear_bit_inv(unsigned long nr,
> +				   const volatile unsigned long *ptr)
> +{
> +	return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
> +}
> +
>  #endif
Janosch Frank Aug. 13, 2021, 11:31 a.m. UTC | #2
On 8/13/21 10:32 AM, Claudio Imbrenda wrote:
> On Fri, 13 Aug 2021 07:36:08 +0000
> Janosch Frank <frankja@linux.ibm.com> wrote:
> 
>> Bit setting and clearing is never bad to have.
>>
>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
>> ---
>>  lib/s390x/asm/bitops.h | 102 +++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 102 insertions(+)
>>
>> diff --git a/lib/s390x/asm/bitops.h b/lib/s390x/asm/bitops.h
>> index 792881ec..f5612855 100644
>> --- a/lib/s390x/asm/bitops.h
>> +++ b/lib/s390x/asm/bitops.h
>> @@ -17,6 +17,78 @@
>>  
>>  #define BITS_PER_LONG	64
>>  
>> +static inline unsigned long *bitops_word(unsigned long nr,
>> +					 const volatile unsigned long *ptr)
>> +{
>> +	unsigned long addr;
>> +
>> +	addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
>> +	return (unsigned long *)addr;
> 
> why not just 
> 
> return ptr + (nr / BITS_PER_LONG);
> 
>> +}
>> +
>> +static inline unsigned long bitops_mask(unsigned long nr)
>> +{
>> +	return 1UL << (nr & (BITS_PER_LONG - 1));
>> +}
>> +
>> +static inline uint64_t laog(volatile unsigned long *ptr, uint64_t mask)
>> +{
>> +	uint64_t old;
>> +
>> +	/* load and or 64bit concurrent and interlocked */
>> +	asm volatile(
>> +		"	laog	%[old],%[mask],%[ptr]\n"
>> +		: [old] "=d" (old), [ptr] "+Q" (*ptr)
>> +		: [mask] "d" (mask)
>> +		: "memory", "cc" );
>> +	return old;
>> +}
> 
> do we really need the artillery (asm) here?
> is there a reason why we can't do this in C?

Those are the interlocked/atomic instructions and even though we don't
exactly need them right now I wanted to add them for completeness.
We might be able to achieve the same via compiler functionality but this
is not my expertise. Maybe Thomas or David have a few pointers for me?

> 
>> +static inline uint64_t lang(volatile unsigned long *ptr, uint64_t mask)
>> +{
>> +	uint64_t old;
>> +
>> +	/* load and and 64bit concurrent and interlocked */
>> +	asm volatile(
>> +		"	lang	%[old],%[mask],%[ptr]\n"
>> +		: [old] "=d" (old), [ptr] "+Q" (*ptr)
>> +		: [mask] "d" (mask)
>> +		: "memory", "cc" );
>> +	return old;
>> +}
> 
> (same here as above)
> 
>> +
>> +static inline void set_bit(unsigned long nr,
>> +			   const volatile unsigned long *ptr)
>> +{
>> +	uint64_t mask = bitops_mask(nr);
>> +	uint64_t *addr = bitops_word(nr, ptr);
>> +
>> +	laog(addr, mask);
>> +}
>> +
>> +static inline void set_bit_inv(unsigned long nr,
>> +			       const volatile unsigned long *ptr)
>> +{
>> +	return set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
>> +}
>> +
>> +static inline void clear_bit(unsigned long nr,
>> +			     const volatile unsigned long *ptr)
>> +{
>> +	uint64_t mask = bitops_mask(nr);
>> +	uint64_t *addr = bitops_word(nr, ptr);
>> +
>> +	lang(addr, ~mask);
>> +}
>> +
>> +static inline void clear_bit_inv(unsigned long nr,
>> +				 const volatile unsigned long *ptr)
>> +{
>> +	return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
>> +}
>> +
>> +/* non-atomic bit manipulation functions */
>> +
>>  static inline bool test_bit(unsigned long nr,
>>  			    const volatile unsigned long *ptr)
>>  {
>> @@ -33,4 +105,34 @@ static inline bool test_bit_inv(unsigned long nr,
>>  	return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
>>  }
>>  
>> +static inline void __set_bit(unsigned long nr,
>> +			     const volatile unsigned long *ptr)
>> +{
>> +	uint64_t mask = bitops_mask(nr);
>> +	uint64_t *addr = bitops_word(nr, ptr);
>> +
>> +	*addr |= mask;
>> +}
>> +
>> +static inline void __set_bit_inv(unsigned long nr,
>> +				 const volatile unsigned long *ptr)
>> +{
>> +	return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
>> +}
>> +
>> +static inline void __clear_bit(unsigned long nr,
>> +			       const volatile unsigned long *ptr)
>> +{
>> +	uint64_t mask = bitops_mask(nr);
>> +	uint64_t *addr = bitops_word(nr, ptr);
>> +
>> +	*addr &= ~mask;
>> +}
>> +
>> +static inline void __clear_bit_inv(unsigned long nr,
>> +				   const volatile unsigned long *ptr)
>> +{
>> +	return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
>> +}
>> +
>>  #endif
>
Thomas Huth Aug. 18, 2021, 8:20 a.m. UTC | #3
On 13/08/2021 13.31, Janosch Frank wrote:
> On 8/13/21 10:32 AM, Claudio Imbrenda wrote:
>> On Fri, 13 Aug 2021 07:36:08 +0000
>> Janosch Frank <frankja@linux.ibm.com> wrote:
>>
>>> Bit setting and clearing is never bad to have.
>>>
>>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
>>> ---
>>>   lib/s390x/asm/bitops.h | 102 +++++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 102 insertions(+)
>>>
>>> diff --git a/lib/s390x/asm/bitops.h b/lib/s390x/asm/bitops.h
>>> index 792881ec..f5612855 100644
>>> --- a/lib/s390x/asm/bitops.h
>>> +++ b/lib/s390x/asm/bitops.h
>>> @@ -17,6 +17,78 @@
>>>   
>>>   #define BITS_PER_LONG	64
>>>   
>>> +static inline unsigned long *bitops_word(unsigned long nr,
>>> +					 const volatile unsigned long *ptr)
>>> +{
>>> +	unsigned long addr;
>>> +
>>> +	addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
>>> +	return (unsigned long *)addr;
>>
>> why not just
>>
>> return ptr + (nr / BITS_PER_LONG);
>>
>>> +}
>>> +
>>> +static inline unsigned long bitops_mask(unsigned long nr)
>>> +{
>>> +	return 1UL << (nr & (BITS_PER_LONG - 1));
>>> +}
>>> +
>>> +static inline uint64_t laog(volatile unsigned long *ptr, uint64_t mask)
>>> +{
>>> +	uint64_t old;
>>> +
>>> +	/* load and or 64bit concurrent and interlocked */
>>> +	asm volatile(
>>> +		"	laog	%[old],%[mask],%[ptr]\n"
>>> +		: [old] "=d" (old), [ptr] "+Q" (*ptr)
>>> +		: [mask] "d" (mask)
>>> +		: "memory", "cc" );
>>> +	return old;
>>> +}
>>
>> do we really need the artillery (asm) here?
>> is there a reason why we can't do this in C?
> 
> Those are the interlocked/atomic instructions and even though we don't
> exactly need them right now I wanted to add them for completeness.

I think I agree with Claudio - unless we really need them, we should not 
clog the sources with arbitrary inline assembly functions.

> We might be able to achieve the same via compiler functionality but this
> is not my expertise. Maybe Thomas or David have a few pointers for me?

I'm not an expert with atomic builtins either, but what's the point of this 
at all? Loading a value and OR-ing something into the value in one go? 
What's that good for?

  Thomas
Janosch Frank Aug. 18, 2021, 8:39 a.m. UTC | #4
On 8/18/21 10:20 AM, Thomas Huth wrote:
> On 13/08/2021 13.31, Janosch Frank wrote:
>> On 8/13/21 10:32 AM, Claudio Imbrenda wrote:
>>> On Fri, 13 Aug 2021 07:36:08 +0000
>>> Janosch Frank <frankja@linux.ibm.com> wrote:
>>>
>>>> Bit setting and clearing is never bad to have.
>>>>
>>>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
>>>> ---
>>>>   lib/s390x/asm/bitops.h | 102 +++++++++++++++++++++++++++++++++++++++++
>>>>   1 file changed, 102 insertions(+)
>>>>
>>>> diff --git a/lib/s390x/asm/bitops.h b/lib/s390x/asm/bitops.h
>>>> index 792881ec..f5612855 100644
>>>> --- a/lib/s390x/asm/bitops.h
>>>> +++ b/lib/s390x/asm/bitops.h
>>>> @@ -17,6 +17,78 @@
>>>>   
>>>>   #define BITS_PER_LONG	64
>>>>   
>>>> +static inline unsigned long *bitops_word(unsigned long nr,
>>>> +					 const volatile unsigned long *ptr)
>>>> +{
>>>> +	unsigned long addr;
>>>> +
>>>> +	addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
>>>> +	return (unsigned long *)addr;
>>>
>>> why not just
>>>
>>> return ptr + (nr / BITS_PER_LONG);
>>>
>>>> +}
>>>> +
>>>> +static inline unsigned long bitops_mask(unsigned long nr)
>>>> +{
>>>> +	return 1UL << (nr & (BITS_PER_LONG - 1));
>>>> +}
>>>> +
>>>> +static inline uint64_t laog(volatile unsigned long *ptr, uint64_t mask)
>>>> +{
>>>> +	uint64_t old;
>>>> +
>>>> +	/* load and or 64bit concurrent and interlocked */
>>>> +	asm volatile(
>>>> +		"	laog	%[old],%[mask],%[ptr]\n"
>>>> +		: [old] "=d" (old), [ptr] "+Q" (*ptr)
>>>> +		: [mask] "d" (mask)
>>>> +		: "memory", "cc" );
>>>> +	return old;
>>>> +}
>>>
>>> do we really need the artillery (asm) here?
>>> is there a reason why we can't do this in C?
>>
>> Those are the interlocked/atomic instructions and even though we don't
>> exactly need them right now I wanted to add them for completeness.
> 
> I think I agree with Claudio - unless we really need them, we should not 
> clog the sources with arbitrary inline assembly functions.

Alright, I can trim it down.

> 
>> We might be able to achieve the same via compiler functionality but this
>> is not my expertise. Maybe Thomas or David have a few pointers for me?
> 
> I'm not an expert with atomic builtins either, but what's the point of this 
> at all? Loading a value and OR-ing something into the value in one go? 
> What's that good for?

Well, it's a block-concurrent, interlocked-update load, OR, and store.
I.e. it loads the data from the ptr address into [old], ORs in the
mask, and stores the result back to the ptr address.

The instruction name "load and or" does not convey the full action of
the instruction.
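
For illustration, a rough C equivalent using the GCC atomic builtins;
this is a sketch only (the name laog_equiv is made up here, the builtin
may not compile down to LAOG, and the seq-cst ordering is an assumption):

static inline uint64_t laog_equiv(volatile unsigned long *ptr, uint64_t mask)
{
	/* atomically: old = *ptr; *ptr = old | mask; return old; */
	return __atomic_fetch_or(ptr, mask, __ATOMIC_SEQ_CST);
}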

> 
>   Thomas
>
Thomas Huth Aug. 18, 2021, 8:57 a.m. UTC | #5
On 18/08/2021 10.39, Janosch Frank wrote:
> On 8/18/21 10:20 AM, Thomas Huth wrote:
>> On 13/08/2021 13.31, Janosch Frank wrote:
>>> On 8/13/21 10:32 AM, Claudio Imbrenda wrote:
>>>> On Fri, 13 Aug 2021 07:36:08 +0000
>>>> Janosch Frank <frankja@linux.ibm.com> wrote:
>>>>
>>>>> Bit setting and clearing is never bad to have.
>>>>>
>>>>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
>>>>> ---
>>>>>    lib/s390x/asm/bitops.h | 102 +++++++++++++++++++++++++++++++++++++++++
>>>>>    1 file changed, 102 insertions(+)
>>>>>
>>>>> diff --git a/lib/s390x/asm/bitops.h b/lib/s390x/asm/bitops.h
>>>>> index 792881ec..f5612855 100644
>>>>> --- a/lib/s390x/asm/bitops.h
>>>>> +++ b/lib/s390x/asm/bitops.h
>>>>> @@ -17,6 +17,78 @@
>>>>>    
>>>>>    #define BITS_PER_LONG	64
>>>>>    
>>>>> +static inline unsigned long *bitops_word(unsigned long nr,
>>>>> +					 const volatile unsigned long *ptr)
>>>>> +{
>>>>> +	unsigned long addr;
>>>>> +
>>>>> +	addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
>>>>> +	return (unsigned long *)addr;
>>>>
>>>> why not just
>>>>
>>>> return ptr + (nr / BITS_PER_LONG);
>>>>
>>>>> +}
>>>>> +
>>>>> +static inline unsigned long bitops_mask(unsigned long nr)
>>>>> +{
>>>>> +	return 1UL << (nr & (BITS_PER_LONG - 1));
>>>>> +}
>>>>> +
>>>>> +static inline uint64_t laog(volatile unsigned long *ptr, uint64_t mask)
>>>>> +{
>>>>> +	uint64_t old;
>>>>> +
>>>>> +	/* load and or 64bit concurrent and interlocked */
>>>>> +	asm volatile(
>>>>> +		"	laog	%[old],%[mask],%[ptr]\n"
>>>>> +		: [old] "=d" (old), [ptr] "+Q" (*ptr)
>>>>> +		: [mask] "d" (mask)
>>>>> +		: "memory", "cc" );
>>>>> +	return old;
>>>>> +}
>>>>
>>>> do we really need the artillery (asm) here?
>>>> is there a reason why we can't do this in C?
>>>
>>> Those are the interlocked/atomic instructions and even though we don't
>>> exactly need them right now I wanted to add them for completeness.
>>
>> I think I agree with Claudio - unless we really need them, we should not
>> clog the sources with arbitrary inline assembly functions.
> 
> Alright, I can trim it down.
> 
>>
>>> We might be able to achieve the same via compiler functionality but this
>>> is not my expertise. Maybe Thomas or David have a few pointers for me?
>>
>> I'm not an expert with atomic builtins either, but what's the point of this
>> at all? Loading a value and OR-ing something into the value in one go?
>> What's that good for?
> 
> Well, it's a block-concurrent, interlocked-update load, OR, and store.
> I.e. it loads the data from the ptr address into [old], ORs in the
> mask, and stores the result back to the ptr address.
> 
> The instruction name "load and or" does not convey the full action of
> the instruction.

Ok, thanks, that makes more sense now, but you could at least have mentioned 
this in the comment that you added in front of it :-)

Anyway, I guess it's easier to use the builtin atomic functions like 
__atomic_or_fetch() for stuff like this in case we ever need it.

  Thomas
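
A minimal sketch of what set_bit()/clear_bit() could look like with the
builtin atomics instead of inline assembly; the _builtin names and the
seq-cst memory ordering are illustrative, not from the patch, and this
reuses the patch's bitops_word()/bitops_mask() helpers:

static inline void set_bit_builtin(unsigned long nr,
				   volatile unsigned long *ptr)
{
	unsigned long mask = bitops_mask(nr);
	unsigned long *addr = bitops_word(nr, ptr);

	/* atomic read-modify-write, equivalent in effect to laog() */
	__atomic_or_fetch(addr, mask, __ATOMIC_SEQ_CST);
}

static inline void clear_bit_builtin(unsigned long nr,
				     volatile unsigned long *ptr)
{
	unsigned long mask = bitops_mask(nr);
	unsigned long *addr = bitops_word(nr, ptr);

	/* atomic read-modify-write, equivalent in effect to lang() */
	__atomic_and_fetch(addr, ~mask, __ATOMIC_SEQ_CST);
}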

Patch

diff --git a/lib/s390x/asm/bitops.h b/lib/s390x/asm/bitops.h
index 792881ec..f5612855 100644
--- a/lib/s390x/asm/bitops.h
+++ b/lib/s390x/asm/bitops.h
@@ -17,6 +17,78 @@ 
 
 #define BITS_PER_LONG	64
 
+static inline unsigned long *bitops_word(unsigned long nr,
+					 const volatile unsigned long *ptr)
+{
+	unsigned long addr;
+
+	addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
+	return (unsigned long *)addr;
+}
+
+static inline unsigned long bitops_mask(unsigned long nr)
+{
+	return 1UL << (nr & (BITS_PER_LONG - 1));
+}
+
+static inline uint64_t laog(volatile unsigned long *ptr, uint64_t mask)
+{
+	uint64_t old;
+
+	/* load and or 64bit concurrent and interlocked */
+	asm volatile(
+		"	laog	%[old],%[mask],%[ptr]\n"
+		: [old] "=d" (old), [ptr] "+Q" (*ptr)
+		: [mask] "d" (mask)
+		: "memory", "cc" );
+	return old;
+}
+
+static inline uint64_t lang(volatile unsigned long *ptr, uint64_t mask)
+{
+	uint64_t old;
+
+	/* load and and 64bit concurrent and interlocked */
+	asm volatile(
+		"	lang	%[old],%[mask],%[ptr]\n"
+		: [old] "=d" (old), [ptr] "+Q" (*ptr)
+		: [mask] "d" (mask)
+		: "memory", "cc" );
+	return old;
+}
+
+static inline void set_bit(unsigned long nr,
+			   const volatile unsigned long *ptr)
+{
+	uint64_t mask = bitops_mask(nr);
+	uint64_t *addr = bitops_word(nr, ptr);
+
+	laog(addr, mask);
+}
+
+static inline void set_bit_inv(unsigned long nr,
+			       const volatile unsigned long *ptr)
+{
+	return set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void clear_bit(unsigned long nr,
+			     const volatile unsigned long *ptr)
+{
+	uint64_t mask = bitops_mask(nr);
+	uint64_t *addr = bitops_word(nr, ptr);
+
+	lang(addr, ~mask);
+}
+
+static inline void clear_bit_inv(unsigned long nr,
+				 const volatile unsigned long *ptr)
+{
+	return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+/* non-atomic bit manipulation functions */
+
 static inline bool test_bit(unsigned long nr,
 			    const volatile unsigned long *ptr)
 {
@@ -33,4 +105,34 @@ static inline bool test_bit_inv(unsigned long nr,
 	return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
 }
 
+static inline void __set_bit(unsigned long nr,
+			     const volatile unsigned long *ptr)
+{
+	uint64_t mask = bitops_mask(nr);
+	uint64_t *addr = bitops_word(nr, ptr);
+
+	*addr |= mask;
+}
+
+static inline void __set_bit_inv(unsigned long nr,
+				 const volatile unsigned long *ptr)
+{
+	return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void __clear_bit(unsigned long nr,
+			       const volatile unsigned long *ptr)
+{
+	uint64_t mask = bitops_mask(nr);
+	uint64_t *addr = bitops_word(nr, ptr);
+
+	*addr &= ~mask;
+}
+
+static inline void __clear_bit_inv(unsigned long nr,
+				   const volatile unsigned long *ptr)
+{
+	return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
 #endif
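
Usage note (illustrative, not part of the patch): the *_inv variants
implement the s390x MSB-0 bit numbering by XOR-ing the bit number with
BITS_PER_LONG - 1, i.e. bit 0 is the leftmost bit of the 64-bit word:

	unsigned long word = 0;

	set_bit(1, &word);      /* LSB-0 numbering: word == 0x2 */
	set_bit_inv(1, &word);  /* MSB-0 numbering: ORs in 1UL << 62 */
	/* test_bit(62, &word) and test_bit_inv(1, &word) are now both true */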