Message ID | 20210813073615.32837-2-frankja@linux.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | s390x: Cleanup and maintenance | expand |
On Fri, 13 Aug 2021 07:36:08 +0000 Janosch Frank <frankja@linux.ibm.com> wrote: > Bit setting and clearing is never bad to have. > > Signed-off-by: Janosch Frank <frankja@linux.ibm.com> > --- > lib/s390x/asm/bitops.h | 102 > +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 > insertions(+) > > diff --git a/lib/s390x/asm/bitops.h b/lib/s390x/asm/bitops.h > index 792881ec..f5612855 100644 > --- a/lib/s390x/asm/bitops.h > +++ b/lib/s390x/asm/bitops.h > @@ -17,6 +17,78 @@ > > #define BITS_PER_LONG 64 > > +static inline unsigned long *bitops_word(unsigned long nr, > + const volatile unsigned > long *ptr) +{ > + unsigned long addr; > + > + addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - > 1))) >> 3); > + return (unsigned long *)addr; why not just return ptr + (nr / BITS_PER_LONG); > +} > + > +static inline unsigned long bitops_mask(unsigned long nr) > +{ > + return 1UL << (nr & (BITS_PER_LONG - 1)); > +} > + > +static inline uint64_t laog(volatile unsigned long *ptr, uint64_t > mask) +{ > + uint64_t old; > + > + /* load and or 64bit concurrent and interlocked */ > + asm volatile( > + " laog %[old],%[mask],%[ptr]\n" > + : [old] "=d" (old), [ptr] "+Q" (*ptr) > + : [mask] "d" (mask) > + : "memory", "cc" ); > + return old; > +} do we really need the artillery (asm) here? is there a reason why we can't do this in C? 
> +static inline uint64_t lang(volatile unsigned long *ptr, uint64_t > mask) +{ > + uint64_t old; > + > + /* load and and 64bit concurrent and interlocked */ > + asm volatile( > + " lang %[old],%[mask],%[ptr]\n" > + : [old] "=d" (old), [ptr] "+Q" (*ptr) > + : [mask] "d" (mask) > + : "memory", "cc" ); > + return old; > +} (same here as above) > + > +static inline void set_bit(unsigned long nr, > + const volatile unsigned long *ptr) > +{ > + uint64_t mask = bitops_mask(nr); > + uint64_t *addr = bitops_word(nr, ptr); > + > + laog(addr, mask); > +} > + > +static inline void set_bit_inv(unsigned long nr, > + const volatile unsigned long *ptr) > +{ > + return set_bit(nr ^ (BITS_PER_LONG - 1), ptr); > +} > + > +static inline void clear_bit(unsigned long nr, > + const volatile unsigned long *ptr) > +{ > + uint64_t mask = bitops_mask(nr); > + uint64_t *addr = bitops_word(nr, ptr); > + > + lang(addr, ~mask); > +} > + > +static inline void clear_bit_inv(unsigned long nr, > + const volatile unsigned long *ptr) > +{ > + return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); > +} > + > +/* non-atomic bit manipulation functions */ > + > static inline bool test_bit(unsigned long nr, > const volatile unsigned long *ptr) > { > @@ -33,4 +105,34 @@ static inline bool test_bit_inv(unsigned long nr, > return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); > } > > +static inline void __set_bit(unsigned long nr, > + const volatile unsigned long *ptr) > +{ > + uint64_t mask = bitops_mask(nr); > + uint64_t *addr = bitops_word(nr, ptr); > + > + *addr |= mask; > +} > + > +static inline void __set_bit_inv(unsigned long nr, > + const volatile unsigned long *ptr) > +{ > + return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr); > +} > + > +static inline void __clear_bit(unsigned long nr, > + const volatile unsigned long *ptr) > +{ > + uint64_t mask = bitops_mask(nr); > + uint64_t *addr = bitops_word(nr, ptr); > + > + *addr &= ~mask; > +} > + > +static inline void __clear_bit_inv(unsigned long nr, > + const volatile 
unsigned long *ptr) > +{ > + return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); > +} > + > #endif
On 8/13/21 10:32 AM, Claudio Imbrenda wrote: > On Fri, 13 Aug 2021 07:36:08 +0000 > Janosch Frank <frankja@linux.ibm.com> wrote: > >> Bit setting and clearing is never bad to have. >> >> Signed-off-by: Janosch Frank <frankja@linux.ibm.com> >> --- >> lib/s390x/asm/bitops.h | 102 >> +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 >> insertions(+) >> >> diff --git a/lib/s390x/asm/bitops.h b/lib/s390x/asm/bitops.h >> index 792881ec..f5612855 100644 >> --- a/lib/s390x/asm/bitops.h >> +++ b/lib/s390x/asm/bitops.h >> @@ -17,6 +17,78 @@ >> >> #define BITS_PER_LONG 64 >> >> +static inline unsigned long *bitops_word(unsigned long nr, >> + const volatile unsigned >> long *ptr) +{ >> + unsigned long addr; >> + >> + addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - >> 1))) >> 3); >> + return (unsigned long *)addr; > > why not just > > return ptr + (nr / BITS_PER_LONG); > >> +} >> + >> +static inline unsigned long bitops_mask(unsigned long nr) >> +{ >> + return 1UL << (nr & (BITS_PER_LONG - 1)); >> +} >> + >> +static inline uint64_t laog(volatile unsigned long *ptr, uint64_t >> mask) +{ >> + uint64_t old; >> + >> + /* load and or 64bit concurrent and interlocked */ >> + asm volatile( >> + " laog %[old],%[mask],%[ptr]\n" >> + : [old] "=d" (old), [ptr] "+Q" (*ptr) >> + : [mask] "d" (mask) >> + : "memory", "cc" ); >> + return old; >> +} > > do we really need the artillery (asm) here? > is there a reason why we can't do this in C? Those are the interlocked/atomic instructions and even though we don't exactly need them right now I wanted to add them for completeness. We might be able to achieve the same via compiler functionality but this is not my expertise. Maybe Thomas or David have a few pointers for me? 
> >> +static inline uint64_t lang(volatile unsigned long *ptr, uint64_t >> mask) +{ >> + uint64_t old; >> + >> + /* load and and 64bit concurrent and interlocked */ >> + asm volatile( >> + " lang %[old],%[mask],%[ptr]\n" >> + : [old] "=d" (old), [ptr] "+Q" (*ptr) >> + : [mask] "d" (mask) >> + : "memory", "cc" ); >> + return old; >> +} > > (same here as above) > >> + >> +static inline void set_bit(unsigned long nr, >> + const volatile unsigned long *ptr) >> +{ >> + uint64_t mask = bitops_mask(nr); >> + uint64_t *addr = bitops_word(nr, ptr); >> + >> + laog(addr, mask); >> +} >> + >> +static inline void set_bit_inv(unsigned long nr, >> + const volatile unsigned long *ptr) >> +{ >> + return set_bit(nr ^ (BITS_PER_LONG - 1), ptr); >> +} >> + >> +static inline void clear_bit(unsigned long nr, >> + const volatile unsigned long *ptr) >> +{ >> + uint64_t mask = bitops_mask(nr); >> + uint64_t *addr = bitops_word(nr, ptr); >> + >> + lang(addr, ~mask); >> +} >> + >> +static inline void clear_bit_inv(unsigned long nr, >> + const volatile unsigned long *ptr) >> +{ >> + return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); >> +} >> + >> +/* non-atomic bit manipulation functions */ >> + >> static inline bool test_bit(unsigned long nr, >> const volatile unsigned long *ptr) >> { >> @@ -33,4 +105,34 @@ static inline bool test_bit_inv(unsigned long nr, >> return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); >> } >> >> +static inline void __set_bit(unsigned long nr, >> + const volatile unsigned long *ptr) >> +{ >> + uint64_t mask = bitops_mask(nr); >> + uint64_t *addr = bitops_word(nr, ptr); >> + >> + *addr |= mask; >> +} >> + >> +static inline void __set_bit_inv(unsigned long nr, >> + const volatile unsigned long *ptr) >> +{ >> + return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr); >> +} >> + >> +static inline void __clear_bit(unsigned long nr, >> + const volatile unsigned long *ptr) >> +{ >> + uint64_t mask = bitops_mask(nr); >> + uint64_t *addr = bitops_word(nr, ptr); >> + >> + *addr &= ~mask; >> 
+} >> + >> +static inline void __clear_bit_inv(unsigned long nr, >> + const volatile unsigned long *ptr) >> +{ >> + return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); >> +} >> + >> #endif >
On 13/08/2021 13.31, Janosch Frank wrote: > On 8/13/21 10:32 AM, Claudio Imbrenda wrote: >> On Fri, 13 Aug 2021 07:36:08 +0000 >> Janosch Frank <frankja@linux.ibm.com> wrote: >> >>> Bit setting and clearing is never bad to have. >>> >>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com> >>> --- >>> lib/s390x/asm/bitops.h | 102 >>> +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 >>> insertions(+) >>> >>> diff --git a/lib/s390x/asm/bitops.h b/lib/s390x/asm/bitops.h >>> index 792881ec..f5612855 100644 >>> --- a/lib/s390x/asm/bitops.h >>> +++ b/lib/s390x/asm/bitops.h >>> @@ -17,6 +17,78 @@ >>> >>> #define BITS_PER_LONG 64 >>> >>> +static inline unsigned long *bitops_word(unsigned long nr, >>> + const volatile unsigned >>> long *ptr) +{ >>> + unsigned long addr; >>> + >>> + addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - >>> 1))) >> 3); >>> + return (unsigned long *)addr; >> >> why not just >> >> return ptr + (nr / BITS_PER_LONG); >> >>> +} >>> + >>> +static inline unsigned long bitops_mask(unsigned long nr) >>> +{ >>> + return 1UL << (nr & (BITS_PER_LONG - 1)); >>> +} >>> + >>> +static inline uint64_t laog(volatile unsigned long *ptr, uint64_t >>> mask) +{ >>> + uint64_t old; >>> + >>> + /* load and or 64bit concurrent and interlocked */ >>> + asm volatile( >>> + " laog %[old],%[mask],%[ptr]\n" >>> + : [old] "=d" (old), [ptr] "+Q" (*ptr) >>> + : [mask] "d" (mask) >>> + : "memory", "cc" ); >>> + return old; >>> +} >> >> do we really need the artillery (asm) here? >> is there a reason why we can't do this in C? > > Those are the interlocked/atomic instructions and even though we don't > exactly need them right now I wanted to add them for completeness. I think I agree with Claudio - unless we really need them, we should not clog the sources with arbitrary inline assembly functions. > We might be able to achieve the same via compiler functionality but this > is not my expertise. Maybe Thomas or David have a few pointers for me? 
I'm not an expert with atomic builtins either, but what's the point of this at all? Loading a value and OR-ing something into the value in one go? What's that good for? Thomas
On 8/18/21 10:20 AM, Thomas Huth wrote: > On 13/08/2021 13.31, Janosch Frank wrote: >> On 8/13/21 10:32 AM, Claudio Imbrenda wrote: >>> On Fri, 13 Aug 2021 07:36:08 +0000 >>> Janosch Frank <frankja@linux.ibm.com> wrote: >>> >>>> Bit setting and clearing is never bad to have. >>>> >>>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com> >>>> --- >>>> lib/s390x/asm/bitops.h | 102 >>>> +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 >>>> insertions(+) >>>> >>>> diff --git a/lib/s390x/asm/bitops.h b/lib/s390x/asm/bitops.h >>>> index 792881ec..f5612855 100644 >>>> --- a/lib/s390x/asm/bitops.h >>>> +++ b/lib/s390x/asm/bitops.h >>>> @@ -17,6 +17,78 @@ >>>> >>>> #define BITS_PER_LONG 64 >>>> >>>> +static inline unsigned long *bitops_word(unsigned long nr, >>>> + const volatile unsigned >>>> long *ptr) +{ >>>> + unsigned long addr; >>>> + >>>> + addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - >>>> 1))) >> 3); >>>> + return (unsigned long *)addr; >>> >>> why not just >>> >>> return ptr + (nr / BITS_PER_LONG); >>> >>>> +} >>>> + >>>> +static inline unsigned long bitops_mask(unsigned long nr) >>>> +{ >>>> + return 1UL << (nr & (BITS_PER_LONG - 1)); >>>> +} >>>> + >>>> +static inline uint64_t laog(volatile unsigned long *ptr, uint64_t >>>> mask) +{ >>>> + uint64_t old; >>>> + >>>> + /* load and or 64bit concurrent and interlocked */ >>>> + asm volatile( >>>> + " laog %[old],%[mask],%[ptr]\n" >>>> + : [old] "=d" (old), [ptr] "+Q" (*ptr) >>>> + : [mask] "d" (mask) >>>> + : "memory", "cc" ); >>>> + return old; >>>> +} >>> >>> do we really need the artillery (asm) here? >>> is there a reason why we can't do this in C? >> >> Those are the interlocked/atomic instructions and even though we don't >> exactly need them right now I wanted to add them for completeness. > > I think I agree with Claudio - unless we really need them, we should not > clog the sources with arbitrary inline assembly functions. 
Alright, I can trim it down > >> We might be able to achieve the same via compiler functionality but this >> is not my expertise. Maybe Thomas or David have a few pointers for me? > > I'm not an expert with atomic builtins either, but what's the point of this > at all? Loading a value and OR-ing something into the value in one go? > What's that good for? Well, it's a block-concurrent interlocked-update load, OR, and store. I.e. it loads the data from the ptr and copies it into [old], then ORs in the mask and stores the result back to the ptr address. The instruction name "load and or" does not represent the full actions of the instruction. > > Thomas >
On 18/08/2021 10.39, Janosch Frank wrote: > On 8/18/21 10:20 AM, Thomas Huth wrote: >> On 13/08/2021 13.31, Janosch Frank wrote: >>> On 8/13/21 10:32 AM, Claudio Imbrenda wrote: >>>> On Fri, 13 Aug 2021 07:36:08 +0000 >>>> Janosch Frank <frankja@linux.ibm.com> wrote: >>>> >>>>> Bit setting and clearing is never bad to have. >>>>> >>>>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com> >>>>> --- >>>>> lib/s390x/asm/bitops.h | 102 >>>>> +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 >>>>> insertions(+) >>>>> >>>>> diff --git a/lib/s390x/asm/bitops.h b/lib/s390x/asm/bitops.h >>>>> index 792881ec..f5612855 100644 >>>>> --- a/lib/s390x/asm/bitops.h >>>>> +++ b/lib/s390x/asm/bitops.h >>>>> @@ -17,6 +17,78 @@ >>>>> >>>>> #define BITS_PER_LONG 64 >>>>> >>>>> +static inline unsigned long *bitops_word(unsigned long nr, >>>>> + const volatile unsigned >>>>> long *ptr) +{ >>>>> + unsigned long addr; >>>>> + >>>>> + addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - >>>>> 1))) >> 3); >>>>> + return (unsigned long *)addr; >>>> >>>> why not just >>>> >>>> return ptr + (nr / BITS_PER_LONG); >>>> >>>>> +} >>>>> + >>>>> +static inline unsigned long bitops_mask(unsigned long nr) >>>>> +{ >>>>> + return 1UL << (nr & (BITS_PER_LONG - 1)); >>>>> +} >>>>> + >>>>> +static inline uint64_t laog(volatile unsigned long *ptr, uint64_t >>>>> mask) +{ >>>>> + uint64_t old; >>>>> + >>>>> + /* load and or 64bit concurrent and interlocked */ >>>>> + asm volatile( >>>>> + " laog %[old],%[mask],%[ptr]\n" >>>>> + : [old] "=d" (old), [ptr] "+Q" (*ptr) >>>>> + : [mask] "d" (mask) >>>>> + : "memory", "cc" ); >>>>> + return old; >>>>> +} >>>> >>>> do we really need the artillery (asm) here? >>>> is there a reason why we can't do this in C? >>> >>> Those are the interlocked/atomic instructions and even though we don't >>> exactly need them right now I wanted to add them for completeness. 
>> >> I think I agree with Claudio - unless we really need them, we should not >> clog the sources with arbitrary inline assembly functions. > > Alright I can trim it down > >> >>> We might be able to achieve the same via compiler functionality but this >>> is not my expertise. Maybe Thomas or David have a few pointers for me? >> >> I'm not an expert with atomic builtins either, but what's the point of this >> at all? Loading a value and OR-ing something into the value in one go? >> What's that good for? > > Well it's a block-concurrent interlocked-update load, or and store. > I.e. it loads the data from the ptr and copies it into [old] then ors > the mask and stores it back to the ptr address. > > The instruction name "load and or" does not represent the full actions > of the instruction. Ok, thanks, that makes more sense now, but you could at least have mentioned this in the comment that you added in front of it :-) Anyway, I guess it's easier to use the builtin atomic functions like __atomic_or_fetch() for stuff like this in case we ever need it. Thomas
diff --git a/lib/s390x/asm/bitops.h b/lib/s390x/asm/bitops.h index 792881ec..f5612855 100644 --- a/lib/s390x/asm/bitops.h +++ b/lib/s390x/asm/bitops.h @@ -17,6 +17,78 @@ #define BITS_PER_LONG 64 +static inline unsigned long *bitops_word(unsigned long nr, + const volatile unsigned long *ptr) +{ + unsigned long addr; + + addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3); + return (unsigned long *)addr; +} + +static inline unsigned long bitops_mask(unsigned long nr) +{ + return 1UL << (nr & (BITS_PER_LONG - 1)); +} + +static inline uint64_t laog(volatile unsigned long *ptr, uint64_t mask) +{ + uint64_t old; + + /* load and or 64bit concurrent and interlocked */ + asm volatile( + " laog %[old],%[mask],%[ptr]\n" + : [old] "=d" (old), [ptr] "+Q" (*ptr) + : [mask] "d" (mask) + : "memory", "cc" ); + return old; +} + +static inline uint64_t lang(volatile unsigned long *ptr, uint64_t mask) +{ + uint64_t old; + + /* load and and 64bit concurrent and interlocked */ + asm volatile( + " lang %[old],%[mask],%[ptr]\n" + : [old] "=d" (old), [ptr] "+Q" (*ptr) + : [mask] "d" (mask) + : "memory", "cc" ); + return old; +} + +static inline void set_bit(unsigned long nr, + const volatile unsigned long *ptr) +{ + uint64_t mask = bitops_mask(nr); + uint64_t *addr = bitops_word(nr, ptr); + + laog(addr, mask); +} + +static inline void set_bit_inv(unsigned long nr, + const volatile unsigned long *ptr) +{ + return set_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +static inline void clear_bit(unsigned long nr, + const volatile unsigned long *ptr) +{ + uint64_t mask = bitops_mask(nr); + uint64_t *addr = bitops_word(nr, ptr); + + lang(addr, ~mask); +} + +static inline void clear_bit_inv(unsigned long nr, + const volatile unsigned long *ptr) +{ + return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +/* non-atomic bit manipulation functions */ + static inline bool test_bit(unsigned long nr, const volatile unsigned long *ptr) { @@ -33,4 +105,34 @@ static inline bool 
test_bit_inv(unsigned long nr, return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); } +static inline void __set_bit(unsigned long nr, + const volatile unsigned long *ptr) +{ + uint64_t mask = bitops_mask(nr); + uint64_t *addr = bitops_word(nr, ptr); + + *addr |= mask; +} + +static inline void __set_bit_inv(unsigned long nr, + const volatile unsigned long *ptr) +{ + return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +static inline void __clear_bit(unsigned long nr, + const volatile unsigned long *ptr) +{ + uint64_t mask = bitops_mask(nr); + uint64_t *addr = bitops_word(nr, ptr); + + *addr &= ~mask; +} + +static inline void __clear_bit_inv(unsigned long nr, + const volatile unsigned long *ptr) +{ + return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + #endif
Bit setting and clearing is never bad to have. Signed-off-by: Janosch Frank <frankja@linux.ibm.com> --- lib/s390x/asm/bitops.h | 102 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+)