diff mbox series

[v6,07/20] xen/riscv: introduce bitops.h

Message ID 14c91e2ba2aeb6e49f9f7e549232244719fa6959.1710517542.git.oleksii.kurochko@gmail.com (mailing list archive)
State Superseded
Headers show
Series Enable build of full Xen for RISC-V | expand

Commit Message

Oleksii Kurochko March 15, 2024, 6:06 p.m. UTC
Taken from Linux-6.4.0-rc1

Xen's bitops.h consists of several Linux's headers:
* linux/arch/include/asm/bitops.h:
  * The following function were removed as they aren't used in Xen:
        * test_and_set_bit_lock
        * clear_bit_unlock
        * __clear_bit_unlock
  * The following functions were renamed in the way how they are
    used by common code:
        * __test_and_set_bit
        * __test_and_clear_bit
  * The declaration and implementation of the following functios
    were updated to make Xen build happy:
        * clear_bit
        * set_bit
        * __test_and_clear_bit
        * __test_and_set_bit
  * linux/include/asm-generic/bitops/generic-non-atomic.h with the
    following changes:
     * Only functions that can be reused in Xen were left;
       others were removed.
     * it was updated the message inside #ifndef ... #endif.
     * __always_inline -> always_inline to be align with definition in
       xen/compiler.h.
     * update function prototypes from
       generic___test_and_*(unsigned long nr nr, volatile unsigned long *addr)
       to
       generic___test_and_*(unsigned long nr, volatile void *addr) to be
       consistent with other related macros/defines.
     * convert identations from tabs to spaces.
     * inside generic__test_and_* use 'bitops_uint_t' instead of 'unsigned long'
        to be generic.

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V6:
 - rebase clean ups were done: drop unused asm-generic includes
---
 Changes in V5:
   - new patch
---
 xen/arch/riscv/include/asm/bitops.h | 144 ++++++++++++++++++++++++++++
 xen/arch/riscv/include/asm/config.h |   2 +
 2 files changed, 146 insertions(+)
 create mode 100644 xen/arch/riscv/include/asm/bitops.h

Comments

Jan Beulich March 20, 2024, 4:03 p.m. UTC | #1
On 15.03.2024 19:06, Oleksii Kurochko wrote:
> Taken from Linux-6.4.0-rc1
> 
> Xen's bitops.h consists of several Linux's headers:
> * linux/arch/include/asm/bitops.h:
>   * The following function were removed as they aren't used in Xen:
>         * test_and_set_bit_lock
>         * clear_bit_unlock
>         * __clear_bit_unlock
>   * The following functions were renamed in the way how they are
>     used by common code:
>         * __test_and_set_bit
>         * __test_and_clear_bit
>   * The declaration and implementation of the following functios
>     were updated to make Xen build happy:
>         * clear_bit
>         * set_bit
>         * __test_and_clear_bit
>         * __test_and_set_bit
>   * linux/include/asm-generic/bitops/generic-non-atomic.h with the
>     following changes:
>      * Only functions that can be reused in Xen were left;
>        others were removed.
>      * it was updated the message inside #ifndef ... #endif.
>      * __always_inline -> always_inline to be align with definition in
>        xen/compiler.h.
>      * update function prototypes from
>        generic___test_and_*(unsigned long nr nr, volatile unsigned long *addr)
>        to
>        generic___test_and_*(unsigned long nr, volatile void *addr) to be
>        consistent with other related macros/defines.
>      * convert identations from tabs to spaces.
>      * inside generic__test_and_* use 'bitops_uint_t' instead of 'unsigned long'
>         to be generic.

This last middle level bullet point looks stale here here, with that header
now being introduced in a separate patch.

> --- /dev/null
> +++ b/xen/arch/riscv/include/asm/bitops.h
> @@ -0,0 +1,144 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* Copyright (C) 2012 Regents of the University of California */
> +
> +#ifndef _ASM_RISCV_BITOPS_H
> +#define _ASM_RISCV_BITOPS_H
> +
> +#include <asm/system.h>
> +
> +#define BITOP_BITS_PER_WORD BITS_PER_LONG
> +
> +#define BITOP_TYPE
> +typedef uint64_t bitops_uint_t;
> +
> +#include <asm-generic/bitops/bitops-bits.h>
> +
> +#define __set_bit(n, p)      set_bit(n, p)
> +#define __clear_bit(n, p)    clear_bit(n, p)

If these cam with a TODO, I wouldn't say anything. But without I take it
they're meant to remain that way, at which point I'd like to ask about
the performance aspect: Surely the AMO insns are more expensive than
whatever more basic insns could be used instead? I'd even go as far as
wondering whether

#define __set_bit(n, p)      ((void)__test_and_set_bit(n, p))
#define __clear_bit(n, p)    ((void)__test_and_clear_bit(n, p))

wouldn't be cheaper (the compiler would recognize the unused result
and eliminate its calculation, I'm pretty sure).

> +/* Based on linux/arch/include/asm/bitops.h */
> +
> +#if BITS_PER_LONG == 64
> +#define __AMO(op)   "amo" #op ".d"
> +#elif BITS_PER_LONG == 32
> +#define __AMO(op)   "amo" #op ".w"

This isn't in line with bitops_uint_t above. With BITOP_BITS_PER_WORD 
also expanding to BITS_PER_LONG, I think you mean to use unsigned long
there. Or else you could move stuff into the conditional here (or
really move the conditional here further up).

However, if you look at Arm64 and x86 code, you will notice that they
avoid 64-bit operations here. I'm afraid I can't easily tell whether
the reason(s) for this have meanwhile disappeared; I fear some of that
is still around.

> +#else
> +#error "Unexpected BITS_PER_LONG"
> +#endif
> +
> +#define test_and_op_bit_ord(op, mod, nr, addr, ord)     \
> +({                                                      \
> +    unsigned long res, mask;                            \
> +    mask = BITOP_MASK(nr);                              \
> +    __asm__ __volatile__ (                              \
> +        __AMO(op) #ord " %0, %2, %1"                    \
> +        : "=r" (res), "+A" (addr[BITOP_WORD(nr)])       \
> +        : "r" (mod(mask))                               \
> +        : "memory");                                    \
> +    ((res & mask) != 0);                                \
> +})
> +
> +#define __op_bit_ord(op, mod, nr, addr, ord)    \
> +    __asm__ __volatile__ (                      \
> +        __AMO(op) #ord " zero, %1, %0"          \
> +        : "+A" (addr[BITOP_WORD(nr)])           \
> +        : "r" (mod(BITOP_MASK(nr)))             \
> +        : "memory");
> +
> +#define test_and_op_bit(op, mod, nr, addr)    \
> +    test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
> +#define __op_bit(op, mod, nr, addr) \
> +    __op_bit_ord(op, mod, nr, addr, )

Why are double underscores left on the non-test_and_ macros? Even
without name space considerations that looks odd here, when talk
is of the atomic forms of the operations, which by convention have
no (double) underscore at their front.

> --- a/xen/arch/riscv/include/asm/config.h
> +++ b/xen/arch/riscv/include/asm/config.h
> @@ -113,6 +113,8 @@
>  # error "Unsupported RISCV variant"
>  #endif
>  
> +#define BITS_PER_BYTE 8
> +
>  #define BYTES_PER_LONG (1 << LONG_BYTEORDER)
>  #define BITS_PER_LONG  (BYTES_PER_LONG << 3)
>  #define POINTER_ALIGN  BYTES_PER_LONG

How's this addition related to the change at hand?

Jan
Oleksii Kurochko March 21, 2024, 10:07 a.m. UTC | #2
On Wed, 2024-03-20 at 17:03 +0100, Jan Beulich wrote:
> On 15.03.2024 19:06, Oleksii Kurochko wrote:
> > Taken from Linux-6.4.0-rc1
> > 
> > Xen's bitops.h consists of several Linux's headers:
> > * linux/arch/include/asm/bitops.h:
> >   * The following function were removed as they aren't used in Xen:
> >         * test_and_set_bit_lock
> >         * clear_bit_unlock
> >         * __clear_bit_unlock
> >   * The following functions were renamed in the way how they are
> >     used by common code:
> >         * __test_and_set_bit
> >         * __test_and_clear_bit
> >   * The declaration and implementation of the following functios
> >     were updated to make Xen build happy:
> >         * clear_bit
> >         * set_bit
> >         * __test_and_clear_bit
> >         * __test_and_set_bit
> >   * linux/include/asm-generic/bitops/generic-non-atomic.h with the
> >     following changes:
> >      * Only functions that can be reused in Xen were left;
> >        others were removed.
> >      * it was updated the message inside #ifndef ... #endif.
> >      * __always_inline -> always_inline to be align with definition
> > in
> >        xen/compiler.h.
> >      * update function prototypes from
> >        generic___test_and_*(unsigned long nr nr, volatile unsigned
> > long *addr)
> >        to
> >        generic___test_and_*(unsigned long nr, volatile void *addr)
> > to be
> >        consistent with other related macros/defines.
> >      * convert identations from tabs to spaces.
> >      * inside generic__test_and_* use 'bitops_uint_t' instead of
> > 'unsigned long'
> >         to be generic.
> 
> This last middle level bullet point looks stale here here, with that
> header
> now being introduced in a separate patch.
> 
> > --- /dev/null
> > +++ b/xen/arch/riscv/include/asm/bitops.h
> > @@ -0,0 +1,144 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/* Copyright (C) 2012 Regents of the University of California */
> > +
> > +#ifndef _ASM_RISCV_BITOPS_H
> > +#define _ASM_RISCV_BITOPS_H
> > +
> > +#include <asm/system.h>
> > +
> > +#define BITOP_BITS_PER_WORD BITS_PER_LONG
> > +
> > +#define BITOP_TYPE
> > +typedef uint64_t bitops_uint_t;
> > +
> > +#include <asm-generic/bitops/bitops-bits.h>
> > +
> > +#define __set_bit(n, p)      set_bit(n, p)
> > +#define __clear_bit(n, p)    clear_bit(n, p)
> 
> If these cam with a TODO, I wouldn't say anything. But without I take
> it
> they're meant to remain that way, at which point I'd like to ask
> about
> the performance aspect: Surely the AMO insns are more expensive than
> whatever more basic insns could be used instead? I'd even go as far
> as
> wondering whether
> 
> #define __set_bit(n, p)      ((void)__test_and_set_bit(n, p))
> #define __clear_bit(n, p)    ((void)__test_and_clear_bit(n, p))
> 
> wouldn't be cheaper (the compiler would recognize the unused result
> and eliminate its calculation, I'm pretty sure).
It was implemented using atomic ops because of Arm:
/*
 * Non-atomic bit manipulation.
 *
 * Implemented using atomics to be interrupt safe. Could alternatively
 * implement with local interrupt masking.
 */
#define __set_bit(n,p)            set_bit(n,p)
#define __clear_bit(n,p)          clear_bit(n,p)

I though that the same comment is true for x86, but after your comment
I checked x86 implementation, I realized that x86 uses non-atomic
operations.

In this case, it seems to me there is a sense to use non-atomic for
RISC-V too.

> > +/* Based on linux/arch/include/asm/bitops.h */
> > +
> > +#if BITS_PER_LONG == 64
> > +#define __AMO(op)   "amo" #op ".d"
> > +#elif BITS_PER_LONG == 32
> > +#define __AMO(op)   "amo" #op ".w"
> 
> This isn't in line with bitops_uint_t above. With BITOP_BITS_PER_WORD
> also expanding to BITS_PER_LONG, I think you mean to use unsigned
> long
> there. Or else you could move stuff into the conditional here (or
> really move the conditional here further up).
> 
> However, if you look at Arm64 and x86 code, you will notice that they
> avoid 64-bit operations here. I'm afraid I can't easily tell whether
> the reason(s) for this have meanwhile disappeared; I fear some of
> that
> is still around.
> 
> > +#else
> > +#error "Unexpected BITS_PER_LONG"
> > +#endif
> > +
> > +#define test_and_op_bit_ord(op, mod, nr, addr, ord)     \
> > +({                                                      \
> > +    unsigned long res, mask;                            \
> > +    mask = BITOP_MASK(nr);                              \
> > +    __asm__ __volatile__ (                              \
> > +        __AMO(op) #ord " %0, %2, %1"                    \
> > +        : "=r" (res), "+A" (addr[BITOP_WORD(nr)])       \
> > +        : "r" (mod(mask))                               \
> > +        : "memory");                                    \
> > +    ((res & mask) != 0);                                \
> > +})
> > +
> > +#define __op_bit_ord(op, mod, nr, addr, ord)    \
> > +    __asm__ __volatile__ (                      \
> > +        __AMO(op) #ord " zero, %1, %0"          \
> > +        : "+A" (addr[BITOP_WORD(nr)])           \
> > +        : "r" (mod(BITOP_MASK(nr)))             \
> > +        : "memory");
> > +
> > +#define test_and_op_bit(op, mod, nr, addr)    \
> > +    test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
> > +#define __op_bit(op, mod, nr, addr) \
> > +    __op_bit_ord(op, mod, nr, addr, )
> 
> Why are double underscores left on the non-test_and_ macros? Even
> without name space considerations that looks odd here, when talk
> is of the atomic forms of the operations, which by convention have
> no (double) underscore at their front.
Missed to update after Linux kernel. I'll take that into account.

> 
> > --- a/xen/arch/riscv/include/asm/config.h
> > +++ b/xen/arch/riscv/include/asm/config.h
> > @@ -113,6 +113,8 @@
> >  # error "Unsupported RISCV variant"
> >  #endif
> >  
> > +#define BITS_PER_BYTE 8
> > +
> >  #define BYTES_PER_LONG (1 << LONG_BYTEORDER)
> >  #define BITS_PER_LONG  (BYTES_PER_LONG << 3)
> >  #define POINTER_ALIGN  BYTES_PER_LONG
> 
> How's this addition related to the change at hand?
It should be a part of cmpxchg.h patch. Thanks.

~ Oleksii
> 
> Jan
Jan Beulich March 21, 2024, 10:28 a.m. UTC | #3
On 21.03.2024 11:07, Oleksii wrote:
> On Wed, 2024-03-20 at 17:03 +0100, Jan Beulich wrote:
>> On 15.03.2024 19:06, Oleksii Kurochko wrote:
>>> --- /dev/null
>>> +++ b/xen/arch/riscv/include/asm/bitops.h
>>> @@ -0,0 +1,144 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +/* Copyright (C) 2012 Regents of the University of California */
>>> +
>>> +#ifndef _ASM_RISCV_BITOPS_H
>>> +#define _ASM_RISCV_BITOPS_H
>>> +
>>> +#include <asm/system.h>
>>> +
>>> +#define BITOP_BITS_PER_WORD BITS_PER_LONG
>>> +
>>> +#define BITOP_TYPE
>>> +typedef uint64_t bitops_uint_t;
>>> +
>>> +#include <asm-generic/bitops/bitops-bits.h>
>>> +
>>> +#define __set_bit(n, p)      set_bit(n, p)
>>> +#define __clear_bit(n, p)    clear_bit(n, p)
>>
>> If these cam with a TODO, I wouldn't say anything. But without I take
>> it
>> they're meant to remain that way, at which point I'd like to ask
>> about
>> the performance aspect: Surely the AMO insns are more expensive than
>> whatever more basic insns could be used instead? I'd even go as far
>> as
>> wondering whether
>>
>> #define __set_bit(n, p)      ((void)__test_and_set_bit(n, p))
>> #define __clear_bit(n, p)    ((void)__test_and_clear_bit(n, p))
>>
>> wouldn't be cheaper (the compiler would recognize the unused result
>> and eliminate its calculation, I'm pretty sure).
> It was implemented using atomic ops because of Arm:
> /*
>  * Non-atomic bit manipulation.
>  *
>  * Implemented using atomics to be interrupt safe. Could alternatively
>  * implement with local interrupt masking.
>  */
> #define __set_bit(n,p)            set_bit(n,p)
> #define __clear_bit(n,p)          clear_bit(n,p)
> 
> I though that the same comment is true for x86, but after your comment
> I checked x86 implementation, I realized that x86 uses non-atomic
> operations.
> 
> In this case, it seems to me there is a sense to use non-atomic for
> RISC-V too.

Hmm, wait: There's an important difference between x86 on one side and
Arm/RISC-V/PPC and various other more or less RISC-like ones on the other.
x86 has read-modify-write (memory) insns. Therefore even without using
their atomic (LOCKed) forms, they do the update atomically as far as the
local CPU is concerned. That's not the case when you need to use a three
(or more) step load-op-store sequence.

Had you retained Arm's comment, I probably wouldn't even have asked.
Please add such a comment while sticking to this aliasing you have.

Jan
diff mbox series

Patch

diff --git a/xen/arch/riscv/include/asm/bitops.h b/xen/arch/riscv/include/asm/bitops.h
new file mode 100644
index 0000000000..21c4868355
--- /dev/null
+++ b/xen/arch/riscv/include/asm/bitops.h
@@ -0,0 +1,144 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012 Regents of the University of California */
+
+#ifndef _ASM_RISCV_BITOPS_H
+#define _ASM_RISCV_BITOPS_H
+
+#include <asm/system.h>
+
+#define BITOP_BITS_PER_WORD BITS_PER_LONG
+
+#define BITOP_TYPE
+typedef uint64_t bitops_uint_t;
+
+#include <asm-generic/bitops/bitops-bits.h>
+
+#define __set_bit(n, p)      set_bit(n, p)
+#define __clear_bit(n, p)    clear_bit(n, p)
+
+/* Based on linux/arch/include/asm/bitops.h */
+
+#if BITS_PER_LONG == 64
+#define __AMO(op)   "amo" #op ".d"
+#elif BITS_PER_LONG == 32
+#define __AMO(op)   "amo" #op ".w"
+#else
+#error "Unexpected BITS_PER_LONG"
+#endif
+
+#define test_and_op_bit_ord(op, mod, nr, addr, ord)     \
+({                                                      \
+    unsigned long res, mask;                            \
+    mask = BITOP_MASK(nr);                              \
+    __asm__ __volatile__ (                              \
+        __AMO(op) #ord " %0, %2, %1"                    \
+        : "=r" (res), "+A" (addr[BITOP_WORD(nr)])       \
+        : "r" (mod(mask))                               \
+        : "memory");                                    \
+    ((res & mask) != 0);                                \
+})
+
+#define __op_bit_ord(op, mod, nr, addr, ord)    \
+    __asm__ __volatile__ (                      \
+        __AMO(op) #ord " zero, %1, %0"          \
+        : "+A" (addr[BITOP_WORD(nr)])           \
+        : "r" (mod(BITOP_MASK(nr)))             \
+        : "memory");
+
+#define test_and_op_bit(op, mod, nr, addr)    \
+    test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
+#define __op_bit(op, mod, nr, addr) \
+    __op_bit_ord(op, mod, nr, addr, )
+
+/* Bitmask modifiers */
+#define NOP(x)    (x)
+#define NOT(x)    (~(x))
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ */
+static inline int test_and_set_bit(int nr, volatile void *p)
+{
+    volatile bitops_uint_t *addr = p;
+
+    return test_and_op_bit(or, NOP, nr, addr);
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ */
+static inline int test_and_clear_bit(int nr, volatile void *p)
+{
+    volatile bitops_uint_t *addr = p;
+
+    return test_and_op_bit(and, NOT, nr, addr);
+}
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void set_bit(int nr, volatile void *p)
+{
+    volatile bitops_uint_t *addr = p;
+
+    __op_bit(or, NOP, nr, addr);
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ */
+static inline void clear_bit(int nr, volatile void *p)
+{
+    volatile bitops_uint_t *addr = p;
+
+    __op_bit(and, NOT, nr, addr);
+}
+
+/**
+ * test_and_change_bit - Toggle (change) a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
+{
+	return test_and_op_bit(xor, NOP, nr, addr);
+}
+
+#undef test_and_op_bit
+#undef __op_bit
+#undef NOP
+#undef NOT
+#undef __AMO
+
+#include <asm-generic/bitops/generic-non-atomic.h>
+
+#define __test_and_set_bit generic___test_and_set_bit
+#define __test_and_clear_bit generic___test_and_clear_bit
+#define __test_and_change_bit generic___test_and_change_bit
+
+#include <asm-generic/bitops/test-bit.h>
+
+#endif /* _ASM_RISCV_BITOPS_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/riscv/include/asm/config.h b/xen/arch/riscv/include/asm/config.h
index c5f93e6a01..a58086e4b2 100644
--- a/xen/arch/riscv/include/asm/config.h
+++ b/xen/arch/riscv/include/asm/config.h
@@ -113,6 +113,8 @@ 
 # error "Unsupported RISCV variant"
 #endif
 
+#define BITS_PER_BYTE 8
+
 #define BYTES_PER_LONG (1 << LONG_BYTEORDER)
 #define BITS_PER_LONG  (BYTES_PER_LONG << 3)
 #define POINTER_ALIGN  BYTES_PER_LONG