diff mbox series

[7/7] riscv: Add qspinlock support based on Zabha extension

Message ID 20240528151052.313031-8-alexghiti@rivosinc.com (mailing list archive)
State Changes Requested
Headers show
Series Zacas/Zabha support and qspinlocks | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail merge-conflict

Commit Message

Alexandre Ghiti May 28, 2024, 3:10 p.m. UTC
In order to produce a generic kernel, a user can select
CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
spinlock implementation if Zabha is not present.

Note that we can't use alternatives here because the discovery of
extensions is done too late and we need to start with the qspinlock
implementation because the ticket spinlock implementation would pollute
the spinlock value, so let's use static keys.

This is largely based on Guo's work and Leonardo reviews at [1].

Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
 .../locking/queued-spinlocks/arch-support.txt |  2 +-
 arch/riscv/Kconfig                            |  1 +
 arch/riscv/include/asm/Kbuild                 |  4 +-
 arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
 arch/riscv/kernel/setup.c                     | 18 +++++++++
 include/asm-generic/qspinlock.h               |  2 +
 include/asm-generic/ticket_spinlock.h         |  2 +
 7 files changed, 66 insertions(+), 2 deletions(-)
 create mode 100644 arch/riscv/include/asm/spinlock.h

Comments

Andrea Parri May 29, 2024, 12:55 a.m. UTC | #1
> +	select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA

IIUC, we should make sure qspinlocks run with ARCH_WEAK_RELEASE_ACQUIRE,
perhaps a similar select for the latter?  (not a kconfig expert)

  Andrea
Guo Ren May 29, 2024, 9:23 a.m. UTC | #2
On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> In order to produce a generic kernel, a user can select
> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> spinlock implementation if Zabha is not present.
>
> Note that we can't use alternatives here because the discovery of
> extensions is done too late and we need to start with the qspinlock
> implementation because the ticket spinlock implementation would pollute
> the spinlock value, so let's use static keys.
>
> This is largely based on Guo's work and Leonardo reviews at [1].
>
> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
>  .../locking/queued-spinlocks/arch-support.txt |  2 +-
>  arch/riscv/Kconfig                            |  1 +
>  arch/riscv/include/asm/Kbuild                 |  4 +-
>  arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
>  arch/riscv/kernel/setup.c                     | 18 +++++++++
>  include/asm-generic/qspinlock.h               |  2 +
>  include/asm-generic/ticket_spinlock.h         |  2 +
>  7 files changed, 66 insertions(+), 2 deletions(-)
>  create mode 100644 arch/riscv/include/asm/spinlock.h
>
> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> index 22f2990392ff..cf26042480e2 100644
> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> @@ -20,7 +20,7 @@
>      |    openrisc: |  ok  |
>      |      parisc: | TODO |
>      |     powerpc: |  ok  |
> -    |       riscv: | TODO |
> +    |       riscv: |  ok  |
>      |        s390: | TODO |
>      |          sh: | TODO |
>      |       sparc: |  ok  |
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 184a9edb04e0..ccf1703edeb9 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -59,6 +59,7 @@ config RISCV
>         select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
>         select ARCH_USE_MEMTEST
>         select ARCH_USE_QUEUED_RWLOCKS
> +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
Using qspinlock or not depends on real hardware capabilities, not the
compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
& qspinlock three Kconfigs, and the combo-spinlock would compat all
hardware platforms but waste some qspinlock code size.

>         select ARCH_USES_CFI_TRAPS if CFI_CLANG
>         select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
>         select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> index 504f8b7e72d4..ad72f2bd4cc9 100644
> --- a/arch/riscv/include/asm/Kbuild
> +++ b/arch/riscv/include/asm/Kbuild
> @@ -2,10 +2,12 @@
>  generic-y += early_ioremap.h
>  generic-y += flat.h
>  generic-y += kvm_para.h
> +generic-y += mcs_spinlock.h
>  generic-y += parport.h
> -generic-y += spinlock.h
>  generic-y += spinlock_types.h
> +generic-y += ticket_spinlock.h
>  generic-y += qrwlock.h
>  generic-y += qrwlock_types.h
> +generic-y += qspinlock.h
>  generic-y += user.h
>  generic-y += vmlinux.lds.h
> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> new file mode 100644
> index 000000000000..e00429ac20ed
> --- /dev/null
> +++ b/arch/riscv/include/asm/spinlock.h
> @@ -0,0 +1,39 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_RISCV_SPINLOCK_H
> +#define __ASM_RISCV_SPINLOCK_H
> +
> +#ifdef CONFIG_QUEUED_SPINLOCKS
> +#define _Q_PENDING_LOOPS       (1 << 9)
> +
> +#define __no_arch_spinlock_redefine
> +#include <asm/ticket_spinlock.h>
> +#include <asm/qspinlock.h>
> +#include <asm/alternative.h>
> +
> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> +
> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
> +static __always_inline type arch_spin_##op(type_lock lock)             \
> +{                                                                      \
> +       if (static_branch_unlikely(&qspinlock_key))                     \
> +               return queued_spin_##op(lock);                          \
> +       return ticket_spin_##op(lock);                                  \
> +}
> +
> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> +
> +#else
> +
> +#include <asm/ticket_spinlock.h>
> +
> +#endif
> +
> +#include <asm/qrwlock.h>
> +
> +#endif /* __ASM_RISCV_SPINLOCK_H */
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index 4f73c0ae44b2..31ce75522fd4 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
>  #endif
>  }
>
> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> +EXPORT_SYMBOL(qspinlock_key);
> +
> +static void __init riscv_spinlock_init(void)
> +{
> +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> +                : : : : qspinlock);
> +
> +       static_branch_disable(&qspinlock_key);
> +       pr_info("Ticket spinlock: enabled\n");
> +
> +       return;
> +
> +qspinlock:
> +       pr_info("Queued spinlock: enabled\n");
> +}
> +
>  extern void __init init_rt_signal_env(void);
>
>  void __init setup_arch(char **cmdline_p)
> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
>         riscv_set_dma_cache_alignment();
>
>         riscv_user_isa_enable();
> +       riscv_spinlock_init();
>  }
>
>  bool arch_cpu_is_hotpluggable(int cpu)
> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> index 0655aa5b57b2..bf47cca2c375 100644
> --- a/include/asm-generic/qspinlock.h
> +++ b/include/asm-generic/qspinlock.h
> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>  }
>  #endif
>
> +#ifndef __no_arch_spinlock_redefine
>  /*
>   * Remapping spinlock architecture specific functions to the corresponding
>   * queued spinlock functions.
> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>  #define arch_spin_lock(l)              queued_spin_lock(l)
>  #define arch_spin_trylock(l)           queued_spin_trylock(l)
>  #define arch_spin_unlock(l)            queued_spin_unlock(l)
> +#endif
>
>  #endif /* __ASM_GENERIC_QSPINLOCK_H */
> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> index cfcff22b37b3..325779970d8a 100644
> --- a/include/asm-generic/ticket_spinlock.h
> +++ b/include/asm-generic/ticket_spinlock.h
> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>         return (s16)((val >> 16) - (val & 0xffff)) > 1;
>  }
>
> +#ifndef __no_arch_spinlock_redefine
>  /*
>   * Remapping spinlock architecture specific functions to the corresponding
>   * ticket spinlock functions.
> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>  #define arch_spin_lock(l)              ticket_spin_lock(l)
>  #define arch_spin_trylock(l)           ticket_spin_trylock(l)
>  #define arch_spin_unlock(l)            ticket_spin_unlock(l)
> +#endif
>
>  #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> --
> 2.39.2
>
Alexandre Ghiti May 29, 2024, 1:03 p.m. UTC | #3
Hi Guo,

On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
>
> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >
> > In order to produce a generic kernel, a user can select
> > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > spinlock implementation if Zabha is not present.
> >
> > Note that we can't use alternatives here because the discovery of
> > extensions is done too late and we need to start with the qspinlock
> > implementation because the ticket spinlock implementation would pollute
> > the spinlock value, so let's use static keys.
> >
> > This is largely based on Guo's work and Leonardo reviews at [1].
> >
> > Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > ---
> >  .../locking/queued-spinlocks/arch-support.txt |  2 +-
> >  arch/riscv/Kconfig                            |  1 +
> >  arch/riscv/include/asm/Kbuild                 |  4 +-
> >  arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
> >  arch/riscv/kernel/setup.c                     | 18 +++++++++
> >  include/asm-generic/qspinlock.h               |  2 +
> >  include/asm-generic/ticket_spinlock.h         |  2 +
> >  7 files changed, 66 insertions(+), 2 deletions(-)
> >  create mode 100644 arch/riscv/include/asm/spinlock.h
> >
> > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > index 22f2990392ff..cf26042480e2 100644
> > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > @@ -20,7 +20,7 @@
> >      |    openrisc: |  ok  |
> >      |      parisc: | TODO |
> >      |     powerpc: |  ok  |
> > -    |       riscv: | TODO |
> > +    |       riscv: |  ok  |
> >      |        s390: | TODO |
> >      |          sh: | TODO |
> >      |       sparc: |  ok  |
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index 184a9edb04e0..ccf1703edeb9 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -59,6 +59,7 @@ config RISCV
> >         select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> >         select ARCH_USE_MEMTEST
> >         select ARCH_USE_QUEUED_RWLOCKS
> > +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> Using qspinlock or not depends on real hardware capabilities, not the
> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> & qspinlock three Kconfigs, and the combo-spinlock would compat all
> hardware platforms but waste some qspinlock code size.

You're right, and I think your comment matches what Conor mentioned
about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
will allow a platform with Zabha capability to use qspinlocks. But if
the hardware does not, it will fallback to the ticket spinlocks.

But I agree that looking at the config alone may be misleading, even
though it will work as expected at runtime. So I agree with you:
unless anyone is strongly against the combo spinlocks, I will do what
you suggest and add them.

Thanks again for your initial work,

Alex

>
> >         select ARCH_USES_CFI_TRAPS if CFI_CLANG
> >         select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> >         select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > --- a/arch/riscv/include/asm/Kbuild
> > +++ b/arch/riscv/include/asm/Kbuild
> > @@ -2,10 +2,12 @@
> >  generic-y += early_ioremap.h
> >  generic-y += flat.h
> >  generic-y += kvm_para.h
> > +generic-y += mcs_spinlock.h
> >  generic-y += parport.h
> > -generic-y += spinlock.h
> >  generic-y += spinlock_types.h
> > +generic-y += ticket_spinlock.h
> >  generic-y += qrwlock.h
> >  generic-y += qrwlock_types.h
> > +generic-y += qspinlock.h
> >  generic-y += user.h
> >  generic-y += vmlinux.lds.h
> > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > new file mode 100644
> > index 000000000000..e00429ac20ed
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/spinlock.h
> > @@ -0,0 +1,39 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +
> > +#ifndef __ASM_RISCV_SPINLOCK_H
> > +#define __ASM_RISCV_SPINLOCK_H
> > +
> > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > +#define _Q_PENDING_LOOPS       (1 << 9)
> > +
> > +#define __no_arch_spinlock_redefine
> > +#include <asm/ticket_spinlock.h>
> > +#include <asm/qspinlock.h>
> > +#include <asm/alternative.h>
> > +
> > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > +
> > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
> > +static __always_inline type arch_spin_##op(type_lock lock)             \
> > +{                                                                      \
> > +       if (static_branch_unlikely(&qspinlock_key))                     \
> > +               return queued_spin_##op(lock);                          \
> > +       return ticket_spin_##op(lock);                                  \
> > +}
> > +
> > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > +
> > +#else
> > +
> > +#include <asm/ticket_spinlock.h>
> > +
> > +#endif
> > +
> > +#include <asm/qrwlock.h>
> > +
> > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > index 4f73c0ae44b2..31ce75522fd4 100644
> > --- a/arch/riscv/kernel/setup.c
> > +++ b/arch/riscv/kernel/setup.c
> > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> >  #endif
> >  }
> >
> > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > +EXPORT_SYMBOL(qspinlock_key);
> > +
> > +static void __init riscv_spinlock_init(void)
> > +{
> > +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > +                : : : : qspinlock);
> > +
> > +       static_branch_disable(&qspinlock_key);
> > +       pr_info("Ticket spinlock: enabled\n");
> > +
> > +       return;
> > +
> > +qspinlock:
> > +       pr_info("Queued spinlock: enabled\n");
> > +}
> > +
> >  extern void __init init_rt_signal_env(void);
> >
> >  void __init setup_arch(char **cmdline_p)
> > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> >         riscv_set_dma_cache_alignment();
> >
> >         riscv_user_isa_enable();
> > +       riscv_spinlock_init();
> >  }
> >
> >  bool arch_cpu_is_hotpluggable(int cpu)
> > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > index 0655aa5b57b2..bf47cca2c375 100644
> > --- a/include/asm-generic/qspinlock.h
> > +++ b/include/asm-generic/qspinlock.h
> > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >  }
> >  #endif
> >
> > +#ifndef __no_arch_spinlock_redefine
> >  /*
> >   * Remapping spinlock architecture specific functions to the corresponding
> >   * queued spinlock functions.
> > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >  #define arch_spin_lock(l)              queued_spin_lock(l)
> >  #define arch_spin_trylock(l)           queued_spin_trylock(l)
> >  #define arch_spin_unlock(l)            queued_spin_unlock(l)
> > +#endif
> >
> >  #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > index cfcff22b37b3..325779970d8a 100644
> > --- a/include/asm-generic/ticket_spinlock.h
> > +++ b/include/asm-generic/ticket_spinlock.h
> > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >         return (s16)((val >> 16) - (val & 0xffff)) > 1;
> >  }
> >
> > +#ifndef __no_arch_spinlock_redefine
> >  /*
> >   * Remapping spinlock architecture specific functions to the corresponding
> >   * ticket spinlock functions.
> > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >  #define arch_spin_lock(l)              ticket_spin_lock(l)
> >  #define arch_spin_trylock(l)           ticket_spin_trylock(l)
> >  #define arch_spin_unlock(l)            ticket_spin_unlock(l)
> > +#endif
> >
> >  #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > --
> > 2.39.2
> >
>
>
> --
> Best Regards
>  Guo Ren
Guo Ren May 30, 2024, 1:54 a.m. UTC | #4
On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> Hi Guo,
>
> On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
> >
> > On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > >
> > > In order to produce a generic kernel, a user can select
> > > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > > spinlock implementation if Zabha is not present.
> > >
> > > Note that we can't use alternatives here because the discovery of
> > > extensions is done too late and we need to start with the qspinlock
> > > implementation because the ticket spinlock implementation would pollute
> > > the spinlock value, so let's use static keys.
> > >
> > > This is largely based on Guo's work and Leonardo reviews at [1].
> > >
> > > Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > ---
> > >  .../locking/queued-spinlocks/arch-support.txt |  2 +-
> > >  arch/riscv/Kconfig                            |  1 +
> > >  arch/riscv/include/asm/Kbuild                 |  4 +-
> > >  arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
> > >  arch/riscv/kernel/setup.c                     | 18 +++++++++
> > >  include/asm-generic/qspinlock.h               |  2 +
> > >  include/asm-generic/ticket_spinlock.h         |  2 +
> > >  7 files changed, 66 insertions(+), 2 deletions(-)
> > >  create mode 100644 arch/riscv/include/asm/spinlock.h
> > >
> > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > index 22f2990392ff..cf26042480e2 100644
> > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > @@ -20,7 +20,7 @@
> > >      |    openrisc: |  ok  |
> > >      |      parisc: | TODO |
> > >      |     powerpc: |  ok  |
> > > -    |       riscv: | TODO |
> > > +    |       riscv: |  ok  |
> > >      |        s390: | TODO |
> > >      |          sh: | TODO |
> > >      |       sparc: |  ok  |
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index 184a9edb04e0..ccf1703edeb9 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -59,6 +59,7 @@ config RISCV
> > >         select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > >         select ARCH_USE_MEMTEST
> > >         select ARCH_USE_QUEUED_RWLOCKS
> > > +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > Using qspinlock or not depends on real hardware capabilities, not the
> > compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> > & qspinlock three Kconfigs, and the combo-spinlock would compat all
> > hardware platforms but waste some qspinlock code size.
>
> You're right, and I think your comment matches what Conor mentioned
> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> will allow a platform with Zabha capability to use qspinlocks. But if
> the hardware does not, it will fallback to the ticket spinlocks.
>
> But I agree that looking at the config alone may be misleading, even
> though it will work as expected at runtime. So I agree with you:
> unless anyone is strongly against the combo spinlocks, I will do what
> you suggest and add them.
The problem with the v12 combo-spinlock is using a static_branch
instead of the full ALTERNATIVE. Frankly, that's a bad example that
costs more code space. I found that your cmpxchg32/64 also uses a
condition branch, which has a similar problem, right?

Anyway, your patch series inspired me to update the v13
combo-spinlock. My plan is:
1. Separate native-qspinlock out of paravirt-qspinlock.
2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
ticket-lock or qspinlock.

What do you think?


>
> Thanks again for your initial work,
>
> Alex
>
> >
> > >         select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > >         select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > >         select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > > --- a/arch/riscv/include/asm/Kbuild
> > > +++ b/arch/riscv/include/asm/Kbuild
> > > @@ -2,10 +2,12 @@
> > >  generic-y += early_ioremap.h
> > >  generic-y += flat.h
> > >  generic-y += kvm_para.h
> > > +generic-y += mcs_spinlock.h
> > >  generic-y += parport.h
> > > -generic-y += spinlock.h
> > >  generic-y += spinlock_types.h
> > > +generic-y += ticket_spinlock.h
> > >  generic-y += qrwlock.h
> > >  generic-y += qrwlock_types.h
> > > +generic-y += qspinlock.h
> > >  generic-y += user.h
> > >  generic-y += vmlinux.lds.h
> > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > new file mode 100644
> > > index 000000000000..e00429ac20ed
> > > --- /dev/null
> > > +++ b/arch/riscv/include/asm/spinlock.h
> > > @@ -0,0 +1,39 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +
> > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > +#define __ASM_RISCV_SPINLOCK_H
> > > +
> > > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > > +#define _Q_PENDING_LOOPS       (1 << 9)
> > > +
> > > +#define __no_arch_spinlock_redefine
> > > +#include <asm/ticket_spinlock.h>
> > > +#include <asm/qspinlock.h>
> > > +#include <asm/alternative.h>
> > > +
> > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > +
> > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
> > > +static __always_inline type arch_spin_##op(type_lock lock)             \
> > > +{                                                                      \
> > > +       if (static_branch_unlikely(&qspinlock_key))                     \
> > > +               return queued_spin_##op(lock);                          \
> > > +       return ticket_spin_##op(lock);                                  \
> > > +}
> > > +
> > > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > > +
> > > +#else
> > > +
> > > +#include <asm/ticket_spinlock.h>
> > > +
> > > +#endif
> > > +
> > > +#include <asm/qrwlock.h>
> > > +
> > > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > index 4f73c0ae44b2..31ce75522fd4 100644
> > > --- a/arch/riscv/kernel/setup.c
> > > +++ b/arch/riscv/kernel/setup.c
> > > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > >  #endif
> > >  }
> > >
> > > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > > +EXPORT_SYMBOL(qspinlock_key);
> > > +
> > > +static void __init riscv_spinlock_init(void)
> > > +{
> > > +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > > +                : : : : qspinlock);
> > > +
> > > +       static_branch_disable(&qspinlock_key);
> > > +       pr_info("Ticket spinlock: enabled\n");
> > > +
> > > +       return;
> > > +
> > > +qspinlock:
> > > +       pr_info("Queued spinlock: enabled\n");
> > > +}
> > > +
> > >  extern void __init init_rt_signal_env(void);
> > >
> > >  void __init setup_arch(char **cmdline_p)
> > > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > >         riscv_set_dma_cache_alignment();
> > >
> > >         riscv_user_isa_enable();
> > > +       riscv_spinlock_init();
> > >  }
> > >
> > >  bool arch_cpu_is_hotpluggable(int cpu)
> > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > index 0655aa5b57b2..bf47cca2c375 100644
> > > --- a/include/asm-generic/qspinlock.h
> > > +++ b/include/asm-generic/qspinlock.h
> > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > >  }
> > >  #endif
> > >
> > > +#ifndef __no_arch_spinlock_redefine
> > >  /*
> > >   * Remapping spinlock architecture specific functions to the corresponding
> > >   * queued spinlock functions.
> > > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > >  #define arch_spin_lock(l)              queued_spin_lock(l)
> > >  #define arch_spin_trylock(l)           queued_spin_trylock(l)
> > >  #define arch_spin_unlock(l)            queued_spin_unlock(l)
> > > +#endif
> > >
> > >  #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > > index cfcff22b37b3..325779970d8a 100644
> > > --- a/include/asm-generic/ticket_spinlock.h
> > > +++ b/include/asm-generic/ticket_spinlock.h
> > > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > >         return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > >  }
> > >
> > > +#ifndef __no_arch_spinlock_redefine
> > >  /*
> > >   * Remapping spinlock architecture specific functions to the corresponding
> > >   * ticket spinlock functions.
> > > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > >  #define arch_spin_lock(l)              ticket_spin_lock(l)
> > >  #define arch_spin_trylock(l)           ticket_spin_trylock(l)
> > >  #define arch_spin_unlock(l)            ticket_spin_unlock(l)
> > > +#endif
> > >
> > >  #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > > --
> > > 2.39.2
> > >
> >
> >
> > --
> > Best Regards
> >  Guo Ren



--
Best Regards
 Guo Ren
Alexandre Ghiti May 30, 2024, 5:30 a.m. UTC | #5
Hi Guo,

On Thu, May 30, 2024 at 3:55 AM Guo Ren <guoren@kernel.org> wrote:
>
> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >
> > Hi Guo,
> >
> > On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
> > >
> > > On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > >
> > > > In order to produce a generic kernel, a user can select
> > > > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > > > spinlock implementation if Zabha is not present.
> > > >
> > > > Note that we can't use alternatives here because the discovery of
> > > > extensions is done too late and we need to start with the qspinlock
> > > > implementation because the ticket spinlock implementation would pollute
> > > > the spinlock value, so let's use static keys.
> > > >
> > > > This is largely based on Guo's work and Leonardo reviews at [1].
> > > >
> > > > Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > > ---
> > > >  .../locking/queued-spinlocks/arch-support.txt |  2 +-
> > > >  arch/riscv/Kconfig                            |  1 +
> > > >  arch/riscv/include/asm/Kbuild                 |  4 +-
> > > >  arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
> > > >  arch/riscv/kernel/setup.c                     | 18 +++++++++
> > > >  include/asm-generic/qspinlock.h               |  2 +
> > > >  include/asm-generic/ticket_spinlock.h         |  2 +
> > > >  7 files changed, 66 insertions(+), 2 deletions(-)
> > > >  create mode 100644 arch/riscv/include/asm/spinlock.h
> > > >
> > > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > index 22f2990392ff..cf26042480e2 100644
> > > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > @@ -20,7 +20,7 @@
> > > >      |    openrisc: |  ok  |
> > > >      |      parisc: | TODO |
> > > >      |     powerpc: |  ok  |
> > > > -    |       riscv: | TODO |
> > > > +    |       riscv: |  ok  |
> > > >      |        s390: | TODO |
> > > >      |          sh: | TODO |
> > > >      |       sparc: |  ok  |
> > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > index 184a9edb04e0..ccf1703edeb9 100644
> > > > --- a/arch/riscv/Kconfig
> > > > +++ b/arch/riscv/Kconfig
> > > > @@ -59,6 +59,7 @@ config RISCV
> > > >         select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > > >         select ARCH_USE_MEMTEST
> > > >         select ARCH_USE_QUEUED_RWLOCKS
> > > > +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > > Using qspinlock or not depends on real hardware capabilities, not the
> > > compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> > > & qspinlock three Kconfigs, and the combo-spinlock would compat all
> > > hardware platforms but waste some qspinlock code size.
> >
> > You're right, and I think your comment matches what Conor mentioned
> > about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> > will allow a platform with Zabha capability to use qspinlocks. But if
> > the hardware does not, it will fallback to the ticket spinlocks.
> >
> > But I agree that looking at the config alone may be misleading, even
> > though it will work as expected at runtime. So I agree with you:
> > unless anyone is strongly against the combo spinlocks, I will do what
> > you suggest and add them.
> The problem with the v12 combo-spinlock is using a static_branch
> instead of the full ALTERNATIVE. Frankly, that's a bad example that
> costs more code space. I found that your cmpxchg32/64 also uses a
> condition branch, which has a similar problem, right?
>
> Anyway, your patch series inspired me to update the v13
> combo-spinlock. My plan is:
> 1. Separate native-qspinlock out of paravirt-qspinlock.
> 2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
> ticket-lock or qspinlock.

What's your plan to make use of alternatives here? The alternatives
patching depends on the discovery of the extensions, which is done too
late, at least after the first use of a spinlock (the printk
spinlock). So you'd need to find a way to first use qspinlocks (but
without knowing Zabha is available) and then do the correct patching:
an idea here could be to add an "init" value to the alternatives and
let the patching process do the right thing when the extensions are
known.

Another solution would be the early discovery of the extensions, but I
took a look and it's easy with a device tree, but not with ACPI.

Let me know what you plan to do and how I can help!

Thanks,

Alex

>
> What do you think?
>
>
> >
> > Thanks again for your initial work,
> >
> > Alex
> >
> > >
> > > >         select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > > >         select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > > >         select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > > > --- a/arch/riscv/include/asm/Kbuild
> > > > +++ b/arch/riscv/include/asm/Kbuild
> > > > @@ -2,10 +2,12 @@
> > > >  generic-y += early_ioremap.h
> > > >  generic-y += flat.h
> > > >  generic-y += kvm_para.h
> > > > +generic-y += mcs_spinlock.h
> > > >  generic-y += parport.h
> > > > -generic-y += spinlock.h
> > > >  generic-y += spinlock_types.h
> > > > +generic-y += ticket_spinlock.h
> > > >  generic-y += qrwlock.h
> > > >  generic-y += qrwlock_types.h
> > > > +generic-y += qspinlock.h
> > > >  generic-y += user.h
> > > >  generic-y += vmlinux.lds.h
> > > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > > new file mode 100644
> > > > index 000000000000..e00429ac20ed
> > > > --- /dev/null
> > > > +++ b/arch/riscv/include/asm/spinlock.h
> > > > @@ -0,0 +1,39 @@
> > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > +
> > > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > > +#define __ASM_RISCV_SPINLOCK_H
> > > > +
> > > > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > > > +#define _Q_PENDING_LOOPS       (1 << 9)
> > > > +
> > > > +#define __no_arch_spinlock_redefine
> > > > +#include <asm/ticket_spinlock.h>
> > > > +#include <asm/qspinlock.h>
> > > > +#include <asm/alternative.h>
> > > > +
> > > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > > +
> > > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
> > > > +static __always_inline type arch_spin_##op(type_lock lock)             \
> > > > +{                                                                      \
> > > > +       if (static_branch_unlikely(&qspinlock_key))                     \
> > > > +               return queued_spin_##op(lock);                          \
> > > > +       return ticket_spin_##op(lock);                                  \
> > > > +}
> > > > +
> > > > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > > > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > > > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > > > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > > > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > > > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > > > +
> > > > +#else
> > > > +
> > > > +#include <asm/ticket_spinlock.h>
> > > > +
> > > > +#endif
> > > > +
> > > > +#include <asm/qrwlock.h>
> > > > +
> > > > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > index 4f73c0ae44b2..31ce75522fd4 100644
> > > > --- a/arch/riscv/kernel/setup.c
> > > > +++ b/arch/riscv/kernel/setup.c
> > > > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > > >  #endif
> > > >  }
> > > >
> > > > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > > > +EXPORT_SYMBOL(qspinlock_key);
> > > > +
> > > > +static void __init riscv_spinlock_init(void)
> > > > +{
> > > > +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > > > +                : : : : qspinlock);
> > > > +
> > > > +       static_branch_disable(&qspinlock_key);
> > > > +       pr_info("Ticket spinlock: enabled\n");
> > > > +
> > > > +       return;
> > > > +
> > > > +qspinlock:
> > > > +       pr_info("Queued spinlock: enabled\n");
> > > > +}
> > > > +
> > > >  extern void __init init_rt_signal_env(void);
> > > >
> > > >  void __init setup_arch(char **cmdline_p)
> > > > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > > >         riscv_set_dma_cache_alignment();
> > > >
> > > >         riscv_user_isa_enable();
> > > > +       riscv_spinlock_init();
> > > >  }
> > > >
> > > >  bool arch_cpu_is_hotpluggable(int cpu)
> > > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > > index 0655aa5b57b2..bf47cca2c375 100644
> > > > --- a/include/asm-generic/qspinlock.h
> > > > +++ b/include/asm-generic/qspinlock.h
> > > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > >  }
> > > >  #endif
> > > >
> > > > +#ifndef __no_arch_spinlock_redefine
> > > >  /*
> > > >   * Remapping spinlock architecture specific functions to the corresponding
> > > >   * queued spinlock functions.
> > > > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > >  #define arch_spin_lock(l)              queued_spin_lock(l)
> > > >  #define arch_spin_trylock(l)           queued_spin_trylock(l)
> > > >  #define arch_spin_unlock(l)            queued_spin_unlock(l)
> > > > +#endif
> > > >
> > > >  #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > > > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > > > index cfcff22b37b3..325779970d8a 100644
> > > > --- a/include/asm-generic/ticket_spinlock.h
> > > > +++ b/include/asm-generic/ticket_spinlock.h
> > > > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > >         return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > > >  }
> > > >
> > > > +#ifndef __no_arch_spinlock_redefine
> > > >  /*
> > > >   * Remapping spinlock architecture specific functions to the corresponding
> > > >   * ticket spinlock functions.
> > > > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > >  #define arch_spin_lock(l)              ticket_spin_lock(l)
> > > >  #define arch_spin_trylock(l)           ticket_spin_trylock(l)
> > > >  #define arch_spin_unlock(l)            ticket_spin_unlock(l)
> > > > +#endif
> > > >
> > > >  #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > > > --
> > > > 2.39.2
> > > >
> > >
> > >
> > > --
> > > Best Regards
> > >  Guo Ren
>
>
>
> --
> Best Regards
>  Guo Ren
Guo Ren May 31, 2024, 1:57 a.m. UTC | #6
On Thu, May 30, 2024 at 1:30 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> Hi Guo,
>
> On Thu, May 30, 2024 at 3:55 AM Guo Ren <guoren@kernel.org> wrote:
> >
> > On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > >
> > > Hi Guo,
> > >
> > > On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
> > > >
> > > > On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > >
> > > > > In order to produce a generic kernel, a user can select
> > > > > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > > > > spinlock implementation if Zabha is not present.
> > > > >
> > > > > Note that we can't use alternatives here because the discovery of
> > > > > extensions is done too late and we need to start with the qspinlock
> > > > > implementation because the ticket spinlock implementation would pollute
> > > > > the spinlock value, so let's use static keys.
> > > > >
> > > > > This is largely based on Guo's work and Leonardo reviews at [1].
> > > > >
> > > > > Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > > > ---
> > > > >  .../locking/queued-spinlocks/arch-support.txt |  2 +-
> > > > >  arch/riscv/Kconfig                            |  1 +
> > > > >  arch/riscv/include/asm/Kbuild                 |  4 +-
> > > > >  arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
> > > > >  arch/riscv/kernel/setup.c                     | 18 +++++++++
> > > > >  include/asm-generic/qspinlock.h               |  2 +
> > > > >  include/asm-generic/ticket_spinlock.h         |  2 +
> > > > >  7 files changed, 66 insertions(+), 2 deletions(-)
> > > > >  create mode 100644 arch/riscv/include/asm/spinlock.h
> > > > >
> > > > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > index 22f2990392ff..cf26042480e2 100644
> > > > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > @@ -20,7 +20,7 @@
> > > > >      |    openrisc: |  ok  |
> > > > >      |      parisc: | TODO |
> > > > >      |     powerpc: |  ok  |
> > > > > -    |       riscv: | TODO |
> > > > > +    |       riscv: |  ok  |
> > > > >      |        s390: | TODO |
> > > > >      |          sh: | TODO |
> > > > >      |       sparc: |  ok  |
> > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > > index 184a9edb04e0..ccf1703edeb9 100644
> > > > > --- a/arch/riscv/Kconfig
> > > > > +++ b/arch/riscv/Kconfig
> > > > > @@ -59,6 +59,7 @@ config RISCV
> > > > >         select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > > > >         select ARCH_USE_MEMTEST
> > > > >         select ARCH_USE_QUEUED_RWLOCKS
> > > > > +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > > > Using qspinlock or not depends on real hardware capabilities, not the
> > > > compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> > > > & qspinlock three Kconfigs, and the combo-spinlock would compat all
> > > > hardware platforms but waste some qspinlock code size.
> > >
> > > You're right, and I think your comment matches what Conor mentioned
> > > about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> > > will allow a platform with Zabha capability to use qspinlocks. But if
> > > the hardware does not, it will fallback to the ticket spinlocks.
> > >
> > > But I agree that looking at the config alone may be misleading, even
> > > though it will work as expected at runtime. So I agree with you:
> > > unless anyone is strongly against the combo spinlocks, I will do what
> > > you suggest and add them.
> > The problem with the v12 combo-spinlock is using a static_branch
> > instead of the full ALTERNATIVE. Frankly, that's a bad example that
> > costs more code space. I found that your cmpxchg32/64 also uses a
> > condition branch, which has a similar problem, right?
> >
> > Anyway, your patch series inspired me to update the v13
> > combo-spinlock. My plan is:
> > 1. Separate native-qspinlock out of paravirt-qspinlock.
> > 2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
> > ticket-lock or qspinlock.
>
> What's your plan to make use of alternatives here? The alternatives
> patching depends on the discovery of the extensions, which is done too
> late, at least after the first use of a spinlock (the printk
> spinlock). So you'd need to find a way to first use qspinlocks (but
> without knowing Zabha is available) and then do the correct patching:
I do that in v12:
1. Use qspinlock as init.
2. Change to ticket-lock or not.
(Only qspinlock -> ticket-lock, No reverse direction)

If there is no contention, Qspinlock is okay for all platforms before
smp bringup & no-irq environment.

> an idea here could be to add an "init" value to the alternatives and
> let the patching process do the right thing when the extensions are
> known.
>
> Another solution would be the early discovery of the extensions, but I
> took a look and it's easy with a device tree, but not with ACPI.
>
> Let me know what you plan to do and how I can help!
>
> Thanks,
>
> Alex
>
> >
> > What do you think?
> >
> >
> > >
> > > Thanks again for your initial work,
> > >
> > > Alex
> > >
> > > >
> > > > >         select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > > > >         select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > > > >         select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > > > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > > > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > > > > --- a/arch/riscv/include/asm/Kbuild
> > > > > +++ b/arch/riscv/include/asm/Kbuild
> > > > > @@ -2,10 +2,12 @@
> > > > >  generic-y += early_ioremap.h
> > > > >  generic-y += flat.h
> > > > >  generic-y += kvm_para.h
> > > > > +generic-y += mcs_spinlock.h
> > > > >  generic-y += parport.h
> > > > > -generic-y += spinlock.h
> > > > >  generic-y += spinlock_types.h
> > > > > +generic-y += ticket_spinlock.h
> > > > >  generic-y += qrwlock.h
> > > > >  generic-y += qrwlock_types.h
> > > > > +generic-y += qspinlock.h
> > > > >  generic-y += user.h
> > > > >  generic-y += vmlinux.lds.h
> > > > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > > > new file mode 100644
> > > > > index 000000000000..e00429ac20ed
> > > > > --- /dev/null
> > > > > +++ b/arch/riscv/include/asm/spinlock.h
> > > > > @@ -0,0 +1,39 @@
> > > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > > +
> > > > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > > > +#define __ASM_RISCV_SPINLOCK_H
> > > > > +
> > > > > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > > > > +#define _Q_PENDING_LOOPS       (1 << 9)
> > > > > +
> > > > > +#define __no_arch_spinlock_redefine
> > > > > +#include <asm/ticket_spinlock.h>
> > > > > +#include <asm/qspinlock.h>
> > > > > +#include <asm/alternative.h>
> > > > > +
> > > > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > > > +
> > > > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
> > > > > +static __always_inline type arch_spin_##op(type_lock lock)             \
> > > > > +{                                                                      \
> > > > > +       if (static_branch_unlikely(&qspinlock_key))                     \
> > > > > +               return queued_spin_##op(lock);                          \
> > > > > +       return ticket_spin_##op(lock);                                  \
> > > > > +}
> > > > > +
> > > > > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > > > > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > > > > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > > > > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > > > > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > > > > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > > > > +
> > > > > +#else
> > > > > +
> > > > > +#include <asm/ticket_spinlock.h>
> > > > > +
> > > > > +#endif
> > > > > +
> > > > > +#include <asm/qrwlock.h>
> > > > > +
> > > > > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > > index 4f73c0ae44b2..31ce75522fd4 100644
> > > > > --- a/arch/riscv/kernel/setup.c
> > > > > +++ b/arch/riscv/kernel/setup.c
> > > > > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > > > >  #endif
> > > > >  }
> > > > >
> > > > > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > > > > +EXPORT_SYMBOL(qspinlock_key);
> > > > > +
> > > > > +static void __init riscv_spinlock_init(void)
> > > > > +{
> > > > > +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > > > > +                : : : : qspinlock);
> > > > > +
> > > > > +       static_branch_disable(&qspinlock_key);
> > > > > +       pr_info("Ticket spinlock: enabled\n");
> > > > > +
> > > > > +       return;
> > > > > +
> > > > > +qspinlock:
> > > > > +       pr_info("Queued spinlock: enabled\n");
> > > > > +}
> > > > > +
> > > > >  extern void __init init_rt_signal_env(void);
> > > > >
> > > > >  void __init setup_arch(char **cmdline_p)
> > > > > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > > > >         riscv_set_dma_cache_alignment();
> > > > >
> > > > >         riscv_user_isa_enable();
> > > > > +       riscv_spinlock_init();
> > > > >  }
> > > > >
> > > > >  bool arch_cpu_is_hotpluggable(int cpu)
> > > > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > > > index 0655aa5b57b2..bf47cca2c375 100644
> > > > > --- a/include/asm-generic/qspinlock.h
> > > > > +++ b/include/asm-generic/qspinlock.h
> > > > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > >  }
> > > > >  #endif
> > > > >
> > > > > +#ifndef __no_arch_spinlock_redefine
> > > > >  /*
> > > > >   * Remapping spinlock architecture specific functions to the corresponding
> > > > >   * queued spinlock functions.
> > > > > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > >  #define arch_spin_lock(l)              queued_spin_lock(l)
> > > > >  #define arch_spin_trylock(l)           queued_spin_trylock(l)
> > > > >  #define arch_spin_unlock(l)            queued_spin_unlock(l)
> > > > > +#endif
> > > > >
> > > > >  #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > > > > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > > > > index cfcff22b37b3..325779970d8a 100644
> > > > > --- a/include/asm-generic/ticket_spinlock.h
> > > > > +++ b/include/asm-generic/ticket_spinlock.h
> > > > > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > >         return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > > > >  }
> > > > >
> > > > > +#ifndef __no_arch_spinlock_redefine
> > > > >  /*
> > > > >   * Remapping spinlock architecture specific functions to the corresponding
> > > > >   * ticket spinlock functions.
> > > > > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > >  #define arch_spin_lock(l)              ticket_spin_lock(l)
> > > > >  #define arch_spin_trylock(l)           ticket_spin_trylock(l)
> > > > >  #define arch_spin_unlock(l)            ticket_spin_unlock(l)
> > > > > +#endif
> > > > >
> > > > >  #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > > > > --
> > > > > 2.39.2
> > > > >
> > > >
> > > >
> > > > --
> > > > Best Regards
> > > >  Guo Ren
> >
> >
> >
> > --
> > Best Regards
> >  Guo Ren
Alexandre Ghiti May 31, 2024, 6:22 a.m. UTC | #7
On Fri, May 31, 2024 at 3:57 AM Guo Ren <guoren@kernel.org> wrote:
>
> On Thu, May 30, 2024 at 1:30 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >
> > Hi Guo,
> >
> > On Thu, May 30, 2024 at 3:55 AM Guo Ren <guoren@kernel.org> wrote:
> > >
> > > On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > >
> > > > Hi Guo,
> > > >
> > > > On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
> > > > >
> > > > > On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > > >
> > > > > > In order to produce a generic kernel, a user can select
> > > > > > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > > > > > spinlock implementation if Zabha is not present.
> > > > > >
> > > > > > Note that we can't use alternatives here because the discovery of
> > > > > > extensions is done too late and we need to start with the qspinlock
> > > > > > implementation because the ticket spinlock implementation would pollute
> > > > > > the spinlock value, so let's use static keys.
> > > > > >
> > > > > > This is largely based on Guo's work and Leonardo reviews at [1].
> > > > > >
> > > > > > Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > > > > ---
> > > > > >  .../locking/queued-spinlocks/arch-support.txt |  2 +-
> > > > > >  arch/riscv/Kconfig                            |  1 +
> > > > > >  arch/riscv/include/asm/Kbuild                 |  4 +-
> > > > > >  arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
> > > > > >  arch/riscv/kernel/setup.c                     | 18 +++++++++
> > > > > >  include/asm-generic/qspinlock.h               |  2 +
> > > > > >  include/asm-generic/ticket_spinlock.h         |  2 +
> > > > > >  7 files changed, 66 insertions(+), 2 deletions(-)
> > > > > >  create mode 100644 arch/riscv/include/asm/spinlock.h
> > > > > >
> > > > > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > > index 22f2990392ff..cf26042480e2 100644
> > > > > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > > @@ -20,7 +20,7 @@
> > > > > >      |    openrisc: |  ok  |
> > > > > >      |      parisc: | TODO |
> > > > > >      |     powerpc: |  ok  |
> > > > > > -    |       riscv: | TODO |
> > > > > > +    |       riscv: |  ok  |
> > > > > >      |        s390: | TODO |
> > > > > >      |          sh: | TODO |
> > > > > >      |       sparc: |  ok  |
> > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > > > index 184a9edb04e0..ccf1703edeb9 100644
> > > > > > --- a/arch/riscv/Kconfig
> > > > > > +++ b/arch/riscv/Kconfig
> > > > > > @@ -59,6 +59,7 @@ config RISCV
> > > > > >         select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > > > > >         select ARCH_USE_MEMTEST
> > > > > >         select ARCH_USE_QUEUED_RWLOCKS
> > > > > > +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > > > > Using qspinlock or not depends on real hardware capabilities, not the
> > > > > compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> > > > > & qspinlock three Kconfigs, and the combo-spinlock would compat all
> > > > > hardware platforms but waste some qspinlock code size.
> > > >
> > > > You're right, and I think your comment matches what Conor mentioned
> > > > about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> > > > will allow a platform with Zabha capability to use qspinlocks. But if
> > > > the hardware does not, it will fallback to the ticket spinlocks.
> > > >
> > > > But I agree that looking at the config alone may be misleading, even
> > > > though it will work as expected at runtime. So I agree with you:
> > > > unless anyone is strongly against the combo spinlocks, I will do what
> > > > you suggest and add them.
> > > The problem with the v12 combo-spinlock is using a static_branch
> > > instead of the full ALTERNATIVE. Frankly, that's a bad example that
> > > costs more code space. I found that your cmpxchg32/64 also uses a
> > > condition branch, which has a similar problem, right?
> > >
> > > Anyway, your patch series inspired me to update the v13
> > > combo-spinlock. My plan is:
> > > 1. Separate native-qspinlock out of paravirt-qspinlock.
> > > 2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
> > > ticket-lock or qspinlock.
> >
> > What's your plan to make use of alternatives here? The alternatives
> > patching depends on the discovery of the extensions, which is done too
> > late, at least after the first use of a spinlock (the printk
> > spinlock). So you'd need to find a way to first use qspinlocks (but
> > without knowing Zabha is available) and then do the correct patching:
> I do that in v12:
> 1. Use qspinlock as init.
> 2. Change to ticket-lock or not.
> (Only qspinlock -> ticket-lock, No reverse direction)
>
> If there is no contention, Qspinlock is okay for all platforms before
> smp bringup & no-irq environment.
>

Yes, by using static keys not alternatives. My question was: how do
you plan to use alternatives here instead of static keys? To me, it's
not that simple, hence my suggestions in my previous answer.

Thanks,

Alex

> > an idea here could be to add an "init" value to the alternatives and
> > let the patching process do the right thing when the extensions are
> > known.
> >
> > Another solution would be the early discovery of the extensions, but I
> > took a look and it's easy with a device tree, but not with ACPI.
> >
> > Let me know what you plan to do and how I can help!
> >
> > Thanks,
> >
> > Alex
> >
> > >
> > > What do you think?
> > >
> > >
> > > >
> > > > Thanks again for your initial work,
> > > >
> > > > Alex
> > > >
> > > > >
> > > > > >         select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > > > > >         select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > > > > >         select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > > > > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > > > > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > > > > > --- a/arch/riscv/include/asm/Kbuild
> > > > > > +++ b/arch/riscv/include/asm/Kbuild
> > > > > > @@ -2,10 +2,12 @@
> > > > > >  generic-y += early_ioremap.h
> > > > > >  generic-y += flat.h
> > > > > >  generic-y += kvm_para.h
> > > > > > +generic-y += mcs_spinlock.h
> > > > > >  generic-y += parport.h
> > > > > > -generic-y += spinlock.h
> > > > > >  generic-y += spinlock_types.h
> > > > > > +generic-y += ticket_spinlock.h
> > > > > >  generic-y += qrwlock.h
> > > > > >  generic-y += qrwlock_types.h
> > > > > > +generic-y += qspinlock.h
> > > > > >  generic-y += user.h
> > > > > >  generic-y += vmlinux.lds.h
> > > > > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > > > > new file mode 100644
> > > > > > index 000000000000..e00429ac20ed
> > > > > > --- /dev/null
> > > > > > +++ b/arch/riscv/include/asm/spinlock.h
> > > > > > @@ -0,0 +1,39 @@
> > > > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > > > +
> > > > > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > > > > +#define __ASM_RISCV_SPINLOCK_H
> > > > > > +
> > > > > > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > > > > > +#define _Q_PENDING_LOOPS       (1 << 9)
> > > > > > +
> > > > > > +#define __no_arch_spinlock_redefine
> > > > > > +#include <asm/ticket_spinlock.h>
> > > > > > +#include <asm/qspinlock.h>
> > > > > > +#include <asm/alternative.h>
> > > > > > +
> > > > > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > > > > +
> > > > > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
> > > > > > +static __always_inline type arch_spin_##op(type_lock lock)             \
> > > > > > +{                                                                      \
> > > > > > +       if (static_branch_unlikely(&qspinlock_key))                     \
> > > > > > +               return queued_spin_##op(lock);                          \
> > > > > > +       return ticket_spin_##op(lock);                                  \
> > > > > > +}
> > > > > > +
> > > > > > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > > > > > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > > > > > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > > > > > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > > > > > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > > > > > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > > > > > +
> > > > > > +#else
> > > > > > +
> > > > > > +#include <asm/ticket_spinlock.h>
> > > > > > +
> > > > > > +#endif
> > > > > > +
> > > > > > +#include <asm/qrwlock.h>
> > > > > > +
> > > > > > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > > > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > > > index 4f73c0ae44b2..31ce75522fd4 100644
> > > > > > --- a/arch/riscv/kernel/setup.c
> > > > > > +++ b/arch/riscv/kernel/setup.c
> > > > > > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > > > > >  #endif
> > > > > >  }
> > > > > >
> > > > > > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > > > > > +EXPORT_SYMBOL(qspinlock_key);
> > > > > > +
> > > > > > +static void __init riscv_spinlock_init(void)
> > > > > > +{
> > > > > > +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > > > > > +                : : : : qspinlock);
> > > > > > +
> > > > > > +       static_branch_disable(&qspinlock_key);
> > > > > > +       pr_info("Ticket spinlock: enabled\n");
> > > > > > +
> > > > > > +       return;
> > > > > > +
> > > > > > +qspinlock:
> > > > > > +       pr_info("Queued spinlock: enabled\n");
> > > > > > +}
> > > > > > +
> > > > > >  extern void __init init_rt_signal_env(void);
> > > > > >
> > > > > >  void __init setup_arch(char **cmdline_p)
> > > > > > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > > > > >         riscv_set_dma_cache_alignment();
> > > > > >
> > > > > >         riscv_user_isa_enable();
> > > > > > +       riscv_spinlock_init();
> > > > > >  }
> > > > > >
> > > > > >  bool arch_cpu_is_hotpluggable(int cpu)
> > > > > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > > > > index 0655aa5b57b2..bf47cca2c375 100644
> > > > > > --- a/include/asm-generic/qspinlock.h
> > > > > > +++ b/include/asm-generic/qspinlock.h
> > > > > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > > >  }
> > > > > >  #endif
> > > > > >
> > > > > > +#ifndef __no_arch_spinlock_redefine
> > > > > >  /*
> > > > > >   * Remapping spinlock architecture specific functions to the corresponding
> > > > > >   * queued spinlock functions.
> > > > > > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > > >  #define arch_spin_lock(l)              queued_spin_lock(l)
> > > > > >  #define arch_spin_trylock(l)           queued_spin_trylock(l)
> > > > > >  #define arch_spin_unlock(l)            queued_spin_unlock(l)
> > > > > > +#endif
> > > > > >
> > > > > >  #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > > > > > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > > > > > index cfcff22b37b3..325779970d8a 100644
> > > > > > --- a/include/asm-generic/ticket_spinlock.h
> > > > > > +++ b/include/asm-generic/ticket_spinlock.h
> > > > > > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > > >         return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > > > > >  }
> > > > > >
> > > > > > +#ifndef __no_arch_spinlock_redefine
> > > > > >  /*
> > > > > >   * Remapping spinlock architecture specific functions to the corresponding
> > > > > >   * ticket spinlock functions.
> > > > > > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > > >  #define arch_spin_lock(l)              ticket_spin_lock(l)
> > > > > >  #define arch_spin_trylock(l)           ticket_spin_trylock(l)
> > > > > >  #define arch_spin_unlock(l)            ticket_spin_unlock(l)
> > > > > > +#endif
> > > > > >
> > > > > >  #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > > > > > --
> > > > > > 2.39.2
> > > > > >
> > > > >
> > > > >
> > > > > --
> > > > > Best Regards
> > > > >  Guo Ren
> > >
> > >
> > >
> > > --
> > > Best Regards
> > >  Guo Ren
>
>
>
> --
> Best Regards
>  Guo Ren
Guo Ren May 31, 2024, 6:42 a.m. UTC | #8
On Fri, May 31, 2024 at 2:22 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> On Fri, May 31, 2024 at 3:57 AM Guo Ren <guoren@kernel.org> wrote:
> >
> > On Thu, May 30, 2024 at 1:30 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > >
> > > Hi Guo,
> > >
> > > On Thu, May 30, 2024 at 3:55 AM Guo Ren <guoren@kernel.org> wrote:
> > > >
> > > > On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > >
> > > > > Hi Guo,
> > > > >
> > > > > On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
> > > > > >
> > > > > > On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > > > >
> > > > > > > In order to produce a generic kernel, a user can select
> > > > > > > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > > > > > > spinlock implementation if Zabha is not present.
> > > > > > >
> > > > > > > Note that we can't use alternatives here because the discovery of
> > > > > > > extensions is done too late and we need to start with the qspinlock
> > > > > > > implementation because the ticket spinlock implementation would pollute
> > > > > > > the spinlock value, so let's use static keys.
> > > > > > >
> > > > > > > This is largely based on Guo's work and Leonardo reviews at [1].
> > > > > > >
> > > > > > > Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> > > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > > > > > ---
> > > > > > >  .../locking/queued-spinlocks/arch-support.txt |  2 +-
> > > > > > >  arch/riscv/Kconfig                            |  1 +
> > > > > > >  arch/riscv/include/asm/Kbuild                 |  4 +-
> > > > > > >  arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
> > > > > > >  arch/riscv/kernel/setup.c                     | 18 +++++++++
> > > > > > >  include/asm-generic/qspinlock.h               |  2 +
> > > > > > >  include/asm-generic/ticket_spinlock.h         |  2 +
> > > > > > >  7 files changed, 66 insertions(+), 2 deletions(-)
> > > > > > >  create mode 100644 arch/riscv/include/asm/spinlock.h
> > > > > > >
> > > > > > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > > > index 22f2990392ff..cf26042480e2 100644
> > > > > > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > > > @@ -20,7 +20,7 @@
> > > > > > >      |    openrisc: |  ok  |
> > > > > > >      |      parisc: | TODO |
> > > > > > >      |     powerpc: |  ok  |
> > > > > > > -    |       riscv: | TODO |
> > > > > > > +    |       riscv: |  ok  |
> > > > > > >      |        s390: | TODO |
> > > > > > >      |          sh: | TODO |
> > > > > > >      |       sparc: |  ok  |
> > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > > > > index 184a9edb04e0..ccf1703edeb9 100644
> > > > > > > --- a/arch/riscv/Kconfig
> > > > > > > +++ b/arch/riscv/Kconfig
> > > > > > > @@ -59,6 +59,7 @@ config RISCV
> > > > > > >         select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > > > > > >         select ARCH_USE_MEMTEST
> > > > > > >         select ARCH_USE_QUEUED_RWLOCKS
> > > > > > > +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > > > > > Using qspinlock or not depends on real hardware capabilities, not the
> > > > > > compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> > > > > > & qspinlock three Kconfigs, and the combo-spinlock would compat all
> > > > > > hardware platforms but waste some qspinlock code size.
> > > > >
> > > > > You're right, and I think your comment matches what Conor mentioned
> > > > > about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> > > > > will allow a platform with Zabha capability to use qspinlocks. But if
> > > > > the hardware does not, it will fallback to the ticket spinlocks.
> > > > >
> > > > > But I agree that looking at the config alone may be misleading, even
> > > > > though it will work as expected at runtime. So I agree with you:
> > > > > unless anyone is strongly against the combo spinlocks, I will do what
> > > > > you suggest and add them.
> > > > The problem with the v12 combo-spinlock is using a static_branch
> > > > instead of the full ALTERNATIVE. Frankly, that's a bad example that
> > > > costs more code space. I found that your cmpxchg32/64 also uses a
> > > > condition branch, which has a similar problem, right?
> > > >
> > > > Anyway, your patch series inspired me to update the v13
> > > > combo-spinlock. My plan is:
> > > > 1. Separate native-qspinlock out of paravirt-qspinlock.
> > > > 2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
> > > > ticket-lock or qspinlock.
> > >
> > > What's your plan to make use of alternatives here? The alternatives
> > > patching depends on the discovery of the extensions, which is done too
> > > late, at least after the first use of a spinlock (the printk
> > > spinlock). So you'd need to find a way to first use qspinlocks (but
> > > without knowing Zabha is available) and then do the correct patching:
> > I do that in v12:
> > 1. Use qspinlock as init.
> > 2. Change to ticket-lock or not.
> > (Only qspinlock -> ticket-lock, No reverse direction)
> >
> > If there is no contention, Qspinlock is okay for all platforms before
> > smp bringup & no-irq environment.
> >
>
> Yes, by using static keys not alternatives. My question was: how do
> you plan to use alternatives here instead of static keys? To me, it's
> not that simple, hence my suggestions in my previous answer.
Yes, it's not that simple. The current framework doesn't support that
and has two problems:
1. We need to re-implement ticket-lock & qspinlock-fast-path with assembly code.
2. Current alternatives patching only for extensions, but qspinlock is
not a formal extension. Could we accept
__RISCV_ISA_EXT_DATA(xqspinlock, RISCV_ISA_EXT_XQSPINLOCK)?

>
> Thanks,
>
> Alex
>
> > > an idea here could be to add an "init" value to the alternatives and
> > > let the patching process do the right thing when the extensions are
> > > known.
> > >
> > > Another solution would be the early discovery of the extensions, but I
> > > took a look and it's easy with a device tree, but not with ACPI.
> > >
> > > Let me know what you plan to do and how I can help!
> > >
> > > Thanks,
> > >
> > > Alex
> > >
> > > >
> > > > What do you think?
> > > >
> > > >
> > > > >
> > > > > Thanks again for your initial work,
> > > > >
> > > > > Alex
> > > > >
> > > > > >
> > > > > > >         select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > > > > > >         select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > > > > > >         select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > > > > > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > > > > > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > > > > > > --- a/arch/riscv/include/asm/Kbuild
> > > > > > > +++ b/arch/riscv/include/asm/Kbuild
> > > > > > > @@ -2,10 +2,12 @@
> > > > > > >  generic-y += early_ioremap.h
> > > > > > >  generic-y += flat.h
> > > > > > >  generic-y += kvm_para.h
> > > > > > > +generic-y += mcs_spinlock.h
> > > > > > >  generic-y += parport.h
> > > > > > > -generic-y += spinlock.h
> > > > > > >  generic-y += spinlock_types.h
> > > > > > > +generic-y += ticket_spinlock.h
> > > > > > >  generic-y += qrwlock.h
> > > > > > >  generic-y += qrwlock_types.h
> > > > > > > +generic-y += qspinlock.h
> > > > > > >  generic-y += user.h
> > > > > > >  generic-y += vmlinux.lds.h
> > > > > > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > > > > > new file mode 100644
> > > > > > > index 000000000000..e00429ac20ed
> > > > > > > --- /dev/null
> > > > > > > +++ b/arch/riscv/include/asm/spinlock.h
> > > > > > > @@ -0,0 +1,39 @@
> > > > > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > > > > +
> > > > > > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > > > > > +#define __ASM_RISCV_SPINLOCK_H
> > > > > > > +
> > > > > > > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > > > > > > +#define _Q_PENDING_LOOPS       (1 << 9)
> > > > > > > +
> > > > > > > +#define __no_arch_spinlock_redefine
> > > > > > > +#include <asm/ticket_spinlock.h>
> > > > > > > +#include <asm/qspinlock.h>
> > > > > > > +#include <asm/alternative.h>
> > > > > > > +
> > > > > > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > > > > > +
> > > > > > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
> > > > > > > +static __always_inline type arch_spin_##op(type_lock lock)             \
> > > > > > > +{                                                                      \
> > > > > > > +       if (static_branch_unlikely(&qspinlock_key))                     \
> > > > > > > +               return queued_spin_##op(lock);                          \
> > > > > > > +       return ticket_spin_##op(lock);                                  \
> > > > > > > +}
> > > > > > > +
> > > > > > > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > > > > > > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > > > > > > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > > > > > > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > > > > > > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > > > > > > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > > > > > > +
> > > > > > > +#else
> > > > > > > +
> > > > > > > +#include <asm/ticket_spinlock.h>
> > > > > > > +
> > > > > > > +#endif
> > > > > > > +
> > > > > > > +#include <asm/qrwlock.h>
> > > > > > > +
> > > > > > > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > > > > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > > > > index 4f73c0ae44b2..31ce75522fd4 100644
> > > > > > > --- a/arch/riscv/kernel/setup.c
> > > > > > > +++ b/arch/riscv/kernel/setup.c
> > > > > > > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > > > > > >  #endif
> > > > > > >  }
> > > > > > >
> > > > > > > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > > > > > > +EXPORT_SYMBOL(qspinlock_key);
> > > > > > > +
> > > > > > > +static void __init riscv_spinlock_init(void)
> > > > > > > +{
> > > > > > > +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > > > > > > +                : : : : qspinlock);
> > > > > > > +
> > > > > > > +       static_branch_disable(&qspinlock_key);
> > > > > > > +       pr_info("Ticket spinlock: enabled\n");
> > > > > > > +
> > > > > > > +       return;
> > > > > > > +
> > > > > > > +qspinlock:
> > > > > > > +       pr_info("Queued spinlock: enabled\n");
> > > > > > > +}
> > > > > > > +
> > > > > > >  extern void __init init_rt_signal_env(void);
> > > > > > >
> > > > > > >  void __init setup_arch(char **cmdline_p)
> > > > > > > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > > > > > >         riscv_set_dma_cache_alignment();
> > > > > > >
> > > > > > >         riscv_user_isa_enable();
> > > > > > > +       riscv_spinlock_init();
> > > > > > >  }
> > > > > > >
> > > > > > >  bool arch_cpu_is_hotpluggable(int cpu)
> > > > > > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > > > > > index 0655aa5b57b2..bf47cca2c375 100644
> > > > > > > --- a/include/asm-generic/qspinlock.h
> > > > > > > +++ b/include/asm-generic/qspinlock.h
> > > > > > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > > > >  }
> > > > > > >  #endif
> > > > > > >
> > > > > > > +#ifndef __no_arch_spinlock_redefine
> > > > > > >  /*
> > > > > > >   * Remapping spinlock architecture specific functions to the corresponding
> > > > > > >   * queued spinlock functions.
> > > > > > > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > > > >  #define arch_spin_lock(l)              queued_spin_lock(l)
> > > > > > >  #define arch_spin_trylock(l)           queued_spin_trylock(l)
> > > > > > >  #define arch_spin_unlock(l)            queued_spin_unlock(l)
> > > > > > > +#endif
> > > > > > >
> > > > > > >  #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > > > > > > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > > > > > > index cfcff22b37b3..325779970d8a 100644
> > > > > > > --- a/include/asm-generic/ticket_spinlock.h
> > > > > > > +++ b/include/asm-generic/ticket_spinlock.h
> > > > > > > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > > > >         return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > > > > > >  }
> > > > > > >
> > > > > > > +#ifndef __no_arch_spinlock_redefine
> > > > > > >  /*
> > > > > > >   * Remapping spinlock architecture specific functions to the corresponding
> > > > > > >   * ticket spinlock functions.
> > > > > > > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > > > >  #define arch_spin_lock(l)              ticket_spin_lock(l)
> > > > > > >  #define arch_spin_trylock(l)           ticket_spin_trylock(l)
> > > > > > >  #define arch_spin_unlock(l)            ticket_spin_unlock(l)
> > > > > > > +#endif
> > > > > > >
> > > > > > >  #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > > > > > > --
> > > > > > > 2.39.2
> > > > > > >
> > > > > >
> > > > > >
> > > > > > --
> > > > > > Best Regards
> > > > > >  Guo Ren
> > > >
> > > >
> > > >
> > > > --
> > > > Best Regards
> > > >  Guo Ren
> >
> >
> >
> > --
> > Best Regards
> >  Guo Ren
Guo Ren May 31, 2024, 1:10 p.m. UTC | #9
On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> Hi Guo,
>
> On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
> >
> > On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > >
> > > In order to produce a generic kernel, a user can select
> > > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > > spinlock implementation if Zabha is not present.
> > >
> > > Note that we can't use alternatives here because the discovery of
> > > extensions is done too late and we need to start with the qspinlock
> > > implementation because the ticket spinlock implementation would pollute
> > > the spinlock value, so let's use static keys.
Zabha is not a prerequisite for qspinlock; the prerequisite for
qspinlock is the *forward progress guarantee* in the atomic operation
loop during intense contention. Even with Zabha enabled to meet the
requirements of xchg_tail, that still only applies when the number of
CPUs is less than 16K. The qspinlock uses cmpxchg loop instead of
xchg_tail when the number of cores is more than 16K. Thus, hardware
support for Zabha does not equate to the safe use of qspinlock.

Therefore, I would like to propose a new ISA extension: Zafpg(Atomic
Forward Progress Guarantee). If RISC-V vendors can ensure the progress
of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
lines are sufficiently sticky, they could then claim support for this
extension. Linux could then select different spinlock implementations
based on this extension's support or not.

> > >
> > > This is largely based on Guo's work and Leonardo reviews at [1].
> > >
> > > Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > ---
> > >  .../locking/queued-spinlocks/arch-support.txt |  2 +-
> > >  arch/riscv/Kconfig                            |  1 +
> > >  arch/riscv/include/asm/Kbuild                 |  4 +-
> > >  arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
> > >  arch/riscv/kernel/setup.c                     | 18 +++++++++
> > >  include/asm-generic/qspinlock.h               |  2 +
> > >  include/asm-generic/ticket_spinlock.h         |  2 +
> > >  7 files changed, 66 insertions(+), 2 deletions(-)
> > >  create mode 100644 arch/riscv/include/asm/spinlock.h
> > >
> > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > index 22f2990392ff..cf26042480e2 100644
> > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > @@ -20,7 +20,7 @@
> > >      |    openrisc: |  ok  |
> > >      |      parisc: | TODO |
> > >      |     powerpc: |  ok  |
> > > -    |       riscv: | TODO |
> > > +    |       riscv: |  ok  |
> > >      |        s390: | TODO |
> > >      |          sh: | TODO |
> > >      |       sparc: |  ok  |
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index 184a9edb04e0..ccf1703edeb9 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -59,6 +59,7 @@ config RISCV
> > >         select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > >         select ARCH_USE_MEMTEST
> > >         select ARCH_USE_QUEUED_RWLOCKS
> > > +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > Using qspinlock or not depends on real hardware capabilities, not the
> > compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> > & qspinlock three Kconfigs, and the combo-spinlock would compat all
> > hardware platforms but waste some qspinlock code size.
>
> You're right, and I think your comment matches what Conor mentioned
> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> will allow a platform with Zabha capability to use qspinlocks. But if
> the hardware does not, it will fallback to the ticket spinlocks.
>
> But I agree that looking at the config alone may be misleading, even
> though it will work as expected at runtime. So I agree with you:
> unless anyone is strongly against the combo spinlocks, I will do what
> you suggest and add them.
>
> Thanks again for your initial work,
>
> Alex
>
> >
> > >         select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > >         select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > >         select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > > --- a/arch/riscv/include/asm/Kbuild
> > > +++ b/arch/riscv/include/asm/Kbuild
> > > @@ -2,10 +2,12 @@
> > >  generic-y += early_ioremap.h
> > >  generic-y += flat.h
> > >  generic-y += kvm_para.h
> > > +generic-y += mcs_spinlock.h
> > >  generic-y += parport.h
> > > -generic-y += spinlock.h
> > >  generic-y += spinlock_types.h
> > > +generic-y += ticket_spinlock.h
> > >  generic-y += qrwlock.h
> > >  generic-y += qrwlock_types.h
> > > +generic-y += qspinlock.h
> > >  generic-y += user.h
> > >  generic-y += vmlinux.lds.h
> > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > new file mode 100644
> > > index 000000000000..e00429ac20ed
> > > --- /dev/null
> > > +++ b/arch/riscv/include/asm/spinlock.h
> > > @@ -0,0 +1,39 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +
> > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > +#define __ASM_RISCV_SPINLOCK_H
> > > +
> > > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > > +#define _Q_PENDING_LOOPS       (1 << 9)
> > > +
> > > +#define __no_arch_spinlock_redefine
> > > +#include <asm/ticket_spinlock.h>
> > > +#include <asm/qspinlock.h>
> > > +#include <asm/alternative.h>
> > > +
> > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > +
> > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
> > > +static __always_inline type arch_spin_##op(type_lock lock)             \
> > > +{                                                                      \
> > > +       if (static_branch_unlikely(&qspinlock_key))                     \
> > > +               return queued_spin_##op(lock);                          \
> > > +       return ticket_spin_##op(lock);                                  \
> > > +}
> > > +
> > > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > > +
> > > +#else
> > > +
> > > +#include <asm/ticket_spinlock.h>
> > > +
> > > +#endif
> > > +
> > > +#include <asm/qrwlock.h>
> > > +
> > > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > index 4f73c0ae44b2..31ce75522fd4 100644
> > > --- a/arch/riscv/kernel/setup.c
> > > +++ b/arch/riscv/kernel/setup.c
> > > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > >  #endif
> > >  }
> > >
> > > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > > +EXPORT_SYMBOL(qspinlock_key);
> > > +
> > > +static void __init riscv_spinlock_init(void)
> > > +{
> > > +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > > +                : : : : qspinlock);
> > > +
> > > +       static_branch_disable(&qspinlock_key);
> > > +       pr_info("Ticket spinlock: enabled\n");
> > > +
> > > +       return;
> > > +
> > > +qspinlock:
> > > +       pr_info("Queued spinlock: enabled\n");
> > > +}
> > > +
> > >  extern void __init init_rt_signal_env(void);
> > >
> > >  void __init setup_arch(char **cmdline_p)
> > > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > >         riscv_set_dma_cache_alignment();
> > >
> > >         riscv_user_isa_enable();
> > > +       riscv_spinlock_init();
> > >  }
> > >
> > >  bool arch_cpu_is_hotpluggable(int cpu)
> > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > index 0655aa5b57b2..bf47cca2c375 100644
> > > --- a/include/asm-generic/qspinlock.h
> > > +++ b/include/asm-generic/qspinlock.h
> > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > >  }
> > >  #endif
> > >
> > > +#ifndef __no_arch_spinlock_redefine
> > >  /*
> > >   * Remapping spinlock architecture specific functions to the corresponding
> > >   * queued spinlock functions.
> > > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > >  #define arch_spin_lock(l)              queued_spin_lock(l)
> > >  #define arch_spin_trylock(l)           queued_spin_trylock(l)
> > >  #define arch_spin_unlock(l)            queued_spin_unlock(l)
> > > +#endif
> > >
> > >  #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > > index cfcff22b37b3..325779970d8a 100644
> > > --- a/include/asm-generic/ticket_spinlock.h
> > > +++ b/include/asm-generic/ticket_spinlock.h
> > > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > >         return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > >  }
> > >
> > > +#ifndef __no_arch_spinlock_redefine
> > >  /*
> > >   * Remapping spinlock architecture specific functions to the corresponding
> > >   * ticket spinlock functions.
> > > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > >  #define arch_spin_lock(l)              ticket_spin_lock(l)
> > >  #define arch_spin_trylock(l)           ticket_spin_trylock(l)
> > >  #define arch_spin_unlock(l)            ticket_spin_unlock(l)
> > > +#endif
> > >
> > >  #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > > --
> > > 2.39.2
> > >
> >
> >
> > --
> > Best Regards
> >  Guo Ren
Alexandre Ghiti May 31, 2024, 1:37 p.m. UTC | #10
Hi Andrea,

On 29/05/2024 02:55, Andrea Parri wrote:
>> +	select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> IIUC, we should make sure qspinlocks run with ARCH_WEAK_RELEASE_ACQUIRE,
> perhaps a similar select for the latter?  (not a kconfig expert)


Where did you see this dependency? And if that is really a dependency of 
qspinlocks, shouldn't this be under CONFIG_QUEUED_SPINLOCKS? (not a 
Kconfig expert too).


>
>    Andrea
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Andrea Parri May 31, 2024, 3:52 p.m. UTC | #11
> > > +	select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > IIUC, we should make sure qspinlocks run with ARCH_WEAK_RELEASE_ACQUIRE,
> > perhaps a similar select for the latter?  (not a kconfig expert)
> 
> 
> Where did you see this dependency? And if that is really a dependency of
> qspinlocks, shouldn't this be under CONFIG_QUEUED_SPINLOCKS? (not a Kconfig
> expert too).

The comment on smp_mb__after_unlock_lock() in include/linux/rcupdate.h
(the barrier is currently only used by the RCU subsystem) recalls:

  /*
   * Place this after a lock-acquisition primitive to guarantee that
   * an UNLOCK+LOCK pair acts as a full barrier.  This guarantee applies
   * if the UNLOCK and LOCK are executed by the same CPU or if the
   * UNLOCK and LOCK operate on the same lock variable.
   */
  #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE
  #define smp_mb__after_unlock_lock()	smp_mb()  /* Full ordering for lock. */
  #else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
  #define smp_mb__after_unlock_lock()	do { } while (0)
  #endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */

Architectures whose UNLOCK+LOCK implementation does not (already) meet
the required "full barrier" ordering property (currently, only powerpc)
can overwrite the "default"/common #define for this barrier (NOP) and
meet the ordering by opting in for ARCH_WEAK_RELEASE_ACQUIRE.

The (current) "generic" ticket lock implementation provides "the full
barrier" in its LOCK operations (hence in part. in UNLOCK+LOCK), cf.

  arch_spin_trylock() -> atomic_try_cmpxchg()
  arch_spin_lock() -> atomic_fetch_add()
                   -> atomic_cond_read_acquire(); smp_mb()

but the "UNLOCK+LOCK pairs act as a full barrier" property doesn't hold
true for riscv (and powerpc) when switching over to queued spinlocks.
OTOH, I see no particular reason for other "users" of queued spinlocks
(notably, x86 and arm64) for selecting ARCH_WEAK_RELEASE_ACQUIRE.

But does this address your concern?  Let me know if I misunderstood it.

  Andrea
Guo Ren June 1, 2024, 6:18 a.m. UTC | #12
On Fri, May 31, 2024 at 11:52 PM Andrea Parri <parri.andrea@gmail.com> wrote:
>
> > > > + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > > IIUC, we should make sure qspinlocks run with ARCH_WEAK_RELEASE_ACQUIRE,
> > > perhaps a similar select for the latter?  (not a kconfig expert)
> >
> >
> > Where did you see this dependency? And if that is really a dependency of
> > qspinlocks, shouldn't this be under CONFIG_QUEUED_SPINLOCKS? (not a Kconfig
> > expert too).
>
> The comment on smp_mb__after_unlock_lock() in include/linux/rcupdate.h
> (the barrier is currently only used by the RCU subsystem) recalls:
>
>   /*
>    * Place this after a lock-acquisition primitive to guarantee that
>    * an UNLOCK+LOCK pair acts as a full barrier.  This guarantee applies
>    * if the UNLOCK and LOCK are executed by the same CPU or if the
>    * UNLOCK and LOCK operate on the same lock variable.
>    */
>   #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE
>   #define smp_mb__after_unlock_lock()   smp_mb()  /* Full ordering for lock. */
>   #else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
>   #define smp_mb__after_unlock_lock()   do { } while (0)
>   #endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
>
> Architectures whose UNLOCK+LOCK implementation does not (already) meet
> the required "full barrier" ordering property (currently, only powerpc)
> can overwrite the "default"/common #define for this barrier (NOP) and
> meet the ordering by opting in for ARCH_WEAK_RELEASE_ACQUIRE.
>
> The (current) "generic" ticket lock implementation provides "the full
> barrier" in its LOCK operations (hence in part. in UNLOCK+LOCK), cf.
>
>   arch_spin_trylock() -> atomic_try_cmpxchg()
>   arch_spin_lock() -> atomic_fetch_add()
>                    -> atomic_cond_read_acquire(); smp_mb()
>
> but the "UNLOCK+LOCK pairs act as a full barrier" property doesn't hold
> true for riscv (and powerpc) when switching over to queued spinlock.
Yes.

> OTOH, I see no particular reason for other "users" of queued spinlocks
> (notably, x86 and arm64) for selecting ARCH_WEAK_RELEASE_ACQUIRE.
I looked at the riscv-unprivileged ppo section, seems RISC-V .rl ->
.aq has RCsc annotations.
ref:
Explicit Synchronization
 5. has an acquire annotation
 6. has a release annotation
 7. a and b both have RCsc annotations

And for qspinlock:
unlock:
        smp_store_release(&lock->locked, 0);

lock:
        if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))

If the hardware has Store-Release and CAS instructions, they all obey
Explicit Synchronization rules. Then RISC-V "UNLOCK+LOCK" pairs act as
a full barrier, right?

>
> But does this address your concern?  Let me know if I misunderstood it.
>
>   Andrea
Andrea Parri June 3, 2024, 12:41 a.m. UTC | #13
> I looked at the riscv-unprivileged ppo section, seems RISC-V .rl ->
> .aq has RCsc annotations.
> ref:
> Explicit Synchronization
>  5. has an acquire annotation
>  6. has a release annotation
>  7. a and b both have RCsc annotations
> 
> And for qspinlock:
> unlock:
>         smp_store_release(&lock->locked, 0);
> 
> lock:
>         if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
> 
> If the hardware has Store-Release and CAS instructions, they all obey
> Explicit Synchronization rules. Then RISC-V "UNLOCK+LOCK" pairs act as
> a full barrier, right?

Presuming you were thinking at CAS.aq (based on your previous remarks
above), that all seems right to me.  In fact, the (putative) Store.rl
and an LR.aq would also do it (by the same/mentioned rules).

  Andrea
Alexandre Ghiti June 3, 2024, 9:21 a.m. UTC | #14
Hi Guo,

On 31/05/2024 08:42, Guo Ren wrote:
> On Fri, May 31, 2024 at 2:22 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>> On Fri, May 31, 2024 at 3:57 AM Guo Ren <guoren@kernel.org> wrote:
>>> On Thu, May 30, 2024 at 1:30 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>>>> Hi Guo,
>>>>
>>>> On Thu, May 30, 2024 at 3:55 AM Guo Ren <guoren@kernel.org> wrote:
>>>>> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>>>>>> Hi Guo,
>>>>>>
>>>>>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
>>>>>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>>>>>>>> In order to produce a generic kernel, a user can select
>>>>>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
>>>>>>>> spinlock implementation if Zabha is not present.
>>>>>>>>
>>>>>>>> Note that we can't use alternatives here because the discovery of
>>>>>>>> extensions is done too late and we need to start with the qspinlock
>>>>>>>> implementation because the ticket spinlock implementation would pollute
>>>>>>>> the spinlock value, so let's use static keys.
>>>>>>>>
>>>>>>>> This is largely based on Guo's work and Leonardo reviews at [1].
>>>>>>>>
>>>>>>>> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
>>>>>>>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>>>>>>>> ---
>>>>>>>>   .../locking/queued-spinlocks/arch-support.txt |  2 +-
>>>>>>>>   arch/riscv/Kconfig                            |  1 +
>>>>>>>>   arch/riscv/include/asm/Kbuild                 |  4 +-
>>>>>>>>   arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
>>>>>>>>   arch/riscv/kernel/setup.c                     | 18 +++++++++
>>>>>>>>   include/asm-generic/qspinlock.h               |  2 +
>>>>>>>>   include/asm-generic/ticket_spinlock.h         |  2 +
>>>>>>>>   7 files changed, 66 insertions(+), 2 deletions(-)
>>>>>>>>   create mode 100644 arch/riscv/include/asm/spinlock.h
>>>>>>>>
>>>>>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>>>> index 22f2990392ff..cf26042480e2 100644
>>>>>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>>>> @@ -20,7 +20,7 @@
>>>>>>>>       |    openrisc: |  ok  |
>>>>>>>>       |      parisc: | TODO |
>>>>>>>>       |     powerpc: |  ok  |
>>>>>>>> -    |       riscv: | TODO |
>>>>>>>> +    |       riscv: |  ok  |
>>>>>>>>       |        s390: | TODO |
>>>>>>>>       |          sh: | TODO |
>>>>>>>>       |       sparc: |  ok  |
>>>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>>>>>> index 184a9edb04e0..ccf1703edeb9 100644
>>>>>>>> --- a/arch/riscv/Kconfig
>>>>>>>> +++ b/arch/riscv/Kconfig
>>>>>>>> @@ -59,6 +59,7 @@ config RISCV
>>>>>>>>          select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
>>>>>>>>          select ARCH_USE_MEMTEST
>>>>>>>>          select ARCH_USE_QUEUED_RWLOCKS
>>>>>>>> +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
>>>>>>> Using qspinlock or not depends on real hardware capabilities, not the
>>>>>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
>>>>>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
>>>>>>> hardware platforms but waste some qspinlock code size.
>>>>>> You're right, and I think your comment matches what Conor mentioned
>>>>>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
>>>>>> will allow a platform with Zabha capability to use qspinlocks. But if
>>>>>> the hardware does not, it will fallback to the ticket spinlocks.
>>>>>>
>>>>>> But I agree that looking at the config alone may be misleading, even
>>>>>> though it will work as expected at runtime. So I agree with you:
>>>>>> unless anyone is strongly against the combo spinlocks, I will do what
>>>>>> you suggest and add them.
>>>>> The problem with the v12 combo-spinlock is using a static_branch
>>>>> instead of the full ALTERNATIVE. Frankly, that's a bad example that
>>>>> costs more code space. I found that your cmpxchg32/64 also uses a
>>>>> condition branch, which has a similar problem, right?
>>>>>
>>>>> Anyway, your patch series inspired me to update the v13
>>>>> combo-spinlock. My plan is:
>>>>> 1. Separate native-qspinlock out of paravirt-qspinlock.
>>>>> 2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
>>>>> ticket-lock or qspinlock.
>>>> What's your plan to make use of alternatives here? The alternatives
>>>> patching depends on the discovery of the extensions, which is done too
>>>> late, at least after the first use of a spinlock (the printk
>>>> spinlock). So you'd need to find a way to first use qspinlocks (but
>>>> without knowing Zabha is available) and then do the correct patching:
>>> I do that in v12:
>>> 1. Use qspinlock as init.
>>> 2. Change to ticket-lock or not.
>>> (Only qspinlock -> ticket-lock, No reverse direction)
>>>
>>> If there is no contention, Qspinlock is okay for all platforms before
>>> smp bringup & no-irq environment.
>>>
>> Yes, by using static keys not alternatives. My question was: how do
>> you plan to use alternatives here instead of static keys? To me, it's
>> not that simple, hence my suggestions in my previous answer.
> Yes, it's not that simple. The current framework doesn't support that
> and has two problems:
> 1. We need to re-implement ticket-lock & qspinlock-fast-path with assembly code.
> 2. Current alternatives patching only for extensions, but qspinlock is
> not a formal extension. Could we accept
> __RISCV_ISA_EXT_DATA(xqspinlock, RISCV_ISA_EXT_XQSPINLOCK)?


But the problem is that the alternatives needs to patch the code very 
early in the boot process which is not possible since we don't have the 
list of extensions yet (for ACPI systems), so your 
RISCV_ISA_EXT_XQSPINLOCK proposal would not help.

Thanks,

Alex


>
>> Thanks,
>>
>> Alex
>>
>>>> an idea here could be to add an "init" value to the alternatives and
>>>> let the patching process do the right thing when the extensions are
>>>> known.
>>>>
>>>> Another solution would be the early discovery of the extensions, but I
>>>> took a look and it's easy with a device tree, but not with ACPI.
>>>>
>>>> Let me know what you plan to do and how I can help!
>>>>
>>>> Thanks,
>>>>
>>>> Alex
>>>>
>>>>> What do you think?
>>>>>
>>>>>
>>>>>> Thanks again for your initial work,
>>>>>>
>>>>>> Alex
>>>>>>
>>>>>>>>          select ARCH_USES_CFI_TRAPS if CFI_CLANG
>>>>>>>>          select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
>>>>>>>>          select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
>>>>>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
>>>>>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
>>>>>>>> --- a/arch/riscv/include/asm/Kbuild
>>>>>>>> +++ b/arch/riscv/include/asm/Kbuild
>>>>>>>> @@ -2,10 +2,12 @@
>>>>>>>>   generic-y += early_ioremap.h
>>>>>>>>   generic-y += flat.h
>>>>>>>>   generic-y += kvm_para.h
>>>>>>>> +generic-y += mcs_spinlock.h
>>>>>>>>   generic-y += parport.h
>>>>>>>> -generic-y += spinlock.h
>>>>>>>>   generic-y += spinlock_types.h
>>>>>>>> +generic-y += ticket_spinlock.h
>>>>>>>>   generic-y += qrwlock.h
>>>>>>>>   generic-y += qrwlock_types.h
>>>>>>>> +generic-y += qspinlock.h
>>>>>>>>   generic-y += user.h
>>>>>>>>   generic-y += vmlinux.lds.h
>>>>>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..e00429ac20ed
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/arch/riscv/include/asm/spinlock.h
>>>>>>>> @@ -0,0 +1,39 @@
>>>>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>>>>>> +
>>>>>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
>>>>>>>> +#define __ASM_RISCV_SPINLOCK_H
>>>>>>>> +
>>>>>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>>>>>>>> +#define _Q_PENDING_LOOPS       (1 << 9)
>>>>>>>> +
>>>>>>>> +#define __no_arch_spinlock_redefine
>>>>>>>> +#include <asm/ticket_spinlock.h>
>>>>>>>> +#include <asm/qspinlock.h>
>>>>>>>> +#include <asm/alternative.h>
>>>>>>>> +
>>>>>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
>>>>>>>> +
>>>>>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
>>>>>>>> +static __always_inline type arch_spin_##op(type_lock lock)             \
>>>>>>>> +{                                                                      \
>>>>>>>> +       if (static_branch_unlikely(&qspinlock_key))                     \
>>>>>>>> +               return queued_spin_##op(lock);                          \
>>>>>>>> +       return ticket_spin_##op(lock);                                  \
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
>>>>>>>> +
>>>>>>>> +#else
>>>>>>>> +
>>>>>>>> +#include <asm/ticket_spinlock.h>
>>>>>>>> +
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>> +#include <asm/qrwlock.h>
>>>>>>>> +
>>>>>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
>>>>>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
>>>>>>>> index 4f73c0ae44b2..31ce75522fd4 100644
>>>>>>>> --- a/arch/riscv/kernel/setup.c
>>>>>>>> +++ b/arch/riscv/kernel/setup.c
>>>>>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
>>>>>>>>   #endif
>>>>>>>>   }
>>>>>>>>
>>>>>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
>>>>>>>> +EXPORT_SYMBOL(qspinlock_key);
>>>>>>>> +
>>>>>>>> +static void __init riscv_spinlock_init(void)
>>>>>>>> +{
>>>>>>>> +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
>>>>>>>> +                : : : : qspinlock);
>>>>>>>> +
>>>>>>>> +       static_branch_disable(&qspinlock_key);
>>>>>>>> +       pr_info("Ticket spinlock: enabled\n");
>>>>>>>> +
>>>>>>>> +       return;
>>>>>>>> +
>>>>>>>> +qspinlock:
>>>>>>>> +       pr_info("Queued spinlock: enabled\n");
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>   extern void __init init_rt_signal_env(void);
>>>>>>>>
>>>>>>>>   void __init setup_arch(char **cmdline_p)
>>>>>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
>>>>>>>>          riscv_set_dma_cache_alignment();
>>>>>>>>
>>>>>>>>          riscv_user_isa_enable();
>>>>>>>> +       riscv_spinlock_init();
>>>>>>>>   }
>>>>>>>>
>>>>>>>>   bool arch_cpu_is_hotpluggable(int cpu)
>>>>>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
>>>>>>>> index 0655aa5b57b2..bf47cca2c375 100644
>>>>>>>> --- a/include/asm-generic/qspinlock.h
>>>>>>>> +++ b/include/asm-generic/qspinlock.h
>>>>>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>>>>>   }
>>>>>>>>   #endif
>>>>>>>>
>>>>>>>> +#ifndef __no_arch_spinlock_redefine
>>>>>>>>   /*
>>>>>>>>    * Remapping spinlock architecture specific functions to the corresponding
>>>>>>>>    * queued spinlock functions.
>>>>>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>>>>>   #define arch_spin_lock(l)              queued_spin_lock(l)
>>>>>>>>   #define arch_spin_trylock(l)           queued_spin_trylock(l)
>>>>>>>>   #define arch_spin_unlock(l)            queued_spin_unlock(l)
>>>>>>>> +#endif
>>>>>>>>
>>>>>>>>   #endif /* __ASM_GENERIC_QSPINLOCK_H */
>>>>>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
>>>>>>>> index cfcff22b37b3..325779970d8a 100644
>>>>>>>> --- a/include/asm-generic/ticket_spinlock.h
>>>>>>>> +++ b/include/asm-generic/ticket_spinlock.h
>>>>>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>>>>>          return (s16)((val >> 16) - (val & 0xffff)) > 1;
>>>>>>>>   }
>>>>>>>>
>>>>>>>> +#ifndef __no_arch_spinlock_redefine
>>>>>>>>   /*
>>>>>>>>    * Remapping spinlock architecture specific functions to the corresponding
>>>>>>>>    * ticket spinlock functions.
>>>>>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>>>>>   #define arch_spin_lock(l)              ticket_spin_lock(l)
>>>>>>>>   #define arch_spin_trylock(l)           ticket_spin_trylock(l)
>>>>>>>>   #define arch_spin_unlock(l)            ticket_spin_unlock(l)
>>>>>>>> +#endif
>>>>>>>>
>>>>>>>>   #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
>>>>>>>> --
>>>>>>>> 2.39.2
>>>>>>>>
>>>>>>>
>>>>>>> --
>>>>>>> Best Regards
>>>>>>>   Guo Ren
>>>>>
>>>>>
>>>>> --
>>>>> Best Regards
>>>>>   Guo Ren
>>>
>>>
>>> --
>>> Best Regards
>>>   Guo Ren
>
>
Alexandre Ghiti June 3, 2024, 9:49 a.m. UTC | #15
Hi Guo,

On 31/05/2024 15:10, Guo Ren wrote:
> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>> Hi Guo,
>>
>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>>>> In order to produce a generic kernel, a user can select
>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
>>>> spinlock implementation if Zabha is not present.
>>>>
>>>> Note that we can't use alternatives here because the discovery of
>>>> extensions is done too late and we need to start with the qspinlock
>>>> implementation because the ticket spinlock implementation would pollute
>>>> the spinlock value, so let's use static keys.
> Zabha is not a prerequisite for qspinlock; the prerequisite for
> qspinlock is the *forward progress guarantee* in the atomic operation
> loop during intense contention. Even with Zabha enabled to meet the
> requirements of xchg_tail, that still only applies when the number of
> CPUs is less than 16K. The qspinlock uses cmpxchg loop instead of
> xchg_tail when the number of cores is more than 16K. Thus, hardware
> support for Zabha does not equate to the safe use of qspinlock.


But if we have Zacas to implement cmpxchg(), we still provide the 
"forward progress guarantee" then right? Let me know if I missed something.

Thanks,

Alex


>
> Therefore, I would like to propose a new ISA extension: Zafpg(Atomic
> Forward Progress Guarantee). If RISC-V vendors can ensure the progress
> of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
> lines are sufficiently sticky, they could then claim support for this
> extension. Linux could then select different spinlock implementations
> based on this extension's support or not.
>
>>>> This is largely based on Guo's work and Leonardo reviews at [1].
>>>>
>>>> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
>>>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>>>> ---
>>>>   .../locking/queued-spinlocks/arch-support.txt |  2 +-
>>>>   arch/riscv/Kconfig                            |  1 +
>>>>   arch/riscv/include/asm/Kbuild                 |  4 +-
>>>>   arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
>>>>   arch/riscv/kernel/setup.c                     | 18 +++++++++
>>>>   include/asm-generic/qspinlock.h               |  2 +
>>>>   include/asm-generic/ticket_spinlock.h         |  2 +
>>>>   7 files changed, 66 insertions(+), 2 deletions(-)
>>>>   create mode 100644 arch/riscv/include/asm/spinlock.h
>>>>
>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>> index 22f2990392ff..cf26042480e2 100644
>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>> @@ -20,7 +20,7 @@
>>>>       |    openrisc: |  ok  |
>>>>       |      parisc: | TODO |
>>>>       |     powerpc: |  ok  |
>>>> -    |       riscv: | TODO |
>>>> +    |       riscv: |  ok  |
>>>>       |        s390: | TODO |
>>>>       |          sh: | TODO |
>>>>       |       sparc: |  ok  |
>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>> index 184a9edb04e0..ccf1703edeb9 100644
>>>> --- a/arch/riscv/Kconfig
>>>> +++ b/arch/riscv/Kconfig
>>>> @@ -59,6 +59,7 @@ config RISCV
>>>>          select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
>>>>          select ARCH_USE_MEMTEST
>>>>          select ARCH_USE_QUEUED_RWLOCKS
>>>> +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
>>> Using qspinlock or not depends on real hardware capabilities, not the
>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
>>> hardware platforms but waste some qspinlock code size.
>> You're right, and I think your comment matches what Conor mentioned
>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
>> will allow a platform with Zabha capability to use qspinlocks. But if
>> the hardware does not, it will fallback to the ticket spinlocks.
>>
>> But I agree that looking at the config alone may be misleading, even
>> though it will work as expected at runtime. So I agree with you:
>> unless anyone is strongly against the combo spinlocks, I will do what
>> you suggest and add them.
>>
>> Thanks again for your initial work,
>>
>> Alex
>>
>>>>          select ARCH_USES_CFI_TRAPS if CFI_CLANG
>>>>          select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
>>>>          select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
>>>> --- a/arch/riscv/include/asm/Kbuild
>>>> +++ b/arch/riscv/include/asm/Kbuild
>>>> @@ -2,10 +2,12 @@
>>>>   generic-y += early_ioremap.h
>>>>   generic-y += flat.h
>>>>   generic-y += kvm_para.h
>>>> +generic-y += mcs_spinlock.h
>>>>   generic-y += parport.h
>>>> -generic-y += spinlock.h
>>>>   generic-y += spinlock_types.h
>>>> +generic-y += ticket_spinlock.h
>>>>   generic-y += qrwlock.h
>>>>   generic-y += qrwlock_types.h
>>>> +generic-y += qspinlock.h
>>>>   generic-y += user.h
>>>>   generic-y += vmlinux.lds.h
>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
>>>> new file mode 100644
>>>> index 000000000000..e00429ac20ed
>>>> --- /dev/null
>>>> +++ b/arch/riscv/include/asm/spinlock.h
>>>> @@ -0,0 +1,39 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>> +
>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
>>>> +#define __ASM_RISCV_SPINLOCK_H
>>>> +
>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>>>> +#define _Q_PENDING_LOOPS       (1 << 9)
>>>> +
>>>> +#define __no_arch_spinlock_redefine
>>>> +#include <asm/ticket_spinlock.h>
>>>> +#include <asm/qspinlock.h>
>>>> +#include <asm/alternative.h>
>>>> +
>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
>>>> +
>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
>>>> +static __always_inline type arch_spin_##op(type_lock lock)             \
>>>> +{                                                                      \
>>>> +       if (static_branch_unlikely(&qspinlock_key))                     \
>>>> +               return queued_spin_##op(lock);                          \
>>>> +       return ticket_spin_##op(lock);                                  \
>>>> +}
>>>> +
>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
>>>> +
>>>> +#else
>>>> +
>>>> +#include <asm/ticket_spinlock.h>
>>>> +
>>>> +#endif
>>>> +
>>>> +#include <asm/qrwlock.h>
>>>> +
>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
>>>> index 4f73c0ae44b2..31ce75522fd4 100644
>>>> --- a/arch/riscv/kernel/setup.c
>>>> +++ b/arch/riscv/kernel/setup.c
>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
>>>>   #endif
>>>>   }
>>>>
>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
>>>> +EXPORT_SYMBOL(qspinlock_key);
>>>> +
>>>> +static void __init riscv_spinlock_init(void)
>>>> +{
>>>> +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
>>>> +                : : : : qspinlock);
>>>> +
>>>> +       static_branch_disable(&qspinlock_key);
>>>> +       pr_info("Ticket spinlock: enabled\n");
>>>> +
>>>> +       return;
>>>> +
>>>> +qspinlock:
>>>> +       pr_info("Queued spinlock: enabled\n");
>>>> +}
>>>> +
>>>>   extern void __init init_rt_signal_env(void);
>>>>
>>>>   void __init setup_arch(char **cmdline_p)
>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
>>>>          riscv_set_dma_cache_alignment();
>>>>
>>>>          riscv_user_isa_enable();
>>>> +       riscv_spinlock_init();
>>>>   }
>>>>
>>>>   bool arch_cpu_is_hotpluggable(int cpu)
>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
>>>> index 0655aa5b57b2..bf47cca2c375 100644
>>>> --- a/include/asm-generic/qspinlock.h
>>>> +++ b/include/asm-generic/qspinlock.h
>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>   }
>>>>   #endif
>>>>
>>>> +#ifndef __no_arch_spinlock_redefine
>>>>   /*
>>>>    * Remapping spinlock architecture specific functions to the corresponding
>>>>    * queued spinlock functions.
>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>   #define arch_spin_lock(l)              queued_spin_lock(l)
>>>>   #define arch_spin_trylock(l)           queued_spin_trylock(l)
>>>>   #define arch_spin_unlock(l)            queued_spin_unlock(l)
>>>> +#endif
>>>>
>>>>   #endif /* __ASM_GENERIC_QSPINLOCK_H */
>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
>>>> index cfcff22b37b3..325779970d8a 100644
>>>> --- a/include/asm-generic/ticket_spinlock.h
>>>> +++ b/include/asm-generic/ticket_spinlock.h
>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>          return (s16)((val >> 16) - (val & 0xffff)) > 1;
>>>>   }
>>>>
>>>> +#ifndef __no_arch_spinlock_redefine
>>>>   /*
>>>>    * Remapping spinlock architecture specific functions to the corresponding
>>>>    * ticket spinlock functions.
>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>   #define arch_spin_lock(l)              ticket_spin_lock(l)
>>>>   #define arch_spin_trylock(l)           ticket_spin_trylock(l)
>>>>   #define arch_spin_unlock(l)            ticket_spin_unlock(l)
>>>> +#endif
>>>>
>>>>   #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
>>>> --
>>>> 2.39.2
>>>>
>>>
>>> --
>>> Best Regards
>>>   Guo Ren
>
>
Guo Ren June 3, 2024, 11:11 a.m. UTC | #16
On Mon, Jun 3, 2024 at 5:22 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
>
> Hi Guo,
>
> On 31/05/2024 08:42, Guo Ren wrote:
> > On Fri, May 31, 2024 at 2:22 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >> On Fri, May 31, 2024 at 3:57 AM Guo Ren <guoren@kernel.org> wrote:
> >>> On Thu, May 30, 2024 at 1:30 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >>>> Hi Guo,
> >>>>
> >>>> On Thu, May 30, 2024 at 3:55 AM Guo Ren <guoren@kernel.org> wrote:
> >>>>> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >>>>>> Hi Guo,
> >>>>>>
> >>>>>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
> >>>>>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >>>>>>>> In order to produce a generic kernel, a user can select
> >>>>>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> >>>>>>>> spinlock implementation if Zabha is not present.
> >>>>>>>>
> >>>>>>>> Note that we can't use alternatives here because the discovery of
> >>>>>>>> extensions is done too late and we need to start with the qspinlock
> >>>>>>>> implementation because the ticket spinlock implementation would pollute
> >>>>>>>> the spinlock value, so let's use static keys.
> >>>>>>>>
> >>>>>>>> This is largely based on Guo's work and Leonardo reviews at [1].
> >>>>>>>>
> >>>>>>>> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> >>>>>>>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> >>>>>>>> ---
> >>>>>>>>   .../locking/queued-spinlocks/arch-support.txt |  2 +-
> >>>>>>>>   arch/riscv/Kconfig                            |  1 +
> >>>>>>>>   arch/riscv/include/asm/Kbuild                 |  4 +-
> >>>>>>>>   arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
> >>>>>>>>   arch/riscv/kernel/setup.c                     | 18 +++++++++
> >>>>>>>>   include/asm-generic/qspinlock.h               |  2 +
> >>>>>>>>   include/asm-generic/ticket_spinlock.h         |  2 +
> >>>>>>>>   7 files changed, 66 insertions(+), 2 deletions(-)
> >>>>>>>>   create mode 100644 arch/riscv/include/asm/spinlock.h
> >>>>>>>>
> >>>>>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>>>> index 22f2990392ff..cf26042480e2 100644
> >>>>>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>>>> @@ -20,7 +20,7 @@
> >>>>>>>>       |    openrisc: |  ok  |
> >>>>>>>>       |      parisc: | TODO |
> >>>>>>>>       |     powerpc: |  ok  |
> >>>>>>>> -    |       riscv: | TODO |
> >>>>>>>> +    |       riscv: |  ok  |
> >>>>>>>>       |        s390: | TODO |
> >>>>>>>>       |          sh: | TODO |
> >>>>>>>>       |       sparc: |  ok  |
> >>>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>>>>>>> index 184a9edb04e0..ccf1703edeb9 100644
> >>>>>>>> --- a/arch/riscv/Kconfig
> >>>>>>>> +++ b/arch/riscv/Kconfig
> >>>>>>>> @@ -59,6 +59,7 @@ config RISCV
> >>>>>>>>          select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> >>>>>>>>          select ARCH_USE_MEMTEST
> >>>>>>>>          select ARCH_USE_QUEUED_RWLOCKS
> >>>>>>>> +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> >>>>>>> Using qspinlock or not depends on real hardware capabilities, not the
> >>>>>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> >>>>>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
> >>>>>>> hardware platforms but waste some qspinlock code size.
> >>>>>> You're right, and I think your comment matches what Conor mentioned
> >>>>>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> >>>>>> will allow a platform with Zabha capability to use qspinlocks. But if
> >>>>>> the hardware does not, it will fallback to the ticket spinlocks.
> >>>>>>
> >>>>>> But I agree that looking at the config alone may be misleading, even
> >>>>>> though it will work as expected at runtime. So I agree with you:
> >>>>>> unless anyone is strongly against the combo spinlocks, I will do what
> >>>>>> you suggest and add them.
> >>>>> The problem with the v12 combo-spinlock is using a static_branch
> >>>>> instead of the full ALTERNATIVE. Frankly, that's a bad example that
> >>>>> costs more code space. I found that your cmpxchg32/64 also uses a
> >>>>> condition branch, which has a similar problem, right?
> >>>>>
> >>>>> Anyway, your patch series inspired me to update the v13
> >>>>> combo-spinlock. My plan is:
> >>>>> 1. Separate native-qspinlock out of paravirt-qspinlock.
> >>>>> 2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
> >>>>> ticket-lock or qspinlock.
> >>>> What's your plan to make use of alternatives here? The alternatives
> >>>> patching depends on the discovery of the extensions, which is done too
> >>>> late, at least after the first use of a spinlock (the printk
> >>>> spinlock). So you'd need to find a way to first use qspinlocks (but
> >>>> without knowing Zabha is available) and then do the correct patching:
> >>> I do that in v12:
> >>> 1. Use qspinlock as init.
> >>> 2. Change to ticket-lock or not.
> >>> (Only qspinlock -> ticket-lock, No reverse direction)
> >>>
> >>> If there is no contention, Qspinlock is okay for all platforms before
> >>> smp bringup & no-irq environment.
> >>>
> >> Yes, by using static keys not alternatives. My question was: how do
> >> you plan to use alternatives here instead of static keys? To me, it's
> >> not that simple, hence my suggestions in my previous answer.
> > Yes, it's not that simple. The current framework doesn't support that
> > and has two problems:
> > 1. We need to re-implement ticket-lock & qspinlock-fast-path with assembly code.
> > 2. Current alternatives patching only for extensions, but qspinlock is
> > not a formal extension. Could we accept
> > __RISCV_ISA_EXT_DATA(xqspinlock, RISCV_ISA_EXT_XQSPINLOCK)?
>
>
> But the problem is that the alternatives needs to patch the code very
> early in the boot process which is not possible since we don't have the
> list of extensions yet (for ACPI systems), so your
> RISCV_ISA_EXT_XQSPINLOCK proposal would not help.
I think the setup_arch()->apply_boot_alternatives() is okay. I can do
that in v13.

>
> Thanks,
>
> Alex
>
>
> >
> >> Thanks,
> >>
> >> Alex
> >>
> >>>> an idea here could be to add an "init" value to the alternatives and
> >>>> let the patching process do the right thing when the extensions are
> >>>> known.
> >>>>
> >>>> Another solution would be the early discovery of the extensions, but I
> >>>> took a look and it's easy with a device tree, but not with ACPI.
> >>>>
> >>>> Let me know what you plan to do and how I can help!
> >>>>
> >>>> Thanks,
> >>>>
> >>>> Alex
> >>>>
> >>>>> What do you think?
> >>>>>
> >>>>>
> >>>>>> Thanks again for your initial work,
> >>>>>>
> >>>>>> Alex
> >>>>>>
> >>>>>>>>          select ARCH_USES_CFI_TRAPS if CFI_CLANG
> >>>>>>>>          select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> >>>>>>>>          select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> >>>>>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> >>>>>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
> >>>>>>>> --- a/arch/riscv/include/asm/Kbuild
> >>>>>>>> +++ b/arch/riscv/include/asm/Kbuild
> >>>>>>>> @@ -2,10 +2,12 @@
> >>>>>>>>   generic-y += early_ioremap.h
> >>>>>>>>   generic-y += flat.h
> >>>>>>>>   generic-y += kvm_para.h
> >>>>>>>> +generic-y += mcs_spinlock.h
> >>>>>>>>   generic-y += parport.h
> >>>>>>>> -generic-y += spinlock.h
> >>>>>>>>   generic-y += spinlock_types.h
> >>>>>>>> +generic-y += ticket_spinlock.h
> >>>>>>>>   generic-y += qrwlock.h
> >>>>>>>>   generic-y += qrwlock_types.h
> >>>>>>>> +generic-y += qspinlock.h
> >>>>>>>>   generic-y += user.h
> >>>>>>>>   generic-y += vmlinux.lds.h
> >>>>>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> >>>>>>>> new file mode 100644
> >>>>>>>> index 000000000000..e00429ac20ed
> >>>>>>>> --- /dev/null
> >>>>>>>> +++ b/arch/riscv/include/asm/spinlock.h
> >>>>>>>> @@ -0,0 +1,39 @@
> >>>>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>>>>>>> +
> >>>>>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
> >>>>>>>> +#define __ASM_RISCV_SPINLOCK_H
> >>>>>>>> +
> >>>>>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
> >>>>>>>> +#define _Q_PENDING_LOOPS       (1 << 9)
> >>>>>>>> +
> >>>>>>>> +#define __no_arch_spinlock_redefine
> >>>>>>>> +#include <asm/ticket_spinlock.h>
> >>>>>>>> +#include <asm/qspinlock.h>
> >>>>>>>> +#include <asm/alternative.h>
> >>>>>>>> +
> >>>>>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> >>>>>>>> +
> >>>>>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
> >>>>>>>> +static __always_inline type arch_spin_##op(type_lock lock)             \
> >>>>>>>> +{                                                                      \
> >>>>>>>> +       if (static_branch_unlikely(&qspinlock_key))                     \
> >>>>>>>> +               return queued_spin_##op(lock);                          \
> >>>>>>>> +       return ticket_spin_##op(lock);                                  \
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> >>>>>>>> +
> >>>>>>>> +#else
> >>>>>>>> +
> >>>>>>>> +#include <asm/ticket_spinlock.h>
> >>>>>>>> +
> >>>>>>>> +#endif
> >>>>>>>> +
> >>>>>>>> +#include <asm/qrwlock.h>
> >>>>>>>> +
> >>>>>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
> >>>>>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> >>>>>>>> index 4f73c0ae44b2..31ce75522fd4 100644
> >>>>>>>> --- a/arch/riscv/kernel/setup.c
> >>>>>>>> +++ b/arch/riscv/kernel/setup.c
> >>>>>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> >>>>>>>>   #endif
> >>>>>>>>   }
> >>>>>>>>
> >>>>>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> >>>>>>>> +EXPORT_SYMBOL(qspinlock_key);
> >>>>>>>> +
> >>>>>>>> +static void __init riscv_spinlock_init(void)
> >>>>>>>> +{
> >>>>>>>> +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> >>>>>>>> +                : : : : qspinlock);
> >>>>>>>> +
> >>>>>>>> +       static_branch_disable(&qspinlock_key);
> >>>>>>>> +       pr_info("Ticket spinlock: enabled\n");
> >>>>>>>> +
> >>>>>>>> +       return;
> >>>>>>>> +
> >>>>>>>> +qspinlock:
> >>>>>>>> +       pr_info("Queued spinlock: enabled\n");
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>>   extern void __init init_rt_signal_env(void);
> >>>>>>>>
> >>>>>>>>   void __init setup_arch(char **cmdline_p)
> >>>>>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> >>>>>>>>          riscv_set_dma_cache_alignment();
> >>>>>>>>
> >>>>>>>>          riscv_user_isa_enable();
> >>>>>>>> +       riscv_spinlock_init();
> >>>>>>>>   }
> >>>>>>>>
> >>>>>>>>   bool arch_cpu_is_hotpluggable(int cpu)
> >>>>>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> >>>>>>>> index 0655aa5b57b2..bf47cca2c375 100644
> >>>>>>>> --- a/include/asm-generic/qspinlock.h
> >>>>>>>> +++ b/include/asm-generic/qspinlock.h
> >>>>>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>>>>>   }
> >>>>>>>>   #endif
> >>>>>>>>
> >>>>>>>> +#ifndef __no_arch_spinlock_redefine
> >>>>>>>>   /*
> >>>>>>>>    * Remapping spinlock architecture specific functions to the corresponding
> >>>>>>>>    * queued spinlock functions.
> >>>>>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>>>>>   #define arch_spin_lock(l)              queued_spin_lock(l)
> >>>>>>>>   #define arch_spin_trylock(l)           queued_spin_trylock(l)
> >>>>>>>>   #define arch_spin_unlock(l)            queued_spin_unlock(l)
> >>>>>>>> +#endif
> >>>>>>>>
> >>>>>>>>   #endif /* __ASM_GENERIC_QSPINLOCK_H */
> >>>>>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> >>>>>>>> index cfcff22b37b3..325779970d8a 100644
> >>>>>>>> --- a/include/asm-generic/ticket_spinlock.h
> >>>>>>>> +++ b/include/asm-generic/ticket_spinlock.h
> >>>>>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>>>>>          return (s16)((val >> 16) - (val & 0xffff)) > 1;
> >>>>>>>>   }
> >>>>>>>>
> >>>>>>>> +#ifndef __no_arch_spinlock_redefine
> >>>>>>>>   /*
> >>>>>>>>    * Remapping spinlock architecture specific functions to the corresponding
> >>>>>>>>    * ticket spinlock functions.
> >>>>>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>>>>>   #define arch_spin_lock(l)              ticket_spin_lock(l)
> >>>>>>>>   #define arch_spin_trylock(l)           ticket_spin_trylock(l)
> >>>>>>>>   #define arch_spin_unlock(l)            ticket_spin_unlock(l)
> >>>>>>>> +#endif
> >>>>>>>>
> >>>>>>>>   #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> >>>>>>>> --
> >>>>>>>> 2.39.2
> >>>>>>>>
> >>>>>>>
> >>>>>>> --
> >>>>>>> Best Regards
> >>>>>>>   Guo Ren
> >>>>>
> >>>>>
> >>>>> --
> >>>>> Best Regards
> >>>>>   Guo Ren
> >>>
> >>>
> >>> --
> >>> Best Regards
> >>>   Guo Ren
> >
> >
Guo Ren June 3, 2024, 11:28 a.m. UTC | #17
On Mon, Jun 3, 2024 at 5:49 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
>
> Hi Guo,
>
> On 31/05/2024 15:10, Guo Ren wrote:
> > On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >> Hi Guo,
> >>
> >> On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
> >>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >>>> In order to produce a generic kernel, a user can select
> >>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> >>>> spinlock implementation if Zabha is not present.
> >>>>
> >>>> Note that we can't use alternatives here because the discovery of
> >>>> extensions is done too late and we need to start with the qspinlock
> >>>> implementation because the ticket spinlock implementation would pollute
> >>>> the spinlock value, so let's use static keys.
> > Zabha is not a prerequisite for qspinlock; the prerequisite for
> > qspinlock is the *forward progress guarantee* in the atomic operation
> > loop during intense contention. Even with Zabha enabled to meet the
> > requirements of xchg_tail, that still only applies when the number of
> > CPUs is less than 16K. The qspinlock uses cmpxchg loop instead of
> > xchg_tail when the number of cores is more than 16K. Thus, hardware
> > support for Zabha does not equate to the safe use of qspinlock.
>
>
> But if we have Zacas to implement cmpxchg(), we still provide the
> "forward progress guarantee" then right? Let me know if I missed something.
The qspinlock needs a "forward progress guarantee," not Zacas, and
Zabha could give a guarantee to qspinlock xchg_tail (CPUs < 16K) with
AMOSWAP.H instruction. But, using "LR/SC pairs" also could give enough
fwd guarantee that depends on the micro-arch design of the riscv core.
I think the help of AMO instead of LR/SC is it could off-load AMO
operations from LSU to CIU(Next Level Cache or Interconnect), which
gains better performance. "LR/SC pairs" only provide Near-Atomic, but
AMO gives Far-Atomic additionally.


>
> Thanks,
>
> Alex
>
>
> >
> > Therefore, I would like to propose a new ISA extension: Zafpg(Atomic
> > Forward Progress Guarantee). If RISC-V vendors can ensure the progress
> > of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
> > lines are sufficiently sticky, they could then claim support for this
> > extension. Linux could then select different spinlock implementations
> > based on this extension's support or not.
> >
> >>>> This is largely based on Guo's work and Leonardo reviews at [1].
> >>>>
> >>>> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> >>>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> >>>> ---
> >>>>   .../locking/queued-spinlocks/arch-support.txt |  2 +-
> >>>>   arch/riscv/Kconfig                            |  1 +
> >>>>   arch/riscv/include/asm/Kbuild                 |  4 +-
> >>>>   arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
> >>>>   arch/riscv/kernel/setup.c                     | 18 +++++++++
> >>>>   include/asm-generic/qspinlock.h               |  2 +
> >>>>   include/asm-generic/ticket_spinlock.h         |  2 +
> >>>>   7 files changed, 66 insertions(+), 2 deletions(-)
> >>>>   create mode 100644 arch/riscv/include/asm/spinlock.h
> >>>>
> >>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>> index 22f2990392ff..cf26042480e2 100644
> >>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>> @@ -20,7 +20,7 @@
> >>>>       |    openrisc: |  ok  |
> >>>>       |      parisc: | TODO |
> >>>>       |     powerpc: |  ok  |
> >>>> -    |       riscv: | TODO |
> >>>> +    |       riscv: |  ok  |
> >>>>       |        s390: | TODO |
> >>>>       |          sh: | TODO |
> >>>>       |       sparc: |  ok  |
> >>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>>> index 184a9edb04e0..ccf1703edeb9 100644
> >>>> --- a/arch/riscv/Kconfig
> >>>> +++ b/arch/riscv/Kconfig
> >>>> @@ -59,6 +59,7 @@ config RISCV
> >>>>          select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> >>>>          select ARCH_USE_MEMTEST
> >>>>          select ARCH_USE_QUEUED_RWLOCKS
> >>>> +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> >>> Using qspinlock or not depends on real hardware capabilities, not the
> >>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> >>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
> >>> hardware platforms but waste some qspinlock code size.
> >> You're right, and I think your comment matches what Conor mentioned
> >> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> >> will allow a platform with Zabha capability to use qspinlocks. But if
> >> the hardware does not, it will fallback to the ticket spinlocks.
> >>
> >> But I agree that looking at the config alone may be misleading, even
> >> though it will work as expected at runtime. So I agree with you:
> >> unless anyone is strongly against the combo spinlocks, I will do what
> >> you suggest and add them.
> >>
> >> Thanks again for your initial work,
> >>
> >> Alex
> >>
> >>>>          select ARCH_USES_CFI_TRAPS if CFI_CLANG
> >>>>          select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> >>>>          select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> >>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> >>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
> >>>> --- a/arch/riscv/include/asm/Kbuild
> >>>> +++ b/arch/riscv/include/asm/Kbuild
> >>>> @@ -2,10 +2,12 @@
> >>>>   generic-y += early_ioremap.h
> >>>>   generic-y += flat.h
> >>>>   generic-y += kvm_para.h
> >>>> +generic-y += mcs_spinlock.h
> >>>>   generic-y += parport.h
> >>>> -generic-y += spinlock.h
> >>>>   generic-y += spinlock_types.h
> >>>> +generic-y += ticket_spinlock.h
> >>>>   generic-y += qrwlock.h
> >>>>   generic-y += qrwlock_types.h
> >>>> +generic-y += qspinlock.h
> >>>>   generic-y += user.h
> >>>>   generic-y += vmlinux.lds.h
> >>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> >>>> new file mode 100644
> >>>> index 000000000000..e00429ac20ed
> >>>> --- /dev/null
> >>>> +++ b/arch/riscv/include/asm/spinlock.h
> >>>> @@ -0,0 +1,39 @@
> >>>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>>> +
> >>>> +#ifndef __ASM_RISCV_SPINLOCK_H
> >>>> +#define __ASM_RISCV_SPINLOCK_H
> >>>> +
> >>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
> >>>> +#define _Q_PENDING_LOOPS       (1 << 9)
> >>>> +
> >>>> +#define __no_arch_spinlock_redefine
> >>>> +#include <asm/ticket_spinlock.h>
> >>>> +#include <asm/qspinlock.h>
> >>>> +#include <asm/alternative.h>
> >>>> +
> >>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> >>>> +
> >>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
> >>>> +static __always_inline type arch_spin_##op(type_lock lock)             \
> >>>> +{                                                                      \
> >>>> +       if (static_branch_unlikely(&qspinlock_key))                     \
> >>>> +               return queued_spin_##op(lock);                          \
> >>>> +       return ticket_spin_##op(lock);                                  \
> >>>> +}
> >>>> +
> >>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> >>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> >>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> >>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> >>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> >>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> >>>> +
> >>>> +#else
> >>>> +
> >>>> +#include <asm/ticket_spinlock.h>
> >>>> +
> >>>> +#endif
> >>>> +
> >>>> +#include <asm/qrwlock.h>
> >>>> +
> >>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
> >>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> >>>> index 4f73c0ae44b2..31ce75522fd4 100644
> >>>> --- a/arch/riscv/kernel/setup.c
> >>>> +++ b/arch/riscv/kernel/setup.c
> >>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> >>>>   #endif
> >>>>   }
> >>>>
> >>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> >>>> +EXPORT_SYMBOL(qspinlock_key);
> >>>> +
> >>>> +static void __init riscv_spinlock_init(void)
> >>>> +{
> >>>> +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> >>>> +                : : : : qspinlock);
> >>>> +
> >>>> +       static_branch_disable(&qspinlock_key);
> >>>> +       pr_info("Ticket spinlock: enabled\n");
> >>>> +
> >>>> +       return;
> >>>> +
> >>>> +qspinlock:
> >>>> +       pr_info("Queued spinlock: enabled\n");
> >>>> +}
> >>>> +
> >>>>   extern void __init init_rt_signal_env(void);
> >>>>
> >>>>   void __init setup_arch(char **cmdline_p)
> >>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> >>>>          riscv_set_dma_cache_alignment();
> >>>>
> >>>>          riscv_user_isa_enable();
> >>>> +       riscv_spinlock_init();
> >>>>   }
> >>>>
> >>>>   bool arch_cpu_is_hotpluggable(int cpu)
> >>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> >>>> index 0655aa5b57b2..bf47cca2c375 100644
> >>>> --- a/include/asm-generic/qspinlock.h
> >>>> +++ b/include/asm-generic/qspinlock.h
> >>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>   }
> >>>>   #endif
> >>>>
> >>>> +#ifndef __no_arch_spinlock_redefine
> >>>>   /*
> >>>>    * Remapping spinlock architecture specific functions to the corresponding
> >>>>    * queued spinlock functions.
> >>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>   #define arch_spin_lock(l)              queued_spin_lock(l)
> >>>>   #define arch_spin_trylock(l)           queued_spin_trylock(l)
> >>>>   #define arch_spin_unlock(l)            queued_spin_unlock(l)
> >>>> +#endif
> >>>>
> >>>>   #endif /* __ASM_GENERIC_QSPINLOCK_H */
> >>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> >>>> index cfcff22b37b3..325779970d8a 100644
> >>>> --- a/include/asm-generic/ticket_spinlock.h
> >>>> +++ b/include/asm-generic/ticket_spinlock.h
> >>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>          return (s16)((val >> 16) - (val & 0xffff)) > 1;
> >>>>   }
> >>>>
> >>>> +#ifndef __no_arch_spinlock_redefine
> >>>>   /*
> >>>>    * Remapping spinlock architecture specific functions to the corresponding
> >>>>    * ticket spinlock functions.
> >>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>   #define arch_spin_lock(l)              ticket_spin_lock(l)
> >>>>   #define arch_spin_trylock(l)           ticket_spin_trylock(l)
> >>>>   #define arch_spin_unlock(l)            ticket_spin_unlock(l)
> >>>> +#endif
> >>>>
> >>>>   #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> >>>> --
> >>>> 2.39.2
> >>>>
> >>>
> >>> --
> >>> Best Regards
> >>>   Guo Ren
> >
> >
Alexandre Ghiti June 3, 2024, 11:34 a.m. UTC | #18
On 03/06/2024 13:28, Guo Ren wrote:
> On Mon, Jun 3, 2024 at 5:49 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
>> Hi Guo,
>>
>> On 31/05/2024 15:10, Guo Ren wrote:
>>> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>>>> Hi Guo,
>>>>
>>>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
>>>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>>>>>> In order to produce a generic kernel, a user can select
>>>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
>>>>>> spinlock implementation if Zabha is not present.
>>>>>>
>>>>>> Note that we can't use alternatives here because the discovery of
>>>>>> extensions is done too late and we need to start with the qspinlock
>>>>>> implementation because the ticket spinlock implementation would pollute
>>>>>> the spinlock value, so let's use static keys.
>>> Zabha is not a prerequisite for qspinlock; the prerequisite for
>>> qspinlock is the *forward progress guarantee* in the atomic operation
>>> loop during intense contention. Even with Zabha enabled to meet the
>>> requirements of xchg_tail, that still only applies when the number of
>>> CPUs is less than 16K. The qspinlock uses cmpxchg loop instead of
>>> xchg_tail when the number of cores is more than 16K. Thus, hardware
>>> support for Zabha does not equate to the safe use of qspinlock.
>>
>> But if we have Zacas to implement cmpxchg(), we still provide the
>> "forward progress guarantee" then right? Let me know if I missed something.
> The qspinlock needs a "forward progress guarantee," not Zacas, and
> Zabha could give a guarantee to qspinlock xchg_tail (CPUs < 16K) with
> AMOSWAP.H instruction. But, using "LR/SC pairs" also could give enough
> fwd guarantee that depends on the micro-arch design of the riscv core.
> I think the help of AMO instead of LR/SC is it could off-load AMO
> operations from LSU to CIU(Next Level Cache or Interconnect), which
> gains better performance. "LR/SC pairs" only provide Near-Atomic, but
> AMO gives Far-Atomic additionally.


I understand qspinlocks require forward progress and that your company's 
LR/SC implementations provide such guarantee, I'm not arguing against 
your new extension proposal.

It seemed to me in your previous message that you implied that when 
NR_CPUS > 16k, we should not use qspinlocks. My question was: "Don't 
Zacas provide such guarantee"? I think it does, so qspinlocks should 
actually depend on Zabha *and* Zacas. Is that correct to you?

Let me know if I misunderstood something again.

Thanks,

Alex


>
>
>> Thanks,
>>
>> Alex
>>
>>
>>> Therefore, I would like to propose a new ISA extension: Zafpg(Atomic
>>> Forward Progress Guarantee). If RISC-V vendors can ensure the progress
>>> of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
>>> lines are sufficiently sticky, they could then claim support for this
>>> extension. Linux could then select different spinlock implementations
>>> based on this extension's support or not.
>>>
>>>>>> This is largely based on Guo's work and Leonardo reviews at [1].
>>>>>>
>>>>>> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
>>>>>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>>>>>> ---
>>>>>>    .../locking/queued-spinlocks/arch-support.txt |  2 +-
>>>>>>    arch/riscv/Kconfig                            |  1 +
>>>>>>    arch/riscv/include/asm/Kbuild                 |  4 +-
>>>>>>    arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
>>>>>>    arch/riscv/kernel/setup.c                     | 18 +++++++++
>>>>>>    include/asm-generic/qspinlock.h               |  2 +
>>>>>>    include/asm-generic/ticket_spinlock.h         |  2 +
>>>>>>    7 files changed, 66 insertions(+), 2 deletions(-)
>>>>>>    create mode 100644 arch/riscv/include/asm/spinlock.h
>>>>>>
>>>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>> index 22f2990392ff..cf26042480e2 100644
>>>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>> @@ -20,7 +20,7 @@
>>>>>>        |    openrisc: |  ok  |
>>>>>>        |      parisc: | TODO |
>>>>>>        |     powerpc: |  ok  |
>>>>>> -    |       riscv: | TODO |
>>>>>> +    |       riscv: |  ok  |
>>>>>>        |        s390: | TODO |
>>>>>>        |          sh: | TODO |
>>>>>>        |       sparc: |  ok  |
>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>>>> index 184a9edb04e0..ccf1703edeb9 100644
>>>>>> --- a/arch/riscv/Kconfig
>>>>>> +++ b/arch/riscv/Kconfig
>>>>>> @@ -59,6 +59,7 @@ config RISCV
>>>>>>           select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
>>>>>>           select ARCH_USE_MEMTEST
>>>>>>           select ARCH_USE_QUEUED_RWLOCKS
>>>>>> +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
>>>>> Using qspinlock or not depends on real hardware capabilities, not the
>>>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
>>>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
>>>>> hardware platforms but waste some qspinlock code size.
>>>> You're right, and I think your comment matches what Conor mentioned
>>>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
>>>> will allow a platform with Zabha capability to use qspinlocks. But if
>>>> the hardware does not, it will fallback to the ticket spinlocks.
>>>>
>>>> But I agree that looking at the config alone may be misleading, even
>>>> though it will work as expected at runtime. So I agree with you:
>>>> unless anyone is strongly against the combo spinlocks, I will do what
>>>> you suggest and add them.
>>>>
>>>> Thanks again for your initial work,
>>>>
>>>> Alex
>>>>
>>>>>>           select ARCH_USES_CFI_TRAPS if CFI_CLANG
>>>>>>           select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
>>>>>>           select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
>>>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
>>>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
>>>>>> --- a/arch/riscv/include/asm/Kbuild
>>>>>> +++ b/arch/riscv/include/asm/Kbuild
>>>>>> @@ -2,10 +2,12 @@
>>>>>>    generic-y += early_ioremap.h
>>>>>>    generic-y += flat.h
>>>>>>    generic-y += kvm_para.h
>>>>>> +generic-y += mcs_spinlock.h
>>>>>>    generic-y += parport.h
>>>>>> -generic-y += spinlock.h
>>>>>>    generic-y += spinlock_types.h
>>>>>> +generic-y += ticket_spinlock.h
>>>>>>    generic-y += qrwlock.h
>>>>>>    generic-y += qrwlock_types.h
>>>>>> +generic-y += qspinlock.h
>>>>>>    generic-y += user.h
>>>>>>    generic-y += vmlinux.lds.h
>>>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
>>>>>> new file mode 100644
>>>>>> index 000000000000..e00429ac20ed
>>>>>> --- /dev/null
>>>>>> +++ b/arch/riscv/include/asm/spinlock.h
>>>>>> @@ -0,0 +1,39 @@
>>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>>>> +
>>>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
>>>>>> +#define __ASM_RISCV_SPINLOCK_H
>>>>>> +
>>>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>>>>>> +#define _Q_PENDING_LOOPS       (1 << 9)
>>>>>> +
>>>>>> +#define __no_arch_spinlock_redefine
>>>>>> +#include <asm/ticket_spinlock.h>
>>>>>> +#include <asm/qspinlock.h>
>>>>>> +#include <asm/alternative.h>
>>>>>> +
>>>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
>>>>>> +
>>>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
>>>>>> +static __always_inline type arch_spin_##op(type_lock lock)             \
>>>>>> +{                                                                      \
>>>>>> +       if (static_branch_unlikely(&qspinlock_key))                     \
>>>>>> +               return queued_spin_##op(lock);                          \
>>>>>> +       return ticket_spin_##op(lock);                                  \
>>>>>> +}
>>>>>> +
>>>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
>>>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
>>>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
>>>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
>>>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
>>>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
>>>>>> +
>>>>>> +#else
>>>>>> +
>>>>>> +#include <asm/ticket_spinlock.h>
>>>>>> +
>>>>>> +#endif
>>>>>> +
>>>>>> +#include <asm/qrwlock.h>
>>>>>> +
>>>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
>>>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
>>>>>> index 4f73c0ae44b2..31ce75522fd4 100644
>>>>>> --- a/arch/riscv/kernel/setup.c
>>>>>> +++ b/arch/riscv/kernel/setup.c
>>>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
>>>>>>    #endif
>>>>>>    }
>>>>>>
>>>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
>>>>>> +EXPORT_SYMBOL(qspinlock_key);
>>>>>> +
>>>>>> +static void __init riscv_spinlock_init(void)
>>>>>> +{
>>>>>> +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
>>>>>> +                : : : : qspinlock);
>>>>>> +
>>>>>> +       static_branch_disable(&qspinlock_key);
>>>>>> +       pr_info("Ticket spinlock: enabled\n");
>>>>>> +
>>>>>> +       return;
>>>>>> +
>>>>>> +qspinlock:
>>>>>> +       pr_info("Queued spinlock: enabled\n");
>>>>>> +}
>>>>>> +
>>>>>>    extern void __init init_rt_signal_env(void);
>>>>>>
>>>>>>    void __init setup_arch(char **cmdline_p)
>>>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
>>>>>>           riscv_set_dma_cache_alignment();
>>>>>>
>>>>>>           riscv_user_isa_enable();
>>>>>> +       riscv_spinlock_init();
>>>>>>    }
>>>>>>
>>>>>>    bool arch_cpu_is_hotpluggable(int cpu)
>>>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
>>>>>> index 0655aa5b57b2..bf47cca2c375 100644
>>>>>> --- a/include/asm-generic/qspinlock.h
>>>>>> +++ b/include/asm-generic/qspinlock.h
>>>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>>>    }
>>>>>>    #endif
>>>>>>
>>>>>> +#ifndef __no_arch_spinlock_redefine
>>>>>>    /*
>>>>>>     * Remapping spinlock architecture specific functions to the corresponding
>>>>>>     * queued spinlock functions.
>>>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>>>    #define arch_spin_lock(l)              queued_spin_lock(l)
>>>>>>    #define arch_spin_trylock(l)           queued_spin_trylock(l)
>>>>>>    #define arch_spin_unlock(l)            queued_spin_unlock(l)
>>>>>> +#endif
>>>>>>
>>>>>>    #endif /* __ASM_GENERIC_QSPINLOCK_H */
>>>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
>>>>>> index cfcff22b37b3..325779970d8a 100644
>>>>>> --- a/include/asm-generic/ticket_spinlock.h
>>>>>> +++ b/include/asm-generic/ticket_spinlock.h
>>>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>>>           return (s16)((val >> 16) - (val & 0xffff)) > 1;
>>>>>>    }
>>>>>>
>>>>>> +#ifndef __no_arch_spinlock_redefine
>>>>>>    /*
>>>>>>     * Remapping spinlock architecture specific functions to the corresponding
>>>>>>     * ticket spinlock functions.
>>>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>>>    #define arch_spin_lock(l)              ticket_spin_lock(l)
>>>>>>    #define arch_spin_trylock(l)           ticket_spin_trylock(l)
>>>>>>    #define arch_spin_unlock(l)            ticket_spin_unlock(l)
>>>>>> +#endif
>>>>>>
>>>>>>    #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
>>>>>> --
>>>>>> 2.39.2
>>>>>>
>>>>> --
>>>>> Best Regards
>>>>>    Guo Ren
>>>
>
>
Guo Ren June 3, 2024, 11:44 a.m. UTC | #19
On Mon, Jun 3, 2024 at 7:34 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
>
> On 03/06/2024 13:28, Guo Ren wrote:
> > On Mon, Jun 3, 2024 at 5:49 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
> >> Hi Guo,
> >>
> >> On 31/05/2024 15:10, Guo Ren wrote:
> >>> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >>>> Hi Guo,
> >>>>
> >>>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
> >>>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >>>>>> In order to produce a generic kernel, a user can select
> >>>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> >>>>>> spinlock implementation if Zabha is not present.
> >>>>>>
> >>>>>> Note that we can't use alternatives here because the discovery of
> >>>>>> extensions is done too late and we need to start with the qspinlock
> >>>>>> implementation because the ticket spinlock implementation would pollute
> >>>>>> the spinlock value, so let's use static keys.
> >>> Zabha is not a prerequisite for qspinlock; the prerequisite for
> >>> qspinlock is the *forward progress guarantee* in the atomic operation
> >>> loop during intense contention. Even with Zabha enabled to meet the
> >>> requirements of xchg_tail, that still only applies when the number of
> >>> CPUs is less than 16K. The qspinlock uses cmpxchg loop instead of
> >>> xchg_tail when the number of cores is more than 16K. Thus, hardware
> >>> support for Zabha does not equate to the safe use of qspinlock.
> >>
> >> But if we have Zacas to implement cmpxchg(), we still provide the
> >> "forward progress guarantee" then right? Let me know if I missed something.
> > The qspinlock needs a "forward progress guarantee," not Zacas, and
> > Zabha could give a guarantee to qspinlock xchg_tail (CPUs < 16K) with
> > AMOSWAP.H instruction. But, using "LR/SC pairs" also could give enough
> > fwd guarantee that depends on the micro-arch design of the riscv core.
> > I think the help of AMO instead of LR/SC is it could off-load AMO
> > operations from LSU to CIU(Next Level Cache or Interconnect), which
> > gains better performance. "LR/SC pairs" only provide Near-Atomic, but
> > AMO gives Far-Atomic additionally.
>
>
> I understand qspinlocks require forward progress and that your company's
> LR/SC implementations provide such guarantee, I'm not arguing against
> your new extension proposal.
>
> It seemed to me in your previous message that you implied that when
> NR_CPUS > 16k, we should not use qspinlocks. My question was: "Don't
> Zacas provide such guarantee"? I think it does, so qspinlocks should
> actually depend on Zabha *and* Zacas. Is that correct to you?
See kernel/locking/qspinlock.c
#if _Q_PENDING_BITS == 8 (NR_CPUS < 16K)
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
        /*
         * We can use relaxed semantics since the caller ensures that the
         * MCS node is properly initialized before updating the tail.
         */
        return (u32)xchg_relaxed(&lock->tail,
                                 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}
#else /* NR_CPUS >= 16K */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
        u32 old, new;

        old = atomic_read(&lock->val);
        do {
                new = (old & _Q_LOCKED_PENDING_MASK) | tail;
                /*
                 * We can use relaxed semantics since the caller ensures that
                 * the MCS node is properly initialized before updating the
                 * tail.
                 */
        } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));

        return old;
}
#endif

Look! You, Zacas, still need an additional FWD guarantee to break the
loop. That is, how *stickiness* your cache line is?

>
> Let me know if I misunderstood something again.
>
> Thanks,
>
> Alex
>
>
> >
> >
> >> Thanks,
> >>
> >> Alex
> >>
> >>
> >>> Therefore, I would like to propose a new ISA extension: Zafpg(Atomic
> >>> Forward Progress Guarantee). If RISC-V vendors can ensure the progress
> >>> of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
> >>> lines are sufficiently sticky, they could then claim support for this
> >>> extension. Linux could then select different spinlock implementations
> >>> based on this extension's support or not.
> >>>
> >>>>>> This is largely based on Guo's work and Leonardo reviews at [1].
> >>>>>>
> >>>>>> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> >>>>>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> >>>>>> ---
> >>>>>>    .../locking/queued-spinlocks/arch-support.txt |  2 +-
> >>>>>>    arch/riscv/Kconfig                            |  1 +
> >>>>>>    arch/riscv/include/asm/Kbuild                 |  4 +-
> >>>>>>    arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
> >>>>>>    arch/riscv/kernel/setup.c                     | 18 +++++++++
> >>>>>>    include/asm-generic/qspinlock.h               |  2 +
> >>>>>>    include/asm-generic/ticket_spinlock.h         |  2 +
> >>>>>>    7 files changed, 66 insertions(+), 2 deletions(-)
> >>>>>>    create mode 100644 arch/riscv/include/asm/spinlock.h
> >>>>>>
> >>>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>> index 22f2990392ff..cf26042480e2 100644
> >>>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>> @@ -20,7 +20,7 @@
> >>>>>>        |    openrisc: |  ok  |
> >>>>>>        |      parisc: | TODO |
> >>>>>>        |     powerpc: |  ok  |
> >>>>>> -    |       riscv: | TODO |
> >>>>>> +    |       riscv: |  ok  |
> >>>>>>        |        s390: | TODO |
> >>>>>>        |          sh: | TODO |
> >>>>>>        |       sparc: |  ok  |
> >>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>>>>> index 184a9edb04e0..ccf1703edeb9 100644
> >>>>>> --- a/arch/riscv/Kconfig
> >>>>>> +++ b/arch/riscv/Kconfig
> >>>>>> @@ -59,6 +59,7 @@ config RISCV
> >>>>>>           select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> >>>>>>           select ARCH_USE_MEMTEST
> >>>>>>           select ARCH_USE_QUEUED_RWLOCKS
> >>>>>> +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> >>>>> Using qspinlock or not depends on real hardware capabilities, not the
> >>>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> >>>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
> >>>>> hardware platforms but waste some qspinlock code size.
> >>>> You're right, and I think your comment matches what Conor mentioned
> >>>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> >>>> will allow a platform with Zabha capability to use qspinlocks. But if
> >>>> the hardware does not, it will fallback to the ticket spinlocks.
> >>>>
> >>>> But I agree that looking at the config alone may be misleading, even
> >>>> though it will work as expected at runtime. So I agree with you:
> >>>> unless anyone is strongly against the combo spinlocks, I will do what
> >>>> you suggest and add them.
> >>>>
> >>>> Thanks again for your initial work,
> >>>>
> >>>> Alex
> >>>>
> >>>>>>           select ARCH_USES_CFI_TRAPS if CFI_CLANG
> >>>>>>           select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> >>>>>>           select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> >>>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> >>>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
> >>>>>> --- a/arch/riscv/include/asm/Kbuild
> >>>>>> +++ b/arch/riscv/include/asm/Kbuild
> >>>>>> @@ -2,10 +2,12 @@
> >>>>>>    generic-y += early_ioremap.h
> >>>>>>    generic-y += flat.h
> >>>>>>    generic-y += kvm_para.h
> >>>>>> +generic-y += mcs_spinlock.h
> >>>>>>    generic-y += parport.h
> >>>>>> -generic-y += spinlock.h
> >>>>>>    generic-y += spinlock_types.h
> >>>>>> +generic-y += ticket_spinlock.h
> >>>>>>    generic-y += qrwlock.h
> >>>>>>    generic-y += qrwlock_types.h
> >>>>>> +generic-y += qspinlock.h
> >>>>>>    generic-y += user.h
> >>>>>>    generic-y += vmlinux.lds.h
> >>>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> >>>>>> new file mode 100644
> >>>>>> index 000000000000..e00429ac20ed
> >>>>>> --- /dev/null
> >>>>>> +++ b/arch/riscv/include/asm/spinlock.h
> >>>>>> @@ -0,0 +1,39 @@
> >>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>>>>> +
> >>>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
> >>>>>> +#define __ASM_RISCV_SPINLOCK_H
> >>>>>> +
> >>>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
> >>>>>> +#define _Q_PENDING_LOOPS       (1 << 9)
> >>>>>> +
> >>>>>> +#define __no_arch_spinlock_redefine
> >>>>>> +#include <asm/ticket_spinlock.h>
> >>>>>> +#include <asm/qspinlock.h>
> >>>>>> +#include <asm/alternative.h>
> >>>>>> +
> >>>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> >>>>>> +
> >>>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
> >>>>>> +static __always_inline type arch_spin_##op(type_lock lock)             \
> >>>>>> +{                                                                      \
> >>>>>> +       if (static_branch_unlikely(&qspinlock_key))                     \
> >>>>>> +               return queued_spin_##op(lock);                          \
> >>>>>> +       return ticket_spin_##op(lock);                                  \
> >>>>>> +}
> >>>>>> +
> >>>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> >>>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> >>>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> >>>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> >>>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> >>>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> >>>>>> +
> >>>>>> +#else
> >>>>>> +
> >>>>>> +#include <asm/ticket_spinlock.h>
> >>>>>> +
> >>>>>> +#endif
> >>>>>> +
> >>>>>> +#include <asm/qrwlock.h>
> >>>>>> +
> >>>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
> >>>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> >>>>>> index 4f73c0ae44b2..31ce75522fd4 100644
> >>>>>> --- a/arch/riscv/kernel/setup.c
> >>>>>> +++ b/arch/riscv/kernel/setup.c
> >>>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> >>>>>>    #endif
> >>>>>>    }
> >>>>>>
> >>>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> >>>>>> +EXPORT_SYMBOL(qspinlock_key);
> >>>>>> +
> >>>>>> +static void __init riscv_spinlock_init(void)
> >>>>>> +{
> >>>>>> +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> >>>>>> +                : : : : qspinlock);
> >>>>>> +
> >>>>>> +       static_branch_disable(&qspinlock_key);
> >>>>>> +       pr_info("Ticket spinlock: enabled\n");
> >>>>>> +
> >>>>>> +       return;
> >>>>>> +
> >>>>>> +qspinlock:
> >>>>>> +       pr_info("Queued spinlock: enabled\n");
> >>>>>> +}
> >>>>>> +
> >>>>>>    extern void __init init_rt_signal_env(void);
> >>>>>>
> >>>>>>    void __init setup_arch(char **cmdline_p)
> >>>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> >>>>>>           riscv_set_dma_cache_alignment();
> >>>>>>
> >>>>>>           riscv_user_isa_enable();
> >>>>>> +       riscv_spinlock_init();
> >>>>>>    }
> >>>>>>
> >>>>>>    bool arch_cpu_is_hotpluggable(int cpu)
> >>>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> >>>>>> index 0655aa5b57b2..bf47cca2c375 100644
> >>>>>> --- a/include/asm-generic/qspinlock.h
> >>>>>> +++ b/include/asm-generic/qspinlock.h
> >>>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>>>    }
> >>>>>>    #endif
> >>>>>>
> >>>>>> +#ifndef __no_arch_spinlock_redefine
> >>>>>>    /*
> >>>>>>     * Remapping spinlock architecture specific functions to the corresponding
> >>>>>>     * queued spinlock functions.
> >>>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>>>    #define arch_spin_lock(l)              queued_spin_lock(l)
> >>>>>>    #define arch_spin_trylock(l)           queued_spin_trylock(l)
> >>>>>>    #define arch_spin_unlock(l)            queued_spin_unlock(l)
> >>>>>> +#endif
> >>>>>>
> >>>>>>    #endif /* __ASM_GENERIC_QSPINLOCK_H */
> >>>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> >>>>>> index cfcff22b37b3..325779970d8a 100644
> >>>>>> --- a/include/asm-generic/ticket_spinlock.h
> >>>>>> +++ b/include/asm-generic/ticket_spinlock.h
> >>>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>>>           return (s16)((val >> 16) - (val & 0xffff)) > 1;
> >>>>>>    }
> >>>>>>
> >>>>>> +#ifndef __no_arch_spinlock_redefine
> >>>>>>    /*
> >>>>>>     * Remapping spinlock architecture specific functions to the corresponding
> >>>>>>     * ticket spinlock functions.
> >>>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>>>    #define arch_spin_lock(l)              ticket_spin_lock(l)
> >>>>>>    #define arch_spin_trylock(l)           ticket_spin_trylock(l)
> >>>>>>    #define arch_spin_unlock(l)            ticket_spin_unlock(l)
> >>>>>> +#endif
> >>>>>>
> >>>>>>    #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> >>>>>> --
> >>>>>> 2.39.2
> >>>>>>
> >>>>> --
> >>>>> Best Regards
> >>>>>    Guo Ren
> >>>
> >
> >
Alexandre Ghiti June 3, 2024, 11:49 a.m. UTC | #20
On 03/06/2024 13:44, Guo Ren wrote:
> On Mon, Jun 3, 2024 at 7:34 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
>> On 03/06/2024 13:28, Guo Ren wrote:
>>> On Mon, Jun 3, 2024 at 5:49 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
>>>> Hi Guo,
>>>>
>>>> On 31/05/2024 15:10, Guo Ren wrote:
>>>>> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>>>>>> Hi Guo,
>>>>>>
>>>>>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
>>>>>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>>>>>>>> In order to produce a generic kernel, a user can select
>>>>>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
>>>>>>>> spinlock implementation if Zabha is not present.
>>>>>>>>
>>>>>>>> Note that we can't use alternatives here because the discovery of
>>>>>>>> extensions is done too late and we need to start with the qspinlock
>>>>>>>> implementation because the ticket spinlock implementation would pollute
>>>>>>>> the spinlock value, so let's use static keys.
>>>>> Zabha is not a prerequisite for qspinlock; the prerequisite for
>>>>> qspinlock is the *forward progress guarantee* in the atomic operation
>>>>> loop during intense contention. Even with Zabha enabled to meet the
>>>>> requirements of xchg_tail, that still only applies when the number of
>>>>> CPUs is less than 16K. The qspinlock uses cmpxchg loop instead of
>>>>> xchg_tail when the number of cores is more than 16K. Thus, hardware
>>>>> support for Zabha does not equate to the safe use of qspinlock.
>>>> But if we have Zacas to implement cmpxchg(), we still provide the
>>>> "forward progress guarantee" then right? Let me know if I missed something.
>>> The qspinlock needs a "forward progress guarantee," not Zacas, and
>>> Zabha could give a guarantee to qspinlock xchg_tail (CPUs < 16K) with
>>> AMOSWAP.H instruction. But, using "LR/SC pairs" also could give enough
>>> fwd guarantee that depends on the micro-arch design of the riscv core.
>>> I think the help of AMO instead of LR/SC is it could off-load AMO
>>> operations from LSU to CIU(Next Level Cache or Interconnect), which
>>> gains better performance. "LR/SC pairs" only provide Near-Atomic, but
>>> AMO gives Far-Atomic additionally.
>>
>> I understand qspinlocks require forward progress and that your company's
>> LR/SC implementations provide such guarantee, I'm not arguing against
>> your new extension proposal.
>>
>> It seemed to me in your previous message that you implied that when
>> NR_CPUS > 16k, we should not use qspinlocks. My question was: "Don't
>> Zacas provide such guarantee"? I think it does, so qspinlocks should
>> actually depend on Zabha *and* Zacas. Is that correct to you?
> See kernel/locking/qspinlock.c
> #if _Q_PENDING_BITS == 8 (NR_CPUS < 16K)
> static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
> {
>          /*
>           * We can use relaxed semantics since the caller ensures that the
>           * MCS node is properly initialized before updating the tail.
>           */
>          return (u32)xchg_relaxed(&lock->tail,
>                                   tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
> }
> #else /* NR_CPUS >= 16K */
> static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
> {
>          u32 old, new;
>
>          old = atomic_read(&lock->val);
>          do {
>                  new = (old & _Q_LOCKED_PENDING_MASK) | tail;
>                  /*
>                   * We can use relaxed semantics since the caller ensures that
>                   * the MCS node is properly initialized before updating the
>                   * tail.
>                   */
>          } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
>
>          return old;
> }
> #endif
>
> Look! You, Zacas, still need an additional FWD guarantee to break the
> loop. That is, how *stickiness* your cache line is?


But then the problem comes from this generic implementation of 
xchg_tail(), not from the arch cas implementation right?


>
>> Let me know if I misunderstood something again.
>>
>> Thanks,
>>
>> Alex
>>
>>
>>>
>>>> Thanks,
>>>>
>>>> Alex
>>>>
>>>>
>>>>> Therefore, I would like to propose a new ISA extension: Zafpg(Atomic
>>>>> Forward Progress Guarantee). If RISC-V vendors can ensure the progress
>>>>> of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
>>>>> lines are sufficiently sticky, they could then claim support for this
>>>>> extension. Linux could then select different spinlock implementations
>>>>> based on this extension's support or not.
>>>>>
>>>>>>>> This is largely based on Guo's work and Leonardo reviews at [1].
>>>>>>>>
>>>>>>>> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
>>>>>>>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>>>>>>>> ---
>>>>>>>>     .../locking/queued-spinlocks/arch-support.txt |  2 +-
>>>>>>>>     arch/riscv/Kconfig                            |  1 +
>>>>>>>>     arch/riscv/include/asm/Kbuild                 |  4 +-
>>>>>>>>     arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
>>>>>>>>     arch/riscv/kernel/setup.c                     | 18 +++++++++
>>>>>>>>     include/asm-generic/qspinlock.h               |  2 +
>>>>>>>>     include/asm-generic/ticket_spinlock.h         |  2 +
>>>>>>>>     7 files changed, 66 insertions(+), 2 deletions(-)
>>>>>>>>     create mode 100644 arch/riscv/include/asm/spinlock.h
>>>>>>>>
>>>>>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>>>> index 22f2990392ff..cf26042480e2 100644
>>>>>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>>>> @@ -20,7 +20,7 @@
>>>>>>>>         |    openrisc: |  ok  |
>>>>>>>>         |      parisc: | TODO |
>>>>>>>>         |     powerpc: |  ok  |
>>>>>>>> -    |       riscv: | TODO |
>>>>>>>> +    |       riscv: |  ok  |
>>>>>>>>         |        s390: | TODO |
>>>>>>>>         |          sh: | TODO |
>>>>>>>>         |       sparc: |  ok  |
>>>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>>>>>> index 184a9edb04e0..ccf1703edeb9 100644
>>>>>>>> --- a/arch/riscv/Kconfig
>>>>>>>> +++ b/arch/riscv/Kconfig
>>>>>>>> @@ -59,6 +59,7 @@ config RISCV
>>>>>>>>            select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
>>>>>>>>            select ARCH_USE_MEMTEST
>>>>>>>>            select ARCH_USE_QUEUED_RWLOCKS
>>>>>>>> +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
>>>>>>> Using qspinlock or not depends on real hardware capabilities, not the
>>>>>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
>>>>>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
>>>>>>> hardware platforms but waste some qspinlock code size.
>>>>>> You're right, and I think your comment matches what Conor mentioned
>>>>>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
>>>>>> will allow a platform with Zabha capability to use qspinlocks. But if
>>>>>> the hardware does not, it will fallback to the ticket spinlocks.
>>>>>>
>>>>>> But I agree that looking at the config alone may be misleading, even
>>>>>> though it will work as expected at runtime. So I agree with you:
>>>>>> unless anyone is strongly against the combo spinlocks, I will do what
>>>>>> you suggest and add them.
>>>>>>
>>>>>> Thanks again for your initial work,
>>>>>>
>>>>>> Alex
>>>>>>
>>>>>>>>            select ARCH_USES_CFI_TRAPS if CFI_CLANG
>>>>>>>>            select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
>>>>>>>>            select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
>>>>>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
>>>>>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
>>>>>>>> --- a/arch/riscv/include/asm/Kbuild
>>>>>>>> +++ b/arch/riscv/include/asm/Kbuild
>>>>>>>> @@ -2,10 +2,12 @@
>>>>>>>>     generic-y += early_ioremap.h
>>>>>>>>     generic-y += flat.h
>>>>>>>>     generic-y += kvm_para.h
>>>>>>>> +generic-y += mcs_spinlock.h
>>>>>>>>     generic-y += parport.h
>>>>>>>> -generic-y += spinlock.h
>>>>>>>>     generic-y += spinlock_types.h
>>>>>>>> +generic-y += ticket_spinlock.h
>>>>>>>>     generic-y += qrwlock.h
>>>>>>>>     generic-y += qrwlock_types.h
>>>>>>>> +generic-y += qspinlock.h
>>>>>>>>     generic-y += user.h
>>>>>>>>     generic-y += vmlinux.lds.h
>>>>>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..e00429ac20ed
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/arch/riscv/include/asm/spinlock.h
>>>>>>>> @@ -0,0 +1,39 @@
>>>>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>>>>>> +
>>>>>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
>>>>>>>> +#define __ASM_RISCV_SPINLOCK_H
>>>>>>>> +
>>>>>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>>>>>>>> +#define _Q_PENDING_LOOPS       (1 << 9)
>>>>>>>> +
>>>>>>>> +#define __no_arch_spinlock_redefine
>>>>>>>> +#include <asm/ticket_spinlock.h>
>>>>>>>> +#include <asm/qspinlock.h>
>>>>>>>> +#include <asm/alternative.h>
>>>>>>>> +
>>>>>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
>>>>>>>> +
>>>>>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
>>>>>>>> +static __always_inline type arch_spin_##op(type_lock lock)             \
>>>>>>>> +{                                                                      \
>>>>>>>> +       if (static_branch_unlikely(&qspinlock_key))                     \
>>>>>>>> +               return queued_spin_##op(lock);                          \
>>>>>>>> +       return ticket_spin_##op(lock);                                  \
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
>>>>>>>> +
>>>>>>>> +#else
>>>>>>>> +
>>>>>>>> +#include <asm/ticket_spinlock.h>
>>>>>>>> +
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>> +#include <asm/qrwlock.h>
>>>>>>>> +
>>>>>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
>>>>>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
>>>>>>>> index 4f73c0ae44b2..31ce75522fd4 100644
>>>>>>>> --- a/arch/riscv/kernel/setup.c
>>>>>>>> +++ b/arch/riscv/kernel/setup.c
>>>>>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
>>>>>>>>     #endif
>>>>>>>>     }
>>>>>>>>
>>>>>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
>>>>>>>> +EXPORT_SYMBOL(qspinlock_key);
>>>>>>>> +
>>>>>>>> +static void __init riscv_spinlock_init(void)
>>>>>>>> +{
>>>>>>>> +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
>>>>>>>> +                : : : : qspinlock);
>>>>>>>> +
>>>>>>>> +       static_branch_disable(&qspinlock_key);
>>>>>>>> +       pr_info("Ticket spinlock: enabled\n");
>>>>>>>> +
>>>>>>>> +       return;
>>>>>>>> +
>>>>>>>> +qspinlock:
>>>>>>>> +       pr_info("Queued spinlock: enabled\n");
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>     extern void __init init_rt_signal_env(void);
>>>>>>>>
>>>>>>>>     void __init setup_arch(char **cmdline_p)
>>>>>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
>>>>>>>>            riscv_set_dma_cache_alignment();
>>>>>>>>
>>>>>>>>            riscv_user_isa_enable();
>>>>>>>> +       riscv_spinlock_init();
>>>>>>>>     }
>>>>>>>>
>>>>>>>>     bool arch_cpu_is_hotpluggable(int cpu)
>>>>>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
>>>>>>>> index 0655aa5b57b2..bf47cca2c375 100644
>>>>>>>> --- a/include/asm-generic/qspinlock.h
>>>>>>>> +++ b/include/asm-generic/qspinlock.h
>>>>>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>>>>>     }
>>>>>>>>     #endif
>>>>>>>>
>>>>>>>> +#ifndef __no_arch_spinlock_redefine
>>>>>>>>     /*
>>>>>>>>      * Remapping spinlock architecture specific functions to the corresponding
>>>>>>>>      * queued spinlock functions.
>>>>>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>>>>>     #define arch_spin_lock(l)              queued_spin_lock(l)
>>>>>>>>     #define arch_spin_trylock(l)           queued_spin_trylock(l)
>>>>>>>>     #define arch_spin_unlock(l)            queued_spin_unlock(l)
>>>>>>>> +#endif
>>>>>>>>
>>>>>>>>     #endif /* __ASM_GENERIC_QSPINLOCK_H */
>>>>>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
>>>>>>>> index cfcff22b37b3..325779970d8a 100644
>>>>>>>> --- a/include/asm-generic/ticket_spinlock.h
>>>>>>>> +++ b/include/asm-generic/ticket_spinlock.h
>>>>>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>>>>>            return (s16)((val >> 16) - (val & 0xffff)) > 1;
>>>>>>>>     }
>>>>>>>>
>>>>>>>> +#ifndef __no_arch_spinlock_redefine
>>>>>>>>     /*
>>>>>>>>      * Remapping spinlock architecture specific functions to the corresponding
>>>>>>>>      * ticket spinlock functions.
>>>>>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>>>>>     #define arch_spin_lock(l)              ticket_spin_lock(l)
>>>>>>>>     #define arch_spin_trylock(l)           ticket_spin_trylock(l)
>>>>>>>>     #define arch_spin_unlock(l)            ticket_spin_unlock(l)
>>>>>>>> +#endif
>>>>>>>>
>>>>>>>>     #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
>>>>>>>> --
>>>>>>>> 2.39.2
>>>>>>>>
>>>>>>> --
>>>>>>> Best Regards
>>>>>>>     Guo Ren
>>>
>
>
Guo Ren June 3, 2024, 11:57 a.m. UTC | #21
On Mon, Jun 3, 2024 at 7:49 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
>
> On 03/06/2024 13:44, Guo Ren wrote:
> > On Mon, Jun 3, 2024 at 7:34 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
> >> On 03/06/2024 13:28, Guo Ren wrote:
> >>> On Mon, Jun 3, 2024 at 5:49 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
> >>>> Hi Guo,
> >>>>
> >>>> On 31/05/2024 15:10, Guo Ren wrote:
> >>>>> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >>>>>> Hi Guo,
> >>>>>>
> >>>>>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <guoren@kernel.org> wrote:
> >>>>>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >>>>>>>> In order to produce a generic kernel, a user can select
> >>>>>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> >>>>>>>> spinlock implementation if Zabha is not present.
> >>>>>>>>
> >>>>>>>> Note that we can't use alternatives here because the discovery of
> >>>>>>>> extensions is done too late and we need to start with the qspinlock
> >>>>>>>> implementation because the ticket spinlock implementation would pollute
> >>>>>>>> the spinlock value, so let's use static keys.
> >>>>> Zabha is not a prerequisite for qspinlock; the prerequisite for
> >>>>> qspinlock is the *forward progress guarantee* in the atomic operation
> >>>>> loop during intense contention. Even with Zabha enabled to meet the
> >>>>> requirements of xchg_tail, that still only applies when the number of
> >>>>> CPUs is less than 16K. The qspinlock uses cmpxchg loop instead of
> >>>>> xchg_tail when the number of cores is more than 16K. Thus, hardware
> >>>>> support for Zabha does not equate to the safe use of qspinlock.
> >>>> But if we have Zacas to implement cmpxchg(), we still provide the
> >>>> "forward progress guarantee" then right? Let me know if I missed something.
> >>> The qspinlock needs a "forward progress guarantee," not Zacas, and
> >>> Zabha could give a guarantee to qspinlock xchg_tail (CPUs < 16K) with
> >>> AMOSWAP.H instruction. But, using "LR/SC pairs" also could give enough
> >>> fwd guarantee that depends on the micro-arch design of the riscv core.
> >>> I think the help of AMO instead of LR/SC is it could off-load AMO
> >>> operations from LSU to CIU(Next Level Cache or Interconnect), which
> >>> gains better performance. "LR/SC pairs" only provide Near-Atomic, but
> >>> AMO gives Far-Atomic additionally.
> >>
> >> I understand qspinlocks require forward progress and that your company's
> >> LR/SC implementations provide such guarantee, I'm not arguing against
> >> your new extension proposal.
> >>
> >> It seemed to me in your previous message that you implied that when
> >> NR_CPUS > 16k, we should not use qspinlocks. My question was: "Don't
> >> Zacas provide such guarantee"? I think it does, so qspinlocks should
> >> actually depend on Zabha *and* Zacas. Is that correct to you?
> > See kernel/locking/qspinlock.c
> > #if _Q_PENDING_BITS == 8 (NR_CPUS < 16K)
> > static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
> > {
> >          /*
> >           * We can use relaxed semantics since the caller ensures that the
> >           * MCS node is properly initialized before updating the tail.
> >           */
> >          return (u32)xchg_relaxed(&lock->tail,
> >                                   tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
> > }
> > #else /* NR_CPUS >= 16K */
> > static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
> > {
> >          u32 old, new;
> >
> >          old = atomic_read(&lock->val);
> >          do {
> >                  new = (old & _Q_LOCKED_PENDING_MASK) | tail;
> >                  /*
> >                   * We can use relaxed semantics since the caller ensures that
> >                   * the MCS node is properly initialized before updating the
> >                   * tail.
> >                   */
> >          } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
> >
> >          return old;
> > }
> > #endif
> >
> > Look! You, Zacas, still need an additional FWD guarantee to break the
> > loop. That is, how *stickiness* your cache line is?
>
>
> But then the problem comes from this generic implementation of
> xchg_tail(), not from the arch cas implementation right?
The cmpxchg loop forward guarantee problems are needed in the whole
Linux, not only in qspinlock. If the machine couldn't give a fwd
guarantee, that seems a crap one.

>
>
> >
> >> Let me know if I misunderstood something again.
> >>
> >> Thanks,
> >>
> >> Alex
> >>
> >>
> >>>
> >>>> Thanks,
> >>>>
> >>>> Alex
> >>>>
> >>>>
> >>>>> Therefore, I would like to propose a new ISA extension: Zafpg(Atomic
> >>>>> Forward Progress Guarantee). If RISC-V vendors can ensure the progress
> >>>>> of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
> >>>>> lines are sufficiently sticky, they could then claim support for this
> >>>>> extension. Linux could then select different spinlock implementations
> >>>>> based on this extension's support or not.
> >>>>>
> >>>>>>>> This is largely based on Guo's work and Leonardo reviews at [1].
> >>>>>>>>
> >>>>>>>> Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1]
> >>>>>>>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> >>>>>>>> ---
> >>>>>>>>     .../locking/queued-spinlocks/arch-support.txt |  2 +-
> >>>>>>>>     arch/riscv/Kconfig                            |  1 +
> >>>>>>>>     arch/riscv/include/asm/Kbuild                 |  4 +-
> >>>>>>>>     arch/riscv/include/asm/spinlock.h             | 39 +++++++++++++++++++
> >>>>>>>>     arch/riscv/kernel/setup.c                     | 18 +++++++++
> >>>>>>>>     include/asm-generic/qspinlock.h               |  2 +
> >>>>>>>>     include/asm-generic/ticket_spinlock.h         |  2 +
> >>>>>>>>     7 files changed, 66 insertions(+), 2 deletions(-)
> >>>>>>>>     create mode 100644 arch/riscv/include/asm/spinlock.h
> >>>>>>>>
> >>>>>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>>>> index 22f2990392ff..cf26042480e2 100644
> >>>>>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>>>> @@ -20,7 +20,7 @@
> >>>>>>>>         |    openrisc: |  ok  |
> >>>>>>>>         |      parisc: | TODO |
> >>>>>>>>         |     powerpc: |  ok  |
> >>>>>>>> -    |       riscv: | TODO |
> >>>>>>>> +    |       riscv: |  ok  |
> >>>>>>>>         |        s390: | TODO |
> >>>>>>>>         |          sh: | TODO |
> >>>>>>>>         |       sparc: |  ok  |
> >>>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>>>>>>> index 184a9edb04e0..ccf1703edeb9 100644
> >>>>>>>> --- a/arch/riscv/Kconfig
> >>>>>>>> +++ b/arch/riscv/Kconfig
> >>>>>>>> @@ -59,6 +59,7 @@ config RISCV
> >>>>>>>>            select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> >>>>>>>>            select ARCH_USE_MEMTEST
> >>>>>>>>            select ARCH_USE_QUEUED_RWLOCKS
> >>>>>>>> +       select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> >>>>>>> Using qspinlock or not depends on real hardware capabilities, not the
> >>>>>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> >>>>>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
> >>>>>>> hardware platforms but waste some qspinlock code size.
> >>>>>> You're right, and I think your comment matches what Conor mentioned
> >>>>>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> >>>>>> will allow a platform with Zabha capability to use qspinlocks. But if
> >>>>>> the hardware does not, it will fallback to the ticket spinlocks.
> >>>>>>
> >>>>>> But I agree that looking at the config alone may be misleading, even
> >>>>>> though it will work as expected at runtime. So I agree with you:
> >>>>>> unless anyone is strongly against the combo spinlocks, I will do what
> >>>>>> you suggest and add them.
> >>>>>>
> >>>>>> Thanks again for your initial work,
> >>>>>>
> >>>>>> Alex
> >>>>>>
> >>>>>>>>            select ARCH_USES_CFI_TRAPS if CFI_CLANG
> >>>>>>>>            select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> >>>>>>>>            select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> >>>>>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> >>>>>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
> >>>>>>>> --- a/arch/riscv/include/asm/Kbuild
> >>>>>>>> +++ b/arch/riscv/include/asm/Kbuild
> >>>>>>>> @@ -2,10 +2,12 @@
> >>>>>>>>     generic-y += early_ioremap.h
> >>>>>>>>     generic-y += flat.h
> >>>>>>>>     generic-y += kvm_para.h
> >>>>>>>> +generic-y += mcs_spinlock.h
> >>>>>>>>     generic-y += parport.h
> >>>>>>>> -generic-y += spinlock.h
> >>>>>>>>     generic-y += spinlock_types.h
> >>>>>>>> +generic-y += ticket_spinlock.h
> >>>>>>>>     generic-y += qrwlock.h
> >>>>>>>>     generic-y += qrwlock_types.h
> >>>>>>>> +generic-y += qspinlock.h
> >>>>>>>>     generic-y += user.h
> >>>>>>>>     generic-y += vmlinux.lds.h
> >>>>>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> >>>>>>>> new file mode 100644
> >>>>>>>> index 000000000000..e00429ac20ed
> >>>>>>>> --- /dev/null
> >>>>>>>> +++ b/arch/riscv/include/asm/spinlock.h
> >>>>>>>> @@ -0,0 +1,39 @@
> >>>>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>>>>>>> +
> >>>>>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
> >>>>>>>> +#define __ASM_RISCV_SPINLOCK_H
> >>>>>>>> +
> >>>>>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
> >>>>>>>> +#define _Q_PENDING_LOOPS       (1 << 9)
> >>>>>>>> +
> >>>>>>>> +#define __no_arch_spinlock_redefine
> >>>>>>>> +#include <asm/ticket_spinlock.h>
> >>>>>>>> +#include <asm/qspinlock.h>
> >>>>>>>> +#include <asm/alternative.h>
> >>>>>>>> +
> >>>>>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> >>>>>>>> +
> >>>>>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock)                     \
> >>>>>>>> +static __always_inline type arch_spin_##op(type_lock lock)             \
> >>>>>>>> +{                                                                      \
> >>>>>>>> +       if (static_branch_unlikely(&qspinlock_key))                     \
> >>>>>>>> +               return queued_spin_##op(lock);                          \
> >>>>>>>> +       return ticket_spin_##op(lock);                                  \
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> >>>>>>>> +
> >>>>>>>> +#else
> >>>>>>>> +
> >>>>>>>> +#include <asm/ticket_spinlock.h>
> >>>>>>>> +
> >>>>>>>> +#endif
> >>>>>>>> +
> >>>>>>>> +#include <asm/qrwlock.h>
> >>>>>>>> +
> >>>>>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
> >>>>>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> >>>>>>>> index 4f73c0ae44b2..31ce75522fd4 100644
> >>>>>>>> --- a/arch/riscv/kernel/setup.c
> >>>>>>>> +++ b/arch/riscv/kernel/setup.c
> >>>>>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> >>>>>>>>     #endif
> >>>>>>>>     }
> >>>>>>>>
> >>>>>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> >>>>>>>> +EXPORT_SYMBOL(qspinlock_key);
> >>>>>>>> +
> >>>>>>>> +static void __init riscv_spinlock_init(void)
> >>>>>>>> +{
> >>>>>>>> +       asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> >>>>>>>> +                : : : : qspinlock);
> >>>>>>>> +
> >>>>>>>> +       static_branch_disable(&qspinlock_key);
> >>>>>>>> +       pr_info("Ticket spinlock: enabled\n");
> >>>>>>>> +
> >>>>>>>> +       return;
> >>>>>>>> +
> >>>>>>>> +qspinlock:
> >>>>>>>> +       pr_info("Queued spinlock: enabled\n");
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>>     extern void __init init_rt_signal_env(void);
> >>>>>>>>
> >>>>>>>>     void __init setup_arch(char **cmdline_p)
> >>>>>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> >>>>>>>>            riscv_set_dma_cache_alignment();
> >>>>>>>>
> >>>>>>>>            riscv_user_isa_enable();
> >>>>>>>> +       riscv_spinlock_init();
> >>>>>>>>     }
> >>>>>>>>
> >>>>>>>>     bool arch_cpu_is_hotpluggable(int cpu)
> >>>>>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> >>>>>>>> index 0655aa5b57b2..bf47cca2c375 100644
> >>>>>>>> --- a/include/asm-generic/qspinlock.h
> >>>>>>>> +++ b/include/asm-generic/qspinlock.h
> >>>>>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>>>>>     }
> >>>>>>>>     #endif
> >>>>>>>>
> >>>>>>>> +#ifndef __no_arch_spinlock_redefine
> >>>>>>>>     /*
> >>>>>>>>      * Remapping spinlock architecture specific functions to the corresponding
> >>>>>>>>      * queued spinlock functions.
> >>>>>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>>>>>     #define arch_spin_lock(l)              queued_spin_lock(l)
> >>>>>>>>     #define arch_spin_trylock(l)           queued_spin_trylock(l)
> >>>>>>>>     #define arch_spin_unlock(l)            queued_spin_unlock(l)
> >>>>>>>> +#endif
> >>>>>>>>
> >>>>>>>>     #endif /* __ASM_GENERIC_QSPINLOCK_H */
> >>>>>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> >>>>>>>> index cfcff22b37b3..325779970d8a 100644
> >>>>>>>> --- a/include/asm-generic/ticket_spinlock.h
> >>>>>>>> +++ b/include/asm-generic/ticket_spinlock.h
> >>>>>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>>>>>            return (s16)((val >> 16) - (val & 0xffff)) > 1;
> >>>>>>>>     }
> >>>>>>>>
> >>>>>>>> +#ifndef __no_arch_spinlock_redefine
> >>>>>>>>     /*
> >>>>>>>>      * Remapping spinlock architecture specific functions to the corresponding
> >>>>>>>>      * ticket spinlock functions.
> >>>>>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>>>>>     #define arch_spin_lock(l)              ticket_spin_lock(l)
> >>>>>>>>     #define arch_spin_trylock(l)           ticket_spin_trylock(l)
> >>>>>>>>     #define arch_spin_unlock(l)            ticket_spin_unlock(l)
> >>>>>>>> +#endif
> >>>>>>>>
> >>>>>>>>     #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> >>>>>>>> --
> >>>>>>>> 2.39.2
> >>>>>>>>
> >>>>>>> --
> >>>>>>> Best Regards
> >>>>>>>     Guo Ren
> >>>
> >
> >
>
diff mbox series

Patch

diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
index 22f2990392ff..cf26042480e2 100644
--- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
@@ -20,7 +20,7 @@ 
     |    openrisc: |  ok  |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: |  ok  |
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 184a9edb04e0..ccf1703edeb9 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -59,6 +59,7 @@  config RISCV
 	select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
 	select ARCH_USE_MEMTEST
 	select ARCH_USE_QUEUED_RWLOCKS
+	select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
 	select ARCH_USES_CFI_TRAPS if CFI_CLANG
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 504f8b7e72d4..ad72f2bd4cc9 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -2,10 +2,12 @@ 
 generic-y += early_ioremap.h
 generic-y += flat.h
 generic-y += kvm_para.h
+generic-y += mcs_spinlock.h
 generic-y += parport.h
-generic-y += spinlock.h
 generic-y += spinlock_types.h
+generic-y += ticket_spinlock.h
 generic-y += qrwlock.h
 generic-y += qrwlock_types.h
+generic-y += qspinlock.h
 generic-y += user.h
 generic-y += vmlinux.lds.h
diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
new file mode 100644
index 000000000000..e00429ac20ed
--- /dev/null
+++ b/arch/riscv/include/asm/spinlock.h
@@ -0,0 +1,39 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_RISCV_SPINLOCK_H
+#define __ASM_RISCV_SPINLOCK_H
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#define _Q_PENDING_LOOPS	(1 << 9)
+
+#define __no_arch_spinlock_redefine
+#include <asm/ticket_spinlock.h>
+#include <asm/qspinlock.h>
+#include <asm/alternative.h>
+
+DECLARE_STATIC_KEY_TRUE(qspinlock_key);
+
+#define SPINLOCK_BASE_DECLARE(op, type, type_lock)			\
+static __always_inline type arch_spin_##op(type_lock lock)		\
+{									\
+	if (static_branch_unlikely(&qspinlock_key))			\
+		return queued_spin_##op(lock);				\
+	return ticket_spin_##op(lock);					\
+}
+
+SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
+
+#else
+
+#include <asm/ticket_spinlock.h>
+
+#endif
+
+#include <asm/qrwlock.h>
+
+#endif /* __ASM_RISCV_SPINLOCK_H */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 4f73c0ae44b2..31ce75522fd4 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -244,6 +244,23 @@  static void __init parse_dtb(void)
 #endif
 }
 
+DEFINE_STATIC_KEY_TRUE(qspinlock_key);
+EXPORT_SYMBOL(qspinlock_key);
+
+static void __init riscv_spinlock_init(void)
+{
+	asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
+		 : : : : qspinlock);
+
+	static_branch_disable(&qspinlock_key);
+	pr_info("Ticket spinlock: enabled\n");
+
+	return;
+
+qspinlock:
+	pr_info("Queued spinlock: enabled\n");
+}
+
 extern void __init init_rt_signal_env(void);
 
 void __init setup_arch(char **cmdline_p)
@@ -295,6 +312,7 @@  void __init setup_arch(char **cmdline_p)
 	riscv_set_dma_cache_alignment();
 
 	riscv_user_isa_enable();
+	riscv_spinlock_init();
 }
 
 bool arch_cpu_is_hotpluggable(int cpu)
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 0655aa5b57b2..bf47cca2c375 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -136,6 +136,7 @@  static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 }
 #endif
 
+#ifndef __no_arch_spinlock_redefine
 /*
  * Remapping spinlock architecture specific functions to the corresponding
  * queued spinlock functions.
@@ -146,5 +147,6 @@  static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 #define arch_spin_lock(l)		queued_spin_lock(l)
 #define arch_spin_trylock(l)		queued_spin_trylock(l)
 #define arch_spin_unlock(l)		queued_spin_unlock(l)
+#endif
 
 #endif /* __ASM_GENERIC_QSPINLOCK_H */
diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
index cfcff22b37b3..325779970d8a 100644
--- a/include/asm-generic/ticket_spinlock.h
+++ b/include/asm-generic/ticket_spinlock.h
@@ -89,6 +89,7 @@  static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
 	return (s16)((val >> 16) - (val & 0xffff)) > 1;
 }
 
+#ifndef __no_arch_spinlock_redefine
 /*
  * Remapping spinlock architecture specific functions to the corresponding
  * ticket spinlock functions.
@@ -99,5 +100,6 @@  static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
 #define arch_spin_lock(l)		ticket_spin_lock(l)
 #define arch_spin_trylock(l)		ticket_spin_trylock(l)
 #define arch_spin_unlock(l)		ticket_spin_unlock(l)
+#endif
 
 #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */