diff mbox series

[RFC,03/14] RISC-V: paravirt: Implement steal-time support

Message ID 20230417103402.798596-4-ajones@ventanamicro.com (mailing list archive)
State Awaiting Upstream, archived
Headers show
Series RISC-V: Add steal-time support | expand

Checks

Context Check Description
conchuod/tree_selection fail Failed to apply to next/pending-fixes or riscv/for-next

Commit Message

Andrew Jones April 17, 2023, 10:33 a.m. UTC
When the SBI STA extension exists we can use it to implement
paravirt steal-time support. Fill in the empty pv-time functions
with an SBI STA implementation.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
 arch/riscv/kernel/paravirt.c | 56 ++++++++++++++++++++++++++++++++++--
 1 file changed, 53 insertions(+), 3 deletions(-)

Comments

Conor Dooley April 18, 2023, 7:02 p.m. UTC | #1
On Mon, Apr 17, 2023 at 12:33:51PM +0200, Andrew Jones wrote:

> +static int pv_time_cpu_online(unsigned int cpu)
> +{
> +	struct sbi_sta_struct *st = this_cpu_ptr(&steal_time);
> +	phys_addr_t pa = __pa(st);
> +	unsigned long lo = (unsigned long)pa;
> +	unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0;
> +
> +	return sbi_sta_set_steal_time_shmem(lo, hi, 0);
> +}
> +
>  static int pv_time_cpu_down_prepare(unsigned int cpu)
>  {
> -	return 0;
> +	return sbi_sta_set_steal_time_shmem(-1, -1, 0);

I'm not really a fan of this -1s without an explanation of what passing
-1 to the ecall does.

>  }
>  
>  static u64 pv_time_steal_clock(int cpu)
>  {
> -	return 0;
> +	struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
> +	u32 sequence;
> +	u64 steal;
> +
> +	do {
> +		sequence = st->sequence;
> +		virt_rmb();
> +		steal = st->steal;
> +		virt_rmb();
> +	} while ((sequence & 1) || (sequence != st->sequence));

Call me a bit anal, but should we yoink your:
| The supervisor-mode software MUST check this field
| before and after reading the `steal` field, and
| repeat the read if they are different or odd
and add it here so that is it immediately obvious without reading the
SBI spec what is going on here? Or it is a case of "go read the SBI spec
if you have questions about what the kernel is doing w/ SBI stuff?

(sidenote, s/they are/it is/?)

Guess both of my comments are in the same vein, but would make
at-a-glance understanding easier IMO.

Cheers,
Conor.

> +
> +	return steal;
>  }
>  
>  int __init pv_time_init(void)
> -- 
> 2.39.2
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Andrew Jones April 19, 2023, 8:24 a.m. UTC | #2
On Tue, Apr 18, 2023 at 08:02:06PM +0100, Conor Dooley wrote:
> On Mon, Apr 17, 2023 at 12:33:51PM +0200, Andrew Jones wrote:
> 
> > +static int pv_time_cpu_online(unsigned int cpu)
> > +{
> > +	struct sbi_sta_struct *st = this_cpu_ptr(&steal_time);
> > +	phys_addr_t pa = __pa(st);
> > +	unsigned long lo = (unsigned long)pa;
> > +	unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0;
> > +
> > +	return sbi_sta_set_steal_time_shmem(lo, hi, 0);
> > +}
> > +
> >  static int pv_time_cpu_down_prepare(unsigned int cpu)
> >  {
> > -	return 0;
> > +	return sbi_sta_set_steal_time_shmem(-1, -1, 0);
> 
> I'm not really a fan of this -1s without an explanation of what passing
> -1 to the ecall does.
> 
> >  }
> >  
> >  static u64 pv_time_steal_clock(int cpu)
> >  {
> > -	return 0;
> > +	struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
> > +	u32 sequence;
> > +	u64 steal;
> > +
> > +	do {
> > +		sequence = st->sequence;
> > +		virt_rmb();
> > +		steal = st->steal;
> > +		virt_rmb();
> > +	} while ((sequence & 1) || (sequence != st->sequence));
> 
> Call me a bit anal, but should we yoink your:
> | The supervisor-mode software MUST check this field
> | before and after reading the `steal` field, and
> | repeat the read if they are different or odd
> and add it here so that is it immediately obvious without reading the
> SBI spec what is going on here? Or it is a case of "go read the SBI spec
> if you have questions about what the kernel is doing w/ SBI stuff?
> 
> (sidenote, s/they are/it is/?)

Thanks, I updated the spec for its v3 posting. Before posting v3,
I'd be happy to take more comments on it over on its v2 posting.

> 
> Guess both of my comments are in the same vein, but would make
> at-a-glance understanding easier IMO.

I'm inclined to say "go read the spec" for everything, since the KVM
implementation would otherwise also require several comments which
duplicate the spec, but I'm also OK with adding the comments you've
suggested here. I'll do that for rfc-v2.

Thanks,
drew
Andrew Jones April 19, 2023, 8:42 a.m. UTC | #3
On Mon, Apr 17, 2023 at 12:33:51PM +0200, Andrew Jones wrote:
> When the SBI STA extension exists we can use it to implement
> paravirt steal-time support. Fill in the empty pv-time functions
> with an SBI STA implementation.
> 
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
>  arch/riscv/kernel/paravirt.c | 56 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 53 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c
> index 141dbcc36fa2..5f8d96b919e4 100644
> --- a/arch/riscv/kernel/paravirt.c
> +++ b/arch/riscv/kernel/paravirt.c
> @@ -6,12 +6,21 @@
>  #define pr_fmt(fmt) "riscv-pv: " fmt
>  
>  #include <linux/cpuhotplug.h>
> +#include <linux/compiler.h>
> +#include <linux/errno.h>
>  #include <linux/init.h>
>  #include <linux/jump_label.h>
> +#include <linux/kconfig.h>
> +#include <linux/kernel.h>
> +#include <linux/percpu-defs.h>
>  #include <linux/printk.h>
>  #include <linux/static_call.h>
>  #include <linux/types.h>
>  
> +#include <asm/barrier.h>
> +#include <asm/page.h>
> +#include <asm/sbi.h>
> +
>  struct static_key paravirt_steal_enabled;
>  struct static_key paravirt_steal_rq_enabled;
>  
> @@ -31,24 +40,65 @@ static int __init parse_no_stealacc(char *arg)
>  
>  early_param("no-steal-acc", parse_no_stealacc);
>  
> +DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
> +
>  static bool __init has_pv_steal_clock(void)
>  {
> +	if (sbi_probe_extension(SBI_EXT_STA) > 0) {
> +		pr_info("SBI STA extension detected\n");
> +		return true;
> +	}
> +
>  	return false;
>  }
>  
> -static int pv_time_cpu_online(unsigned int cpu)
> +static int sbi_sta_set_steal_time_shmem(unsigned long lo, unsigned long hi,
> +					unsigned long flags)
>  {
> +	struct sbiret ret;
> +
> +	ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_SET_STEAL_TIME_SHMEM,
> +			lo, hi, flags, 0, 0, 0);
> +	if (ret.error) {
> +		if (lo == -1 && hi == -1)
> +			pr_warn("Failed to disable steal-time shmem");
> +		else
> +			pr_warn("Failed to set steal-time shmem");
> +		return -ENOMEM;
> +	}
> +
>  	return 0;
>  }
>  
> +static int pv_time_cpu_online(unsigned int cpu)
> +{
> +	struct sbi_sta_struct *st = this_cpu_ptr(&steal_time);
> +	phys_addr_t pa = __pa(st);
> +	unsigned long lo = (unsigned long)pa;
> +	unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0;
> +
> +	return sbi_sta_set_steal_time_shmem(lo, hi, 0);
> +}
> +
>  static int pv_time_cpu_down_prepare(unsigned int cpu)
>  {
> -	return 0;
> +	return sbi_sta_set_steal_time_shmem(-1, -1, 0);
>  }
>  
>  static u64 pv_time_steal_clock(int cpu)
>  {
> -	return 0;
> +	struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
> +	u32 sequence;
> +	u64 steal;
> +
> +	do {
> +		sequence = st->sequence;
> +		virt_rmb();
> +		steal = st->steal;
> +		virt_rmb();
> +	} while ((sequence & 1) || (sequence != st->sequence));
> +
> +	return steal;
>  }

So anybody poking around the implementations for other architectures will
see that I shamelessly ripped this off from x86's implementation. However,
looking at it again, I think all the references to the steal-time info
should be wrapped in READ_ONCE(). I'll write a patch for x86/kvm to
add READ_ONCE's and also update this patch for rfc-v2.

Thanks,
drew
Andrew Jones April 19, 2023, 12:14 p.m. UTC | #4
On Wed, Apr 19, 2023 at 10:42:16AM +0200, Andrew Jones wrote:
> On Mon, Apr 17, 2023 at 12:33:51PM +0200, Andrew Jones wrote:
> > When the SBI STA extension exists we can use it to implement
> > paravirt steal-time support. Fill in the empty pv-time functions
> > with an SBI STA implementation.
> > 
> > Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> > ---
> >  arch/riscv/kernel/paravirt.c | 56 ++++++++++++++++++++++++++++++++++--
> >  1 file changed, 53 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c
> > index 141dbcc36fa2..5f8d96b919e4 100644
> > --- a/arch/riscv/kernel/paravirt.c
> > +++ b/arch/riscv/kernel/paravirt.c
> > @@ -6,12 +6,21 @@
> >  #define pr_fmt(fmt) "riscv-pv: " fmt
> >  
> >  #include <linux/cpuhotplug.h>
> > +#include <linux/compiler.h>
> > +#include <linux/errno.h>
> >  #include <linux/init.h>
> >  #include <linux/jump_label.h>
> > +#include <linux/kconfig.h>
> > +#include <linux/kernel.h>
> > +#include <linux/percpu-defs.h>
> >  #include <linux/printk.h>
> >  #include <linux/static_call.h>
> >  #include <linux/types.h>
> >  
> > +#include <asm/barrier.h>
> > +#include <asm/page.h>
> > +#include <asm/sbi.h>
> > +
> >  struct static_key paravirt_steal_enabled;
> >  struct static_key paravirt_steal_rq_enabled;
> >  
> > @@ -31,24 +40,65 @@ static int __init parse_no_stealacc(char *arg)
> >  
> >  early_param("no-steal-acc", parse_no_stealacc);
> >  
> > +DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
> > +
> >  static bool __init has_pv_steal_clock(void)
> >  {
> > +	if (sbi_probe_extension(SBI_EXT_STA) > 0) {
> > +		pr_info("SBI STA extension detected\n");
> > +		return true;
> > +	}
> > +
> >  	return false;
> >  }
> >  
> > -static int pv_time_cpu_online(unsigned int cpu)
> > +static int sbi_sta_set_steal_time_shmem(unsigned long lo, unsigned long hi,
> > +					unsigned long flags)
> >  {
> > +	struct sbiret ret;
> > +
> > +	ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_SET_STEAL_TIME_SHMEM,
> > +			lo, hi, flags, 0, 0, 0);
> > +	if (ret.error) {
> > +		if (lo == -1 && hi == -1)
> > +			pr_warn("Failed to disable steal-time shmem");
> > +		else
> > +			pr_warn("Failed to set steal-time shmem");
> > +		return -ENOMEM;
> > +	}
> > +
> >  	return 0;
> >  }
> >  
> > +static int pv_time_cpu_online(unsigned int cpu)
> > +{
> > +	struct sbi_sta_struct *st = this_cpu_ptr(&steal_time);
> > +	phys_addr_t pa = __pa(st);
> > +	unsigned long lo = (unsigned long)pa;
> > +	unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0;
> > +
> > +	return sbi_sta_set_steal_time_shmem(lo, hi, 0);
> > +}
> > +
> >  static int pv_time_cpu_down_prepare(unsigned int cpu)
> >  {
> > -	return 0;
> > +	return sbi_sta_set_steal_time_shmem(-1, -1, 0);
> >  }
> >  
> >  static u64 pv_time_steal_clock(int cpu)
> >  {
> > -	return 0;
> > +	struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
> > +	u32 sequence;
> > +	u64 steal;
> > +
> > +	do {
> > +		sequence = st->sequence;
> > +		virt_rmb();
> > +		steal = st->steal;
> > +		virt_rmb();
> > +	} while ((sequence & 1) || (sequence != st->sequence));
> > +
> > +	return steal;
> >  }
> 
> So anybody poking around the implementations for other architectures will
> see that I shamelessly ripped this off from x86's implementation. However,
> looking at it again, I think all the references to the steal-time info
> should be wrapped in READ_ONCE(). I'll write a patch for x86/kvm to
> add READ_ONCE's and also update this patch for rfc-v2.
>

After rereading "COMPILER BARRIER" of Documentation/memory-barriers.txt, I
no longer believe we need the READ_ONCE wrappers for x86 nor riscv, unless
we add them "for documentation purposes", as suggested by that section.
The reason is that the virt_rmb() barriers also act as compiler barriers,
which not only ensures program access order, but also "Within a loop,
forces the compiler to load the variables used in that loop's conditional
on each pass through that loop."

That said, my preference would be to add them, though, so I'll at least
add them for riscv for rfc-v2. Maybe I'll still send the x86 patch too.

Thanks,
drew
Conor Dooley April 19, 2023, 4:41 p.m. UTC | #5
On Wed, Apr 19, 2023 at 10:24:27AM +0200, Andrew Jones wrote:
> On Tue, Apr 18, 2023 at 08:02:06PM +0100, Conor Dooley wrote:
> > On Mon, Apr 17, 2023 at 12:33:51PM +0200, Andrew Jones wrote:
> > 
> > > +static int pv_time_cpu_online(unsigned int cpu)
> > > +{
> > > +	struct sbi_sta_struct *st = this_cpu_ptr(&steal_time);
> > > +	phys_addr_t pa = __pa(st);
> > > +	unsigned long lo = (unsigned long)pa;
> > > +	unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0;
> > > +
> > > +	return sbi_sta_set_steal_time_shmem(lo, hi, 0);
> > > +}
> > > +
> > >  static int pv_time_cpu_down_prepare(unsigned int cpu)
> > >  {
> > > -	return 0;
> > > +	return sbi_sta_set_steal_time_shmem(-1, -1, 0);
> > 
> > I'm not really a fan of this -1s without an explanation of what passing
> > -1 to the ecall does.
> > 
> > >  }
> > >  
> > >  static u64 pv_time_steal_clock(int cpu)
> > >  {
> > > -	return 0;
> > > +	struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
> > > +	u32 sequence;
> > > +	u64 steal;
> > > +
> > > +	do {
> > > +		sequence = st->sequence;
> > > +		virt_rmb();
> > > +		steal = st->steal;
> > > +		virt_rmb();
> > > +	} while ((sequence & 1) || (sequence != st->sequence));
> > 
> > Call me a bit anal, but should we yoink your:
> > | The supervisor-mode software MUST check this field
> > | before and after reading the `steal` field, and
> > | repeat the read if they are different or odd
> > and add it here so that is it immediately obvious without reading the
> > SBI spec what is going on here? Or it is a case of "go read the SBI spec
> > if you have questions about what the kernel is doing w/ SBI stuff?
> > 
> > (sidenote, s/they are/it is/?)
> 
> Thanks, I updated the spec for its v3 posting. Before posting v3,
> I'd be happy to take more comments on it over on its v2 posting.

I did read it last week or whenever you sent it, but didn't feel
qualified to give an opinion on the content nor did I notice that
grammaro.

I'd have replied there, but I was too lazy to dump your mail from there
into a format readable by a decent mail client. I should just sign up
there with this email address instead.

> > Guess both of my comments are in the same vein, but would make
> > at-a-glance understanding easier IMO.
> 
> I'm inclined to say "go read the spec" for everything, since the KVM
> implementation would otherwise also require several comments which
> duplicate the spec, but I'm also OK with adding the comments you've
> suggested here. I'll do that for rfc-v2.

I don't really have strong feelings, the magic -1s can be explained away
with a define so they're sortable without cogging stuff from the spec.
Guo Ren Aug. 2, 2023, 11:26 p.m. UTC | #6
On Wed, Apr 19, 2023 at 10:42:16AM +0200, Andrew Jones wrote:
> On Mon, Apr 17, 2023 at 12:33:51PM +0200, Andrew Jones wrote:
> > When the SBI STA extension exists we can use it to implement
> > paravirt steal-time support. Fill in the empty pv-time functions
> > with an SBI STA implementation.
> > 
> > Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> > ---
> >  arch/riscv/kernel/paravirt.c | 56 ++++++++++++++++++++++++++++++++++--
> >  1 file changed, 53 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c
> > index 141dbcc36fa2..5f8d96b919e4 100644
> > --- a/arch/riscv/kernel/paravirt.c
> > +++ b/arch/riscv/kernel/paravirt.c
> > @@ -6,12 +6,21 @@
> >  #define pr_fmt(fmt) "riscv-pv: " fmt
> >  
> >  #include <linux/cpuhotplug.h>
> > +#include <linux/compiler.h>
> > +#include <linux/errno.h>
> >  #include <linux/init.h>
> >  #include <linux/jump_label.h>
> > +#include <linux/kconfig.h>
> > +#include <linux/kernel.h>
> > +#include <linux/percpu-defs.h>
> >  #include <linux/printk.h>
> >  #include <linux/static_call.h>
> >  #include <linux/types.h>
> >  
> > +#include <asm/barrier.h>
> > +#include <asm/page.h>
> > +#include <asm/sbi.h>
> > +
> >  struct static_key paravirt_steal_enabled;
> >  struct static_key paravirt_steal_rq_enabled;
> >  
> > @@ -31,24 +40,65 @@ static int __init parse_no_stealacc(char *arg)
> >  
> >  early_param("no-steal-acc", parse_no_stealacc);
> >  
> > +DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
> > +
> >  static bool __init has_pv_steal_clock(void)
> >  {
> > +	if (sbi_probe_extension(SBI_EXT_STA) > 0) {
> > +		pr_info("SBI STA extension detected\n");
> > +		return true;
> > +	}
> > +
> >  	return false;
> >  }
> >  
> > -static int pv_time_cpu_online(unsigned int cpu)
> > +static int sbi_sta_set_steal_time_shmem(unsigned long lo, unsigned long hi,
> > +					unsigned long flags)
> >  {
> > +	struct sbiret ret;
> > +
> > +	ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_SET_STEAL_TIME_SHMEM,
> > +			lo, hi, flags, 0, 0, 0);
> > +	if (ret.error) {
> > +		if (lo == -1 && hi == -1)
> > +			pr_warn("Failed to disable steal-time shmem");
> > +		else
> > +			pr_warn("Failed to set steal-time shmem");
> > +		return -ENOMEM;
> > +	}
> > +
> >  	return 0;
> >  }
> >  
> > +static int pv_time_cpu_online(unsigned int cpu)
> > +{
> > +	struct sbi_sta_struct *st = this_cpu_ptr(&steal_time);
> > +	phys_addr_t pa = __pa(st);
> > +	unsigned long lo = (unsigned long)pa;
> > +	unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0;
> > +
> > +	return sbi_sta_set_steal_time_shmem(lo, hi, 0);
> > +}
> > +
> >  static int pv_time_cpu_down_prepare(unsigned int cpu)
> >  {
> > -	return 0;
> > +	return sbi_sta_set_steal_time_shmem(-1, -1, 0);
> >  }
> >  
> >  static u64 pv_time_steal_clock(int cpu)
> >  {
> > -	return 0;
> > +	struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
> > +	u32 sequence;
> > +	u64 steal;
> > +
> > +	do {
> > +		sequence = st->sequence;
> > +		virt_rmb();
> > +		steal = st->steal;
> > +		virt_rmb();
> > +	} while ((sequence & 1) || (sequence != st->sequence));
> > +
> > +	return steal;
> >  }
> 
> So anybody poking around the implementations for other architectures will
> see that I shamelessly ripped this off from x86's implementation. However,
> looking at it again, I think all the references to the steal-time info
> should be wrapped in READ_ONCE(). I'll write a patch for x86/kvm to
> add READ_ONCE's and also update this patch for rfc-v2.
Hello, what's the status of rfc-v2? The riscv paravirt_qspinlock is
based on your series to reuse the paravirt.c and CONFIG_PARAVIRT.

https://lore.kernel.org/linux-riscv/20230802164701.192791-10-guoren@kernel.org/
https://lore.kernel.org/linux-riscv/20230802164701.192791-17-guoren@kernel.org/

> 
> Thanks,
> drew
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
>
Andrew Jones Aug. 3, 2023, 7:04 a.m. UTC | #7
On Wed, Aug 02, 2023 at 07:26:53PM -0400, Guo Ren wrote:
> On Wed, Apr 19, 2023 at 10:42:16AM +0200, Andrew Jones wrote:
> > On Mon, Apr 17, 2023 at 12:33:51PM +0200, Andrew Jones wrote:
> > > When the SBI STA extension exists we can use it to implement
> > > paravirt steal-time support. Fill in the empty pv-time functions
> > > with an SBI STA implementation.
> > > 
> > > Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> > > ---
> > >  arch/riscv/kernel/paravirt.c | 56 ++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 53 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c
> > > index 141dbcc36fa2..5f8d96b919e4 100644
> > > --- a/arch/riscv/kernel/paravirt.c
> > > +++ b/arch/riscv/kernel/paravirt.c
> > > @@ -6,12 +6,21 @@
> > >  #define pr_fmt(fmt) "riscv-pv: " fmt
> > >  
> > >  #include <linux/cpuhotplug.h>
> > > +#include <linux/compiler.h>
> > > +#include <linux/errno.h>
> > >  #include <linux/init.h>
> > >  #include <linux/jump_label.h>
> > > +#include <linux/kconfig.h>
> > > +#include <linux/kernel.h>
> > > +#include <linux/percpu-defs.h>
> > >  #include <linux/printk.h>
> > >  #include <linux/static_call.h>
> > >  #include <linux/types.h>
> > >  
> > > +#include <asm/barrier.h>
> > > +#include <asm/page.h>
> > > +#include <asm/sbi.h>
> > > +
> > >  struct static_key paravirt_steal_enabled;
> > >  struct static_key paravirt_steal_rq_enabled;
> > >  
> > > @@ -31,24 +40,65 @@ static int __init parse_no_stealacc(char *arg)
> > >  
> > >  early_param("no-steal-acc", parse_no_stealacc);
> > >  
> > > +DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
> > > +
> > >  static bool __init has_pv_steal_clock(void)
> > >  {
> > > +	if (sbi_probe_extension(SBI_EXT_STA) > 0) {
> > > +		pr_info("SBI STA extension detected\n");
> > > +		return true;
> > > +	}
> > > +
> > >  	return false;
> > >  }
> > >  
> > > -static int pv_time_cpu_online(unsigned int cpu)
> > > +static int sbi_sta_set_steal_time_shmem(unsigned long lo, unsigned long hi,
> > > +					unsigned long flags)
> > >  {
> > > +	struct sbiret ret;
> > > +
> > > +	ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_SET_STEAL_TIME_SHMEM,
> > > +			lo, hi, flags, 0, 0, 0);
> > > +	if (ret.error) {
> > > +		if (lo == -1 && hi == -1)
> > > +			pr_warn("Failed to disable steal-time shmem");
> > > +		else
> > > +			pr_warn("Failed to set steal-time shmem");
> > > +		return -ENOMEM;
> > > +	}
> > > +
> > >  	return 0;
> > >  }
> > >  
> > > +static int pv_time_cpu_online(unsigned int cpu)
> > > +{
> > > +	struct sbi_sta_struct *st = this_cpu_ptr(&steal_time);
> > > +	phys_addr_t pa = __pa(st);
> > > +	unsigned long lo = (unsigned long)pa;
> > > +	unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0;
> > > +
> > > +	return sbi_sta_set_steal_time_shmem(lo, hi, 0);
> > > +}
> > > +
> > >  static int pv_time_cpu_down_prepare(unsigned int cpu)
> > >  {
> > > -	return 0;
> > > +	return sbi_sta_set_steal_time_shmem(-1, -1, 0);
> > >  }
> > >  
> > >  static u64 pv_time_steal_clock(int cpu)
> > >  {
> > > -	return 0;
> > > +	struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
> > > +	u32 sequence;
> > > +	u64 steal;
> > > +
> > > +	do {
> > > +		sequence = st->sequence;
> > > +		virt_rmb();
> > > +		steal = st->steal;
> > > +		virt_rmb();
> > > +	} while ((sequence & 1) || (sequence != st->sequence));
> > > +
> > > +	return steal;
> > >  }
> > 
> > So anybody poking around the implementations for other architectures will
> > see that I shamelessly ripped this off from x86's implementation. However,
> > looking at it again, I think all the references to the steal-time info
> > should be wrapped in READ_ONCE(). I'll write a patch for x86/kvm to
> > add READ_ONCE's and also update this patch for rfc-v2.
> Hello, what's the status of rfc-v2? The riscv paravirt_qspinlock is
> based on your series to reuse the paravirt.c and CONFIG_PARAVIRT.

I prepared it, but then didn't bother posting, since the series has to
stay an RFC and not be merged until the spec is frozen and the changes
were pretty minor (mostly just name changes). The pv_time_steal_clock()
changes described above look like this

static u64 pv_time_steal_clock(int cpu)
{
        struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
        u32 sequence;
        u64 steal;

        /*
         * Check the sequence field before and after reading the steal
         * field. Repeat the read if it is different or odd.
         */
        do {
                sequence = READ_ONCE(st->sequence);
                virt_rmb();
                steal = READ_ONCE(st->steal);
                virt_rmb();
        } while ((le32_to_cpu(sequence) & 1) || sequence != READ_ONCE(st->sequence));

        return le64_to_cpu(steal);
}

> 
> https://lore.kernel.org/linux-riscv/20230802164701.192791-10-guoren@kernel.org/
> https://lore.kernel.org/linux-riscv/20230802164701.192791-17-guoren@kernel.org/

Thanks, I'll take a look at this.

drew
diff mbox series

Patch

diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c
index 141dbcc36fa2..5f8d96b919e4 100644
--- a/arch/riscv/kernel/paravirt.c
+++ b/arch/riscv/kernel/paravirt.c
@@ -6,12 +6,21 @@ 
 #define pr_fmt(fmt) "riscv-pv: " fmt
 
 #include <linux/cpuhotplug.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/jump_label.h>
+#include <linux/kconfig.h>
+#include <linux/kernel.h>
+#include <linux/percpu-defs.h>
 #include <linux/printk.h>
 #include <linux/static_call.h>
 #include <linux/types.h>
 
+#include <asm/barrier.h>
+#include <asm/page.h>
+#include <asm/sbi.h>
+
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
@@ -31,24 +40,65 @@  static int __init parse_no_stealacc(char *arg)
 
 early_param("no-steal-acc", parse_no_stealacc);
 
+DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
+
 static bool __init has_pv_steal_clock(void)
 {
+	if (sbi_probe_extension(SBI_EXT_STA) > 0) {
+		pr_info("SBI STA extension detected\n");
+		return true;
+	}
+
 	return false;
 }
 
-static int pv_time_cpu_online(unsigned int cpu)
+static int sbi_sta_set_steal_time_shmem(unsigned long lo, unsigned long hi,
+					unsigned long flags)
 {
+	struct sbiret ret;
+
+	ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_SET_STEAL_TIME_SHMEM,
+			lo, hi, flags, 0, 0, 0);
+	if (ret.error) {
+		if (lo == -1 && hi == -1)
+			pr_warn("Failed to disable steal-time shmem");
+		else
+			pr_warn("Failed to set steal-time shmem");
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
+static int pv_time_cpu_online(unsigned int cpu)
+{
+	struct sbi_sta_struct *st = this_cpu_ptr(&steal_time);
+	phys_addr_t pa = __pa(st);
+	unsigned long lo = (unsigned long)pa;
+	unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0;
+
+	return sbi_sta_set_steal_time_shmem(lo, hi, 0);
+}
+
 static int pv_time_cpu_down_prepare(unsigned int cpu)
 {
-	return 0;
+	return sbi_sta_set_steal_time_shmem(-1, -1, 0);
 }
 
 static u64 pv_time_steal_clock(int cpu)
 {
-	return 0;
+	struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
+	u32 sequence;
+	u64 steal;
+
+	do {
+		sequence = st->sequence;
+		virt_rmb();
+		steal = st->steal;
+		virt_rmb();
+	} while ((sequence & 1) || (sequence != st->sequence));
+
+	return steal;
 }
 
 int __init pv_time_init(void)