Message ID | 20190822084131.114764-16-anup.patel@wdc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM RISC-V Support | expand |
On 22.08.19 10:46, Anup Patel wrote: > From: Atish Patra <atish.patra@wdc.com> > > The RISC-V hypervisor specification doesn't have any virtual timer > feature. > > Due to this, the guest VCPU timer will be programmed via SBI calls. > The host will use a separate hrtimer event for each guest VCPU to > provide timer functionality. We inject a virtual timer interrupt to > the guest VCPU whenever the guest VCPU hrtimer event expires. > > The following features are not supported yet and will be added in > future: > 1. A time offset to adjust guest time from host time > 2. A saved next event in guest vcpu for vm migration Implementing these 2 bits right now should be trivial. Why wait? > > Signed-off-by: Atish Patra <atish.patra@wdc.com> > Signed-off-by: Anup Patel <anup.patel@wdc.com> > Acked-by: Paolo Bonzini <pbonzini@redhat.com> > Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> > --- > arch/riscv/include/asm/kvm_host.h | 4 + > arch/riscv/include/asm/kvm_vcpu_timer.h | 32 +++++++ > arch/riscv/kvm/Makefile | 2 +- > arch/riscv/kvm/vcpu.c | 6 ++ > arch/riscv/kvm/vcpu_timer.c | 106 ++++++++++++++++++++++++ > drivers/clocksource/timer-riscv.c | 8 ++ > include/clocksource/timer-riscv.h | 16 ++++ > 7 files changed, 173 insertions(+), 1 deletion(-) > create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h > create mode 100644 arch/riscv/kvm/vcpu_timer.c > create mode 100644 include/clocksource/timer-riscv.h > > diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h > index ab33e59a3d88..d2a2e45eefc0 100644 > --- a/arch/riscv/include/asm/kvm_host.h > +++ b/arch/riscv/include/asm/kvm_host.h > @@ -12,6 +12,7 @@ > #include <linux/types.h> > #include <linux/kvm.h> > #include <linux/kvm_types.h> > +#include <asm/kvm_vcpu_timer.h> > > #ifdef CONFIG_64BIT > #define KVM_MAX_VCPUS (1U << 16) > @@ -167,6 +168,9 @@ struct kvm_vcpu_arch { > unsigned long irqs_pending; > unsigned long irqs_pending_mask; > > + /* VCPU Timer */ > + struct kvm_vcpu_timer 
timer; > + > /* MMIO instruction details */ > struct kvm_mmio_decode mmio_decode; > > diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h b/arch/riscv/include/asm/kvm_vcpu_timer.h > new file mode 100644 > index 000000000000..df67ea86988e > --- /dev/null > +++ b/arch/riscv/include/asm/kvm_vcpu_timer.h > @@ -0,0 +1,32 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * Copyright (C) 2019 Western Digital Corporation or its affiliates. > + * > + * Authors: > + * Atish Patra <atish.patra@wdc.com> > + */ > + > +#ifndef __KVM_VCPU_RISCV_TIMER_H > +#define __KVM_VCPU_RISCV_TIMER_H > + > +#include <linux/hrtimer.h> > + > +#define VCPU_TIMER_PROGRAM_THRESHOLD_NS 1000 > + > +struct kvm_vcpu_timer { > + bool init_done; > + /* Check if the timer is programmed */ > + bool is_set; > + struct hrtimer hrt; > + /* Mult & Shift values to get nanosec from cycles */ > + u32 mult; > + u32 shift; > +}; > + > +int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu); > +int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu); > +int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu); > +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, > + unsigned long ncycles); This function never gets called? 
> + > +#endif > diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile > index c0f57f26c13d..3e0c7558320d 100644 > --- a/arch/riscv/kvm/Makefile > +++ b/arch/riscv/kvm/Makefile > @@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm > kvm-objs := $(common-objs-y) > > kvm-objs += main.o vm.o vmid.o tlb.o mmu.o > -kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o > +kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o > > obj-$(CONFIG_KVM) += kvm.o > diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c > index 6124077d154f..018fca436776 100644 > --- a/arch/riscv/kvm/vcpu.c > +++ b/arch/riscv/kvm/vcpu.c > @@ -54,6 +54,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) > > memcpy(cntx, reset_cntx, sizeof(*cntx)); > > + kvm_riscv_vcpu_timer_reset(vcpu); > + > WRITE_ONCE(vcpu->arch.irqs_pending, 0); > WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); > } > @@ -108,6 +110,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > cntx->hstatus |= HSTATUS_SP2P; > cntx->hstatus |= HSTATUS_SPV; > > + /* Setup VCPU timer */ > + kvm_riscv_vcpu_timer_init(vcpu); > + > /* Reset VCPU */ > kvm_riscv_reset_vcpu(vcpu); > > @@ -116,6 +121,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > > void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) > { > + kvm_riscv_vcpu_timer_deinit(vcpu); > kvm_riscv_stage2_flush_cache(vcpu); > kmem_cache_free(kvm_vcpu_cache, vcpu); > } > diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c > new file mode 100644 > index 000000000000..a45ca06e1aa6 > --- /dev/null > +++ b/arch/riscv/kvm/vcpu_timer.c > @@ -0,0 +1,106 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2019 Western Digital Corporation or its affiliates. 
> + * > + * Authors: > + * Atish Patra <atish.patra@wdc.com> > + */ > + > +#include <linux/errno.h> > +#include <linux/err.h> > +#include <linux/kvm_host.h> > +#include <clocksource/timer-riscv.h> > +#include <asm/csr.h> > +#include <asm/kvm_vcpu_timer.h> > + > +static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer *h) > +{ > + struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt); > + struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer); > + > + t->is_set = false; > + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_TIMER); > + > + return HRTIMER_NORESTART; > +} > + > +static u64 kvm_riscv_delta_cycles2ns(u64 cycles, struct kvm_vcpu_timer *t) > +{ > + unsigned long flags; > + u64 cycles_now, cycles_delta, delta_ns; > + > + local_irq_save(flags); > + cycles_now = get_cycles64(); > + if (cycles_now < cycles) > + cycles_delta = cycles - cycles_now; > + else > + cycles_delta = 0; > + delta_ns = (cycles_delta * t->mult) >> t->shift; > + local_irq_restore(flags); > + > + return delta_ns; > +} > + > +static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t) > +{ > + if (!t->init_done || !t->is_set) > + return -EINVAL; > + > + hrtimer_cancel(&t->hrt); > + t->is_set = false; > + > + return 0; > +} > + > +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, > + unsigned long ncycles) > +{ > + struct kvm_vcpu_timer *t = &vcpu->arch.timer; > + u64 delta_ns = kvm_riscv_delta_cycles2ns(ncycles, t); ... in fact, I feel like I'm missing something obvious here. How does the guest trigger the timer event? What is the argument it uses for that and how does that play with the tbfreq in the earlier patch? Alex
On Fri, Aug 23, 2019 at 1:23 PM Alexander Graf <graf@amazon.com> wrote: > > On 22.08.19 10:46, Anup Patel wrote: > > From: Atish Patra <atish.patra@wdc.com> > > > > The RISC-V hypervisor specification doesn't have any virtual timer > > feature. > > > > Due to this, the guest VCPU timer will be programmed via SBI calls. > > The host will use a separate hrtimer event for each guest VCPU to > > provide timer functionality. We inject a virtual timer interrupt to > > the guest VCPU whenever the guest VCPU hrtimer event expires. > > > > The following features are not supported yet and will be added in > > future: > > 1. A time offset to adjust guest time from host time > > 2. A saved next event in guest vcpu for vm migration > > Implementing these 2 bits right now should be trivial. Why wait? We were waiting for HTIMEDELTA CSR to be merged so we deferred this items. > > > > > Signed-off-by: Atish Patra <atish.patra@wdc.com> > > Signed-off-by: Anup Patel <anup.patel@wdc.com> > > Acked-by: Paolo Bonzini <pbonzini@redhat.com> > > Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> > > --- > > arch/riscv/include/asm/kvm_host.h | 4 + > > arch/riscv/include/asm/kvm_vcpu_timer.h | 32 +++++++ > > arch/riscv/kvm/Makefile | 2 +- > > arch/riscv/kvm/vcpu.c | 6 ++ > > arch/riscv/kvm/vcpu_timer.c | 106 ++++++++++++++++++++++++ > > drivers/clocksource/timer-riscv.c | 8 ++ > > include/clocksource/timer-riscv.h | 16 ++++ > > 7 files changed, 173 insertions(+), 1 deletion(-) > > create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h > > create mode 100644 arch/riscv/kvm/vcpu_timer.c > > create mode 100644 include/clocksource/timer-riscv.h > > > > diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h > > index ab33e59a3d88..d2a2e45eefc0 100644 > > --- a/arch/riscv/include/asm/kvm_host.h > > +++ b/arch/riscv/include/asm/kvm_host.h > > @@ -12,6 +12,7 @@ > > #include <linux/types.h> > > #include <linux/kvm.h> > > #include <linux/kvm_types.h> > > +#include 
<asm/kvm_vcpu_timer.h> > > > > #ifdef CONFIG_64BIT > > #define KVM_MAX_VCPUS (1U << 16) > > @@ -167,6 +168,9 @@ struct kvm_vcpu_arch { > > unsigned long irqs_pending; > > unsigned long irqs_pending_mask; > > > > + /* VCPU Timer */ > > + struct kvm_vcpu_timer timer; > > + > > /* MMIO instruction details */ > > struct kvm_mmio_decode mmio_decode; > > > > diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h b/arch/riscv/include/asm/kvm_vcpu_timer.h > > new file mode 100644 > > index 000000000000..df67ea86988e > > --- /dev/null > > +++ b/arch/riscv/include/asm/kvm_vcpu_timer.h > > @@ -0,0 +1,32 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > +/* > > + * Copyright (C) 2019 Western Digital Corporation or its affiliates. > > + * > > + * Authors: > > + * Atish Patra <atish.patra@wdc.com> > > + */ > > + > > +#ifndef __KVM_VCPU_RISCV_TIMER_H > > +#define __KVM_VCPU_RISCV_TIMER_H > > + > > +#include <linux/hrtimer.h> > > + > > +#define VCPU_TIMER_PROGRAM_THRESHOLD_NS 1000 > > + > > +struct kvm_vcpu_timer { > > + bool init_done; > > + /* Check if the timer is programmed */ > > + bool is_set; > > + struct hrtimer hrt; > > + /* Mult & Shift values to get nanosec from cycles */ > > + u32 mult; > > + u32 shift; > > +}; > > + > > +int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu); > > +int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu); > > +int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu); > > +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, > > + unsigned long ncycles); > > This function never gets called? It's called from SBI emulation. 
> > > + > > +#endif > > diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile > > index c0f57f26c13d..3e0c7558320d 100644 > > --- a/arch/riscv/kvm/Makefile > > +++ b/arch/riscv/kvm/Makefile > > @@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm > > kvm-objs := $(common-objs-y) > > > > kvm-objs += main.o vm.o vmid.o tlb.o mmu.o > > -kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o > > +kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o > > > > obj-$(CONFIG_KVM) += kvm.o > > diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c > > index 6124077d154f..018fca436776 100644 > > --- a/arch/riscv/kvm/vcpu.c > > +++ b/arch/riscv/kvm/vcpu.c > > @@ -54,6 +54,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) > > > > memcpy(cntx, reset_cntx, sizeof(*cntx)); > > > > + kvm_riscv_vcpu_timer_reset(vcpu); > > + > > WRITE_ONCE(vcpu->arch.irqs_pending, 0); > > WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); > > } > > @@ -108,6 +110,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > > cntx->hstatus |= HSTATUS_SP2P; > > cntx->hstatus |= HSTATUS_SPV; > > > > + /* Setup VCPU timer */ > > + kvm_riscv_vcpu_timer_init(vcpu); > > + > > /* Reset VCPU */ > > kvm_riscv_reset_vcpu(vcpu); > > > > @@ -116,6 +121,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > > > > void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) > > { > > + kvm_riscv_vcpu_timer_deinit(vcpu); > > kvm_riscv_stage2_flush_cache(vcpu); > > kmem_cache_free(kvm_vcpu_cache, vcpu); > > } > > diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c > > new file mode 100644 > > index 000000000000..a45ca06e1aa6 > > --- /dev/null > > +++ b/arch/riscv/kvm/vcpu_timer.c > > @@ -0,0 +1,106 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * Copyright (C) 2019 Western Digital Corporation or its affiliates. 
> > + * > > + * Authors: > > + * Atish Patra <atish.patra@wdc.com> > > + */ > > + > > +#include <linux/errno.h> > > +#include <linux/err.h> > > +#include <linux/kvm_host.h> > > +#include <clocksource/timer-riscv.h> > > +#include <asm/csr.h> > > +#include <asm/kvm_vcpu_timer.h> > > + > > +static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer *h) > > +{ > > + struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt); > > + struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer); > > + > > + t->is_set = false; > > + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_TIMER); > > + > > + return HRTIMER_NORESTART; > > +} > > + > > +static u64 kvm_riscv_delta_cycles2ns(u64 cycles, struct kvm_vcpu_timer *t) > > +{ > > + unsigned long flags; > > + u64 cycles_now, cycles_delta, delta_ns; > > + > > + local_irq_save(flags); > > + cycles_now = get_cycles64(); > > + if (cycles_now < cycles) > > + cycles_delta = cycles - cycles_now; > > + else > > + cycles_delta = 0; > > + delta_ns = (cycles_delta * t->mult) >> t->shift; > > + local_irq_restore(flags); > > + > > + return delta_ns; > > +} > > + > > +static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t) > > +{ > > + if (!t->init_done || !t->is_set) > > + return -EINVAL; > > + > > + hrtimer_cancel(&t->hrt); > > + t->is_set = false; > > + > > + return 0; > > +} > > + > > +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, > > + unsigned long ncycles) > > +{ > > + struct kvm_vcpu_timer *t = &vcpu->arch.timer; > > + u64 delta_ns = kvm_riscv_delta_cycles2ns(ncycles, t); > > ... in fact, I feel like I'm missing something obvious here. How does > the guest trigger the timer event? What is the argument it uses for that > and how does that play with the tbfreq in the earlier patch? We have SBI call inferface between Hypervisor and Guest. One of the SBI call allows Guest to program time event. The next event is specified as absolute cycles. 
The Guest can read time using the TIME CSR which returns system timer value (@ tbfreq frequency). Guest Linux will know the tbfreq from DTB passed by QEMU/KVMTOOL and it has to be the same as Host tbfreq. The TBFREQ config register visible to user-space is a read-only CONFIG register which tells user-space tools (QEMU/KVMTOOL) about Host tbfreq. Regards, Anup > > > Alex >
> Am 23.08.2019 um 13:05 schrieb Anup Patel <anup@brainfault.org>: > >> On Fri, Aug 23, 2019 at 1:23 PM Alexander Graf <graf@amazon.com> wrote: >> >>> On 22.08.19 10:46, Anup Patel wrote: >>> From: Atish Patra <atish.patra@wdc.com> >>> >>> The RISC-V hypervisor specification doesn't have any virtual timer >>> feature. >>> >>> Due to this, the guest VCPU timer will be programmed via SBI calls. >>> The host will use a separate hrtimer event for each guest VCPU to >>> provide timer functionality. We inject a virtual timer interrupt to >>> the guest VCPU whenever the guest VCPU hrtimer event expires. >>> >>> The following features are not supported yet and will be added in >>> future: >>> 1. A time offset to adjust guest time from host time >>> 2. A saved next event in guest vcpu for vm migration >> >> Implementing these 2 bits right now should be trivial. Why wait? > > We were waiting for HTIMEDELTA CSR to be merged so we > deferred this items. > >> >>> >>> Signed-off-by: Atish Patra <atish.patra@wdc.com> >>> Signed-off-by: Anup Patel <anup.patel@wdc.com> >>> Acked-by: Paolo Bonzini <pbonzini@redhat.com> >>> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> >>> --- >>> arch/riscv/include/asm/kvm_host.h | 4 + >>> arch/riscv/include/asm/kvm_vcpu_timer.h | 32 +++++++ >>> arch/riscv/kvm/Makefile | 2 +- >>> arch/riscv/kvm/vcpu.c | 6 ++ >>> arch/riscv/kvm/vcpu_timer.c | 106 ++++++++++++++++++++++++ >>> drivers/clocksource/timer-riscv.c | 8 ++ >>> include/clocksource/timer-riscv.h | 16 ++++ >>> 7 files changed, 173 insertions(+), 1 deletion(-) >>> create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h >>> create mode 100644 arch/riscv/kvm/vcpu_timer.c >>> create mode 100644 include/clocksource/timer-riscv.h >>> >>> diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h >>> index ab33e59a3d88..d2a2e45eefc0 100644 >>> --- a/arch/riscv/include/asm/kvm_host.h >>> +++ b/arch/riscv/include/asm/kvm_host.h >>> @@ -12,6 +12,7 @@ >>> #include 
<linux/types.h> >>> #include <linux/kvm.h> >>> #include <linux/kvm_types.h> >>> +#include <asm/kvm_vcpu_timer.h> >>> >>> #ifdef CONFIG_64BIT >>> #define KVM_MAX_VCPUS (1U << 16) >>> @@ -167,6 +168,9 @@ struct kvm_vcpu_arch { >>> unsigned long irqs_pending; >>> unsigned long irqs_pending_mask; >>> >>> + /* VCPU Timer */ >>> + struct kvm_vcpu_timer timer; >>> + >>> /* MMIO instruction details */ >>> struct kvm_mmio_decode mmio_decode; >>> >>> diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h b/arch/riscv/include/asm/kvm_vcpu_timer.h >>> new file mode 100644 >>> index 000000000000..df67ea86988e >>> --- /dev/null >>> +++ b/arch/riscv/include/asm/kvm_vcpu_timer.h >>> @@ -0,0 +1,32 @@ >>> +/* SPDX-License-Identifier: GPL-2.0-only */ >>> +/* >>> + * Copyright (C) 2019 Western Digital Corporation or its affiliates. >>> + * >>> + * Authors: >>> + * Atish Patra <atish.patra@wdc.com> >>> + */ >>> + >>> +#ifndef __KVM_VCPU_RISCV_TIMER_H >>> +#define __KVM_VCPU_RISCV_TIMER_H >>> + >>> +#include <linux/hrtimer.h> >>> + >>> +#define VCPU_TIMER_PROGRAM_THRESHOLD_NS 1000 >>> + >>> +struct kvm_vcpu_timer { >>> + bool init_done; >>> + /* Check if the timer is programmed */ >>> + bool is_set; >>> + struct hrtimer hrt; >>> + /* Mult & Shift values to get nanosec from cycles */ >>> + u32 mult; >>> + u32 shift; >>> +}; >>> + >>> +int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu); >>> +int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu); >>> +int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu); >>> +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, >>> + unsigned long ncycles); >> >> This function never gets called? > > It's called from SBI emulation. 
> >> >>> + >>> +#endif >>> diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile >>> index c0f57f26c13d..3e0c7558320d 100644 >>> --- a/arch/riscv/kvm/Makefile >>> +++ b/arch/riscv/kvm/Makefile >>> @@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm >>> kvm-objs := $(common-objs-y) >>> >>> kvm-objs += main.o vm.o vmid.o tlb.o mmu.o >>> -kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o >>> +kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o >>> >>> obj-$(CONFIG_KVM) += kvm.o >>> diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c >>> index 6124077d154f..018fca436776 100644 >>> --- a/arch/riscv/kvm/vcpu.c >>> +++ b/arch/riscv/kvm/vcpu.c >>> @@ -54,6 +54,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) >>> >>> memcpy(cntx, reset_cntx, sizeof(*cntx)); >>> >>> + kvm_riscv_vcpu_timer_reset(vcpu); >>> + >>> WRITE_ONCE(vcpu->arch.irqs_pending, 0); >>> WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); >>> } >>> @@ -108,6 +110,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) >>> cntx->hstatus |= HSTATUS_SP2P; >>> cntx->hstatus |= HSTATUS_SPV; >>> >>> + /* Setup VCPU timer */ >>> + kvm_riscv_vcpu_timer_init(vcpu); >>> + >>> /* Reset VCPU */ >>> kvm_riscv_reset_vcpu(vcpu); >>> >>> @@ -116,6 +121,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) >>> >>> void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) >>> { >>> + kvm_riscv_vcpu_timer_deinit(vcpu); >>> kvm_riscv_stage2_flush_cache(vcpu); >>> kmem_cache_free(kvm_vcpu_cache, vcpu); >>> } >>> diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c >>> new file mode 100644 >>> index 000000000000..a45ca06e1aa6 >>> --- /dev/null >>> +++ b/arch/riscv/kvm/vcpu_timer.c >>> @@ -0,0 +1,106 @@ >>> +// SPDX-License-Identifier: GPL-2.0 >>> +/* >>> + * Copyright (C) 2019 Western Digital Corporation or its affiliates. 
>>> + * >>> + * Authors: >>> + * Atish Patra <atish.patra@wdc.com> >>> + */ >>> + >>> +#include <linux/errno.h> >>> +#include <linux/err.h> >>> +#include <linux/kvm_host.h> >>> +#include <clocksource/timer-riscv.h> >>> +#include <asm/csr.h> >>> +#include <asm/kvm_vcpu_timer.h> >>> + >>> +static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer *h) >>> +{ >>> + struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt); >>> + struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer); >>> + >>> + t->is_set = false; >>> + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_TIMER); >>> + >>> + return HRTIMER_NORESTART; >>> +} >>> + >>> +static u64 kvm_riscv_delta_cycles2ns(u64 cycles, struct kvm_vcpu_timer *t) >>> +{ >>> + unsigned long flags; >>> + u64 cycles_now, cycles_delta, delta_ns; >>> + >>> + local_irq_save(flags); >>> + cycles_now = get_cycles64(); >>> + if (cycles_now < cycles) >>> + cycles_delta = cycles - cycles_now; >>> + else >>> + cycles_delta = 0; >>> + delta_ns = (cycles_delta * t->mult) >> t->shift; >>> + local_irq_restore(flags); >>> + >>> + return delta_ns; >>> +} >>> + >>> +static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t) >>> +{ >>> + if (!t->init_done || !t->is_set) >>> + return -EINVAL; >>> + >>> + hrtimer_cancel(&t->hrt); >>> + t->is_set = false; >>> + >>> + return 0; >>> +} >>> + >>> +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, >>> + unsigned long ncycles) >>> +{ >>> + struct kvm_vcpu_timer *t = &vcpu->arch.timer; >>> + u64 delta_ns = kvm_riscv_delta_cycles2ns(ncycles, t); >> >> ... in fact, I feel like I'm missing something obvious here. How does >> the guest trigger the timer event? What is the argument it uses for that >> and how does that play with the tbfreq in the earlier patch? > > We have SBI call inferface between Hypervisor and Guest. One of the > SBI call allows Guest to program time event. The next event is specified > as absolute cycles. 
The Guest can read time using TIME CSR which > returns system timer value (@ tbfreq frequency). > > Guest Linux will know the tbfreq from DTB passed by QEMU/KVMTOOL > and it has to be same as Host tbfreq. > > The TBFREQ config register visible to user-space is a read-only CONFIG > register which tells user-space tools (QEMU/KVMTOOL) about Host tbfreq. And it's read-only because you cannot trap on TB reads? Alex > > Regards, > Anup > >> >> >> Alex >>
On Fri, Aug 23, 2019 at 5:03 PM Graf (AWS), Alexander <graf@amazon.com> wrote: > > > > > Am 23.08.2019 um 13:05 schrieb Anup Patel <anup@brainfault.org>: > > > >> On Fri, Aug 23, 2019 at 1:23 PM Alexander Graf <graf@amazon.com> wrote: > >> > >>> On 22.08.19 10:46, Anup Patel wrote: > >>> From: Atish Patra <atish.patra@wdc.com> > >>> > >>> The RISC-V hypervisor specification doesn't have any virtual timer > >>> feature. > >>> > >>> Due to this, the guest VCPU timer will be programmed via SBI calls. > >>> The host will use a separate hrtimer event for each guest VCPU to > >>> provide timer functionality. We inject a virtual timer interrupt to > >>> the guest VCPU whenever the guest VCPU hrtimer event expires. > >>> > >>> The following features are not supported yet and will be added in > >>> future: > >>> 1. A time offset to adjust guest time from host time > >>> 2. A saved next event in guest vcpu for vm migration > >> > >> Implementing these 2 bits right now should be trivial. Why wait? > > > > We were waiting for HTIMEDELTA CSR to be merged so we > > deferred this items. 
> > > >> > >>> > >>> Signed-off-by: Atish Patra <atish.patra@wdc.com> > >>> Signed-off-by: Anup Patel <anup.patel@wdc.com> > >>> Acked-by: Paolo Bonzini <pbonzini@redhat.com> > >>> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> > >>> --- > >>> arch/riscv/include/asm/kvm_host.h | 4 + > >>> arch/riscv/include/asm/kvm_vcpu_timer.h | 32 +++++++ > >>> arch/riscv/kvm/Makefile | 2 +- > >>> arch/riscv/kvm/vcpu.c | 6 ++ > >>> arch/riscv/kvm/vcpu_timer.c | 106 ++++++++++++++++++++++++ > >>> drivers/clocksource/timer-riscv.c | 8 ++ > >>> include/clocksource/timer-riscv.h | 16 ++++ > >>> 7 files changed, 173 insertions(+), 1 deletion(-) > >>> create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h > >>> create mode 100644 arch/riscv/kvm/vcpu_timer.c > >>> create mode 100644 include/clocksource/timer-riscv.h > >>> > >>> diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h > >>> index ab33e59a3d88..d2a2e45eefc0 100644 > >>> --- a/arch/riscv/include/asm/kvm_host.h > >>> +++ b/arch/riscv/include/asm/kvm_host.h > >>> @@ -12,6 +12,7 @@ > >>> #include <linux/types.h> > >>> #include <linux/kvm.h> > >>> #include <linux/kvm_types.h> > >>> +#include <asm/kvm_vcpu_timer.h> > >>> > >>> #ifdef CONFIG_64BIT > >>> #define KVM_MAX_VCPUS (1U << 16) > >>> @@ -167,6 +168,9 @@ struct kvm_vcpu_arch { > >>> unsigned long irqs_pending; > >>> unsigned long irqs_pending_mask; > >>> > >>> + /* VCPU Timer */ > >>> + struct kvm_vcpu_timer timer; > >>> + > >>> /* MMIO instruction details */ > >>> struct kvm_mmio_decode mmio_decode; > >>> > >>> diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h b/arch/riscv/include/asm/kvm_vcpu_timer.h > >>> new file mode 100644 > >>> index 000000000000..df67ea86988e > >>> --- /dev/null > >>> +++ b/arch/riscv/include/asm/kvm_vcpu_timer.h > >>> @@ -0,0 +1,32 @@ > >>> +/* SPDX-License-Identifier: GPL-2.0-only */ > >>> +/* > >>> + * Copyright (C) 2019 Western Digital Corporation or its affiliates. 
> >>> + * > >>> + * Authors: > >>> + * Atish Patra <atish.patra@wdc.com> > >>> + */ > >>> + > >>> +#ifndef __KVM_VCPU_RISCV_TIMER_H > >>> +#define __KVM_VCPU_RISCV_TIMER_H > >>> + > >>> +#include <linux/hrtimer.h> > >>> + > >>> +#define VCPU_TIMER_PROGRAM_THRESHOLD_NS 1000 > >>> + > >>> +struct kvm_vcpu_timer { > >>> + bool init_done; > >>> + /* Check if the timer is programmed */ > >>> + bool is_set; > >>> + struct hrtimer hrt; > >>> + /* Mult & Shift values to get nanosec from cycles */ > >>> + u32 mult; > >>> + u32 shift; > >>> +}; > >>> + > >>> +int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu); > >>> +int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu); > >>> +int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu); > >>> +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, > >>> + unsigned long ncycles); > >> > >> This function never gets called? > > > > It's called from SBI emulation. > > > >> > >>> + > >>> +#endif > >>> diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile > >>> index c0f57f26c13d..3e0c7558320d 100644 > >>> --- a/arch/riscv/kvm/Makefile > >>> +++ b/arch/riscv/kvm/Makefile > >>> @@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm > >>> kvm-objs := $(common-objs-y) > >>> > >>> kvm-objs += main.o vm.o vmid.o tlb.o mmu.o > >>> -kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o > >>> +kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o > >>> > >>> obj-$(CONFIG_KVM) += kvm.o > >>> diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c > >>> index 6124077d154f..018fca436776 100644 > >>> --- a/arch/riscv/kvm/vcpu.c > >>> +++ b/arch/riscv/kvm/vcpu.c > >>> @@ -54,6 +54,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) > >>> > >>> memcpy(cntx, reset_cntx, sizeof(*cntx)); > >>> > >>> + kvm_riscv_vcpu_timer_reset(vcpu); > >>> + > >>> WRITE_ONCE(vcpu->arch.irqs_pending, 0); > >>> WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); > >>> } > >>> @@ -108,6 +110,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > >>> 
cntx->hstatus |= HSTATUS_SP2P; > >>> cntx->hstatus |= HSTATUS_SPV; > >>> > >>> + /* Setup VCPU timer */ > >>> + kvm_riscv_vcpu_timer_init(vcpu); > >>> + > >>> /* Reset VCPU */ > >>> kvm_riscv_reset_vcpu(vcpu); > >>> > >>> @@ -116,6 +121,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > >>> > >>> void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) > >>> { > >>> + kvm_riscv_vcpu_timer_deinit(vcpu); > >>> kvm_riscv_stage2_flush_cache(vcpu); > >>> kmem_cache_free(kvm_vcpu_cache, vcpu); > >>> } > >>> diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c > >>> new file mode 100644 > >>> index 000000000000..a45ca06e1aa6 > >>> --- /dev/null > >>> +++ b/arch/riscv/kvm/vcpu_timer.c > >>> @@ -0,0 +1,106 @@ > >>> +// SPDX-License-Identifier: GPL-2.0 > >>> +/* > >>> + * Copyright (C) 2019 Western Digital Corporation or its affiliates. > >>> + * > >>> + * Authors: > >>> + * Atish Patra <atish.patra@wdc.com> > >>> + */ > >>> + > >>> +#include <linux/errno.h> > >>> +#include <linux/err.h> > >>> +#include <linux/kvm_host.h> > >>> +#include <clocksource/timer-riscv.h> > >>> +#include <asm/csr.h> > >>> +#include <asm/kvm_vcpu_timer.h> > >>> + > >>> +static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer *h) > >>> +{ > >>> + struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt); > >>> + struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer); > >>> + > >>> + t->is_set = false; > >>> + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_TIMER); > >>> + > >>> + return HRTIMER_NORESTART; > >>> +} > >>> + > >>> +static u64 kvm_riscv_delta_cycles2ns(u64 cycles, struct kvm_vcpu_timer *t) > >>> +{ > >>> + unsigned long flags; > >>> + u64 cycles_now, cycles_delta, delta_ns; > >>> + > >>> + local_irq_save(flags); > >>> + cycles_now = get_cycles64(); > >>> + if (cycles_now < cycles) > >>> + cycles_delta = cycles - cycles_now; > >>> + else > >>> + cycles_delta = 0; > >>> + delta_ns = (cycles_delta * t->mult) >> t->shift; > >>> + 
local_irq_restore(flags); > >>> + > >>> + return delta_ns; > >>> +} > >>> + > >>> +static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t) > >>> +{ > >>> + if (!t->init_done || !t->is_set) > >>> + return -EINVAL; > >>> + > >>> + hrtimer_cancel(&t->hrt); > >>> + t->is_set = false; > >>> + > >>> + return 0; > >>> +} > >>> + > >>> +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, > >>> + unsigned long ncycles) > >>> +{ > >>> + struct kvm_vcpu_timer *t = &vcpu->arch.timer; > >>> + u64 delta_ns = kvm_riscv_delta_cycles2ns(ncycles, t); > >> > >> ... in fact, I feel like I'm missing something obvious here. How does > >> the guest trigger the timer event? What is the argument it uses for that > >> and how does that play with the tbfreq in the earlier patch? > > > > We have SBI call inferface between Hypervisor and Guest. One of the > > SBI call allows Guest to program time event. The next event is specified > > as absolute cycles. The Guest can read time using TIME CSR which > > returns system timer value (@ tbfreq freqency). > > > > Guest Linux will know the tbfreq from DTB passed by QEMU/KVMTOOL > > and it has to be same as Host tbfreq. > > > > The TBFREQ config register visible to user-space is a read-only CONFIG > > register which tells user-space tools (QEMU/KVMTOOL) about Host tbfreq. > > And it's read-only because you can not trap on TB reads? There is no TB registers. The tbfreq can only be know through DT/ACPI kind-of HW description for both Host and Guest. The KVM user-space tool needs to know TBFREQ so that it can set correct value in generated DT for Guest Linux. Regards, Anup > > Alex > > > > > Regards, > > Anup > > > >> > >> > >> Alex > >>
On 23.08.19 13:46, Anup Patel wrote: > On Fri, Aug 23, 2019 at 5:03 PM Graf (AWS), Alexander <graf@amazon.com> wrote: >> >> >> >>> Am 23.08.2019 um 13:05 schrieb Anup Patel <anup@brainfault.org>: >>> >>>> On Fri, Aug 23, 2019 at 1:23 PM Alexander Graf <graf@amazon.com> wrote: >>>> >>>>> On 22.08.19 10:46, Anup Patel wrote: >>>>> From: Atish Patra <atish.patra@wdc.com> >>>>> >>>>> The RISC-V hypervisor specification doesn't have any virtual timer >>>>> feature. >>>>> >>>>> Due to this, the guest VCPU timer will be programmed via SBI calls. >>>>> The host will use a separate hrtimer event for each guest VCPU to >>>>> provide timer functionality. We inject a virtual timer interrupt to >>>>> the guest VCPU whenever the guest VCPU hrtimer event expires. >>>>> >>>>> The following features are not supported yet and will be added in >>>>> future: >>>>> 1. A time offset to adjust guest time from host time >>>>> 2. A saved next event in guest vcpu for vm migration >>>> >>>> Implementing these 2 bits right now should be trivial. Why wait? >>> [...] >>>> ... in fact, I feel like I'm missing something obvious here. How does >>>> the guest trigger the timer event? What is the argument it uses for that >>>> and how does that play with the tbfreq in the earlier patch? >>> >>> We have SBI call interface between Hypervisor and Guest. One of the >>> SBI call allows Guest to program time event. The next event is specified >>> as absolute cycles. The Guest can read time using TIME CSR which >>> returns system timer value (@ tbfreq frequency). >>> >>> Guest Linux will know the tbfreq from DTB passed by QEMU/KVMTOOL >>> and it has to be same as Host tbfreq. >>> >>> The TBFREQ config register visible to user-space is a read-only CONFIG >>> register which tells user-space tools (QEMU/KVMTOOL) about Host tbfreq. >> >> And it's read-only because you can not trap on TB reads? > > There are no TB registers.
> > The tbfreq can only be known through DT/ACPI kind-of HW description > for both Host and Guest. > > The KVM user-space tool needs to know TBFREQ so that it can set correct > value in generated DT for Guest Linux. So what access methods do get influenced by TBFREQ? If it's only the SBI timer, we can control the frequency, which means we can make TBFREQ read/write. Alex
On Fri, Aug 23, 2019 at 5:19 PM Alexander Graf <graf@amazon.com> wrote: > > > > On 23.08.19 13:46, Anup Patel wrote: > > On Fri, Aug 23, 2019 at 5:03 PM Graf (AWS), Alexander <graf@amazon.com> wrote: > >> > >> > >> > >>> Am 23.08.2019 um 13:05 schrieb Anup Patel <anup@brainfault.org>: > >>> > >>>> On Fri, Aug 23, 2019 at 1:23 PM Alexander Graf <graf@amazon.com> wrote: > >>>> > >>>>> On 22.08.19 10:46, Anup Patel wrote: > >>>>> From: Atish Patra <atish.patra@wdc.com> > >>>>> > >>>>> The RISC-V hypervisor specification doesn't have any virtual timer > >>>>> feature. > >>>>> > >>>>> Due to this, the guest VCPU timer will be programmed via SBI calls. > >>>>> The host will use a separate hrtimer event for each guest VCPU to > >>>>> provide timer functionality. We inject a virtual timer interrupt to > >>>>> the guest VCPU whenever the guest VCPU hrtimer event expires. > >>>>> > >>>>> The following features are not supported yet and will be added in > >>>>> future: > >>>>> 1. A time offset to adjust guest time from host time > >>>>> 2. A saved next event in guest vcpu for vm migration > >>>> > >>>> Implementing these 2 bits right now should be trivial. Why wait? > >>> > > [...] > > >>>> ... in fact, I feel like I'm missing something obvious here. How does > >>>> the guest trigger the timer event? What is the argument it uses for that > >>>> and how does that play with the tbfreq in the earlier patch? > >>> > >>> We have SBI call interface between Hypervisor and Guest. One of the > >>> SBI call allows Guest to program time event. The next event is specified > >>> as absolute cycles. The Guest can read time using TIME CSR which > >>> returns system timer value (@ tbfreq frequency). > >>> > >>> Guest Linux will know the tbfreq from DTB passed by QEMU/KVMTOOL > >>> and it has to be same as Host tbfreq. > >>> > >>> The TBFREQ config register visible to user-space is a read-only CONFIG > >>> register which tells user-space tools (QEMU/KVMTOOL) about Host tbfreq.
> >> > >> And it's read-only because you can not trap on TB reads? > > > > There are no TB registers. > > > > The tbfreq can only be known through DT/ACPI kind-of HW description > > for both Host and Guest. > > > > The KVM user-space tool needs to know TBFREQ so that it can set correct > > value in generated DT for Guest Linux. > > So what access methods do get influenced by TBFREQ? If it's only the SBI > timer, we can control the frequency, which means we can make TBFREQ > read/write. There are two things influenced by TBFREQ: 1. TIME CSR which is a free running counter 2. SBI calls for programming next timer event The Guest TIME CSR will be at same rate as Host TIME CSR so we cannot show different TBFREQ to Guest Linux. In future, we will be having a dedicated RISC-V timer extension which will have all programming done via CSRs but until then we are stuck with TIME CSR + SBI call combination. Regards, Anup > > > Alex
On 23.08.19 14:11, Anup Patel wrote: > On Fri, Aug 23, 2019 at 5:19 PM Alexander Graf <graf@amazon.com> wrote: >> >> >> >> On 23.08.19 13:46, Anup Patel wrote: >>> On Fri, Aug 23, 2019 at 5:03 PM Graf (AWS), Alexander <graf@amazon.com> wrote: >>>> >>>> >>>> >>>>> Am 23.08.2019 um 13:05 schrieb Anup Patel <anup@brainfault.org>: >>>>> >>>>>> On Fri, Aug 23, 2019 at 1:23 PM Alexander Graf <graf@amazon.com> wrote: >>>>>> >>>>>>> On 22.08.19 10:46, Anup Patel wrote: >>>>>>> From: Atish Patra <atish.patra@wdc.com> >>>>>>> >>>>>>> The RISC-V hypervisor specification doesn't have any virtual timer >>>>>>> feature. >>>>>>> >>>>>>> Due to this, the guest VCPU timer will be programmed via SBI calls. >>>>>>> The host will use a separate hrtimer event for each guest VCPU to >>>>>>> provide timer functionality. We inject a virtual timer interrupt to >>>>>>> the guest VCPU whenever the guest VCPU hrtimer event expires. >>>>>>> >>>>>>> The following features are not supported yet and will be added in >>>>>>> future: >>>>>>> 1. A time offset to adjust guest time from host time >>>>>>> 2. A saved next event in guest vcpu for vm migration >>>>>> >>>>>> Implementing these 2 bits right now should be trivial. Why wait? >>>>> >> >> [...] >> >>>>>> ... in fact, I feel like I'm missing something obvious here. How does >>>>>> the guest trigger the timer event? What is the argument it uses for that >>>>>> and how does that play with the tbfreq in the earlier patch? >>>>> >>>>> We have SBI call interface between Hypervisor and Guest. One of the >>>>> SBI call allows Guest to program time event. The next event is specified >>>>> as absolute cycles. The Guest can read time using TIME CSR which >>>>> returns system timer value (@ tbfreq frequency). >>>>> >>>>> Guest Linux will know the tbfreq from DTB passed by QEMU/KVMTOOL >>>>> and it has to be same as Host tbfreq.
>>>>> >>>>> The TBFREQ config register visible to user-space is a read-only CONFIG >>>>> register which tells user-space tools (QEMU/KVMTOOL) about Host tbfreq. >>>> >>>> And it's read-only because you can not trap on TB reads? >>> >>> There are no TB registers. >>> >>> The tbfreq can only be known through DT/ACPI kind-of HW description >>> for both Host and Guest. >>> >>> The KVM user-space tool needs to know TBFREQ so that it can set correct >>> value in generated DT for Guest Linux. >> >> So what access methods do get influenced by TBFREQ? If it's only the SBI >> timer, we can control the frequency, which means we can make TBFREQ >> read/write. > > There are two things influenced by TBFREQ: > 1. TIME CSR which is a free running counter > 2. SBI calls for programming next timer event > > The Guest TIME CSR will be at same rate as Host TIME CSR so > we cannot show different TBFREQ to Guest Linux. > > In future, we will be having a dedicated RISC-V timer extension which > will have all programming done via CSRs but until then we are stuck > with TIME CSR + SBI call combination. Please make sure that in a future revision of the spec either a) TIME CSR can be trapped or b) TIME CSR can be virtualized (virtual TIME READ has offset and multiplier on phys TIME READ applied) and the same goes for the timer extension - either make it all trappable or all properly adjustable. You need to be doubly cautious there that people don't design something that breaks live migration between hosts that have a different TBFREQ. Thanks, Alex
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index ab33e59a3d88..d2a2e45eefc0 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -12,6 +12,7 @@ #include <linux/types.h> #include <linux/kvm.h> #include <linux/kvm_types.h> +#include <asm/kvm_vcpu_timer.h> #ifdef CONFIG_64BIT #define KVM_MAX_VCPUS (1U << 16) @@ -167,6 +168,9 @@ struct kvm_vcpu_arch { unsigned long irqs_pending; unsigned long irqs_pending_mask; + /* VCPU Timer */ + struct kvm_vcpu_timer timer; + /* MMIO instruction details */ struct kvm_mmio_decode mmio_decode; diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h b/arch/riscv/include/asm/kvm_vcpu_timer.h new file mode 100644 index 000000000000..df67ea86988e --- /dev/null +++ b/arch/riscv/include/asm/kvm_vcpu_timer.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Atish Patra <atish.patra@wdc.com> + */ + +#ifndef __KVM_VCPU_RISCV_TIMER_H +#define __KVM_VCPU_RISCV_TIMER_H + +#include <linux/hrtimer.h> + +#define VCPU_TIMER_PROGRAM_THRESHOLD_NS 1000 + +struct kvm_vcpu_timer { + bool init_done; + /* Check if the timer is programmed */ + bool is_set; + struct hrtimer hrt; + /* Mult & Shift values to get nanosec from cycles */ + u32 mult; + u32 shift; +}; + +int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, + unsigned long ncycles); + +#endif diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index c0f57f26c13d..3e0c7558320d 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm kvm-objs := $(common-objs-y) kvm-objs += main.o vm.o vmid.o tlb.o mmu.o -kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o +kvm-objs += vcpu.o 
vcpu_exit.o vcpu_switch.o vcpu_timer.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 6124077d154f..018fca436776 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -54,6 +54,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) memcpy(cntx, reset_cntx, sizeof(*cntx)); + kvm_riscv_vcpu_timer_reset(vcpu); + WRITE_ONCE(vcpu->arch.irqs_pending, 0); WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); } @@ -108,6 +110,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) cntx->hstatus |= HSTATUS_SP2P; cntx->hstatus |= HSTATUS_SPV; + /* Setup VCPU timer */ + kvm_riscv_vcpu_timer_init(vcpu); + /* Reset VCPU */ kvm_riscv_reset_vcpu(vcpu); @@ -116,6 +121,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { + kvm_riscv_vcpu_timer_deinit(vcpu); kvm_riscv_stage2_flush_cache(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c new file mode 100644 index 000000000000..a45ca06e1aa6 --- /dev/null +++ b/arch/riscv/kvm/vcpu_timer.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. 
+ * + * Authors: + * Atish Patra <atish.patra@wdc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <clocksource/timer-riscv.h> +#include <asm/csr.h> +#include <asm/kvm_vcpu_timer.h> + +static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer *h) +{ + struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt); + struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer); + + t->is_set = false; + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_TIMER); + + return HRTIMER_NORESTART; +} + +static u64 kvm_riscv_delta_cycles2ns(u64 cycles, struct kvm_vcpu_timer *t) +{ + unsigned long flags; + u64 cycles_now, cycles_delta, delta_ns; + + local_irq_save(flags); + cycles_now = get_cycles64(); + if (cycles_now < cycles) + cycles_delta = cycles - cycles_now; + else + cycles_delta = 0; + delta_ns = (cycles_delta * t->mult) >> t->shift; + local_irq_restore(flags); + + return delta_ns; +} + +static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t) +{ + if (!t->init_done || !t->is_set) + return -EINVAL; + + hrtimer_cancel(&t->hrt); + t->is_set = false; + + return 0; +} + +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, + unsigned long ncycles) +{ + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + u64 delta_ns = kvm_riscv_delta_cycles2ns(ncycles, t); + + if (!t->init_done) + return -EINVAL; + + kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_S_TIMER); + + if (delta_ns > VCPU_TIMER_PROGRAM_THRESHOLD_NS) { + hrtimer_start(&t->hrt, ktime_add_ns(ktime_get(), delta_ns), + HRTIMER_MODE_ABS); + t->is_set = true; + } else + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_TIMER); + + return 0; +} + +int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + + if (t->init_done) + return -EINVAL; + + hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + t->hrt.function = kvm_riscv_vcpu_hrtimer_expired; + t->init_done = true; + t->is_set = false; + + 
riscv_cs_get_mult_shift(&t->mult, &t->shift); + + return 0; +} + +int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu) +{ + int ret; + + ret = kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer); + vcpu->arch.timer.init_done = false; + + return ret; +} + +int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu) +{ + return kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer); +} diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 09e031176bc6..7c595203aa5c 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -8,6 +8,7 @@ #include <linux/cpu.h> #include <linux/delay.h> #include <linux/irq.h> +#include <linux/module.h> #include <linux/sched_clock.h> #include <asm/smp.h> #include <asm/sbi.h> @@ -80,6 +81,13 @@ static int riscv_timer_dying_cpu(unsigned int cpu) return 0; } +void riscv_cs_get_mult_shift(u32 *mult, u32 *shift) +{ + *mult = riscv_clocksource.mult; + *shift = riscv_clocksource.shift; +} +EXPORT_SYMBOL_GPL(riscv_cs_get_mult_shift); + /* called directly from the low-level interrupt handler */ void riscv_timer_interrupt(void) { diff --git a/include/clocksource/timer-riscv.h b/include/clocksource/timer-riscv.h new file mode 100644 index 000000000000..e94e4feecbe8 --- /dev/null +++ b/include/clocksource/timer-riscv.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Atish Patra <atish.patra@wdc.com> + */ + +#ifndef __TIMER_RISCV_H +#define __TIMER_RISCV_H + +#include <linux/types.h> + +void riscv_cs_get_mult_shift(u32 *mult, u32 *shift); + +#endif