@@ -21,6 +21,7 @@ enum ipi_msg_type {
typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
+ atomic_t messages ____cacheline_aligned_in_smp;
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
@@ -43,6 +43,7 @@ struct kvm_vcpu_stat {
u64 idle_exits;
u64 cpucfg_exits;
u64 signal_exits;
+ u64 hvcl_exits;
};
#define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
@@ -9,6 +9,10 @@
#define HYPERVISOR_VENDOR_SHIFT 8
#define HYPERCALL_CODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+#define KVM_HC_CODE_SERVICE 0
+#define KVM_HC_SERVICE HYPERCALL_CODE(HYPERVISOR_KVM, KVM_HC_CODE_SERVICE)
+#define KVM_HC_FUNC_IPI 1
+
/*
* LoongArch hypcall return code
*/
@@ -16,6 +20,126 @@
#define KVM_HC_INVALID_CODE -1UL
#define KVM_HC_INVALID_PARAMETER -2UL
+/*
+ * Hypercalls interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in v0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall(u64 fid)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r" (fun)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r" (fun), "r" (a1)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+ unsigned long arg0, unsigned long arg1)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r" (fun), "r" (a1), "r" (a2)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall3(u64 fid,
+ unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+ register unsigned long a3 asm("a3") = arg2;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r" (fun), "r" (a1), "r" (a2), "r" (a3)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall4(u64 fid,
+ unsigned long arg0, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+ register unsigned long a3 asm("a3") = arg2;
+ register unsigned long a4 asm("a4") = arg3;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall5(u64 fid,
+ unsigned long arg0, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4)
+{
+ register long ret asm("v0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+ register unsigned long a3 asm("a3") = arg2;
+ register unsigned long a4 asm("a4") = arg3;
+ register unsigned long a5 asm("a5") = arg4;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HC_SERVICE)
+ : "=r" (ret)
+ : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
+ : "memory"
+ );
+
+ return ret;
+}
+
+
static inline unsigned int kvm_arch_para_features(void)
{
return 0;
@@ -167,6 +167,7 @@
#define CPUCFG_KVM_SIG CPUCFG_KVM_BASE
#define KVM_SIGNATURE "KVM\0"
#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
+#define KVM_FEATURE_PV_IPI BIT(1)
#ifndef __ASSEMBLY__
/* CSR */
@@ -113,5 +113,5 @@ void __init init_IRQ(void)
per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
}
- set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
+ set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
}
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/types.h>
+#include <linux/interrupt.h>
#include <linux/jump_label.h>
#include <linux/kvm_para.h>
#include <asm/paravirt.h>
@@ -16,6 +17,101 @@ static u64 native_steal_clock(int cpu)
DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+#ifdef CONFIG_SMP
+static void pv_send_ipi_single(int cpu, unsigned int action)
+{
+ unsigned int min, old;
+ unsigned long bitmap = 0;
+ irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
+
+ action = BIT(action);
+ old = atomic_fetch_or(action, &info->messages);
+ if (old == 0) {
+ min = cpu_logical_map(cpu);
+ bitmap = 1;
+ kvm_hypercall2(KVM_HC_FUNC_IPI, bitmap, min);
+ }
+}
+
+static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+ unsigned int cpu, i, min = 0, max = 0, old;
+ u64 bitmap = 0;
+ irq_cpustat_t *info;
+
+ if (cpumask_empty(mask))
+ return;
+
+ action = BIT(action);
+ for_each_cpu(i, mask) {
+ info = &per_cpu(irq_stat, i);
+ old = atomic_fetch_or(action, &info->messages);
+ if (old)
+ continue;
+
+ cpu = cpu_logical_map(i);
+ if (!bitmap) {
+ min = max = cpu;
+ } else if (cpu > min && cpu < min + BITS_PER_LONG) {
+ max = cpu > max ? cpu : max;
+ } else if (cpu < min && (max - cpu) < BITS_PER_LONG) {
+ bitmap <<= min - cpu;
+ min = cpu;
+ } else {
+ /*
+ * Physical cpuid is sorted in ascending order ascend
+ * for the next mask calculation, send IPI here
+ * directly and skip the remainding cpus
+ */
+ kvm_hypercall2(KVM_HC_FUNC_IPI, bitmap, min);
+ min = max = cpu;
+ bitmap = 0;
+ }
+ __set_bit(cpu - min, (unsigned long *)&bitmap);
+ }
+
+ if (bitmap)
+ kvm_hypercall2(KVM_HC_FUNC_IPI, bitmap, min);
+}
+
+static irqreturn_t loongson_do_swi(int irq, void *dev)
+{
+ irq_cpustat_t *info;
+ long action;
+
+ clear_csr_estat(1 << INT_SWI0);
+
+ info = this_cpu_ptr(&irq_stat);
+ do {
+ action = atomic_xchg(&info->messages, 0);
+ if (action & SMP_CALL_FUNCTION) {
+ generic_smp_call_function_interrupt();
+ info->ipi_irqs[IPI_CALL_FUNCTION]++;
+ }
+
+ if (action & SMP_RESCHEDULE) {
+ scheduler_ipi();
+ info->ipi_irqs[IPI_RESCHEDULE]++;
+ }
+ } while (action);
+
+ return IRQ_HANDLED;
+}
+
+static void pv_ipi_init(void)
+{
+ int r, swi0;
+
+ swi0 = get_percpu_irq(INT_SWI0);
+ if (swi0 < 0)
+ panic("SWI0 IRQ mapping failed\n");
+ irq_set_percpu_devid(swi0);
+ r = request_percpu_irq(swi0, loongson_do_swi, "SWI0", &irq_stat);
+ if (r < 0)
+ panic("SWI0 IRQ request failed\n");
+}
+#endif
+
static bool kvm_para_available(void)
{
static int hypervisor_type;
@@ -32,10 +128,24 @@ static bool kvm_para_available(void)
int __init pv_guest_init(void)
{
+ int feature;
+
if (!cpu_has_hypervisor)
return 0;
if (!kvm_para_available())
return 0;
+ /*
+ * check whether KVM hypervisor supports pv_ipi or not
+ */
+#ifdef CONFIG_SMP
+ feature = read_cpucfg(CPUCFG_KVM_FEATURE);
+ if (feature & KVM_FEATURE_PV_IPI) {
+ smp_ops.call_func_single_ipi = pv_send_ipi_single;
+ smp_ops.call_func_ipi = pv_send_ipi_mask;
+ smp_ops.ipi_init = pv_ipi_init;
+ }
+#endif
+
return 1;
}
@@ -282,7 +282,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
void loongson_init_secondary(void)
{
unsigned int cpu = smp_processor_id();
- unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
+ unsigned int imask = ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
change_csr_ecfg(ECFG0_IM, imask);
@@ -227,6 +227,9 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
case CPUCFG_KVM_SIG:
vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
break;
+ case CPUCFG_KVM_FEATURE:
+ vcpu->arch.gprs[rd] = KVM_FEATURE_PV_IPI;
+ break;
default:
vcpu->arch.gprs[rd] = 0;
break;
@@ -664,12 +667,75 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
+static int kvm_pv_send_ipi(struct kvm_vcpu *vcpu)
+{
+ int ret = 0;
+ u64 ipi_bitmap;
+ unsigned int min, cpu;
+ struct kvm_vcpu *dest;
+
+ ipi_bitmap = vcpu->arch.gprs[LOONGARCH_GPR_A1];
+ min = vcpu->arch.gprs[LOONGARCH_GPR_A2];
+
+ if (ipi_bitmap) {
+ cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
+ while (cpu < BITS_PER_LONG) {
+ dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
+ if (dest) {
+ /*
+ * Send SWI0 to dest vcpu to emulate IPI interrupt
+ */
+ kvm_queue_irq(dest, INT_SWI0);
+ kvm_vcpu_kick(dest);
+ }
+ cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG, cpu + 1);
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * hypcall emulation always return to guest, Caller should check retval.
+ */
+static void kvm_handle_pv_hcall(struct kvm_vcpu *vcpu)
+{
+ unsigned long func = vcpu->arch.gprs[LOONGARCH_GPR_A0];
+ long ret;
+
+ switch (func) {
+ case KVM_HC_FUNC_IPI:
+ kvm_pv_send_ipi(vcpu);
+ ret = KVM_HC_STATUS_SUCCESS;
+ break;
+ default:
+ ret = KVM_HC_INVALID_CODE;
+ break;
+ };
+
+ vcpu->arch.gprs[LOONGARCH_GPR_A0] = ret;
+}
+
static int kvm_handle_hypcall(struct kvm_vcpu *vcpu)
{
+ larch_inst inst;
+ unsigned int code;
+
+ inst.word = vcpu->arch.badi;
+ code = inst.reg0i15_format.immediate;
update_pc(&vcpu->arch);
- /* Treat it as noop intruction, only set return value */
- vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
+ switch (code) {
+ case KVM_HC_SERVICE:
+ vcpu->stat.hvcl_exits++;
+ kvm_handle_pv_hcall(vcpu);
+ break;
+ default:
+ /* Treat it as noop intruction, only set return value */
+ vcpu->arch.gprs[LOONGARCH_GPR_A0] = KVM_HC_INVALID_CODE;
+ break;
+ }
+
return RESUME_GUEST;
}
@@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, idle_exits),
STATS_DESC_COUNTER(VCPU, cpucfg_exits),
STATS_DESC_COUNTER(VCPU, signal_exits),
+ STATS_DESC_COUNTER(VCPU, hvcl_exits)
};
const struct kvm_stats_header kvm_vcpu_stats_header = {
On LoongArch system, ipi hw uses iocsr registers, there is one iocsr register access on ipi sender and two iocsr access on ipi receiver which is ipi interrupt handler. On VM mode all iocsr registers accessing will trap into hypervisor. So with one ipi hw notification there will be three times of trap. This patch adds pv ipi support for VM, hypercall instruction is used to ipi sender, and hypervisor will inject SWI on the VM. During SWI interrupt handler, only estat CSR register is written to clear irq. Estat CSR register access will not trap into hypervisor. So with pv ipi supported, pv ipi sender will trap into hypervsor one time, pv ipi revicer will not trap, there is only one time of trap. Also this patch adds ipi multicast support, the method is similar with x86. With ipi multicast support, ipi notification can be sent to at most 64 vcpus at one time. It reduces trap into hypervisor greatly. Signed-off-by: Bibo Mao <maobibo@loongson.cn> --- arch/loongarch/include/asm/hardirq.h | 1 + arch/loongarch/include/asm/kvm_host.h | 1 + arch/loongarch/include/asm/kvm_para.h | 124 +++++++++++++++++++++++++ arch/loongarch/include/asm/loongarch.h | 1 + arch/loongarch/kernel/irq.c | 2 +- arch/loongarch/kernel/paravirt.c | 110 ++++++++++++++++++++++ arch/loongarch/kernel/smp.c | 2 +- arch/loongarch/kvm/exit.c | 70 +++++++++++++- arch/loongarch/kvm/vcpu.c | 1 + 9 files changed, 308 insertions(+), 4 deletions(-)