diff mbox series

[22/23] KVM: arm64: Add a rVIC/rVID in-kernel implementation

Message ID 20200903152610.1078827-23-maz@kernel.org (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: rVIC/rVID PV interrupt controller | expand

Commit Message

Marc Zyngier Sept. 3, 2020, 3:26 p.m. UTC
The rVIC (reduced Virtual Interrupt Controller) and its rVID
(reduced Virtual Interrupt Distributor) companion are the two
parts of a PV interrupt controller architecture aimed at supporting
VMs with minimal interrupt requirements.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_host.h |    7 +-
 arch/arm64/include/asm/kvm_irq.h  |    2 +
 arch/arm64/include/uapi/asm/kvm.h |    9 +
 arch/arm64/kvm/Makefile           |    2 +-
 arch/arm64/kvm/arm.c              |    3 +
 arch/arm64/kvm/hypercalls.c       |    7 +
 arch/arm64/kvm/rvic-cpu.c         | 1073 +++++++++++++++++++++++++++++
 include/kvm/arm_rvic.h            |   41 ++
 include/linux/irqchip/irq-rvic.h  |    4 +
 include/uapi/linux/kvm.h          |    2 +
 10 files changed, 1148 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/kvm/rvic-cpu.c
 create mode 100644 include/kvm/arm_rvic.h

Comments

Jonathan Cameron Sept. 4, 2020, 4 p.m. UTC | #1
On Thu, 3 Sep 2020 16:26:09 +0100
Marc Zyngier <maz@kernel.org> wrote:

> The rVIC (reduced Virtual Interrupt Controller) and its rVID
> (reduced Virtual Interrupt Distributor) companion are the two
> parts of a PV interrupt controller architecture aimed at supporting
> VMs with minimal interrupt requirements.
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>

A few trivial things from a first read through.

> ---
>  arch/arm64/include/asm/kvm_host.h |    7 +-
>  arch/arm64/include/asm/kvm_irq.h  |    2 +
>  arch/arm64/include/uapi/asm/kvm.h |    9 +
>  arch/arm64/kvm/Makefile           |    2 +-
>  arch/arm64/kvm/arm.c              |    3 +
>  arch/arm64/kvm/hypercalls.c       |    7 +
>  arch/arm64/kvm/rvic-cpu.c         | 1073 +++++++++++++++++++++++++++++
>  include/kvm/arm_rvic.h            |   41 ++
>  include/linux/irqchip/irq-rvic.h  |    4 +
>  include/uapi/linux/kvm.h          |    2 +
>  10 files changed, 1148 insertions(+), 2 deletions(-)
>  create mode 100644 arch/arm64/kvm/rvic-cpu.c
>  create mode 100644 include/kvm/arm_rvic.h
> 

...

> diff --git a/arch/arm64/kvm/rvic-cpu.c b/arch/arm64/kvm/rvic-cpu.c
> new file mode 100644
> index 000000000000..5fb200c637d9
> --- /dev/null
> +++ b/arch/arm64/kvm/rvic-cpu.c

...

> +
> +static int rvic_inject_irq(struct kvm *kvm, unsigned int cpu,
> +			   unsigned int intid, bool level, void *owner)
> +{
> +	struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, cpu);
> +	struct rvic *rvic;
> +
> +	if (unlikely(!vcpu))
> +		return -EINVAL;
> +
> +	rvic = kvm_vcpu_to_rvic(vcpu);
> +	if (unlikely(intid >= rvic->nr_total))
> +		return -EINVAL;
> +
> +	/* Ignore interrupt owner for now */
> +	rvic_vcpu_inject_irq(vcpu, intid, level);

For consistency, blank line before the return?

> +	return 0;
> +}
> +

...

> +
> +static int rvic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
> +			      struct kvm *kvm, int irq_source_id,
> +			      int level, bool line_status)
> +{
> +	/* Abuse the userspace interface to perform the routing*/

Space before */

> +	return rvic_inject_userspace_irq(kvm, KVM_ARM_IRQ_TYPE_SPI, 0,
> +					 e->irqchip.pin, level);
> +}
> +

...

> +
> +/* Device management */
> +static int rvic_device_create(struct kvm_device *dev, u32 type)
> +{
> +	struct kvm *kvm = dev->kvm;
> +	struct kvm_vcpu *vcpu;
> +	int i, ret;

It's personal preference, but I'd avoid the fiddly
ret handling in the good path (up to you though!); a consolidated
sketch follows the quoted function below.

ret = 0;
> +
> +	if (irqchip_in_kernel(kvm))
> +		return -EEXIST;
> +
> +	ret = -EBUSY;
> +	if (!lock_all_vcpus(kvm))
> +		return ret;
	if (!lock_all_vcpus(kvm))
		return -EBUSY;
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		if (vcpu->arch.has_run_once) {
			ret = -EBUSY;
> +			goto out_unlock;
		}
> +	}
> +
> +	ret = 0;
> +
> +	/*
> +	 * The good thing about not having any HW is that you don't
> +	 * get the limitations of the HW...
> +	 */
> +	kvm->arch.max_vcpus		= KVM_MAX_VCPUS;
> +	kvm->arch.irqchip_type		= IRQCHIP_RVIC;
> +	kvm->arch.irqchip_flow		= rvic_irqchip_flow;
> +	kvm->arch.irqchip_data		= NULL;
> +
> +out_unlock:
> +	unlock_all_vcpus(kvm);
> +	return ret;
> +}
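
FWIW, putting that together, roughly what I had in mind (untested,
just to illustrate the shape):

	if (irqchip_in_kernel(kvm))
		return -EEXIST;

	if (!lock_all_vcpus(kvm))
		return -EBUSY;

	ret = 0;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu->arch.has_run_once) {
			ret = -EBUSY;
			goto out_unlock;
		}
	}

with the rest of the function unchanged.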
> +
> +static void rvic_device_destroy(struct kvm_device *dev)
> +{
> +	kfree(dev->kvm->arch.irqchip_data);
> +	kfree(dev);
> +}
> +
> +static int rvic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> +{
> +	struct rvic_vm_data *data;
> +	struct kvm_vcpu *vcpu;
> +	u32 __user *uaddr, val;
> +	u16 trusted, total;
> +	int i, ret = -ENXIO;
> +
> +	mutex_lock(&dev->kvm->lock);
> +
> +	switch (attr->group) {
> +	case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
> +		if (attr->attr)
> +			break;
> +
> +		if (dev->kvm->arch.irqchip_data) {
> +			ret = -EBUSY;
> +			break;
> +		}
> +
> +		uaddr = (u32 __user *)(uintptr_t)attr->addr;
> +		if (get_user(val, uaddr)) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +
> +		trusted = FIELD_GET(KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK, val);
> +		total   = FIELD_GET(KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK, val);
> +		if (total < trusted || trusted < 32 || total < 64 ||
> +		    trusted % 32 || total % 32 || total > 2048) {

As I read the spec, we need at least 32 untrusted interrupts (R0058).
This condition seems to allow e.g. trusted = 64 and untrusted = 0.


> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		data = kzalloc(struct_size(data, rvid_map, (total - trusted)),
> +			       GFP_KERNEL);
> +		if (!data) {
> +			ret = -ENOMEM;
> +			break;
> +		}
> +
> +		data->nr_trusted = trusted;
> +		data->nr_total = total;
> +		spin_lock_init(&data->lock);
> +		/* Default to no mapping */
> +		for (i = 0; i < (total - trusted); i++) {
> +			/*
> +			 * an intid < nr_trusted is invalid as the
> +			 * result of a translation through the rvid,
> +			 * hence the input is unmapped.
> +			 */
> +			data->rvid_map[i].target_vcpu = 0;
> +			data->rvid_map[i].intid = 0;
> +		}
> +
> +		dev->kvm->arch.irqchip_data = data;
> +
> +		ret = 0;
> +		break;
> +
> +	case KVM_DEV_ARM_RVIC_GRP_INIT:
> +		if (attr->attr)
> +			break;
> +
> +		if (!dev->kvm->arch.irqchip_data)
> +			break;
> +
> +		ret = 0;
> +
> +		/* Init the rvic on any already created vcpu */
> +		kvm_for_each_vcpu(i, vcpu, dev->kvm) {
> +			ret = rvic_vcpu_init(vcpu);
> +			if (ret)
> +				break;
> +		}
> +
> +		if (!ret)
> +			ret = rvic_setup_default_irq_routing(dev->kvm);
> +		if (!ret)
> +			dev->kvm->arch.irqchip_finalized = true;

Personally I'd prefer the more idiomatic 

		if (ret)
			break;

		ret =...
		if (ret)
			break;
		dev->kvm->arch.....
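
i.e., spelled out (untested):

		ret = 0;

		/* Init the rvic on any already created vcpu */
		kvm_for_each_vcpu(i, vcpu, dev->kvm) {
			ret = rvic_vcpu_init(vcpu);
			if (ret)
				break;
		}
		if (ret)
			break;

		ret = rvic_setup_default_irq_routing(dev->kvm);
		if (ret)
			break;

		dev->kvm->arch.irqchip_finalized = true;
		break;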

> +		break;
> +
> +	default:
> +		break;
> +	}
> +
> +	mutex_unlock(&dev->kvm->lock);
> +
> +	return ret;
> +}
> +

...

> +static int rvic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> +{
> +	int ret = -ENXIO;
> +
> +	switch (attr->group) {
> +	case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
> +	case KVM_DEV_ARM_RVIC_GRP_INIT:
> +		if (attr->attr)
> +			break;
> +		ret = 0;

Trivial:
Early returns?  Bit shorter and easier to read?
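
e.g. (untested):

	switch (attr->group) {
	case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
	case KVM_DEV_ARM_RVIC_GRP_INIT:
		return attr->attr ? -ENXIO : 0;
	default:
		return -ENXIO;
	}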

> +		break;
> +
> +	default:
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +static const struct kvm_device_ops rvic_dev_ops = {
> +	.name		= "kvm-arm-rvic",
> +	.create		= rvic_device_create,
> +	.destroy	= rvic_device_destroy,
> +	.set_attr	= rvic_set_attr,
> +	.get_attr	= rvic_get_attr,
> +	.has_attr	= rvic_has_attr,
> +};
> +
> +int kvm_register_rvic_device(void)
> +{
> +	return kvm_register_device_ops(&rvic_dev_ops, KVM_DEV_TYPE_ARM_RVIC);
> +}
Marc Zyngier Sept. 5, 2020, 1:16 p.m. UTC | #2
On Fri, 04 Sep 2020 17:00:36 +0100,
Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote:
> 
> On Thu, 3 Sep 2020 16:26:09 +0100
> Marc Zyngier <maz@kernel.org> wrote:

[...]

> > +static int rvic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> > +{
> > +	struct rvic_vm_data *data;
> > +	struct kvm_vcpu *vcpu;
> > +	u32 __user *uaddr, val;
> > +	u16 trusted, total;
> > +	int i, ret = -ENXIO;
> > +
> > +	mutex_lock(&dev->kvm->lock);
> > +
> > +	switch (attr->group) {
> > +	case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
> > +		if (attr->attr)
> > +			break;
> > +
> > +		if (dev->kvm->arch.irqchip_data) {
> > +			ret = -EBUSY;
> > +			break;
> > +		}
> > +
> > +		uaddr = (u32 __user *)(uintptr_t)attr->addr;
> > +		if (get_user(val, uaddr)) {
> > +			ret = -EFAULT;
> > +			break;
> > +		}
> > +
> > +		trusted = FIELD_GET(KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK, val);
> > +		total   = FIELD_GET(KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK, val);
> > +		if (total < trusted || trusted < 32 || total < 64 ||
> > +		    trusted % 32 || total % 32 || total > 2048) {
> 
> As I read the spec, we need at least 32 untrusted interrupts (R0058).
> This condition seems to allow e.g. trusted = 64 and untrusted = 0.

Well spotted. I think the following would capture the constraints
correctly:

		if (total <= trusted || trusted < 32 || total < 64 ||
		    trusted % 32 || total % 32 || total > 2048) {

On the other hand, I wonder if this code would gain from being
directly written in terms of trusted/untrusted, rather than
trusted/total. It could make the reading against the spec easier.
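
Something like this, maybe (untested, and only to illustrate the
shape; the local variable is new):

		int untrusted = total - trusted;

		if (trusted < 32 || untrusted < 32 ||
		    trusted % 32 || untrusted % 32 || total > 2048) {
			ret = -EINVAL;
			break;
		}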

Thanks,

	M.
Lorenzo Pieralisi Sept. 29, 2020, 3:13 p.m. UTC | #3
On Thu, Sep 03, 2020 at 04:26:09PM +0100, Marc Zyngier wrote:

[...]

> +static void __rvic_sync_hcr(struct kvm_vcpu *vcpu, struct rvic *rvic,
> +			    bool was_signaling)
> +{
> +	struct kvm_vcpu *target = kvm_rvic_to_vcpu(rvic);
> +	bool signal = __rvic_can_signal(rvic);
> +
> +	/* We're hitting our own rVIC: update HCR_VI locally */
> +	if (vcpu == target) {
> +		if (signal)
> +			*vcpu_hcr(vcpu) |= HCR_VI;
> +		else
> +			*vcpu_hcr(vcpu) &= ~HCR_VI;
> +
> +		return;
> +	}
> +
> +	/*
> +	 * Remote rVIC case:
> +	 *
> +	 * We kick even if the interrupt disappears, as ISR_EL1.I must
> +	 * always reflect the state of the rVIC. This forces a reload
> +	 * of the vcpu state, making it consistent.

Forgive me the question but this is unclear to me. IIUC here we do _not_
want to change the target_vcpu.hcr and we force a kick to make sure it
syncs the hcr (so the rvic) state on its own upon exit. Is that correct ?

Furthermore, I think it would be extremely useful to elaborate (ie
rework the comment) further on ISR_EL1.I and how it is linked to this
code path - I think it is key to understanding it.

Thanks,
Lorenzo

> +	 *
> +	 * This avoids modifying the target's own copy of HCR_EL2, as
> +	 * we are in a cross-vcpu call, and changing it from under its
> +	 * feet is dodgy.
> +	 */
> +	if (was_signaling != signal)
> +		__rvic_kick_vcpu(target);
> +}
> +
> +static void rvic_version(struct kvm_vcpu *vcpu)
> +{
> +	/* ALP0.3 is the name of the game */
> +	smccc_set_retval(vcpu, RVIC_STATUS_SUCCESS, RVIC_VERSION(0, 3), 0, 0);
> +}
> +
> +static void rvic_info(struct kvm_vcpu *vcpu)
> +{
> +	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
> +	unsigned long what = smccc_get_arg1(vcpu);
> +	unsigned long a0, a1;
> +
> +	switch (what) {
> +	case RVIC_INFO_KEY_NR_TRUSTED_INTERRUPTS:
> +		a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
> +		a1 = rvic->nr_trusted;
> +		break;
> +	case RVIC_INFO_KEY_NR_UNTRUSTED_INTERRUPTS:
> +		a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
> +		a1 = rvic_nr_untrusted(rvic);
> +		break;
> +	default:
> +		a0 = RVIx_STATUS_PACK(RVIC_STATUS_ERROR_PARAMETER, 0);
> +		a1 = 0;
> +		break;
> +	}
> +
> +	smccc_set_retval(vcpu, a0, a1, 0, 0);
> +}
> +
> +static void rvic_enable(struct kvm_vcpu *vcpu)
> +{
> +	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
> +	unsigned long flags;
> +	bool was_signaling;
> +
> +	spin_lock_irqsave(&rvic->lock, flags);
> +
> +	was_signaling = __rvic_can_signal(rvic);
> +	__rvic_enable(rvic);
> +	__rvic_sync_hcr(vcpu, rvic, was_signaling);
> +
> +	spin_unlock_irqrestore(&rvic->lock, flags);
> +
> +	smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0),
> +			 0, 0, 0);
> +}
> +
> +static void rvic_disable(struct kvm_vcpu *vcpu)
> +{
> +	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
> +	unsigned long flags;
> +	bool was_signaling;
> +
> +	spin_lock_irqsave(&rvic->lock, flags);
> +
> +	was_signaling = __rvic_can_signal(rvic);
> +	__rvic_disable(rvic);
> +	__rvic_sync_hcr(vcpu, rvic, was_signaling);
> +
> +	spin_unlock_irqrestore(&rvic->lock, flags);
> +
> +	smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0),
> +			 0, 0, 0);
> +}
> +
> +typedef void (*rvic_action_fn_t)(struct rvic *, unsigned int);
> +
> +static int validate_rvic_call(struct kvm_vcpu *vcpu, struct rvic **rvicp,
> +			      unsigned int *intidp)
> +{
> +	unsigned long mpidr = smccc_get_arg1(vcpu);
> +	unsigned int intid = smccc_get_arg2(vcpu);
> +	struct kvm_vcpu *target;
> +	struct rvic *rvic;
> +
> +	/* FIXME: The spec distinguishes between invalid MPIDR and invalid CPU */
> +
> +	target = kvm_mpidr_to_vcpu(vcpu->kvm, mpidr);
> +	if (!target) {
> +		smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_INVALID_CPU, 0),
> +				 0, 0, 0);
> +		return -1;
> +	}
> +
> +	rvic = kvm_vcpu_to_rvic(target);
> +	if (intid >= rvic->nr_total) {
> +		smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_ERROR_PARAMETER, 1),
> +				 0, 0, 0);
> +		return -1;
> +	}
> +
> +	*rvicp = rvic;
> +	*intidp = intid;
> +
> +	return 0;
> +}
> +
> +static void __rvic_action(struct kvm_vcpu *vcpu, rvic_action_fn_t action,
> +			  bool check_enabled)
> +{
> +	struct rvic *rvic;
> +	unsigned long a0;
> +	unsigned long flags;
> +	int intid;
> +
> +	if (validate_rvic_call(vcpu, &rvic, &intid))
> +		return;
> +
> +	spin_lock_irqsave(&rvic->lock, flags);
> +
> +	if (unlikely(check_enabled && !__rvic_is_enabled(rvic))) {
> +		a0 = RVIx_STATUS_PACK(RVIC_STATUS_DISABLED, 0);
> +	} else {
> +		bool was_signaling = __rvic_can_signal(rvic);
> +		action(rvic, intid);
> +		__rvic_sync_hcr(vcpu, rvic, was_signaling);
> +		a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
> +	}
> +
> +	spin_unlock_irqrestore(&rvic->lock, flags);
> +
> +	smccc_set_retval(vcpu, a0, 0, 0, 0);
> +}
> +
> +static void rvic_set_masked(struct kvm_vcpu *vcpu)
> +{
> +	__rvic_action(vcpu, __rvic_set_masked, false);
> +}
> +
> +static void rvic_clear_masked(struct kvm_vcpu *vcpu)
> +{
> +	__rvic_action(vcpu, __rvic_clear_masked, false);
> +}
> +
> +static void rvic_clear_pending(struct kvm_vcpu *vcpu)
> +{
> +	__rvic_action(vcpu, __rvic_clear_pending, false);
> +}
> +
> +static void rvic_signal(struct kvm_vcpu *vcpu)
> +{
> +	__rvic_action(vcpu, __rvic_set_pending, true);
> +}
> +
> +static void rvic_is_pending(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long flags;
> +	struct rvic *rvic;
> +	int intid;
> +	bool res;
> +
> +	if (validate_rvic_call(vcpu, &rvic, &intid))
> +		return;
> +
> +	spin_lock_irqsave(&rvic->lock, flags);
> +
> +	res = __rvic_is_pending(rvic, intid);
> +
> +	spin_unlock_irqrestore(&rvic->lock, flags);
> +
> +	smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0),
> +			 res, 0, 0);
> +}
> +
> +/*
> + * Ack and Resample are the only "interesting" operations that are
> + * strictly per-CPU.
> + */
> +static void rvic_acknowledge(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long a0, a1;
> +	unsigned long flags;
> +	unsigned int intid;
> +	struct rvic *rvic;
> +
> +	rvic = kvm_vcpu_to_rvic(vcpu);
> +
> +	spin_lock_irqsave(&rvic->lock, flags);
> +
> +	if (unlikely(!__rvic_is_enabled(rvic))) {
> +		a0 = RVIx_STATUS_PACK(RVIC_STATUS_DISABLED, 0);
> +		a1 = 0;
> +	} else {
> +		intid = __rvic_ack(rvic);
> +		__rvic_sync_hcr(vcpu, rvic, true);
> +		if (unlikely(intid >= rvic->nr_total)) {
> +			a0 = RVIx_STATUS_PACK(RVIC_STATUS_NO_INTERRUPTS, 0);
> +			a1 = 0;
> +		} else {
> +			a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
> +			a1 = intid;
> +		}
> +	}
> +
> +	spin_unlock_irqrestore(&rvic->lock, flags);
> +
> +	smccc_set_retval(vcpu, a0, a1, 0, 0);
> +}
> +
> +static void rvic_resample(struct kvm_vcpu *vcpu)
> +{
> +	unsigned int intid = smccc_get_arg1(vcpu);
> +	unsigned long flags;
> +	unsigned long a0;
> +	struct rvic *rvic;
> +
> +	rvic = kvm_vcpu_to_rvic(vcpu);
> +
> +	spin_lock_irqsave(&rvic->lock, flags);
> +
> +	if (unlikely(intid >= rvic->nr_trusted)) {
> +		a0 = RVIx_STATUS_PACK(RVIC_STATUS_ERROR_PARAMETER, 0);
> +	} else {
> +		__rvic_resample(rvic, intid);
> +
> +		/*
> +		 * Don't bother finding out if we were signalling, we
> +		 * will update HCR_EL2 anyway as we are guaranteed not
> +		 * to be in a cross-call.
> +		 */
> +		__rvic_sync_hcr(vcpu, rvic, true);
> +		a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
> +	}
> +
> +	spin_unlock_irqrestore(&rvic->lock, flags);
> +
> +	smccc_set_retval(vcpu, a0, 0, 0, 0);
> +}
> +
> +int kvm_rvic_handle_hcall(struct kvm_vcpu *vcpu)
> +{
> +	pr_debug("RVIC: HC %08x", (unsigned int)smccc_get_function(vcpu));
> +	switch (smccc_get_function(vcpu)) {
> +	case SMC64_RVIC_VERSION:
> +		rvic_version(vcpu);
> +		break;
> +	case SMC64_RVIC_INFO:
> +		rvic_info(vcpu);
> +		break;
> +	case SMC64_RVIC_ENABLE:
> +		rvic_enable(vcpu);
> +		break;
> +	case SMC64_RVIC_DISABLE:
> +		rvic_disable(vcpu);
> +		break;
> +	case SMC64_RVIC_SET_MASKED:
> +		rvic_set_masked(vcpu);
> +		break;
> +	case SMC64_RVIC_CLEAR_MASKED:
> +		rvic_clear_masked(vcpu);
> +		break;
> +	case SMC64_RVIC_IS_PENDING:
> +		rvic_is_pending(vcpu);
> +		break;
> +	case SMC64_RVIC_SIGNAL:
> +		rvic_signal(vcpu);
> +		break;
> +	case SMC64_RVIC_CLEAR_PENDING:
> +		rvic_clear_pending(vcpu);
> +		break;
> +	case SMC64_RVIC_ACKNOWLEDGE:
> +		rvic_acknowledge(vcpu);
> +		break;
> +	case SMC64_RVIC_RESAMPLE:
> +		rvic_resample(vcpu);
> +		break;
> +	default:
> +		smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0);
> +		break;
> +	}
> +
> +	return 1;
> +}
> +
> +static void rvid_version(struct kvm_vcpu *vcpu)
> +{
> +	/* ALP0.3 is the name of the game */
> +	smccc_set_retval(vcpu, RVID_STATUS_SUCCESS, RVID_VERSION(0, 3), 0, 0);
> +}
> +
> +static void rvid_map(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long input = smccc_get_arg1(vcpu);
> +	unsigned long mpidr = smccc_get_arg2(vcpu);
> +	unsigned int intid = smccc_get_arg3(vcpu);
> +	unsigned long flags;
> +	struct rvic_vm_data *data;
> +	struct kvm_vcpu *target;
> +
> +	data = vcpu->kvm->arch.irqchip_data;
> +
> +	if (input > rvic_nr_untrusted(data)) {
> +		smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 0),
> +				 0, 0, 0);
> +		return;
> +	}
> +
> +	/* FIXME: different error from RVIC. Why? */
> +	target = kvm_mpidr_to_vcpu(vcpu->kvm, mpidr);
> +	if (!target) {
> +		smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 1),
> +				 0, 0, 0);
> +		return;
> +	}
> +
> +	if (intid < data->nr_trusted || intid >= data->nr_total) {
> +		smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 2),
> +				 0, 0, 0);
> +		return;
> +	}
> +
> +	spin_lock_irqsave(&data->lock, flags);
> +	data->rvid_map[input].target_vcpu	= target->vcpu_id;
> +	data->rvid_map[input].intid		= intid;
> +	spin_unlock_irqrestore(&data->lock, flags);
> +
> +	smccc_set_retval(vcpu, 0, 0, 0, 0);
> +}
> +
> +static void rvid_unmap(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long input = smccc_get_arg1(vcpu);
> +	unsigned long flags;
> +	struct rvic_vm_data *data;
> +
> +	data = vcpu->kvm->arch.irqchip_data;
> +
> +	if (input > rvic_nr_untrusted(data)) {
> +		smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 0),
> +				 0, 0, 0);
> +		return;
> +	}
> +
> +	spin_lock_irqsave(&data->lock, flags);
> +	data->rvid_map[input].target_vcpu	= 0;
> +	data->rvid_map[input].intid		= 0;
> +	spin_unlock_irqrestore(&data->lock, flags);
> +
> +	smccc_set_retval(vcpu, 0, 0, 0, 0);
> +}
> +
> +int kvm_rvid_handle_hcall(struct kvm_vcpu *vcpu)
> +{
> +	pr_debug("RVID: HC %08x", (unsigned int)smccc_get_function(vcpu));
> +	switch (smccc_get_function(vcpu)) {
> +	case SMC64_RVID_VERSION:
> +		rvid_version(vcpu);
> +		break;
> +	case SMC64_RVID_MAP:
> +		rvid_map(vcpu);
> +		break;
> +	case SMC64_RVID_UNMAP:
> +		rvid_unmap(vcpu);
> +		break;
> +	default:
> +		smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0);
> +		break;
> +	}
> +
> +	return 1;
> +}
> +
> +/*
> + * KVM internal interface to the rVIC
> + */
> +
> +/* This *must* be called from the vcpu thread */
> +static void rvic_flush_signaling_state(struct kvm_vcpu *vcpu)
> +{
> +	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&rvic->lock, flags);
> +
> +	__rvic_sync_hcr(vcpu, rvic, true);
> +
> +	spin_unlock_irqrestore(&rvic->lock, flags);
> +}
> +
> +/* This can be called from any context */
> +static void rvic_vcpu_inject_irq(struct kvm_vcpu *vcpu, unsigned int intid,
> +				 bool level)
> +{
> +	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
> +	unsigned long flags;
> +	bool prev;
> +
> +	spin_lock_irqsave(&rvic->lock, flags);
> +
> +	if (WARN_ON(intid >= rvic->nr_total))
> +		goto out;
> +
> +	/*
> +	 * Although really ugly, this should be safe as we hold the
> +	 * rvic lock, and the only path that uses this information is
> +	 * resample, which takes this lock too.
> +	 */
> +	if (!rvic->irqs[intid].get_line_level)
> +		rvic->irqs[intid].line_level = level;
> +
> +	if (level) {
> +		prev = __rvic_can_signal(rvic);
> +		__rvic_set_pending(rvic, intid);
> +		if (prev != __rvic_can_signal(rvic))
> +			__rvic_kick_vcpu(vcpu);
> +	}
> +out:
> +	spin_unlock_irqrestore(&rvic->lock, flags);
> +}
> +
> +static int rvic_inject_irq(struct kvm *kvm, unsigned int cpu,
> +			   unsigned int intid, bool level, void *owner)
> +{
> +	struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, cpu);
> +	struct rvic *rvic;
> +
> +	if (unlikely(!vcpu))
> +		return -EINVAL;
> +
> +	rvic = kvm_vcpu_to_rvic(vcpu);
> +	if (unlikely(intid >= rvic->nr_total))
> +		return -EINVAL;
> +
> +	/* Ignore interrupt owner for now */
> +	rvic_vcpu_inject_irq(vcpu, intid, level);
> +	return 0;
> +}
> +
> +static int rvic_inject_userspace_irq(struct kvm *kvm, unsigned int type,
> +				     unsigned int cpu,
> +				     unsigned int intid, bool level)
> +{
> +	struct rvic_vm_data *data = kvm->arch.irqchip_data;
> +	unsigned long flags;
> +	u16 output;
> +
> +	switch (type) {
> +	case KVM_ARM_IRQ_TYPE_SPI:
> +		/*
> +		 * Userspace can only inject interrupts that are
> +		 * translated by the rvid, so the cpu parameter is
> +		 * irrelevant and we override it when resolving the
> +		 * translation.
> +		 */
> +		if (intid >= rvic_nr_untrusted(data))
> +			return -EINVAL;
> +
> +		spin_lock_irqsave(&data->lock, flags);
> +		output = data->rvid_map[intid].intid;
> +		cpu = data->rvid_map[intid].target_vcpu;
> +		spin_unlock_irqrestore(&data->lock, flags);
> +
> +		/* Silently ignore unmapped interrupts */
> +		if (output < data->nr_trusted)
> +			return 0;
> +
> +		return rvic_inject_irq(kvm, cpu, output, level, NULL);
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
> +static int rvic_vcpu_init(struct kvm_vcpu *vcpu)
> +{
> +	struct rvic_vm_data *data = vcpu->kvm->arch.irqchip_data;
> +	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
> +	int i;
> +
> +	/* irqchip not ready yet, we will come back later */
> +	if (!data)
> +		return 0;
> +
> +	if (WARN_ON(rvic->irqs))
> +		return -EINVAL;
> +
> +	spin_lock_init(&rvic->lock);
> +	INIT_LIST_HEAD(&rvic->delivery);
> +	rvic->nr_trusted	= data->nr_trusted;
> +	rvic->nr_total		= data->nr_total;
> +	rvic->enabled		= false;
> +
> +	rvic->irqs = kcalloc(rvic->nr_total, sizeof(*rvic->irqs), GFP_ATOMIC);
> +	if (!rvic->irqs)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < rvic->nr_total; i++) {
> +		struct rvic_irq *irq = &rvic->irqs[i];
> +
> +		spin_lock_init(&irq->lock);
> +		INIT_LIST_HEAD(&irq->delivery_entry);
> +		irq->get_line_level	= NULL;
> +		irq->intid		= i;
> +		irq->host_irq		= 0;
> +		irq->pending		= false;
> +		irq->masked		= true;
> +		irq->line_level		= false;
> +	}
> +
> +	return 0;
> +}
> +
> +static void rvic_destroy(struct kvm *kvm)
> +{
> +	struct kvm_vcpu *vcpu;
> +	int i;
> +
> +	mutex_lock(&kvm->lock);
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
> +
> +		INIT_LIST_HEAD(&rvic->delivery);
> +		kfree(rvic->irqs);
> +		rvic->irqs = NULL;
> +	}
> +
> +	mutex_unlock(&kvm->lock);
> +}
> +
> +static int rvic_pending_irq(struct kvm_vcpu *vcpu)
> +{
> +	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
> +	unsigned long flags;
> +	bool res;
> +
> +	spin_lock_irqsave(&rvic->lock, flags);
> +	res = __rvic_can_signal(rvic);
> +	spin_unlock_irqrestore(&rvic->lock, flags);
> +
> +	return res;
> +}
> +
> +static int rvic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
> +			     u32 intid, bool (*get_line_level)(int))
> +{
> +	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
> +	struct rvic_irq *irq = rvic_get_irq(rvic, intid);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&irq->lock, flags);
> +	irq->host_irq = host_irq;
> +	irq->get_line_level = get_line_level;
> +	spin_unlock_irqrestore(&irq->lock, flags);
> +
> +	return 0;
> +}
> +
> +static int rvic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int intid)
> +{
> +	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
> +	struct rvic_irq *irq = rvic_get_irq(rvic, intid);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&irq->lock, flags);
> +	irq->host_irq = 0;
> +	irq->get_line_level = NULL;
> +	spin_unlock_irqrestore(&irq->lock, flags);
> +
> +	return 0;
> +}
> +
> +static int rvic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
> +			      struct kvm *kvm, int irq_source_id,
> +			      int level, bool line_status)
> +{
> +	/* Abuse the userspace interface to perform the routing*/
> +	return rvic_inject_userspace_irq(kvm, KVM_ARM_IRQ_TYPE_SPI, 0,
> +					 e->irqchip.pin, level);
> +}
> +
> +static int rvic_set_msi(struct kvm_kernel_irq_routing_entry *e,
> +			struct kvm *kvm, int irq_source_id,
> +			int level, bool line_status)
> +{
> +	return -ENODEV;
> +}
> +
> +static int rvic_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
> +				 struct kvm *kvm, int irq_source_id,
> +				 int level, bool line_status)
> +{
> +	if (e->type != KVM_IRQ_ROUTING_IRQCHIP)
> +		return -EWOULDBLOCK;
> +
> +	return rvic_irqfd_set_irq(e, kvm, irq_source_id, level, line_status);
> +}
> +
> +static const struct kvm_irqchip_flow rvic_irqchip_flow = {
> +	.irqchip_destroy		= rvic_destroy,
> +	.irqchip_vcpu_init		= rvic_vcpu_init,
> +	/* Nothing to do on block/unblock */
> +	/* Nothing to do on load/put */
> +	.irqchip_vcpu_pending_irq	= rvic_pending_irq,
> +	.irqchip_vcpu_flush_hwstate	= rvic_flush_signaling_state,
> +	/* Nothing to do on sync_hwstate */
> +	.irqchip_inject_irq		= rvic_inject_irq,
> +	.irqchip_inject_userspace_irq	= rvic_inject_userspace_irq,
> +	/* No reset_mapped_irq as we allow spurious interrupts */
> +	.irqchip_map_phys_irq		= rvic_map_phys_irq,
> +	.irqchip_unmap_phys_irq		= rvic_unmap_phys_irq,
> +	.irqchip_irqfd_set_irq		= rvic_irqfd_set_irq,
> +	.irqchip_set_msi		= rvic_set_msi,
> +	.irqchip_set_irq_inatomic	= rvic_set_irq_inatomic,
> +};
> +
> +static int rvic_setup_default_irq_routing(struct kvm *kvm)
> +{
> +	struct rvic_vm_data *data = kvm->arch.irqchip_data;
> +	unsigned int nr = rvic_nr_untrusted(data);
> +	struct kvm_irq_routing_entry *entries;
> +	int i, ret;
> +
> +	entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL);
> +	if (!entries)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < nr; i++) {
> +		entries[i].gsi = i;
> +		entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
> +		entries[i].u.irqchip.irqchip = 0;
> +		entries[i].u.irqchip.pin = i;
> +	}
> +	ret = kvm_set_irq_routing(kvm, entries, nr, 0);
> +	kfree(entries);
> +	return ret;
> +}
> +
> +/* Device management */
> +static int rvic_device_create(struct kvm_device *dev, u32 type)
> +{
> +	struct kvm *kvm = dev->kvm;
> +	struct kvm_vcpu *vcpu;
> +	int i, ret;
> +
> +	if (irqchip_in_kernel(kvm))
> +		return -EEXIST;
> +
> +	ret = -EBUSY;
> +	if (!lock_all_vcpus(kvm))
> +		return ret;
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		if (vcpu->arch.has_run_once)
> +			goto out_unlock;
> +	}
> +
> +	ret = 0;
> +
> +	/*
> +	 * The good thing about not having any HW is that you don't
> +	 * get the limitations of the HW...
> +	 */
> +	kvm->arch.max_vcpus		= KVM_MAX_VCPUS;
> +	kvm->arch.irqchip_type		= IRQCHIP_RVIC;
> +	kvm->arch.irqchip_flow		= rvic_irqchip_flow;
> +	kvm->arch.irqchip_data		= NULL;
> +
> +out_unlock:
> +	unlock_all_vcpus(kvm);
> +	return ret;
> +}
> +
> +static void rvic_device_destroy(struct kvm_device *dev)
> +{
> +	kfree(dev->kvm->arch.irqchip_data);
> +	kfree(dev);
> +}
> +
> +static int rvic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> +{
> +	struct rvic_vm_data *data;
> +	struct kvm_vcpu *vcpu;
> +	u32 __user *uaddr, val;
> +	u16 trusted, total;
> +	int i, ret = -ENXIO;
> +
> +	mutex_lock(&dev->kvm->lock);
> +
> +	switch (attr->group) {
> +	case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
> +		if (attr->attr)
> +			break;
> +
> +		if (dev->kvm->arch.irqchip_data) {
> +			ret = -EBUSY;
> +			break;
> +		}
> +
> +		uaddr = (u32 __user *)(uintptr_t)attr->addr;
> +		if (get_user(val, uaddr)) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +
> +		trusted = FIELD_GET(KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK, val);
> +		total   = FIELD_GET(KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK, val);
> +		if (total < trusted || trusted < 32 || total < 64 ||
> +		    trusted % 32 || total % 32 || total > 2048) {
> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		data = kzalloc(struct_size(data, rvid_map, (total - trusted)),
> +			       GFP_KERNEL);
> +		if (!data) {
> +			ret = -ENOMEM;
> +			break;
> +		}
> +
> +		data->nr_trusted = trusted;
> +		data->nr_total = total;
> +		spin_lock_init(&data->lock);
> +		/* Default to no mapping */
> +		for (i = 0; i < (total - trusted); i++) {
> +			/*
> +			 * an intid < nr_trusted is invalid as the
> +			 * result of a translation through the rvid,
> +			 * hence the input is unmapped.
> +			 */
> +			data->rvid_map[i].target_vcpu = 0;
> +			data->rvid_map[i].intid = 0;
> +		}
> +
> +		dev->kvm->arch.irqchip_data = data;
> +
> +		ret = 0;
> +		break;
> +
> +	case KVM_DEV_ARM_RVIC_GRP_INIT:
> +		if (attr->attr)
> +			break;
> +
> +		if (!dev->kvm->arch.irqchip_data)
> +			break;
> +
> +		ret = 0;
> +
> +		/* Init the rvic on any already created vcpu */
> +		kvm_for_each_vcpu(i, vcpu, dev->kvm) {
> +			ret = rvic_vcpu_init(vcpu);
> +			if (ret)
> +				break;
> +		}
> +
> +		if (!ret)
> +			ret = rvic_setup_default_irq_routing(dev->kvm);
> +		if (!ret)
> +			dev->kvm->arch.irqchip_finalized = true;
> +		break;
> +
> +	default:
> +		break;
> +	}
> +
> +	mutex_unlock(&dev->kvm->lock);
> +
> +	return ret;
> +}
> +
> +static int rvic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> +{
> +	struct rvic_vm_data *data;
> +	u32 __user *uaddr, val;
> +	int ret = -ENXIO;
> +
> +	mutex_lock(&dev->kvm->lock);
> +
> +	switch (attr->group) {
> +	case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
> +		if (attr->attr)
> +			break;
> +
> +		data = dev->kvm->arch.irqchip_data;
> +		if (!data)
> +			break;
> +
> +		val  = FIELD_PREP(KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK,
> +					 data->nr_trusted);
> +		val |= FIELD_PREP(KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK,
> +					 data->nr_total);
> +
> +		uaddr = (u32 __user *)(uintptr_t)attr->addr;
> +		ret = put_user(val, uaddr);
> +		break;
> +
> +	default:
> +		break;
> +	}
> +
> +	mutex_unlock(&dev->kvm->lock);
> +
> +	return ret;
> +}
> +
> +static int rvic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> +{
> +	int ret = -ENXIO;
> +
> +	switch (attr->group) {
> +	case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
> +	case KVM_DEV_ARM_RVIC_GRP_INIT:
> +		if (attr->attr)
> +			break;
> +		ret = 0;
> +		break;
> +
> +	default:
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +static const struct kvm_device_ops rvic_dev_ops = {
> +	.name		= "kvm-arm-rvic",
> +	.create		= rvic_device_create,
> +	.destroy	= rvic_device_destroy,
> +	.set_attr	= rvic_set_attr,
> +	.get_attr	= rvic_get_attr,
> +	.has_attr	= rvic_has_attr,
> +};
> +
> +int kvm_register_rvic_device(void)
> +{
> +	return kvm_register_device_ops(&rvic_dev_ops, KVM_DEV_TYPE_ARM_RVIC);
> +}
> diff --git a/include/kvm/arm_rvic.h b/include/kvm/arm_rvic.h
> new file mode 100644
> index 000000000000..9e67a83fa384
> --- /dev/null
> +++ b/include/kvm/arm_rvic.h
> @@ -0,0 +1,41 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * rVIC/rVID PV interrupt controller implementation for KVM/arm64.
> + *
> + * Copyright 2020 Google LLC.
> + * Author: Marc Zyngier <maz@kernel.org>
> + */
> +
> +#ifndef __KVM_ARM_RVIC_H__
> +#define __KVM_ARM_RVIC_H__
> +
> +#include <linux/list.h>
> +#include <linux/spinlock.h>
> +
> +struct kvm_vcpu;
> +
> +struct rvic_irq {
> +	spinlock_t		lock;
> +	struct list_head	delivery_entry;
> +	bool			(*get_line_level)(int intid);
> +	unsigned int		intid;
> +	unsigned int		host_irq;
> +	bool			pending;
> +	bool			masked;
> +	bool			line_level; /* If get_line_level == NULL */
> +};
> +
> +struct rvic {
> +	spinlock_t		lock;
> +	struct list_head	delivery;
> +	struct rvic_irq		*irqs;
> +	unsigned int		nr_trusted;
> +	unsigned int		nr_total;
> +	bool			enabled;
> +};
> +
> +int kvm_rvic_handle_hcall(struct kvm_vcpu *vcpu);
> +int kvm_rvid_handle_hcall(struct kvm_vcpu *vcpu);
> +int kvm_register_rvic_device(void);
> +
> +#endif
> diff --git a/include/linux/irqchip/irq-rvic.h b/include/linux/irqchip/irq-rvic.h
> index 4545c1e89741..b188773729fb 100644
> --- a/include/linux/irqchip/irq-rvic.h
> +++ b/include/linux/irqchip/irq-rvic.h
> @@ -57,6 +57,8 @@
>  #define SMC64_RVIC_ACKNOWLEDGE		SMC64_RVIC_FN(9)
>  #define SMC64_RVIC_RESAMPLE		SMC64_RVIC_FN(10)
>  
> +#define SMC64_RVIC_LAST			SMC64_RVIC_RESAMPLE
> +
>  #define RVIC_INFO_KEY_NR_TRUSTED_INTERRUPTS	0
>  #define RVIC_INFO_KEY_NR_UNTRUSTED_INTERRUPTS	1
>  
> @@ -82,6 +84,8 @@
>  #define SMC64_RVID_MAP			SMC64_RVID_FN(1)
>  #define SMC64_RVID_UNMAP		SMC64_RVID_FN(2)
>  
> +#define SMC64_RVID_LAST			SMC64_RVID_UNMAP
> +
>  #define RVID_VERSION(M, m)		RVIx_VERSION((M), (m))
>  
>  #define RVID_VERSION_MAJOR(v)		RVIx_VERSION_MAJOR((v))
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index f6d86033c4fa..6d245d2dc9e6 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1264,6 +1264,8 @@ enum kvm_device_type {
>  #define KVM_DEV_TYPE_XIVE		KVM_DEV_TYPE_XIVE
>  	KVM_DEV_TYPE_ARM_PV_TIME,
>  #define KVM_DEV_TYPE_ARM_PV_TIME	KVM_DEV_TYPE_ARM_PV_TIME
> +	KVM_DEV_TYPE_ARM_RVIC,
> +#define KVM_DEV_TYPE_ARM_RVIC		KVM_DEV_TYPE_ARM_RVIC
>  	KVM_DEV_TYPE_MAX,
>  };
>  
> -- 
> 2.27.0
>
Marc Zyngier Sept. 29, 2020, 3:27 p.m. UTC | #4
Hi Lorenzo,

On 2020-09-29 16:13, Lorenzo Pieralisi wrote:
> On Thu, Sep 03, 2020 at 04:26:09PM +0100, Marc Zyngier wrote:
> 
> [...]
> 
>> +static void __rvic_sync_hcr(struct kvm_vcpu *vcpu, struct rvic *rvic,
>> +			    bool was_signaling)
>> +{
>> +	struct kvm_vcpu *target = kvm_rvic_to_vcpu(rvic);
>> +	bool signal = __rvic_can_signal(rvic);
>> +
>> +	/* We're hitting our own rVIC: update HCR_VI locally */
>> +	if (vcpu == target) {
>> +		if (signal)
>> +			*vcpu_hcr(vcpu) |= HCR_VI;
>> +		else
>> +			*vcpu_hcr(vcpu) &= ~HCR_VI;
>> +
>> +		return;
>> +	}
>> +
>> +	/*
>> +	 * Remote rVIC case:
>> +	 *
>> +	 * We kick even if the interrupt disappears, as ISR_EL1.I must
>> +	 * always reflect the state of the rVIC. This forces a reload
>> +	 * of the vcpu state, making it consistent.
> 
> Forgive me the question but this is unclear to me. IIUC here we do _not_
> want to change the target_vcpu.hcr and we force a kick to make sure it
> syncs the hcr (so the rvic) state on its own upon exit. Is that correct ?

This is indeed correct. Changing the vcpu's hcr is racy as we sometimes
update it on vcpu exit, so directly updating this field would require
introducing atomic accesses between EL1 and EL2. Not happening.

Instead, we force the vcpu to reload its own state as it *reenters*
the guest (and not on exit). This way, no locking, no cmpxchg,
everything is still single threaded.

> Furthermore, I think it would be extremely useful to elaborate (ie
> rework the comment) further on ISR_EL1.I and how it is linked to this
> code path - I think it is key to understanding it.

I'm not really sure what to add here, apart from paraphrasing the ARM ARM.
ISR_EL1 always represents the interrupt input to the CPU, virtual or not,
and we must honor this requirement by making any change in the output of
the rVIC directly observable (i.e. update HCR_EL2.VI).

         M.
Lorenzo Pieralisi Sept. 29, 2020, 4:09 p.m. UTC | #5
On Tue, Sep 29, 2020 at 04:27:45PM +0100, Marc Zyngier wrote:
> Hi Lorenzo,
> 
> On 2020-09-29 16:13, Lorenzo Pieralisi wrote:
> > On Thu, Sep 03, 2020 at 04:26:09PM +0100, Marc Zyngier wrote:
> > 
> > [...]
> > 
> > > +static void __rvic_sync_hcr(struct kvm_vcpu *vcpu, struct rvic *rvic,
> > > +			    bool was_signaling)
> > > +{
> > > +	struct kvm_vcpu *target = kvm_rvic_to_vcpu(rvic);
> > > +	bool signal = __rvic_can_signal(rvic);
> > > +
> > > +	/* We're hitting our own rVIC: update HCR_VI locally */
> > > +	if (vcpu == target) {
> > > +		if (signal)
> > > +			*vcpu_hcr(vcpu) |= HCR_VI;
> > > +		else
> > > +			*vcpu_hcr(vcpu) &= ~HCR_VI;
> > > +
> > > +		return;
> > > +	}
> > > +
> > > +	/*
> > > +	 * Remote rVIC case:
> > > +	 *
> > > +	 * We kick even if the interrupt disappears, as ISR_EL1.I must
> > > +	 * always reflect the state of the rVIC. This forces a reload
> > > +	 * of the vcpu state, making it consistent.
> > 
> > Forgive me the question but this is unclear to me. IIUC here we do _not_
> > want to change the target_vcpu.hcr and we force a kick to make sure it
> > syncs the hcr (so the rvic) state on its own upon exit. Is that correct ?
> 
> This is indeed correct. Changing the vcpu's hcr is racy as we sometimes
> update it on vcpu exit, so directly updating this field would require
> introducing atomic accesses between EL1 and EL2. Not happening.
> 
> Instead, we force the vcpu to reload its own state as it *reenters*
> the guest (and not on exit). This way, no locking, no cmpxchg, everything
> is still single threaded.
> 
> > Furthermore, I think it would be extremely useful to elaborate (ie
> > rework the comment) further on ISR_EL1.I and how it is linked to this
> > code path - I think it is key to understanding it.
> 
> I'm not really sure what to add here, apart from paraphrasing the ARM ARM.
> ISR_EL1 always represents the interrupt input to the CPU, virtual or not,
> and we must honor this requirement by making any change in the output of
> the rVIC directly observable (i.e. update HCR_EL2.VI).

Ok got it. Basically we kick the target_vcpu because there is a change
in its IRQ input signal that has to be signalled (by updating the
HCR_EL2.VI that in turn updates the ISR_EL1.I). I read the comment as
if we don't care about the target_vcpu.hcr_el2 update and was struggling
to understand the whole code path.

Back to reading the code (properly :)).
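
FWIW, a comment along these lines (just a sketch) would have made it
click for me straight away:

	/*
	 * Remote rVIC case:
	 *
	 * The guest must always be able to observe the rVIC output
	 * via ISR_EL1.I, which is driven by HCR_EL2.VI. We cannot
	 * safely update the target vcpu's copy of HCR_EL2 from here
	 * (it is also updated on vcpu exit, and we may be in a
	 * cross-vcpu call), so kick the target instead: it will
	 * recompute HCR_EL2.VI from the rVIC state when it next
	 * enters the guest.
	 */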

Thanks,
Lorenzo
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5dd92873d40f..381d3ff6e0b7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -35,6 +35,7 @@ 
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 #include <kvm/arm_pmu.h>
+#include <kvm/arm_rvic.h>
 
 #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
 
@@ -102,6 +103,7 @@  struct kvm_arch {
 	enum kvm_irqchip_type	irqchip_type;
 	bool			irqchip_finalized;
 	struct kvm_irqchip_flow	irqchip_flow;
+	void			*irqchip_data;
 	struct vgic_dist	vgic;
 
 	/* Mandated version of PSCI */
@@ -324,7 +326,10 @@  struct kvm_vcpu_arch {
 	} host_debug_state;
 
 	/* VGIC state */
-	struct vgic_cpu vgic_cpu;
+	union {
+		struct vgic_cpu vgic_cpu;
+		struct rvic rvic;
+	};
 	struct arch_timer_cpu timer_cpu;
 	struct kvm_pmu pmu;
 
diff --git a/arch/arm64/include/asm/kvm_irq.h b/arch/arm64/include/asm/kvm_irq.h
index 05fbe5241642..bb1666093f80 100644
--- a/arch/arm64/include/asm/kvm_irq.h
+++ b/arch/arm64/include/asm/kvm_irq.h
@@ -11,11 +11,13 @@  enum kvm_irqchip_type {
 	IRQCHIP_USER,		/* Implemented in userspace */
 	IRQCHIP_GICv2,		/* v2 on v2, or v2 on v3 */
 	IRQCHIP_GICv3,		/* v3 on v3 */
+	IRQCHIP_RVIC,		/* PV irqchip */
 };
 
 #define irqchip_in_kernel(k)	((k)->arch.irqchip_type != IRQCHIP_USER)
 #define irqchip_is_gic_v2(k)	((k)->arch.irqchip_type == IRQCHIP_GICv2)
 #define irqchip_is_gic_v3(k)	((k)->arch.irqchip_type == IRQCHIP_GICv3)
+#define irqchip_is_rvic(k)	((k)->arch.irqchip_type == IRQCHIP_RVIC)
 
 #define irqchip_finalized(k)	((k)->arch.irqchip_finalized)
 
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index ba85bb23f060..9fc26c84903f 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -335,6 +335,15 @@  struct kvm_vcpu_events {
 #define KVM_ARM_VCPU_PVTIME_CTRL	2
 #define   KVM_ARM_VCPU_PVTIME_IPA	0
 
+/*
+ * Device Control API: ARM RVIC. We only use the group, not the group
+ * attributes. They must be set to 0 for now.
+ */
+#define KVM_DEV_ARM_RVIC_GRP_NR_IRQS	0
+#define   KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK	0xffff
+#define   KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK	(0xffff << 16)
+#define KVM_DEV_ARM_RVIC_GRP_INIT	1
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_VCPU2_SHIFT		28
 #define KVM_ARM_IRQ_VCPU2_MASK		0xf
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 99977c1972cc..e378293ce99b 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -16,7 +16,7 @@  kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
 	 inject_fault.o regmap.o va_layout.o hyp.o handle_exit.o \
 	 guest.o debug.o reset.o sys_regs.o \
 	 vgic-sys-reg-v3.o fpsimd.o pmu.o \
-	 aarch32.o arch_timer.o \
+	 aarch32.o arch_timer.o rvic-cpu.o \
 	 vgic/vgic.o vgic/vgic-init.o \
 	 vgic/vgic-irqfd.o vgic/vgic-v2.o \
 	 vgic/vgic-v3.o vgic/vgic-v4.o \
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 0d4c8de27d1e..bf0b11bdce84 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -41,6 +41,7 @@ 
 #include <kvm/arm_hypercalls.h>
 #include <kvm/arm_pmu.h>
 #include <kvm/arm_psci.h>
+#include <kvm/arm_rvic.h>
 
 #ifdef REQUIRES_VIRT
 __asm__(".arch_extension	virt");
@@ -1402,6 +1403,8 @@  static int init_subsystems(void)
 	switch (err) {
 	case 0:
 		vgic_present = true;
+		if (kvm_register_rvic_device())
+			kvm_err("Failed to register rvic device type\n");
 		break;
 	case -ENODEV:
 	case -ENXIO:
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 550dfa3e53cd..f6620be74ce5 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -8,6 +8,9 @@ 
 
 #include <kvm/arm_hypercalls.h>
 #include <kvm/arm_psci.h>
+#include <kvm/arm_rvic.h>
+
+#include <linux/irqchip/irq-rvic.h>
 
 int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 {
@@ -62,6 +65,10 @@  int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 		if (gpa != GPA_INVALID)
 			val = gpa;
 		break;
+	case SMC64_RVIC_BASE ... SMC64_RVIC_LAST:
+		return kvm_rvic_handle_hcall(vcpu);
+	case SMC64_RVID_BASE ... SMC64_RVID_LAST:
+		return kvm_rvid_handle_hcall(vcpu);
 	default:
 		return kvm_psci_call(vcpu);
 	}
diff --git a/arch/arm64/kvm/rvic-cpu.c b/arch/arm64/kvm/rvic-cpu.c
new file mode 100644
index 000000000000..5fb200c637d9
--- /dev/null
+++ b/arch/arm64/kvm/rvic-cpu.c
@@ -0,0 +1,1073 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * rVIC/rVID PV interrupt controller implementation for KVM/arm64.
+ *
+ * Copyright 2020 Google LLC.
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_rvic.h>
+
+#include <linux/irqchip/irq-rvic.h>
+
+/* FIXME: lock/unlock_all_vcpus */
+#include "vgic/vgic.h"
+
+#define kvm_vcpu_to_rvic(v)	(&(v)->arch.rvic)
+#define kvm_rvic_to_vcpu(r)	(container_of((r), struct kvm_vcpu, arch.rvic))
+
+#define rvic_nr_untrusted(r)	((r)->nr_total - (r)->nr_trusted)
+
+struct rvic_vm_data {
+	u16		nr_trusted;
+	u16		nr_total;
+	spinlock_t	lock;
+	/* Map is a dynamically allocated array of (total-trusted) elements */
+	struct {
+		u16	target_vcpu;
+		u16	intid;
+	} rvid_map[];
+};
+
+/*
+ * rvic_irq state machine:
+ *
+ * idle <- S/C -> pending
+ *  ^          /    ^
+ *  |         /     |
+ * U/M       A     U/M
+ *  |       /       |
+ *  v     v         V
+ * masked <- S/C -> masked+pending
+ *
+ * [S]: Set Pending, [C]: Clear Pending
+ * [U]: Unmask, [M]: Mask
+ * [A]: Ack
+ */
+
+static struct rvic_irq *rvic_get_irq(struct rvic *rvic, unsigned int intid)
+{
+	if (intid >= rvic->nr_total)
+		return NULL;
+	return &rvic->irqs[intid];
+}
+
+static bool rvic_irq_queued(struct rvic_irq *irq)
+{
+	return !list_empty(&irq->delivery_entry);
+}
+
+/* RVIC primitives. They all imply that the RVIC lock is held */
+static void __rvic_enable(struct rvic *rvic)
+{
+	rvic->enabled = true;
+}
+
+static void __rvic_disable(struct rvic *rvic)
+{
+	rvic->enabled = false;
+}
+
+static bool __rvic_is_enabled(struct rvic *rvic)
+{
+	return rvic->enabled;
+}
+
+static void __rvic_set_pending(struct rvic *rvic, unsigned int intid)
+{
+	struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+	unsigned long flags;
+
+	if (!__rvic_is_enabled(rvic)) {
+		pr_debug("dropping intid %u\n", intid);
+		return;
+	}
+
+	spin_lock_irqsave(&irq->lock, flags);
+
+	irq->pending = true;
+	if (!irq->masked && !rvic_irq_queued(irq))
+		list_add_tail(&irq->delivery_entry, &rvic->delivery);
+
+	spin_unlock_irqrestore(&irq->lock, flags);
+}
+
+static void __rvic_clear_pending(struct rvic *rvic, unsigned int intid)
+{
+	struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+	unsigned long flags;
+
+	spin_lock_irqsave(&irq->lock, flags);
+
+	irq->pending = false;
+	list_del_init(&irq->delivery_entry);
+
+	spin_unlock_irqrestore(&irq->lock, flags);
+}
+
+static bool __rvic_is_pending(struct rvic *rvic, unsigned int intid)
+{
+	struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+	unsigned long flags;
+	bool pend;
+
+	spin_lock_irqsave(&irq->lock, flags);
+	pend = irq->pending;
+	spin_unlock_irqrestore(&irq->lock, flags);
+
+	return pend;
+}
+
+static void __rvic_set_masked(struct rvic *rvic, unsigned int intid)
+{
+	struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+	unsigned long flags;
+
+	spin_lock_irqsave(&irq->lock, flags);
+
+	irq->masked = true;
+	if (irq->pending)
+		list_del_init(&irq->delivery_entry);
+
+	spin_unlock_irqrestore(&irq->lock, flags);
+}
+
+static void __rvic_clear_masked(struct rvic *rvic, unsigned int intid)
+{
+	struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+	unsigned long flags;
+
+	spin_lock_irqsave(&irq->lock, flags);
+
+	irq->masked = false;
+	if (__rvic_is_enabled(rvic) && irq->pending && !rvic_irq_queued(irq))
+		list_add_tail(&irq->delivery_entry, &rvic->delivery);
+
+	spin_unlock_irqrestore(&irq->lock, flags);
+}
+
+static unsigned int __rvic_ack(struct rvic *rvic)
+{
+	unsigned int intid = ~0U;
+	struct rvic_irq *irq;
+
+	if (!__rvic_is_enabled(rvic))
+		return intid;
+
+	irq = list_first_entry_or_null(&rvic->delivery, struct rvic_irq,
+				       delivery_entry);
+	if (irq) {
+		intid = irq->intid;
+		__rvic_set_masked(rvic, intid);
+		__rvic_clear_pending(rvic, intid);
+	}
+
+	return intid;
+}
+
+static bool __rvic_can_signal(struct rvic *rvic)
+{
+	return __rvic_is_enabled(rvic) && !list_empty(&rvic->delivery);
+}
+
+static void __rvic_resample(struct rvic *rvic, unsigned int intid)
+{
+	struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+	unsigned long flags;
+	bool pending;
+
+	spin_lock_irqsave(&irq->lock, flags);
+	if (irq->get_line_level) {
+		pending = irq->get_line_level(irq->intid);
+
+		/*
+		 * As part of the resampling, tickle the GIC so that
+		 * new interrupts can trickle in.
+		 */
+		if (!pending && irq->host_irq)
+			irq_set_irqchip_state(irq->host_irq,
+					      IRQCHIP_STATE_ACTIVE, false);
+	} else {
+		pending = irq->line_level;
+	}
+
+	spin_unlock_irqrestore(&irq->lock, flags);
+
+	if (pending)
+		__rvic_set_pending(rvic, intid);
+}
+
+/*
+ * rVIC hypercall handling. All functions assume they are being called
+ * from the vcpu thread that triggers the hypercall.
+ */
+static void __rvic_kick_vcpu(struct kvm_vcpu *vcpu)
+{
+	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
+	kvm_vcpu_kick(vcpu);
+}
+
+static void __rvic_sync_hcr(struct kvm_vcpu *vcpu, struct rvic *rvic,
+			    bool was_signaling)
+{
+	struct kvm_vcpu *target = kvm_rvic_to_vcpu(rvic);
+	bool signal = __rvic_can_signal(rvic);
+
+	/* We're hitting our own rVIC: update HCR_VI locally */
+	if (vcpu == target) {
+		if (signal)
+			*vcpu_hcr(vcpu) |= HCR_VI;
+		else
+			*vcpu_hcr(vcpu) &= ~HCR_VI;
+
+		return;
+	}
+
+	/*
+	 * Remote rVIC case:
+	 *
+	 * We kick even if the interrupt disappears, as ISR_EL1.I must
+	 * always reflect the state of the rVIC. This forces a reload
+	 * of the vcpu state, making it consistent.
+	 *
+	 * This avoids modifying the target's own copy of HCR_EL2, as
+	 * we are in a cross-vcpu call, and changing it from under its
+	 * feet is dodgy.
+	 */
+	if (was_signaling != signal)
+		__rvic_kick_vcpu(target);
+}
+
+static void rvic_version(struct kvm_vcpu *vcpu)
+{
+	/* ALP0.3 is the name of the game */
+	smccc_set_retval(vcpu, RVIC_STATUS_SUCCESS, RVIC_VERSION(0, 3), 0, 0);
+}
+
+static void rvic_info(struct kvm_vcpu *vcpu)
+{
+	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+	unsigned long what = smccc_get_arg1(vcpu);
+	unsigned long a0, a1;
+
+	switch (what) {
+	case RVIC_INFO_KEY_NR_TRUSTED_INTERRUPTS:
+		a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+		a1 = rvic->nr_trusted;
+		break;
+	case RVIC_INFO_KEY_NR_UNTRUSTED_INTERRUPTS:
+		a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+		a1 = rvic_nr_untrusted(rvic);
+		break;
+	default:
+		a0 = RVIx_STATUS_PACK(RVIC_STATUS_ERROR_PARAMETER, 0);
+		a1 = 0;
+		break;
+	}
+
+	smccc_set_retval(vcpu, a0, a1, 0, 0);
+}
+
+static void rvic_enable(struct kvm_vcpu *vcpu)
+{
+	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+	unsigned long flags;
+	bool was_signaling;
+
+	spin_lock_irqsave(&rvic->lock, flags);
+
+	was_signaling = __rvic_can_signal(rvic);
+	__rvic_enable(rvic);
+	__rvic_sync_hcr(vcpu, rvic, was_signaling);
+
+	spin_unlock_irqrestore(&rvic->lock, flags);
+
+	smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0),
+			 0, 0, 0);
+}
+
+static void rvic_disable(struct kvm_vcpu *vcpu)
+{
+	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+	unsigned long flags;
+	bool was_signaling;
+
+	spin_lock_irqsave(&rvic->lock, flags);
+
+	was_signaling = __rvic_can_signal(rvic);
+	__rvic_disable(rvic);
+	__rvic_sync_hcr(vcpu, rvic, was_signaling);
+
+	spin_unlock_irqrestore(&rvic->lock, flags);
+
+	smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0),
+			 0, 0, 0);
+}
+
+typedef void (*rvic_action_fn_t)(struct rvic *, unsigned int);
+
+static int validate_rvic_call(struct kvm_vcpu *vcpu, struct rvic **rvicp,
+			      unsigned int *intidp)
+{
+	unsigned long mpidr = smccc_get_arg1(vcpu);
+	unsigned int intid = smccc_get_arg2(vcpu);
+	struct kvm_vcpu *target;
+	struct rvic *rvic;
+
+	/* FIXME: The spec distinguishes between invalid MPIDR and invalid CPU */
+
+	target = kvm_mpidr_to_vcpu(vcpu->kvm, mpidr);
+	if (!target) {
+		smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_INVALID_CPU, 0),
+				 0, 0, 0);
+		return -1;
+	}
+
+	rvic = kvm_vcpu_to_rvic(target);
+	if (intid >= rvic->nr_total) {
+		smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_ERROR_PARAMETER, 1),
+				 0, 0, 0);
+		return -1;
+	}
+
+	*rvicp = rvic;
+	*intidp = intid;
+
+	return 0;
+}
+
+static void __rvic_action(struct kvm_vcpu *vcpu, rvic_action_fn_t action,
+			  bool check_enabled)
+{
+	struct rvic *rvic;
+	unsigned long a0;
+	unsigned long flags;
+	int intid;
+
+	if (validate_rvic_call(vcpu, &rvic, &intid))
+		return;
+
+	spin_lock_irqsave(&rvic->lock, flags);
+
+	if (unlikely(check_enabled && !__rvic_is_enabled(rvic))) {
+		a0 = RVIx_STATUS_PACK(RVIC_STATUS_DISABLED, 0);
+	} else {
+		bool was_signaling = __rvic_can_signal(rvic);
+		action(rvic, intid);
+		__rvic_sync_hcr(vcpu, rvic, was_signaling);
+		a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+	}
+
+	spin_unlock_irqrestore(&rvic->lock, flags);
+
+	smccc_set_retval(vcpu, a0, 0, 0, 0);
+}
+
+static void rvic_set_masked(struct kvm_vcpu *vcpu)
+{
+	__rvic_action(vcpu, __rvic_set_masked, false);
+}
+
+static void rvic_clear_masked(struct kvm_vcpu *vcpu)
+{
+	__rvic_action(vcpu, __rvic_clear_masked, false);
+}
+
+static void rvic_clear_pending(struct kvm_vcpu *vcpu)
+{
+	__rvic_action(vcpu, __rvic_clear_pending, false);
+}
+
+static void rvic_signal(struct kvm_vcpu *vcpu)
+{
+	__rvic_action(vcpu, __rvic_set_pending, true);
+}
+
+static void rvic_is_pending(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags;
+	struct rvic *rvic;
+	int intid;
+	bool res;
+
+	if (validate_rvic_call(vcpu, &rvic, &intid))
+		return;
+
+	spin_lock_irqsave(&rvic->lock, flags);
+
+	res = __rvic_is_pending(rvic, intid);
+
+	spin_unlock_irqrestore(&rvic->lock, flags);
+
+	smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0),
+			 res, 0, 0);
+}
+
+/*
+ * Ack and Resample are the only "interesting" operations that are
+ * strictly per-CPU.
+ */
+static void rvic_acknowledge(struct kvm_vcpu *vcpu)
+{
+	unsigned long a0, a1;
+	unsigned long flags;
+	unsigned int intid;
+	struct rvic *rvic;
+
+	rvic = kvm_vcpu_to_rvic(vcpu);
+
+	spin_lock_irqsave(&rvic->lock, flags);
+
+	if (unlikely(!__rvic_is_enabled(rvic))) {
+		a0 = RVIx_STATUS_PACK(RVIC_STATUS_DISABLED, 0);
+		a1 = 0;
+	} else {
+		intid = __rvic_ack(rvic);
+		__rvic_sync_hcr(vcpu, rvic, true);
+		if (unlikely(intid >= rvic->nr_total)) {
+			a0 = RVIx_STATUS_PACK(RVIC_STATUS_NO_INTERRUPTS, 0);
+			a1 = 0;
+		} else {
+			a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+			a1 = intid;
+		}
+	}
+
+	spin_unlock_irqrestore(&rvic->lock, flags);
+
+	smccc_set_retval(vcpu, a0, a1, 0, 0);
+}
+
+static void rvic_resample(struct kvm_vcpu *vcpu)
+{
+	unsigned int intid = smccc_get_arg1(vcpu);
+	unsigned long flags;
+	unsigned long a0;
+	struct rvic *rvic;
+
+	rvic = kvm_vcpu_to_rvic(vcpu);
+
+	spin_lock_irqsave(&rvic->lock, flags);
+
+	if (unlikely(intid >= rvic->nr_trusted)) {
+		a0 = RVIx_STATUS_PACK(RVIC_STATUS_ERROR_PARAMETER, 0);
+	} else {
+		__rvic_resample(rvic, intid);
+
+		/*
+		 * Don't bother finding out if we were signalling, we
+		 * will update HCR_EL2 anyway as we are guaranteed not
+		 * to be in a cross-call.
+		 */
+		__rvic_sync_hcr(vcpu, rvic, true);
+		a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+	}
+
+	spin_unlock_irqrestore(&rvic->lock, flags);
+
+	smccc_set_retval(vcpu, a0, 0, 0, 0);
+}
+
+int kvm_rvic_handle_hcall(struct kvm_vcpu *vcpu)
+{
+	pr_debug("RVIC: HC %08x", (unsigned int)smccc_get_function(vcpu));
+	switch (smccc_get_function(vcpu)) {
+	case SMC64_RVIC_VERSION:
+		rvic_version(vcpu);
+		break;
+	case SMC64_RVIC_INFO:
+		rvic_info(vcpu);
+		break;
+	case SMC64_RVIC_ENABLE:
+		rvic_enable(vcpu);
+		break;
+	case SMC64_RVIC_DISABLE:
+		rvic_disable(vcpu);
+		break;
+	case SMC64_RVIC_SET_MASKED:
+		rvic_set_masked(vcpu);
+		break;
+	case SMC64_RVIC_CLEAR_MASKED:
+		rvic_clear_masked(vcpu);
+		break;
+	case SMC64_RVIC_IS_PENDING:
+		rvic_is_pending(vcpu);
+		break;
+	case SMC64_RVIC_SIGNAL:
+		rvic_signal(vcpu);
+		break;
+	case SMC64_RVIC_CLEAR_PENDING:
+		rvic_clear_pending(vcpu);
+		break;
+	case SMC64_RVIC_ACKNOWLEDGE:
+		rvic_acknowledge(vcpu);
+		break;
+	case SMC64_RVIC_RESAMPLE:
+		rvic_resample(vcpu);
+		break;
+	default:
+		smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0);
+		break;
+	}
+
+	return 1;
+}
+
+static void rvid_version(struct kvm_vcpu *vcpu)
+{
+	/* ALP0.3 is the name of the game */
+	smccc_set_retval(vcpu, RVID_STATUS_SUCCESS, RVID_VERSION(0, 3), 0, 0);
+}
+
+static void rvid_map(struct kvm_vcpu *vcpu)
+{
+	unsigned long input = smccc_get_arg1(vcpu);
+	unsigned long mpidr = smccc_get_arg2(vcpu);
+	unsigned int intid = smccc_get_arg3(vcpu);
+	unsigned long flags;
+	struct rvic_vm_data *data;
+	struct kvm_vcpu *target;
+
+	data = vcpu->kvm->arch.irqchip_data;
+
+	if (input > rvic_nr_untrusted(data)) {
+		smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 0),
+				 0, 0, 0);
+		return;
+	}
+
+	/* FIXME: different error from RVIC. Why? */
+	target = kvm_mpidr_to_vcpu(vcpu->kvm, mpidr);
+	if (!target) {
+		smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 1),
+				 0, 0, 0);
+		return;
+	}
+
+	if (intid < data->nr_trusted || intid >= data->nr_total) {
+		smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 2),
+				 0, 0, 0);
+		return;
+	}
+
+	spin_lock_irqsave(&data->lock, flags);
+	data->rvid_map[input].target_vcpu	= target->vcpu_id;
+	data->rvid_map[input].intid		= intid;
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	smccc_set_retval(vcpu, 0, 0, 0, 0);
+}
+
+static void rvid_unmap(struct kvm_vcpu *vcpu)
+{
+	unsigned long input = smccc_get_arg1(vcpu);
+	unsigned long flags;
+	struct rvic_vm_data *data;
+
+	data = vcpu->kvm->arch.irqchip_data;
+
+	if (input >= rvic_nr_untrusted(data)) {
+		smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 0),
+				 0, 0, 0);
+		return;
+	}
+
+	spin_lock_irqsave(&data->lock, flags);
+	data->rvid_map[input].target_vcpu	= 0;
+	data->rvid_map[input].intid		= 0;
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	smccc_set_retval(vcpu, 0, 0, 0, 0);
+}
+
+int kvm_rvid_handle_hcall(struct kvm_vcpu *vcpu)
+{
+	pr_debug("RVID: HC %08x\n", (unsigned int)smccc_get_function(vcpu));
+	switch (smccc_get_function(vcpu)) {
+	case SMC64_RVID_VERSION:
+		rvid_version(vcpu);
+		break;
+	case SMC64_RVID_MAP:
+		rvid_map(vcpu);
+		break;
+	case SMC64_RVID_UNMAP:
+		rvid_unmap(vcpu);
+		break;
+	default:
+		smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0);
+		break;
+	}
+
+	return 1;
+}
+
+/*
+ * KVM internal interface to the rVIC
+ */
+
+/* This *must* be called from the vcpu thread */
+static void rvic_flush_signaling_state(struct kvm_vcpu *vcpu)
+{
+	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+	unsigned long flags;
+
+	spin_lock_irqsave(&rvic->lock, flags);
+
+	__rvic_sync_hcr(vcpu, rvic, true);
+
+	spin_unlock_irqrestore(&rvic->lock, flags);
+}
+
+/* This can be called from any context */
+static void rvic_vcpu_inject_irq(struct kvm_vcpu *vcpu, unsigned int intid,
+				 bool level)
+{
+	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+	unsigned long flags;
+	bool prev;
+
+	spin_lock_irqsave(&rvic->lock, flags);
+
+	if (WARN_ON(intid >= rvic->nr_total))
+		goto out;
+
+	/*
+	 * Although really ugly, this should be safe as we hold the
+	 * rvic lock, and the only path that uses this information is
+	 * resample, which takes this lock too.
+	 */
+	if (!rvic->irqs[intid].get_line_level)
+		rvic->irqs[intid].line_level = level;
+
+	if (level) {
+		prev = __rvic_can_signal(rvic);
+		__rvic_set_pending(rvic, intid);
+		if (prev != __rvic_can_signal(rvic))
+			__rvic_kick_vcpu(vcpu);
+	}
+out:
+	spin_unlock_irqrestore(&rvic->lock, flags);
+}
+
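+/*
+ * irqchip_inject_irq callback: resolve the target vcpu and validate the
+ * intid before handing over to rvic_vcpu_inject_irq().
+ */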
+static int rvic_inject_irq(struct kvm *kvm, unsigned int cpu,
+			   unsigned int intid, bool level, void *owner)
+{
+	struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, cpu);
+	struct rvic *rvic;
+
+	if (unlikely(!vcpu))
+		return -EINVAL;
+
+	rvic = kvm_vcpu_to_rvic(vcpu);
+	if (unlikely(intid >= rvic->nr_total))
+		return -EINVAL;
+
+	/* Ignore interrupt owner for now */
+	rvic_vcpu_inject_irq(vcpu, intid, level);
+
+	return 0;
+}
+
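+/*
+ * irqchip_inject_userspace_irq callback: a userspace SPI is treated as an
+ * rvid input and translated into a (vcpu, intid) pair before being
+ * injected as above.
+ */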
+static int rvic_inject_userspace_irq(struct kvm *kvm, unsigned int type,
+				     unsigned int cpu,
+				     unsigned int intid, bool level)
+{
+	struct rvic_vm_data *data = kvm->arch.irqchip_data;
+	unsigned long flags;
+	u16 output;
+
+	switch (type) {
+	case KVM_ARM_IRQ_TYPE_SPI:
+		/*
+		 * Userspace can only inject interrupts that are
+		 * translated by the rvid, so the cpu parameter is
+		 * irrelevant and we override it when resolving the
+		 * translation.
+		 */
+		if (intid >= rvic_nr_untrusted(data))
+			return -EINVAL;
+
+		spin_lock_irqsave(&data->lock, flags);
+		output = data->rvid_map[intid].intid;
+		cpu = data->rvid_map[intid].target_vcpu;
+		spin_unlock_irqrestore(&data->lock, flags);
+
+		/* Silently ignore unmapped interrupts */
+		if (output < data->nr_trusted)
+			return 0;
+
+		return rvic_inject_irq(kvm, cpu, output, level, NULL);
+	default:
+		return -EINVAL;
+	}
+}
+
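+/*
+ * Size the per-vcpu rVIC state from the VM-wide configuration. All
+ * interrupts start out masked, not pending, and with the line low.
+ */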
+static int rvic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	struct rvic_vm_data *data = vcpu->kvm->arch.irqchip_data;
+	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+	int i;
+
+	/* Irqchip not ready yet; we will come back later */
+	if (!data)
+		return 0;
+
+	if (WARN_ON(rvic->irqs))
+		return -EINVAL;
+
+	spin_lock_init(&rvic->lock);
+	INIT_LIST_HEAD(&rvic->delivery);
+	rvic->nr_trusted	= data->nr_trusted;
+	rvic->nr_total		= data->nr_total;
+	rvic->enabled		= false;
+
+	rvic->irqs = kcalloc(rvic->nr_total, sizeof(*rvic->irqs), GFP_ATOMIC);
+	if (!rvic->irqs)
+		return -ENOMEM;
+
+	for (i = 0; i < rvic->nr_total; i++) {
+		struct rvic_irq *irq = &rvic->irqs[i];
+
+		spin_lock_init(&irq->lock);
+		INIT_LIST_HEAD(&irq->delivery_entry);
+		irq->get_line_level	= NULL;
+		irq->intid		= i;
+		irq->host_irq		= 0;
+		irq->pending		= false;
+		irq->masked		= true;
+		irq->line_level		= false;
+	}
+
+	return 0;
+}
+
+static void rvic_destroy(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	mutex_lock(&kvm->lock);
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+
+		INIT_LIST_HEAD(&rvic->delivery);
+		kfree(rvic->irqs);
+		rvic->irqs = NULL;
+	}
+
+	mutex_unlock(&kvm->lock);
+}
+
+static int rvic_pending_irq(struct kvm_vcpu *vcpu)
+{
+	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+	unsigned long flags;
+	bool res;
+
+	spin_lock_irqsave(&rvic->lock, flags);
+	res = __rvic_can_signal(rvic);
+	spin_unlock_irqrestore(&rvic->lock, flags);
+
+	return res;
+}
+
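+/*
+ * Associate a guest intid with a host interrupt and an optional callback
+ * used to read back the physical line level when the interrupt is
+ * resampled.
+ */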
+static int rvic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
+			     u32 intid, bool (*get_line_level)(int))
+{
+	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+	struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+	unsigned long flags;
+
+	spin_lock_irqsave(&irq->lock, flags);
+	irq->host_irq = host_irq;
+	irq->get_line_level = get_line_level;
+	spin_unlock_irqrestore(&irq->lock, flags);
+
+	return 0;
+}
+
+static int rvic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int intid)
+{
+	struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+	struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+	unsigned long flags;
+
+	spin_lock_irqsave(&irq->lock, flags);
+	irq->host_irq = 0;
+	irq->get_line_level = NULL;
+	spin_unlock_irqrestore(&irq->lock, flags);
+
+	return 0;
+}
+
+static int rvic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
+			      struct kvm *kvm, int irq_source_id,
+			      int level, bool line_status)
+{
+	/* Abuse the userspace interface to perform the routing */
+	return rvic_inject_userspace_irq(kvm, KVM_ARM_IRQ_TYPE_SPI, 0,
+					 e->irqchip.pin, level);
+}
+
+static int rvic_set_msi(struct kvm_kernel_irq_routing_entry *e,
+			struct kvm *kvm, int irq_source_id,
+			int level, bool line_status)
+{
+	return -ENODEV;
+}
+
+static int rvic_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+				 struct kvm *kvm, int irq_source_id,
+				 int level, bool line_status)
+{
+	if (e->type != KVM_IRQ_ROUTING_IRQCHIP)
+		return -EWOULDBLOCK;
+
+	return rvic_irqfd_set_irq(e, kvm, irq_source_id, level, line_status);
+}
+
+static const struct kvm_irqchip_flow rvic_irqchip_flow = {
+	.irqchip_destroy		= rvic_destroy,
+	.irqchip_vcpu_init		= rvic_vcpu_init,
+	/* Nothing to do on block/unblock */
+	/* Nothing to do on load/put */
+	.irqchip_vcpu_pending_irq	= rvic_pending_irq,
+	.irqchip_vcpu_flush_hwstate	= rvic_flush_signaling_state,
+	/* Nothing to do on sync_hwstate */
+	.irqchip_inject_irq		= rvic_inject_irq,
+	.irqchip_inject_userspace_irq	= rvic_inject_userspace_irq,
+	/* No reset_mapped_irq as we allow spurious interrupts */
+	.irqchip_map_phys_irq		= rvic_map_phys_irq,
+	.irqchip_unmap_phys_irq		= rvic_unmap_phys_irq,
+	.irqchip_irqfd_set_irq		= rvic_irqfd_set_irq,
+	.irqchip_set_msi		= rvic_set_msi,
+	.irqchip_set_irq_inatomic	= rvic_set_irq_inatomic,
+};
+
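+/*
+ * Default GSI routing: GSI n is wired to irqchip pin n, i.e. rvid input n,
+ * and is only delivered once the guest has mapped that input with RVID_MAP.
+ */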
+static int rvic_setup_default_irq_routing(struct kvm *kvm)
+{
+	struct rvic_vm_data *data = kvm->arch.irqchip_data;
+	unsigned int nr = rvic_nr_untrusted(data);
+	struct kvm_irq_routing_entry *entries;
+	int i, ret;
+
+	entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	for (i = 0; i < nr; i++) {
+		entries[i].gsi = i;
+		entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+		entries[i].u.irqchip.irqchip = 0;
+		entries[i].u.irqchip.pin = i;
+	}
+	ret = kvm_set_irq_routing(kvm, entries, nr, 0);
+	kfree(entries);
+	return ret;
+}
+
+/* Device management */
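+/*
+ * Rough sketch of the expected userspace flow: create a
+ * KVM_DEV_TYPE_ARM_RVIC device with KVM_CREATE_DEVICE before any vcpu has
+ * run, program KVM_DEV_ARM_RVIC_GRP_NR_IRQS with the trusted/total counts
+ * packed via KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK / _TOTAL_MASK, then
+ * finalize with KVM_DEV_ARM_RVIC_GRP_INIT.
+ */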
+static int rvic_device_create(struct kvm_device *dev, u32 type)
+{
+	struct kvm *kvm = dev->kvm;
+	struct kvm_vcpu *vcpu;
+	int i, ret;
+
+	if (irqchip_in_kernel(kvm))
+		return -EEXIST;
+
+	ret = -EBUSY;
+	if (!lock_all_vcpus(kvm))
+		return ret;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu->arch.has_run_once)
+			goto out_unlock;
+	}
+
+	ret = 0;
+
+	/*
+	 * The good thing about not having any HW is that you don't
+	 * get the limitations of the HW...
+	 */
+	kvm->arch.max_vcpus		= KVM_MAX_VCPUS;
+	kvm->arch.irqchip_type		= IRQCHIP_RVIC;
+	kvm->arch.irqchip_flow		= rvic_irqchip_flow;
+	kvm->arch.irqchip_data		= NULL;
+
+out_unlock:
+	unlock_all_vcpus(kvm);
+	return ret;
+}
+
+static void rvic_device_destroy(struct kvm_device *dev)
+{
+	kfree(dev->kvm->arch.irqchip_data);
+	kfree(dev);
+}
+
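+/*
+ * KVM_DEV_ARM_RVIC_GRP_NR_IRQS packs the trusted and total interrupt counts
+ * into a single u32. Both must be multiples of 32, with at least 32 trusted
+ * interrupts, at least 64 in total, and no more than 2048 in total.
+ */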
+static int rvic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct rvic_vm_data *data;
+	struct kvm_vcpu *vcpu;
+	u32 __user *uaddr, val;
+	u16 trusted, total;
+	int i, ret = -ENXIO;
+
+	mutex_lock(&dev->kvm->lock);
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
+		if (attr->attr)
+			break;
+
+		if (dev->kvm->arch.irqchip_data) {
+			ret = -EBUSY;
+			break;
+		}
+
+		uaddr = (u32 __user *)(uintptr_t)attr->addr;
+		if (get_user(val, uaddr)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		trusted = FIELD_GET(KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK, val);
+		total   = FIELD_GET(KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK, val);
+		if (total < trusted || trusted < 32 || total < 64 ||
+		    trusted % 32 || total % 32 || total > 2048) {
+			ret = -EINVAL;
+			break;
+		}
+
+		data = kzalloc(struct_size(data, rvid_map, (total - trusted)),
+			       GFP_KERNEL);
+		if (!data) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		data->nr_trusted = trusted;
+		data->nr_total = total;
+		spin_lock_init(&data->lock);
+		/* Default to no mapping */
+		for (i = 0; i < (total - trusted); i++) {
+			/*
+			 * An intid < nr_trusted is invalid as the
+			 * result of a translation through the rvid,
+			 * hence the input is unmapped.
+			 */
+			data->rvid_map[i].target_vcpu = 0;
+			data->rvid_map[i].intid = 0;
+		}
+
+		dev->kvm->arch.irqchip_data = data;
+
+		ret = 0;
+		break;
+
+	case KVM_DEV_ARM_RVIC_GRP_INIT:
+		if (attr->attr)
+			break;
+
+		if (!dev->kvm->arch.irqchip_data)
+			break;
+
+		ret = 0;
+
+		/* Init the rvic on any already created vcpu */
+		kvm_for_each_vcpu(i, vcpu, dev->kvm) {
+			ret = rvic_vcpu_init(vcpu);
+			if (ret)
+				break;
+		}
+
+		if (!ret)
+			ret = rvic_setup_default_irq_routing(dev->kvm);
+		if (!ret)
+			dev->kvm->arch.irqchip_finalized = true;
+		break;
+
+	default:
+		break;
+	}
+
+	mutex_unlock(&dev->kvm->lock);
+
+	return ret;
+}
+
+static int rvic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct rvic_vm_data *data;
+	u32 __user *uaddr, val;
+	int ret = -ENXIO;
+
+	mutex_lock(&dev->kvm->lock);
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
+		if (attr->attr)
+			break;
+
+		data = dev->kvm->arch.irqchip_data;
+		if (!data)
+			break;
+
+		val  = FIELD_PREP(KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK,
+					 data->nr_trusted);
+		val |= FIELD_PREP(KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK,
+					 data->nr_total);
+
+		uaddr = (u32 __user *)(uintptr_t)attr->addr;
+		ret = put_user(val, uaddr);
+		break;
+
+	default:
+		break;
+	}
+
+	mutex_unlock(&dev->kvm->lock);
+
+	return ret;
+}
+
+static int rvic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	int ret = -ENXIO;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
+	case KVM_DEV_ARM_RVIC_GRP_INIT:
+		if (attr->attr)
+			break;
+		ret = 0;
+		break;
+
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static const struct kvm_device_ops rvic_dev_ops = {
+	.name		= "kvm-arm-rvic",
+	.create		= rvic_device_create,
+	.destroy	= rvic_device_destroy,
+	.set_attr	= rvic_set_attr,
+	.get_attr	= rvic_get_attr,
+	.has_attr	= rvic_has_attr,
+};
+
+int kvm_register_rvic_device(void)
+{
+	return kvm_register_device_ops(&rvic_dev_ops, KVM_DEV_TYPE_ARM_RVIC);
+}
diff --git a/include/kvm/arm_rvic.h b/include/kvm/arm_rvic.h
new file mode 100644
index 000000000000..9e67a83fa384
--- /dev/null
+++ b/include/kvm/arm_rvic.h
@@ -0,0 +1,41 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * rVIC/rVID PV interrupt controller implementation for KVM/arm64.
+ *
+ * Copyright 2020 Google LLC.
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#ifndef __KVM_ARM_RVIC_H__
+#define __KVM_ARM_RVIC_H__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct kvm_vcpu;
+
+struct rvic_irq {
+	spinlock_t		lock;
+	struct list_head	delivery_entry;
+	bool			(*get_line_level)(int intid);
+	unsigned int		intid;
+	unsigned int		host_irq;
+	bool			pending;
+	bool			masked;
+	bool			line_level; /* If get_line_level == NULL */
+};
+
+struct rvic {
+	spinlock_t		lock;
+	struct list_head	delivery;
+	struct rvic_irq		*irqs;
+	unsigned int		nr_trusted;
+	unsigned int		nr_total;
+	bool			enabled;
+};
+
+int kvm_rvic_handle_hcall(struct kvm_vcpu *vcpu);
+int kvm_rvid_handle_hcall(struct kvm_vcpu *vcpu);
+int kvm_register_rvic_device(void);
+
+#endif
diff --git a/include/linux/irqchip/irq-rvic.h b/include/linux/irqchip/irq-rvic.h
index 4545c1e89741..b188773729fb 100644
--- a/include/linux/irqchip/irq-rvic.h
+++ b/include/linux/irqchip/irq-rvic.h
@@ -57,6 +57,8 @@ 
 #define SMC64_RVIC_ACKNOWLEDGE		SMC64_RVIC_FN(9)
 #define SMC64_RVIC_RESAMPLE		SMC64_RVIC_FN(10)
 
+#define SMC64_RVIC_LAST			SMC64_RVIC_RESAMPLE
+
 #define RVIC_INFO_KEY_NR_TRUSTED_INTERRUPTS	0
 #define RVIC_INFO_KEY_NR_UNTRUSTED_INTERRUPTS	1
 
@@ -82,6 +84,8 @@ 
 #define SMC64_RVID_MAP			SMC64_RVID_FN(1)
 #define SMC64_RVID_UNMAP		SMC64_RVID_FN(2)
 
+#define SMC64_RVID_LAST			SMC64_RVID_UNMAP
+
 #define RVID_VERSION(M, m)		RVIx_VERSION((M), (m))
 
 #define RVID_VERSION_MAJOR(v)		RVIx_VERSION_MAJOR((v))
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index f6d86033c4fa..6d245d2dc9e6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1264,6 +1264,8 @@  enum kvm_device_type {
 #define KVM_DEV_TYPE_XIVE		KVM_DEV_TYPE_XIVE
 	KVM_DEV_TYPE_ARM_PV_TIME,
 #define KVM_DEV_TYPE_ARM_PV_TIME	KVM_DEV_TYPE_ARM_PV_TIME
+	KVM_DEV_TYPE_ARM_RVIC,
+#define KVM_DEV_TYPE_ARM_RVIC		KVM_DEV_TYPE_ARM_RVIC
 	KVM_DEV_TYPE_MAX,
 };