diff mbox

[v3,1/4] KVM: x86: Split the APIC from the rest of IRQCHIP.

Message ID 1433289107-20638-1-git-send-email-srutherford@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

Steve Rutherford June 2, 2015, 11:51 p.m. UTC
First patch in a series which enables the relocation of the
PIC/IOAPIC to userspace.

Adds capability KVM_CAP_SPLIT_IRQCHIP.

KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
rest of the irqchip.

Compile tested for x86.

Signed-off-by: Steve Rutherford <srutherford@google.com>
Suggested-by: Andrew Honig <ahonig@google.com>
---
 Documentation/virtual/kvm/api.txt | 15 ++++++++++++
 arch/powerpc/kvm/irq.h            |  5 ++++
 arch/s390/kvm/irq.h               |  4 ++++
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/assigned-dev.c       |  4 ++--
 arch/x86/kvm/irq.c                |  6 ++---
 arch/x86/kvm/irq.h                | 11 +++++++++
 arch/x86/kvm/irq_comm.c           |  7 ++++++
 arch/x86/kvm/lapic.c              | 13 +++++++----
 arch/x86/kvm/mmu.c                |  2 +-
 arch/x86/kvm/svm.c                |  4 ++--
 arch/x86/kvm/vmx.c                | 12 +++++-----
 arch/x86/kvm/x86.c                | 49 +++++++++++++++++++++++++++------------
 include/kvm/arm_vgic.h            |  1 +
 include/linux/kvm_host.h          |  1 +
 include/uapi/linux/kvm.h          |  1 +
 virt/kvm/irqchip.c                |  2 +-
 17 files changed, 104 insertions(+), 35 deletions(-)

Comments

Paolo Bonzini June 3, 2015, 8:54 a.m. UTC | #1
On 03/06/2015 01:51, Steve Rutherford wrote:
> First patch in a series which enables the relocation of the
> PIC/IOAPIC to userspace.
> 
> Adds capability KVM_CAP_SPLIT_IRQCHIP;
> 
> KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
> rest of the irqchip.

The documentation is not updated.

Changing other arches is definitely a no-no, unfortunately.  But there
are so many s/irqchip_in_kernel/lapic_in_kernel/ changes here, that I
wonder if you should just keep irqchip_in_kernel true in the "split
irqchip" case.  You are already testing irqchip_split in a few cases,
and you can add ioapic_in_kernel whenever you need to test
"lapic_in_kernel && !irqchip_split" at the same time.

Paolo

> Compile tested for x86.
> 
> Signed-off-by: Steve Rutherford <srutherford@google.com>
> Suggested-by: Andrew Honig <ahonig@google.com>
> ---
>  Documentation/virtual/kvm/api.txt | 15 ++++++++++++
>  arch/powerpc/kvm/irq.h            |  5 ++++
>  arch/s390/kvm/irq.h               |  4 ++++
>  arch/x86/include/asm/kvm_host.h   |  2 ++
>  arch/x86/kvm/assigned-dev.c       |  4 ++--
>  arch/x86/kvm/irq.c                |  6 ++---
>  arch/x86/kvm/irq.h                | 11 +++++++++
>  arch/x86/kvm/irq_comm.c           |  7 ++++++
>  arch/x86/kvm/lapic.c              | 13 +++++++----
>  arch/x86/kvm/mmu.c                |  2 +-
>  arch/x86/kvm/svm.c                |  4 ++--
>  arch/x86/kvm/vmx.c                | 12 +++++-----
>  arch/x86/kvm/x86.c                | 49 +++++++++++++++++++++++++++------------
>  include/kvm/arm_vgic.h            |  1 +
>  include/linux/kvm_host.h          |  1 +
>  include/uapi/linux/kvm.h          |  1 +
>  virt/kvm/irqchip.c                |  2 +-
>  17 files changed, 104 insertions(+), 35 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 6955444..9a43d42 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2979,6 +2979,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
>  and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
>  which is the maximum number of possibly pending cpu-local interrupts.
>  
> +
>  5. The kvm_run structure
>  ------------------------
>  
> @@ -3575,6 +3576,20 @@ struct {
>  
>  KVM handlers should exit to userspace with rc = -EREMOTE.
>  
> +7.5 KVM_SPLIT_IRQCHIP
> +
> +Capability: KVM_CAP_SPLIT_IRQCHIP
> +Architectures: x86
> +Type:  VM ioctl
> +Parameters: None
> +Returns: 0 on success, -1 on error
> +
> +Create a local apic for each processor in the kernel.  This differs from
> +KVM_CREATE_IRQCHIP in that it only creates the local apic; it creates neither
> +the ioapic nor the pic in the kernel. Also, enables in kernel routing of
> +interrupt requests. Fails if VCPU has already been created, or if the irqchip is
> +already in the kernel.
> +
>  
>  8. Other capabilities.
>  ----------------------
> diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
> index 5a9a10b..5e6fa06 100644
> --- a/arch/powerpc/kvm/irq.h
> +++ b/arch/powerpc/kvm/irq.h
> @@ -17,4 +17,9 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
>  	return ret;
>  }
>  
> +static inline int lapic_in_kernel(struct kvm *kvm)
> +{
> +	return irqchip_in_kernel(kvm);
> +}
> +
>  #endif
> diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
> index d98e415..db876c3 100644
> --- a/arch/s390/kvm/irq.h
> +++ b/arch/s390/kvm/irq.h
> @@ -19,4 +19,8 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
>  	return 1;
>  }
>  
> +static inline int lapic_in_kernel(struct kvm *kvm)
> +{
> +	return irqchip_in_kernel(kvm);
> +}
>  #endif
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 7276107..af3225a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -639,6 +639,8 @@ struct kvm_arch {
>  	bool boot_vcpu_runs_old_kvmclock;
>  
>  	u64 disabled_quirks;
> +
> +	bool irqchip_split;
>  };
>  
>  struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
> index d090ecf..1237e92 100644
> --- a/arch/x86/kvm/assigned-dev.c
> +++ b/arch/x86/kvm/assigned-dev.c
> @@ -291,7 +291,7 @@ static int kvm_deassign_irq(struct kvm *kvm,
>  {
>  	unsigned long guest_irq_type, host_irq_type;
>  
> -	if (!irqchip_in_kernel(kvm))
> +	if (!lapic_in_kernel(kvm))
>  		return -EINVAL;
>  	/* no irq assignment to deassign */
>  	if (!assigned_dev->irq_requested_type)
> @@ -568,7 +568,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
>  	struct kvm_assigned_dev_kernel *match;
>  	unsigned long host_irq_type, guest_irq_type;
>  
> -	if (!irqchip_in_kernel(kvm))
> +	if (!lapic_in_kernel(kvm))
>  		return r;
>  
>  	mutex_lock(&kvm->lock);
> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> index a1ec6a50..706e47a 100644
> --- a/arch/x86/kvm/irq.c
> +++ b/arch/x86/kvm/irq.c
> @@ -57,7 +57,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
>   */
>  int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
>  {
> -	if (!irqchip_in_kernel(v->kvm))
> +	if (!lapic_in_kernel(v->kvm))
>  		return v->arch.interrupt.pending;
>  
>  	if (kvm_cpu_has_extint(v))
> @@ -75,7 +75,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
>   */
>  int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
>  {
> -	if (!irqchip_in_kernel(v->kvm))
> +	if (!lapic_in_kernel(v->kvm))
>  		return v->arch.interrupt.pending;
>  
>  	if (kvm_cpu_has_extint(v))
> @@ -103,7 +103,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
>  {
>  	int vector;
>  
> -	if (!irqchip_in_kernel(v->kvm))
> +	if (!lapic_in_kernel(v->kvm))
>  		return v->arch.interrupt.nr;
>  
>  	vector = kvm_cpu_get_extint(v);
> diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
> index ad68c73..e46abf3 100644
> --- a/arch/x86/kvm/irq.h
> +++ b/arch/x86/kvm/irq.h
> @@ -92,6 +92,17 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
>  	return ret;
>  }
>  
> +static inline int irqchip_split(struct kvm *kvm)
> +{
> +	return kvm->arch.irqchip_split;
> +}
> +
> +static inline int lapic_in_kernel(struct kvm *kvm)
> +{
> +	return irqchip_split(kvm) || irqchip_in_kernel(kvm);
> +}
> +
> +
>  void kvm_pic_reset(struct kvm_kpic_state *s);
>  
>  void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 9efff9e..f43c59a 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
>  	return kvm_set_irq_routing(kvm, default_routing,
>  				   ARRAY_SIZE(default_routing), 0);
>  }
> +
> +static const struct kvm_irq_routing_entry empty_routing[] = {};
> +
> +int kvm_setup_empty_irq_routing(struct kvm *kvm)
> +{
> +	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
> +}
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index c789e00..92f4c98 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -209,7 +209,8 @@ out:
>  	if (old)
>  		kfree_rcu(old, rcu);
>  
> -	kvm_vcpu_request_scan_ioapic(kvm);
> +	if (!irqchip_split(kvm))
> +		kvm_vcpu_request_scan_ioapic(kvm);
>  }
>  
>  static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
> @@ -1827,7 +1828,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
>  		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
>  				apic_find_highest_isr(apic));
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
> -	kvm_rtc_eoi_tracking_restore_one(vcpu);
> +	if (!irqchip_split(vcpu->kvm))
> +		kvm_rtc_eoi_tracking_restore_one(vcpu);
>  }
>  
>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
> @@ -1910,7 +1912,8 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
>  	    /* Cache not set: could be safe but we don't bother. */
>  	    apic->highest_isr_cache == -1 ||
>  	    /* Need EOI to update ioapic. */
> -	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
> +	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache) ||
> +	    irqchip_split(vcpu->kvm)) {
>  		/*
>  		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
>  		 * so we need not do anything here.
> @@ -1966,7 +1969,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  	struct kvm_lapic *apic = vcpu->arch.apic;
>  	u32 reg = (msr - APIC_BASE_MSR) << 4;
>  
> -	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
> +	if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
>  		return 1;
>  
>  	if (reg == APIC_ICR2)
> @@ -1983,7 +1986,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
>  	struct kvm_lapic *apic = vcpu->arch.apic;
>  	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
>  
> -	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
> +	if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
>  		return 1;
>  
>  	if (reg == APIC_DFR || reg == APIC_ICR2) {
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index a65ce12..1513d14 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3507,7 +3507,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
>  
>  static bool can_do_async_pf(struct kvm_vcpu *vcpu)
>  {
> -	if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
> +	if (unlikely(!lapic_in_kernel(vcpu->kvm) ||
>  		     kvm_event_needs_reinjection(vcpu)))
>  		return false;
>  
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index b9f9e10..59166de 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3054,7 +3054,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
>  	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
>  	/* instruction emulation calls kvm_set_cr8() */
>  	r = cr_interception(svm);
> -	if (irqchip_in_kernel(svm->vcpu.kvm))
> +	if (lapic_in_kernel(svm->vcpu.kvm))
>  		return r;
>  	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
>  		return r;
> @@ -3295,7 +3295,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
>  	 * If the user space waits to inject interrupts, exit as soon as
>  	 * possible
>  	 */
> -	if (!irqchip_in_kernel(svm->vcpu.kvm) &&
> +	if (!lapic_in_kernel(svm->vcpu.kvm) &&
>  	    kvm_run->request_interrupt_window &&
>  	    !kvm_cpu_has_interrupt(&svm->vcpu)) {
>  		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 9cf5030..3b58788 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -948,7 +948,7 @@ static inline bool cpu_has_vmx_tpr_shadow(void)
>  
>  static inline bool vm_need_tpr_shadow(struct kvm *kvm)
>  {
> -	return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
> +	return (cpu_has_vmx_tpr_shadow()) && lapic_in_kernel(kvm);
>  }
>  
>  static inline bool cpu_has_secondary_exec_ctrls(void)
> @@ -1064,7 +1064,7 @@ static inline bool cpu_has_vmx_ple(void)
>  
>  static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm)
>  {
> -	return flexpriority_enabled && irqchip_in_kernel(kvm);
> +	return flexpriority_enabled && lapic_in_kernel(kvm);
>  }
>  
>  static inline bool cpu_has_vmx_vpid(void)
> @@ -4341,7 +4341,7 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
>  
>  static int vmx_vm_has_apicv(struct kvm *kvm)
>  {
> -	return enable_apicv && irqchip_in_kernel(kvm);
> +	return enable_apicv && lapic_in_kernel(kvm);
>  }
>  
>  static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
> @@ -5317,7 +5317,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
>  				u8 cr8 = (u8)val;
>  				err = kvm_set_cr8(vcpu, cr8);
>  				kvm_complete_insn_gp(vcpu, err);
> -				if (irqchip_in_kernel(vcpu->kvm))
> +				if (lapic_in_kernel(vcpu->kvm))
>  					return 1;
>  				if (cr8_prev <= cr8)
>  					return 1;
> @@ -5534,7 +5534,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
>  	 * If the user space waits to inject interrupts, exit as soon as
>  	 * possible
>  	 */
> -	if (!irqchip_in_kernel(vcpu->kvm) &&
> +	if (!lapic_in_kernel(vcpu->kvm) &&
>  	    vcpu->run->request_interrupt_window &&
>  	    !kvm_cpu_has_interrupt(vcpu)) {
>  		vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
> @@ -9419,7 +9419,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
>  	 * emulated by vmx_set_efer(), below.
>  	 */
> -	vm_entry_controls_init(vmx, 
> +	vm_entry_controls_init(vmx,
>  		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
>  			~VM_ENTRY_IA32E_MODE) |
>  		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 79dde16..19c8980 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -784,7 +784,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
>  {
>  	if (cr8 & CR8_RESERVED_BITS)
>  		return 1;
> -	if (irqchip_in_kernel(vcpu->kvm))
> +	if (lapic_in_kernel(vcpu->kvm))
>  		kvm_lapic_set_tpr(vcpu, cr8);
>  	else
>  		vcpu->arch.cr8 = cr8;
> @@ -794,7 +794,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr8);
>  
>  unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
>  {
> -	if (irqchip_in_kernel(vcpu->kvm))
> +	if (lapic_in_kernel(vcpu->kvm))
>  		return kvm_lapic_get_cr8(vcpu);
>  	else
>  		return vcpu->arch.cr8;
> @@ -2866,6 +2866,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_TSC_DEADLINE_TIMER:
>  	case KVM_CAP_ENABLE_CAP_VM:
>  	case KVM_CAP_DISABLE_QUIRKS:
> +	case KVM_CAP_SPLIT_IRQCHIP:
>  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
>  	case KVM_CAP_ASSIGN_DEV_IRQ:
>  	case KVM_CAP_PCI_2_3:
> @@ -3068,7 +3069,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
>  {
>  	if (irq->irq >= KVM_NR_INTERRUPTS)
>  		return -EINVAL;
> -	if (irqchip_in_kernel(vcpu->kvm))
> +	if (lapic_in_kernel(vcpu->kvm))
>  		return -ENXIO;
>  
>  	kvm_queue_interrupt(vcpu, irq->irq, false);
> @@ -3546,7 +3547,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>  		struct kvm_vapic_addr va;
>  
>  		r = -EINVAL;
> -		if (!irqchip_in_kernel(vcpu->kvm))
> +		if (!lapic_in_kernel(vcpu->kvm))
>  			goto out;
>  		r = -EFAULT;
>  		if (copy_from_user(&va, argp, sizeof va))
> @@ -3904,7 +3905,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
>  int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
>  			bool line_status)
>  {
> -	if (!irqchip_in_kernel(kvm))
> +	if (!lapic_in_kernel(kvm))
>  		return -ENXIO;
>  
>  	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
> @@ -3926,6 +3927,23 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>  		kvm->arch.disabled_quirks = cap->args[0];
>  		r = 0;
>  		break;
> +	case KVM_CAP_SPLIT_IRQCHIP: {
> +		mutex_lock(&kvm->lock);
> +		r = -EEXIST;
> +		if (lapic_in_kernel(kvm))
> +			goto split_irqchip_unlock;
> +		r = -EINVAL;
> +		if (atomic_read(&kvm->online_vcpus))
> +			goto split_irqchip_unlock;
> +		r = kvm_setup_empty_irq_routing(kvm);
> +		if (r)
> +			goto split_irqchip_unlock;
> +		kvm->arch.irqchip_split = true;
> +		r = 0;
> +split_irqchip_unlock:
> +		mutex_unlock(&kvm->lock);
> +		break;
> +	}
>  	default:
>  		r = -EINVAL;
>  		break;
> @@ -4194,6 +4212,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
>  		break;
>  	}
> +
>  	default:
>  		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
>  	}
> @@ -5959,7 +5978,7 @@ void kvm_arch_exit(void)
>  int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
>  {
>  	++vcpu->stat.halt_exits;
> -	if (irqchip_in_kernel(vcpu->kvm)) {
> +	if (lapic_in_kernel(vcpu->kvm)) {
>  		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
>  		return 1;
>  	} else {
> @@ -6126,7 +6145,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
>   */
>  static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
>  {
> -	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
> +	return (!lapic_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
>  		vcpu->run->request_interrupt_window &&
>  		kvm_arch_interrupt_allowed(vcpu));
>  }
> @@ -6138,7 +6157,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
>  	kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
>  	kvm_run->cr8 = kvm_get_cr8(vcpu);
>  	kvm_run->apic_base = kvm_get_apic_base(vcpu);
> -	if (irqchip_in_kernel(vcpu->kvm))
> +	if (lapic_in_kernel(vcpu->kvm))
>  		kvm_run->ready_for_interrupt_injection = 1;
>  	else
>  		kvm_run->ready_for_interrupt_injection =
> @@ -6285,7 +6304,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
>  {
>  	struct page *page = NULL;
>  
> -	if (!irqchip_in_kernel(vcpu->kvm))
> +	if (!lapic_in_kernel(vcpu->kvm))
>  		return;
>  
>  	if (!kvm_x86_ops->set_apic_access_page_addr)
> @@ -6323,7 +6342,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
>  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  {
>  	int r;
> -	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
> +	bool req_int_win = !lapic_in_kernel(vcpu->kvm) &&
>  		vcpu->run->request_interrupt_window;
>  	bool req_immediate_exit = false;
>  
> @@ -6712,7 +6731,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  	}
>  
>  	/* re-sync apic's tpr */
> -	if (!irqchip_in_kernel(vcpu->kvm)) {
> +	if (!lapic_in_kernel(vcpu->kvm)) {
>  		if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
>  			r = -EINVAL;
>  			goto out;
> @@ -7421,7 +7440,7 @@ void kvm_arch_check_processor_compat(void *rtn)
>  
>  bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
>  {
> -	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
> +	return lapic_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
>  }
>  
>  struct static_key kvm_no_apic_vcpu __read_mostly;
> @@ -7437,7 +7456,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>  
>  	vcpu->arch.pv.pv_unhalted = false;
>  	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
> -	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
> +	if (!lapic_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
>  		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
>  	else
>  		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
> @@ -7455,7 +7474,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>  	if (r < 0)
>  		goto fail_free_pio_data;
>  
> -	if (irqchip_in_kernel(kvm)) {
> +	if (lapic_in_kernel(kvm)) {
>  		r = kvm_create_lapic(vcpu);
>  		if (r < 0)
>  			goto fail_mmu_destroy;
> @@ -7518,7 +7537,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
>  	kvm_mmu_destroy(vcpu);
>  	srcu_read_unlock(&vcpu->kvm->srcu, idx);
>  	free_page((unsigned long)vcpu->arch.pio_data);
> -	if (!irqchip_in_kernel(vcpu->kvm))
> +	if (!lapic_in_kernel(vcpu->kvm))
>  		static_key_slow_dec(&kvm_no_apic_vcpu);
>  }
>  
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 133ea00..ffe1f4e 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -329,6 +329,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>  
>  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
> +#define lapic_in_kernel(k)      (irqchip_in_kernel(k))
>  #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
>  #define vgic_ready(k)		((k)->arch.vgic.ready)
>  
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index a8bcbc9..7e2b41a 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -935,6 +935,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
>  #endif
>  
>  int kvm_setup_default_irq_routing(struct kvm *kvm);
> +int kvm_setup_empty_irq_routing(struct kvm *kvm);
>  int kvm_set_irq_routing(struct kvm *kvm,
>  			const struct kvm_irq_routing_entry *entries,
>  			unsigned nr,
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 75bd9f7..1e6f6c3 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -815,6 +815,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_S390_IRQ_STATE 114
>  #define KVM_CAP_PPC_HWRNG 115
>  #define KVM_CAP_DISABLE_QUIRKS 116
> +#define KVM_CAP_SPLIT_IRQCHIP 117
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
> index 1d56a90..8aaceed 100644
> --- a/virt/kvm/irqchip.c
> +++ b/virt/kvm/irqchip.c
> @@ -73,7 +73,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
>  {
>  	struct kvm_kernel_irq_routing_entry route;
>  
> -	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
> +	if (!lapic_in_kernel(kvm) || msi->flags != 0)
>  		return -EINVAL;
>  
>  	route.msi.address_lo = msi->address_lo;
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Rutherford June 4, 2015, 8:38 p.m. UTC | #2
On Wed, Jun 03, 2015 at 10:54:41AM +0200, Paolo Bonzini wrote:
> 
> 
> On 03/06/2015 01:51, Steve Rutherford wrote:
> > First patch in a series which enables the relocation of the
> > PIC/IOAPIC to userspace.
> > 
> > Adds capability KVM_CAP_SPLIT_IRQCHIP;
> > 
> > KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
> > rest of the irqchip.
> 
> The documentation is not updated.
Ack.
> 
> Changing other arches is definitely a no-no, unfortunately.  But there
> are so many s/irqchip_in_kernel/lapic_in_kernel/ changes here, that I
> wonder if you should just keep irqchip_in_kernel true in the "split
> irqchip" case.  You are already testing irqchip_split in a few cases,
> and you can add ioapic_in_kernel whenever you need to test
> "lapic_in_kernel && !irqchip_split" at the same time.

From the perspective of avoiding impact on other architectures, this is a
good idea, but the naming seems strange in the x86 case: having
irqchip_in_kernel be "true" when the ioapic/pic are in userspace is
counterintuitive. Admittedly, the irqchip isn't a "real" concept on x86, so
inventing a new meaning is fine.

Despite my hesitation, I'll change the naming around.

Steve

> 
> Paolo
> 
> > Compile tested for x86.
> > 
> > Signed-off-by: Steve Rutherford <srutherford@google.com>
> > Suggested-by: Andrew Honig <ahonig@google.com>
> > ---
> >  Documentation/virtual/kvm/api.txt | 15 ++++++++++++
> >  arch/powerpc/kvm/irq.h            |  5 ++++
> >  arch/s390/kvm/irq.h               |  4 ++++
> >  arch/x86/include/asm/kvm_host.h   |  2 ++
> >  arch/x86/kvm/assigned-dev.c       |  4 ++--
> >  arch/x86/kvm/irq.c                |  6 ++---
> >  arch/x86/kvm/irq.h                | 11 +++++++++
> >  arch/x86/kvm/irq_comm.c           |  7 ++++++
> >  arch/x86/kvm/lapic.c              | 13 +++++++----
> >  arch/x86/kvm/mmu.c                |  2 +-
> >  arch/x86/kvm/svm.c                |  4 ++--
> >  arch/x86/kvm/vmx.c                | 12 +++++-----
> >  arch/x86/kvm/x86.c                | 49 +++++++++++++++++++++++++++------------
> >  include/kvm/arm_vgic.h            |  1 +
> >  include/linux/kvm_host.h          |  1 +
> >  include/uapi/linux/kvm.h          |  1 +
> >  virt/kvm/irqchip.c                |  2 +-
> >  17 files changed, 104 insertions(+), 35 deletions(-)
> > 
> > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> > index 6955444..9a43d42 100644
> > --- a/Documentation/virtual/kvm/api.txt
> > +++ b/Documentation/virtual/kvm/api.txt
> > @@ -2979,6 +2979,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
> >  and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
> >  which is the maximum number of possibly pending cpu-local interrupts.
> >  
> > +
> >  5. The kvm_run structure
> >  ------------------------
> >  
> > @@ -3575,6 +3576,20 @@ struct {
> >  
> >  KVM handlers should exit to userspace with rc = -EREMOTE.
> >  
> > +7.5 KVM_SPLIT_IRQCHIP
> > +
> > +Capability: KVM_CAP_SPLIT_IRQCHIP
> > +Architectures: x86
> > +Type:  VM ioctl
> > +Parameters: None
> > +Returns: 0 on success, -1 on error
> > +
> > +Create a local apic for each processor in the kernel.  This differs from
> > +KVM_CREATE_IRQCHIP in that it only creates the local apic; it creates neither
> > +the ioapic nor the pic in the kernel. Also, enables in kernel routing of
> > +interrupt requests. Fails if VCPU has already been created, or if the irqchip is
> > +already in the kernel.
> > +
> >  
> >  8. Other capabilities.
> >  ----------------------
> > diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
> > index 5a9a10b..5e6fa06 100644
> > --- a/arch/powerpc/kvm/irq.h
> > +++ b/arch/powerpc/kvm/irq.h
> > @@ -17,4 +17,9 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
> >  	return ret;
> >  }
> >  
> > +static inline int lapic_in_kernel(struct kvm *kvm)
> > +{
> > +	return irqchip_in_kernel(kvm);
> > +}
> > +
> >  #endif
> > diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
> > index d98e415..db876c3 100644
> > --- a/arch/s390/kvm/irq.h
> > +++ b/arch/s390/kvm/irq.h
> > @@ -19,4 +19,8 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
> >  	return 1;
> >  }
> >  
> > +static inline int lapic_in_kernel(struct kvm *kvm)
> > +{
> > +	return irqchip_in_kernel(kvm);
> > +}
> >  #endif
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 7276107..af3225a 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -639,6 +639,8 @@ struct kvm_arch {
> >  	bool boot_vcpu_runs_old_kvmclock;
> >  
> >  	u64 disabled_quirks;
> > +
> > +	bool irqchip_split;
> >  };
> >  
> >  struct kvm_vm_stat {
> > diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
> > index d090ecf..1237e92 100644
> > --- a/arch/x86/kvm/assigned-dev.c
> > +++ b/arch/x86/kvm/assigned-dev.c
> > @@ -291,7 +291,7 @@ static int kvm_deassign_irq(struct kvm *kvm,
> >  {
> >  	unsigned long guest_irq_type, host_irq_type;
> >  
> > -	if (!irqchip_in_kernel(kvm))
> > +	if (!lapic_in_kernel(kvm))
> >  		return -EINVAL;
> >  	/* no irq assignment to deassign */
> >  	if (!assigned_dev->irq_requested_type)
> > @@ -568,7 +568,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
> >  	struct kvm_assigned_dev_kernel *match;
> >  	unsigned long host_irq_type, guest_irq_type;
> >  
> > -	if (!irqchip_in_kernel(kvm))
> > +	if (!lapic_in_kernel(kvm))
> >  		return r;
> >  
> >  	mutex_lock(&kvm->lock);
> > diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> > index a1ec6a50..706e47a 100644
> > --- a/arch/x86/kvm/irq.c
> > +++ b/arch/x86/kvm/irq.c
> > @@ -57,7 +57,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
> >   */
> >  int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
> >  {
> > -	if (!irqchip_in_kernel(v->kvm))
> > +	if (!lapic_in_kernel(v->kvm))
> >  		return v->arch.interrupt.pending;
> >  
> >  	if (kvm_cpu_has_extint(v))
> > @@ -75,7 +75,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
> >   */
> >  int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
> >  {
> > -	if (!irqchip_in_kernel(v->kvm))
> > +	if (!lapic_in_kernel(v->kvm))
> >  		return v->arch.interrupt.pending;
> >  
> >  	if (kvm_cpu_has_extint(v))
> > @@ -103,7 +103,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
> >  {
> >  	int vector;
> >  
> > -	if (!irqchip_in_kernel(v->kvm))
> > +	if (!lapic_in_kernel(v->kvm))
> >  		return v->arch.interrupt.nr;
> >  
> >  	vector = kvm_cpu_get_extint(v);
> > diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
> > index ad68c73..e46abf3 100644
> > --- a/arch/x86/kvm/irq.h
> > +++ b/arch/x86/kvm/irq.h
> > @@ -92,6 +92,17 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
> >  	return ret;
> >  }
> >  
> > +static inline int irqchip_split(struct kvm *kvm)
> > +{
> > +	return kvm->arch.irqchip_split;
> > +}
> > +
> > +static inline int lapic_in_kernel(struct kvm *kvm)
> > +{
> > +	return irqchip_split(kvm) || irqchip_in_kernel(kvm);
> > +}
> > +
> > +
> >  void kvm_pic_reset(struct kvm_kpic_state *s);
> >  
> >  void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
> > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> > index 9efff9e..f43c59a 100644
> > --- a/arch/x86/kvm/irq_comm.c
> > +++ b/arch/x86/kvm/irq_comm.c
> > @@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
> >  	return kvm_set_irq_routing(kvm, default_routing,
> >  				   ARRAY_SIZE(default_routing), 0);
> >  }
> > +
> > +static const struct kvm_irq_routing_entry empty_routing[] = {};
> > +
> > +int kvm_setup_empty_irq_routing(struct kvm *kvm)
> > +{
> > +	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
> > +}
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index c789e00..92f4c98 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -209,7 +209,8 @@ out:
> >  	if (old)
> >  		kfree_rcu(old, rcu);
> >  
> > -	kvm_vcpu_request_scan_ioapic(kvm);
> > +	if (!irqchip_split(kvm))
> > +		kvm_vcpu_request_scan_ioapic(kvm);
> >  }
> >  
> >  static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
> > @@ -1827,7 +1828,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
> >  		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
> >  				apic_find_highest_isr(apic));
> >  	kvm_make_request(KVM_REQ_EVENT, vcpu);
> > -	kvm_rtc_eoi_tracking_restore_one(vcpu);
> > +	if (!irqchip_split(vcpu->kvm))
> > +		kvm_rtc_eoi_tracking_restore_one(vcpu);
> >  }
> >  
> >  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
> > @@ -1910,7 +1912,8 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
> >  	    /* Cache not set: could be safe but we don't bother. */
> >  	    apic->highest_isr_cache == -1 ||
> >  	    /* Need EOI to update ioapic. */
> > -	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
> > +	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache) ||
> > +	    irqchip_split(vcpu->kvm)) {
> >  		/*
> >  		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
> >  		 * so we need not do anything here.
> > @@ -1966,7 +1969,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  	struct kvm_lapic *apic = vcpu->arch.apic;
> >  	u32 reg = (msr - APIC_BASE_MSR) << 4;
> >  
> > -	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
> > +	if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
> >  		return 1;
> >  
> >  	if (reg == APIC_ICR2)
> > @@ -1983,7 +1986,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
> >  	struct kvm_lapic *apic = vcpu->arch.apic;
> >  	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
> >  
> > -	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
> > +	if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
> >  		return 1;
> >  
> >  	if (reg == APIC_DFR || reg == APIC_ICR2) {
> > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> > index a65ce12..1513d14 100644
> > --- a/arch/x86/kvm/mmu.c
> > +++ b/arch/x86/kvm/mmu.c
> > @@ -3507,7 +3507,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
> >  
> >  static bool can_do_async_pf(struct kvm_vcpu *vcpu)
> >  {
> > -	if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
> > +	if (unlikely(!lapic_in_kernel(vcpu->kvm) ||
> >  		     kvm_event_needs_reinjection(vcpu)))
> >  		return false;
> >  
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index b9f9e10..59166de 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -3054,7 +3054,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
> >  	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
> >  	/* instruction emulation calls kvm_set_cr8() */
> >  	r = cr_interception(svm);
> > -	if (irqchip_in_kernel(svm->vcpu.kvm))
> > +	if (lapic_in_kernel(svm->vcpu.kvm))
> >  		return r;
> >  	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
> >  		return r;
> > @@ -3295,7 +3295,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
> >  	 * If the user space waits to inject interrupts, exit as soon as
> >  	 * possible
> >  	 */
> > -	if (!irqchip_in_kernel(svm->vcpu.kvm) &&
> > +	if (!lapic_in_kernel(svm->vcpu.kvm) &&
> >  	    kvm_run->request_interrupt_window &&
> >  	    !kvm_cpu_has_interrupt(&svm->vcpu)) {
> >  		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 9cf5030..3b58788 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -948,7 +948,7 @@ static inline bool cpu_has_vmx_tpr_shadow(void)
> >  
> >  static inline bool vm_need_tpr_shadow(struct kvm *kvm)
> >  {
> > -	return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
> > +	return (cpu_has_vmx_tpr_shadow()) && lapic_in_kernel(kvm);
> >  }
> >  
> >  static inline bool cpu_has_secondary_exec_ctrls(void)
> > @@ -1064,7 +1064,7 @@ static inline bool cpu_has_vmx_ple(void)
> >  
> >  static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm)
> >  {
> > -	return flexpriority_enabled && irqchip_in_kernel(kvm);
> > +	return flexpriority_enabled && lapic_in_kernel(kvm);
> >  }
> >  
> >  static inline bool cpu_has_vmx_vpid(void)
> > @@ -4341,7 +4341,7 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> >  
> >  static int vmx_vm_has_apicv(struct kvm *kvm)
> >  {
> > -	return enable_apicv && irqchip_in_kernel(kvm);
> > +	return enable_apicv && lapic_in_kernel(kvm);
> >  }
> >  
> >  static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
> > @@ -5317,7 +5317,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
> >  				u8 cr8 = (u8)val;
> >  				err = kvm_set_cr8(vcpu, cr8);
> >  				kvm_complete_insn_gp(vcpu, err);
> > -				if (irqchip_in_kernel(vcpu->kvm))
> > +				if (lapic_in_kernel(vcpu->kvm))
> >  					return 1;
> >  				if (cr8_prev <= cr8)
> >  					return 1;
> > @@ -5534,7 +5534,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
> >  	 * If the user space waits to inject interrupts, exit as soon as
> >  	 * possible
> >  	 */
> > -	if (!irqchip_in_kernel(vcpu->kvm) &&
> > +	if (!lapic_in_kernel(vcpu->kvm) &&
> >  	    vcpu->run->request_interrupt_window &&
> >  	    !kvm_cpu_has_interrupt(vcpu)) {
> >  		vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
> > @@ -9419,7 +9419,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> >  	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
> >  	 * emulated by vmx_set_efer(), below.
> >  	 */
> > -	vm_entry_controls_init(vmx, 
> > +	vm_entry_controls_init(vmx,
> >  		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
> >  			~VM_ENTRY_IA32E_MODE) |
> >  		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 79dde16..19c8980 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -784,7 +784,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
> >  {
> >  	if (cr8 & CR8_RESERVED_BITS)
> >  		return 1;
> > -	if (irqchip_in_kernel(vcpu->kvm))
> > +	if (lapic_in_kernel(vcpu->kvm))
> >  		kvm_lapic_set_tpr(vcpu, cr8);
> >  	else
> >  		vcpu->arch.cr8 = cr8;
> > @@ -794,7 +794,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr8);
> >  
> >  unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
> >  {
> > -	if (irqchip_in_kernel(vcpu->kvm))
> > +	if (lapic_in_kernel(vcpu->kvm))
> >  		return kvm_lapic_get_cr8(vcpu);
> >  	else
> >  		return vcpu->arch.cr8;
> > @@ -2866,6 +2866,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> >  	case KVM_CAP_TSC_DEADLINE_TIMER:
> >  	case KVM_CAP_ENABLE_CAP_VM:
> >  	case KVM_CAP_DISABLE_QUIRKS:
> > +	case KVM_CAP_SPLIT_IRQCHIP:
> >  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
> >  	case KVM_CAP_ASSIGN_DEV_IRQ:
> >  	case KVM_CAP_PCI_2_3:
> > @@ -3068,7 +3069,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
> >  {
> >  	if (irq->irq >= KVM_NR_INTERRUPTS)
> >  		return -EINVAL;
> > -	if (irqchip_in_kernel(vcpu->kvm))
> > +	if (lapic_in_kernel(vcpu->kvm))
> >  		return -ENXIO;
> >  
> >  	kvm_queue_interrupt(vcpu, irq->irq, false);
> > @@ -3546,7 +3547,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> >  		struct kvm_vapic_addr va;
> >  
> >  		r = -EINVAL;
> > -		if (!irqchip_in_kernel(vcpu->kvm))
> > +		if (!lapic_in_kernel(vcpu->kvm))
> >  			goto out;
> >  		r = -EFAULT;
> >  		if (copy_from_user(&va, argp, sizeof va))
> > @@ -3904,7 +3905,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
> >  int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
> >  			bool line_status)
> >  {
> > -	if (!irqchip_in_kernel(kvm))
> > +	if (!lapic_in_kernel(kvm))
> >  		return -ENXIO;
> >  
> >  	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
> > @@ -3926,6 +3927,23 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
> >  		kvm->arch.disabled_quirks = cap->args[0];
> >  		r = 0;
> >  		break;
> > +	case KVM_CAP_SPLIT_IRQCHIP: {
> > +		mutex_lock(&kvm->lock);
> > +		r = -EEXIST;
> > +		if (lapic_in_kernel(kvm))
> > +			goto split_irqchip_unlock;
> > +		r = -EINVAL;
> > +		if (atomic_read(&kvm->online_vcpus))
> > +			goto split_irqchip_unlock;
> > +		r = kvm_setup_empty_irq_routing(kvm);
> > +		if (r)
> > +			goto split_irqchip_unlock;
> > +		kvm->arch.irqchip_split = true;
> > +		r = 0;
> > +split_irqchip_unlock:
> > +		mutex_unlock(&kvm->lock);
> > +		break;
> > +	}
> >  	default:
> >  		r = -EINVAL;
> >  		break;
> > @@ -4194,6 +4212,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
> >  		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
> >  		break;
> >  	}
> > +
> >  	default:
> >  		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
> >  	}
> > @@ -5959,7 +5978,7 @@ void kvm_arch_exit(void)
> >  int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
> >  {
> >  	++vcpu->stat.halt_exits;
> > -	if (irqchip_in_kernel(vcpu->kvm)) {
> > +	if (lapic_in_kernel(vcpu->kvm)) {
> >  		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
> >  		return 1;
> >  	} else {
> > @@ -6126,7 +6145,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
> >   */
> >  static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
> >  {
> > -	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
> > +	return (!lapic_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
> >  		vcpu->run->request_interrupt_window &&
> >  		kvm_arch_interrupt_allowed(vcpu));
> >  }
> > @@ -6138,7 +6157,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
> >  	kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
> >  	kvm_run->cr8 = kvm_get_cr8(vcpu);
> >  	kvm_run->apic_base = kvm_get_apic_base(vcpu);
> > -	if (irqchip_in_kernel(vcpu->kvm))
> > +	if (lapic_in_kernel(vcpu->kvm))
> >  		kvm_run->ready_for_interrupt_injection = 1;
> >  	else
> >  		kvm_run->ready_for_interrupt_injection =
> > @@ -6285,7 +6304,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
> >  {
> >  	struct page *page = NULL;
> >  
> > -	if (!irqchip_in_kernel(vcpu->kvm))
> > +	if (!lapic_in_kernel(vcpu->kvm))
> >  		return;
> >  
> >  	if (!kvm_x86_ops->set_apic_access_page_addr)
> > @@ -6323,7 +6342,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
> >  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >  {
> >  	int r;
> > -	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
> > +	bool req_int_win = !lapic_in_kernel(vcpu->kvm) &&
> >  		vcpu->run->request_interrupt_window;
> >  	bool req_immediate_exit = false;
> >  
> > @@ -6712,7 +6731,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> >  	}
> >  
> >  	/* re-sync apic's tpr */
> > -	if (!irqchip_in_kernel(vcpu->kvm)) {
> > +	if (!lapic_in_kernel(vcpu->kvm)) {
> >  		if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
> >  			r = -EINVAL;
> >  			goto out;
> > @@ -7421,7 +7440,7 @@ void kvm_arch_check_processor_compat(void *rtn)
> >  
> >  bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
> >  {
> > -	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
> > +	return lapic_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
> >  }
> >  
> >  struct static_key kvm_no_apic_vcpu __read_mostly;
> > @@ -7437,7 +7456,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> >  
> >  	vcpu->arch.pv.pv_unhalted = false;
> >  	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
> > -	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
> > +	if (!lapic_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
> >  		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> >  	else
> >  		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
> > @@ -7455,7 +7474,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> >  	if (r < 0)
> >  		goto fail_free_pio_data;
> >  
> > -	if (irqchip_in_kernel(kvm)) {
> > +	if (lapic_in_kernel(kvm)) {
> >  		r = kvm_create_lapic(vcpu);
> >  		if (r < 0)
> >  			goto fail_mmu_destroy;
> > @@ -7518,7 +7537,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
> >  	kvm_mmu_destroy(vcpu);
> >  	srcu_read_unlock(&vcpu->kvm->srcu, idx);
> >  	free_page((unsigned long)vcpu->arch.pio_data);
> > -	if (!irqchip_in_kernel(vcpu->kvm))
> > +	if (!lapic_in_kernel(vcpu->kvm))
> >  		static_key_slow_dec(&kvm_no_apic_vcpu);
> >  }
> >  
> > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> > index 133ea00..ffe1f4e 100644
> > --- a/include/kvm/arm_vgic.h
> > +++ b/include/kvm/arm_vgic.h
> > @@ -329,6 +329,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
> >  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
> >  
> >  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
> > +#define lapic_in_kernel(k)      (irqchip_in_kernel(k))
> >  #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
> >  #define vgic_ready(k)		((k)->arch.vgic.ready)
> >  
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index a8bcbc9..7e2b41a 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -935,6 +935,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
> >  #endif
> >  
> >  int kvm_setup_default_irq_routing(struct kvm *kvm);
> > +int kvm_setup_empty_irq_routing(struct kvm *kvm);
> >  int kvm_set_irq_routing(struct kvm *kvm,
> >  			const struct kvm_irq_routing_entry *entries,
> >  			unsigned nr,
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 75bd9f7..1e6f6c3 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -815,6 +815,7 @@ struct kvm_ppc_smmu_info {
> >  #define KVM_CAP_S390_IRQ_STATE 114
> >  #define KVM_CAP_PPC_HWRNG 115
> >  #define KVM_CAP_DISABLE_QUIRKS 116
> > +#define KVM_CAP_SPLIT_IRQCHIP 117
> >  
> >  #ifdef KVM_CAP_IRQ_ROUTING
> >  
> > diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
> > index 1d56a90..8aaceed 100644
> > --- a/virt/kvm/irqchip.c
> > +++ b/virt/kvm/irqchip.c
> > @@ -73,7 +73,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
> >  {
> >  	struct kvm_kernel_irq_routing_entry route;
> >  
> > -	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
> > +	if (!lapic_in_kernel(kvm) || msi->flags != 0)
> >  		return -EINVAL;
> >  
> >  	route.msi.address_lo = msi->address_lo;
> > 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 5, 2015, 7:19 a.m. UTC | #3
> From the perspective of avoiding impacting other architectures, this is a
> good idea, but the naming seems strange in the x86 case. Having
> irqchip_in_kernel be "true" when the ioapic/pic are in userspace seems
> strange. Admittedly, the irqchip isn't a "real" concept on x86, so
> inventing a new meaning is fine.

From the KVM point of view, the "irqchip" is whatever delivers
interrupts to the vCPU---which is the LAPIC for x86.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6955444..9a43d42 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2979,6 +2979,7 @@  len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
+
 5. The kvm_run structure
 ------------------------
 
@@ -3575,6 +3576,20 @@  struct {
 
 KVM handlers should exit to userspace with rc = -EREMOTE.
 
+7.5 KVM_CAP_SPLIT_IRQCHIP
+
+Capability: KVM_CAP_SPLIT_IRQCHIP
+Architectures: x86
+Type:  VM ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel.  This differs from
+KVM_CREATE_IRQCHIP in that it only creates the local apic; it creates neither
+the ioapic nor the pic in the kernel.  It also enables in-kernel routing of
+interrupt requests.  Fails if a VCPU has already been created, or if the
+irqchip is already in the kernel.
+
 
 8. Other capabilities.
 ----------------------
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
index 5a9a10b..5e6fa06 100644
--- a/arch/powerpc/kvm/irq.h
+++ b/arch/powerpc/kvm/irq.h
@@ -17,4 +17,9 @@  static inline int irqchip_in_kernel(struct kvm *kvm)
 	return ret;
 }
 
+static inline int lapic_in_kernel(struct kvm *kvm)
+{
+	return irqchip_in_kernel(kvm);
+}
+
 #endif
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
index d98e415..db876c3 100644
--- a/arch/s390/kvm/irq.h
+++ b/arch/s390/kvm/irq.h
@@ -19,4 +19,8 @@  static inline int irqchip_in_kernel(struct kvm *kvm)
 	return 1;
 }
 
+static inline int lapic_in_kernel(struct kvm *kvm)
+{
+	return irqchip_in_kernel(kvm);
+}
 #endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7276107..af3225a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -639,6 +639,8 @@  struct kvm_arch {
 	bool boot_vcpu_runs_old_kvmclock;
 
 	u64 disabled_quirks;
+
+	bool irqchip_split;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
index d090ecf..1237e92 100644
--- a/arch/x86/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -291,7 +291,7 @@  static int kvm_deassign_irq(struct kvm *kvm,
 {
 	unsigned long guest_irq_type, host_irq_type;
 
-	if (!irqchip_in_kernel(kvm))
+	if (!lapic_in_kernel(kvm))
 		return -EINVAL;
 	/* no irq assignment to deassign */
 	if (!assigned_dev->irq_requested_type)
@@ -568,7 +568,7 @@  static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 	struct kvm_assigned_dev_kernel *match;
 	unsigned long host_irq_type, guest_irq_type;
 
-	if (!irqchip_in_kernel(kvm))
+	if (!lapic_in_kernel(kvm))
 		return r;
 
 	mutex_lock(&kvm->lock);
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index a1ec6a50..706e47a 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -57,7 +57,7 @@  static int kvm_cpu_has_extint(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 {
-	if (!irqchip_in_kernel(v->kvm))
+	if (!lapic_in_kernel(v->kvm))
 		return v->arch.interrupt.pending;
 
 	if (kvm_cpu_has_extint(v))
@@ -75,7 +75,7 @@  int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-	if (!irqchip_in_kernel(v->kvm))
+	if (!lapic_in_kernel(v->kvm))
 		return v->arch.interrupt.pending;
 
 	if (kvm_cpu_has_extint(v))
@@ -103,7 +103,7 @@  int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 {
 	int vector;
 
-	if (!irqchip_in_kernel(v->kvm))
+	if (!lapic_in_kernel(v->kvm))
 		return v->arch.interrupt.nr;
 
 	vector = kvm_cpu_get_extint(v);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ad68c73..e46abf3 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -92,6 +92,17 @@  static inline int irqchip_in_kernel(struct kvm *kvm)
 	return ret;
 }
 
+static inline int irqchip_split(struct kvm *kvm)
+{
+	return kvm->arch.irqchip_split;
+}
+
+static inline int lapic_in_kernel(struct kvm *kvm)
+{
+	return irqchip_split(kvm) || irqchip_in_kernel(kvm);
+}
+
+
 void kvm_pic_reset(struct kvm_kpic_state *s);
 
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e..f43c59a 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -328,3 +328,10 @@  int kvm_setup_default_irq_routing(struct kvm *kvm)
 	return kvm_set_irq_routing(kvm, default_routing,
 				   ARRAY_SIZE(default_routing), 0);
 }
+
+static const struct kvm_irq_routing_entry empty_routing[] = {};
+
+int kvm_setup_empty_irq_routing(struct kvm *kvm)
+{
+	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
+}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c789e00..92f4c98 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -209,7 +209,8 @@  out:
 	if (old)
 		kfree_rcu(old, rcu);
 
-	kvm_vcpu_request_scan_ioapic(kvm);
+	if (!irqchip_split(kvm))
+		kvm_vcpu_request_scan_ioapic(kvm);
 }
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
@@ -1827,7 +1828,8 @@  void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
 				apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
-	kvm_rtc_eoi_tracking_restore_one(vcpu);
+	if (!irqchip_split(vcpu->kvm))
+		kvm_rtc_eoi_tracking_restore_one(vcpu);
 }
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
@@ -1910,7 +1912,8 @@  static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
 	    /* Cache not set: could be safe but we don't bother. */
 	    apic->highest_isr_cache == -1 ||
 	    /* Need EOI to update ioapic. */
-	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
+	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache) ||
+	    irqchip_split(vcpu->kvm)) {
 		/*
 		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
 		 * so we need not do anything here.
@@ -1966,7 +1969,7 @@  int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 reg = (msr - APIC_BASE_MSR) << 4;
 
-	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+	if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
 		return 1;
 
 	if (reg == APIC_ICR2)
@@ -1983,7 +1986,7 @@  int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
 
-	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+	if (!lapic_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
 		return 1;
 
 	if (reg == APIC_DFR || reg == APIC_ICR2) {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a65ce12..1513d14 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3507,7 +3507,7 @@  static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 
 static bool can_do_async_pf(struct kvm_vcpu *vcpu)
 {
-	if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+	if (unlikely(!lapic_in_kernel(vcpu->kvm) ||
 		     kvm_event_needs_reinjection(vcpu)))
 		return false;
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b9f9e10..59166de 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3054,7 +3054,7 @@  static int cr8_write_interception(struct vcpu_svm *svm)
 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
 	/* instruction emulation calls kvm_set_cr8() */
 	r = cr_interception(svm);
-	if (irqchip_in_kernel(svm->vcpu.kvm))
+	if (lapic_in_kernel(svm->vcpu.kvm))
 		return r;
 	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
 		return r;
@@ -3295,7 +3295,7 @@  static int interrupt_window_interception(struct vcpu_svm *svm)
 	 * If the user space waits to inject interrupts, exit as soon as
 	 * possible
 	 */
-	if (!irqchip_in_kernel(svm->vcpu.kvm) &&
+	if (!lapic_in_kernel(svm->vcpu.kvm) &&
 	    kvm_run->request_interrupt_window &&
 	    !kvm_cpu_has_interrupt(&svm->vcpu)) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9cf5030..3b58788 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -948,7 +948,7 @@  static inline bool cpu_has_vmx_tpr_shadow(void)
 
 static inline bool vm_need_tpr_shadow(struct kvm *kvm)
 {
-	return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
+	return (cpu_has_vmx_tpr_shadow()) && lapic_in_kernel(kvm);
 }
 
 static inline bool cpu_has_secondary_exec_ctrls(void)
@@ -1064,7 +1064,7 @@  static inline bool cpu_has_vmx_ple(void)
 
 static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm)
 {
-	return flexpriority_enabled && irqchip_in_kernel(kvm);
+	return flexpriority_enabled && lapic_in_kernel(kvm);
 }
 
 static inline bool cpu_has_vmx_vpid(void)
@@ -4341,7 +4341,7 @@  static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
 
 static int vmx_vm_has_apicv(struct kvm *kvm)
 {
-	return enable_apicv && irqchip_in_kernel(kvm);
+	return enable_apicv && lapic_in_kernel(kvm);
 }
 
 static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
@@ -5317,7 +5317,7 @@  static int handle_cr(struct kvm_vcpu *vcpu)
 				u8 cr8 = (u8)val;
 				err = kvm_set_cr8(vcpu, cr8);
 				kvm_complete_insn_gp(vcpu, err);
-				if (irqchip_in_kernel(vcpu->kvm))
+				if (lapic_in_kernel(vcpu->kvm))
 					return 1;
 				if (cr8_prev <= cr8)
 					return 1;
@@ -5534,7 +5534,7 @@  static int handle_interrupt_window(struct kvm_vcpu *vcpu)
 	 * If the user space waits to inject interrupts, exit as soon as
 	 * possible
 	 */
-	if (!irqchip_in_kernel(vcpu->kvm) &&
+	if (!lapic_in_kernel(vcpu->kvm) &&
 	    vcpu->run->request_interrupt_window &&
 	    !kvm_cpu_has_interrupt(vcpu)) {
 		vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
@@ -9419,7 +9419,7 @@  static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
 	 * emulated by vmx_set_efer(), below.
 	 */
-	vm_entry_controls_init(vmx, 
+	vm_entry_controls_init(vmx,
 		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
 			~VM_ENTRY_IA32E_MODE) |
 		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 79dde16..19c8980 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -784,7 +784,7 @@  int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
 	if (cr8 & CR8_RESERVED_BITS)
 		return 1;
-	if (irqchip_in_kernel(vcpu->kvm))
+	if (lapic_in_kernel(vcpu->kvm))
 		kvm_lapic_set_tpr(vcpu, cr8);
 	else
 		vcpu->arch.cr8 = cr8;
@@ -794,7 +794,7 @@  EXPORT_SYMBOL_GPL(kvm_set_cr8);
 
 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 {
-	if (irqchip_in_kernel(vcpu->kvm))
+	if (lapic_in_kernel(vcpu->kvm))
 		return kvm_lapic_get_cr8(vcpu);
 	else
 		return vcpu->arch.cr8;
@@ -2866,6 +2866,7 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_TSC_DEADLINE_TIMER:
 	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_DISABLE_QUIRKS:
+	case KVM_CAP_SPLIT_IRQCHIP:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
@@ -3068,7 +3069,7 @@  static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 {
 	if (irq->irq >= KVM_NR_INTERRUPTS)
 		return -EINVAL;
-	if (irqchip_in_kernel(vcpu->kvm))
+	if (lapic_in_kernel(vcpu->kvm))
 		return -ENXIO;
 
 	kvm_queue_interrupt(vcpu, irq->irq, false);
@@ -3546,7 +3547,7 @@  long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_vapic_addr va;
 
 		r = -EINVAL;
-		if (!irqchip_in_kernel(vcpu->kvm))
+		if (!lapic_in_kernel(vcpu->kvm))
 			goto out;
 		r = -EFAULT;
 		if (copy_from_user(&va, argp, sizeof va))
@@ -3904,7 +3905,7 @@  int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 			bool line_status)
 {
-	if (!irqchip_in_kernel(kvm))
+	if (!lapic_in_kernel(kvm))
 		return -ENXIO;
 
 	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
@@ -3926,6 +3927,23 @@  static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		kvm->arch.disabled_quirks = cap->args[0];
 		r = 0;
 		break;
+	case KVM_CAP_SPLIT_IRQCHIP: {
+		mutex_lock(&kvm->lock);
+		r = -EEXIST;
+		if (lapic_in_kernel(kvm))
+			goto split_irqchip_unlock;
+		r = -EINVAL;
+		if (atomic_read(&kvm->online_vcpus))
+			goto split_irqchip_unlock;
+		r = kvm_setup_empty_irq_routing(kvm);
+		if (r)
+			goto split_irqchip_unlock;
+		kvm->arch.irqchip_split = true;
+		r = 0;
+split_irqchip_unlock:
+		mutex_unlock(&kvm->lock);
+		break;
+	}
 	default:
 		r = -EINVAL;
 		break;
@@ -4194,6 +4212,7 @@  long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 		break;
 	}
+
 	default:
 		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
 	}
@@ -5959,7 +5978,7 @@  void kvm_arch_exit(void)
 int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.halt_exits;
-	if (irqchip_in_kernel(vcpu->kvm)) {
+	if (lapic_in_kernel(vcpu->kvm)) {
 		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
 		return 1;
 	} else {
@@ -6126,7 +6145,7 @@  static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
  */
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
 {
-	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
+	return (!lapic_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
 		vcpu->run->request_interrupt_window &&
 		kvm_arch_interrupt_allowed(vcpu));
 }
@@ -6138,7 +6157,7 @@  static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 	kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
-	if (irqchip_in_kernel(vcpu->kvm))
+	if (lapic_in_kernel(vcpu->kvm))
 		kvm_run->ready_for_interrupt_injection = 1;
 	else
 		kvm_run->ready_for_interrupt_injection =
@@ -6285,7 +6304,7 @@  void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 {
 	struct page *page = NULL;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
+	if (!lapic_in_kernel(vcpu->kvm))
 		return;
 
 	if (!kvm_x86_ops->set_apic_access_page_addr)
@@ -6323,7 +6342,7 @@  void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
-	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+	bool req_int_win = !lapic_in_kernel(vcpu->kvm) &&
 		vcpu->run->request_interrupt_window;
 	bool req_immediate_exit = false;
 
@@ -6712,7 +6731,7 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 
 	/* re-sync apic's tpr */
-	if (!irqchip_in_kernel(vcpu->kvm)) {
+	if (!lapic_in_kernel(vcpu->kvm)) {
 		if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
 			r = -EINVAL;
 			goto out;
@@ -7421,7 +7440,7 @@  void kvm_arch_check_processor_compat(void *rtn)
 
 bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 {
-	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
+	return lapic_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
 }
 
 struct static_key kvm_no_apic_vcpu __read_mostly;
@@ -7437,7 +7456,7 @@  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.pv.pv_unhalted = false;
 	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
-	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
+	if (!lapic_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	else
 		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -7455,7 +7474,7 @@  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	if (r < 0)
 		goto fail_free_pio_data;
 
-	if (irqchip_in_kernel(kvm)) {
+	if (lapic_in_kernel(kvm)) {
 		r = kvm_create_lapic(vcpu);
 		if (r < 0)
 			goto fail_mmu_destroy;
@@ -7518,7 +7537,7 @@  void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvm_mmu_destroy(vcpu);
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	free_page((unsigned long)vcpu->arch.pio_data);
-	if (!irqchip_in_kernel(vcpu->kvm))
+	if (!lapic_in_kernel(vcpu->kvm))
 		static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 133ea00..ffe1f4e 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -329,6 +329,7 @@  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
+#define lapic_in_kernel(k)      (irqchip_in_kernel(k))
 #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
 #define vgic_ready(k)		((k)->arch.vgic.ready)
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a8bcbc9..7e2b41a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -935,6 +935,7 @@  static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
 #endif
 
 int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_setup_empty_irq_routing(struct kvm *kvm);
 int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *entries,
 			unsigned nr,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 75bd9f7..1e6f6c3 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -815,6 +815,7 @@  struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_IRQ_STATE 114
 #define KVM_CAP_PPC_HWRNG 115
 #define KVM_CAP_DISABLE_QUIRKS 116
+#define KVM_CAP_SPLIT_IRQCHIP 117
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 1d56a90..8aaceed 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -73,7 +73,7 @@  int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
 	struct kvm_kernel_irq_routing_entry route;
 
-	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
+	if (!lapic_in_kernel(kvm) || msi->flags != 0)
 		return -EINVAL;
 
 	route.msi.address_lo = msi->address_lo;