diff mbox

[v5,1/4] KVM: x86: Split the APIC from the rest of IRQCHIP.

Message ID 1438039062-3168-1-git-send-email-srutherford@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

Steve Rutherford July 27, 2015, 11:17 p.m. UTC
First patch in a series which enables the relocation of the
PIC/IOAPIC to userspace.

Adds capability KVM_CAP_SPLIT_IRQCHIP;

KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
rest of the irqchip.

Compile tested for x86.

Signed-off-by: Steve Rutherford <srutherford@google.com>
Suggested-by: Andrew Honig <ahonig@google.com>
---
 Documentation/virtual/kvm/api.txt | 15 +++++++++++++++
 arch/powerpc/kvm/irq.h            |  1 -
 arch/s390/kvm/irq.h               |  1 -
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/i8254.c              |  5 ++++-
 arch/x86/kvm/ioapic.h             |  9 +++++++++
 arch/x86/kvm/irq.h                |  6 ++++++
 arch/x86/kvm/irq_comm.c           |  9 ++++++++-
 arch/x86/kvm/lapic.c              |  9 ++++++---
 arch/x86/kvm/vmx.c                |  4 ++--
 arch/x86/kvm/x86.c                | 23 +++++++++++++++++++++--
 include/kvm/arm_vgic.h            |  1 +
 include/linux/kvm_host.h          |  1 +
 include/uapi/linux/kvm.h          |  1 +
 14 files changed, 76 insertions(+), 11 deletions(-)

Comments

Paolo Bonzini July 29, 2015, 12:56 p.m. UTC | #1
On 28/07/2015 01:17, Steve Rutherford wrote:
> First patch in a series which enables the relocation of the
> PIC/IOAPIC to userspace.
> 
> Adds capability KVM_CAP_SPLIT_IRQCHIP;
> 
> KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the
> rest of the irqchip.
> 
> Compile tested for x86.
> 
> Signed-off-by: Steve Rutherford <srutherford@google.com>
> Suggested-by: Andrew Honig <ahonig@google.com>
> ---
>  Documentation/virtual/kvm/api.txt | 15 +++++++++++++++
>  arch/powerpc/kvm/irq.h            |  1 -
>  arch/s390/kvm/irq.h               |  1 -
>  arch/x86/include/asm/kvm_host.h   |  2 ++
>  arch/x86/kvm/i8254.c              |  5 ++++-
>  arch/x86/kvm/ioapic.h             |  9 +++++++++
>  arch/x86/kvm/irq.h                |  6 ++++++
>  arch/x86/kvm/irq_comm.c           |  9 ++++++++-
>  arch/x86/kvm/lapic.c              |  9 ++++++---
>  arch/x86/kvm/vmx.c                |  4 ++--
>  arch/x86/kvm/x86.c                | 23 +++++++++++++++++++++--
>  include/kvm/arm_vgic.h            |  1 +
>  include/linux/kvm_host.h          |  1 +
>  include/uapi/linux/kvm.h          |  1 +
>  14 files changed, 76 insertions(+), 11 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index a4ebcb7..b655024 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3620,6 +3620,21 @@ struct {
>  
>  KVM handlers should exit to userspace with rc = -EREMOTE.
>  
> +7.5 KVM_SPLIT_IRQCHIP
> +
> +Architectures: x86
> +Parameters: None
> +Returns: 0 on success, -1 on error
> +
> +Create a local apic for each processor in the kernel. With this capability
> +enabled, the userspace VMM is expected to emulate the IOAPIC and PIC.
> +
> +This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in kernel
> +IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
> +
> +Fails if VCPU has already been created, or if the irqchip is already in the
> +kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
> +
>  
>  8. Other capabilities.
>  ----------------------
> diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
> index 5a9a10b..772fa8c 100644
> --- a/arch/powerpc/kvm/irq.h
> +++ b/arch/powerpc/kvm/irq.h
> @@ -16,5 +16,4 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
>  	smp_rmb();
>  	return ret;
>  }
> -
>  #endif
> diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
> index d98e415..9a21a86 100644
> --- a/arch/s390/kvm/irq.h
> +++ b/arch/s390/kvm/irq.h
> @@ -18,5 +18,4 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
>  {
>  	return 1;
>  }
> -
>  #endif
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index fa32b53..18a110b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -669,6 +669,8 @@ struct kvm_arch {
>  	bool boot_vcpu_runs_old_kvmclock;
>  
>  	u64 disabled_quirks;
> +
> +	bool irqchip_split;
>  };
>  
>  struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> index f90952f..5708850 100644
> --- a/arch/x86/kvm/i8254.c
> +++ b/arch/x86/kvm/i8254.c
> @@ -35,6 +35,7 @@
>  #include <linux/kvm_host.h>
>  #include <linux/slab.h>
>  
> +#include "ioapic.h"
>  #include "irq.h"
>  #include "i8254.h"
>  #include "x86.h"
> @@ -333,7 +334,9 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
>  	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
>  	s64 interval;
>  
> -	if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
> +	if (!irqchip_in_kernel(kvm) ||
> +	    !ioapic_in_kernel(kvm) ||

Here the irqchip_in_kernel check is unnecessary.

> +	    ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
>  		return;
>  
>  	interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
> diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
> index ca0b0b4..d8cc54b 100644
> --- a/arch/x86/kvm/ioapic.h
> +++ b/arch/x86/kvm/ioapic.h
> @@ -98,6 +98,15 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
>  	return kvm->arch.vioapic;
>  }
>  
> +static inline int ioapic_in_kernel(struct kvm *kvm)
> +{
> +	int ret;
> +
> +	ret = (ioapic_irqchip(kvm) != NULL);
> +	smp_rmb();
> +	return ret;
> +}
> +
>  static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
>  {
>  	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
> diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
> index ad68c73..2f13dd5 100644
> --- a/arch/x86/kvm/irq.h
> +++ b/arch/x86/kvm/irq.h
> @@ -83,11 +83,17 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
>  	return kvm->arch.vpic;
>  }
>  
> +static inline int irqchip_split(struct kvm *kvm)
> +{
> +	return kvm->arch.irqchip_split;
> +}
> +
>  static inline int irqchip_in_kernel(struct kvm *kvm)
>  {
>  	int ret;
>  
>  	ret = (pic_irqchip(kvm) != NULL);
> +	ret |= irqchip_split(kvm);
>  	smp_rmb();
>  	return ret;
>  }
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 9efff9e..67f6b62 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -208,7 +208,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
>  		goto unlock;
>  	}
>  	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
> -	if (!irqchip_in_kernel(kvm))
> +	if (!ioapic_in_kernel(kvm))
>  		goto unlock;
>  
>  	kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
> @@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
>  	return kvm_set_irq_routing(kvm, default_routing,
>  				   ARRAY_SIZE(default_routing), 0);
>  }
> +
> +static const struct kvm_irq_routing_entry empty_routing[] = {};
> +
> +int kvm_setup_empty_irq_routing(struct kvm *kvm)
> +{
> +	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
> +}
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 2a5ca97..536b79e 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -209,7 +209,8 @@ out:
>  	if (old)
>  		kfree_rcu(old, rcu);
>  
> -	kvm_vcpu_request_scan_ioapic(kvm);
> +	if (!irqchip_split(kvm))

Here please check ioapic_in_kernel.

> +		kvm_vcpu_request_scan_ioapic(kvm);
>  }
>  
>  static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
> @@ -1838,7 +1839,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
>  		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
>  				apic_find_highest_isr(apic));
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
> -	kvm_rtc_eoi_tracking_restore_one(vcpu);
> +	if (!irqchip_split(vcpu->kvm))

Here please check ioapic_in_kernel.

> +		kvm_rtc_eoi_tracking_restore_one(vcpu);
>  }
>  
>  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
> @@ -1921,7 +1923,8 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
>  	    /* Cache not set: could be safe but we don't bother. */
>  	    apic->highest_isr_cache == -1 ||
>  	    /* Need EOI to update ioapic. */
> -	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
> +	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache) ||
> +	    irqchip_split(vcpu->kvm)) {

This is ugly (and if anything irqchip_split should be done before
kvm_ioapic_handles_vector).  Could this just test the EOI exit bitmap
instead?

Also, who sets TMR in the split irqchip case?  I'll post a patch today
or tomorrow to compute TMR in __apic_accept_irq and to do the
aforementioned EOI exit bitmap test.

>  		/*
>  		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
>  		 * so we need not do anything here.
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 4014a82..08203a1 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -948,7 +948,7 @@ static inline bool cpu_has_vmx_tpr_shadow(void)
>  
>  static inline bool vm_need_tpr_shadow(struct kvm *kvm)
>  {
> -	return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
> +	return (cpu_has_vmx_tpr_shadow()) && irqchip_in_kernel(kvm);
>  }
>  
>  static inline bool cpu_has_secondary_exec_ctrls(void)
> @@ -9485,7 +9485,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
>  	 * emulated by vmx_set_efer(), below.
>  	 */
> -	vm_entry_controls_init(vmx, 
> +	vm_entry_controls_init(vmx,
>  		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
>  			~VM_ENTRY_IA32E_MODE) |
>  		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 28076c2..6d4b4dc 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2461,6 +2461,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_TSC_DEADLINE_TIMER:
>  	case KVM_CAP_ENABLE_CAP_VM:
>  	case KVM_CAP_DISABLE_QUIRKS:
> +	case KVM_CAP_SPLIT_IRQCHIP:
>  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
>  	case KVM_CAP_ASSIGN_DEV_IRQ:
>  	case KVM_CAP_PCI_2_3:
> @@ -3568,6 +3569,23 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>  		kvm->arch.disabled_quirks = cap->args[0];
>  		r = 0;
>  		break;
> +	case KVM_CAP_SPLIT_IRQCHIP: {
> +		mutex_lock(&kvm->lock);
> +		r = -EEXIST;
> +		if (irqchip_in_kernel(kvm))
> +			goto split_irqchip_unlock;
> +		r = -EINVAL;
> +		if (atomic_read(&kvm->online_vcpus))
> +			goto split_irqchip_unlock;
> +		r = kvm_setup_empty_irq_routing(kvm);
> +		if (r)
> +			goto split_irqchip_unlock;

Need a smp_wmb() here, pairing with irqchip_in_kernel.

Paolo

> +		kvm->arch.irqchip_split = true;
> +		r = 0;
> +split_irqchip_unlock:
> +		mutex_unlock(&kvm->lock);
> +		break;
> +	}
>  	default:
>  		r = -EINVAL;
>  		break;
> @@ -3686,7 +3704,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		}
>  
>  		r = -ENXIO;
> -		if (!irqchip_in_kernel(kvm))
> +		if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm))
>  			goto get_irqchip_out;
>  		r = kvm_vm_ioctl_get_irqchip(kvm, chip);
>  		if (r)
> @@ -3710,7 +3728,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		}
>  
>  		r = -ENXIO;
> -		if (!irqchip_in_kernel(kvm))
> +		if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm))
>  			goto set_irqchip_out;
>  		r = kvm_vm_ioctl_set_irqchip(kvm, chip);
>  		if (r)
> @@ -3836,6 +3854,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
>  		break;
>  	}
> +
>  	default:
>  		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
>  	}
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 133ea00..ffe1f4e 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -329,6 +329,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>  
>  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
> +#define lapic_in_kernel(k)      (irqchip_in_kernel(k))
>  #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
>  #define vgic_ready(k)		((k)->arch.vgic.ready)
>  
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 51103f0..f7eab09 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1000,6 +1000,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
>  #endif
>  
>  int kvm_setup_default_irq_routing(struct kvm *kvm);
> +int kvm_setup_empty_irq_routing(struct kvm *kvm);
>  int kvm_set_irq_routing(struct kvm *kvm,
>  			const struct kvm_irq_routing_entry *entries,
>  			unsigned nr,
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 9ef19eb..e4304d0 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -818,6 +818,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_DISABLE_QUIRKS 116
>  #define KVM_CAP_X86_SMM 117
>  #define KVM_CAP_MULTI_ADDRESS_SPACE 118
> +#define KVM_CAP_SPLIT_IRQCHIP 119
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Rutherford July 30, 2015, 3:04 a.m. UTC | #2
On Wed, Jul 29, 2015 at 02:56:42PM +0200, Paolo Bonzini wrote:
> 
> > +		kvm_rtc_eoi_tracking_restore_one(vcpu);
> >  }
> >  
> >  void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
> > @@ -1921,7 +1923,8 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
> >  	    /* Cache not set: could be safe but we don't bother. */
> >  	    apic->highest_isr_cache == -1 ||
> >  	    /* Need EOI to update ioapic. */
> > -	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
> > +	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache) ||
> > +	    irqchip_split(vcpu->kvm)) {
> 
> This is ugly (and if anything irqchip_split should be done before
> kvm_ioapic_handles_vector).  Could this just test the EOI exit bitmap
> instead?
> 
That could be done. The EOI exit bitmap write paths for split and !split
would need to be consolidated. (We can't pull them from the VMCS, so we'd
need to fetch them from the one stored in kvm_vcpu).

> Also, who sets TMR in the split irqchip case?  I'll post a patch today
> or tomorrow to compute TMR in __apic_accept_irq and to do the
> aforementioned EOI exit bitmap test.
Another option would be to compute the TMR in vcpu_scan_ioapic, by
extracting it from the EOI exit bitmaps (which would be most similar
to how it had been done previously), but I prefer computing it in
__apic_accept_irq.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a4ebcb7..b655024 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3620,6 +3620,21 @@  struct {
 
 KVM handlers should exit to userspace with rc = -EREMOTE.
 
+7.5 KVM_CAP_SPLIT_IRQCHIP
+
+Architectures: x86
+Parameters: None
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel. With this capability
+enabled, the userspace VMM is expected to emulate the IOAPIC and PIC.
+
+This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in kernel
+IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
+
+Fails if VCPU has already been created, or if the irqchip is already in the
+kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
+
 
 8. Other capabilities.
 ----------------------
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
index 5a9a10b..772fa8c 100644
--- a/arch/powerpc/kvm/irq.h
+++ b/arch/powerpc/kvm/irq.h
@@ -16,5 +16,4 @@  static inline int irqchip_in_kernel(struct kvm *kvm)
 	smp_rmb();
 	return ret;
 }
-
 #endif
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
index d98e415..9a21a86 100644
--- a/arch/s390/kvm/irq.h
+++ b/arch/s390/kvm/irq.h
@@ -18,5 +18,4 @@  static inline int irqchip_in_kernel(struct kvm *kvm)
 {
 	return 1;
 }
-
 #endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fa32b53..18a110b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -669,6 +669,8 @@  struct kvm_arch {
 	bool boot_vcpu_runs_old_kvmclock;
 
 	u64 disabled_quirks;
+
+	bool irqchip_split;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index f90952f..5708850 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -35,6 +35,7 @@ 
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
 
+#include "ioapic.h"
 #include "irq.h"
 #include "i8254.h"
 #include "x86.h"
@@ -333,7 +334,9 @@  static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
 	s64 interval;
 
-	if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
+	if (!irqchip_in_kernel(kvm) ||
+	    !ioapic_in_kernel(kvm) ||
+	    ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
 		return;
 
 	interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index ca0b0b4..d8cc54b 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -98,6 +98,15 @@  static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
 	return kvm->arch.vioapic;
 }
 
+static inline int ioapic_in_kernel(struct kvm *kvm)
+{
+	int ret;
+
+	ret = (ioapic_irqchip(kvm) != NULL);
+	smp_rmb();
+	return ret;
+}
+
 static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ad68c73..2f13dd5 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -83,11 +83,17 @@  static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
 	return kvm->arch.vpic;
 }
 
+static inline int irqchip_split(struct kvm *kvm)
+{
+	return kvm->arch.irqchip_split;
+}
+
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
 	int ret;
 
 	ret = (pic_irqchip(kvm) != NULL);
+	ret |= irqchip_split(kvm);
 	smp_rmb();
 	return ret;
 }
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e..67f6b62 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -208,7 +208,7 @@  void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 		goto unlock;
 	}
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
-	if (!irqchip_in_kernel(kvm))
+	if (!ioapic_in_kernel(kvm))
 		goto unlock;
 
 	kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
@@ -328,3 +328,10 @@  int kvm_setup_default_irq_routing(struct kvm *kvm)
 	return kvm_set_irq_routing(kvm, default_routing,
 				   ARRAY_SIZE(default_routing), 0);
 }
+
+static const struct kvm_irq_routing_entry empty_routing[] = {};
+
+int kvm_setup_empty_irq_routing(struct kvm *kvm)
+{
+	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
+}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2a5ca97..536b79e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -209,7 +209,8 @@  out:
 	if (old)
 		kfree_rcu(old, rcu);
 
-	kvm_vcpu_request_scan_ioapic(kvm);
+	if (!irqchip_split(kvm))
+		kvm_vcpu_request_scan_ioapic(kvm);
 }
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
@@ -1838,7 +1839,8 @@  void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
 				apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
-	kvm_rtc_eoi_tracking_restore_one(vcpu);
+	if (!irqchip_split(vcpu->kvm))
+		kvm_rtc_eoi_tracking_restore_one(vcpu);
 }
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
@@ -1921,7 +1923,8 @@  static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
 	    /* Cache not set: could be safe but we don't bother. */
 	    apic->highest_isr_cache == -1 ||
 	    /* Need EOI to update ioapic. */
-	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
+	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache) ||
+	    irqchip_split(vcpu->kvm)) {
 		/*
 		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
 		 * so we need not do anything here.
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4014a82..08203a1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -948,7 +948,7 @@  static inline bool cpu_has_vmx_tpr_shadow(void)
 
 static inline bool vm_need_tpr_shadow(struct kvm *kvm)
 {
-	return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
+	return (cpu_has_vmx_tpr_shadow()) && irqchip_in_kernel(kvm);
 }
 
 static inline bool cpu_has_secondary_exec_ctrls(void)
@@ -9485,7 +9485,7 @@  static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
 	 * emulated by vmx_set_efer(), below.
 	 */
-	vm_entry_controls_init(vmx, 
+	vm_entry_controls_init(vmx,
 		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
 			~VM_ENTRY_IA32E_MODE) |
 		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 28076c2..6d4b4dc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2461,6 +2461,7 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_TSC_DEADLINE_TIMER:
 	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_DISABLE_QUIRKS:
+	case KVM_CAP_SPLIT_IRQCHIP:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
@@ -3568,6 +3569,23 @@  static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		kvm->arch.disabled_quirks = cap->args[0];
 		r = 0;
 		break;
+	case KVM_CAP_SPLIT_IRQCHIP: {
+		mutex_lock(&kvm->lock);
+		r = -EEXIST;
+		if (irqchip_in_kernel(kvm))
+			goto split_irqchip_unlock;
+		r = -EINVAL;
+		if (atomic_read(&kvm->online_vcpus))
+			goto split_irqchip_unlock;
+		r = kvm_setup_empty_irq_routing(kvm);
+		if (r)
+			goto split_irqchip_unlock;
+		kvm->arch.irqchip_split = true;
+		r = 0;
+split_irqchip_unlock:
+		mutex_unlock(&kvm->lock);
+		break;
+	}
 	default:
 		r = -EINVAL;
 		break;
@@ -3686,7 +3704,7 @@  long kvm_arch_vm_ioctl(struct file *filp,
 		}
 
 		r = -ENXIO;
-		if (!irqchip_in_kernel(kvm))
+		if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm))
 			goto get_irqchip_out;
 		r = kvm_vm_ioctl_get_irqchip(kvm, chip);
 		if (r)
@@ -3710,7 +3728,7 @@  long kvm_arch_vm_ioctl(struct file *filp,
 		}
 
 		r = -ENXIO;
-		if (!irqchip_in_kernel(kvm))
+		if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm))
 			goto set_irqchip_out;
 		r = kvm_vm_ioctl_set_irqchip(kvm, chip);
 		if (r)
@@ -3836,6 +3854,7 @@  long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 		break;
 	}
+
 	default:
 		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
 	}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 133ea00..ffe1f4e 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -329,6 +329,7 @@  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
+#define lapic_in_kernel(k)      (irqchip_in_kernel(k))
 #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
 #define vgic_ready(k)		((k)->arch.vgic.ready)
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 51103f0..f7eab09 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1000,6 +1000,7 @@  static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
 #endif
 
 int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_setup_empty_irq_routing(struct kvm *kvm);
 int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *entries,
 			unsigned nr,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 9ef19eb..e4304d0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -818,6 +818,7 @@  struct kvm_ppc_smmu_info {
 #define KVM_CAP_DISABLE_QUIRKS 116
 #define KVM_CAP_X86_SMM 117
 #define KVM_CAP_MULTI_ADDRESS_SPACE 118
+#define KVM_CAP_SPLIT_IRQCHIP 119
 
 #ifdef KVM_CAP_IRQ_ROUTING