diff mbox

[PART1,RFC,v3,06/12] KVM: x86: Detect and Initialize AVIC support

Message ID 1458281388-14452-7-git-send-email-Suravee.Suthikulpanit@amd.com (mailing list archive)
State New, archived
Headers show

Commit Message

Suthikulpanit, Suravee March 18, 2016, 6:09 a.m. UTC
This patch introduces AVIC-related data structure, and AVIC
initialization code.

There are three main data structures for AVIC:
    * Virtual APIC (vAPIC) backing page (per-VCPU)
    * Physical APIC ID table (per-VM)
    * Logical APIC ID table (per-VM)

Currently, AVIC is disabled by default. Users can manually
enable AVIC via kernel boot option kvm-amd.avic=1 or during
kvm-amd module loading with parameter avic=1.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 arch/x86/include/asm/kvm_host.h |   8 ++
 arch/x86/include/asm/svm.h      |   3 +
 arch/x86/kvm/svm.c              | 232 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 242 insertions(+), 1 deletion(-)

Comments

Paolo Bonzini March 18, 2016, 11:21 a.m. UTC | #1
On 18/03/2016 07:09, Suravee Suthikulpanit wrote:
> This patch introduces AVIC-related data structure, and AVIC
> initialization code.
> 
> There are three main data structures for AVIC:
>     * Virtual APIC (vAPIC) backing page (per-VCPU)
>     * Physical APIC ID table (per-VM)
>     * Logical APIC ID table (per-VM)
> 
> Currently, AVIC is disabled by default. Users can manually
> enable AVIC via kernel boot option kvm-amd.avic=1 or during
> kvm-amd module loading with parameter avic=1.
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
>  arch/x86/include/asm/kvm_host.h |   8 ++
>  arch/x86/include/asm/svm.h      |   3 +
>  arch/x86/kvm/svm.c              | 232 +++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 242 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 87eac2a..2698e40 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -754,6 +754,14 @@ struct kvm_arch {
>  
>  	bool irqchip_split;
>  	u8 nr_reserved_ioapic_pins;
> +
> +	/* Struct members for AVIC */
> +	struct hlist_node hnode;

Unused, please remove.

> +	struct kvm *kvm;

Not needed, please use container_of instead like

	struct kvm_arch *ka;
	...
	struct kvm *kvm = container_of(ka, struct kvm, arch)

> +	u32 ldr_mode;
> +	u32 avic_tag;

Where is avic_tag used?

> +	struct page *avic_log_apic_id_table_page;

Remove apic_ if you want to keep the variable name shorter, but please
spell logical and physical in full.

> +	struct page *avic_phy_apic_id_table_page;
>  };
>  
>  struct kvm_vm_stat {
> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
> index 66e26a0..287e635 100644
> --- a/arch/x86/include/asm/svm.h
> +++ b/arch/x86/include/asm/svm.h
> @@ -116,6 +116,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
>  #define V_INTR_MASKING_SHIFT 24
>  #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
>  
> +#define AVIC_ENABLE_SHIFT 31
> +#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
> +
>  #define SVM_INTERRUPT_SHADOW_MASK 1
>  
>  #define SVM_IOIO_STR_SHIFT 2
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index c13a64b..621c948 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -14,6 +14,9 @@
>   * the COPYING file in the top-level directory.
>   *
>   */
> +
> +#define pr_fmt(fmt) "SVM: " fmt
> +
>  #include <linux/kvm_host.h>
>  
>  #include "irq.h"
> @@ -78,6 +81,11 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
>  #define TSC_RATIO_MIN		0x0000000000000001ULL
>  #define TSC_RATIO_MAX		0x000000ffffffffffULL
>  
> +#define AVIC_HPA_MASK	~((0xFFFULL << 52) || 0xFFF)
> +
> +/* NOTE: Current max index allowed for physical APIC ID table is 255 */
> +#define AVIC_PHY_APIC_ID_MAX	0xFF
> +
>  static bool erratum_383_found __read_mostly;
>  
>  static const u32 host_save_user_msrs[] = {
> @@ -162,8 +170,20 @@ struct vcpu_svm {
>  
>  	/* cached guest cpuid flags for faster access */
>  	bool nrips_enabled	: 1;
> +
> +	struct page *avic_backing_page;
> +	u64 *avic_phy_apic_id_cache;

Spelling same as above.

> +	bool avic_was_running;
>  };
>  
> +#define AVIC_LOG_APIC_ID__GUEST_PHY_APIC_ID_MSK	(0xFF)

MSK->MASK
LOG_APIC->LOGICAL
PHY_APIC->PHYSICAL

> +#define AVIC_LOG_APIC_ID__VALID_MSK		(1 << 31)
> +
> +#define AVIC_PHY_APIC_ID__HOST_PHY_APIC_ID_MSK	(0xFFULL)
> +#define AVIC_PHY_APIC_ID__BACKING_PAGE_MSK	(0xFFFFFFFFFFULL << 12)
> +#define AVIC_PHY_APIC_ID__IS_RUN_MSK		(1ULL << 62)

IS_RUN->IS_RUNNING

> +#define AVIC_PHY_APIC_ID__VALID_MSK		(1ULL << 63)
> +
>  static DEFINE_PER_CPU(u64, current_tsc_ratio);
>  #define TSC_RATIO_DEFAULT	0x0100000000ULL
>  
> @@ -205,6 +225,10 @@ module_param(npt, int, S_IRUGO);
>  static int nested = true;
>  module_param(nested, int, S_IRUGO);
>  
> +/* enable / disable AVIC */
> +static int avic;
> +module_param(avic, int, S_IRUGO);
> +
>  static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
>  static void svm_flush_tlb(struct kvm_vcpu *vcpu);
>  static void svm_complete_interrupts(struct vcpu_svm *svm);
> @@ -234,6 +258,18 @@ enum {
>  /* TPR and CR2 are always written before VMRUN */
>  #define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))
>  
> +#define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL
> +
> +static inline bool svm_vcpu_avic_enabled(struct vcpu_svm *svm)
> +{
> +	return (avic && (svm->vmcb->control.int_ctl & AVIC_ENABLE_MASK));
> +}
> +
> +static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
> +{
> +	svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
> +}
> +
>  static inline void mark_all_dirty(struct vmcb *vmcb)
>  {
>  	vmcb->control.clean = 0;
> @@ -923,6 +959,13 @@ static __init int svm_hardware_setup(void)
>  	} else
>  		kvm_disable_tdp();
>  
> +	if (avic && (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)))
> +		avic = false;
> +
> +	if (avic) {
> +		printk(KERN_INFO "kvm: AVIC enabled\n");
> +	}
> +
>  	return 0;
>  
>  err:
> @@ -1000,6 +1043,24 @@ static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
>  	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
>  }
>  
> +static void avic_init_vmcb(struct vcpu_svm *svm)
> +{
> +	struct vmcb *vmcb = svm->vmcb;
> +	struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
> +	phys_addr_t bpa = page_to_phys(svm->avic_backing_page);
> +	phys_addr_t lpa = page_to_phys(vm_data->avic_log_apic_id_table_page);
> +	phys_addr_t ppa = page_to_phys(vm_data->avic_phy_apic_id_table_page);
> +
> +	if (!vmcb)
> +		return;
> +
> +	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
> +	vmcb->control.avic_log_apic_id = lpa & AVIC_HPA_MASK;
> +	vmcb->control.avic_phy_apic_id = ppa & AVIC_HPA_MASK;
> +	vmcb->control.avic_phy_apic_id |= AVIC_PHY_APIC_ID_MAX;
> +	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
> +}
> +
>  static void init_vmcb(struct vcpu_svm *svm)
>  {
>  	struct vmcb_control_area *control = &svm->vmcb->control;
> @@ -1113,6 +1174,139 @@ static void init_vmcb(struct vcpu_svm *svm)
>  	mark_all_dirty(svm->vmcb);
>  
>  	enable_gif(svm);
> +
> +	if (avic)
> +		avic_init_vmcb(svm);
> +}
> +
> +static u64 *avic_get_phy_apic_id_entry(struct kvm_vcpu *vcpu, int index)
> +{
> +	u64 *avic_phy_ait;

u64 *avic_physical_id_table;

> +	struct kvm_arch *vm_data = &vcpu->kvm->arch;
> +
> +	/* Note: APIC ID = 0xff is used for broadcast.
> +	 *       APIC ID > 0xff is reserved.
> +	 */
> +	if (index >= 0xff)
> +		return NULL;
> +
> +	avic_phy_ait = page_address(vm_data->avic_phy_apic_id_table_page);
> +
> +	return &avic_phy_ait[index];
> +}
> +
> +static int avic_init_backing_page(struct kvm_vcpu *vcpu)
> +{
> +	u64 *entry, new_entry;
> +	int id = vcpu->vcpu_id;
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +
> +	if (id >= AVIC_PHY_APIC_ID_MAX)
> +		return -EINVAL;
> +
> +	if (!svm->vcpu.arch.apic->regs)
> +		return -EINVAL;
> +
> +	svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
> +
> +	avic_init_vmcb(svm);
> +
> +	/* Setting AVIC backing page address in the phy APIC ID table */
> +	entry = avic_get_phy_apic_id_entry(vcpu, id);
> +	if (!entry)
> +		return -EINVAL;
> +
> +	new_entry = READ_ONCE(*entry);
> +	new_entry = (page_to_phys(svm->avic_backing_page) &
> +		     AVIC_PHY_APIC_ID__BACKING_PAGE_MSK) |
> +		     AVIC_PHY_APIC_ID__VALID_MSK;
> +	WRITE_ONCE(*entry, new_entry);
> +
> +	svm->avic_phy_apic_id_cache = entry;
> +
> +	return 0;
> +}
> +
> +static void avic_vm_uninit(struct kvm *kvm)
> +{
> +	unsigned long flags;
> +	struct kvm_arch *vm_data = &kvm->arch;
> +
> +	if (vm_data->avic_log_apic_id_table_page)
> +		__free_page(vm_data->avic_log_apic_id_table_page);
> +	if (vm_data->avic_phy_apic_id_table_page)
> +		__free_page(vm_data->avic_phy_apic_id_table_page);
> +}
> +
> +static void avic_vcpu_uninit(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +
> +	if (!avic)
> +		return;
> +
> +	if (svm->avic_phy_apic_id_cache)
> +		svm->avic_phy_apic_id_cache = NULL;
> +}
> +
> +static atomic_t avic_tag_gen = ATOMIC_INIT(1);
> +
> +static inline u32 avic_get_next_tag(void)
> +{
> +	u32 tag = atomic_read(&avic_tag_gen);
> +
> +	atomic_inc(&avic_tag_gen);
> +	return tag;

This should be atomic_inc_return, otherwise it's not thread-safe.

> +}
> +
> +static int avic_vm_init(struct kvm *kvm)
> +{
> +	int err = -ENOMEM;
> +	struct kvm_arch *vm_data = &kvm->arch;
> +	struct page *p_page;
> +	struct page *l_page;
> +
> +	/**
> +	 * Note:
> +	 * AVIC hardware walks the nested page table to check permissions,
> +	 * but does not use the SPA address specified in the leaf page
> +	 * table entry since it uses  address in the AVIC_BACKING_PAGE pointer
> +	 * field of the VMCB. Therefore, we set up the
> +	 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
> +	 */
> +	mutex_lock(&kvm->slots_lock);
> +	err = __x86_set_memory_region(kvm,
> +				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
> +				      APIC_DEFAULT_PHYS_BASE,
> +				      PAGE_SIZE);
> +	mutex_unlock(&kvm->slots_lock);

Please use x86_set_memory_region, since you don't have to do anything
else while the lock is taken.

Paolo

> +	if (err)
> +		goto free_avic;
> +	kvm->arch.apic_access_page_done = true;
> +
> +	/* Allocating physical APIC ID table (4KB) */
> +	p_page = alloc_page(GFP_KERNEL);
> +	if (!p_page)
> +		goto free_avic;
> +
> +	vm_data->avic_phy_apic_id_table_page = p_page;
> +	clear_page(page_address(p_page));
> +
> +	/* Allocating logical APIC ID table (4KB) */
> +	l_page = alloc_page(GFP_KERNEL);
> +	if (!l_page)
> +		goto free_avic;
> +
> +	vm_data->avic_log_apic_id_table_page = l_page;
> +	clear_page(page_address(l_page));
> +
> +	vm_data->avic_tag = avic_get_next_tag();
> +
> +	return 0;
> +
> +free_avic:
> +	avic_vm_uninit(kvm);
> +	return err;
>  }
>  
>  static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> @@ -1131,6 +1325,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  
>  	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
>  	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
> +
> +	if (svm_vcpu_avic_enabled(svm) && !init_event)
> +		avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
>  }
>  
>  static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
> @@ -1169,6 +1366,14 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
>  	if (!hsave_page)
>  		goto free_page3;
>  
> +	if (avic) {
> +		err = avic_init_backing_page(&svm->vcpu);
> +		if (err) {
> +			avic_vcpu_uninit(&svm->vcpu);
> +			goto free_page4;
> +		}
> +	}
> +
>  	svm->nested.hsave = page_address(hsave_page);
>  
>  	svm->msrpm = page_address(msrpm_pages);
> @@ -1187,6 +1392,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
>  
>  	return &svm->vcpu;
>  
> +free_page4:
> +	__free_page(hsave_page);
>  free_page3:
>  	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
>  free_page2:
> @@ -1209,6 +1416,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
>  	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
>  	__free_page(virt_to_page(svm->nested.hsave));
>  	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
> +	avic_vcpu_uninit(vcpu);
>  	kvm_vcpu_uninit(vcpu);
>  	kmem_cache_free(kvm_vcpu_cache, svm);
>  }
> @@ -3371,6 +3579,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
>  	pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
>  	pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
>  	pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
> +	pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
>  	pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
>  	pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
>  	pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
> @@ -3602,11 +3811,27 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>  
>  static bool svm_get_enable_apicv(void)
>  {
> -	return false;
> +	return avic;
>  }
>  
> +static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
> +{
> +}
> +
> +static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
> +{
> +}
> +
> +/* Note: Currently only used by Hyper-V. */
>  static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
>  {
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +	struct vmcb *vmcb = svm->vmcb;
> +
> +	if (!avic)
> +		return;
> +
> +	vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
>  }
>  
>  static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
> @@ -4318,6 +4543,9 @@ static struct kvm_x86_ops svm_x86_ops = {
>  	.vcpu_free = svm_free_vcpu,
>  	.vcpu_reset = svm_vcpu_reset,
>  
> +	.vm_init = avic_vm_init,
> +	.vm_uninit = avic_vm_uninit,
> +
>  	.prepare_guest_switch = svm_prepare_guest_switch,
>  	.vcpu_load = svm_vcpu_load,
>  	.vcpu_put = svm_vcpu_put,
> @@ -4375,6 +4603,8 @@ static struct kvm_x86_ops svm_x86_ops = {
>  	.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
>  	.load_eoi_exitmap = svm_load_eoi_exitmap,
>  	.sync_pir_to_irr = svm_sync_pir_to_irr,
> +	.hwapic_irr_update = svm_hwapic_irr_update,
> +	.hwapic_isr_update = svm_hwapic_isr_update,
>  
>  	.set_tss_addr = svm_set_tss_addr,
>  	.get_tdp_level = get_npt_level,
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 87eac2a..2698e40 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -754,6 +754,14 @@  struct kvm_arch {
 
 	bool irqchip_split;
 	u8 nr_reserved_ioapic_pins;
+
+	/* Struct members for AVIC */
+	struct hlist_node hnode;
+	struct kvm *kvm;
+	u32 ldr_mode;
+	u32 avic_tag;
+	struct page *avic_log_apic_id_table_page;
+	struct page *avic_phy_apic_id_table_page;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 66e26a0..287e635 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -116,6 +116,9 @@  struct __attribute__ ((__packed__)) vmcb_control_area {
 #define V_INTR_MASKING_SHIFT 24
 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
 
+#define AVIC_ENABLE_SHIFT 31
+#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
+
 #define SVM_INTERRUPT_SHADOW_MASK 1
 
 #define SVM_IOIO_STR_SHIFT 2
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c13a64b..621c948 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -14,6 +14,9 @@ 
  * the COPYING file in the top-level directory.
  *
  */
+
+#define pr_fmt(fmt) "SVM: " fmt
+
 #include <linux/kvm_host.h>
 
 #include "irq.h"
@@ -78,6 +81,11 @@  MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
 #define TSC_RATIO_MIN		0x0000000000000001ULL
 #define TSC_RATIO_MAX		0x000000ffffffffffULL
 
+#define AVIC_HPA_MASK	~((0xFFFULL << 52) || 0xFFF)
+
+/* NOTE: Current max index allowed for physical APIC ID table is 255 */
+#define AVIC_PHY_APIC_ID_MAX	0xFF
+
 static bool erratum_383_found __read_mostly;
 
 static const u32 host_save_user_msrs[] = {
@@ -162,8 +170,20 @@  struct vcpu_svm {
 
 	/* cached guest cpuid flags for faster access */
 	bool nrips_enabled	: 1;
+
+	struct page *avic_backing_page;
+	u64 *avic_phy_apic_id_cache;
+	bool avic_was_running;
 };
 
+#define AVIC_LOG_APIC_ID__GUEST_PHY_APIC_ID_MSK	(0xFF)
+#define AVIC_LOG_APIC_ID__VALID_MSK		(1 << 31)
+
+#define AVIC_PHY_APIC_ID__HOST_PHY_APIC_ID_MSK	(0xFFULL)
+#define AVIC_PHY_APIC_ID__BACKING_PAGE_MSK	(0xFFFFFFFFFFULL << 12)
+#define AVIC_PHY_APIC_ID__IS_RUN_MSK		(1ULL << 62)
+#define AVIC_PHY_APIC_ID__VALID_MSK		(1ULL << 63)
+
 static DEFINE_PER_CPU(u64, current_tsc_ratio);
 #define TSC_RATIO_DEFAULT	0x0100000000ULL
 
@@ -205,6 +225,10 @@  module_param(npt, int, S_IRUGO);
 static int nested = true;
 module_param(nested, int, S_IRUGO);
 
+/* enable / disable AVIC */
+static int avic;
+module_param(avic, int, S_IRUGO);
+
 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -234,6 +258,18 @@  enum {
 /* TPR and CR2 are always written before VMRUN */
 #define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))
 
+#define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL
+
+static inline bool svm_vcpu_avic_enabled(struct vcpu_svm *svm)
+{
+	return (avic && (svm->vmcb->control.int_ctl & AVIC_ENABLE_MASK));
+}
+
+static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
+{
+	svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
+}
+
 static inline void mark_all_dirty(struct vmcb *vmcb)
 {
 	vmcb->control.clean = 0;
@@ -923,6 +959,13 @@  static __init int svm_hardware_setup(void)
 	} else
 		kvm_disable_tdp();
 
+	if (avic && (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)))
+		avic = false;
+
+	if (avic) {
+		printk(KERN_INFO "kvm: AVIC enabled\n");
+	}
+
 	return 0;
 
 err:
@@ -1000,6 +1043,24 @@  static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
+static void avic_init_vmcb(struct vcpu_svm *svm)
+{
+	struct vmcb *vmcb = svm->vmcb;
+	struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
+	phys_addr_t bpa = page_to_phys(svm->avic_backing_page);
+	phys_addr_t lpa = page_to_phys(vm_data->avic_log_apic_id_table_page);
+	phys_addr_t ppa = page_to_phys(vm_data->avic_phy_apic_id_table_page);
+
+	if (!vmcb)
+		return;
+
+	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
+	vmcb->control.avic_log_apic_id = lpa & AVIC_HPA_MASK;
+	vmcb->control.avic_phy_apic_id = ppa & AVIC_HPA_MASK;
+	vmcb->control.avic_phy_apic_id |= AVIC_PHY_APIC_ID_MAX;
+	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
+}
+
 static void init_vmcb(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1113,6 +1174,139 @@  static void init_vmcb(struct vcpu_svm *svm)
 	mark_all_dirty(svm->vmcb);
 
 	enable_gif(svm);
+
+	if (avic)
+		avic_init_vmcb(svm);
+}
+
+static u64 *avic_get_phy_apic_id_entry(struct kvm_vcpu *vcpu, int index)
+{
+	u64 *avic_phy_ait;
+	struct kvm_arch *vm_data = &vcpu->kvm->arch;
+
+	/* Note: APIC ID = 0xff is used for broadcast.
+	 *       APIC ID > 0xff is reserved.
+	 */
+	if (index >= 0xff)
+		return NULL;
+
+	avic_phy_ait = page_address(vm_data->avic_phy_apic_id_table_page);
+
+	return &avic_phy_ait[index];
+}
+
+static int avic_init_backing_page(struct kvm_vcpu *vcpu)
+{
+	u64 *entry, new_entry;
+	int id = vcpu->vcpu_id;
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (id >= AVIC_PHY_APIC_ID_MAX)
+		return -EINVAL;
+
+	if (!svm->vcpu.arch.apic->regs)
+		return -EINVAL;
+
+	svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
+
+	avic_init_vmcb(svm);
+
+	/* Setting AVIC backing page address in the phy APIC ID table */
+	entry = avic_get_phy_apic_id_entry(vcpu, id);
+	if (!entry)
+		return -EINVAL;
+
+	new_entry = READ_ONCE(*entry);
+	new_entry = (page_to_phys(svm->avic_backing_page) &
+		     AVIC_PHY_APIC_ID__BACKING_PAGE_MSK) |
+		     AVIC_PHY_APIC_ID__VALID_MSK;
+	WRITE_ONCE(*entry, new_entry);
+
+	svm->avic_phy_apic_id_cache = entry;
+
+	return 0;
+}
+
+static void avic_vm_uninit(struct kvm *kvm)
+{
+	unsigned long flags;
+	struct kvm_arch *vm_data = &kvm->arch;
+
+	if (vm_data->avic_log_apic_id_table_page)
+		__free_page(vm_data->avic_log_apic_id_table_page);
+	if (vm_data->avic_phy_apic_id_table_page)
+		__free_page(vm_data->avic_phy_apic_id_table_page);
+}
+
+static void avic_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (!avic)
+		return;
+
+	if (svm->avic_phy_apic_id_cache)
+		svm->avic_phy_apic_id_cache = NULL;
+}
+
+static atomic_t avic_tag_gen = ATOMIC_INIT(1);
+
+static inline u32 avic_get_next_tag(void)
+{
+	u32 tag = atomic_read(&avic_tag_gen);
+
+	atomic_inc(&avic_tag_gen);
+	return tag;
+}
+
+static int avic_vm_init(struct kvm *kvm)
+{
+	int err = -ENOMEM;
+	struct kvm_arch *vm_data = &kvm->arch;
+	struct page *p_page;
+	struct page *l_page;
+
+	/**
+	 * Note:
+	 * AVIC hardware walks the nested page table to check permissions,
+	 * but does not use the SPA address specified in the leaf page
+	 * table entry since it uses  address in the AVIC_BACKING_PAGE pointer
+	 * field of the VMCB. Therefore, we set up the
+	 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
+	 */
+	mutex_lock(&kvm->slots_lock);
+	err = __x86_set_memory_region(kvm,
+				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+				      APIC_DEFAULT_PHYS_BASE,
+				      PAGE_SIZE);
+	mutex_unlock(&kvm->slots_lock);
+	if (err)
+		goto free_avic;
+	kvm->arch.apic_access_page_done = true;
+
+	/* Allocating physical APIC ID table (4KB) */
+	p_page = alloc_page(GFP_KERNEL);
+	if (!p_page)
+		goto free_avic;
+
+	vm_data->avic_phy_apic_id_table_page = p_page;
+	clear_page(page_address(p_page));
+
+	/* Allocating logical APIC ID table (4KB) */
+	l_page = alloc_page(GFP_KERNEL);
+	if (!l_page)
+		goto free_avic;
+
+	vm_data->avic_log_apic_id_table_page = l_page;
+	clear_page(page_address(l_page));
+
+	vm_data->avic_tag = avic_get_next_tag();
+
+	return 0;
+
+free_avic:
+	avic_vm_uninit(kvm);
+	return err;
 }
 
 static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -1131,6 +1325,9 @@  static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
 	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
+
+	if (svm_vcpu_avic_enabled(svm) && !init_event)
+		avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
 }
 
 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
@@ -1169,6 +1366,14 @@  static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (!hsave_page)
 		goto free_page3;
 
+	if (avic) {
+		err = avic_init_backing_page(&svm->vcpu);
+		if (err) {
+			avic_vcpu_uninit(&svm->vcpu);
+			goto free_page4;
+		}
+	}
+
 	svm->nested.hsave = page_address(hsave_page);
 
 	svm->msrpm = page_address(msrpm_pages);
@@ -1187,6 +1392,8 @@  static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	return &svm->vcpu;
 
+free_page4:
+	__free_page(hsave_page);
 free_page3:
 	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
 free_page2:
@@ -1209,6 +1416,7 @@  static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 	__free_page(virt_to_page(svm->nested.hsave));
 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
+	avic_vcpu_uninit(vcpu);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
 }
@@ -3371,6 +3579,7 @@  static void dump_vmcb(struct kvm_vcpu *vcpu)
 	pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
 	pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
 	pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
+	pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
 	pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
 	pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
 	pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
@@ -3602,11 +3811,27 @@  static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 
 static bool svm_get_enable_apicv(void)
 {
-	return false;
+	return avic;
 }
 
+static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
+{
+}
+
+static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
+{
+}
+
+/* Note: Currently only used by Hyper-V. */
 static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb *vmcb = svm->vmcb;
+
+	if (!avic)
+		return;
+
+	vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
 }
 
 static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
@@ -4318,6 +4543,9 @@  static struct kvm_x86_ops svm_x86_ops = {
 	.vcpu_free = svm_free_vcpu,
 	.vcpu_reset = svm_vcpu_reset,
 
+	.vm_init = avic_vm_init,
+	.vm_uninit = avic_vm_uninit,
+
 	.prepare_guest_switch = svm_prepare_guest_switch,
 	.vcpu_load = svm_vcpu_load,
 	.vcpu_put = svm_vcpu_put,
@@ -4375,6 +4603,8 @@  static struct kvm_x86_ops svm_x86_ops = {
 	.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.sync_pir_to_irr = svm_sync_pir_to_irr,
+	.hwapic_irr_update = svm_hwapic_irr_update,
+	.hwapic_isr_update = svm_hwapic_isr_update,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,