diff mbox

[PART1,RFC,v4,08/11] svm: Add VMEXIT handlers for AVIC

Message ID 1460017232-17429-9-git-send-email-Suravee.Suthikulpanit@amd.com (mailing list archive)
State New, archived
Headers show

Commit Message

Suthikulpanit, Suravee April 7, 2016, 8:20 a.m. UTC
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

This patch introduces VMEXIT handlers, avic_incomplete_ipi_interception()
and avic_unaccelerated_access_interception() along with two trace points
(trace_kvm_avic_incomplete_ipi and trace_kvm_avic_unaccelerated_access).

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 arch/x86/include/uapi/asm/svm.h |   9 +-
 arch/x86/kvm/lapic.h            |   3 +
 arch/x86/kvm/svm.c              | 246 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/trace.h            |  57 ++++++++++
 arch/x86/kvm/x86.c              |   2 +
 5 files changed, 316 insertions(+), 1 deletion(-)

Comments

Radim Krčmář April 12, 2016, 4:22 p.m. UTC | #1
2016-04-07 03:20-0500, Suravee Suthikulpanit:
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> 
> This patch introduces VMEXIT handlers, avic_incomplete_ipi_interception()
> and avic_unaccelerated_access_interception() along with two trace points
> (trace_kvm_avic_incomplete_ipi and trace_kvm_avic_unaccelerated_access).
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> @@ -3515,6 +3515,250 @@ static int mwait_interception(struct vcpu_svm *svm)
> +static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u8 mda, bool flat)
> +{
> +	struct kvm_arch *vm_data = &vcpu->kvm->arch;
> +	int index;
> +	u32 *logical_apic_id_table;
> +
> +	if (flat) { /* flat */
> +		if (mda > 7)

Don't you want to check that just one bit is set?

> +			return NULL;
> +		index = mda;
> +	} else { /* cluster */
> +		int apic_id = mda & 0xf;
> +		int cluster_id = (mda & 0xf0) >> 8;

">> 4".

> +
> +		if (apic_id > 4 || cluster_id >= 0xf)
> +			return NULL;
> +		index = (cluster_id << 2) + apic_id;

ffs(apic_id), because 'apic_id' must be compacted into 2 bits.

> +	}
> +	logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
> +
> +	return &logical_apic_id_table[index];
> +}
> +
> +static int avic_handle_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id,
> +				 u8 logical_id)
> +{
> +	u32 mod;
> +	u32 *entry, new_entry;
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +
> +	if (!svm)
> +		return -EINVAL;

(No need to check, svm is taken for granted.)

> +
> +	mod = (kvm_apic_get_reg(svm->vcpu.arch.apic, APIC_DFR) >> 28) & 0xf;
> +	entry = avic_get_logical_id_entry(vcpu, logical_id, (mod == 0xf));

(Use "kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT".)

> +	if (!entry)
> +		return -EINVAL;
> +
> +	new_entry = READ_ONCE(*entry);
> +	new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
> +	new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
> +	new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
> +	WRITE_ONCE(*entry, new_entry);
> +
> +	return 0;
> +}
> +
> +static int avic_unaccel_trap_write(struct vcpu_svm *svm)
> +{
> +	u32 offset = svm->vmcb->control.exit_info_1 & 0xFF0;

The previous function knew the offset, just pass it as an argument.
(Or use AVIC_UNACCEL_ACCESS_OFFSET_MASK, because defining and not using
 it everywhere is too sad.)

> +	struct kvm_lapic *apic = svm->vcpu.arch.apic;
> +	u32 reg = kvm_apic_get_reg(apic, offset);
> +
> +	switch (offset) {
> +	case APIC_ID: {
> +		u32 aid = (reg >> 24) & 0xff;
> +		u64 *o_ent = avic_get_physical_id_entry(&svm->vcpu,
> +							svm->vcpu.vcpu_id);
> +		u64 *n_ent = avic_get_physical_id_entry(&svm->vcpu, aid);

Write old and new.  (Skip "_ent" if you want to save characters.)

> +		if (!n_ent || !o_ent)
> +			return 0;
> +
> +		/* We need to move physical_id_entry to new offset */
> +		*n_ent = *o_ent;
> +		*o_ent = 0ULL;
> +		svm->avic_physical_id_cache = n_ent;
> +		break;
> +	}
> +	case APIC_LDR: {
> +		int ret, lid;
> +		int dlid = (reg >> 24) & 0xff;
> +
> +		if (!dlid)
> +			return 0;

ldr == 0 should be handled as well.

> +
> +		lid = ffs(dlid) - 1;
> +		ret = avic_handle_ldr_write(&svm->vcpu, svm->vcpu.vcpu_id, lid);
> +		if (ret)
> +			return 0;

OS can actually change LDR, so the old one should be invalidated.

(No OS does, but that is not an important factor for the hypervisor.)

> +
> +		break;
> +	}
> +	case APIC_DFR: {
> +		struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
> +		u32 mod = (reg >> 28) & 0xf;
> +
> +		/*
> +		 * We assume that all local APICs are using the same type.
> +		 * If this changes, we need to rebuild the AVIC logical
> +		 * APID id table with subsequent write to APIC_LDR.
> +		 */

The code doesn't rebuild avic_logical_id_table_page, it just flushes the
old one.

> +		if (vm_data->ldr_mode != mod) {
> +			clear_page(page_address(vm_data->avic_logical_id_table_page));
> +			vm_data->ldr_mode = mod;
> +		}
> +		break;
> +	}

All these cases need to be called on KVM_SET_LAPIC -- the userspace can
provide completely new set of APIC registers and AVIC should build its
maps with them.  Test with save/restore or migration.

> +static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
> +{
> +	int ret = 0;
> +	u32 offset = svm->vmcb->control.exit_info_1 &
> +		     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
> +	u32 vector = svm->vmcb->control.exit_info_2 &
> +		     AVIC_UNACCEL_ACCESS_VECTOR_MASK;
> +	bool write = (svm->vmcb->control.exit_info_1 >> 32) &
> +		     AVIC_UNACCEL_ACCESS_WRITE_MASK;
> +	bool trap = is_avic_unaccelerated_access_trap(offset);
> +
> +	trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
> +					    trap, write, vector);
> +
> +	/**
> +	 * AVIC does not support x2APIC registers, and we only advertise
> +	 * xAPIC when enable AVIC. Therefore, access to these registers
> +	 * will not be supported.
> +	 */

x2APIC only has MSR interface and I don't think it has much do with
offset >= 0x400.  AMD defines few registers in the extended area, but we
don't seem emulate them.

> +	if (offset >= 0x400) {
> +		WARN(1, "Unsupported APIC offset %#x\n", offset);

"printk_ratelimited(KERN_INFO " is the most severe message you could
give.  I think that not printing anything is best,

> +		return ret;

because we should not return, but continue to emulate the access.

> +	}
> +
> +	if (trap) {
> +		/* Handling Trap */
> +		if (!write) /* Trap read should never happens */
> +			BUG();

(BUG_ON(!write) is shorter, though I would avoid BUG -- only guests are
 going to fail, so we don't need to kill the host.)

> +		ret = avic_unaccel_trap_write(svm);
> +	} else {
> +		/* Handling Fault */
> +		ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
> +	}
> +
> +	return ret;
> +}
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini April 12, 2016, 10:29 p.m. UTC | #2
On 12/04/2016 18:22, Radim Krčmář wrote:
>> > +
>> > +		if (apic_id > 4 || cluster_id >= 0xf)
>> > +			return NULL;
>> > +		index = (cluster_id << 2) + apic_id;
> ffs(apic_id), because 'apic_id' must be compacted into 2 bits.
> 

ffs(apic_id)-1 actually.

Thanks for the review, Radim.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář April 13, 2016, 12:37 p.m. UTC | #3
2016-04-13 00:29+0200, Paolo Bonzini:
> On 12/04/2016 18:22, Radim Krčmář wrote:
>>> > +		if (apic_id > 4 || cluster_id >= 0xf)
>>> > +			return NULL;
>>> > +		index = (cluster_id << 2) + apic_id;
>> ffs(apic_id), because 'apic_id' must be compacted into 2 bits.
> 
> ffs(apic_id)-1 actually.

Yes, thanks.

(And I missed that the confusion begins by passing "ffs(dlid) - 1" as
 mda to avic_handle_ldr_write, because the rest cannot work after that.)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Suthikulpanit, Suravee April 28, 2016, 10:08 p.m. UTC | #4
Hi Radim / Paolo,

Sorry for delay in response.

On 4/12/2016 11:22 AM, Radim Krčmář wrote:
> 2016-04-07 03:20-0500, Suravee Suthikulpanit:
>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>
>> This patch introduces VMEXIT handlers, avic_incomplete_ipi_interception()
>> and avic_unaccelerated_access_interception() along with two trace points
>> (trace_kvm_avic_incomplete_ipi and trace_kvm_avic_unaccelerated_access).
>>
>> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>> ---
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> @@ -3515,6 +3515,250 @@ static int mwait_interception(struct vcpu_svm *svm)
>> +static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u8 mda, bool flat)
>> +{
>> +	struct kvm_arch *vm_data = &vcpu->kvm->arch;
>> +	int index;
>> +	u32 *logical_apic_id_table;
>> +
>> +	if (flat) { /* flat */
>> +		if (mda > 7)
>
> Don't you want to check that just one bit it set?
>
>> +			return NULL;
>> +		index = mda;
>> +	} else { /* cluster */
>> +		int apic_id = mda & 0xf;
>> +		int cluster_id = (mda & 0xf0) >> 8;
>
> ">> 4".
>
>> +
>> +		if (apic_id > 4 || cluster_id >= 0xf)
>> +			return NULL;
>> +		index = (cluster_id << 2) + apic_id;
>
> ffs(apic_id), because 'apic_id' must be compacted into 2 bits.
>
>> +	}
>> +	logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
>> +
>> +	return &logical_apic_id_table[index];
>> +}

I have quite messed up in the logic here for handling the logical 
cluster ID. Sorry for not catching this earlier. I'm rewriting this 
function altogether to simplify it in the V5.

>> [...]
>> +		lid = ffs(dlid) - 1;
>> +		ret = avic_handle_ldr_write(&svm->vcpu, svm->vcpu.vcpu_id, lid);
>> +		if (ret)
>> +			return 0;
>
> OS can actually change LDR, so the old one should be invalidated.
>
> (No OS does, but that is not an important factor for the hypervisor.)
>

By validating the old one, are you suggesting that we should disable the 
logical APIC table entry previously used by this vcpu? If so, that means 
we would need to cached the previous LDR value since the one in vAPIC 
backing page would already be updated.

>> [...]
>
>> +		if (vm_data->ldr_mode != mod) {
>> +			clear_page(page_address(vm_data->avic_logical_id_table_page));
>> +			vm_data->ldr_mode = mod;
>> +		}
>> +		break;
>> +	}
>
> All these cases need to be called on KVM_SET_LAPIC -- the userspace can
> provide completely new set of APIC registers and AVIC should build its
> maps with them.  Test with save/restore or migration.

Hm.. This means we might need to introduce a new hook which is called 
from the arch/x86/kvm/lapic.c: kvm_apic_post_state_restore(). Probably 
something like kvm_x86_ops->apic_post_state_restore(), which sets up the 
new physical and logical APIC id tables for AVIC.

If this works, I'll add support to handle this and test with the 
migration stuff in the V5.

>> +	if (offset >= 0x400) {
>> +		WARN(1, "Unsupported APIC offset %#x\n", offset);
>
> "printk_ratelimited(KERN_INFO " is the most severe message you could
> give.  I think that not printing anything is best,
>
>> +		return ret;
>
> because we should not return, but continue to emulate the access.

Then, this would continue as if we are handling the normal fault access.

>
>> +	}
>> +
>> +	if (trap) {
>> +		/* Handling Trap */
>> +		if (!write) /* Trap read should never happens */
>> +			BUG();
>
> (BUG_ON(!write) is shorter, though I would avoid BUG -- only guests are
>   going to fail, so we don't need to kill the host.)

Ok. What about just WARN_ONCE(!write, "svm: Handling trap read.\n");

Thanks,
Suravee
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář April 29, 2016, 2:56 p.m. UTC | #5
2016-04-28 17:08-0500, Suravee Suthikulpanit:
> On 4/12/2016 11:22 AM, Radim Krčmář wrote:
>> 2016-04-07 03:20-0500, Suravee Suthikulpanit:
>> > From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>> > 
>> > This patch introduces VMEXIT handlers, avic_incomplete_ipi_interception()
>> > and avic_unaccelerated_access_interception() along with two trace points
>> > (trace_kvm_avic_incomplete_ipi and trace_kvm_avic_unaccelerated_access).
>> > 
>> > Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>> > ---
>> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> > @@ -3515,6 +3515,250 @@ static int mwait_interception(struct vcpu_svm *svm)
>> > [...]
>> > +		lid = ffs(dlid) - 1;
>> > +		ret = avic_handle_ldr_write(&svm->vcpu, svm->vcpu.vcpu_id, lid);
>> > +		if (ret)
>> > +			return 0;
>> 
>> OS can actually change LDR, so the old one should be invalidated.
>> 
>> (No OS does, but that is not an important factor for the hypervisor.)
>> 
> 
> By validating the old one, are you suggesting that we should disable the
> logical APIC table entry previously used by this vcpu? If so, that means we
> would need to cached the previous LDR value since the one in vAPIC backing
> page would already be updated.

Yes, the cache could be used to speed up recalculate_apic_map() too, so
it might not be a total waste.

Which reminds me that physical APIC_ID doesn't use correct cache.
svm->vcpu.vcpu_id is only the initial ID, but APIC_ID could be changed
more than once.
It would be great to make APIC_ID read-only in all cases, because x86
spec allows us to do so, but KVM_SET_LAPIC can set APIC ID too and I
think we don't retroactively modify userspace API ... Paolo?

>> > [...]
>> 
>> > +		if (vm_data->ldr_mode != mod) {
>> > +			clear_page(page_address(vm_data->avic_logical_id_table_page));
>> > +			vm_data->ldr_mode = mod;
>> > +		}
>> > +		break;
>> > +	}
>> 
>> All these cases need to be called on KVM_SET_LAPIC -- the userspace can
>> provide completely new set of APIC registers and AVIC should build its
>> maps with them.  Test with save/restore or migration.
> 
> Hm.. This means we might need to introduce a new hook which is called from
> the arch/x86/kvm/lapic.c: kvm_apic_post_state_restore(). Probably something
> like kvm_x86_ops->apic_post_state_restore(), which sets up the new physical
> and logical APIC id tables for AVIC.

Sounds good.  I imagine the callback would just call
avic_unaccel_trap_write() for relevant registers.

>> > +		return ret;
>> 
>> because we should not return, but continue to emulate the access.
> 
> Then, this would continue as if we are handling the normal fault access.

Exactly, it is a normal access to an undefined register.

>> > +	}
>> > +
>> > +	if (trap) {
>> > +		/* Handling Trap */
>> > +		if (!write) /* Trap read should never happens */
>> > +			BUG();
>> 
>> (BUG_ON(!write) is shorter, though I would avoid BUG -- only guests are
>>   going to fail, so we don't need to kill the host.)
> 
> Ok. What about just WARN_ONCE(!write, "svm: Handling trap read.\n");

Sure, it's a hardware bug and calling avic_unaccel_trap_write() on a
read access shouldn't result in a bug.  I am slightly inclined towards
'if (trap && write)' and optional 'WARN_ONCE(trap,' in the else branch.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 8a4add8..b9e9bb2 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -73,6 +73,8 @@ 
 #define SVM_EXIT_MWAIT_COND    0x08c
 #define SVM_EXIT_XSETBV        0x08d
 #define SVM_EXIT_NPF           0x400
+#define SVM_EXIT_AVIC_INCOMPLETE_IPI		0x401
+#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS	0x402
 
 #define SVM_EXIT_ERR           -1
 
@@ -107,8 +109,10 @@ 
 	{ SVM_EXIT_SMI,         "smi" }, \
 	{ SVM_EXIT_INIT,        "init" }, \
 	{ SVM_EXIT_VINTR,       "vintr" }, \
+	{ SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \
 	{ SVM_EXIT_CPUID,       "cpuid" }, \
 	{ SVM_EXIT_INVD,        "invd" }, \
+	{ SVM_EXIT_PAUSE,       "pause" }, \
 	{ SVM_EXIT_HLT,         "hlt" }, \
 	{ SVM_EXIT_INVLPG,      "invlpg" }, \
 	{ SVM_EXIT_INVLPGA,     "invlpga" }, \
@@ -127,7 +131,10 @@ 
 	{ SVM_EXIT_MONITOR,     "monitor" }, \
 	{ SVM_EXIT_MWAIT,       "mwait" }, \
 	{ SVM_EXIT_XSETBV,      "xsetbv" }, \
-	{ SVM_EXIT_NPF,         "npf" }
+	{ SVM_EXIT_NPF,         "npf" }, \
+	{ SVM_EXIT_RSM,         "rsm" }, \
+	{ SVM_EXIT_AVIC_INCOMPLETE_IPI,		"avic_incomplete_ipi" }, \
+	{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }
 
 
 #endif /* _UAPI__SVM_H */
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index a70cb62..2fc86b7 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -9,6 +9,9 @@ 
 #define KVM_APIC_SIPI		1
 #define KVM_APIC_LVT_NUM	6
 
+#define KVM_APIC_SHORT_MASK	0xc0000
+#define KVM_APIC_DEST_MASK	0x800
+
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f9547bc..13fba3b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3515,6 +3515,250 @@  static int mwait_interception(struct vcpu_svm *svm)
 	return nop_interception(svm);
 }
 
+/* Failure cause reported in EXITINFO2[63:32] on an AVIC incomplete-IPI #VMEXIT. */
+enum avic_ipi_failure_cause {
+	AVIC_IPI_FAILURE_INVALID_INT_TYPE,	/* IPI type not accelerated by AVIC; emulate it */
+	AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,	/* IRR set by HW, but target vcpu(s) not running */
+	AVIC_IPI_FAILURE_INVALID_TARGET,
+	AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+};
+
+/*
+ * Handle a #VMEXIT for an IPI that AVIC hardware could not deliver on its
+ * own.  EXITINFO1 carries the guest ICR (high:low), EXITINFO2 carries the
+ * failure cause (high 32 bits) and the backing-page index (low byte).
+ * Always returns 1: the exit is fully handled here.
+ */
+static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
+{
+	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
+	u32 icrl = svm->vmcb->control.exit_info_1;
+	u32 id = svm->vmcb->control.exit_info_2 >> 32;
+	u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
+	struct kvm_lapic *apic = svm->vcpu.arch.apic;
+
+	trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
+
+	switch (id) {
+	case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
+		/*
+		 * AVIC hardware handles the generation of
+		 * IPIs when the specified Message Type is Fixed
+		 * (also known as fixed delivery mode) and
+		 * the Trigger Mode is edge-triggered. The hardware
+		 * also supports self and broadcast delivery modes
+		 * specified via the Destination Shorthand(DSH)
+		 * field of the ICRL. Logical and physical APIC ID
+		 * formats are supported. All other IPI types cause
+		 * a #VMEXIT, which needs to be emulated.
+		 */
+		kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
+		kvm_lapic_reg_write(apic, APIC_ICR, icrl);
+		break;
+	case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
+		int i;
+		struct kvm_vcpu *vcpu;
+		struct kvm *kvm = svm->vcpu.kvm;
+
+		/*
+		 * At this point, we expect that the AVIC HW has already
+		 * set the appropriate IRR bits on the valid target
+		 * vcpus. So, we just need to kick the appropriate vcpu.
+		 */
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			bool m = kvm_apic_match_dest(vcpu, apic,
+						     icrl & KVM_APIC_SHORT_MASK,
+						     GET_APIC_DEST_FIELD(icrh),
+						     icrl & KVM_APIC_DEST_MASK);
+
+			if (m && !avic_vcpu_is_running(vcpu))
+				kvm_vcpu_wake_up(vcpu);
+		}
+		break;
+	}
+	case AVIC_IPI_FAILURE_INVALID_TARGET:
+		break;
+	case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
+		WARN_ONCE(1, "Invalid backing page\n");
+		break;
+	default:
+		pr_err("Unknown IPI interception\n");
+	}
+
+	return 1;
+}
+
+/*
+ * Return a pointer to the logical APIC ID table entry addressed by @mda,
+ * or NULL when @mda is out of range for the given addressing mode.
+ *
+ * NOTE(review): in cluster mode the low nibble of a logical ID is a bit
+ * mask of up to four APICs, yet it is used directly as an index here, and
+ * the caller passes a pre-compacted bit index rather than a raw LDR; this
+ * mapping is being reworked -- see the v5 discussion in this thread.
+ */
+static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u8 mda, bool flat)
+{
+	struct kvm_arch *vm_data = &vcpu->kvm->arch;
+	u32 *logical_apic_id_table;
+	int index;
+
+	if (flat) { /* flat */
+		if (mda > 7)
+			return NULL;
+		index = mda;
+	} else { /* cluster */
+		int apic_id = mda & 0xf;
+		int cluster_id = (mda & 0xf0) >> 4;
+
+		if (apic_id > 4 || cluster_id >= 0xf)
+			return NULL;
+		/* four 32-bit entries per cluster */
+		index = (cluster_id << 2) + apic_id;
+	}
+	logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
+
+	return &logical_apic_id_table[index];
+}
+
+/*
+ * Record @g_physical_id in the logical APIC ID table slot selected by
+ * @logical_id and mark the entry valid.  Returns 0 on success, -EINVAL
+ * when @logical_id does not map to a table entry.
+ */
+static int avic_handle_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id,
+				 u8 logical_id)
+{
+	u32 *entry, new_entry;
+	bool flat = kvm_apic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
+
+	entry = avic_get_logical_id_entry(vcpu, logical_id, flat);
+	if (!entry)
+		return -EINVAL;
+
+	/* Read-modify-write the 32-bit table entry in one shot so concurrent
+	 * HW readers never observe a torn value. */
+	new_entry = READ_ONCE(*entry);
+	new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
+	new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
+	new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
+	WRITE_ONCE(*entry, new_entry);
+
+	return 0;
+}
+
+/*
+ * Handle a trap-style unaccelerated-access exit: the guest's write has
+ * already been committed to the vAPIC backing page, so mirror its side
+ * effects into the AVIC physical/logical ID tables, then replay the write
+ * through the common lapic emulation.  Returns 1 (exit handled), or 0 on
+ * an inconsistent table state.
+ */
+static int avic_unaccel_trap_write(struct vcpu_svm *svm)
+{
+	u32 offset = svm->vmcb->control.exit_info_1 & 0xFF0;
+	struct kvm_lapic *apic = svm->vcpu.arch.apic;
+	u32 reg = kvm_apic_get_reg(apic, offset);	/* value the guest already wrote */
+
+	switch (offset) {
+	case APIC_ID: {
+		/* Guest changed its APIC ID: relocate the physical ID entry. */
+		u32 aid = (reg >> 24) & 0xff;
+		u64 *o_ent = avic_get_physical_id_entry(&svm->vcpu,
+							svm->vcpu.vcpu_id);
+		u64 *n_ent = avic_get_physical_id_entry(&svm->vcpu, aid);
+
+		if (!n_ent || !o_ent)
+			return 0;
+
+		/* We need to move physical_id_entry to new offset */
+		*n_ent = *o_ent;
+		*o_ent = 0ULL;
+		svm->avic_physical_id_cache = n_ent;
+		break;
+	}
+	case APIC_LDR: {
+		int ret, lid;
+		int dlid = (reg >> 24) & 0xff;
+
+		/* NOTE(review): LDR == 0 and LDR rewrites should also
+		 * invalidate the old logical ID entry -- pending v5. */
+		if (!dlid)
+			return 0;
+
+		lid = ffs(dlid) - 1;
+		ret = avic_handle_ldr_write(&svm->vcpu, svm->vcpu.vcpu_id, lid);
+		if (ret)
+			return 0;
+
+		break;
+	}
+	case APIC_DFR: {
+		struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
+		u32 mod = (reg >> 28) & 0xf;
+
+		/*
+		 * We assume that all local APICs are using the same type.
+		 * If the DFR mode changes, flush the AVIC logical APIC id
+		 * table; it is repopulated by subsequent APIC_LDR writes.
+		 */
+		if (vm_data->ldr_mode != mod) {
+			clear_page(page_address(vm_data->avic_logical_id_table_page));
+			vm_data->ldr_mode = mod;
+		}
+		break;
+	}
+	default:
+		break;
+	}
+
+	/* Replay the register write through the common lapic emulation. */
+	kvm_lapic_reg_write(apic, offset, reg);
+
+	return 1;
+}
+
+/*
+ * Registers whose unaccelerated access is reported by AVIC as a trap
+ * (the write is already committed to the backing page when we exit)
+ * rather than as a fault requiring instruction emulation.
+ */
+static bool is_avic_unaccelerated_access_trap(u32 offset)
+{
+	switch (offset) {
+	case APIC_ID:
+	case APIC_EOI:
+	case APIC_RRR:
+	case APIC_LDR:
+	case APIC_DFR:
+	case APIC_SPIV:
+	case APIC_ESR:
+	case APIC_ICR:
+	case APIC_LVTT:
+	case APIC_LVTTHMR:
+	case APIC_LVTPC:
+	case APIC_LVT0:
+	case APIC_LVT1:
+	case APIC_LVTERR:
+	case APIC_TMICT:
+	case APIC_TDCR:
+		return true;
+	}
+	return false;
+}
+
+#define AVIC_UNACCEL_ACCESS_WRITE_MASK		1
+#define AVIC_UNACCEL_ACCESS_OFFSET_MASK		0xFF0
+#define AVIC_UNACCEL_ACCESS_VECTOR_MASK		0xFFFFFFFF
+
+/*
+ * Handle a #VMEXIT for an APIC register access that AVIC did not
+ * accelerate.  Trap-type exits (write already committed to the backing
+ * page) are propagated into the AVIC tables and replayed through the
+ * lapic emulation; fault-type exits re-run the guest instruction in the
+ * instruction emulator.  Accesses outside the xAPIC register space are
+ * never trap-type and simply take the emulation path.
+ */
+static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
+{
+	int ret = 0;
+	u32 offset = svm->vmcb->control.exit_info_1 &
+		     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
+	u32 vector = svm->vmcb->control.exit_info_2 &
+		     AVIC_UNACCEL_ACCESS_VECTOR_MASK;
+	bool write = (svm->vmcb->control.exit_info_1 >> 32) &
+		     AVIC_UNACCEL_ACCESS_WRITE_MASK;
+	bool trap = is_avic_unaccelerated_access_trap(offset);
+
+	trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
+					    trap, write, vector);
+
+	if (trap) {
+		/* Handling Trap */
+		/* A trap-type read is a hardware anomaly; warn, don't kill the host. */
+		WARN_ONCE(!write, "svm: Handling trap read.\n");
+		ret = avic_unaccel_trap_write(svm);
+	} else {
+		/* Handling Fault */
+		ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+	}
+
+	return ret;
+}
+
 static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR0]			= cr_interception,
 	[SVM_EXIT_READ_CR3]			= cr_interception,
@@ -3578,6 +3822,8 @@  static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_XSETBV]			= xsetbv_interception,
 	[SVM_EXIT_NPF]				= pf_interception,
 	[SVM_EXIT_RSM]                          = emulate_on_interception,
+	[SVM_EXIT_AVIC_INCOMPLETE_IPI]		= avic_incomplete_ipi_interception,
+	[SVM_EXIT_AVIC_UNACCELERATED_ACCESS]	= avic_unaccelerated_access_interception,
 };
 
 static void dump_vmcb(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 2f1ea2f..39f264c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1292,6 +1292,63 @@  TRACE_EVENT(kvm_hv_stimer_cleanup,
 		  __entry->vcpu_id, __entry->timer_index)
 );
 
+/*
+ * Tracepoint for AMD AVIC
+ */
+TRACE_EVENT(kvm_avic_incomplete_ipi,
+	    /* icrh/icrl: guest ICR halves; id: failure cause; index: backing-page index */
+	    TP_PROTO(u32 vcpu, u32 icrh, u32 icrl, u32 id, u32 index),
+	    TP_ARGS(vcpu, icrh, icrl, id, index),
+
+	TP_STRUCT__entry(
+		__field(u32, vcpu)
+		__field(u32, icrh)
+		__field(u32, icrl)
+		__field(u32, id)
+		__field(u32, index)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu = vcpu;
+		__entry->icrh = icrh;
+		__entry->icrl = icrl;
+		__entry->id = id;
+		__entry->index = index;
+	),
+
+	TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u\n",
+		  __entry->vcpu, __entry->icrh, __entry->icrl,
+		  __entry->id, __entry->index)
+);
+
+TRACE_EVENT(kvm_avic_unaccelerated_access,
+	    /* ft: true for trap-type exit; rw: true for write access; vec: vector field */
+	    TP_PROTO(u32 vcpu, u32 offset, bool ft, bool rw, u32 vec),
+	    TP_ARGS(vcpu, offset, ft, rw, vec),
+
+	TP_STRUCT__entry(
+		__field(u32, vcpu)
+		__field(u32, offset)
+		__field(bool, ft)
+		__field(bool, rw)
+		__field(u32, vec)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu = vcpu;
+		__entry->offset = offset;
+		__entry->ft = ft;
+		__entry->rw = rw;
+		__entry->vec = vec;
+	),
+
+	TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x\n",
+		  __entry->vcpu,
+		  __entry->offset,
+		  __print_symbolic(__entry->offset, kvm_trace_symbol_apic),
+		  __entry->ft ? "trap" : "fault",
+		  __entry->rw ? "write" : "read",
+		  __entry->vec)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d12583e..b0f211c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8437,3 +8437,5 @@  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);