diff mbox series

[RFC,03/14] x86/apic: Populate .read()/.write() callbacks of Secure AVIC driver

Message ID 20240913113705.419146-4-Neeraj.Upadhyay@amd.com (mailing list archive)
State New, archived
Headers show
Series AMD: Add Secure AVIC Guest Support | expand

Commit Message

Neeraj Upadhyay Sept. 13, 2024, 11:36 a.m. UTC
The x2APIC registers are mapped at an offset within the guest APIC
backing page which is same as their x2APIC MMIO offset. Secure AVIC
adds new registers such as ALLOWED_IRRs (which are at 4-byte offset
within the IRR register offset range) and NMI_REQ to the APIC register
space. In addition, the APIC_ID register is writable and configured by
guest.

Add read() and write() APIC callback functions to read and write x2APIC
registers directly from the guest APIC backing page.

The default .read()/.write() callbacks of x2APIC drivers perform
a rdmsr/wrmsr of the x2APIC registers. When Secure AVIC is enabled,
these would result in #VC exception (for non-accelerated register
accesses). The #VC exception handler reads/write the x2APIC register
in the guest APIC backing page. Since this would increase the latency
of accessing x2APIC registers, the read() and write() callbacks of
Secure AVIC driver directly reads/writes to the guest APIC backing page.

Co-developed-by: Kishon Vijay Abraham I <kvijayab@amd.com>
Signed-off-by: Kishon Vijay Abraham I <kvijayab@amd.com>
Signed-off-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
 arch/x86/include/asm/apicdef.h      |   2 +
 arch/x86/kernel/apic/x2apic_savic.c | 107 +++++++++++++++++++++++++++-
 2 files changed, 107 insertions(+), 2 deletions(-)

Comments

Borislav Petkov Nov. 6, 2024, 6:16 p.m. UTC | #1
On Fri, Sep 13, 2024 at 05:06:54PM +0530, Neeraj Upadhyay wrote:
> @@ -24,6 +25,108 @@ static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
>  	return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC);
>  }
>  
> +static inline u32 get_reg(char *page, int reg_off)

Just "reg" like the other APICs.

> +{
> +	return READ_ONCE(*((u32 *)(page + reg_off)));
> +}
> +
> +static inline void set_reg(char *page, int reg_off, u32 val)
> +{
> +	WRITE_ONCE(*((u32 *)(page + reg_off)), val);
> +}
> +
> +#define SAVIC_ALLOWED_IRR_OFFSET	0x204
> +
> +static u32 x2apic_savic_read(u32 reg)
> +{
> +	void *backing_page = this_cpu_read(apic_backing_page);
> +
> +	switch (reg) {
> +	case APIC_LVTT:
> +	case APIC_TMICT:
> +	case APIC_TMCCT:
> +	case APIC_TDCR:
> +	case APIC_ID:
> +	case APIC_LVR:
> +	case APIC_TASKPRI:
> +	case APIC_ARBPRI:
> +	case APIC_PROCPRI:
> +	case APIC_LDR:
> +	case APIC_SPIV:
> +	case APIC_ESR:
> +	case APIC_ICR:
> +	case APIC_LVTTHMR:
> +	case APIC_LVTPC:
> +	case APIC_LVT0:
> +	case APIC_LVT1:
> +	case APIC_LVTERR:
> +	case APIC_EFEAT:
> +	case APIC_ECTRL:
> +	case APIC_SEOI:
> +	case APIC_IER:

I'm sure those can be turned into ranges instead of enumerating every single
APIC register...

> +	case APIC_EILVTn(0) ... APIC_EILVTn(3):

Like here.

> +		return get_reg(backing_page, reg);
> +	case APIC_ISR ... APIC_ISR + 0x70:
> +	case APIC_TMR ... APIC_TMR + 0x70:
> +		WARN_ONCE(!IS_ALIGNED(reg, 16), "Reg offset %#x not aligned at 16 bytes", reg);

What's the point of a WARN...

> +		return get_reg(backing_page, reg);

... and then allowing the register access anyway?

> +	/* IRR and ALLOWED_IRR offset range */
> +	case APIC_IRR ... APIC_IRR + 0x74:
> +		/*
> +		 * Either aligned at 16 bytes for valid IRR reg offset or a
> +		 * valid Secure AVIC ALLOWED_IRR offset.
> +		 */
> +		WARN_ONCE(!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)),
> +			  "Misaligned IRR/ALLOWED_IRR reg offset %#x", reg);
> +		return get_reg(backing_page, reg);

Ditto.

And below too.

> +	default:
> +		pr_err("Permission denied: read of Secure AVIC reg offset %#x\n", reg);
> +		return 0;
> +	}
> +}
> +
> +#define SAVIC_NMI_REQ_OFFSET		0x278
> +
> +static void x2apic_savic_write(u32 reg, u32 data)
> +{
> +	void *backing_page = this_cpu_read(apic_backing_page);
> +
> +	switch (reg) {
> +	case APIC_LVTT:
> +	case APIC_LVT0:
> +	case APIC_LVT1:
> +	case APIC_TMICT:
> +	case APIC_TDCR:
> +	case APIC_SELF_IPI:
> +	/* APIC_ID is writable and configured by guest for Secure AVIC */
> +	case APIC_ID:
> +	case APIC_TASKPRI:
> +	case APIC_EOI:
> +	case APIC_SPIV:
> +	case SAVIC_NMI_REQ_OFFSET:
> +	case APIC_ESR:
> +	case APIC_ICR:
> +	case APIC_LVTTHMR:
> +	case APIC_LVTPC:
> +	case APIC_LVTERR:
> +	case APIC_ECTRL:
> +	case APIC_SEOI:
> +	case APIC_IER:
> +	case APIC_EILVTn(0) ... APIC_EILVTn(3):
> +		set_reg(backing_page, reg, data);
> +		break;
> +	/* ALLOWED_IRR offsets are writable */
> +	case SAVIC_ALLOWED_IRR_OFFSET ... SAVIC_ALLOWED_IRR_OFFSET + 0x70:
> +		if (IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)) {
> +			set_reg(backing_page, reg, data);
> +			break;
> +		}
> +		fallthrough;
> +	default:
> +		pr_err("Permission denied: write to Secure AVIC reg offset %#x\n", reg);
> +	}
> +}
> +
>  static void x2apic_savic_send_IPI(int cpu, int vector)
>  {
>  	u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
> @@ -140,8 +243,8 @@ static struct apic apic_x2apic_savic __ro_after_init = {
>  	.send_IPI_self			= x2apic_send_IPI_self,
>  	.nmi_to_offline_cpu		= true,
>  
> -	.read				= native_apic_msr_read,
> -	.write				= native_apic_msr_write,
> +	.read				= x2apic_savic_read,
> +	.write				= x2apic_savic_write,
>  	.eoi				= native_apic_msr_eoi,
>  	.icr_read			= native_x2apic_icr_read,
>  	.icr_write			= native_x2apic_icr_write,
> --
Melody (Huibo) Wang Nov. 6, 2024, 7:20 p.m. UTC | #2
Hi Neeraj,

On 9/13/2024 4:36 AM, Neeraj Upadhyay wrote:
> The x2APIC registers are mapped at an offset within the guest APIC
> backing page which is same as their x2APIC MMIO offset. Secure AVIC
> adds new registers such as ALLOWED_IRRs (which are at 4-byte offset
> within the IRR register offset range) and NMI_REQ to the APIC register
> space. In addition, the APIC_ID register is writable and configured by
> guest.
> 
> Add read() and write() APIC callback functions to read and write x2APIC
> registers directly from the guest APIC backing page.
> 
> The default .read()/.write() callbacks of x2APIC drivers perform
> a rdmsr/wrmsr of the x2APIC registers. When Secure AVIC is enabled,
> these would result in #VC exception (for non-accelerated register
> accesses). The #VC exception handler reads/write the x2APIC register
> in the guest APIC backing page. Since this would increase the latency
> of accessing x2APIC registers, the read() and write() callbacks of
> Secure AVIC driver directly reads/writes to the guest APIC backing page.
> 
I think this is important non-obvious information which should be in a comment in the code
itself, not just in the commit message. 

Thanks,
Melody
Neeraj Upadhyay Nov. 7, 2024, 3:32 a.m. UTC | #3
On 11/6/2024 11:46 PM, Borislav Petkov wrote:
> On Fri, Sep 13, 2024 at 05:06:54PM +0530, Neeraj Upadhyay wrote:
>> @@ -24,6 +25,108 @@ static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
>>  	return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC);
>>  }
>>  
>> +static inline u32 get_reg(char *page, int reg_off)
> 
> Just "reg" like the other APICs.
> 

Ok sure.


>> +static u32 x2apic_savic_read(u32 reg)
>> +{
>> +	void *backing_page = this_cpu_read(apic_backing_page);
>> +
>> +	switch (reg) {
>> +	case APIC_LVTT:
>> +	case APIC_TMICT:
>> +	case APIC_TMCCT:
>> +	case APIC_TDCR:
>> +	case APIC_ID:
>> +	case APIC_LVR:
>> +	case APIC_TASKPRI:
>> +	case APIC_ARBPRI:
>> +	case APIC_PROCPRI:
>> +	case APIC_LDR:
>> +	case APIC_SPIV:
>> +	case APIC_ESR:
>> +	case APIC_ICR:
>> +	case APIC_LVTTHMR:
>> +	case APIC_LVTPC:
>> +	case APIC_LVT0:
>> +	case APIC_LVT1:
>> +	case APIC_LVTERR:
>> +	case APIC_EFEAT:
>> +	case APIC_ECTRL:
>> +	case APIC_SEOI:
>> +	case APIC_IER:
> 
> I'm sure those can be turned into ranges instead of enumerating every single
> APIC register...
> 

Below are the offset of these, as per "Table 16-6. x2APIC Register" in
APM vol2:

#Reg	     #offset
APIC_LVTT - 0x320
APIC_TMICT - 0x380
APIC_TMCCT - 0x390
APIC_TDCR - 0x3E0

Above timer related registers are read from HV when we reach the end of this patch
series.

APIC_ID - 20h
APIC_LVR - 30h
APIC_TASKPRI - 80h
APIC_ARBPRI - 90h
APIC_PROCPRI - A0h
APIC_LDR - D0h
APIC_SPIV - F0h
APIC_ESR - 280h
APIC_ICR - 300h
APIC_LVTTHMR - 330h
APIC_LVTPC - 340h
APIC_LVT0 - 350h
APIC_LVT1 - 360h
APIC_LVTERR - 370h
APIC_EFEAT - 0x400h
APIC_ECTRL - 0x410h
APIC_SEOI - 0x420h
APIC_IER - 0x480h

These are few registers like part of LVT (APIC_LVTTHMR ... APIC_LVTERR) ,
priority (APIC_TASKPRI ... APIC_PROCPRI), extended APIC
(APIC_EFEAT ... APIC_ECTRL) which can be grouped.

Intention of doing per reg is to be explicit about which registers
are accessed from backing page, which from hv and which are not allowed
access. As access (and their perms) are per-reg and not range-based, this
made sense to me. Also, if ranges are used, I think 16-byte aligned
checks are needed for the range. If using ranges looks more logical grouping
here, I can update it as per the above range groupings.


>> +	case APIC_EILVTn(0) ... APIC_EILVTn(3):
> 
> Like here.
> 

As this case is for EILVT register range, these registers were grouped.
(I need to add a 16-byte alignment check here).


>> +		return get_reg(backing_page, reg);
>> +	case APIC_ISR ... APIC_ISR + 0x70:
>> +	case APIC_TMR ... APIC_TMR + 0x70:
>> +		WARN_ONCE(!IS_ALIGNED(reg, 16), "Reg offset %#x not aligned at 16 bytes", reg);
> 
> What's the point of a WARN...
> 
>> +		return get_reg(backing_page, reg);
> 
> ... and then allowing the register access anyway?
> 

I will skip access for non-aligned case.

>> +	/* IRR and ALLOWED_IRR offset range */
>> +	case APIC_IRR ... APIC_IRR + 0x74:
>> +		/*
>> +		 * Either aligned at 16 bytes for valid IRR reg offset or a
>> +		 * valid Secure AVIC ALLOWED_IRR offset.
>> +		 */
>> +		WARN_ONCE(!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)),
>> +			  "Misaligned IRR/ALLOWED_IRR reg offset %#x", reg);
>> +		return get_reg(backing_page, reg);
> 
> Ditto.
> 
> And below too.
>

Same reply as above.


- Neeraj
 
>> +	default:
>> +		pr_err("Permission denied: read of Secure AVIC reg offset %#x\n", reg);
>> +		return 0;
>> +	}
>> +}
>> +
Neeraj Upadhyay Nov. 7, 2024, 3:33 a.m. UTC | #4
On 11/7/2024 12:50 AM, Melody (Huibo) Wang wrote:
> Hi Neeraj,
> 
> On 9/13/2024 4:36 AM, Neeraj Upadhyay wrote:
>> The x2APIC registers are mapped at an offset within the guest APIC
>> backing page which is same as their x2APIC MMIO offset. Secure AVIC
>> adds new registers such as ALLOWED_IRRs (which are at 4-byte offset
>> within the IRR register offset range) and NMI_REQ to the APIC register
>> space. In addition, the APIC_ID register is writable and configured by
>> guest.
>>
>> Add read() and write() APIC callback functions to read and write x2APIC
>> registers directly from the guest APIC backing page.
>>
>> The default .read()/.write() callbacks of x2APIC drivers perform
>> a rdmsr/wrmsr of the x2APIC registers. When Secure AVIC is enabled,
>> these would result in #VC exception (for non-accelerated register
>> accesses). The #VC exception handler reads/write the x2APIC register
>> in the guest APIC backing page. Since this would increase the latency
>> of accessing x2APIC registers, the read() and write() callbacks of
>> Secure AVIC driver directly reads/writes to the guest APIC backing page.
>>
> I think this is important non-obvious information which should be in a comment in the code
> itself, not just in the commit message. 
> 

Sure, I will add some of this information in the comments. Thanks for the review!


- Neeraj

> Thanks,
> Melody
Borislav Petkov Nov. 7, 2024, 2:28 p.m. UTC | #5
On Thu, Nov 07, 2024 at 09:02:16AM +0530, Neeraj Upadhyay wrote:
> Intention of doing per reg is to be explicit about which registers
> are accessed from backing page, which from hv and which are not allowed
> access. As access (and their perms) are per-reg and not range-based, this
> made sense to me. Also, if ranges are used, I think 16-byte aligned
> checks are needed for the range. If using ranges looks more logical grouping
> here, I can update it as per the above range groupings.

Is this list of registers going to remain or are we going to keep adding to
it so that the ranges become contiguous?

And yes, there is some merit to explicitly naming them but you can also put
that in a comment once above those functions too.
Neeraj Upadhyay Nov. 8, 2024, 8:59 a.m. UTC | #6
On 11/7/2024 7:58 PM, Borislav Petkov wrote:
> On Thu, Nov 07, 2024 at 09:02:16AM +0530, Neeraj Upadhyay wrote:
>> Intention of doing per reg is to be explicit about which registers
>> are accessed from backing page, which from hv and which are not allowed
>> access. As access (and their perms) are per-reg and not range-based, this
>> made sense to me. Also, if ranges are used, I think 16-byte aligned
>> checks are needed for the range. If using ranges looks more logical grouping
>> here, I can update it as per the above range groupings.
> 
> Is this list of registers going to remain or are we going to keep adding to
> it so that the ranges become contiguous?
> 

From the APIC architecture details in APM and SDM, I see these gaps are reserved
ranges which are present for xapic also. x2apic keeps the register layout consistent.
So, these gaps looks to have have remained reserved for long. I don't have information
on the uarch reasons (if any) for the reserved space layout.


> And yes, there is some merit to explicitly naming them but you can also put
> that in a comment once above those functions too.
> 

I understand your point but, for this specific case, to me, each register as a separate
"switch case" looks clearer and self-describing than keeping a range of different
registers and putting comment about which registers it covers.

In addition, while each APIC register is at 16-byte alignment, they are accessed
using dword size reads/writes. So, as mentioned in previous reply, using ranges
requires alignment checks.

One hypothetical example where using range checks could become klugy is when
the unused 12 bytes of 16 byte  of a register is repurposed for implementation-
specific features and the read/write permissions are different for the architecture
register and the implementation-defined one. Secure AVIC uses (IRRn + 4) address
for ALLOWED_IRRn. However, the r/w permissions are same for IRRn and ALLOWED_IRRn.
So, using ranges for IRR register space works fine. However, it may not work
if similar register-space-repurposing happens for other features in future. I
understand this could be considered as speculative and hand-wavy reasoning. So,
I would ask, does above reasoning convince you with the current switch-case layout
or you want it to be range-based?


- Neeraj
Borislav Petkov Nov. 8, 2024, 10:48 a.m. UTC | #7
On Fri, Nov 08, 2024 at 02:29:03PM +0530, Neeraj Upadhyay wrote:
> From the APIC architecture details in APM and SDM, I see these gaps are reserved

What I actually meant here is whether SAVIC enablement is going to keep adding
more and more entries here so that it becomes practically *all* possible, each
spelled out explicitly.

But I went further in your patchset and it doesn't look like it so meh, ok.

> I would ask, does above reasoning convince you with the current switch-case layout
> or you want it to be range-based?

That's fine, let's keep them like they are now and we can always revisit if
the list grows too ugly.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 094106b6a538..be39a543fbe5 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -135,6 +135,8 @@ 
 #define		APIC_TDR_DIV_128	0xA
 #define	APIC_EFEAT	0x400
 #define	APIC_ECTRL	0x410
+#define APIC_SEOI	0x420
+#define APIC_IER	0x480
 #define APIC_EILVTn(n)	(0x500 + 0x10 * n)
 #define		APIC_EILVT_NR_AMD_K8	1	/* # of extended interrupts */
 #define		APIC_EILVT_NR_AMD_10H	4
diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c
index d903c35b8b64..6a471bbc3dba 100644
--- a/arch/x86/kernel/apic/x2apic_savic.c
+++ b/arch/x86/kernel/apic/x2apic_savic.c
@@ -10,6 +10,7 @@ 
 #include <linux/cpumask.h>
 #include <linux/cc_platform.h>
 #include <linux/percpu-defs.h>
+#include <linux/align.h>
 
 #include <asm/apic.h>
 #include <asm/sev.h>
@@ -24,6 +25,108 @@  static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC);
 }
 
+static inline u32 get_reg(char *page, int reg_off)
+{
+	return READ_ONCE(*((u32 *)(page + reg_off)));
+}
+
+static inline void set_reg(char *page, int reg_off, u32 val)
+{
+	WRITE_ONCE(*((u32 *)(page + reg_off)), val);
+}
+
+#define SAVIC_ALLOWED_IRR_OFFSET	0x204
+
+static u32 x2apic_savic_read(u32 reg)
+{
+	void *backing_page = this_cpu_read(apic_backing_page);
+
+	switch (reg) {
+	case APIC_LVTT:
+	case APIC_TMICT:
+	case APIC_TMCCT:
+	case APIC_TDCR:
+	case APIC_ID:
+	case APIC_LVR:
+	case APIC_TASKPRI:
+	case APIC_ARBPRI:
+	case APIC_PROCPRI:
+	case APIC_LDR:
+	case APIC_SPIV:
+	case APIC_ESR:
+	case APIC_ICR:
+	case APIC_LVTTHMR:
+	case APIC_LVTPC:
+	case APIC_LVT0:
+	case APIC_LVT1:
+	case APIC_LVTERR:
+	case APIC_EFEAT:
+	case APIC_ECTRL:
+	case APIC_SEOI:
+	case APIC_IER:
+	case APIC_EILVTn(0) ... APIC_EILVTn(3):
+		return get_reg(backing_page, reg);
+	case APIC_ISR ... APIC_ISR + 0x70:
+	case APIC_TMR ... APIC_TMR + 0x70:
+		WARN_ONCE(!IS_ALIGNED(reg, 16), "Reg offset %#x not aligned at 16 bytes", reg);
+		return get_reg(backing_page, reg);
+	/* IRR and ALLOWED_IRR offset range */
+	case APIC_IRR ... APIC_IRR + 0x74:
+		/*
+		 * Either aligned at 16 bytes for valid IRR reg offset or a
+		 * valid Secure AVIC ALLOWED_IRR offset.
+		 */
+		WARN_ONCE(!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)),
+			  "Misaligned IRR/ALLOWED_IRR reg offset %#x", reg);
+		return get_reg(backing_page, reg);
+	default:
+		pr_err("Permission denied: read of Secure AVIC reg offset %#x\n", reg);
+		return 0;
+	}
+}
+
+#define SAVIC_NMI_REQ_OFFSET		0x278
+
+static void x2apic_savic_write(u32 reg, u32 data)
+{
+	void *backing_page = this_cpu_read(apic_backing_page);
+
+	switch (reg) {
+	case APIC_LVTT:
+	case APIC_LVT0:
+	case APIC_LVT1:
+	case APIC_TMICT:
+	case APIC_TDCR:
+	case APIC_SELF_IPI:
+	/* APIC_ID is writable and configured by guest for Secure AVIC */
+	case APIC_ID:
+	case APIC_TASKPRI:
+	case APIC_EOI:
+	case APIC_SPIV:
+	case SAVIC_NMI_REQ_OFFSET:
+	case APIC_ESR:
+	case APIC_ICR:
+	case APIC_LVTTHMR:
+	case APIC_LVTPC:
+	case APIC_LVTERR:
+	case APIC_ECTRL:
+	case APIC_SEOI:
+	case APIC_IER:
+	case APIC_EILVTn(0) ... APIC_EILVTn(3):
+		set_reg(backing_page, reg, data);
+		break;
+	/* ALLOWED_IRR offsets are writable */
+	case SAVIC_ALLOWED_IRR_OFFSET ... SAVIC_ALLOWED_IRR_OFFSET + 0x70:
+		if (IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)) {
+			set_reg(backing_page, reg, data);
+			break;
+		}
+		fallthrough;
+	default:
+		pr_err("Permission denied: write to Secure AVIC reg offset %#x\n", reg);
+	}
+}
+
 static void x2apic_savic_send_IPI(int cpu, int vector)
 {
 	u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
@@ -140,8 +243,8 @@  static struct apic apic_x2apic_savic __ro_after_init = {
 	.send_IPI_self			= x2apic_send_IPI_self,
 	.nmi_to_offline_cpu		= true,
 
-	.read				= native_apic_msr_read,
-	.write				= native_apic_msr_write,
+	.read				= x2apic_savic_read,
+	.write				= x2apic_savic_write,
 	.eoi				= native_apic_msr_eoi,
 	.icr_read			= native_x2apic_icr_read,
 	.icr_write			= native_x2apic_icr_write,