diff mbox series

[RFC,5/7] KVM: MMU: Add support for PKS emulation

Message ID 20200807084841.7112-6-chenyi.qiang@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: PKS Virtualization support | expand

Commit Message

Chenyi Qiang Aug. 7, 2020, 8:48 a.m. UTC
Advertise pkr_mask to cache the conditions where pretection key checks
for supervisor pages are needed. When the accessed pages are those with
a translation for which the U/S flag is 0 in at least one
paging-structure entry controlling the translation, they are the
supervisor pages and PKRS enforces the access rights check.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  8 +++---
 arch/x86/kvm/mmu.h              | 12 ++++++---
 arch/x86/kvm/mmu/mmu.c          | 44 +++++++++++++++++----------------
 3 files changed, 35 insertions(+), 29 deletions(-)

Comments

Paolo Bonzini Jan. 26, 2021, 6:23 p.m. UTC | #1
On 07/08/20 10:48, Chenyi Qiang wrote:
> 
>  		if (pte_access & PT_USER_MASK)
>  			pkr_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;
> +		else if (!kvm_get_msr(vcpu, MSR_IA32_PKRS, &pkrs))
> +			pkr_bits = (pkrs >> (pte_pkey * 2)) & 3;

You should be able to always use vcpu->arch.pkrs here.  So

pkr = pte_access & PT_USER_MASK ? vcpu->arch.pkru : vcpu->arch.pkrs;
pkr_bits = (pkr >> pte_pkey * 2) & 3;

Paolo
Chenyi Qiang Jan. 27, 2021, 3 a.m. UTC | #2
On 1/27/2021 2:23 AM, Paolo Bonzini wrote:
> On 07/08/20 10:48, Chenyi Qiang wrote:
>>
>>          if (pte_access & PT_USER_MASK)
>>              pkr_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;
>> +        else if (!kvm_get_msr(vcpu, MSR_IA32_PKRS, &pkrs))
>> +            pkr_bits = (pkrs >> (pte_pkey * 2)) & 3;
> 
> You should be able to always use vcpu->arch.pkrs here.  So
> 
> pkr = pte_access & PT_USER_MASK ? vcpu->arch.pkru : vcpu->arch.pkrs;
> pkr_bits = (pkr >> pte_pkey * 2) & 3;
> 
> Paolo

Concerning vcpu->arch.pkrs would be the only use case in current 
submitted patches, is it still necessary to shadow it?

>
Paolo Bonzini Jan. 27, 2021, 8:37 a.m. UTC | #3
On 27/01/21 04:00, Chenyi Qiang wrote:
>>
>>>
>>>          if (pte_access & PT_USER_MASK)
>>>              pkr_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;
>>> +        else if (!kvm_get_msr(vcpu, MSR_IA32_PKRS, &pkrs))
>>> +            pkr_bits = (pkrs >> (pte_pkey * 2)) & 3;
>>
>> You should be able to always use vcpu->arch.pkrs here.  So
>>
>> pkr = pte_access & PT_USER_MASK ? vcpu->arch.pkru : vcpu->arch.pkrs;
>> pkr_bits = (pkr >> pte_pkey * 2) & 3;
>>
> Concerning vcpu->arch.pkrs would be the only use case in current 
> submitted patches, is it still necessary to shadow it?

Yes, please do.  kvm_get_msr is pretty slow, and the code becomes 
simpler too.

Paolo
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6b739d0d1c97..736e56e023d5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -422,10 +422,10 @@  struct kvm_mmu {
 	u8 permissions[16];
 
 	/*
-	* The pkru_mask indicates if protection key checks are needed.  It
-	* consists of 16 domains indexed by page fault error code bits [4:1],
-	* with PFEC.RSVD replaced by ACC_USER_MASK from the page tables.
-	* Each domain has 2 bits which are ANDed with AD and WD from PKRU.
+	* The pkr_mask indicates if protection key checks are needed.
+	* It consists of 16 domains indexed by page fault error code
+	* bits[4:1]. Each domain has 2 bits which are ANDed with AD
+	* and WD from PKRU/PKRS.
 	*/
 	u32 pkr_mask;
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 7fb4c63d5704..b840b2d9ee9f 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -195,15 +195,19 @@  static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
 	if (unlikely(mmu->pkr_mask)) {
 		u32 pkr_bits, offset;
+		u64 pkrs;
 
 		/*
-		* PKRU defines 32 bits, there are 16 domains and 2
-		* attribute bits per domain in pkru.  pte_pkey is the
-		* index of the protection domain, so pte_pkey * 2 is
-		* is the index of the first bit for the domain.
+		* PKRU and PKRS both define 32 bits. There are 16 domains
+		* and 2 attribute bits per domain in them. pte_key is the
+		* index of the protection domain, so pte_pkey * 2 is the
+		* index of the first bit for the domain. The choice of
+		* PKRU and PKRS is determined by the accessed pages.
 		*/
 		if (pte_access & PT_USER_MASK)
 			pkr_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;
+		else if (!kvm_get_msr(vcpu, MSR_IA32_PKRS, &pkrs))
+			pkr_bits = (pkrs >> (pte_pkey * 2)) & 3;
 		else
 			pkr_bits = 0;
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 333b4da739f8..845aea86b138 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4693,28 +4693,29 @@  static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 }
 
 /*
-* PKU is an additional mechanism by which the paging controls access to
-* user-mode addresses based on the value in the PKRU register.  Protection
-* key violations are reported through a bit in the page fault error code.
+* Protection Keys (PKEY) is an additional mechanism by which
+* the paging controls access to user-mode/supervisor-mode address
+* based on the values in PKEY registers (PKRU/PKRS). Protection key
+* violations are reported through a bit in the page fault error code.
 * Unlike other bits of the error code, the PK bit is not known at the
 * call site of e.g. gva_to_gpa; it must be computed directly in
-* permission_fault based on two bits of PKRU, on some machine state (CR4,
-* CR0, EFER, CPL), and on other bits of the error code and the page tables.
+* permission_fault based on two bits of PKRU/PKRS, on some machine
+* state (CR4, CR0, EFER, CPL), and on other bits of the error code
+* and the page tables.
 *
 * In particular the following conditions come from the error code, the
 * page tables and the machine state:
-* - PK is always zero unless CR4.PKE=1 and EFER.LMA=1
+* - PK is always zero unless CR4.PKE=1/CR4.PKS=1 and EFER.LMA=1
 * - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch)
-* - PK is always zero if U=0 in the page tables
-* - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access.
+* - (PKRU/PKRS).WD is ignored if CR0.WP=0 and the access is a supervisor access.
 *
-* The PKRU bitmask caches the result of these four conditions.  The error
-* code (minus the P bit) and the page table's U bit form an index into the
-* PKRU bitmask.  Two bits of the PKRU bitmask are then extracted and ANDed
-* with the two bits of the PKRU register corresponding to the protection key.
-* For the first three conditions above the bits will be 00, thus masking
-* away both AD and WD.  For all reads or if the last condition holds, WD
-* only will be masked away.
+* The pkr_mask caches the result of these three conditions. The error
+* code (minus the P bit) forms an index into the pkr_mask. Both PKU and
+* PKS shares the same bitmask. Two bits of the pkr_mask are then extracted
+* and ANDed with the two bits of the PKEY register corresponding to
+* the protection key. For the first two conditions above the bits will be 00,
+* thus masking away both AD and WD. For all reads or if the last condition
+* holds, WD only will be masked away.
 */
 static void update_pkr_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 				bool ept)
@@ -4727,8 +4728,9 @@  static void update_pkr_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 		return;
 	}
 
-	/* PKEY is enabled only if CR4.PKE and EFER.LMA are both set. */
-	if (!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || !is_long_mode(vcpu)) {
+	/* PKEY is enabled only if CR4.PKE/CR4.PKS and EFER.LMA are both set. */
+	if ((!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
+	    !kvm_read_cr4_bits(vcpu, X86_CR4_PKS)) || !is_long_mode(vcpu)) {
 		mmu->pkr_mask = 0;
 		return;
 	}
@@ -4757,14 +4759,14 @@  static void update_pkr_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 		check_pkey = (!ff && !rsvdf);
 
 		/*
-		 * write access is controlled by PKRU if it is a
-		 * user access or CR0.WP = 1.
+		 * write access is controlled by PKRU/PKRS if
+		 * it is a user access or CR0.WP = 1.
 		 */
 		check_write = check_pkey && wf && (uf || wp);
 
-		/* PKRU.AD stops both read and write access. */
+		/* PKRU/PKRS.AD stops both read and write access. */
 		pkey_bits = !!check_pkey;
-		/* PKRU.WD stops write access. */
+		/* PKRU/PKRS.WD stops write access. */
 		pkey_bits |= (!!check_write) << 1;
 
 		mmu->pkr_mask |= (pkey_bits & 3) << pfec;