[v4,7/9] s390/mm: Add huge pmd storage key handling
diff mbox

Message ID 20180627135510.117945-8-frankja@linux.ibm.com
State New
Headers show

Commit Message

Janosch Frank June 27, 2018, 1:55 p.m. UTC
From: Janosch Frank <frankja@linux.vnet.ibm.com>

Storage keys for guests with huge page mappings have to directly set
the key in hardware. There are no PGSTEs for PMDs that we could use to
retain the guests's logical view of the key.

As pmds are handled from userspace side, KVM skey emulation for split
pmds will always use the hardware's storage key.

Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
---
 arch/s390/mm/pgtable.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 98 insertions(+), 6 deletions(-)

Comments

David Hildenbrand June 28, 2018, 7:49 a.m. UTC | #1
On 27.06.2018 15:55, Janosch Frank wrote:
> From: Janosch Frank <frankja@linux.vnet.ibm.com>
> 
> Storage keys for guests with huge page mappings have to directly set
> the key in hardware. There are no PGSTEs for PMDs that we could use to
> retain the guests's logical view of the key.

As we don't walk the actual gmap, we will never try to touch fake 4k
page tables, right?

> 
> As pmds are handled from userspace side, KVM skey emulation for split
> pmds will always use the hardware's storage key.
> 
> Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
> ---
>  arch/s390/mm/pgtable.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 98 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
> index 7bc79aae3c25..158c880226fe 100644
> --- a/arch/s390/mm/pgtable.c
> +++ b/arch/s390/mm/pgtable.c
> @@ -811,12 +811,45 @@ EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
>  int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
>  			  unsigned char key, bool nq)
>  {
> -	unsigned long keyul;
> +	unsigned long keyul, address;
>  	spinlock_t *ptl;
>  	pgste_t old, new;
> +	pgd_t *pgd;
> +	p4d_t *p4d;
> +	pud_t *pud;
> +	pmd_t *pmd;
>  	pte_t *ptep;
>  
> -	ptep = get_locked_pte(mm, addr, &ptl);
> +	pgd = pgd_offset(mm, addr);
> +	p4d = p4d_alloc(mm, pgd, addr);
> +	if (!p4d)
> +		return -EFAULT;
> +	pud = pud_alloc(mm, p4d, addr);
> +	if (!pud)
> +		return -EFAULT;
> +	pmd = pmd_alloc(mm, pud, addr);
> +	if (!pmd)
> +		return -EFAULT;
> +
> +	ptl = pmd_lock(mm, pmd);
> +	if (!pmd_present(*pmd)) {
> +		spin_unlock(ptl);

Is there no pmd_unlock()? Maybe there should be one :)

Using spin_unlock() here looks strange on first sight.

> +		return -EFAULT;
> +	}
> +	if (pmd_large(*pmd)) {
> +		address = pmd_val(*pmd) & HPAGE_MASK;
> +		address |= addr & ~HPAGE_MASK;
> +		/*
> +		 * Huge pmds need quiescing operations, they are
> +		 * always mapped.
> +		 */
> +		page_set_storage_key(address, key, 1);
> +		spin_unlock(ptl);
> +		return 0;
> +	}
> +	spin_unlock(ptl);
> +
> +	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
>  	if (unlikely(!ptep))
>  		return -EFAULT;
>  
> @@ -827,7 +860,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
>  	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
>  	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
>  	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
> -		unsigned long address, bits, skey;
> +		unsigned long bits, skey;
>  
>  		address = pte_val(*ptep) & PAGE_MASK;
>  		skey = (unsigned long) page_get_storage_key(address);
> @@ -890,14 +923,43 @@ EXPORT_SYMBOL(cond_set_guest_storage_key);
>  int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
>  {
>  	spinlock_t *ptl;
> +	unsigned long address;
>  	pgste_t old, new;
> +	pgd_t *pgd;
> +	p4d_t *p4d;
> +	pud_t *pud;
> +	pmd_t *pmd;
>  	pte_t *ptep;
>  	int cc = 0;
>  
> -	ptep = get_locked_pte(mm, addr, &ptl);
> -	if (unlikely(!ptep))
> +	pgd = pgd_offset(mm, addr);
> +	p4d = p4d_alloc(mm, pgd, addr);
> +	if (!p4d)
> +		return -EFAULT;
> +	pud = pud_alloc(mm, p4d, addr);
> +	if (!pud)
> +		return -EFAULT;
> +	pmd = pmd_alloc(mm, pud, addr);
> +	if (!pmd)
>  		return -EFAULT;
>  
> +	ptl = pmd_lock(mm, pmd);
> +	if (!pmd_present(*pmd)) {
> +		spin_unlock(ptl);
> +		return -EFAULT;
> +	}
> +	if (pmd_large(*pmd)) {
> +		address = pmd_val(*pmd) & HPAGE_MASK;
> +		address |= addr & ~HPAGE_MASK;
> +		cc = page_reset_referenced(addr);
> +		spin_unlock(ptl);
> +		return cc;
> +	}
> +	spin_unlock(ptl);
> +
> +	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
> +	if (unlikely(!ptep))
> +		return -EFAULT;
>  	new = old = pgste_get_lock(ptep);
>  	/* Reset guest reference bit only */
>  	pgste_val(new) &= ~PGSTE_GR_BIT;
> @@ -922,11 +984,41 @@ EXPORT_SYMBOL(reset_guest_reference_bit);
>  int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
>  			  unsigned char *key)
>  {
> +	unsigned long address;
>  	spinlock_t *ptl;
>  	pgste_t pgste;
> +	pgd_t *pgd;
> +	p4d_t *p4d;
> +	pud_t *pud;
> +	pmd_t *pmd;
>  	pte_t *ptep;
>  
> -	ptep = get_locked_pte(mm, addr, &ptl);
> +	pgd = pgd_offset(mm, addr);
> +	p4d = p4d_alloc(mm, pgd, addr);
> +	if (!p4d)
> +		return -EFAULT;
> +	pud = pud_alloc(mm, p4d, addr);
> +	if (!pud)
> +		return -EFAULT;
> +	pmd = pmd_alloc(mm, pud, addr);
> +	if (!pmd)
> +		return -EFAULT;
> +
> +	ptl = pmd_lock(mm, pmd);
> +	if (!pmd_present(*pmd)) {
> +		spin_unlock(ptl);
> +		return -EFAULT;
> +	}
> +	if (pmd_large(*pmd)) {
> +		address = pmd_val(*pmd) & HPAGE_MASK;
> +		address |= addr & ~HPAGE_MASK;
> +		*key = page_get_storage_key(address);

(not having looked in detail through all the patches)

We are guaranteed to have valid storage keys at this point, right?
(patch #6)

> +		spin_unlock(ptl);
> +		return 0;
> +	}
> +	spin_unlock(ptl);
> +
> +	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
>  	if (unlikely(!ptep))
>  		return -EFAULT;
>  
> 


In general, would a helper __get_locked_pmd() make sense? (could be
s390x specific) We have quite some duplicate code here.
Janosch Frank June 28, 2018, 10:45 a.m. UTC | #2
On 28.06.2018 09:49, David Hildenbrand wrote:
> On 27.06.2018 15:55, Janosch Frank wrote:
>> From: Janosch Frank <frankja@linux.vnet.ibm.com>
>>
>> Storage keys for guests with huge page mappings have to directly set
>> the key in hardware. There are no PGSTEs for PMDs that we could use to
>> retain the guests's logical view of the key.
> 
> As we don't walk the actual gmap, we will never try to touch fake 4k
> page tables, right?

Correct

> 
>>
>> As pmds are handled from userspace side, KVM skey emulation for split
>> pmds will always use the hardware's storage key.
>>
>> Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
>> ---
>>  arch/s390/mm/pgtable.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 98 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
>> index 7bc79aae3c25..158c880226fe 100644
>> --- a/arch/s390/mm/pgtable.c
>> +++ b/arch/s390/mm/pgtable.c
>> @@ -811,12 +811,45 @@ EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
>>  int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
>>  			  unsigned char key, bool nq)
>>  {
>> -	unsigned long keyul;
>> +	unsigned long keyul, address;
>>  	spinlock_t *ptl;
>>  	pgste_t old, new;
>> +	pgd_t *pgd;
>> +	p4d_t *p4d;
>> +	pud_t *pud;
>> +	pmd_t *pmd;
>>  	pte_t *ptep;
>>  
>> -	ptep = get_locked_pte(mm, addr, &ptl);
>> +	pgd = pgd_offset(mm, addr);
>> +	p4d = p4d_alloc(mm, pgd, addr);
>> +	if (!p4d)
>> +		return -EFAULT;
>> +	pud = pud_alloc(mm, p4d, addr);
>> +	if (!pud)
>> +		return -EFAULT;
>> +	pmd = pmd_alloc(mm, pud, addr);
>> +	if (!pmd)
>> +		return -EFAULT;
>> +
>> +	ptl = pmd_lock(mm, pmd);
>> +	if (!pmd_present(*pmd)) {
>> +		spin_unlock(ptl);
> 
> Is there no pmd_unlock()? Maybe there should be one :)
> 
> Using spin_unlock() here looks strange on first sight.

There doesn't seem to be one in include/linux/mm.h where it is from and
I'm not gonna create one.

> 
>> +		return -EFAULT;
>> +	}
>> +	if (pmd_large(*pmd)) {
>> +		address = pmd_val(*pmd) & HPAGE_MASK;
>> +		address |= addr & ~HPAGE_MASK;
>> +		/*
>> +		 * Huge pmds need quiescing operations, they are
>> +		 * always mapped.
>> +		 */
>> +		page_set_storage_key(address, key, 1);
>> +		spin_unlock(ptl);
>> +		return 0;
>> +	}
>> +	spin_unlock(ptl);
>> +
>> +	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
>>  	if (unlikely(!ptep))
>>  		return -EFAULT;
>>  
[...]
>> -	ptep = get_locked_pte(mm, addr, &ptl);
>> +	pgd = pgd_offset(mm, addr);
>> +	p4d = p4d_alloc(mm, pgd, addr);
>> +	if (!p4d)
>> +		return -EFAULT;
>> +	pud = pud_alloc(mm, p4d, addr);
>> +	if (!pud)
>> +		return -EFAULT;
>> +	pmd = pmd_alloc(mm, pud, addr);
>> +	if (!pmd)
>> +		return -EFAULT;
>> +
>> +	ptl = pmd_lock(mm, pmd);
>> +	if (!pmd_present(*pmd)) {
>> +		spin_unlock(ptl);
>> +		return -EFAULT;
>> +	}
>> +	if (pmd_large(*pmd)) {
>> +		address = pmd_val(*pmd) & HPAGE_MASK;
>> +		address |= addr & ~HPAGE_MASK;
>> +		*key = page_get_storage_key(address);
> 
> (not having looked in detail through all the patches)
> 
> We are guaranteed to have valid storage keys at this point, right?
> (patch #6)

Yes, the VM part goes through the enablement and kvm_s390_get_skeys uses
the mm->context for checking.

> 
>> +		spin_unlock(ptl);
>> +		return 0;
>> +	}
>> +	spin_unlock(ptl);
>> +
>> +	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
>>  	if (unlikely(!ptep))
>>  		return -EFAULT;
>>  
>>
> 
> 
> In general, would a helper __get_locked_pmd() make sense? (could be
> s390x specific) We have quite some duplicate code here.
> 

In the gmap I use huge_pte_offset and cast it back to pmd, I'll change
it for the next version.

Patch
diff mbox

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 7bc79aae3c25..158c880226fe 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -811,12 +811,45 @@  EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned char key, bool nq)
 {
-	unsigned long keyul;
+	unsigned long keyul, address;
 	spinlock_t *ptl;
 	pgste_t old, new;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
 	pte_t *ptep;
 
-	ptep = get_locked_pte(mm, addr, &ptl);
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return -EFAULT;
+	pud = pud_alloc(mm, p4d, addr);
+	if (!pud)
+		return -EFAULT;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
+		return -EFAULT;
+
+	ptl = pmd_lock(mm, pmd);
+	if (!pmd_present(*pmd)) {
+		spin_unlock(ptl);
+		return -EFAULT;
+	}
+	if (pmd_large(*pmd)) {
+		address = pmd_val(*pmd) & HPAGE_MASK;
+		address |= addr & ~HPAGE_MASK;
+		/*
+		 * Huge pmds need quiescing operations, they are
+		 * always mapped.
+		 */
+		page_set_storage_key(address, key, 1);
+		spin_unlock(ptl);
+		return 0;
+	}
+	spin_unlock(ptl);
+
+	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;
 
@@ -827,7 +860,7 @@  int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
 	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
 	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
-		unsigned long address, bits, skey;
+		unsigned long bits, skey;
 
 		address = pte_val(*ptep) & PAGE_MASK;
 		skey = (unsigned long) page_get_storage_key(address);
@@ -890,14 +923,43 @@  EXPORT_SYMBOL(cond_set_guest_storage_key);
 int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
 {
 	spinlock_t *ptl;
+	unsigned long address;
 	pgste_t old, new;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
 	pte_t *ptep;
 	int cc = 0;
 
-	ptep = get_locked_pte(mm, addr, &ptl);
-	if (unlikely(!ptep))
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return -EFAULT;
+	pud = pud_alloc(mm, p4d, addr);
+	if (!pud)
+		return -EFAULT;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
 		return -EFAULT;
 
+	ptl = pmd_lock(mm, pmd);
+	if (!pmd_present(*pmd)) {
+		spin_unlock(ptl);
+		return -EFAULT;
+	}
+	if (pmd_large(*pmd)) {
+		address = pmd_val(*pmd) & HPAGE_MASK;
+		address |= addr & ~HPAGE_MASK;
+		cc = page_reset_referenced(addr);
+		spin_unlock(ptl);
+		return cc;
+	}
+	spin_unlock(ptl);
+
+	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+	if (unlikely(!ptep))
+		return -EFAULT;
 	new = old = pgste_get_lock(ptep);
 	/* Reset guest reference bit only */
 	pgste_val(new) &= ~PGSTE_GR_BIT;
@@ -922,11 +984,41 @@  EXPORT_SYMBOL(reset_guest_reference_bit);
 int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned char *key)
 {
+	unsigned long address;
 	spinlock_t *ptl;
 	pgste_t pgste;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
 	pte_t *ptep;
 
-	ptep = get_locked_pte(mm, addr, &ptl);
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return -EFAULT;
+	pud = pud_alloc(mm, p4d, addr);
+	if (!pud)
+		return -EFAULT;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
+		return -EFAULT;
+
+	ptl = pmd_lock(mm, pmd);
+	if (!pmd_present(*pmd)) {
+		spin_unlock(ptl);
+		return -EFAULT;
+	}
+	if (pmd_large(*pmd)) {
+		address = pmd_val(*pmd) & HPAGE_MASK;
+		address |= addr & ~HPAGE_MASK;
+		*key = page_get_storage_key(address);
+		spin_unlock(ptl);
+		return 0;
+	}
+	spin_unlock(ptl);
+
+	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;