
[v7,11/12] s390/mm: Add huge page gmap linking support

Message ID: 20180717124426.6240-12-frankja@linux.ibm.com
State: New, archived

Commit Message

Janosch Frank July 17, 2018, 12:44 p.m. UTC
Let's allow huge pmd linking when it has been enabled through the
KVM_CAP_S390_HPAGE_1M capability. We can now also restrict gmap
invalidation and notification to the cases where the capability has
been activated, saving some cycles when it has not.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/include/asm/mmu.h         |  2 ++
 arch/s390/include/asm/mmu_context.h |  1 +
 arch/s390/mm/gmap.c                 |  9 ++++++---
 arch/s390/mm/pgtable.c              | 12 ++++++------
 4 files changed, 15 insertions(+), 9 deletions(-)
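
For context, a minimal userspace sketch of what the capability enables,
under a few stated assumptions: the KVM_CAP_S390_HPAGE_1M value and the
kvm.hpage module parameter come from this series and may not be in your
installed headers yet, and the host needs a pool of 1 MB hugetlb pages.
This is illustrative, not part of the patch:

/* hpage-enable.c - illustrative sketch only; not from this series */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

#ifndef KVM_CAP_S390_HPAGE_1M
#define KVM_CAP_S390_HPAGE_1M 156	/* from this series; verify against your headers */
#endif
#ifndef MAP_HUGE_SHIFT
#define MAP_HUGE_SHIFT 26
#endif
#ifndef MAP_HUGE_1MB
#define MAP_HUGE_1MB (20 << MAP_HUGE_SHIFT)	/* log2(1 MiB) = 20 */
#endif

int main(void)
{
	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_HPAGE_1M };
	struct kvm_userspace_memory_region slot;
	size_t len = 256 << 20;	/* 256 MiB of guest memory */
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	void *mem;

	/* Must be enabled before any vcpu is created. */
	if (ioctl(vm, KVM_ENABLE_CAP, &cap)) {
		perror("KVM_ENABLE_CAP(KVM_CAP_S390_HPAGE_1M)");
		return 1;
	}

	/* Back guest memory with 1 MB hugetlb pages. */
	mem = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_1MB,
		   -1, 0);
	if (mem == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	slot = (struct kvm_userspace_memory_region) {
		.slot = 0,
		.guest_phys_addr = 0,
		.memory_size = len,
		.userspace_addr = (unsigned long)mem,
	};
	if (ioctl(vm, KVM_SET_USER_MEMORY_REGION, &slot)) {
		perror("KVM_SET_USER_MEMORY_REGION");
		return 1;
	}

	/* Guest faults in this slot may now be resolved via large pmds. */
	return 0;
}

With the capability enabled and the slot backed by 1 MB pages, the
__gmap_link() hunk below lets the fault path link the large host pmd
into the gmap instead of returning -EFAULT.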

Comments

David Hildenbrand July 17, 2018, 7:25 p.m. UTC | #1
On 17.07.2018 14:44, Janosch Frank wrote:
> Let's allow huge pmd linking when enabled through the
> KVM_CAP_S390_HPAGE_1M capability. Also we can now restrict gmap
> invalidation and notification to the cases where the capability has
> been activated and save some cycles when that's not the case.
> 
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> ---
>  arch/s390/include/asm/mmu.h         |  2 ++
>  arch/s390/include/asm/mmu_context.h |  1 +
>  arch/s390/mm/gmap.c                 |  9 ++++++---
>  arch/s390/mm/pgtable.c              | 12 ++++++------
>  4 files changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
> index f5ff9dbad8ac..fad2ec2c1fd1 100644
> --- a/arch/s390/include/asm/mmu.h
> +++ b/arch/s390/include/asm/mmu.h
> @@ -24,6 +24,8 @@ typedef struct {
>  	unsigned int uses_skeys:1;
>  	/* The mmu context uses CMM. */
>  	unsigned int uses_cmm:1;
> +	/* The gmap associated with this context uses huge pages. */

... are allowed to use ...

> +	unsigned int allow_gmap_hpage_1m:1;
>  } mm_context_t;
>  
>  #define INIT_MM_CONTEXT(name)						   \
> diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
> index d16bc79c30bb..0717ee76885d 100644
> --- a/arch/s390/include/asm/mmu_context.h
> +++ b/arch/s390/include/asm/mmu_context.h
> @@ -32,6 +32,7 @@ static inline int init_new_context(struct task_struct *tsk,
>  	mm->context.has_pgste = 0;
>  	mm->context.uses_skeys = 0;
>  	mm->context.uses_cmm = 0;
> +	mm->context.allow_gmap_hpage_1m = 0;
>  #endif
>  	switch (mm->context.asce_limit) {
>  	case _REGION2_SIZE:
> diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
> index a5a60afae8df..16878dacaeab 100644
> --- a/arch/s390/mm/gmap.c
> +++ b/arch/s390/mm/gmap.c
> @@ -2,8 +2,10 @@
>  /*
>   *  KVM guest address space mapping code
>   *
> - *    Copyright IBM Corp. 2007, 2016
> + *    Copyright IBM Corp. 2007, 2016, 2018
>   *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
> + *		 David Hildenbrand <david@redhat.com>
> + *		 Janosch Frank <frankja@linux.vnet.ibm.com>
>   */
>  
>  #include <linux/kernel.h>
> @@ -589,8 +591,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
>  		return -EFAULT;
>  	pmd = pmd_offset(pud, vmaddr);
>  	VM_BUG_ON(pmd_none(*pmd));
> -	/* large pmds cannot yet be handled */
> -	if (pmd_large(*pmd))
> +	/* Are we allowed to use huge pages? */
> +	if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
>  		return -EFAULT;
>  	/* Link gmap segment table entry location to page table. */
>  	rc = radix_tree_preload(GFP_KERNEL);
> @@ -1634,6 +1636,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
>  	unsigned long limit;
>  	int rc;
>  
> +	BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
>  	BUG_ON(gmap_is_shadow(parent));
>  	spin_lock(&parent->shadow_lock);
>  	sg = gmap_find_shadow(parent, asce, edat_level);
> diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
> index 147380c0b5d3..8595c37e0370 100644
> --- a/arch/s390/mm/pgtable.c
> +++ b/arch/s390/mm/pgtable.c
> @@ -348,7 +348,7 @@ static inline void pmdp_idte_local(struct mm_struct *mm,
>  			    mm->context.asce, IDTE_LOCAL);
>  	else
>  		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
> -	if (mm_has_pgste(mm))
> +	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
>  		gmap_pmdp_idte_local(mm, addr);
>  }
>  
> @@ -358,15 +358,15 @@ static inline void pmdp_idte_global(struct mm_struct *mm,
>  	if (MACHINE_HAS_TLB_GUEST) {
>  		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
>  			    mm->context.asce, IDTE_GLOBAL);
> -		if (mm_has_pgste(mm))
> +		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
>  			gmap_pmdp_idte_global(mm, addr);
>  	} else if (MACHINE_HAS_IDTE) {
>  		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
> -		if (mm_has_pgste(mm))
> +		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
>  			gmap_pmdp_idte_global(mm, addr);
>  	} else {
>  		__pmdp_csp(pmdp);
> -		if (mm_has_pgste(mm))
> +		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
>  			gmap_pmdp_csp(mm, addr);
>  	}
>  }
> @@ -435,7 +435,7 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
>  	pmd_t old;
>  
>  	preempt_disable();
> -	if (mm_has_pgste(mm))
> +	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
>  		pmdp_clear_skeys(mm, pmdp, new);
>  	old = pmdp_flush_direct(mm, addr, pmdp);
>  	*pmdp = new;
> @@ -450,7 +450,7 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
>  	pmd_t old;
>  
>  	preempt_disable();
> -	if (mm_has_pgste(mm))
> +	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
>  		pmdp_clear_skeys(mm, pmdp, new);
>  	old = pmdp_flush_lazy(mm, addr, pmdp);
>  	*pmdp = new;
> 

Not sure if all of these extra checks really make sense
(performance-wise), but I guess they would be helpful if some parts of
e.g. QEMU (shared libraries?) are backed by huge pages while the gmap
does not contain huge pages.
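
The repeated guard being discussed can be read as a single predicate. A
condensed restatement (the helper name is made up; the patch open-codes
the test at each call site rather than introducing such a helper):

/*
 * Not actual kernel code: a condensed restatement of the guard this
 * patch repeats at each pmdp_* call site in arch/s390/mm/pgtable.c.
 * gmap pmd invalidation/notification is only needed when the mm
 * belongs to a KVM guest (has pgste) and huge pages may be linked
 * into its gmap at all.
 */
static inline bool pmdp_gmap_notify_needed(struct mm_struct *mm)
{
	return mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m;
}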
David Hildenbrand July 17, 2018, 7:27 p.m. UTC | #2
On 17.07.2018 14:44, Janosch Frank wrote:
> Let's allow huge pmd linking when enabled through the
> KVM_CAP_S390_HPAGE_1M capability. Also we can now restrict gmap
> invalidation and notification to the cases where the capability has
> been activated and save some cycles when that's not the case.
> 
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>

... I missed adding

Reviewed-by: David Hildenbrand <david@redhat.com>