
[1/2] s390/gmap: voluntarily schedule during key setting

Message ID 20220530092706.11637-2-borntraeger@linux.ibm.com (mailing list archive)
State New, archived
Series s390/gmap/pgtable improve handling of keyed KVM guests

Commit Message

Christian Borntraeger May 30, 2022, 9:27 a.m. UTC
With many large guests that use storage keys it is possible to create
large latencies or stalls during initial key setting:

rcu: INFO: rcu_sched self-detected stall on CPU
rcu:   18-....: (2099 ticks this GP) idle=54e/1/0x4000000000000002 softirq=35598716/35598716 fqs=998
       (t=2100 jiffies g=155867385 q=20879)
Task dump for CPU 18:
CPU 1/KVM       R  running task        0 1030947 256019 0x06000004
Call Trace:
sched_show_task
rcu_dump_cpu_stacks
rcu_sched_clock_irq
update_process_times
tick_sched_handle
tick_sched_timer
__hrtimer_run_queues
hrtimer_interrupt
do_IRQ
ext_int_handler
ptep_zap_key

The mmap lock is held during the page walk, but since it is a semaphore,
scheduling is still possible. The same is true for the KVM SRCU.
To minimize overhead, only do this once per segment table entry or large page.

Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
---
 arch/s390/mm/gmap.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
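
For scale: on s390 a segment table entry (pmd) maps 1 MB, i.e. 256 pages of
4 KB, which is where the "256 pages" in the patch's comment comes from. The
walk's pte_entry callback therefore runs 256 times for every pmd_entry call;
for a hypothetical guest backed by 1 TB of memory that is roughly 2^28 pte
callbacks under the mmap lock, while the added cond_resched() fires only
about 2^20 times, once per segment, so the extra overhead is negligible.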

Comments

Claudio Imbrenda May 30, 2022, 9:56 a.m. UTC | #1
On Mon, 30 May 2022 11:27:05 +0200
Christian Borntraeger <borntraeger@linux.ibm.com> wrote:

[...]

Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>

Alexander Gordeev May 31, 2022, 7:23 a.m. UTC | #2
On Mon, May 30, 2022 at 11:27:05AM +0200, Christian Borntraeger wrote:
[...]

Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>


Patch

diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 1ac73917a8d3..b8ae4a4aa2ba 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2608,6 +2608,18 @@  static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
 	return 0;
 }
 
+/*
+ * Give a chance to schedule after setting a key to 256 pages.
+ * We only hold the mm lock, which is a rwsem and the kvm srcu.
+ * Both can sleep.
+ */
+static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
+				  unsigned long next, struct mm_walk *walk)
+{
+	cond_resched();
+	return 0;
+}
+
 static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
 				      unsigned long hmask, unsigned long next,
 				      struct mm_walk *walk)
@@ -2630,12 +2642,14 @@  static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
 	end = start + HPAGE_SIZE - 1;
 	__storage_key_init_range(start, end);
 	set_bit(PG_arch_1, &page->flags);
+	cond_resched();
 	return 0;
 }
 
 static const struct mm_walk_ops enable_skey_walk_ops = {
 	.hugetlb_entry		= __s390_enable_skey_hugetlb,
 	.pte_entry		= __s390_enable_skey_pte,
+	.pmd_entry		= __s390_enable_skey_pmd,
 };
 
 int s390_enable_skey(void)
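
For context, the page walk that drives these callbacks is started from
s390_enable_skey(), whose body lies outside the hunks above. Below is a
simplified sketch of that caller, loosely reconstructed from
arch/s390/mm/gmap.c of this era; helpers such as mm_uses_skeys(),
gmap_mark_unmergeable() and the walk_page_range() arguments are assumptions
taken from the surrounding kernel code and may not match the upstream
function line for line:

/*
 * Simplified sketch, not the verbatim upstream function: enable storage
 * keys for the whole user address space in a single page table walk.
 * The walk runs under the mmap write lock, so without the pmd_entry
 * hook added above there is no scheduling point for the entire range.
 */
int s390_enable_skey(void)
{
	struct mm_struct *mm = current->mm;
	int rc = 0;

	mmap_write_lock(mm);
	if (mm_uses_skeys(mm))		/* keys already enabled, nothing to do */
		goto out_up;

	mm->context.uses_skeys = 1;
	rc = gmap_mark_unmergeable();	/* keyed pages must not be KSM-merged */
	if (rc) {
		mm->context.uses_skeys = 0;
		goto out_up;
	}
	/* invokes __s390_enable_skey_{pte,pmd,hugetlb} for every mapping */
	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);

out_up:
	mmap_write_unlock(mm);
	return rc;
}

In the stall shown in the commit message the walk had already run for more
than 2000 jiffies without rescheduling; with the new pmd_entry hook,
cond_resched() gets a chance to run once per megabyte of walked guest memory
instead.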