diff mbox series

[v3] arm64: paravirt: Use RCU read locks to guard stolen_time

Message ID 20220513174654.362169-1-quic_eberman@quicinc.com (mailing list archive)
State New, archived
Headers show
Series [v3] arm64: paravirt: Use RCU read locks to guard stolen_time | expand

Commit Message

Elliot Berman May 13, 2022, 5:46 p.m. UTC
From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>

During hotplug, the stolen time data structure is unmapped and memset.
There is a possibility of the timer IRQ being triggered before memset
and stolen time is getting updated as part of this timer IRQ handler. This
causes the below crash in timer handler -

  [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
  ...
  [ 3458.154398][    C5] Call trace:
  [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
  [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
  [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
  [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
  [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
  [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
  [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
  [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
  [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
  [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
  [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
  [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
  [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
  [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
  [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
  [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
  [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
  [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
  [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
  [ 3458.251656][    C5]  __vunmap+0x154/0x278
  [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
  [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
  [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
  [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
  [ 3458.276638][    C5]  kthread+0x17c/0x1e0
  [ 3458.280691][    C5]  ret_from_fork+0x10/0x20

As a fix, introduce rcu lock to update stolen time structure.

Suggested-by: Will Deacon <will@kernel.org>
Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
---
Changes since v2: https://lore.kernel.org/all/20220428183536.2866667-1-quic_eberman@quicinc.com/
 - Clean up sparse warnings and apply suggestion from Will
Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
 - Use RCU instead of disabling interrupts
 arch/arm64/kernel/paravirt.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

Comments

Srivatsa S. Bhat May 13, 2022, 11:32 p.m. UTC | #1
On 5/13/22 10:46 AM, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>   [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>   ...
>   [ 3458.154398][    C5] Call trace:
>   [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>   [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>   [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>   [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>   [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>   [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>   [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>   [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>   [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>   [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>   [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>   [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>   [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>   [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>   [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>   [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>   [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>   [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>   [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>   [ 3458.251656][    C5]  __vunmap+0x154/0x278
>   [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>   [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>   [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>   [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>   [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>   [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Suggested-by: Will Deacon <will@kernel.org>
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
> ---

Looks good to me, but one quick question though (see below).

Reviewed-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>

>  
>  static int stolen_time_cpu_down_prepare(unsigned int cpu)
>  {
> +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
>  	struct pv_time_stolen_time_region *reg;
>  
>  	reg = this_cpu_ptr(&stolen_time_region);
>  	if (!reg->kaddr)
>  		return 0;
>  
> -	memunmap(reg->kaddr);
> -	memset(reg, 0, sizeof(*reg));
> +	kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
> +	synchronize_rcu();
> +	memunmap(kaddr);
>  

The original code used to memset the stolen time region, but this
patch seems to drop it. Was that change intentional?

Regards,
Srivatsa
Will Deacon May 17, 2022, 8:54 a.m. UTC | #2
On Fri, May 13, 2022 at 04:32:53PM -0700, Srivatsa S. Bhat wrote:
> On 5/13/22 10:46 AM, Elliot Berman wrote:
> > From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> > 
> > During hotplug, the stolen time data structure is unmapped and memset.
> > There is a possibility of the timer IRQ being triggered before memset
> > and stolen time is getting updated as part of this timer IRQ handler. This
> > causes the below crash in timer handler -
> > 
> >   [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
> >   ...
> >   [ 3458.154398][    C5] Call trace:
> >   [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
> >   [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
> >   [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
> >   [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
> >   [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
> >   [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
> >   [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
> >   [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
> >   [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
> >   [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
> >   [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
> >   [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
> >   [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
> >   [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
> >   [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
> >   [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
> >   [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
> >   [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
> >   [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
> >   [ 3458.251656][    C5]  __vunmap+0x154/0x278
> >   [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
> >   [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
> >   [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
> >   [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
> >   [ 3458.276638][    C5]  kthread+0x17c/0x1e0
> >   [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> > 
> > As a fix, introduce rcu lock to update stolen time structure.
> > 
> > Suggested-by: Will Deacon <will@kernel.org>
> > Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> > Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
> > ---
> 
> Looks good to me, but one quick question though (see below).
> 
> Reviewed-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>

Cheers.

> >  static int stolen_time_cpu_down_prepare(unsigned int cpu)
> >  {
> > +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
> >  	struct pv_time_stolen_time_region *reg;
> >  
> >  	reg = this_cpu_ptr(&stolen_time_region);
> >  	if (!reg->kaddr)
> >  		return 0;
> >  
> > -	memunmap(reg->kaddr);
> > -	memset(reg, 0, sizeof(*reg));
> > +	kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
> > +	synchronize_rcu();
> > +	memunmap(kaddr);
> >  
> 
> The original code used to memset the stolen time region, but this
> patch seems to drop it. Was that change intentional?

'struct pv_time_stolen_time_region' only has one field ('kaddr'), which
we're now clearing with rcu_replace_pointer() so the memset doesn't make
sense.

Will
Will Deacon May 17, 2022, 2:04 p.m. UTC | #3
On Fri, 13 May 2022 10:46:54 -0700, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
> [...]

Applied to arm64 (for-next/fixes), thanks!

[1/1] arm64: paravirt: Use RCU read locks to guard stolen_time
      https://git.kernel.org/arm64/c/19bef63f951e

Cheers,
Srivatsa S. Bhat May 17, 2022, 4:48 p.m. UTC | #4
On 5/17/22 1:54 AM, Will Deacon wrote:
> On Fri, May 13, 2022 at 04:32:53PM -0700, Srivatsa S. Bhat wrote:
>> On 5/13/22 10:46 AM, Elliot Berman wrote:
>>> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
>>>

[...]

>>>  static int stolen_time_cpu_down_prepare(unsigned int cpu)
>>>  {
>>> +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
>>>  	struct pv_time_stolen_time_region *reg;
>>>  
>>>  	reg = this_cpu_ptr(&stolen_time_region);
>>>  	if (!reg->kaddr)
>>>  		return 0;
>>>  
>>> -	memunmap(reg->kaddr);
>>> -	memset(reg, 0, sizeof(*reg));
>>> +	kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
>>> +	synchronize_rcu();
>>> +	memunmap(kaddr);
>>>  
>>
>> The original code used to memset the stolen time region, but this
>> patch seems to drop it. Was that change intentional?
> 
> 'struct pv_time_stolen_time_region' only has one field ('kaddr'), which
> we're now clearing with rcu_replace_pointer() so the memset doesn't make
> sense.
> 

Ah right, never mind :)

Thank you!

Regards,
Srivatsa
diff mbox series

Patch

diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 75fed4460407..57c7c211f8c7 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -35,7 +35,7 @@  static u64 native_steal_clock(int cpu)
 DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
-	struct pvclock_vcpu_stolen_time *kaddr;
+	struct pvclock_vcpu_stolen_time __rcu *kaddr;
 };
 
 static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
@@ -52,7 +52,9 @@  early_param("no-steal-acc", parse_no_stealacc);
 /* return stolen time in ns by asking the hypervisor */
 static u64 para_steal_clock(int cpu)
 {
+	struct pvclock_vcpu_stolen_time *kaddr = NULL;
 	struct pv_time_stolen_time_region *reg;
+	u64 ret = 0;
 
 	reg = per_cpu_ptr(&stolen_time_region, cpu);
 
@@ -61,28 +63,37 @@  static u64 para_steal_clock(int cpu)
 	 * online notification callback runs. Until the callback
 	 * has run we just return zero.
 	 */
-	if (!reg->kaddr)
+	rcu_read_lock();
+	kaddr = rcu_dereference(reg->kaddr);
+	if (!kaddr) {
+		rcu_read_unlock();
 		return 0;
+	}
 
-	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
+	rcu_read_unlock();
+	return ret;
 }
 
 static int stolen_time_cpu_down_prepare(unsigned int cpu)
 {
+	struct pvclock_vcpu_stolen_time *kaddr = NULL;
 	struct pv_time_stolen_time_region *reg;
 
 	reg = this_cpu_ptr(&stolen_time_region);
 	if (!reg->kaddr)
 		return 0;
 
-	memunmap(reg->kaddr);
-	memset(reg, 0, sizeof(*reg));
+	kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
+	synchronize_rcu();
+	memunmap(kaddr);
 
 	return 0;
 }
 
 static int stolen_time_cpu_online(unsigned int cpu)
 {
+	struct pvclock_vcpu_stolen_time *kaddr = NULL;
 	struct pv_time_stolen_time_region *reg;
 	struct arm_smccc_res res;
 
@@ -93,17 +104,19 @@  static int stolen_time_cpu_online(unsigned int cpu)
 	if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
 		return -EINVAL;
 
-	reg->kaddr = memremap(res.a0,
+	kaddr = memremap(res.a0,
 			      sizeof(struct pvclock_vcpu_stolen_time),
 			      MEMREMAP_WB);
 
+	rcu_assign_pointer(reg->kaddr, kaddr);
+
 	if (!reg->kaddr) {
 		pr_warn("Failed to map stolen time data structure\n");
 		return -ENOMEM;
 	}
 
-	if (le32_to_cpu(reg->kaddr->revision) != 0 ||
-	    le32_to_cpu(reg->kaddr->attributes) != 0) {
+	if (le32_to_cpu(kaddr->revision) != 0 ||
+	    le32_to_cpu(kaddr->attributes) != 0) {
 		pr_warn_once("Unexpected revision or attributes in stolen time data\n");
 		return -ENXIO;
 	}