
[v2,1/5] padata: make flushing work with async users

Message ID 20190828221425.22701-2-daniel.m.jordan@oracle.com (mailing list archive)
State Changes Requested
Delegated to: Herbert Xu
Series padata flushing and CPU hotplug fixes

Commit Message

Daniel Jordan Aug. 28, 2019, 10:14 p.m. UTC
padata_flush_queues() is broken for an async ->parallel() function
because flush_work() can't wait on it:

  # modprobe tcrypt alg="pcrypt(cryptd(rfc4106(gcm_base(ctr(aes-generic),ghash-generic))))" type=3
  # modprobe tcrypt mode=215 sec=1 &
  # sleep 5; echo 7 > /sys/kernel/pcrypt/pencrypt/parallel_cpumask

  kernel BUG at kernel/padata.c:473!
  invalid opcode: 0000 [#1] SMP PTI
  CPU: 0 PID: 282 Comm: sh Not tainted 5.3.0-rc5-padata-base+ #3
  RIP: 0010:padata_flush_queues+0xa7/0xb0
  Call Trace:
   padata_replace+0xa1/0x110
   padata_set_cpumask+0xae/0x130
   store_cpumask+0x75/0xa0
   padata_sysfs_store+0x20/0x30
   ...

Wait instead for the parallel_data (pd) object's refcount to drop to
zero.

Simplify by taking an initial reference on a pd when it is allocated.
That ref is dropped during flushing, which allows calling
wait_for_completion() unconditionally.
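
To make that lifetime rule concrete, here is a minimal userspace
sketch of the same pattern, with a C11 atomic and a condition variable
standing in for the kernel's atomic_t and struct completion (all names
below are illustrative, none of this is from the patch):

  #include <pthread.h>
  #include <stdatomic.h>

  struct pd_like {
          atomic_int refcnt;            /* seeded at 1: the initial ref */
          pthread_mutex_t lock;
          pthread_cond_t all_done;      /* stands in for flushing_done */
  };

  static void pd_init(struct pd_like *pd)
  {
          atomic_init(&pd->refcnt, 1);  /* dropped later, in pd_flush() */
          pthread_mutex_init(&pd->lock, NULL);
          pthread_cond_init(&pd->all_done, NULL);
  }

  /* Each submitted job holds a reference... */
  static void pd_get(struct pd_like *pd)
  {
          atomic_fetch_add(&pd->refcnt, 1);
  }

  /* ...dropped when its serial callback finishes. */
  static void pd_put(struct pd_like *pd)
  {
          if (atomic_fetch_sub(&pd->refcnt, 1) == 1) {
                  pthread_mutex_lock(&pd->lock);
                  pthread_cond_broadcast(&pd->all_done);
                  pthread_mutex_unlock(&pd->lock);
          }
  }

  /* Flush: drop the initial ref, then wait out in-flight jobs. */
  static void pd_flush(struct pd_like *pd)
  {
          pd_put(pd);
          pthread_mutex_lock(&pd->lock);
          /* A struct completion latches its signal, so the kernel side
           * can wait unconditionally; a condvar needs a predicate loop. */
          while (atomic_load(&pd->refcnt) != 0)
                  pthread_cond_wait(&pd->all_done, &pd->lock);
          pthread_mutex_unlock(&pd->lock);
          atomic_store(&pd->refcnt, 1); /* re-arm for the next cycle */
  }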

Without the initial ref, the only alternative I could think of is to
make complete() conditional on !PADATA_INIT or PADATA_REPLACE (in
addition to pd->refcnt reaching zero) and to call wait_for_completion()
only when pd->refcnt is nonzero.  But then complete() could be called
without a matching wait:

completer                     waiter
---------                     ------
DEC pd->refcnt    // 0
                              pinst->flags |= PADATA_REPLACE
LOAD pinst->flags // REPLACE
                              LOAD pd->refcnt // 0
                              /* return without wait_for_completion() */
complete()
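
Spelled out as code, that rejected variant would look roughly like this
(hypothetical, shown only to make the interleaving concrete; it is not
what the patch does):

  /* completer, e.g. the serial worker */
  if (atomic_dec_return(&pd->refcnt) == 0 &&
      (!(pd->pinst->flags & PADATA_INIT) ||
        (pd->pinst->flags & PADATA_REPLACE)))
          complete(&pd->flushing_done);

  /* waiter, i.e. the flush path */
  pd->pinst->flags |= PADATA_REPLACE;
  if (atomic_read(&pd->refcnt) != 0)
          wait_for_completion(&pd->flushing_done);

In the interleaving above the waiter sees refcnt already at zero and
skips the wait, while the completer sees PADATA_REPLACE and signals
anyway, leaving a stale completion armed for the next flush.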

No more flushing per-CPU work items, so no more CPU hotplug lock in
__padata_stop.

Fixes: 2b73b07ab8a4 ("padata: Flush the padata queues actively")
Reported-by: Herbert Xu <herbert@gondor.apana.org.au>
Suggested-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 include/linux/padata.h |  3 +++
 kernel/padata.c        | 32 ++++++++++++--------------------
 2 files changed, 15 insertions(+), 20 deletions(-)

Comments

Herbert Xu Sept. 5, 2019, 4:17 a.m. UTC | #1
On Wed, Aug 28, 2019 at 06:14:21PM -0400, Daniel Jordan wrote:
>
> @@ -453,24 +456,15 @@ static void padata_free_pd(struct parallel_data *pd)
>  /* Flush all objects out of the padata queues. */
>  static void padata_flush_queues(struct parallel_data *pd)
>  {
> -	int cpu;
> -	struct padata_parallel_queue *pqueue;
> -	struct padata_serial_queue *squeue;
> -
> -	for_each_cpu(cpu, pd->cpumask.pcpu) {
> -		pqueue = per_cpu_ptr(pd->pqueue, cpu);
> -		flush_work(&pqueue->work);
> -	}
> -
> -	if (atomic_read(&pd->reorder_objects))
> -		padata_reorder(pd);
> +	if (!(pd->pinst->flags & PADATA_INIT))
> +		return;
>  
> -	for_each_cpu(cpu, pd->cpumask.cbcpu) {
> -		squeue = per_cpu_ptr(pd->squeue, cpu);
> -		flush_work(&squeue->work);
> -	}
> +	if (atomic_dec_return(&pd->refcnt) == 0)
> +		complete(&pd->flushing_done);
>  
> -	BUG_ON(atomic_read(&pd->refcnt) != 0);
> +	wait_for_completion(&pd->flushing_done);
> +	reinit_completion(&pd->flushing_done);
> +	atomic_set(&pd->refcnt, 1);
>  }

I don't think waiting is an option.  In a pathological case the
hardware may not return at all.  We cannot and should not hold off
CPU hotplug for an arbitrary amount of time when the event we are
waiting for isn't even occurring on that CPU.

I don't think flushing is needed at all.  All we need to do is
maintain consistency before and after the CPU hotplug event.

Cheers,
Daniel Jordan Sept. 5, 2019, 10:37 p.m. UTC | #2
On Thu, Sep 05, 2019 at 02:17:35PM +1000, Herbert Xu wrote:
> On Wed, Aug 28, 2019 at 06:14:21PM -0400, Daniel Jordan wrote:
> >
> > @@ -453,24 +456,15 @@ static void padata_free_pd(struct parallel_data *pd)
> >  /* Flush all objects out of the padata queues. */
> >  static void padata_flush_queues(struct parallel_data *pd)
> >  {
> > -	int cpu;
> > -	struct padata_parallel_queue *pqueue;
> > -	struct padata_serial_queue *squeue;
> > -
> > -	for_each_cpu(cpu, pd->cpumask.pcpu) {
> > -		pqueue = per_cpu_ptr(pd->pqueue, cpu);
> > -		flush_work(&pqueue->work);
> > -	}
> > -
> > -	if (atomic_read(&pd->reorder_objects))
> > -		padata_reorder(pd);
> > +	if (!(pd->pinst->flags & PADATA_INIT))
> > +		return;
> >  
> > -	for_each_cpu(cpu, pd->cpumask.cbcpu) {
> > -		squeue = per_cpu_ptr(pd->squeue, cpu);
> > -		flush_work(&squeue->work);
> > -	}
> > +	if (atomic_dec_return(&pd->refcnt) == 0)
> > +		complete(&pd->flushing_done);
> >  
> > -	BUG_ON(atomic_read(&pd->refcnt) != 0);
> > +	wait_for_completion(&pd->flushing_done);
> > +	reinit_completion(&pd->flushing_done);
> > +	atomic_set(&pd->refcnt, 1);
> >  }
> 
> I don't think waiting is an option.  In a pathological case the
> hardware may not return at all.  We cannot and should not hold off
> CPU hotplug for an arbitrary amount of time when the event we are
> waiting for isn't even occurring on that CPU.

Ok, I hadn't considered hardware not returning.

> I don't think flushing is needed at all.  All we need to do is
> maintain consistency before and after the CPU hotplug event.

I could imagine that not flushing would work for replacing a pd.  The old pd could
be freed by whatever drops the last reference and the new pd could be
installed, all without flushing.

In the case of freeing an instance, though, padata needs to wait for all the
jobs to complete so they don't use the instance's data after it's been freed.
Holding the CPU hotplug lock isn't necessary for this, though, so I think we're
ok to wait here.
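
Concretely, for the replace case I'm picturing something in the
direction of this rough sketch (not code from this series; assume
pinst->lock is held and that padata_serial_worker() grows the matching
dec-and-test separately):

  struct parallel_data *old_pd;

  /* Swap in the new pd without flushing.  Whoever drops the last
   * reference frees the old one, even if that happens long after
   * the replacement. */
  old_pd = rcu_dereference_protected(pinst->pd,
                                     lockdep_is_held(&pinst->lock));
  rcu_assign_pointer(pinst->pd, new_pd);
  synchronize_rcu();      /* no new job can pick up old_pd */

  /* Drop old_pd's initial ref; in-flight jobs hold their own. */
  if (atomic_dec_and_test(&old_pd->refcnt))
          padata_free_pd(old_pd);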
Daniel Jordan Sept. 18, 2019, 8:37 p.m. UTC | #3
On Thu, Sep 05, 2019 at 06:37:56PM -0400, Daniel Jordan wrote:
> On Thu, Sep 05, 2019 at 02:17:35PM +1000, Herbert Xu wrote:
> > I don't think waiting is an option.  In a pathological case the
> > hardware may not return at all.  We cannot and should not hold off
> > CPU hotplug for an arbitrary amount of time when the event we are
> > waiting for isn't even occurring on that CPU.
> 
> Ok, I hadn't considered hardware not returning.
> 
> > I don't think flushing is needed at all.  All we need to do is
> > maintain consistency before and after the CPU hotplug event.
> 
> I could imagine that not flushing would work for replacing a pd.  The old pd could
> be freed by whatever drops the last reference and the new pd could be
> installed, all without flushing.
> 
> In the case of freeing an instance, though, padata needs to wait for all the
> jobs to complete so they don't use the instance's data after it's been freed.
> Holding the CPU hotplug lock isn't necessary for this, though, so I think we're
> ok to wait here.

[FYI, I'm currently on leave until mid-October and will return to this series
then.]

Patch

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 8da673861d99..1c73f9cc7b72 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -14,6 +14,7 @@ 
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/kobject.h>
+#include <linux/completion.h>
 
 #define PADATA_CPU_SERIAL   0x01
 #define PADATA_CPU_PARALLEL 0x02
@@ -104,6 +105,7 @@  struct padata_cpumask {
  * @squeue: percpu padata queues used for serialuzation.
  * @reorder_objects: Number of objects waiting in the reorder queues.
  * @refcnt: Number of objects holding a reference on this parallel_data.
+ * @flushing_done: Wait for all objects to be sent out.
  * @max_seq_nr:  Maximal used sequence number.
  * @cpu: Next CPU to be processed.
  * @cpumask: The cpumasks in use for parallel and serial workers.
@@ -116,6 +118,7 @@  struct parallel_data {
 	struct padata_serial_queue	__percpu *squeue;
 	atomic_t			reorder_objects;
 	atomic_t			refcnt;
+	struct completion		flushing_done;
 	atomic_t			seq_nr;
 	int				cpu;
 	struct padata_cpumask		cpumask;
diff --git a/kernel/padata.c b/kernel/padata.c
index b60cc3dcee58..958166e23123 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -301,7 +301,8 @@  static void padata_serial_worker(struct work_struct *serial_work)
 		list_del_init(&padata->list);
 
 		padata->serial(padata);
-		atomic_dec(&pd->refcnt);
+		if (atomic_dec_return(&pd->refcnt) == 0)
+			complete(&pd->flushing_done);
 	}
 	local_bh_enable();
 }
@@ -423,7 +424,9 @@  static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 	padata_init_squeues(pd);
 	atomic_set(&pd->seq_nr, -1);
 	atomic_set(&pd->reorder_objects, 0);
-	atomic_set(&pd->refcnt, 0);
+	/* Initial ref dropped in padata_flush_queues. */
+	atomic_set(&pd->refcnt, 1);
+	init_completion(&pd->flushing_done);
 	pd->pinst = pinst;
 	spin_lock_init(&pd->lock);
 	pd->cpu = cpumask_first(pd->cpumask.pcpu);
@@ -453,24 +456,15 @@  static void padata_free_pd(struct parallel_data *pd)
 /* Flush all objects out of the padata queues. */
 static void padata_flush_queues(struct parallel_data *pd)
 {
-	int cpu;
-	struct padata_parallel_queue *pqueue;
-	struct padata_serial_queue *squeue;
-
-	for_each_cpu(cpu, pd->cpumask.pcpu) {
-		pqueue = per_cpu_ptr(pd->pqueue, cpu);
-		flush_work(&pqueue->work);
-	}
-
-	if (atomic_read(&pd->reorder_objects))
-		padata_reorder(pd);
+	if (!(pd->pinst->flags & PADATA_INIT))
+		return;
 
-	for_each_cpu(cpu, pd->cpumask.cbcpu) {
-		squeue = per_cpu_ptr(pd->squeue, cpu);
-		flush_work(&squeue->work);
-	}
+	if (atomic_dec_return(&pd->refcnt) == 0)
+		complete(&pd->flushing_done);
 
-	BUG_ON(atomic_read(&pd->refcnt) != 0);
+	wait_for_completion(&pd->flushing_done);
+	reinit_completion(&pd->flushing_done);
+	atomic_set(&pd->refcnt, 1);
 }
 
 static void __padata_start(struct padata_instance *pinst)
@@ -487,9 +481,7 @@  static void __padata_stop(struct padata_instance *pinst)
 
 	synchronize_rcu();
 
-	get_online_cpus();
 	padata_flush_queues(pinst->pd);
-	put_online_cpus();
 }
 
 /* Replace the internal control structure with a new one. */