diff mbox

[1/2] dmaengine: shdma: fix locking

Message ID Pine.LNX.4.64.1104291906140.17813@axis700.grange (mailing list archive)
State Accepted
Headers show

Commit Message

Guennadi Liakhovetski April 29, 2011, 5:09 p.m. UTC
Close multiple theoretical races, especially the one in
.device_free_chan_resources().

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
---
 drivers/dma/shdma.c |  104 +++++++++++++++++++++++++++++++++-----------------
 1 files changed, 68 insertions(+), 36 deletions(-)

Comments

Paul Mundt May 31, 2011, 6:50 a.m. UTC | #1
On Fri, Apr 29, 2011 at 07:09:21PM +0200, Guennadi Liakhovetski wrote:
>  static int sh_dmae_rst(struct sh_dmae_device *shdev)
>  {
..
> +	dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);

On Fri, Apr 29, 2011 at 07:09:25PM +0200, Guennadi Liakhovetski wrote:
> +static int sh_dmae_runtime_resume(struct device *dev)
> +{
> +	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
> +
> +	return sh_dmae_rst(shdev);
..

Yet in sh_dmae_probe() we have:

        shdev->pdata = pdata;

        pm_runtime_enable(&pdev->dev);
        pm_runtime_get_sync(&pdev->dev);

..

        /* reset dma controller - only needed as a test */
        err = sh_dmae_rst(shdev);
        if (err)
                goto rst_err;

..

        pm_runtime_put(&pdev->dev);

        platform_set_drvdata(pdev, shdev);
        dma_async_device_register(&shdev->common);

        return err;
..

So I'm wondering how this was ever actually tested. The original sh_dmae_rst()
call is safe due to passing along the shdev pointer with pdata initialized
explicitly, while the runtime PM bits fetch the pointer via dev_get_drvdata()
at a time where drvdata hasn't even been initialized yet, resulting in a rather
predictable oops:

	Unable to handle kernel NULL pointer dereference at virtual address 000000c4
	pc = 8025adee
	*pde = 00000000
	Oops: 0000 [#1]
	Modules linked in:

	Pid : 1, Comm:           swapper
	CPU : 0                  Not tainted  (3.0.0-rc1-00012-g9436b4a-dirty #1456)

	PC is at sh_dmae_rst+0x28/0x86
	PR is at sh_dmae_rst+0x22/0x86
	PC  : 8025adee SP  : 9e803d10 SR  : 400080f1 TEA : 000000c4
	R0  : 000000c4 R1  : 0000fff8 R2  : 00000000 R3  : 00000040
	R4  : 000000f0 R5  : 00000000 R6  : 00000000 R7  : 804f184c
	R8  : 00000000 R9  : 804dd0e8 R10 : 80283204 R11 : ffffffda
	R12 : 000000a0 R13 : 804dd18c R14 : 9e803d10
	MACH: 00000000 MACL: 00008f20 GBR : 00000000 PR  : 8025ade8

	Call trace:
	[<8025ae70>] sh_dmae_runtime_resume+0x24/0x34
	[<80283238>] pm_generic_runtime_resume+0x34/0x3c
	[<80283370>] rpm_callback+0x4a/0x7e
	[<80283efc>] rpm_resume+0x240/0x384
	[<80283f54>] rpm_resume+0x298/0x384
	[<8028428c>] __pm_runtime_resume+0x44/0x7c
	[<8038a358>] __ioremap_caller+0x0/0xec
	[<80284296>] __pm_runtime_resume+0x4e/0x7c
	[<8038a358>] __ioremap_caller+0x0/0xec
	[<80666254>] sh_dmae_probe+0x180/0x6a0
	[<802803ae>] platform_drv_probe+0x26/0x2e

I've fixed this up now, but I am growing rather weary of applying anything with
runtime PM in the subject that alleges to have been tested.

The next runtime PM patch that doesn't even boot will be immediately reverted
and have a kernel version or two to sit things out in order to try to get
things in demonstrable functional order.
--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guennadi Liakhovetski May 31, 2011, 9:26 a.m. UTC | #2
On Tue, 31 May 2011, Paul Mundt wrote:

> On Fri, Apr 29, 2011 at 07:09:21PM +0200, Guennadi Liakhovetski wrote:
> >  static int sh_dmae_rst(struct sh_dmae_device *shdev)
> >  {
> ..
> > +	dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);
> 
> On Fri, Apr 29, 2011 at 07:09:25PM +0200, Guennadi Liakhovetski wrote:
> > +static int sh_dmae_runtime_resume(struct device *dev)
> > +{
> > +	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
> > +
> > +	return sh_dmae_rst(shdev);
> ..
> 
> Yet in sh_dmae_probe() we have:
> 
>         shdev->pdata = pdata;
> 
>         pm_runtime_enable(&pdev->dev);
>         pm_runtime_get_sync(&pdev->dev);
> 
> ..
> 
>         /* reset dma controller - only needed as a test */
>         err = sh_dmae_rst(shdev);
>         if (err)
>                 goto rst_err;
> 
> ..
> 
>         pm_runtime_put(&pdev->dev);
> 
>         platform_set_drvdata(pdev, shdev);
>         dma_async_device_register(&shdev->common);
> 
>         return err;
> ..
> 
> So I'm wondering how this was ever actually tested. The original sh_dmae_rst()
> call is safe due to passing along the shdev pointer with pdata initialized
> explicitly, while the runtime PM bits fetch the pointer via dev_get_drvdata()
> at a time where drvdata hasn't even been initialized yet, resulting in a rather
> predictable oops:

Ok, I tested it on ARM mainly, maybe only:-( And there the rtpm seems to 
function differently, resulting in resume not being called from probe. 
Even now, testing Linus' head (from yesterday) I'm not getting this Oops 
on ARM. So, have to test on both ARM and SH then, especially everything, 
concerning rtpm...

Thanks for spotting and fixing!
Guennadi

> 	Unable to handle kernel NULL pointer dereference at virtual address 000000c4
> 	pc = 8025adee
> 	*pde = 00000000
> 	Oops: 0000 [#1]
> 	Modules linked in:
> 
> 	Pid : 1, Comm:           swapper
> 	CPU : 0                  Not tainted  (3.0.0-rc1-00012-g9436b4a-dirty #1456)
> 
> 	PC is at sh_dmae_rst+0x28/0x86
> 	PR is at sh_dmae_rst+0x22/0x86
> 	PC  : 8025adee SP  : 9e803d10 SR  : 400080f1 TEA : 000000c4
> 	R0  : 000000c4 R1  : 0000fff8 R2  : 00000000 R3  : 00000040
> 	R4  : 000000f0 R5  : 00000000 R6  : 00000000 R7  : 804f184c
> 	R8  : 00000000 R9  : 804dd0e8 R10 : 80283204 R11 : ffffffda
> 	R12 : 000000a0 R13 : 804dd18c R14 : 9e803d10
> 	MACH: 00000000 MACL: 00008f20 GBR : 00000000 PR  : 8025ade8
> 
> 	Call trace:
> 	[<8025ae70>] sh_dmae_runtime_resume+0x24/0x34
> 	[<80283238>] pm_generic_runtime_resume+0x34/0x3c
> 	[<80283370>] rpm_callback+0x4a/0x7e
> 	[<80283efc>] rpm_resume+0x240/0x384
> 	[<80283f54>] rpm_resume+0x298/0x384
> 	[<8028428c>] __pm_runtime_resume+0x44/0x7c
> 	[<8038a358>] __ioremap_caller+0x0/0xec
> 	[<80284296>] __pm_runtime_resume+0x4e/0x7c
> 	[<8038a358>] __ioremap_caller+0x0/0xec
> 	[<80666254>] sh_dmae_probe+0x180/0x6a0
> 	[<802803ae>] platform_drv_probe+0x26/0x2e
> 
> I've fixed this up now, but I am growing rather weary of applying anything with
> runtime PM in the subject that alleges to have been tested.
> 
> The next runtime PM patch that doesn't even boot will be immediately reverted
> and have a kernel version or two to sit things out in order to try to get
> things in demonstrable functional order.
> 

---
Guennadi Liakhovetski, Ph.D.
Freelance Open-Source Software Developer
http://www.open-technology.de/
--
To unsubscribe from this list: send the line "unsubscribe linux-sh" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index b7f27f5..d0f402b 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -48,7 +48,7 @@  enum sh_dmae_desc_status {
 
 /*
  * Used for write-side mutual exclusion for the global device list,
- * read-side synchronization by way of RCU.
+ * read-side synchronization by way of RCU, and per-controller data.
  */
 static DEFINE_SPINLOCK(sh_dmae_lock);
 static LIST_HEAD(sh_dmae_devices);
@@ -85,22 +85,35 @@  static void dmaor_write(struct sh_dmae_device *shdev, u16 data)
  */
 static void sh_dmae_ctl_stop(struct sh_dmae_device *shdev)
 {
-	unsigned short dmaor = dmaor_read(shdev);
+	unsigned short dmaor;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sh_dmae_lock, flags);
 
+	dmaor = dmaor_read(shdev);
 	dmaor_write(shdev, dmaor & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME));
+
+	spin_unlock_irqrestore(&sh_dmae_lock, flags);
 }
 
 static int sh_dmae_rst(struct sh_dmae_device *shdev)
 {
 	unsigned short dmaor;
+	unsigned long flags;
 
-	sh_dmae_ctl_stop(shdev);
-	dmaor = dmaor_read(shdev) | shdev->pdata->dmaor_init;
+	spin_lock_irqsave(&sh_dmae_lock, flags);
 
-	dmaor_write(shdev, dmaor);
-	if (dmaor_read(shdev) & (DMAOR_AE | DMAOR_NMIF)) {
-		pr_warning("dma-sh: Can't initialize DMAOR.\n");
-		return -EINVAL;
+	dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME);
+
+	dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);
+
+	dmaor = dmaor_read(shdev);
+
+	spin_unlock_irqrestore(&sh_dmae_lock, flags);
+
+	if (dmaor & (DMAOR_AE | DMAOR_NMIF)) {
+		dev_warn(shdev->common.dev, "Can't initialize DMAOR.\n");
+		return -EIO;
 	}
 	return 0;
 }
@@ -184,7 +197,7 @@  static void dmae_init(struct sh_dmae_chan *sh_chan)
 
 static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
 {
-	/* When DMA was working, can not set data to CHCR */
+	/* If DMA is active, cannot set CHCR. TODO: remove this superfluous check */
 	if (dmae_is_busy(sh_chan))
 		return -EBUSY;
 
@@ -374,7 +387,12 @@  static void sh_dmae_free_chan_resources(struct dma_chan *chan)
 	LIST_HEAD(list);
 	int descs = sh_chan->descs_allocated;
 
+	/* Protect against ISR */
+	spin_lock_irq(&sh_chan->desc_lock);
 	dmae_halt(sh_chan);
+	spin_unlock_irq(&sh_chan->desc_lock);
+
+	/* Now no new interrupts will occur */
 
 	/* Prepared and not submitted descriptors can still be on the queue */
 	if (!list_empty(&sh_chan->ld_queue))
@@ -384,6 +402,7 @@  static void sh_dmae_free_chan_resources(struct dma_chan *chan)
 		/* The caller is holding dma_list_mutex */
 		struct sh_dmae_slave *param = chan->private;
 		clear_bit(param->slave_id, sh_dmae_slave_used);
+		chan->private = NULL;
 	}
 
 	spin_lock_bh(&sh_chan->desc_lock);
@@ -563,8 +582,6 @@  static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
 	if (!chan || !len)
 		return NULL;
 
-	chan->private = NULL;
-
 	sh_chan = to_sh_chan(chan);
 
 	sg_init_table(&sg, 1);
@@ -620,9 +637,9 @@  static int sh_dmae_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	if (!chan)
 		return -EINVAL;
 
+	spin_lock_bh(&sh_chan->desc_lock);
 	dmae_halt(sh_chan);
 
-	spin_lock_bh(&sh_chan->desc_lock);
 	if (!list_empty(&sh_chan->ld_queue)) {
 		/* Record partial transfer */
 		struct sh_desc *desc = list_entry(sh_chan->ld_queue.next,
@@ -716,6 +733,14 @@  static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all
 			list_move(&desc->node, &sh_chan->ld_free);
 		}
 	}
+
+	if (all && !callback)
+		/*
+		 * Terminating and the loop completed normally: forgive
+		 * uncompleted cookies
+		 */
+		sh_chan->completed_cookie = sh_chan->common.cookie;
+
 	spin_unlock_bh(&sh_chan->desc_lock);
 
 	if (callback)
@@ -733,10 +758,6 @@  static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all)
 {
 	while (__ld_cleanup(sh_chan, all))
 		;
-
-	if (all)
-		/* Terminating - forgive uncompleted cookies */
-		sh_chan->completed_cookie = sh_chan->common.cookie;
 }
 
 static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan)
@@ -782,8 +803,10 @@  static enum dma_status sh_dmae_tx_status(struct dma_chan *chan,
 
 	sh_dmae_chan_ld_cleanup(sh_chan, false);
 
-	last_used = chan->cookie;
+	/* First read completed cookie to avoid a skew */
 	last_complete = sh_chan->completed_cookie;
+	rmb();
+	last_used = chan->cookie;
 	BUG_ON(last_complete < 0);
 	dma_set_tx_state(txstate, last_complete, last_used, 0);
 
@@ -813,8 +836,12 @@  static enum dma_status sh_dmae_tx_status(struct dma_chan *chan,
 static irqreturn_t sh_dmae_interrupt(int irq, void *data)
 {
 	irqreturn_t ret = IRQ_NONE;
-	struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
-	u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+	struct sh_dmae_chan *sh_chan = data;
+	u32 chcr;
+
+	spin_lock(&sh_chan->desc_lock);
+
+	chcr = sh_dmae_readl(sh_chan, CHCR);
 
 	if (chcr & CHCR_TE) {
 		/* DMA stop */
@@ -824,10 +851,13 @@  static irqreturn_t sh_dmae_interrupt(int irq, void *data)
 		tasklet_schedule(&sh_chan->tasklet);
 	}
 
+	spin_unlock(&sh_chan->desc_lock);
+
 	return ret;
 }
 
-static unsigned int sh_dmae_reset(struct sh_dmae_device *shdev)
+/* Called from error IRQ or NMI */
+static bool sh_dmae_reset(struct sh_dmae_device *shdev)
 {
 	unsigned int handled = 0;
 	int i;
@@ -839,22 +869,32 @@  static unsigned int sh_dmae_reset(struct sh_dmae_device *shdev)
 	for (i = 0; i < SH_DMAC_MAX_CHANNELS; i++) {
 		struct sh_dmae_chan *sh_chan = shdev->chan[i];
 		struct sh_desc *desc;
+		LIST_HEAD(dl);
 
 		if (!sh_chan)
 			continue;
 
+		spin_lock(&sh_chan->desc_lock);
+
 		/* Stop the channel */
 		dmae_halt(sh_chan);
 
+		list_splice_init(&sh_chan->ld_queue, &dl);
+
+		spin_unlock(&sh_chan->desc_lock);
+
 		/* Complete all  */
-		list_for_each_entry(desc, &sh_chan->ld_queue, node) {
+		list_for_each_entry(desc, &dl, node) {
 			struct dma_async_tx_descriptor *tx = &desc->async_tx;
 			desc->mark = DESC_IDLE;
 			if (tx->callback)
 				tx->callback(tx->callback_param);
 		}
 
-		list_splice_init(&sh_chan->ld_queue, &sh_chan->ld_free);
+		spin_lock(&sh_chan->desc_lock);
+		list_splice(&dl, &sh_chan->ld_free);
+		spin_unlock(&sh_chan->desc_lock);
+
 		handled++;
 	}
 
@@ -867,10 +907,11 @@  static irqreturn_t sh_dmae_err(int irq, void *data)
 {
 	struct sh_dmae_device *shdev = data;
 
-	if (dmaor_read(shdev) & DMAOR_AE)
-		return IRQ_RETVAL(sh_dmae_reset(data));
-	else
+	if (!(dmaor_read(shdev) & DMAOR_AE))
 		return IRQ_NONE;
+
+	sh_dmae_reset(data);
+	return IRQ_HANDLED;
 }
 
 static void dmae_do_tasklet(unsigned long data)
@@ -902,17 +943,11 @@  static void dmae_do_tasklet(unsigned long data)
 
 static bool sh_dmae_nmi_notify(struct sh_dmae_device *shdev)
 {
-	unsigned int handled;
-
 	/* Fast path out if NMIF is not asserted for this controller */
 	if ((dmaor_read(shdev) & DMAOR_NMIF) == 0)
 		return false;
 
-	handled = sh_dmae_reset(shdev);
-	if (handled)
-		return true;
-
-	return false;
+	return sh_dmae_reset(shdev);
 }
 
 static int sh_dmae_nmi_handler(struct notifier_block *self,
@@ -982,9 +1017,6 @@  static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id,
 	tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet,
 			(unsigned long)new_sh_chan);
 
-	/* Init the channel */
-	dmae_init(new_sh_chan);
-
 	spin_lock_init(&new_sh_chan->desc_lock);
 
 	/* Init descripter manage list */
@@ -1114,7 +1146,7 @@  static int __init sh_dmae_probe(struct platform_device *pdev)
 	list_add_tail_rcu(&shdev->node, &sh_dmae_devices);
 	spin_unlock_irq(&sh_dmae_lock);
 
-	/* reset dma controller */
+	/* reset dma controller - only needed as a test */
 	err = sh_dmae_rst(shdev);
 	if (err)
 		goto rst_err;