Message ID | Pine.LNX.4.64.1108181650530.5245@axis700.grange (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
On Thu, 2011-08-18 at 16:55 +0200, Guennadi Liakhovetski wrote: > Currently the shdma dmaengine driver uses runtime PM to save power, when > no channel on the specific controller is requested by a user. This patch > switches the driver to count individual DMA transfers. That way the > controller can be powered down between transfers, even if some of its > channels are in use. No, I don't agree with the approach here, you don't need to count the transfers, the runtime_pm framework does that very well for you. What you need to do is to call pm_runtime_get() in your .issue_pending callback (NOT in tx_submit anyway, this needs to be fixed in driver, see the Documentation/dmaengine.txt And once the transfer has completed you need to call pm_rumtime_put() Runtime PM framework actually counts the device usage count and whenever your device usage count goes to 0 it will call .runtime_suspend callback, thus enable you to save power.
On Thu, 25 Aug 2011, Koul, Vinod wrote: > On Thu, 2011-08-18 at 16:55 +0200, Guennadi Liakhovetski wrote: > > Currently the shdma dmaengine driver uses runtime PM to save power, when > > no channel on the specific controller is requested by a user. This patch > > switches the driver to count individual DMA transfers. That way the > > controller can be powered down between transfers, even if some of its > > channels are in use. > No, I don't agree with the approach here, you don't need to count the > transfers, the runtime_pm framework does that very well for you. > > What you need to do is to call pm_runtime_get() in your .issue_pending > callback (NOT in tx_submit anyway, this needs to be fixed in driver, see > the Documentation/dmaengine.txt > And once the transfer has completed you need to call pm_rumtime_put() This has been discussed before: http://marc.info/?l=linux-sh&m=131004613801231&w=2 Thanks Guennadi > > Runtime PM framework actually counts the device usage count and whenever > your device usage count goes to 0 it will call .runtime_suspend > callback, thus enable you to save power. > > -- > ~Vinod > > > > Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de> > > --- > > > > tested on mackerel (sh7372) with dmatest, sh_mobile_sdhi and sh_mmcif with > > runtime PM and STR. > > > > drivers/dma/shdma.c | 94 +++++++++++++++++++++++++++++++++++++-------------- > > drivers/dma/shdma.h | 7 ++++ > > 2 files changed, 75 insertions(+), 26 deletions(-) > > > > diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c > > index e7bb747..81809c2 100644 > > --- a/drivers/dma/shdma.c > > +++ b/drivers/dma/shdma.c > > @@ -259,15 +259,23 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) > > return 0; > > } > > > > +static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan); > > + > > static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) > > { > > struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c; > > struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); > > + struct sh_dmae_slave *param = tx->chan->private; > > dma_async_tx_callback callback = tx->callback; > > dma_cookie_t cookie; > > - unsigned long flags; > > + bool power_up; > > > > - spin_lock_irqsave(&sh_chan->desc_lock, flags); > > + spin_lock_irq(&sh_chan->desc_lock); > > + > > + if (list_empty(&sh_chan->ld_queue)) > > + power_up = true; > > + else > > + power_up = false; > > > > cookie = sh_chan->common.cookie; > > cookie++; > > @@ -303,7 +311,38 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) > > tx->cookie, &last->async_tx, sh_chan->id, > > desc->hw.sar, desc->hw.tcr, desc->hw.dar); > > > > - spin_unlock_irqrestore(&sh_chan->desc_lock, flags); > > + if (power_up) { > > + sh_chan->pm_state = DMAE_PM_BUSY; > > + > > + pm_runtime_get(sh_chan->dev); > > + > > + spin_unlock_irq(&sh_chan->desc_lock); > > + > > + pm_runtime_barrier(sh_chan->dev); > > + > > + spin_lock_irq(&sh_chan->desc_lock); > > + > > + /* Have we been reset, while waiting? */ > > + if (sh_chan->pm_state != DMAE_PM_ESTABLISHED) { > > + dev_dbg(sh_chan->dev, "Bring up channel %d\n", > > + sh_chan->id); > > + if (param) { > > + const struct sh_dmae_slave_config *cfg = > > + param->config; > > + > > + dmae_set_dmars(sh_chan, cfg->mid_rid); > > + dmae_set_chcr(sh_chan, cfg->chcr); > > + } else { > > + dmae_init(sh_chan); > > + } > > + > > + if (sh_chan->pm_state == DMAE_PM_PENDING) > > + sh_chan_xfer_ld_queue(sh_chan); > > + sh_chan->pm_state = DMAE_PM_ESTABLISHED; > > + } > > + } > > + > > + spin_unlock_irq(&sh_chan->desc_lock); > > > > return cookie; > > } > > @@ -347,8 +386,6 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) > > struct sh_dmae_slave *param = chan->private; > > int ret; > > > > - pm_runtime_get_sync(sh_chan->dev); > > - > > /* > > * This relies on the guarantee from dmaengine that alloc_chan_resources > > * never runs concurrently with itself or free_chan_resources. > > @@ -368,11 +405,6 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) > > } > > > > param->config = cfg; > > - > > - dmae_set_dmars(sh_chan, cfg->mid_rid); > > - dmae_set_chcr(sh_chan, cfg->chcr); > > - } else { > > - dmae_init(sh_chan); > > } > > > > while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) { > > @@ -401,7 +433,6 @@ edescalloc: > > etestused: > > efindslave: > > chan->private = NULL; > > - pm_runtime_put(sh_chan->dev); > > return ret; > > } > > > > @@ -413,7 +444,6 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan) > > struct sh_dmae_chan *sh_chan = to_sh_chan(chan); > > struct sh_desc *desc, *_desc; > > LIST_HEAD(list); > > - int descs = sh_chan->descs_allocated; > > > > /* Protect against ISR */ > > spin_lock_irq(&sh_chan->desc_lock); > > @@ -440,9 +470,6 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan) > > > > spin_unlock_irq(&sh_chan->desc_lock); > > > > - if (descs > 0) > > - pm_runtime_put(sh_chan->dev); > > - > > list_for_each_entry_safe(desc, _desc, &list, node) > > kfree(desc); > > } > > @@ -676,7 +703,6 @@ static int sh_dmae_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, > > struct sh_desc, node); > > desc->partial = (desc->hw.tcr - sh_dmae_readl(sh_chan, TCR)) << > > sh_chan->xmit_shift; > > - > > } > > spin_unlock_irqrestore(&sh_chan->desc_lock, flags); > > > > @@ -761,7 +787,13 @@ static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all > > async_tx_test_ack(&desc->async_tx)) || all) { > > /* Remove from ld_queue list */ > > desc->mark = DESC_IDLE; > > + > > list_move(&desc->node, &sh_chan->ld_free); > > + > > + if (list_empty(&sh_chan->ld_queue)) { > > + dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id); > > + pm_runtime_put(sh_chan->dev); > > + } > > } > > } > > > > @@ -791,16 +823,14 @@ static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) > > ; > > } > > > > +/* Called under spin_lock_irq(&sh_chan->desc_lock) */ > > static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) > > { > > struct sh_desc *desc; > > > > - spin_lock_irq(&sh_chan->desc_lock); > > /* DMA work check */ > > - if (dmae_is_busy(sh_chan)) { > > - spin_unlock_irq(&sh_chan->desc_lock); > > + if (dmae_is_busy(sh_chan)) > > return; > > - } > > > > /* Find the first not transferred descriptor */ > > list_for_each_entry(desc, &sh_chan->ld_queue, node) > > @@ -813,14 +843,18 @@ static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) > > dmae_start(sh_chan); > > break; > > } > > - > > - spin_unlock_irq(&sh_chan->desc_lock); > > } > > > > static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) > > { > > struct sh_dmae_chan *sh_chan = to_sh_chan(chan); > > - sh_chan_xfer_ld_queue(sh_chan); > > + > > + spin_lock_irq(&sh_chan->desc_lock); > > + if (sh_chan->pm_state == DMAE_PM_ESTABLISHED) > > + sh_chan_xfer_ld_queue(sh_chan); > > + else > > + sh_chan->pm_state = DMAE_PM_PENDING; > > + spin_unlock_irq(&sh_chan->desc_lock); > > } > > > > static enum dma_status sh_dmae_tx_status(struct dma_chan *chan, > > @@ -913,6 +947,12 @@ static bool sh_dmae_reset(struct sh_dmae_device *shdev) > > > > list_splice_init(&sh_chan->ld_queue, &dl); > > > > + if (!list_empty(&dl)) { > > + dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id); > > + pm_runtime_put(sh_chan->dev); > > + } > > + sh_chan->pm_state = DMAE_PM_ESTABLISHED; > > + > > spin_unlock(&sh_chan->desc_lock); > > > > /* Complete all */ > > @@ -966,10 +1006,10 @@ static void dmae_do_tasklet(unsigned long data) > > break; > > } > > } > > - spin_unlock_irq(&sh_chan->desc_lock); > > - > > /* Next desc */ > > sh_chan_xfer_ld_queue(sh_chan); > > + spin_unlock_irq(&sh_chan->desc_lock); > > + > > sh_dmae_chan_ld_cleanup(sh_chan, false); > > } > > > > @@ -1037,7 +1077,9 @@ static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id, > > return -ENOMEM; > > } > > > > - /* copy struct dma_device */ > > + new_sh_chan->pm_state = DMAE_PM_ESTABLISHED; > > + > > + /* reference struct dma_device */ > > new_sh_chan->common.device = &shdev->common; > > > > new_sh_chan->dev = shdev->common.dev; > > diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h > > index dc56576..2b55a27 100644 > > --- a/drivers/dma/shdma.h > > +++ b/drivers/dma/shdma.h > > @@ -23,6 +23,12 @@ > > > > struct device; > > > > +enum dmae_pm_state { > > + DMAE_PM_ESTABLISHED, > > + DMAE_PM_BUSY, > > + DMAE_PM_PENDING, > > +}; > > + > > struct sh_dmae_chan { > > dma_cookie_t completed_cookie; /* The maximum cookie completed */ > > spinlock_t desc_lock; /* Descriptor operation lock */ > > @@ -38,6 +44,7 @@ struct sh_dmae_chan { > > u32 __iomem *base; > > char dev_id[16]; /* unique name per DMAC of channel */ > > int pm_error; > > + enum dmae_pm_state pm_state; > > }; > > > > struct sh_dmae_device { > > > > --- Guennadi Liakhovetski, Ph.D. Freelance Open-Source Software Developer http://www.open-technology.de/ -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2011-08-25 at 16:37 +0200, Guennadi Liakhovetski wrote: > On Thu, 25 Aug 2011, Koul, Vinod wrote: > > > On Thu, 2011-08-18 at 16:55 +0200, Guennadi Liakhovetski wrote: > > > Currently the shdma dmaengine driver uses runtime PM to save power, when > > > no channel on the specific controller is requested by a user. This patch > > > switches the driver to count individual DMA transfers. That way the > > > controller can be powered down between transfers, even if some of its > > > channels are in use. > > No, I don't agree with the approach here, you don't need to count the > > transfers, the runtime_pm framework does that very well for you. > > > > What you need to do is to call pm_runtime_get() in your .issue_pending > > callback (NOT in tx_submit anyway, this needs to be fixed in driver, see > > the Documentation/dmaengine.txt > > And once the transfer has completed you need to call pm_rumtime_put() > > This has been discussed before: > > http://marc.info/?l=linux-sh&m=131004613801231&w=2 uh, yes but at least the runtime_xxx needs to get fixed. -- ~Vinod > > Thanks > Guennadi > > > > > Runtime PM framework actually counts the device usage count and whenever > > your device usage count goes to 0 it will call .runtime_suspend > > callback, thus enable you to save power. > > > > -- > > ~Vinod > > > > > > Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de> > > > --- > > > > > > tested on mackerel (sh7372) with dmatest, sh_mobile_sdhi and sh_mmcif with > > > runtime PM and STR. > > > > > > drivers/dma/shdma.c | 94 +++++++++++++++++++++++++++++++++++++-------------- > > > drivers/dma/shdma.h | 7 ++++ > > > 2 files changed, 75 insertions(+), 26 deletions(-) > > > > > > diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c > > > index e7bb747..81809c2 100644 > > > --- a/drivers/dma/shdma.c > > > +++ b/drivers/dma/shdma.c > > > @@ -259,15 +259,23 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) > > > return 0; > > > } > > > > > > +static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan); > > > + > > > static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) > > > { > > > struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c; > > > struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); > > > + struct sh_dmae_slave *param = tx->chan->private; > > > dma_async_tx_callback callback = tx->callback; > > > dma_cookie_t cookie; > > > - unsigned long flags; > > > + bool power_up; > > > > > > - spin_lock_irqsave(&sh_chan->desc_lock, flags); > > > + spin_lock_irq(&sh_chan->desc_lock); > > > + > > > + if (list_empty(&sh_chan->ld_queue)) > > > + power_up = true; > > > + else > > > + power_up = false; > > > > > > cookie = sh_chan->common.cookie; > > > cookie++; > > > @@ -303,7 +311,38 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) > > > tx->cookie, &last->async_tx, sh_chan->id, > > > desc->hw.sar, desc->hw.tcr, desc->hw.dar); > > > > > > - spin_unlock_irqrestore(&sh_chan->desc_lock, flags); > > > + if (power_up) { > > > + sh_chan->pm_state = DMAE_PM_BUSY; > > > + > > > + pm_runtime_get(sh_chan->dev); > > > + > > > + spin_unlock_irq(&sh_chan->desc_lock); > > > + > > > + pm_runtime_barrier(sh_chan->dev); > > > + > > > + spin_lock_irq(&sh_chan->desc_lock); > > > + > > > + /* Have we been reset, while waiting? */ > > > + if (sh_chan->pm_state != DMAE_PM_ESTABLISHED) { > > > + dev_dbg(sh_chan->dev, "Bring up channel %d\n", > > > + sh_chan->id); > > > + if (param) { > > > + const struct sh_dmae_slave_config *cfg = > > > + param->config; > > > + > > > + dmae_set_dmars(sh_chan, cfg->mid_rid); > > > + dmae_set_chcr(sh_chan, cfg->chcr); > > > + } else { > > > + dmae_init(sh_chan); > > > + } > > > + > > > + if (sh_chan->pm_state == DMAE_PM_PENDING) > > > + sh_chan_xfer_ld_queue(sh_chan); > > > + sh_chan->pm_state = DMAE_PM_ESTABLISHED; > > > + } > > > + } > > > + > > > + spin_unlock_irq(&sh_chan->desc_lock); > > > > > > return cookie; > > > } > > > @@ -347,8 +386,6 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) > > > struct sh_dmae_slave *param = chan->private; > > > int ret; > > > > > > - pm_runtime_get_sync(sh_chan->dev); > > > - > > > /* > > > * This relies on the guarantee from dmaengine that alloc_chan_resources > > > * never runs concurrently with itself or free_chan_resources. > > > @@ -368,11 +405,6 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) > > > } > > > > > > param->config = cfg; > > > - > > > - dmae_set_dmars(sh_chan, cfg->mid_rid); > > > - dmae_set_chcr(sh_chan, cfg->chcr); > > > - } else { > > > - dmae_init(sh_chan); > > > } > > > > > > while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) { > > > @@ -401,7 +433,6 @@ edescalloc: > > > etestused: > > > efindslave: > > > chan->private = NULL; > > > - pm_runtime_put(sh_chan->dev); > > > return ret; > > > } > > > > > > @@ -413,7 +444,6 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan) > > > struct sh_dmae_chan *sh_chan = to_sh_chan(chan); > > > struct sh_desc *desc, *_desc; > > > LIST_HEAD(list); > > > - int descs = sh_chan->descs_allocated; > > > > > > /* Protect against ISR */ > > > spin_lock_irq(&sh_chan->desc_lock); > > > @@ -440,9 +470,6 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan) > > > > > > spin_unlock_irq(&sh_chan->desc_lock); > > > > > > - if (descs > 0) > > > - pm_runtime_put(sh_chan->dev); > > > - > > > list_for_each_entry_safe(desc, _desc, &list, node) > > > kfree(desc); > > > } > > > @@ -676,7 +703,6 @@ static int sh_dmae_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, > > > struct sh_desc, node); > > > desc->partial = (desc->hw.tcr - sh_dmae_readl(sh_chan, TCR)) << > > > sh_chan->xmit_shift; > > > - > > > } > > > spin_unlock_irqrestore(&sh_chan->desc_lock, flags); > > > > > > @@ -761,7 +787,13 @@ static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all > > > async_tx_test_ack(&desc->async_tx)) || all) { > > > /* Remove from ld_queue list */ > > > desc->mark = DESC_IDLE; > > > + > > > list_move(&desc->node, &sh_chan->ld_free); > > > + > > > + if (list_empty(&sh_chan->ld_queue)) { > > > + dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id); > > > + pm_runtime_put(sh_chan->dev); > > > + } > > > } > > > } > > > > > > @@ -791,16 +823,14 @@ static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) > > > ; > > > } > > > > > > +/* Called under spin_lock_irq(&sh_chan->desc_lock) */ > > > static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) > > > { > > > struct sh_desc *desc; > > > > > > - spin_lock_irq(&sh_chan->desc_lock); > > > /* DMA work check */ > > > - if (dmae_is_busy(sh_chan)) { > > > - spin_unlock_irq(&sh_chan->desc_lock); > > > + if (dmae_is_busy(sh_chan)) > > > return; > > > - } > > > > > > /* Find the first not transferred descriptor */ > > > list_for_each_entry(desc, &sh_chan->ld_queue, node) > > > @@ -813,14 +843,18 @@ static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) > > > dmae_start(sh_chan); > > > break; > > > } > > > - > > > - spin_unlock_irq(&sh_chan->desc_lock); > > > } > > > > > > static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) > > > { > > > struct sh_dmae_chan *sh_chan = to_sh_chan(chan); > > > - sh_chan_xfer_ld_queue(sh_chan); > > > + > > > + spin_lock_irq(&sh_chan->desc_lock); > > > + if (sh_chan->pm_state == DMAE_PM_ESTABLISHED) > > > + sh_chan_xfer_ld_queue(sh_chan); > > > + else > > > + sh_chan->pm_state = DMAE_PM_PENDING; > > > + spin_unlock_irq(&sh_chan->desc_lock); > > > } > > > > > > static enum dma_status sh_dmae_tx_status(struct dma_chan *chan, > > > @@ -913,6 +947,12 @@ static bool sh_dmae_reset(struct sh_dmae_device *shdev) > > > > > > list_splice_init(&sh_chan->ld_queue, &dl); > > > > > > + if (!list_empty(&dl)) { > > > + dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id); > > > + pm_runtime_put(sh_chan->dev); > > > + } > > > + sh_chan->pm_state = DMAE_PM_ESTABLISHED; > > > + > > > spin_unlock(&sh_chan->desc_lock); > > > > > > /* Complete all */ > > > @@ -966,10 +1006,10 @@ static void dmae_do_tasklet(unsigned long data) > > > break; > > > } > > > } > > > - spin_unlock_irq(&sh_chan->desc_lock); > > > - > > > /* Next desc */ > > > sh_chan_xfer_ld_queue(sh_chan); > > > + spin_unlock_irq(&sh_chan->desc_lock); > > > + > > > sh_dmae_chan_ld_cleanup(sh_chan, false); > > > } > > > > > > @@ -1037,7 +1077,9 @@ static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id, > > > return -ENOMEM; > > > } > > > > > > - /* copy struct dma_device */ > > > + new_sh_chan->pm_state = DMAE_PM_ESTABLISHED; > > > + > > > + /* reference struct dma_device */ > > > new_sh_chan->common.device = &shdev->common; > > > > > > new_sh_chan->dev = shdev->common.dev; > > > diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h > > > index dc56576..2b55a27 100644 > > > --- a/drivers/dma/shdma.h > > > +++ b/drivers/dma/shdma.h > > > @@ -23,6 +23,12 @@ > > > > > > struct device; > > > > > > +enum dmae_pm_state { > > > + DMAE_PM_ESTABLISHED, > > > + DMAE_PM_BUSY, > > > + DMAE_PM_PENDING, > > > +}; > > > + > > > struct sh_dmae_chan { > > > dma_cookie_t completed_cookie; /* The maximum cookie completed */ > > > spinlock_t desc_lock; /* Descriptor operation lock */ > > > @@ -38,6 +44,7 @@ struct sh_dmae_chan { > > > u32 __iomem *base; > > > char dev_id[16]; /* unique name per DMAC of channel */ > > > int pm_error; > > > + enum dmae_pm_state pm_state; > > > }; > > > > > > struct sh_dmae_device { > > > > > > > > > > --- > Guennadi Liakhovetski, Ph.D. > Freelance Open-Source Software Developer > http://www.open-technology.de/ > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 25 Aug 2011, Koul, Vinod wrote: > On Thu, 2011-08-25 at 16:37 +0200, Guennadi Liakhovetski wrote: > > On Thu, 25 Aug 2011, Koul, Vinod wrote: > > > > > On Thu, 2011-08-18 at 16:55 +0200, Guennadi Liakhovetski wrote: > > > > Currently the shdma dmaengine driver uses runtime PM to save power, when > > > > no channel on the specific controller is requested by a user. This patch > > > > switches the driver to count individual DMA transfers. That way the > > > > controller can be powered down between transfers, even if some of its > > > > channels are in use. > > > No, I don't agree with the approach here, you don't need to count the > > > transfers, the runtime_pm framework does that very well for you. > > > > > > What you need to do is to call pm_runtime_get() in your .issue_pending > > > callback (NOT in tx_submit anyway, this needs to be fixed in driver, see > > > the Documentation/dmaengine.txt > > > And once the transfer has completed you need to call pm_rumtime_put() > > > > This has been discussed before: > > > > http://marc.info/?l=linux-sh&m=131004613801231&w=2 > uh, yes but at least the runtime_xxx needs to get fixed. This isn't so easy either. In principle, yes, I know, that pm_runtime_* calls count depth. But I don't think the DMA case is simple enough for that. It's not necessarily one in - one out. Think about terminating transfers, timing out, closing the channel, etc. In those cases you'd have to count pending transfers and pm_runtime_put() for each of them. This is even less trivial on shdma, where DMA transfers get split into sg-lists, which are then all queued on a single queue. So, you'd have to scan that queue and check for transfer borders... That's why I decided that doing just one get() on the first descriptor and one put() on the last one would be easier and more robust. Thanks Guennadi > > -- > ~Vinod > > > > Thanks > > Guennadi > > > > > > > > Runtime PM framework actually counts the device usage count and whenever > > > your device usage count goes to 0 it will call .runtime_suspend > > > callback, thus enable you to save power. > > > > > > -- > > > ~Vinod > > > > > > > > Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de> > > > > --- > > > > > > > > tested on mackerel (sh7372) with dmatest, sh_mobile_sdhi and sh_mmcif with > > > > runtime PM and STR. > > > > > > > > drivers/dma/shdma.c | 94 +++++++++++++++++++++++++++++++++++++-------------- > > > > drivers/dma/shdma.h | 7 ++++ > > > > 2 files changed, 75 insertions(+), 26 deletions(-) > > > > > > > > diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c > > > > index e7bb747..81809c2 100644 > > > > --- a/drivers/dma/shdma.c > > > > +++ b/drivers/dma/shdma.c > > > > @@ -259,15 +259,23 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) > > > > return 0; > > > > } > > > > > > > > +static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan); > > > > + > > > > static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) > > > > { > > > > struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c; > > > > struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); > > > > + struct sh_dmae_slave *param = tx->chan->private; > > > > dma_async_tx_callback callback = tx->callback; > > > > dma_cookie_t cookie; > > > > - unsigned long flags; > > > > + bool power_up; > > > > > > > > - spin_lock_irqsave(&sh_chan->desc_lock, flags); > > > > + spin_lock_irq(&sh_chan->desc_lock); > > > > + > > > > + if (list_empty(&sh_chan->ld_queue)) > > > > + power_up = true; > > > > + else > > > > + power_up = false; > > > > > > > > cookie = sh_chan->common.cookie; > > > > cookie++; > > > > @@ -303,7 +311,38 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) > > > > tx->cookie, &last->async_tx, sh_chan->id, > > > > desc->hw.sar, desc->hw.tcr, desc->hw.dar); > > > > > > > > - spin_unlock_irqrestore(&sh_chan->desc_lock, flags); > > > > + if (power_up) { > > > > + sh_chan->pm_state = DMAE_PM_BUSY; > > > > + > > > > + pm_runtime_get(sh_chan->dev); > > > > + > > > > + spin_unlock_irq(&sh_chan->desc_lock); > > > > + > > > > + pm_runtime_barrier(sh_chan->dev); > > > > + > > > > + spin_lock_irq(&sh_chan->desc_lock); > > > > + > > > > + /* Have we been reset, while waiting? */ > > > > + if (sh_chan->pm_state != DMAE_PM_ESTABLISHED) { > > > > + dev_dbg(sh_chan->dev, "Bring up channel %d\n", > > > > + sh_chan->id); > > > > + if (param) { > > > > + const struct sh_dmae_slave_config *cfg = > > > > + param->config; > > > > + > > > > + dmae_set_dmars(sh_chan, cfg->mid_rid); > > > > + dmae_set_chcr(sh_chan, cfg->chcr); > > > > + } else { > > > > + dmae_init(sh_chan); > > > > + } > > > > + > > > > + if (sh_chan->pm_state == DMAE_PM_PENDING) > > > > + sh_chan_xfer_ld_queue(sh_chan); > > > > + sh_chan->pm_state = DMAE_PM_ESTABLISHED; > > > > + } > > > > + } > > > > + > > > > + spin_unlock_irq(&sh_chan->desc_lock); > > > > > > > > return cookie; > > > > } > > > > @@ -347,8 +386,6 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) > > > > struct sh_dmae_slave *param = chan->private; > > > > int ret; > > > > > > > > - pm_runtime_get_sync(sh_chan->dev); > > > > - > > > > /* > > > > * This relies on the guarantee from dmaengine that alloc_chan_resources > > > > * never runs concurrently with itself or free_chan_resources. > > > > @@ -368,11 +405,6 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) > > > > } > > > > > > > > param->config = cfg; > > > > - > > > > - dmae_set_dmars(sh_chan, cfg->mid_rid); > > > > - dmae_set_chcr(sh_chan, cfg->chcr); > > > > - } else { > > > > - dmae_init(sh_chan); > > > > } > > > > > > > > while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) { > > > > @@ -401,7 +433,6 @@ edescalloc: > > > > etestused: > > > > efindslave: > > > > chan->private = NULL; > > > > - pm_runtime_put(sh_chan->dev); > > > > return ret; > > > > } > > > > > > > > @@ -413,7 +444,6 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan) > > > > struct sh_dmae_chan *sh_chan = to_sh_chan(chan); > > > > struct sh_desc *desc, *_desc; > > > > LIST_HEAD(list); > > > > - int descs = sh_chan->descs_allocated; > > > > > > > > /* Protect against ISR */ > > > > spin_lock_irq(&sh_chan->desc_lock); > > > > @@ -440,9 +470,6 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan) > > > > > > > > spin_unlock_irq(&sh_chan->desc_lock); > > > > > > > > - if (descs > 0) > > > > - pm_runtime_put(sh_chan->dev); > > > > - > > > > list_for_each_entry_safe(desc, _desc, &list, node) > > > > kfree(desc); > > > > } > > > > @@ -676,7 +703,6 @@ static int sh_dmae_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, > > > > struct sh_desc, node); > > > > desc->partial = (desc->hw.tcr - sh_dmae_readl(sh_chan, TCR)) << > > > > sh_chan->xmit_shift; > > > > - > > > > } > > > > spin_unlock_irqrestore(&sh_chan->desc_lock, flags); > > > > > > > > @@ -761,7 +787,13 @@ static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all > > > > async_tx_test_ack(&desc->async_tx)) || all) { > > > > /* Remove from ld_queue list */ > > > > desc->mark = DESC_IDLE; > > > > + > > > > list_move(&desc->node, &sh_chan->ld_free); > > > > + > > > > + if (list_empty(&sh_chan->ld_queue)) { > > > > + dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id); > > > > + pm_runtime_put(sh_chan->dev); > > > > + } > > > > } > > > > } > > > > > > > > @@ -791,16 +823,14 @@ static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) > > > > ; > > > > } > > > > > > > > +/* Called under spin_lock_irq(&sh_chan->desc_lock) */ > > > > static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) > > > > { > > > > struct sh_desc *desc; > > > > > > > > - spin_lock_irq(&sh_chan->desc_lock); > > > > /* DMA work check */ > > > > - if (dmae_is_busy(sh_chan)) { > > > > - spin_unlock_irq(&sh_chan->desc_lock); > > > > + if (dmae_is_busy(sh_chan)) > > > > return; > > > > - } > > > > > > > > /* Find the first not transferred descriptor */ > > > > list_for_each_entry(desc, &sh_chan->ld_queue, node) > > > > @@ -813,14 +843,18 @@ static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) > > > > dmae_start(sh_chan); > > > > break; > > > > } > > > > - > > > > - spin_unlock_irq(&sh_chan->desc_lock); > > > > } > > > > > > > > static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) > > > > { > > > > struct sh_dmae_chan *sh_chan = to_sh_chan(chan); > > > > - sh_chan_xfer_ld_queue(sh_chan); > > > > + > > > > + spin_lock_irq(&sh_chan->desc_lock); > > > > + if (sh_chan->pm_state == DMAE_PM_ESTABLISHED) > > > > + sh_chan_xfer_ld_queue(sh_chan); > > > > + else > > > > + sh_chan->pm_state = DMAE_PM_PENDING; > > > > + spin_unlock_irq(&sh_chan->desc_lock); > > > > } > > > > > > > > static enum dma_status sh_dmae_tx_status(struct dma_chan *chan, > > > > @@ -913,6 +947,12 @@ static bool sh_dmae_reset(struct sh_dmae_device *shdev) > > > > > > > > list_splice_init(&sh_chan->ld_queue, &dl); > > > > > > > > + if (!list_empty(&dl)) { > > > > + dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id); > > > > + pm_runtime_put(sh_chan->dev); > > > > + } > > > > + sh_chan->pm_state = DMAE_PM_ESTABLISHED; > > > > + > > > > spin_unlock(&sh_chan->desc_lock); > > > > > > > > /* Complete all */ > > > > @@ -966,10 +1006,10 @@ static void dmae_do_tasklet(unsigned long data) > > > > break; > > > > } > > > > } > > > > - spin_unlock_irq(&sh_chan->desc_lock); > > > > - > > > > /* Next desc */ > > > > sh_chan_xfer_ld_queue(sh_chan); > > > > + spin_unlock_irq(&sh_chan->desc_lock); > > > > + > > > > sh_dmae_chan_ld_cleanup(sh_chan, false); > > > > } > > > > > > > > @@ -1037,7 +1077,9 @@ static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id, > > > > return -ENOMEM; > > > > } > > > > > > > > - /* copy struct dma_device */ > > > > + new_sh_chan->pm_state = DMAE_PM_ESTABLISHED; > > > > + > > > > + /* reference struct dma_device */ > > > > new_sh_chan->common.device = &shdev->common; > > > > > > > > new_sh_chan->dev = shdev->common.dev; > > > > diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h > > > > index dc56576..2b55a27 100644 > > > > --- a/drivers/dma/shdma.h > > > > +++ b/drivers/dma/shdma.h > > > > @@ -23,6 +23,12 @@ > > > > > > > > struct device; > > > > > > > > +enum dmae_pm_state { > > > > + DMAE_PM_ESTABLISHED, > > > > + DMAE_PM_BUSY, > > > > + DMAE_PM_PENDING, > > > > +}; > > > > + > > > > struct sh_dmae_chan { > > > > dma_cookie_t completed_cookie; /* The maximum cookie completed */ > > > > spinlock_t desc_lock; /* Descriptor operation lock */ > > > > @@ -38,6 +44,7 @@ struct sh_dmae_chan { > > > > u32 __iomem *base; > > > > char dev_id[16]; /* unique name per DMAC of channel */ > > > > int pm_error; > > > > + enum dmae_pm_state pm_state; > > > > }; > > > > > > > > struct sh_dmae_device { > > > > > > > > > > > > > > > > --- > > Guennadi Liakhovetski, Ph.D. > > Freelance Open-Source Software Developer > > http://www.open-technology.de/ > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > Please read the FAQ at http://www.tux.org/lkml/ > --- Guennadi Liakhovetski, Ph.D. Freelance Open-Source Software Developer http://www.open-technology.de/ -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2011-08-25 at 16:55 +0200, Guennadi Liakhovetski wrote: > On Thu, 25 Aug 2011, Koul, Vinod wrote: > > > On Thu, 2011-08-25 at 16:37 +0200, Guennadi Liakhovetski wrote: > > > On Thu, 25 Aug 2011, Koul, Vinod wrote: > > > > > > > On Thu, 2011-08-18 at 16:55 +0200, Guennadi Liakhovetski wrote: > > > > > Currently the shdma dmaengine driver uses runtime PM to save power, when > > > > > no channel on the specific controller is requested by a user. This patch > > > > > switches the driver to count individual DMA transfers. That way the > > > > > controller can be powered down between transfers, even if some of its > > > > > channels are in use. > > > > No, I don't agree with the approach here, you don't need to count the > > > > transfers, the runtime_pm framework does that very well for you. > > > > > > > > What you need to do is to call pm_runtime_get() in your .issue_pending > > > > callback (NOT in tx_submit anyway, this needs to be fixed in driver, see > > > > the Documentation/dmaengine.txt > > > > And once the transfer has completed you need to call pm_rumtime_put() > > > > > > This has been discussed before: > > > > > > http://marc.info/?l=linux-sh&m=131004613801231&w=2 > > uh, yes but at least the runtime_xxx needs to get fixed. > > This isn't so easy either. In principle, yes, I know, that pm_runtime_* > calls count depth. But I don't think the DMA case is simple enough for > that. It's not necessarily one in - one out. Think about terminating > transfers, timing out, closing the channel, etc. In those cases you'd have > to count pending transfers and pm_runtime_put() for each of them. This is > even less trivial on shdma, where DMA transfers get split into sg-lists, > which are then all queued on a single queue. So, you'd have to scan that > queue and check for transfer borders... That's why I decided that doing > just one get() on the first descriptor and one put() on the last one would > be easier and more robust. Wont it be easy to to do: - pm_runtime_get() in each submit - pm_runtime_put() in each callback Normal case above would work just fine - In terminate case, count the number of issued transactions, and call pm_runtime_put() for each canceled transaction (i am assuming that for each timeout error, the client will call terminate) Let me know if there is a case for you which doesn't fit in above
On Thu, 25 Aug 2011, Koul, Vinod wrote: > On Thu, 2011-08-25 at 16:55 +0200, Guennadi Liakhovetski wrote: > > On Thu, 25 Aug 2011, Koul, Vinod wrote: > > > > > On Thu, 2011-08-25 at 16:37 +0200, Guennadi Liakhovetski wrote: > > > > On Thu, 25 Aug 2011, Koul, Vinod wrote: > > > > > > > > > On Thu, 2011-08-18 at 16:55 +0200, Guennadi Liakhovetski wrote: > > > > > > Currently the shdma dmaengine driver uses runtime PM to save power, when > > > > > > no channel on the specific controller is requested by a user. This patch > > > > > > switches the driver to count individual DMA transfers. That way the > > > > > > controller can be powered down between transfers, even if some of its > > > > > > channels are in use. > > > > > No, I don't agree with the approach here, you don't need to count the > > > > > transfers, the runtime_pm framework does that very well for you. > > > > > > > > > > What you need to do is to call pm_runtime_get() in your .issue_pending > > > > > callback (NOT in tx_submit anyway, this needs to be fixed in driver, see > > > > > the Documentation/dmaengine.txt > > > > > And once the transfer has completed you need to call pm_rumtime_put() > > > > > > > > This has been discussed before: > > > > > > > > http://marc.info/?l=linux-sh&m=131004613801231&w=2 > > > uh, yes but at least the runtime_xxx needs to get fixed. > > > > This isn't so easy either. In principle, yes, I know, that pm_runtime_* > > calls count depth. But I don't think the DMA case is simple enough for > > that. It's not necessarily one in - one out. Think about terminating > > transfers, timing out, closing the channel, etc. In those cases you'd have > > to count pending transfers and pm_runtime_put() for each of them. This is > > even less trivial on shdma, where DMA transfers get split into sg-lists, > > which are then all queued on a single queue. So, you'd have to scan that > > queue and check for transfer borders... That's why I decided that doing > > just one get() on the first descriptor and one put() on the last one would > > be easier and more robust. > Wont it be easy to to do: > - pm_runtime_get() in each submit > - pm_runtime_put() in each callback > Normal case above would work just fine > - In terminate case, count the number of issued transactions, and call > pm_runtime_put() for each canceled transaction > (i am assuming that for each timeout error, the client will call > terminate) As I said, this won't be very easy to do this in a robust way. You'd have to scan your list of DMA blocks and see, which of them belong to one descriptor, and once you reach the end of that descriptor, issue a put(). Perhaps, this can be done, but my choice went to the currently presented solution. Thanks Guennadi > Let me know if there is a case for you which doesn't fit in above > > -- > ~Vinod --- Guennadi Liakhovetski, Ph.D. Freelance Open-Source Software Developer http://www.open-technology.de/ -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, 2011-08-26 at 01:11 +0200, Guennadi Liakhovetski wrote: > On Thu, 25 Aug 2011, Koul, Vinod wrote: > > Wont it be easy to to do: > > - pm_runtime_get() in each submit > > - pm_runtime_put() in each callback > > Normal case above would work just fine > > - In terminate case, count the number of issued transactions, and call > > pm_runtime_put() for each canceled transaction > > (i am assuming that for each timeout error, the client will call > > terminate) > > As I said, this won't be very easy to do this in a robust way. You'd have > to scan your list of DMA blocks and see, which of them belong to one > descriptor, and once you reach the end of that descriptor, issue a put(). > Perhaps, this can be done, but my choice went to the currently presented > solution. If you count the number of descriptor submitted in your submitted list and call _put for each, I see no reason why it wont be simple and better than current approach. Something like: /* since callback is set for last descriptor of chain, we call runtime * put for that desc alone */ list_for_each_entry_safe(desc, __desc, sh_chan->ld_queue, node) { if (desc->async_tx.callback) pm_runtime_put(device); If i read shdma correctly, descriptors are put into ld_queue of channel and any pending should be checked in this list alone. For normal case again you check for the callback to decide if you need to call pm_runtime_put() or not. -- ~Vinod -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 29 Aug 2011, Vinod Koul wrote: > On Fri, 2011-08-26 at 01:11 +0200, Guennadi Liakhovetski wrote: > > On Thu, 25 Aug 2011, Koul, Vinod wrote: > > > Wont it be easy to to do: > > > - pm_runtime_get() in each submit > > > - pm_runtime_put() in each callback > > > Normal case above would work just fine > > > - In terminate case, count the number of issued transactions, and call > > > pm_runtime_put() for each canceled transaction > > > (i am assuming that for each timeout error, the client will call > > > terminate) > > > > As I said, this won't be very easy to do this in a robust way. You'd have > > to scan your list of DMA blocks and see, which of them belong to one > > descriptor, and once you reach the end of that descriptor, issue a put(). > > Perhaps, this can be done, but my choice went to the currently presented > > solution. > If you count the number of descriptor submitted in your submitted list > and call _put for each, I see no reason why it wont be simple and better > than current approach. Sorry, I thought, you wanted to avoid extra counting, because runtime-pm counts by itself. Now you propose to count... > Something like: > /* since callback is set for last descriptor of chain, we call runtime > * put for that desc alone > */ > list_for_each_entry_safe(desc, __desc, sh_chan->ld_queue, node) { > if (desc->async_tx.callback) > pm_runtime_put(device); Not all dma users have callbacks. > If i read shdma correctly, descriptors are put into ld_queue of channel > and any pending should be checked in this list alone. > For normal case again you check for the callback to decide if you need > to call pm_runtime_put() or not. Thanks Guennadi --- Guennadi Liakhovetski, Ph.D. Freelance Open-Source Software Developer http://www.open-technology.de/ -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2011-08-29 at 14:21 +0200, Guennadi Liakhovetski wrote: > On Mon, 29 Aug 2011, Vinod Koul wrote: > > > On Fri, 2011-08-26 at 01:11 +0200, Guennadi Liakhovetski wrote: > > > On Thu, 25 Aug 2011, Koul, Vinod wrote: > > > > Wont it be easy to to do: > > > > - pm_runtime_get() in each submit > > > > - pm_runtime_put() in each callback > > > > Normal case above would work just fine > > > > - In terminate case, count the number of issued transactions, and call > > > > pm_runtime_put() for each canceled transaction > > > > (i am assuming that for each timeout error, the client will call > > > > terminate) > > > > > > As I said, this won't be very easy to do this in a robust way. You'd have > > > to scan your list of DMA blocks and see, which of them belong to one > > > descriptor, and once you reach the end of that descriptor, issue a put(). > > > Perhaps, this can be done, but my choice went to the currently presented > > > solution. > > If you count the number of descriptor submitted in your submitted list > > and call _put for each, I see no reason why it wont be simple and better > > than current approach. > > Sorry, I thought, you wanted to avoid extra counting, because runtime-pm > counts by itself. Now you propose to count... see below it doesn't count but iterate thru list > > > Something like: > > /* since callback is set for last descriptor of chain, we call runtime > > * put for that desc alone > > */ > > list_for_each_entry_safe(desc, __desc, sh_chan->ld_queue, node) { > > if (desc->async_tx.callback) > > pm_runtime_put(device); > > Not all dma users have callbacks. Do you have such usage today, at least I dont :) Nevertheless, in tx_submit adding a simple flag in your drivers descriptor structure can tell you whether to call _put() or not. Agreed? -- ~Vinod > > > If i read shdma correctly, descriptors are put into ld_queue of channel > > and any pending should be checked in this list alone. > > For normal case again you check for the callback to decide if you need > > to call pm_runtime_put() or not. > > Thanks > Guennadi > --- > Guennadi Liakhovetski, Ph.D. > Freelance Open-Source Software Developer > http://www.open-technology.de/ -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 29 Aug 2011, Vinod Koul wrote: > On Mon, 2011-08-29 at 14:21 +0200, Guennadi Liakhovetski wrote: > > On Mon, 29 Aug 2011, Vinod Koul wrote: > > > > > On Fri, 2011-08-26 at 01:11 +0200, Guennadi Liakhovetski wrote: > > > > On Thu, 25 Aug 2011, Koul, Vinod wrote: > > > > > Wont it be easy to to do: > > > > > - pm_runtime_get() in each submit > > > > > - pm_runtime_put() in each callback > > > > > Normal case above would work just fine > > > > > - In terminate case, count the number of issued transactions, and call > > > > > pm_runtime_put() for each canceled transaction > > > > > (i am assuming that for each timeout error, the client will call > > > > > terminate) > > > > > > > > As I said, this won't be very easy to do this in a robust way. You'd have > > > > to scan your list of DMA blocks and see, which of them belong to one > > > > descriptor, and once you reach the end of that descriptor, issue a put(). > > > > Perhaps, this can be done, but my choice went to the currently presented > > > > solution. > > > If you count the number of descriptor submitted in your submitted list > > > and call _put for each, I see no reason why it wont be simple and better > > > than current approach. > > > > Sorry, I thought, you wanted to avoid extra counting, because runtime-pm > > counts by itself. Now you propose to count... > see below it doesn't count but iterate thru list Yes, sorry, iterating is a better word, but I actually meant "counting" in a broad sense, of which "iterating" is a particular case:-) > > > Something like: > > > /* since callback is set for last descriptor of chain, we call runtime > > > * put for that desc alone > > > */ > > > list_for_each_entry_safe(desc, __desc, sh_chan->ld_queue, node) { > > > if (desc->async_tx.callback) > > > pm_runtime_put(device); > > > > Not all dma users have callbacks. > Do you have such usage today, at least I dont :) > Nevertheless, in tx_submit adding a simple flag in your drivers > descriptor structure can tell you whether to call _put() or not. Agreed? Yes, I agree, that one could make this work too. Still, I do not understand how and why this is better to the extent, that I have to reimplement my patch, retest and resubmit it. Maybe Dan or Paul have an opinion on this? Thanks Guennadi --- Guennadi Liakhovetski, Ph.D. Freelance Open-Source Software Developer http://www.open-technology.de/ -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 2011-08-30 at 09:12 +0200, Guennadi Liakhovetski wrote: > On Mon, 29 Aug 2011, Vinod Koul wrote: > > > > Something like: > > > > /* since callback is set for last descriptor of chain, we call runtime > > > > * put for that desc alone > > > > */ > > > > list_for_each_entry_safe(desc, __desc, sh_chan->ld_queue, node) { > > > > if (desc->async_tx.callback) > > > > pm_runtime_put(device); > > > > > > Not all dma users have callbacks. > > Do you have such usage today, at least I dont :) > > Nevertheless, in tx_submit adding a simple flag in your drivers > > descriptor structure can tell you whether to call _put() or not. Agreed? > > Yes, I agree, that one could make this work too. Still, I do not > understand how and why this is better to the extent, that I have to > reimplement my patch, retest and resubmit it. Maybe Dan or Paul have an > opinion on this? But wont it make code look simpler and cleaner, you don't reply on your counters but on pm_runtime infrastructure to do the job. You juts need to call _put/_get at right places, which IMO l;ooks lot simpler than current approach
On Tue, 30 Aug 2011, Vinod Koul wrote: > On Tue, 2011-08-30 at 09:12 +0200, Guennadi Liakhovetski wrote: > > On Mon, 29 Aug 2011, Vinod Koul wrote: > > > > > Something like: > > > > > /* since callback is set for last descriptor of chain, we call runtime > > > > > * put for that desc alone > > > > > */ > > > > > list_for_each_entry_safe(desc, __desc, sh_chan->ld_queue, node) { > > > > > if (desc->async_tx.callback) > > > > > pm_runtime_put(device); > > > > > > > > Not all dma users have callbacks. > > > Do you have such usage today, at least I dont :) > > > Nevertheless, in tx_submit adding a simple flag in your drivers > > > descriptor structure can tell you whether to call _put() or not. Agreed? > > > > Yes, I agree, that one could make this work too. Still, I do not > > understand how and why this is better to the extent, that I have to > > reimplement my patch, retest and resubmit it. Maybe Dan or Paul have an > > opinion on this? > But wont it make code look simpler and cleaner, you don't reply on your > counters but on pm_runtime infrastructure to do the job. Sorry, I see it differently. I don't use any counters in my patch. I'm only checking for empty queue, i.e., I'm just identifying the first descriptor submission and the last completion or termination. > You juts need > to call _put/_get at right places, which IMO l;ooks lot simpler than > current approach If we didn't have to check for exact symmetry, then yes, I agree, this would be cleaner. I.e., if we indeed had well-defined entry- and exit-points, which are guaranteed to be called exact same number of times. Like, e.g., with file open() / close() etc. But since we don't have this symmetry, and instead have to add flags and iterate lists, this doesn't look natural and simple to me anymore, sorry. Thanks Guennadi --- Guennadi Liakhovetski, Ph.D. Freelance Open-Source Software Developer http://www.open-technology.de/ -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Vinod On Tue, 30 Aug 2011, Guennadi Liakhovetski wrote: > On Tue, 30 Aug 2011, Vinod Koul wrote: > > > On Tue, 2011-08-30 at 09:12 +0200, Guennadi Liakhovetski wrote: > > > On Mon, 29 Aug 2011, Vinod Koul wrote: > > > > > > Something like: > > > > > > /* since callback is set for last descriptor of chain, we call runtime > > > > > > * put for that desc alone > > > > > > */ > > > > > > list_for_each_entry_safe(desc, __desc, sh_chan->ld_queue, node) { > > > > > > if (desc->async_tx.callback) > > > > > > pm_runtime_put(device); > > > > > > > > > > Not all dma users have callbacks. > > > > Do you have such usage today, at least I dont :) > > > > Nevertheless, in tx_submit adding a simple flag in your drivers > > > > descriptor structure can tell you whether to call _put() or not. Agreed? > > > > > > Yes, I agree, that one could make this work too. Still, I do not > > > understand how and why this is better to the extent, that I have to > > > reimplement my patch, retest and resubmit it. Maybe Dan or Paul have an > > > opinion on this? > > But wont it make code look simpler and cleaner, you don't reply on your > > counters but on pm_runtime infrastructure to do the job. > > Sorry, I see it differently. I don't use any counters in my patch. I'm > only checking for empty queue, i.e., I'm just identifying the first > descriptor submission and the last completion or termination. > > > You juts need > > to call _put/_get at right places, which IMO l;ooks lot simpler than > > current approach > > If we didn't have to check for exact symmetry, then yes, I agree, this > would be cleaner. I.e., if we indeed had well-defined entry- and > exit-points, which are guaranteed to be called exact same number of times. > Like, e.g., with file open() / close() etc. But since we don't have this > symmetry, and instead have to add flags and iterate lists, this doesn't > look natural and simple to me anymore, sorry. What about this one? Would you be prepared to take it as is, or you still think, that a pm_runtime_get*() on each descriptor submission would be better? Thanks Guennadi --- Guennadi Liakhovetski, Ph.D. Freelance Open-Source Software Developer http://www.open-technology.de/ -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2011-09-05 at 10:10 +0200, Guennadi Liakhovetski wrote: > Hi Vinod > > On Tue, 30 Aug 2011, Guennadi Liakhovetski wrote: Please dont top post... > > Sorry, I see it differently. I don't use any counters in my patch. I'm > > only checking for empty queue, i.e., I'm just identifying the first > > descriptor submission and the last completion or termination. > > > > > You juts need > > > to call _put/_get at right places, which IMO l;ooks lot simpler than > > > current approach > > > > If we didn't have to check for exact symmetry, then yes, I agree, this > > would be cleaner. I.e., if we indeed had well-defined entry- and > > exit-points, which are guaranteed to be called exact same number of times. > > Like, e.g., with file open() / close() etc. But since we don't have this > > symmetry, and instead have to add flags and iterate lists, this doesn't > > look natural and simple to me anymore, sorry. > > What about this one? Would you be prepared to take it as is, or you still > think, that a pm_runtime_get*() on each descriptor submission would be > better? I think I will go with your current approach. Let me review again and check it. If I get time it should be in my tree by tonight
On Mon, 2011-09-05 at 18:36 +0530, Vinod Koul wrote: > On Mon, 2011-09-05 at 10:10 +0200, Guennadi Liakhovetski wrote: > > Hi Vinod > > > > On Tue, 30 Aug 2011, Guennadi Liakhovetski wrote: > Please dont top post... > > > Sorry, I see it differently. I don't use any counters in my patch. I'm > > > only checking for empty queue, i.e., I'm just identifying the first > > > descriptor submission and the last completion or termination. > > > > > > > You juts need > > > > to call _put/_get at right places, which IMO l;ooks lot simpler than > > > > current approach > > > > > > If we didn't have to check for exact symmetry, then yes, I agree, this > > > would be cleaner. I.e., if we indeed had well-defined entry- and > > > exit-points, which are guaranteed to be called exact same number of times. > > > Like, e.g., with file open() / close() etc. But since we don't have this > > > symmetry, and instead have to add flags and iterate lists, this doesn't > > > look natural and simple to me anymore, sorry. > > > > What about this one? Would you be prepared to take it as is, or you still > > think, that a pm_runtime_get*() on each descriptor submission would be > > better? > I think I will go with your current approach. Let me review again and > check it. If I get time it should be in my tree by tonight This patch fails to apply for me, can you please rebase it to me tree and resend, I will apply it later this week
On Mon, 5 Sep 2011, Vinod Koul wrote: > On Mon, 2011-09-05 at 18:36 +0530, Vinod Koul wrote: > > On Mon, 2011-09-05 at 10:10 +0200, Guennadi Liakhovetski wrote: > > > Hi Vinod > > > > > > On Tue, 30 Aug 2011, Guennadi Liakhovetski wrote: > > Please dont top post... > > > > Sorry, I see it differently. I don't use any counters in my patch. I'm > > > > only checking for empty queue, i.e., I'm just identifying the first > > > > descriptor submission and the last completion or termination. > > > > > > > > > You juts need > > > > > to call _put/_get at right places, which IMO l;ooks lot simpler than > > > > > current approach > > > > > > > > If we didn't have to check for exact symmetry, then yes, I agree, this > > > > would be cleaner. I.e., if we indeed had well-defined entry- and > > > > exit-points, which are guaranteed to be called exact same number of times. > > > > Like, e.g., with file open() / close() etc. But since we don't have this > > > > symmetry, and instead have to add flags and iterate lists, this doesn't > > > > look natural and simple to me anymore, sorry. > > > > > > What about this one? Would you be prepared to take it as is, or you still > > > think, that a pm_runtime_get*() on each descriptor submission would be > > > better? > > I think I will go with your current approach. Let me review again and > > check it. If I get time it should be in my tree by tonight > This patch fails to apply for me, can you please rebase it to me tree > and resend, I will apply it later this week Sure, will do. Thanks Guennadi --- Guennadi Liakhovetski, Ph.D. Freelance Open-Source Software Developer http://www.open-technology.de/ -- To unsubscribe from this list: send the line "unsubscribe linux-sh" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index e7bb747..81809c2 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -259,15 +259,23 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) return 0; } +static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan); + static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) { struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c; struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); + struct sh_dmae_slave *param = tx->chan->private; dma_async_tx_callback callback = tx->callback; dma_cookie_t cookie; - unsigned long flags; + bool power_up; - spin_lock_irqsave(&sh_chan->desc_lock, flags); + spin_lock_irq(&sh_chan->desc_lock); + + if (list_empty(&sh_chan->ld_queue)) + power_up = true; + else + power_up = false; cookie = sh_chan->common.cookie; cookie++; @@ -303,7 +311,38 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) tx->cookie, &last->async_tx, sh_chan->id, desc->hw.sar, desc->hw.tcr, desc->hw.dar); - spin_unlock_irqrestore(&sh_chan->desc_lock, flags); + if (power_up) { + sh_chan->pm_state = DMAE_PM_BUSY; + + pm_runtime_get(sh_chan->dev); + + spin_unlock_irq(&sh_chan->desc_lock); + + pm_runtime_barrier(sh_chan->dev); + + spin_lock_irq(&sh_chan->desc_lock); + + /* Have we been reset, while waiting? */ + if (sh_chan->pm_state != DMAE_PM_ESTABLISHED) { + dev_dbg(sh_chan->dev, "Bring up channel %d\n", + sh_chan->id); + if (param) { + const struct sh_dmae_slave_config *cfg = + param->config; + + dmae_set_dmars(sh_chan, cfg->mid_rid); + dmae_set_chcr(sh_chan, cfg->chcr); + } else { + dmae_init(sh_chan); + } + + if (sh_chan->pm_state == DMAE_PM_PENDING) + sh_chan_xfer_ld_queue(sh_chan); + sh_chan->pm_state = DMAE_PM_ESTABLISHED; + } + } + + spin_unlock_irq(&sh_chan->desc_lock); return cookie; } @@ -347,8 +386,6 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) struct sh_dmae_slave *param = chan->private; int ret; - pm_runtime_get_sync(sh_chan->dev); - /* * This relies on the guarantee from dmaengine that alloc_chan_resources * never runs concurrently with itself or free_chan_resources. @@ -368,11 +405,6 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) } param->config = cfg; - - dmae_set_dmars(sh_chan, cfg->mid_rid); - dmae_set_chcr(sh_chan, cfg->chcr); - } else { - dmae_init(sh_chan); } while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) { @@ -401,7 +433,6 @@ edescalloc: etestused: efindslave: chan->private = NULL; - pm_runtime_put(sh_chan->dev); return ret; } @@ -413,7 +444,6 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan) struct sh_dmae_chan *sh_chan = to_sh_chan(chan); struct sh_desc *desc, *_desc; LIST_HEAD(list); - int descs = sh_chan->descs_allocated; /* Protect against ISR */ spin_lock_irq(&sh_chan->desc_lock); @@ -440,9 +470,6 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan) spin_unlock_irq(&sh_chan->desc_lock); - if (descs > 0) - pm_runtime_put(sh_chan->dev); - list_for_each_entry_safe(desc, _desc, &list, node) kfree(desc); } @@ -676,7 +703,6 @@ static int sh_dmae_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, struct sh_desc, node); desc->partial = (desc->hw.tcr - sh_dmae_readl(sh_chan, TCR)) << sh_chan->xmit_shift; - } spin_unlock_irqrestore(&sh_chan->desc_lock, flags); @@ -761,7 +787,13 @@ static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all async_tx_test_ack(&desc->async_tx)) || all) { /* Remove from ld_queue list */ desc->mark = DESC_IDLE; + list_move(&desc->node, &sh_chan->ld_free); + + if (list_empty(&sh_chan->ld_queue)) { + dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id); + pm_runtime_put(sh_chan->dev); + } } } @@ -791,16 +823,14 @@ static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) ; } +/* Called under spin_lock_irq(&sh_chan->desc_lock) */ static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) { struct sh_desc *desc; - spin_lock_irq(&sh_chan->desc_lock); /* DMA work check */ - if (dmae_is_busy(sh_chan)) { - spin_unlock_irq(&sh_chan->desc_lock); + if (dmae_is_busy(sh_chan)) return; - } /* Find the first not transferred descriptor */ list_for_each_entry(desc, &sh_chan->ld_queue, node) @@ -813,14 +843,18 @@ static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) dmae_start(sh_chan); break; } - - spin_unlock_irq(&sh_chan->desc_lock); } static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) { struct sh_dmae_chan *sh_chan = to_sh_chan(chan); - sh_chan_xfer_ld_queue(sh_chan); + + spin_lock_irq(&sh_chan->desc_lock); + if (sh_chan->pm_state == DMAE_PM_ESTABLISHED) + sh_chan_xfer_ld_queue(sh_chan); + else + sh_chan->pm_state = DMAE_PM_PENDING; + spin_unlock_irq(&sh_chan->desc_lock); } static enum dma_status sh_dmae_tx_status(struct dma_chan *chan, @@ -913,6 +947,12 @@ static bool sh_dmae_reset(struct sh_dmae_device *shdev) list_splice_init(&sh_chan->ld_queue, &dl); + if (!list_empty(&dl)) { + dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id); + pm_runtime_put(sh_chan->dev); + } + sh_chan->pm_state = DMAE_PM_ESTABLISHED; + spin_unlock(&sh_chan->desc_lock); /* Complete all */ @@ -966,10 +1006,10 @@ static void dmae_do_tasklet(unsigned long data) break; } } - spin_unlock_irq(&sh_chan->desc_lock); - /* Next desc */ sh_chan_xfer_ld_queue(sh_chan); + spin_unlock_irq(&sh_chan->desc_lock); + sh_dmae_chan_ld_cleanup(sh_chan, false); } @@ -1037,7 +1077,9 @@ static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id, return -ENOMEM; } - /* copy struct dma_device */ + new_sh_chan->pm_state = DMAE_PM_ESTABLISHED; + + /* reference struct dma_device */ new_sh_chan->common.device = &shdev->common; new_sh_chan->dev = shdev->common.dev; diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h index dc56576..2b55a27 100644 --- a/drivers/dma/shdma.h +++ b/drivers/dma/shdma.h @@ -23,6 +23,12 @@ struct device; +enum dmae_pm_state { + DMAE_PM_ESTABLISHED, + DMAE_PM_BUSY, + DMAE_PM_PENDING, +}; + struct sh_dmae_chan { dma_cookie_t completed_cookie; /* The maximum cookie completed */ spinlock_t desc_lock; /* Descriptor operation lock */ @@ -38,6 +44,7 @@ struct sh_dmae_chan { u32 __iomem *base; char dev_id[16]; /* unique name per DMAC of channel */ int pm_error; + enum dmae_pm_state pm_state; }; struct sh_dmae_device {
Currently the shdma dmaengine driver uses runtime PM to save power, when no channel on the specific controller is requested by a user. This patch switches the driver to count individual DMA transfers. That way the controller can be powered down between transfers, even if some of its channels are in use. Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de> --- tested on mackerel (sh7372) with dmatest, sh_mobile_sdhi and sh_mmcif with runtime PM and STR. drivers/dma/shdma.c | 94 +++++++++++++++++++++++++++++++++++++-------------- drivers/dma/shdma.h | 7 ++++ 2 files changed, 75 insertions(+), 26 deletions(-)