Message ID | 20170524070048.30942-1-sr@denx.de (mailing list archive)
---|---
State | Changes Requested
On Wed, May 24, 2017 at 09:00:48AM +0200, Stefan Roese wrote:

> +#define MSGDMA_MAX_TRANS_LEN	0xffffffff

GENMASK?

> +struct msgdma_extended_desc {
> +	u32 read_addr_lo;	/* data buffer source address low bits */
> +	u32 write_addr_lo;	/* data buffer destination address low bits */
> +	u32 len;		/* the number of bytes to transfer
> +				 * per descriptor
> +				 */
> +	u32 burst_seq_num;	/* bit 31:24 write burst
> +				 * bit 23:16 read burst
> +				 * bit 15:0 sequence number
> +				 */
> +	u32 stride;		/* bit 31:16 write stride
> +				 * bit 15:0 read stride
> +				 */
> +	u32 read_addr_hi;	/* data buffer source address high bits */
> +	u32 write_addr_hi;	/* data buffer destination address high bits */
> +	u32 control;		/* characteristics of the transfer */

nice comments but can be made kernel-doc style

> +#define MSGDMA_DESC_CTL_TR_ERR_IRQ	(0xff << 16)

GENMASK?

> +#define MSGDMA_CSR_STAT_MASK			0x3FF
> +#define MSGDMA_CSR_STAT_MASK_WITHOUT_IRQ	0x1FF

GENMASK for these two

> +
> +#define MSGDMA_CSR_STAT_BUSY_GET(v)	GET_BIT_VALUE(v, 0)

This is not defined globally and seems to be an altera define but I don't
see altera_tse.h included here ??

> +/* mSGDMA response register bit definitions */
> +#define MSGDMA_RESP_EARLY_TERM	BIT(8)
> +#define MSGDMA_RESP_ERR_MASK	0xFF

GENMASK

> +static dma_cookie_t msgdma_tx_submit(struct dma_async_tx_descriptor *tx)
> +{
> +	struct msgdma_device *mdev = to_mdev(tx->chan);
> +	struct msgdma_sw_desc *desc, *new;
> +	dma_cookie_t cookie;
> +
> +	new = tx_to_desc(tx);
> +	spin_lock_bh(&mdev->lock);
> +	cookie = dma_cookie_assign(tx);
> +
> +	if (!list_empty(&mdev->pending_list)) {
> +		desc = list_last_entry(&mdev->pending_list,
> +				       struct msgdma_sw_desc, node);
> +		if (!list_empty(&desc->tx_list))
> +			desc = list_last_entry(&desc->tx_list,
> +					       struct msgdma_sw_desc, node);
> +	}

desc is not used anywhere so not really seeing the point of this code..

> +static struct dma_async_tx_descriptor *msgdma_prep_memcpy(
> +	struct dma_chan *dchan, dma_addr_t dma_dst,
> +	dma_addr_t dma_src, size_t len, ulong flags)
> +{
> +	struct msgdma_device *mdev = to_mdev(dchan);
> +	struct msgdma_sw_desc *new, *first = NULL;
> +	struct msgdma_extended_desc *desc;
> +	size_t copy;
> +	u32 desc_cnt;
> +
> +	if (len > MSGDMA_MAX_TRANS_LEN)
> +		return NULL;

why :)

Nothing prevents you from splitting this to N descriptors of
MSGDMA_MAX_TRANS_LEN and one remaining one, though it would be great to
have this, but not a deal breaker

> +
> +	desc_cnt = DIV_ROUND_UP(len, MSGDMA_MAX_TRANS_LEN);

and in that case why would you need this??

> +static struct dma_async_tx_descriptor *msgdma_prep_sg(
> +	struct dma_chan *dchan, struct scatterlist *dst_sg,
> +	unsigned int dst_sg_len, struct scatterlist *src_sg,
> +	unsigned int src_sg_len, unsigned long flags)
> +{
> +	struct msgdma_device *mdev = to_mdev(dchan);
> +	struct msgdma_sw_desc *new, *first = NULL;
> +	void *desc = NULL;
> +	size_t len, dst_avail, src_avail;
> +	dma_addr_t dma_dst, dma_src;
> +	u32 desc_cnt = 0, i;
> +	struct scatterlist *sg;
> +
> +	for_each_sg(src_sg, sg, src_sg_len, i)
> +		desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), MSGDMA_MAX_TRANS_LEN);
> +
> +	spin_lock_bh(&mdev->lock);
> +	if (desc_cnt > mdev->desc_free_cnt) {
> +		spin_unlock_bh(&mdev->lock);
> +		dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev);
> +		return NULL;
> +	}
> +	mdev->desc_free_cnt -= desc_cnt;
> +	spin_unlock_bh(&mdev->lock);
> +
> +	dst_avail = sg_dma_len(dst_sg);
> +	src_avail = sg_dma_len(src_sg);
> +
> +	/* Run until we are out of scatterlist entries */
> +	while (true) {
> +		/* Allocate and populate the descriptor */
> +		new = msgdma_get_descriptor(mdev);
> +
> +		desc = &new->hw_desc;
> +		len = min_t(size_t, src_avail, dst_avail);
> +		len = min_t(size_t, len, MSGDMA_MAX_TRANS_LEN);
> +		if (len == 0)
> +			goto fetch;
> +		dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) -
> +			dst_avail;

right justified pls

> +static struct dma_async_tx_descriptor *msgdma_prep_slave_sg(
> +	struct dma_chan *dchan, struct scatterlist *sgl,
> +	unsigned int sg_len, enum dma_transfer_direction dir,
> +	unsigned long flags, void *context)

these should be right justified as well, it's quite hard to read

> +static void msgdma_copy_one(struct msgdma_device *mdev,
> +			    struct msgdma_sw_desc *desc)
> +{
> +	struct msgdma_extended_desc *hw_desc = mdev->desc;
> +
> +	/*
> +	 * Check if the DESC FIFO it not full. If its full, we need to wait
> +	 * for at least one entry to become free again
> +	 */
> +	while (ioread32(&mdev->csr->status) & MSGDMA_CSR_STAT_DESC_BUF_FULL)
> +		mdelay(1);
> +
> +	memcpy(hw_desc, &desc->hw_desc, sizeof(desc->hw_desc) - 4);

what's the magical 4 here?

> +static void msgdma_complete_descriptor(struct msgdma_device *mdev)
> +{
> +	struct msgdma_sw_desc *desc;
> +
> +	desc = list_first_entry_or_null(&mdev->active_list,
> +					struct msgdma_sw_desc, node);
> +	if (!desc)
> +		return;
> +	list_del(&desc->node);
> +	dma_cookie_complete(&desc->async_tx);
> +	list_add_tail(&desc->node, &mdev->done_list);

when do you move from done to free list

> +static void msgdma_free_descriptors(struct msgdma_device *mdev)
> +{
> +	msgdma_free_desc_list(mdev, &mdev->active_list);
> +	msgdma_free_desc_list(mdev, &mdev->pending_list);
> +	msgdma_free_desc_list(mdev, &mdev->done_list);

btw when are the descriptors in free list freed up

> +static int msgdma_alloc_chan_resources(struct dma_chan *dchan)
> +{
> +	struct msgdma_device *mdev = to_mdev(dchan);
> +	struct msgdma_sw_desc *desc;
> +	int i;
> +
> +	mdev->sw_desq = kzalloc(sizeof(*desc) * MSGDMA_DESC_NUM, GFP_KERNEL);

GFP_NOWAIT pls

> +static void msgdma_tasklet(unsigned long data)
> +{
> +	struct msgdma_device *mdev = (struct msgdma_device *)data;
> +	u32 count;
> +	u32 size;
> +	u32 status;
> +
> +	spin_lock(&mdev->lock);
> +
> +	/* Read number of responses that are available */
> +	count = ioread32(&mdev->csr->resp_fill_level);
> +	pr_debug("%s (%d): response count=%d\n", __func__, __LINE__, count);

dev_ variants please

> +static irqreturn_t msgdma_irq_handler(int irq, void *data)
> +{
> +	struct msgdma_device *mdev = data;
> +
> +	tasklet_schedule(&mdev->irq_tasklet);

not submitting next descriptor here..??

> +MODULE_DESCRIPTION("Altera mSGDMA driver");
> +MODULE_AUTHOR("Stefan Roese <sr@denx.de>");
> +MODULE_LICENSE("GPL");

no alias?
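The kernel-doc suggestion above would turn the inline field comments into
something like this - a sketch only, with the descriptions lifted from the
comments in the patch:

/**
 * struct msgdma_extended_desc - implements an extended descriptor
 * @read_addr_lo: data buffer source address low bits
 * @write_addr_lo: data buffer destination address low bits
 * @len: the number of bytes to transfer per descriptor
 * @burst_seq_num: bit 31:24 write burst, bit 23:16 read burst,
 *	bit 15:0 sequence number
 * @stride: bit 31:16 write stride, bit 15:0 read stride
 * @read_addr_hi: data buffer source address high bits
 * @write_addr_hi: data buffer destination address high bits
 * @control: characteristics of the transfer
 */
struct msgdma_extended_desc {
	u32 read_addr_lo;
	u32 write_addr_lo;
	u32 len;
	u32 burst_seq_num;
	u32 stride;
	u32 read_addr_hi;
	u32 write_addr_hi;
	u32 control;
};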
Hi Vinod,

On 14.06.2017 10:25, Vinod Koul wrote:
> On Wed, May 24, 2017 at 09:00:48AM +0200, Stefan Roese wrote:
>
>> +#define MSGDMA_MAX_TRANS_LEN	0xffffffff
>
> GENMASK?

I'm personally not a big fan of GENMASK. BIT(x) is good for single
bits, but GENMASK usually looks less clear - at least to me. So if
you do not insist on this change, I would like to keep the masks.

>> +struct msgdma_extended_desc {
>> +	u32 read_addr_lo;	/* data buffer source address low bits */
>> +	u32 write_addr_lo;	/* data buffer destination address low bits */
>> +	u32 len;		/* the number of bytes to transfer
>> +				 * per descriptor
>> +				 */
>> +	u32 burst_seq_num;	/* bit 31:24 write burst
>> +				 * bit 23:16 read burst
>> +				 * bit 15:0 sequence number
>> +				 */
>> +	u32 stride;		/* bit 31:16 write stride
>> +				 * bit 15:0 read stride
>> +				 */
>> +	u32 read_addr_hi;	/* data buffer source address high bits */
>> +	u32 write_addr_hi;	/* data buffer destination address high bits */
>> +	u32 control;		/* characteristics of the transfer */
>
> nice comments but can be made kernel-doc style

Sure. Will change in v2.

>> +#define MSGDMA_DESC_CTL_TR_ERR_IRQ	(0xff << 16)
>
> GENMASK?
>
>> +#define MSGDMA_CSR_STAT_MASK			0x3FF
>> +#define MSGDMA_CSR_STAT_MASK_WITHOUT_IRQ	0x1FF
>
> GENMASK for these two
>
>> +
>> +#define MSGDMA_CSR_STAT_BUSY_GET(v)	GET_BIT_VALUE(v, 0)
>
> This is not defined globally and seems to be an altera define but I don't
> see altera_tse.h included here ??

Will remove in v2.

>> +/* mSGDMA response register bit definitions */
>> +#define MSGDMA_RESP_EARLY_TERM	BIT(8)
>> +#define MSGDMA_RESP_ERR_MASK	0xFF
>
> GENMASK

Again, please let me know if I need to change to using GENMASK (see above).

>> +static dma_cookie_t msgdma_tx_submit(struct dma_async_tx_descriptor *tx)
>> +{
>> +	struct msgdma_device *mdev = to_mdev(tx->chan);
>> +	struct msgdma_sw_desc *desc, *new;
>> +	dma_cookie_t cookie;
>> +
>> +	new = tx_to_desc(tx);
>> +	spin_lock_bh(&mdev->lock);
>> +	cookie = dma_cookie_assign(tx);
>> +
>> +	if (!list_empty(&mdev->pending_list)) {
>> +		desc = list_last_entry(&mdev->pending_list,
>> +				       struct msgdma_sw_desc, node);
>> +		if (!list_empty(&desc->tx_list))
>> +			desc = list_last_entry(&desc->tx_list,
>> +					       struct msgdma_sw_desc, node);
>> +	}
>
> desc is not used anywhere so not really seeing the point of this code..

Good catch, thanks. It's some relic from porting the code from the
Xilinx DMA driver to this engine. Will remove in v2.

>> +static struct dma_async_tx_descriptor *msgdma_prep_memcpy(
>> +	struct dma_chan *dchan, dma_addr_t dma_dst,
>> +	dma_addr_t dma_src, size_t len, ulong flags)
>> +{
>> +	struct msgdma_device *mdev = to_mdev(dchan);
>> +	struct msgdma_sw_desc *new, *first = NULL;
>> +	struct msgdma_extended_desc *desc;
>> +	size_t copy;
>> +	u32 desc_cnt;
>> +
>> +	if (len > MSGDMA_MAX_TRANS_LEN)
>> +		return NULL;
>
> why :)

This doesn't make much sense, agreed. :)

> Nothing prevents you from splitting this to N descriptors of
> MSGDMA_MAX_TRANS_LEN and one remaining one, though it would be great to
> have this, but not a deal breaker

This is what the code below actually does. FWICT, I only need to remove
the first "len" check above and it should just work this way. I'll give
it a test and will change the code accordingly.

>> +
>> +	desc_cnt = DIV_ROUND_UP(len, MSGDMA_MAX_TRANS_LEN);
>
> and in that case why would you need this??

All this is copied from the Xilinx ZynqMP DMA driver. I'll send a fix
for this driver once all this is sorted out.

>> +static struct dma_async_tx_descriptor *msgdma_prep_sg(
>> +	struct dma_chan *dchan, struct scatterlist *dst_sg,
>> +	unsigned int dst_sg_len, struct scatterlist *src_sg,
>> +	unsigned int src_sg_len, unsigned long flags)
>> +{
>> +	struct msgdma_device *mdev = to_mdev(dchan);
>> +	struct msgdma_sw_desc *new, *first = NULL;
>> +	void *desc = NULL;
>> +	size_t len, dst_avail, src_avail;
>> +	dma_addr_t dma_dst, dma_src;
>> +	u32 desc_cnt = 0, i;
>> +	struct scatterlist *sg;
>> +
>> +	for_each_sg(src_sg, sg, src_sg_len, i)
>> +		desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), MSGDMA_MAX_TRANS_LEN);
>> +
>> +	spin_lock_bh(&mdev->lock);
>> +	if (desc_cnt > mdev->desc_free_cnt) {
>> +		spin_unlock_bh(&mdev->lock);
>> +		dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev);
>> +		return NULL;
>> +	}
>> +	mdev->desc_free_cnt -= desc_cnt;
>> +	spin_unlock_bh(&mdev->lock);
>> +
>> +	dst_avail = sg_dma_len(dst_sg);
>> +	src_avail = sg_dma_len(src_sg);
>> +
>> +	/* Run until we are out of scatterlist entries */
>> +	while (true) {
>> +		/* Allocate and populate the descriptor */
>> +		new = msgdma_get_descriptor(mdev);
>> +
>> +		desc = &new->hw_desc;
>> +		len = min_t(size_t, src_avail, dst_avail);
>> +		len = min_t(size_t, len, MSGDMA_MAX_TRANS_LEN);
>> +		if (len == 0)
>> +			goto fetch;
>> +		dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) -
>> +			dst_avail;
>
> right justified pls

That's the default indentation of my Emacs Linux C-style. Do I really
need to change this?

>> +static struct dma_async_tx_descriptor *msgdma_prep_slave_sg(
>> +	struct dma_chan *dchan, struct scatterlist *sgl,
>> +	unsigned int sg_len, enum dma_transfer_direction dir,
>> +	unsigned long flags, void *context)
>
> these should be right justified as well, it's quite hard to read

Hmmm, this will result in more lines for the function header, as only
one parameter will fit into one line. I could change it this way if
you prefer this:

static struct dma_async_tx_descriptor *
msgdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
		     unsigned int sg_len, enum dma_transfer_direction dir,
		     unsigned long flags, void *context)

Just let me know.

>> +static void msgdma_copy_one(struct msgdma_device *mdev,
>> +			    struct msgdma_sw_desc *desc)
>> +{
>> +	struct msgdma_extended_desc *hw_desc = mdev->desc;
>> +
>> +	/*
>> +	 * Check if the DESC FIFO it not full. If its full, we need to wait
>> +	 * for at least one entry to become free again
>> +	 */
>> +	while (ioread32(&mdev->csr->status) & MSGDMA_CSR_STAT_DESC_BUF_FULL)
>> +		mdelay(1);
>> +
>> +	memcpy(hw_desc, &desc->hw_desc, sizeof(desc->hw_desc) - 4);
>
> what's the magical 4 here?

It's the size of the last word in the descriptor. By writing this last
word, the complete descriptor is flushed from the FIFO to the
DMA controller. I wanted to make sure by using a single iowrite()
after this memcpy(), that this control word will be written after
all other descriptor words have been copied.

BTW: Do you know if memcpy() is guaranteed to copy the data always
in the incrementing "direction"? If yes, I can probably drop the
iowrite32() call and only use memcpy() here.

>> +static void msgdma_complete_descriptor(struct msgdma_device *mdev)
>> +{
>> +	struct msgdma_sw_desc *desc;
>> +
>> +	desc = list_first_entry_or_null(&mdev->active_list,
>> +					struct msgdma_sw_desc, node);
>> +	if (!desc)
>> +		return;
>> +	list_del(&desc->node);
>> +	dma_cookie_complete(&desc->async_tx);
>> +	list_add_tail(&desc->node, &mdev->done_list);
>
> when do you move from done to free list

They are moved to the free_list in msgdma_free_descriptor(). The
call chain here is:

msgdma_free_chan_resources() -> msgdma_free_descriptors() ->
msgdma_free_desc_list() -> msgdma_free_descriptor()

>> +static void msgdma_free_descriptors(struct msgdma_device *mdev)
>> +{
>> +	msgdma_free_desc_list(mdev, &mdev->active_list);
>> +	msgdma_free_desc_list(mdev, &mdev->pending_list);
>> +	msgdma_free_desc_list(mdev, &mdev->done_list);
>
> btw when are the descriptors in free list freed up

You mean, when is the memory freed? It's freed in
msgdma_free_chan_resources(). This mechanism is also copied from the
ZynqMP driver btw.

>> +static int msgdma_alloc_chan_resources(struct dma_chan *dchan)
>> +{
>> +	struct msgdma_device *mdev = to_mdev(dchan);
>> +	struct msgdma_sw_desc *desc;
>> +	int i;
>> +
>> +	mdev->sw_desq = kzalloc(sizeof(*desc) * MSGDMA_DESC_NUM, GFP_KERNEL);
>
> GFP_NOWAIT pls

Ah, thanks. Will change in v2.

>> +static void msgdma_tasklet(unsigned long data)
>> +{
>> +	struct msgdma_device *mdev = (struct msgdma_device *)data;
>> +	u32 count;
>> +	u32 size;
>> +	u32 status;
>> +
>> +	spin_lock(&mdev->lock);
>> +
>> +	/* Read number of responses that are available */
>> +	count = ioread32(&mdev->csr->resp_fill_level);
>> +	pr_debug("%s (%d): response count=%d\n", __func__, __LINE__, count);
>
> dev_ variants please

Sure.

>> +static irqreturn_t msgdma_irq_handler(int irq, void *data)
>> +{
>> +	struct msgdma_device *mdev = data;
>> +
>> +	tasklet_schedule(&mdev->irq_tasklet);
>
> not submitting next descriptor here..??

You mean a detection of an "idle" DMA controller state and
submitting potentially pending descriptors in this case? I'll check
if this can be done...

>> +MODULE_DESCRIPTION("Altera mSGDMA driver");
>> +MODULE_AUTHOR("Stefan Roese <sr@denx.de>");
>> +MODULE_LICENSE("GPL");
>
> no alias?

Sure, will add.

Many thanks for the detailed review.

Thanks,
Stefan
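On the msgdma_prep_memcpy() point above: the do/while loop in the patch
already chops the request into MSGDMA_MAX_TRANS_LEN sized pieces, so
dropping the early length check should indeed be all that is needed. For
reference, this is the part of the loop (taken verbatim from the patch)
that does the splitting:

	do {
		/* Allocate and populate the descriptor */
		new = msgdma_get_descriptor(mdev);

		copy = min_t(size_t, len, MSGDMA_MAX_TRANS_LEN);
		desc = &new->hw_desc;
		msgdma_desc_config(desc, dma_dst, dma_src, copy,
				   MSGDMA_DESC_STRIDE_RW);
		len -= copy;
		dma_src += copy;
		dma_dst += copy;
		if (!first)
			first = new;
		else
			list_add_tail(&new->node, &first->tx_list);
	} while (len);

Each iteration consumes at most MSGDMA_MAX_TRANS_LEN bytes and queues one
descriptor, with desc_cnt = DIV_ROUND_UP(len, MSGDMA_MAX_TRANS_LEN)
descriptors reserved up front.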
On Tue, Jun 20, 2017 at 02:20:49PM +0200, Stefan Roese wrote:
> Hi Vinod,
>
> On 14.06.2017 10:25, Vinod Koul wrote:
> > On Wed, May 24, 2017 at 09:00:48AM +0200, Stefan Roese wrote:
> >
> >> +#define MSGDMA_MAX_TRANS_LEN	0xffffffff
> >
> > GENMASK?
>
> I'm personally not a big fan of GENMASK. BIT(x) is good for single
> bits, but GENMASK usually looks less clear - at least to me. So if
> you do not insist on this change, I would like to keep the masks.

Well GENMASK(14, 11) tells me that it means a mask of bits 14 thru 11,
which is way clearer than 0x7800 :)

IMHO it adds readability and helps avoid bit errors while coding from specs

> Good catch, thanks. It's some relic from porting the code from the
> Xilinx DMA driver to this engine. Will remove in v2.

ha ha that was my hunch too

> >> +	desc = &new->hw_desc;
> >> +	len = min_t(size_t, src_avail, dst_avail);
> >> +	len = min_t(size_t, len, MSGDMA_MAX_TRANS_LEN);
> >> +	if (len == 0)
> >> +		goto fetch;
> >> +	dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) -
> >> +		dst_avail;
> >
> > right justified pls
>
> That's the default indentation of my Emacs Linux C-style. Do I really
> need to change this?

I prefer right justified as that's more readable and easy on (my) eye

> static struct dma_async_tx_descriptor *
> msgdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
>		      unsigned int sg_len, enum dma_transfer_direction dir,
>		      unsigned long flags, void *context)
>
> Just let me know.

Looks much better - it helps, when you have been staring at your terminal
the whole day, to see that these are justified parameter lines and not the
body of the function, which is what I would expect at that indentation
level.

> >> +static void msgdma_copy_one(struct msgdma_device *mdev,
> >> +			    struct msgdma_sw_desc *desc)
> >> +{
> >> +	struct msgdma_extended_desc *hw_desc = mdev->desc;
> >> +
> >> +	/*
> >> +	 * Check if the DESC FIFO it not full. If its full, we need to wait
> >> +	 * for at least one entry to become free again
> >> +	 */
> >> +	while (ioread32(&mdev->csr->status) & MSGDMA_CSR_STAT_DESC_BUF_FULL)
> >> +		mdelay(1);
> >> +
> >> +	memcpy(hw_desc, &desc->hw_desc, sizeof(desc->hw_desc) - 4);
> >
> > what's the magical 4 here?
>
> It's the size of the last word in the descriptor. By writing this last
> word, the complete descriptor is flushed from the FIFO to the
> DMA controller. I wanted to make sure by using a single iowrite()
> after this memcpy(), that this control word will be written after
> all other descriptor words have been copied.
>
> BTW: Do you know if memcpy() is guaranteed to copy the data always
> in the incrementing "direction"? If yes, I can probably drop the
> iowrite32() call and only use memcpy() here.

IIUC memcpy would be arch dependent and based on ordering, we don't have
an absolute guarantee that the iowrite would happen after :) so you need
to check the arch and possibly add barriers

Also this is good info and should be added here, will help others and
possibly you down the line :)

> >> +static irqreturn_t msgdma_irq_handler(int irq, void *data)
> >> +{
> >> +	struct msgdma_device *mdev = data;
> >> +
> >> +	tasklet_schedule(&mdev->irq_tasklet);
> >
> > not submitting next descriptor here..??
>
> You mean a detection of an "idle" DMA controller state and
> submitting potentially pending descriptors in this case? I'll check
> if this can be done...

Yes that will greatly increase the performance of the driver, few drivers
already do so but sadly a lot more don't..
Hi Vinod,

On 22.06.2017 15:00, Vinod Koul wrote:
> On Tue, Jun 20, 2017 at 02:20:49PM +0200, Stefan Roese wrote:
>> Hi Vinod,
>>
>> On 14.06.2017 10:25, Vinod Koul wrote:
>>> On Wed, May 24, 2017 at 09:00:48AM +0200, Stefan Roese wrote:
>>>
>>>> +#define MSGDMA_MAX_TRANS_LEN	0xffffffff
>>>
>>> GENMASK?
>>
>> I'm personally not a big fan of GENMASK. BIT(x) is good for single
>> bits, but GENMASK usually looks less clear - at least to me. So if
>> you do not insist on this change, I would like to keep the masks.
>
> Well GENMASK(14, 11) tells me that it means a mask of bits 14 thru 11,
> which is way clearer than 0x7800 :)

In this specific case, I see your point and would be willing to make
this change gladly. But in the case above, it's the largest number a
u32 variable can hold. I find it more confusing using GENMASK here.
Probably there is already a macro for this u32(-1) value that I
should use instead.

> IMHO it adds readability and helps avoid bit errors while coding from specs
>
>> Good catch, thanks. It's some relic from porting the code from the
>> Xilinx DMA driver to this engine. Will remove in v2.
>
> ha ha that was my hunch too
>
>>>> +	desc = &new->hw_desc;
>>>> +	len = min_t(size_t, src_avail, dst_avail);
>>>> +	len = min_t(size_t, len, MSGDMA_MAX_TRANS_LEN);
>>>> +	if (len == 0)
>>>> +		goto fetch;
>>>> +	dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) -
>>>> +		dst_avail;
>>>
>>> right justified pls
>>
>> That's the default indentation of my Emacs Linux C-style. Do I really
>> need to change this?
>
> I prefer right justified as that's more readable and easy on (my) eye
>
>> static struct dma_async_tx_descriptor *
>> msgdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
>>		      unsigned int sg_len, enum dma_transfer_direction dir,
>>		      unsigned long flags, void *context)
>>
>> Just let me know.
>
> Looks much better - it helps, when you have been staring at your terminal
> the whole day, to see that these are justified parameter lines and not the
> body of the function, which is what I would expect at that indentation
> level.

Okay, will change either way to match "your taste". ;)

>>>> +static void msgdma_copy_one(struct msgdma_device *mdev,
>>>> +			    struct msgdma_sw_desc *desc)
>>>> +{
>>>> +	struct msgdma_extended_desc *hw_desc = mdev->desc;
>>>> +
>>>> +	/*
>>>> +	 * Check if the DESC FIFO it not full. If its full, we need to wait
>>>> +	 * for at least one entry to become free again
>>>> +	 */
>>>> +	while (ioread32(&mdev->csr->status) & MSGDMA_CSR_STAT_DESC_BUF_FULL)
>>>> +		mdelay(1);
>>>> +
>>>> +	memcpy(hw_desc, &desc->hw_desc, sizeof(desc->hw_desc) - 4);
>>>
>>> what's the magical 4 here?
>>
>> It's the size of the last word in the descriptor. By writing this last
>> word, the complete descriptor is flushed from the FIFO to the
>> DMA controller. I wanted to make sure by using a single iowrite()
>> after this memcpy(), that this control word will be written after
>> all other descriptor words have been copied.
>>
>> BTW: Do you know if memcpy() is guaranteed to copy the data always
>> in the incrementing "direction"? If yes, I can probably drop the
>> iowrite32() call and only use memcpy() here.
>
> IIUC memcpy would be arch dependent and based on ordering, we don't have
> an absolute guarantee that the iowrite would happen after :) so you need
> to check the arch and possibly add barriers

This driver will (hopefully) be used on multiple archs (we test ARM
and x86), so I will stick with the current version and add some barriers
and ...

> Also this is good info and should be added here, will help others and
> possibly you down the line :)

... and some comments. :)

>>>> +static irqreturn_t msgdma_irq_handler(int irq, void *data)
>>>> +{
>>>> +	struct msgdma_device *mdev = data;
>>>> +
>>>> +	tasklet_schedule(&mdev->irq_tasklet);
>>>
>>> not submitting next descriptor here..??
>>
>> You mean a detection of an "idle" DMA controller state and
>> submitting potentially pending descriptors in this case? I'll check
>> if this can be done...
>
> Yes that will greatly increase the performance of the driver, few drivers
> already do so but sadly a lot more don't..

It's already coded this way here. This change will be included in v2.

Thanks,
Stefan
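The thread does not show the v2 code, but one common shape for this kind
of resubmission - a hypothetical sketch built only from the functions in
this patch - is to restart the pending list from the completion tasklet,
after the finished responses have been reaped:

static void msgdma_tasklet(unsigned long data)
{
	struct msgdma_device *mdev = (struct msgdma_device *)data;
	u32 count;

	spin_lock(&mdev->lock);

	/* Read number of responses that are available */
	count = ioread32(&mdev->csr->resp_fill_level);

	while (count--) {
		/* Read both longwords to purge this response from the FIFO */
		ioread32(&mdev->resp->bytes_transferred);
		ioread32(&mdev->resp->status);

		msgdma_complete_descriptor(mdev);
		msgdma_chan_desc_cleanup(mdev);
	}

	/* Submit descriptors that were queued up while the engine was busy */
	msgdma_start_transfer(mdev);

	spin_unlock(&mdev->lock);
}

msgdma_start_transfer() already returns early when the pending list is
empty, so the extra call is harmless in the idle case.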
On Thu, Jun 22, 2017 at 04:50:57PM +0200, Stefan Roese wrote:
> Hi Vinod,
>
> On 22.06.2017 15:00, Vinod Koul wrote:
> > On Tue, Jun 20, 2017 at 02:20:49PM +0200, Stefan Roese wrote:
> >> Hi Vinod,
> >>
> >> On 14.06.2017 10:25, Vinod Koul wrote:
> >>> On Wed, May 24, 2017 at 09:00:48AM +0200, Stefan Roese wrote:
> >>>
> >>>> +#define MSGDMA_MAX_TRANS_LEN	0xffffffff
> >>>
> >>> GENMASK?
> >>
> >> I'm personally not a big fan of GENMASK. BIT(x) is good for single
> >> bits, but GENMASK usually looks less clear - at least to me. So if
> >> you do not insist on this change, I would like to keep the masks.
> >
> > Well GENMASK(14, 11) tells me that it means a mask of bits 14 thru 11,
> > which is way clearer than 0x7800 :)
>
> In this specific case, I see your point and would be willing to make
> this change gladly. But in the case above, it's the largest number a
> u32 variable can hold. I find it more confusing using GENMASK here.
> Probably there is already a macro for this u32(-1) value that I
> should use instead.

I think there is something, but can't recall which one. Andy?

> >>>> +static void msgdma_copy_one(struct msgdma_device *mdev,
> >>>> +			    struct msgdma_sw_desc *desc)
> >>>> +{
> >>>> +	struct msgdma_extended_desc *hw_desc = mdev->desc;
> >>>> +
> >>>> +	/*
> >>>> +	 * Check if the DESC FIFO it not full. If its full, we need to wait
> >>>> +	 * for at least one entry to become free again
> >>>> +	 */
> >>>> +	while (ioread32(&mdev->csr->status) & MSGDMA_CSR_STAT_DESC_BUF_FULL)
> >>>> +		mdelay(1);
> >>>> +
> >>>> +	memcpy(hw_desc, &desc->hw_desc, sizeof(desc->hw_desc) - 4);
> >>>
> >>> what's the magical 4 here?
> >>
> >> It's the size of the last word in the descriptor. By writing this last
> >> word, the complete descriptor is flushed from the FIFO to the
> >> DMA controller. I wanted to make sure by using a single iowrite()
> >> after this memcpy(), that this control word will be written after
> >> all other descriptor words have been copied.
> >>
> >> BTW: Do you know if memcpy() is guaranteed to copy the data always
> >> in the incrementing "direction"? If yes, I can probably drop the
> >> iowrite32() call and only use memcpy() here.
> >
> > IIUC memcpy would be arch dependent and based on ordering, we don't have
> > an absolute guarantee that the iowrite would happen after :) so you need
> > to check the arch and possibly add barriers
>
> This driver will (hopefully) be used on multiple archs (we test ARM
> and x86), so I will stick with the current version and add some barriers
> and ...

That makes sense
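One possible shape for the barrier variant discussed here - a sketch
only, assuming wmb() is a sufficient write barrier on the targeted
architectures; the final code is not shown in this thread:

static void msgdma_copy_one(struct msgdma_device *mdev,
			    struct msgdma_sw_desc *desc)
{
	struct msgdma_extended_desc *hw_desc = mdev->desc;

	/*
	 * Check if the DESC FIFO is not full. If it is full, we need to
	 * wait for at least one entry to become free again.
	 */
	while (ioread32(&mdev->csr->status) & MSGDMA_CSR_STAT_DESC_BUF_FULL)
		mdelay(1);

	/*
	 * The hardware collects the descriptor word by word; only the
	 * write of the last (control) word hands it to the controller,
	 * so copy everything except that word first ...
	 */
	memcpy(hw_desc, &desc->hw_desc, sizeof(desc->hw_desc) - 4);

	/*
	 * ... then make sure all of the writes above have reached the
	 * device before the control word is written, which flushes the
	 * complete descriptor from the FIFO to the DMA controller.
	 */
	wmb();
	iowrite32(desc->hw_desc.control, &hw_desc->control);
}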
On Thu, 2017-06-22 at 21:00 +0530, Vinod Koul wrote:
> On Thu, Jun 22, 2017 at 04:50:57PM +0200, Stefan Roese wrote:
> > On 22.06.2017 15:00, Vinod Koul wrote:
> > > On Tue, Jun 20, 2017 at 02:20:49PM +0200, Stefan Roese wrote:
> > > > On 14.06.2017 10:25, Vinod Koul wrote:
> > > > > On Wed, May 24, 2017 at 09:00:48AM +0200, Stefan Roese wrote:
> > > > >
> > > > > > +#define MSGDMA_MAX_TRANS_LEN	0xffffffff
> > > > >
> > > > > GENMASK?
> > > >
> > > > I'm personally not a big fan of GENMASK. BIT(x) is good for single
> > > > bits, but GENMASK usually looks less clear - at least to me. So if
> > > > you do not insist on this change, I would like to keep the masks.
> > >
> > > Well GENMASK(14, 11) tells me that it means a mask of bits 14 thru 11,
> > > which is way clearer than 0x7800 :)
> >
> > In this specific case, I see your point and would be willing to make
> > this change gladly. But in the case above, it's the largest number a
> > u32 variable can hold. I find it more confusing using GENMASK here.
> > Probably there is already a macro for this u32(-1) value that I
> > should use instead.
>
> I think there is something, but can't recall which one. Andy?

Since it's the maximum length of a transfer on the one hand, and in the
hardware it is actually a mask (the bits are used for the transfer
length) on the other, you may choose one of:

U32_MAX
GENMASK(31, 0)

My personal preference is GENMASK(), since it's slightly closer to what
the hardware does inside. In case I'm wrong I would like to hear a
hardware engineer speak up.
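For reference, the spellings under discussion are numerically identical,
so the choice is purely one of readability. A compile-time check
(placed, say, somewhere in msgdma_probe()) would confirm it:

#include <linux/bitops.h>	/* GENMASK() */
#include <linux/bug.h>		/* BUILD_BUG_ON() */
#include <linux/kernel.h>	/* U32_MAX */

/* All three names denote the same 32-bit all-ones value */
BUILD_BUG_ON(MSGDMA_MAX_TRANS_LEN != U32_MAX);
BUILD_BUG_ON(MSGDMA_MAX_TRANS_LEN != GENMASK(31, 0));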
Hi Andy,

On 27.06.2017 10:31, Andy Shevchenko wrote:
> On Thu, 2017-06-22 at 21:00 +0530, Vinod Koul wrote:
>> On Thu, Jun 22, 2017 at 04:50:57PM +0200, Stefan Roese wrote:
>>> On 22.06.2017 15:00, Vinod Koul wrote:
>>>> On Tue, Jun 20, 2017 at 02:20:49PM +0200, Stefan Roese wrote:
>>>>> On 14.06.2017 10:25, Vinod Koul wrote:
>>>>>> On Wed, May 24, 2017 at 09:00:48AM +0200, Stefan Roese wrote:
>>>>>>
>>>>>>> +#define MSGDMA_MAX_TRANS_LEN	0xffffffff
>>>>>>
>>>>>> GENMASK?
>>>>>
>>>>> I'm personally not a big fan of GENMASK. BIT(x) is good for single
>>>>> bits, but GENMASK usually looks less clear - at least to me. So if
>>>>> you do not insist on this change, I would like to keep the masks.
>>>>
>>>> Well GENMASK(14, 11) tells me that it means a mask of bits 14 thru 11,
>>>> which is way clearer than 0x7800 :)
>>>
>>> In this specific case, I see your point and would be willing to make
>>> this change gladly. But in the case above, it's the largest number a
>>> u32 variable can hold. I find it more confusing using GENMASK here.
>>> Probably there is already a macro for this u32(-1) value that I
>>> should use instead.
>>
>> I think there is something, but can't recall which one. Andy?
>
> Since it's the maximum length of a transfer on the one hand, and in the
> hardware it is actually a mask (the bits are used for the transfer
> length) on the other, you may choose one of:
>
> U32_MAX
> GENMASK(31, 0)
>
> My personal preference is GENMASK(), since it's slightly closer to what
> the hardware does inside. In case I'm wrong I would like to hear a
> hardware engineer speak up.

My personal preference would be U32_MAX in this case.

Vinod, do you want me to send a v3 of the mSGDMA patch with one of
those defines used? Which one do you prefer? I really have no strong
feelings here.

Thanks,
Stefan
On Tue, Jun 27, 2017 at 01:08:12PM +0200, Stefan Roese wrote:
> Hi Andy,
>
> On 27.06.2017 10:31, Andy Shevchenko wrote:
> > On Thu, 2017-06-22 at 21:00 +0530, Vinod Koul wrote:
> > > On Thu, Jun 22, 2017 at 04:50:57PM +0200, Stefan Roese wrote:
> > > > On 22.06.2017 15:00, Vinod Koul wrote:
> > > > > On Tue, Jun 20, 2017 at 02:20:49PM +0200, Stefan Roese wrote:
> > > > > > On 14.06.2017 10:25, Vinod Koul wrote:
> > > > > > > On Wed, May 24, 2017 at 09:00:48AM +0200, Stefan Roese wrote:
> > > > > > >
> > > > > > > > +#define MSGDMA_MAX_TRANS_LEN	0xffffffff
> > > > > > >
> > > > > > > GENMASK?
> > > > > >
> > > > > > I'm personally not a big fan of GENMASK. BIT(x) is good for
> > > > > > single bits, but GENMASK usually looks less clear - at least
> > > > > > to me. So if you do not insist on this change, I would like
> > > > > > to keep the masks.
> > > > >
> > > > > Well GENMASK(14, 11) tells me that it means a mask of bits 14
> > > > > thru 11, which is way clearer than 0x7800 :)
> > > >
> > > > In this specific case, I see your point and would be willing to
> > > > make this change gladly. But in the case above, it's the largest
> > > > number a u32 variable can hold. I find it more confusing using
> > > > GENMASK here. Probably there is already a macro for this u32(-1)
> > > > value that I should use instead.
> > >
> > > I think there is something, but can't recall which one. Andy?
> >
> > Since it's the maximum length of a transfer on the one hand, and in the
> > hardware it is actually a mask (the bits are used for the transfer
> > length) on the other, you may choose one of:
> >
> > U32_MAX
> > GENMASK(31, 0)
> >
> > My personal preference is GENMASK(), since it's slightly closer to what
> > the hardware does inside. In case I'm wrong I would like to hear a
> > hardware engineer speak up.
>
> My personal preference would be U32_MAX in this case.
>
> Vinod, do you want me to send a v3 of the mSGDMA patch with one of
> those defines used? Which one do you prefer? I really have no strong
> feelings here.

That can be sent as an update afterwards or, if we have comments, then in
the next iteration :)
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 24e8597b2c3e..1d26b5734369 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -56,6 +56,12 @@ config DMA_OF select DMA_ENGINE #devices +config ALTERA_MSGDMA + tristate "Altera / Intel mSGDMA Engine" + select DMA_ENGINE + help + Enable support for Altera / Intel mSGDMA controller. + config AMBA_PL08X bool "ARM PrimeCell PL080 or PL081 support" depends on ARM_AMBA diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 0b723e94d9e6..cf68b1dae0bf 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_DMA_OF) += of-dma.o obj-$(CONFIG_DMATEST) += dmatest.o #devices +obj-$(CONFIG_ALTERA_MSGDMA) += altera-msgdma.o obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ obj-$(CONFIG_AT_HDMAC) += at_hdmac.o diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c new file mode 100644 index 000000000000..84d1dcc574ca --- /dev/null +++ b/drivers/dma/altera-msgdma.c @@ -0,0 +1,1003 @@ +/* + * DMA driver for Altera mSGDMA IP core + * + * Copyright (C) 2017 Stefan Roese <sr@denx.de> + * + * Based on drivers/dma/xilinx/zynqmp_dma.c, which is: + * Copyright (C) 2016 Xilinx, Inc. All rights reserved. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include "dmaengine.h" + +#define MSGDMA_MAX_TRANS_LEN 0xffffffff +#define MSGDMA_DESC_NUM 1024 + +/* mSGDMA extended descriptor format */ +struct msgdma_extended_desc { + u32 read_addr_lo; /* data buffer source address low bits */ + u32 write_addr_lo; /* data buffer destination address low bits */ + u32 len; /* the number of bytes to transfer + * per descriptor + */ + u32 burst_seq_num; /* bit 31:24 write burst + * bit 23:16 read burst + * bit 15:0 sequence number + */ + u32 stride; /* bit 31:16 write stride + * bit 15:0 read stride + */ + u32 read_addr_hi; /* data buffer source address high bits */ + u32 write_addr_hi; /* data buffer destination address high bits */ + u32 control; /* characteristics of the transfer */ +}; + +/* mSGDMA descriptor control field bit definitions */ +#define MSGDMA_DESC_CTL_SET_CH(x) ((x) & 0xff) +#define MSGDMA_DESC_CTL_GEN_SOP BIT(8) +#define MSGDMA_DESC_CTL_GEN_EOP BIT(9) +#define MSGDMA_DESC_CTL_PARK_READS BIT(10) +#define MSGDMA_DESC_CTL_PARK_WRITES BIT(11) +#define MSGDMA_DESC_CTL_END_ON_EOP BIT(12) +#define MSGDMA_DESC_CTL_END_ON_LEN BIT(13) +#define MSGDMA_DESC_CTL_TR_COMP_IRQ BIT(14) +#define MSGDMA_DESC_CTL_EARLY_IRQ BIT(15) +#define MSGDMA_DESC_CTL_TR_ERR_IRQ (0xff << 16) +#define MSGDMA_DESC_CTL_EARLY_DONE BIT(24) + +/* + * Writing "1" the "go" bit commits the entire descriptor into the + * descriptor FIFO(s) + */ +#define MSGDMA_DESC_CTL_GO BIT(31) + +/* Tx buffer control flags */ +#define MSGDMA_DESC_CTL_TX_FIRST (MSGDMA_DESC_CTL_GEN_SOP | \ + MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +#define MSGDMA_DESC_CTL_TX_MIDDLE (MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +#define MSGDMA_DESC_CTL_TX_LAST (MSGDMA_DESC_CTL_GEN_EOP 
| \ + MSGDMA_DESC_CTL_TR_COMP_IRQ | \ + MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +#define MSGDMA_DESC_CTL_TX_SINGLE (MSGDMA_DESC_CTL_GEN_SOP | \ + MSGDMA_DESC_CTL_GEN_EOP | \ + MSGDMA_DESC_CTL_TR_COMP_IRQ | \ + MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +#define MSGDMA_DESC_CTL_RX_SINGLE (MSGDMA_DESC_CTL_END_ON_EOP | \ + MSGDMA_DESC_CTL_END_ON_LEN | \ + MSGDMA_DESC_CTL_TR_COMP_IRQ | \ + MSGDMA_DESC_CTL_EARLY_IRQ | \ + MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +/* mSGDMA extended descriptor stride definitions */ +#define MSGDMA_DESC_STRIDE_RD 0x00000001 +#define MSGDMA_DESC_STRIDE_WR 0x00010000 +#define MSGDMA_DESC_STRIDE_RW 0x00010001 + +/* mSGDMA dispatcher control and status register map */ +struct msgdma_csr { + u32 status; /* Read/Clear */ + u32 control; /* Read/Write */ + u32 rw_fill_level; /* bit 31:16 - write fill level + * bit 15:0 - read fill level + */ + u32 resp_fill_level; /* bit 15:0 */ + u32 rw_seq_num; /* bit 31:16 - write sequence number + * bit 15:0 - read sequence number + */ + u32 pad[3]; /* reserved */ +}; + +/* mSGDMA CSR status register bit definitions */ +#define MSGDMA_CSR_STAT_BUSY BIT(0) +#define MSGDMA_CSR_STAT_DESC_BUF_EMPTY BIT(1) +#define MSGDMA_CSR_STAT_DESC_BUF_FULL BIT(2) +#define MSGDMA_CSR_STAT_RESP_BUF_EMPTY BIT(3) +#define MSGDMA_CSR_STAT_RESP_BUF_FULL BIT(4) +#define MSGDMA_CSR_STAT_STOPPED BIT(5) +#define MSGDMA_CSR_STAT_RESETTING BIT(6) +#define MSGDMA_CSR_STAT_STOPPED_ON_ERR BIT(7) +#define MSGDMA_CSR_STAT_STOPPED_ON_EARLY BIT(8) +#define MSGDMA_CSR_STAT_IRQ BIT(9) +#define MSGDMA_CSR_STAT_MASK 0x3FF +#define MSGDMA_CSR_STAT_MASK_WITHOUT_IRQ 0x1FF + +#define MSGDMA_CSR_STAT_BUSY_GET(v) GET_BIT_VALUE(v, 0) +#define MSGDMA_CSR_STAT_DESC_BUF_EMPTY_GET(v) GET_BIT_VALUE(v, 1) +#define MSGDMA_CSR_STAT_DESC_BUF_FULL_GET(v) GET_BIT_VALUE(v, 2) +#define MSGDMA_CSR_STAT_RESP_BUF_EMPTY_GET(v) GET_BIT_VALUE(v, 3) +#define MSGDMA_CSR_STAT_RESP_BUF_FULL_GET(v) GET_BIT_VALUE(v, 4) +#define MSGDMA_CSR_STAT_STOPPED_GET(v) GET_BIT_VALUE(v, 5) +#define MSGDMA_CSR_STAT_RESETTING_GET(v) GET_BIT_VALUE(v, 6) +#define MSGDMA_CSR_STAT_STOPPED_ON_ERR_GET(v) GET_BIT_VALUE(v, 7) +#define MSGDMA_CSR_STAT_STOPPED_ON_EARLY_GET(v) GET_BIT_VALUE(v, 8) +#define MSGDMA_CSR_STAT_IRQ_GET(v) GET_BIT_VALUE(v, 9) + +#define DESC_EMPTY (MSGDMA_CSR_STAT_DESC_BUF_EMPTY | \ + MSGDMA_CSR_STAT_RESP_BUF_EMPTY) + +/* mSGDMA CSR control register bit definitions */ +#define MSGDMA_CSR_CTL_STOP BIT(0) +#define MSGDMA_CSR_CTL_RESET BIT(1) +#define MSGDMA_CSR_CTL_STOP_ON_ERR BIT(2) +#define MSGDMA_CSR_CTL_STOP_ON_EARLY BIT(3) +#define MSGDMA_CSR_CTL_GLOBAL_INTR BIT(4) +#define MSGDMA_CSR_CTL_STOP_DESCS BIT(5) + +/* mSGDMA CSR fill level bits */ +#define MSGDMA_CSR_WR_FILL_LEVEL_GET(v) (((v) & 0xffff0000) >> 16) +#define MSGDMA_CSR_RD_FILL_LEVEL_GET(v) ((v) & 0x0000ffff) +#define MSGDMA_CSR_RESP_FILL_LEVEL_GET(v) ((v) & 0x0000ffff) + +#define MSGDMA_CSR_SEQ_NUM_GET(v) (((v) & 0xffff0000) >> 16) + +/* mSGDMA response register map */ +struct msgdma_response { + u32 bytes_transferred; + u32 status; +}; + +/* mSGDMA response register bit definitions */ +#define MSGDMA_RESP_EARLY_TERM BIT(8) +#define MSGDMA_RESP_ERR_MASK 0xFF + +/** + * struct msgdma_sw_desc - implements a sw descriptor + * @async_tx: support for the async_tx api + * @hw_desc: assosiated HW descriptor + * @free_list: node of the free SW descriprots list + */ +struct msgdma_sw_desc { + struct dma_async_tx_descriptor async_tx; + struct msgdma_extended_desc hw_desc; + struct list_head node; + struct 
list_head tx_list; +}; + +/** + * struct msgdma_device - DMA device structure + * @dev: Device Structure + * @dmadev: DMA device structure + * @chan: Driver specific DMA channel + */ +struct msgdma_device { + spinlock_t lock; + struct device *dev; + struct tasklet_struct irq_tasklet; + struct list_head pending_list; + struct list_head free_list; + struct list_head active_list; + struct list_head done_list; + u32 desc_free_cnt; + + struct dma_device dmadev; + struct dma_chan dmachan; + dma_addr_t hw_desq; + struct msgdma_sw_desc *sw_desq; + unsigned int npendings; + + struct dma_slave_config slave_cfg; + + int irq; + + /* mSGDMA controller */ + struct msgdma_csr *csr; + + /* mSGDMA descriptors */ + struct msgdma_extended_desc *desc; + + /* mSGDMA response */ + struct msgdma_response *resp; +}; + +#define to_mdev(chan) container_of(chan, struct msgdma_device, dmachan) +#define tx_to_desc(tx) container_of(tx, struct msgdma_sw_desc, async_tx) + +/** + * msgdma_get_descriptor - Get the sw descriptor from the pool + * @mdev: Pointer to the Altera mSGDMA device structure + * + * Return: The sw descriptor + */ +static struct msgdma_sw_desc *msgdma_get_descriptor(struct msgdma_device *mdev) +{ + struct msgdma_sw_desc *desc; + + spin_lock_bh(&mdev->lock); + desc = list_first_entry(&mdev->free_list, struct msgdma_sw_desc, node); + list_del(&desc->node); + spin_unlock_bh(&mdev->lock); + + INIT_LIST_HEAD(&desc->tx_list); + + return desc; +} + +/** + * msgdma_free_descriptor - Issue pending transactions + * @mdev: Pointer to the Altera mSGDMA device structure + * @desc: Transaction descriptor pointer + */ +static void msgdma_free_descriptor(struct msgdma_device *mdev, + struct msgdma_sw_desc *desc) +{ + struct msgdma_sw_desc *child, *next; + + mdev->desc_free_cnt++; + list_add_tail(&desc->node, &mdev->free_list); + list_for_each_entry_safe(child, next, &desc->tx_list, node) { + mdev->desc_free_cnt++; + list_move_tail(&child->node, &mdev->free_list); + } +} + +/** + * msgdma_free_desc_list - Free descriptors list + * @mdev: Pointer to the Altera mSGDMA device structure + * @list: List to parse and delete the descriptor + */ +static void msgdma_free_desc_list(struct msgdma_device *mdev, + struct list_head *list) +{ + struct msgdma_sw_desc *desc, *next; + + list_for_each_entry_safe(desc, next, list, node) + msgdma_free_descriptor(mdev, desc); +} + +/** + * msgdma_desc_config - Configure the descriptor + * @desc: Hw descriptor pointer + * @dst: Destination buffer address + * @src: Source buffer address + * @len: Transfer length + */ +static void msgdma_desc_config(struct msgdma_extended_desc *desc, + dma_addr_t dst, dma_addr_t src, size_t len, + u32 stride) +{ + /* Set lower 32bits of src & dst addresses in the descriptor */ + desc->read_addr_lo = lower_32_bits(src); + desc->write_addr_lo = lower_32_bits(dst); + + /* Set upper 32bits of src & dst addresses in the descriptor */ + desc->read_addr_hi = upper_32_bits(src); + desc->write_addr_hi = upper_32_bits(dst); + + desc->len = len; + desc->stride = stride; + desc->burst_seq_num = 0; /* 0 will result in max burst length */ + + /* + * Don't set interrupt on xfer end yet, this will be done later + * for the "last" descriptor + */ + desc->control = MSGDMA_DESC_CTL_TR_ERR_IRQ | MSGDMA_DESC_CTL_GO | + MSGDMA_DESC_CTL_END_ON_LEN; +} + +/** + * msgdma_desc_config_eod - Mark the descriptor as end descriptor + * @desc: Hw descriptor pointer + */ +static void msgdma_desc_config_eod(struct msgdma_extended_desc *desc) +{ + desc->control |= MSGDMA_DESC_CTL_TR_COMP_IRQ; 
+} + +/** + * msgdma_tx_submit - Submit DMA transaction + * @tx: Async transaction descriptor pointer + * + * Return: cookie value + */ +static dma_cookie_t msgdma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct msgdma_device *mdev = to_mdev(tx->chan); + struct msgdma_sw_desc *desc, *new; + dma_cookie_t cookie; + + new = tx_to_desc(tx); + spin_lock_bh(&mdev->lock); + cookie = dma_cookie_assign(tx); + + if (!list_empty(&mdev->pending_list)) { + desc = list_last_entry(&mdev->pending_list, + struct msgdma_sw_desc, node); + if (!list_empty(&desc->tx_list)) + desc = list_last_entry(&desc->tx_list, + struct msgdma_sw_desc, node); + } + + list_add_tail(&new->node, &mdev->pending_list); + spin_unlock_bh(&mdev->lock); + + return cookie; +} + +/** + * msgdma_prep_memcpy - prepare descriptors for memcpy transaction + * @dchan: DMA channel + * @dma_dst: Destination buffer address + * @dma_src: Source buffer address + * @len: Transfer length + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor *msgdma_prep_memcpy( + struct dma_chan *dchan, dma_addr_t dma_dst, + dma_addr_t dma_src, size_t len, ulong flags) +{ + struct msgdma_device *mdev = to_mdev(dchan); + struct msgdma_sw_desc *new, *first = NULL; + struct msgdma_extended_desc *desc; + size_t copy; + u32 desc_cnt; + + if (len > MSGDMA_MAX_TRANS_LEN) + return NULL; + + desc_cnt = DIV_ROUND_UP(len, MSGDMA_MAX_TRANS_LEN); + + spin_lock_bh(&mdev->lock); + if (desc_cnt > mdev->desc_free_cnt) { + spin_unlock_bh(&mdev->lock); + dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); + return NULL; + } + mdev->desc_free_cnt -= desc_cnt; + spin_unlock_bh(&mdev->lock); + + do { + /* Allocate and populate the descriptor */ + new = msgdma_get_descriptor(mdev); + + copy = min_t(size_t, len, MSGDMA_MAX_TRANS_LEN); + desc = &new->hw_desc; + msgdma_desc_config(desc, dma_dst, dma_src, copy, + MSGDMA_DESC_STRIDE_RW); + len -= copy; + dma_src += copy; + dma_dst += copy; + if (!first) + first = new; + else + list_add_tail(&new->node, &first->tx_list); + } while (len); + + msgdma_desc_config_eod(desc); + async_tx_ack(&first->async_tx); + first->async_tx.flags = flags; + + return &first->async_tx; +} + +/** + * msgdma_prep_sg - prepare descriptors for a memory sg transaction + * @dchan: DMA channel + * @dst_sg: Destination scatter list + * @dst_sg_len: Number of entries in destination scatter list + * @src_sg: Source scatter list + * @src_sg_len: Number of entries in source scatter list + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor *msgdma_prep_sg( + struct dma_chan *dchan, struct scatterlist *dst_sg, + unsigned int dst_sg_len, struct scatterlist *src_sg, + unsigned int src_sg_len, unsigned long flags) +{ + struct msgdma_device *mdev = to_mdev(dchan); + struct msgdma_sw_desc *new, *first = NULL; + void *desc = NULL; + size_t len, dst_avail, src_avail; + dma_addr_t dma_dst, dma_src; + u32 desc_cnt = 0, i; + struct scatterlist *sg; + + for_each_sg(src_sg, sg, src_sg_len, i) + desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), MSGDMA_MAX_TRANS_LEN); + + spin_lock_bh(&mdev->lock); + if (desc_cnt > mdev->desc_free_cnt) { + spin_unlock_bh(&mdev->lock); + dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); + return NULL; + } + mdev->desc_free_cnt -= desc_cnt; + spin_unlock_bh(&mdev->lock); + + dst_avail = sg_dma_len(dst_sg); + src_avail = sg_dma_len(src_sg); + + 
/* Run until we are out of scatterlist entries */ + while (true) { + /* Allocate and populate the descriptor */ + new = msgdma_get_descriptor(mdev); + + desc = &new->hw_desc; + len = min_t(size_t, src_avail, dst_avail); + len = min_t(size_t, len, MSGDMA_MAX_TRANS_LEN); + if (len == 0) + goto fetch; + dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - + dst_avail; + dma_src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - + src_avail; + + msgdma_desc_config(desc, dma_dst, dma_src, len, + MSGDMA_DESC_STRIDE_RW); + dst_avail -= len; + src_avail -= len; + + if (!first) + first = new; + else + list_add_tail(&new->node, &first->tx_list); +fetch: + /* Fetch the next dst scatterlist entry */ + if (dst_avail == 0) { + if (dst_sg_len == 0) + break; + dst_sg = sg_next(dst_sg); + if (dst_sg == NULL) + break; + dst_sg_len--; + dst_avail = sg_dma_len(dst_sg); + } + /* Fetch the next src scatterlist entry */ + if (src_avail == 0) { + if (src_sg_len == 0) + break; + src_sg = sg_next(src_sg); + if (src_sg == NULL) + break; + src_sg_len--; + src_avail = sg_dma_len(src_sg); + } + } + + msgdma_desc_config_eod(desc); + first->async_tx.flags = flags; + + return &first->async_tx; +} + +/** + * msgdma_prep_slave_sg - prepare descriptors for a slave sg transaction + * + * @dchan: DMA channel + * @sgl: Destination scatter list + * @sg_len: Number of entries in destination scatter list + * @dir: DMA transfer direction + * @flags: transfer ack flags + * @context: transfer context (unused) + */ +static struct dma_async_tx_descriptor *msgdma_prep_slave_sg( + struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct msgdma_device *mdev = to_mdev(dchan); + struct dma_slave_config *cfg = &mdev->slave_cfg; + struct msgdma_sw_desc *new, *first = NULL; + void *desc = NULL; + size_t len, avail; + dma_addr_t dma_dst, dma_src; + u32 desc_cnt = 0, i; + struct scatterlist *sg; + u32 stride; + + for_each_sg(sgl, sg, sg_len, i) + desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), MSGDMA_MAX_TRANS_LEN); + + spin_lock_bh(&mdev->lock); + if (desc_cnt > mdev->desc_free_cnt) { + spin_unlock_bh(&mdev->lock); + dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); + return NULL; + } + mdev->desc_free_cnt -= desc_cnt; + spin_unlock_bh(&mdev->lock); + + avail = sg_dma_len(sgl); + + /* Run until we are out of scatterlist entries */ + while (true) { + /* Allocate and populate the descriptor */ + new = msgdma_get_descriptor(mdev); + + desc = &new->hw_desc; + len = min_t(size_t, avail, MSGDMA_MAX_TRANS_LEN); + if (len == 0) + goto fetch; + if (dir == DMA_MEM_TO_DEV) { + dma_src = sg_dma_address(sgl) + sg_dma_len(sgl) - avail; + dma_dst = cfg->dst_addr; + stride = MSGDMA_DESC_STRIDE_RD; + } else { + dma_src = cfg->src_addr; + dma_dst = sg_dma_address(sgl) + sg_dma_len(sgl) - avail; + stride = MSGDMA_DESC_STRIDE_WR; + } + msgdma_desc_config(desc, dma_dst, dma_src, len, stride); + avail -= len; + + if (!first) + first = new; + else + list_add_tail(&new->node, &first->tx_list); +fetch: + /* Fetch the next scatterlist entry */ + if (avail == 0) { + if (sg_len == 0) + break; + sgl = sg_next(sgl); + if (sgl == NULL) + break; + sg_len--; + avail = sg_dma_len(sgl); + } + } + + msgdma_desc_config_eod(desc); + first->async_tx.flags = flags; + + return &first->async_tx; +} + +static int msgdma_dma_config(struct dma_chan *dchan, + struct dma_slave_config *config) +{ + struct msgdma_device *mdev = to_mdev(dchan); + + memcpy(&mdev->slave_cfg, config, 
sizeof(*config)); + + return 0; +} + +static void msgdma_reset(struct msgdma_device *mdev) +{ + u32 val; + int ret; + + /* Reset mSGDMA */ + iowrite32(MSGDMA_CSR_STAT_MASK, &mdev->csr->status); + iowrite32(MSGDMA_CSR_CTL_RESET, &mdev->csr->control); + + ret = readl_poll_timeout(&mdev->csr->status, val, + (val & MSGDMA_CSR_STAT_RESETTING) == 0, + 1, 10000); + if (ret) + dev_err(mdev->dev, "DMA channel did not reset\n"); + + /* Clear all status bits */ + iowrite32(MSGDMA_CSR_STAT_MASK, &mdev->csr->status); + + /* Enable the DMA controller including interrupts */ + iowrite32(MSGDMA_CSR_CTL_STOP_ON_ERR | MSGDMA_CSR_CTL_STOP_ON_EARLY | + MSGDMA_CSR_CTL_GLOBAL_INTR, &mdev->csr->control); +}; + +static void msgdma_copy_one(struct msgdma_device *mdev, + struct msgdma_sw_desc *desc) +{ + struct msgdma_extended_desc *hw_desc = mdev->desc; + + /* + * Check if the DESC FIFO it not full. If its full, we need to wait + * for at least one entry to become free again + */ + while (ioread32(&mdev->csr->status) & MSGDMA_CSR_STAT_DESC_BUF_FULL) + mdelay(1); + + memcpy(hw_desc, &desc->hw_desc, sizeof(desc->hw_desc) - 4); + /* Write control word last to flush this descriptor into the FIFO */ + iowrite32(desc->hw_desc.control, &hw_desc->control); +} + +/** + * msgdma_copy_desc_to_fifo - copy descriptor(s) into controller FIFO + * @mdev: Pointer to the Altera mSGDMA device structure + * @desc: Transaction descriptor pointer + */ +static void msgdma_copy_desc_to_fifo(struct msgdma_device *mdev, + struct msgdma_sw_desc *desc) +{ + struct msgdma_sw_desc *sdesc, *next; + + msgdma_copy_one(mdev, desc); + + list_for_each_entry_safe(sdesc, next, &desc->tx_list, node) + msgdma_copy_one(mdev, sdesc); +} + +/** + * msgdma_start_transfer - Initiate the new transfer + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_start_transfer(struct msgdma_device *mdev) +{ + struct msgdma_sw_desc *desc; + + desc = list_first_entry_or_null(&mdev->pending_list, + struct msgdma_sw_desc, node); + if (!desc) + return; + + list_splice_tail_init(&mdev->pending_list, &mdev->active_list); + msgdma_copy_desc_to_fifo(mdev, desc); +} + +/** + * msgdma_issue_pending - Issue pending transactions + * @chan: DMA channel pointer + */ +static void msgdma_issue_pending(struct dma_chan *chan) +{ + struct msgdma_device *mdev = to_mdev(chan); + + spin_lock_bh(&mdev->lock); + msgdma_start_transfer(mdev); + spin_unlock_bh(&mdev->lock); +} + +/** + * msgdma_chan_desc_cleanup - Cleanup the completed descriptors + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_chan_desc_cleanup(struct msgdma_device *mdev) +{ + struct msgdma_sw_desc *desc, *next; + + list_for_each_entry_safe(desc, next, &mdev->done_list, node) { + dma_async_tx_callback callback; + void *callback_param; + + list_del(&desc->node); + + callback = desc->async_tx.callback; + callback_param = desc->async_tx.callback_param; + if (callback) { + spin_unlock(&mdev->lock); + callback(callback_param); + spin_lock(&mdev->lock); + } + + /* Run any dependencies, then free the descriptor */ + msgdma_free_descriptor(mdev, desc); + } +} + +/** + * msgdma_complete_descriptor - Mark the active descriptor as complete + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_complete_descriptor(struct msgdma_device *mdev) +{ + struct msgdma_sw_desc *desc; + + desc = list_first_entry_or_null(&mdev->active_list, + struct msgdma_sw_desc, node); + if (!desc) + return; + list_del(&desc->node); + dma_cookie_complete(&desc->async_tx); + 
list_add_tail(&desc->node, &mdev->done_list); +} + +/** + * msgdma_free_descriptors - Free channel descriptors + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_free_descriptors(struct msgdma_device *mdev) +{ + msgdma_free_desc_list(mdev, &mdev->active_list); + msgdma_free_desc_list(mdev, &mdev->pending_list); + msgdma_free_desc_list(mdev, &mdev->done_list); +} + +/** + * msgdma_free_chan_resources - Free channel resources + * @dchan: DMA channel pointer + */ +static void msgdma_free_chan_resources(struct dma_chan *dchan) +{ + struct msgdma_device *mdev = to_mdev(dchan); + + spin_lock_bh(&mdev->lock); + msgdma_free_descriptors(mdev); + spin_unlock_bh(&mdev->lock); + kfree(mdev->sw_desq); +} + +/** + * msgdma_alloc_chan_resources - Allocate channel resources + * @dchan: DMA channel + * + * Return: Number of descriptors on success and failure value on error + */ +static int msgdma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct msgdma_device *mdev = to_mdev(dchan); + struct msgdma_sw_desc *desc; + int i; + + mdev->sw_desq = kzalloc(sizeof(*desc) * MSGDMA_DESC_NUM, GFP_KERNEL); + if (!mdev->sw_desq) + return -ENOMEM; + + mdev->desc_free_cnt = MSGDMA_DESC_NUM; + + INIT_LIST_HEAD(&mdev->free_list); + + for (i = 0; i < MSGDMA_DESC_NUM; i++) { + desc = mdev->sw_desq + i; + dma_async_tx_descriptor_init(&desc->async_tx, &mdev->dmachan); + desc->async_tx.tx_submit = msgdma_tx_submit; + list_add_tail(&desc->node, &mdev->free_list); + } + + return MSGDMA_DESC_NUM; +} + +/** + * msgdma_tasklet - Schedule completion tasklet + * @data: Pointer to the Altera sSGDMA channel structure + */ +static void msgdma_tasklet(unsigned long data) +{ + struct msgdma_device *mdev = (struct msgdma_device *)data; + u32 count; + u32 size; + u32 status; + + spin_lock(&mdev->lock); + + /* Read number of responses that are available */ + count = ioread32(&mdev->csr->resp_fill_level); + pr_debug("%s (%d): response count=%d\n", __func__, __LINE__, count); + + while (count--) { + /* + * Read both longwords to purge this response from the FIFO + * On Avalon-MM implementations, size and status do not + * have any real values, like transferred bytes or error + * bits. So we need to just drop these values. 
+ */ + size = ioread32(&mdev->resp->bytes_transferred); + status = ioread32(&mdev->resp->status); + + msgdma_complete_descriptor(mdev); + msgdma_chan_desc_cleanup(mdev); + } + + spin_unlock(&mdev->lock); +} + +/** + * msgdma_irq_handler - Altera mSGDMA Interrupt handler + * @irq: IRQ number + * @data: Pointer to the Altera mSGDMA device structure + * + * Return: IRQ_HANDLED/IRQ_NONE + */ +static irqreturn_t msgdma_irq_handler(int irq, void *data) +{ + struct msgdma_device *mdev = data; + + tasklet_schedule(&mdev->irq_tasklet); + + /* Clear interrupt in mSGDMA controller */ + iowrite32(MSGDMA_CSR_STAT_IRQ, &mdev->csr->status); + + return IRQ_HANDLED; +} + +/** + * msgdma_chan_remove - Channel remove function + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_dev_remove(struct msgdma_device *mdev) +{ + if (!mdev) + return; + + devm_free_irq(mdev->dev, mdev->irq, mdev); + tasklet_kill(&mdev->irq_tasklet); + list_del(&mdev->dmachan.device_node); +} + +static int request_and_map(struct platform_device *pdev, const char *name, + struct resource **res, void __iomem **ptr) +{ + struct resource *region; + struct device *device = &pdev->dev; + + *res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); + if (*res == NULL) { + dev_err(device, "resource %s not defined\n", name); + return -ENODEV; + } + + region = devm_request_mem_region(device, (*res)->start, + resource_size(*res), dev_name(device)); + if (region == NULL) { + dev_err(device, "unable to request %s\n", name); + return -EBUSY; + } + + *ptr = devm_ioremap_nocache(device, region->start, + resource_size(region)); + if (*ptr == NULL) { + dev_err(device, "ioremap_nocache of %s failed!", name); + return -ENOMEM; + } + + return 0; +} + +/** + * msgdma_probe - Driver probe function + * @pdev: Pointer to the platform_device structure + * + * Return: '0' on success and failure value on error + */ +static int msgdma_probe(struct platform_device *pdev) +{ + struct msgdma_device *mdev; + struct dma_device *dma_dev; + struct resource *dma_res; + int ret; + + mdev = devm_kzalloc(&pdev->dev, sizeof(*mdev), GFP_KERNEL); + if (!mdev) + return -ENOMEM; + + mdev->dev = &pdev->dev; + + /* Map CSR space */ + ret = request_and_map(pdev, "csr", &dma_res, (void **)&mdev->csr); + if (ret) + return ret; + + /* Map (extended) descriptor space */ + ret = request_and_map(pdev, "desc", &dma_res, (void **)&mdev->desc); + if (ret) + return ret; + + /* Map response space */ + ret = request_and_map(pdev, "resp", &dma_res, (void **)&mdev->resp); + if (ret) + return ret; + + platform_set_drvdata(pdev, mdev); + + /* Get interrupt nr from platform data */ + mdev->irq = platform_get_irq(pdev, 0); + if (mdev->irq < 0) + return -ENXIO; + + ret = devm_request_irq(&pdev->dev, mdev->irq, msgdma_irq_handler, + 0, dev_name(&pdev->dev), mdev); + if (ret) + return ret; + + tasklet_init(&mdev->irq_tasklet, msgdma_tasklet, (unsigned long)mdev); + + dma_cookie_init(&mdev->dmachan); + + spin_lock_init(&mdev->lock); + + INIT_LIST_HEAD(&mdev->active_list); + INIT_LIST_HEAD(&mdev->pending_list); + INIT_LIST_HEAD(&mdev->done_list); + INIT_LIST_HEAD(&mdev->free_list); + + dma_dev = &mdev->dmadev; + + /* Set DMA capabilities */ + dma_cap_zero(dma_dev->cap_mask); + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_cap_set(DMA_SG, dma_dev->cap_mask); + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + + dma_dev->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma_dev->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma_dev->directions = 
BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM) | + BIT(DMA_MEM_TO_MEM); + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + /* Init DMA link list */ + INIT_LIST_HEAD(&dma_dev->channels); + + /* Set base routines */ + dma_dev->device_tx_status = dma_cookie_status; + dma_dev->device_issue_pending = msgdma_issue_pending; + dma_dev->dev = &pdev->dev; + + dma_dev->copy_align = DMAENGINE_ALIGN_4_BYTES; + dma_dev->device_prep_dma_memcpy = msgdma_prep_memcpy; + dma_dev->device_prep_dma_sg = msgdma_prep_sg; + dma_dev->device_prep_slave_sg = msgdma_prep_slave_sg; + dma_dev->device_config = msgdma_dma_config; + + dma_dev->device_alloc_chan_resources = msgdma_alloc_chan_resources; + dma_dev->device_free_chan_resources = msgdma_free_chan_resources; + + mdev->dmachan.device = dma_dev; + list_add_tail(&mdev->dmachan.device_node, &dma_dev->channels); + + msgdma_reset(mdev); + + ret = dma_async_device_register(dma_dev); + if (ret) + goto fail; + + dev_notice(&pdev->dev, "Altera mSGDMA driver probe success\n"); + + return 0; + +fail: + msgdma_dev_remove(mdev); + + return ret; +} + +/** + * msgdma_dma_remove - Driver remove function + * @pdev: Pointer to the platform_device structure + * + * Return: Always '0' + */ +static int msgdma_remove(struct platform_device *pdev) +{ + struct msgdma_device *mdev = platform_get_drvdata(pdev); + + dma_async_device_unregister(&mdev->dmadev); + msgdma_dev_remove(mdev); + + dev_notice(&pdev->dev, "Altera mSGDMA driver removed\n"); + + return 0; +} + +static struct platform_driver msgdma_driver = { + .driver = { + .name = "altera-msgdma", + }, + .probe = msgdma_probe, + .remove = msgdma_remove, +}; + +module_platform_driver(msgdma_driver); + +MODULE_DESCRIPTION("Altera mSGDMA driver"); +MODULE_AUTHOR("Stefan Roese <sr@denx.de>"); +MODULE_LICENSE("GPL");
This driver adds support for the Altera / Intel modular Scatter-Gather
Direct Memory Access (mSGDMA) intellectual property (IP) to the Linux
DMAengine subsystem. Currently it supports the following op modes:

- DMA_MEMCPY
- DMA_SG
- DMA_SLAVE

This implementation has been tested on an Altera Cyclone FPGA connected
via PCIe, both on an ARM and an x86 platform.

Signed-off-by: Stefan Roese <sr@denx.de>
Cc: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/Kconfig         |    6 +
 drivers/dma/Makefile        |    1 +
 drivers/dma/altera-msgdma.c | 1003 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1010 insertions(+)
 create mode 100644 drivers/dma/altera-msgdma.c
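To illustrate how the DMA_MEMCPY mode is consumed, here is a generic
DMAengine client sketch. It is not part of the patch; the helper name
and the error handling are illustrative only, and dst/src are assumed
to be already mapped DMA addresses:

#include <linux/dmaengine.h>

static int msgdma_memcpy_example(struct dma_chan *chan, dma_addr_t dst,
				 dma_addr_t src, size_t len)
{
	struct dma_async_tx_descriptor *tx;
	dma_cookie_t cookie;

	/* Ends up in msgdma_prep_memcpy() for a channel of this driver */
	tx = dmaengine_prep_dma_memcpy(chan, dst, src, len,
				       DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
	if (!tx)
		return -ENOMEM;

	cookie = dmaengine_submit(tx);		/* msgdma_tx_submit() */
	dma_async_issue_pending(chan);		/* msgdma_issue_pending() */

	/* Polling helper from the dmaengine core; real clients would
	 * typically use a completion callback instead.
	 */
	return dma_sync_wait(chan, cookie) == DMA_COMPLETE ? 0 : -EIO;
}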