
[V11,5/7] dma: qcom_hidma: implement lower level hardware interface

Message ID 1451865996-12808-6-git-send-email-okaya@codeaurora.org (mailing list archive)
State New, archived

Commit Message

Sinan Kaya Jan. 4, 2016, 12:06 a.m. UTC
This patch implements the hardware hooks for the HIDMA channel driver.

The main functions of interest are:
- hidma_ll_init
- hidma_ll_request
- hidma_ll_queue_request
- hidma_ll_hw_start

The OS layer calls the hidma_ll_init function during probe to set up the
hardware. At this point, the number of supported descriptors is also
given. On each request, a descriptor is allocated from the free pool and
filled in with the transfer parameters. Multiple requests can be queued
into the hardware via the OS interface. When the client is ready for the
requests to be executed, the start method is called.

Completions are delivered through callbacks from a tasklet.
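
For reference, a rough usage sketch (not part of this patch; the TRE
count, names and error handling are illustrative, the real sequence
lives in the hidma.c OS layer):

static int hidma_ll_example(struct device *dev, void __iomem *trca,
                            void __iomem *evca, dma_addr_t src,
                            dma_addr_t dest, u32 len)
{
        struct hidma_lldev *lldev;
        u32 tre_ch;
        int rc;

        /* probe time: set up the HW and size the descriptor pool */
        lldev = hidma_ll_init(dev, 128, trca, evca, 0);
        if (!lldev)
                return -ENODEV;

        /* per request: allocate a TRE from the free pool */
        rc = hidma_ll_request(lldev, 0, "example", NULL, NULL, &tre_ch);
        if (rc)
                return rc;

        /* fill in the transfer parameters and queue the TRE;
         * more requests may be queued before starting */
        hidma_ll_set_transfer_params(lldev, tre_ch, src, dest, len, 0);
        hidma_ll_queue_request(lldev, tre_ch);

        /* ring the doorbell so the queued TREs get executed */
        hidma_ll_start(lldev);
        return 0;
}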

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/dma/qcom/Makefile   |   2 +
 drivers/dma/qcom/hidma.h    |   2 +-
 drivers/dma/qcom/hidma_ll.c | 927 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 930 insertions(+), 1 deletion(-)
 create mode 100644 drivers/dma/qcom/hidma_ll.c

Comments

Andy Shevchenko Jan. 4, 2016, 7:01 p.m. UTC | #1
On Mon, Jan 4, 2016 at 2:06 AM, Sinan Kaya <okaya@codeaurora.org> wrote:
> This patch implements the hardware hooks for the HIDMA channel driver.
>
> The main functions of interest are:
> - hidma_ll_init
> - hidma_ll_request
> - hidma_ll_queue_request
> - hidma_ll_hw_start
>
> OS layer calls the hidma_ll_init function during probe to set up the
> hardware. At this moment, the number of supported descriptors are also
> given. On each request, a descriptor is allocated from the free pool and
> filled in with the transfer parameters. Multiple requests can be queued
> into the hardware via the OS interface. When client is ready for requests
> to be executed, start method is called.
>
> Completions are delivered via callbacks via tasklet.

A few nitpicks below.

>
> Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
> ---
>  drivers/dma/qcom/Makefile   |   2 +
>  drivers/dma/qcom/hidma.h    |   2 +-
>  drivers/dma/qcom/hidma_ll.c | 927 ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 930 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/dma/qcom/hidma_ll.c
>
> diff --git a/drivers/dma/qcom/Makefile b/drivers/dma/qcom/Makefile
> index bfea699..6bf9267 100644
> --- a/drivers/dma/qcom/Makefile
> +++ b/drivers/dma/qcom/Makefile
> @@ -1,3 +1,5 @@
>  obj-$(CONFIG_QCOM_BAM_DMA) += bam_dma.o
>  obj-$(CONFIG_QCOM_HIDMA_MGMT) += hdma_mgmt.o
>  hdma_mgmt-objs  := hidma_mgmt.o hidma_mgmt_sys.o
> +obj-$(CONFIG_QCOM_HIDMA) +=  hdma.o
> +hdma-objs        := hidma_ll.o hidma.o
> diff --git a/drivers/dma/qcom/hidma.h b/drivers/dma/qcom/hidma.h
> index 231e306..1e09d7c 100644
> --- a/drivers/dma/qcom/hidma.h
> +++ b/drivers/dma/qcom/hidma.h
> @@ -37,7 +37,7 @@ struct hidma_tre {
>         atomic_t allocated;             /* if this channel is allocated     */
>         bool queued;                    /* flag whether this is pending     */
>         u16 status;                     /* status                           */
> -       u32 chidx;                      /* index of the tre                 */
> +       u32 idx;                        /* index of the tre                 */
>         u32 dma_sig;                    /* signature of the tre             */
>         const char *dev_name;           /* name of the device               */
>         void (*callback)(void *data);   /* requester callback               */
> diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c
> new file mode 100644
> index 0000000..0cd8d70
> --- /dev/null
> +++ b/drivers/dma/qcom/hidma_ll.c
> @@ -0,0 +1,927 @@
> +/*
> + * Qualcomm Technologies HIDMA DMA engine low level code
> + *
> + * Copyright (c) 2015, The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/dmaengine.h>
> +#include <linux/slab.h>
> +#include <linux/interrupt.h>
> +#include <linux/mm.h>
> +#include <linux/highmem.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/delay.h>
> +#include <linux/atomic.h>
> +#include <linux/iopoll.h>
> +#include <linux/kfifo.h>
> +#include <linux/bitops.h>
> +
> +#include "hidma.h"
> +
> +#define EVRE_SIZE                      16      /* each EVRE is 16 bytes */
> +
> +#define TRCA_CTRLSTS_OFFSET            0x0
> +#define TRCA_RING_LOW_OFFSET           0x8
> +#define TRCA_RING_HIGH_OFFSET          0xC
> +#define TRCA_RING_LEN_OFFSET           0x10
> +#define TRCA_READ_PTR_OFFSET           0x18
> +#define TRCA_WRITE_PTR_OFFSET          0x20
> +#define TRCA_DOORBELL_OFFSET           0x400

I would rather have the same precision for all offsets,
like
...CTRLSTS_OFFSET  0x000
and so on.
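
I.e. the TRCA block, zero padded to three hex digits, would look like
(illustrative):

#define TRCA_CTRLSTS_OFFSET            0x000
#define TRCA_RING_LOW_OFFSET           0x008
#define TRCA_RING_HIGH_OFFSET          0x00C
#define TRCA_RING_LEN_OFFSET           0x010
#define TRCA_READ_PTR_OFFSET           0x018
#define TRCA_WRITE_PTR_OFFSET          0x020
#define TRCA_DOORBELL_OFFSET           0x400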

> +
> +#define EVCA_CTRLSTS_OFFSET            0x0
> +#define EVCA_INTCTRL_OFFSET            0x4
> +#define EVCA_RING_LOW_OFFSET           0x8
> +#define EVCA_RING_HIGH_OFFSET          0xC
> +#define EVCA_RING_LEN_OFFSET           0x10
> +#define EVCA_READ_PTR_OFFSET           0x18
> +#define EVCA_WRITE_PTR_OFFSET          0x20
> +#define EVCA_DOORBELL_OFFSET           0x400

Ditto.

> +
> +#define EVCA_IRQ_STAT_OFFSET           0x100
> +#define EVCA_IRQ_CLR_OFFSET            0x108
> +#define EVCA_IRQ_EN_OFFSET             0x110
> +
> +#define EVRE_CFG_IDX                   0
> +#define EVRE_LEN_IDX                   1
> +#define EVRE_DEST_LOW_IDX              2
> +#define EVRE_DEST_HI_IDX               3
> +
> +#define EVRE_ERRINFO_BIT_POS           24
> +#define EVRE_CODE_BIT_POS              28
> +
> +#define EVRE_ERRINFO_MASK              GENMASK(3, 0)
> +#define EVRE_CODE_MASK                 GENMASK(3, 0)
> +
> +#define CH_CONTROL_MASK                GENMASK(7, 0)
> +#define CH_STATE_MASK                  GENMASK(7, 0)
> +#define CH_STATE_BIT_POS               0x8
> +
> +#define IRQ_EV_CH_EOB_IRQ_BIT_POS      0
> +#define IRQ_EV_CH_WR_RESP_BIT_POS      1
> +#define IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS 9
> +#define IRQ_TR_CH_DATA_RD_ER_BIT_POS   10
> +#define IRQ_TR_CH_DATA_WR_ER_BIT_POS   11
> +#define IRQ_TR_CH_INVALID_TRE_BIT_POS  14
> +
> +#define        ENABLE_IRQS (BIT(IRQ_EV_CH_EOB_IRQ_BIT_POS)     | \
> +               BIT(IRQ_EV_CH_WR_RESP_BIT_POS)          | \
> +               BIT(IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS)    | \
> +               BIT(IRQ_TR_CH_DATA_RD_ER_BIT_POS)       | \
> +               BIT(IRQ_TR_CH_DATA_WR_ER_BIT_POS)       | \
> +               BIT(IRQ_TR_CH_INVALID_TRE_BIT_POS))
> +
> +enum ch_command {
> +       CH_DISABLE = 0,
> +       CH_ENABLE = 1,
> +       CH_SUSPEND = 2,
> +       CH_RESET = 9,
> +};
> +
> +enum ch_state {
> +       CH_DISABLED = 0,
> +       CH_ENABLED = 1,
> +       CH_RUNNING = 2,
> +       CH_SUSPENDED = 3,
> +       CH_STOPPED = 4,
> +       CH_ERROR = 5,
> +       CH_IN_RESET = 9,
> +};
> +
> +enum tre_type {
> +       TRE_MEMCPY = 3,
> +       TRE_MEMSET = 4,
> +};
> +
> +enum evre_type {
> +       EVRE_DMA_COMPLETE = 0x23,
> +       EVRE_IMM_DATA = 0x24,
> +};
> +
> +enum err_code {
> +       EVRE_STATUS_COMPLETE = 1,
> +       EVRE_STATUS_ERROR = 4,
> +};
> +
> +void hidma_ll_free(struct hidma_lldev *lldev, u32 tre_ch)
> +{
> +       struct hidma_tre *tre;
> +
> +       if (tre_ch >= lldev->nr_tres) {
> +               dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch);
> +               return;
> +       }
> +
> +       tre = &lldev->trepool[tre_ch];
> +       if (atomic_read(&tre->allocated) != true) {
> +               dev_err(lldev->dev, "trying to free an unused TRE:%d", tre_ch);
> +               return;
> +       }
> +
> +       atomic_set(&tre->allocated, 0);
> +}
> +
> +int hidma_ll_request(struct hidma_lldev *lldev, u32 dma_sig,
> +                    const char *dev_name,
> +                    void (*callback)(void *data), void *data, u32 *tre_ch)
> +{
> +       unsigned int i;
> +       struct hidma_tre *tre;
> +       u32 *tre_local;
> +
> +       if (!tre_ch || !lldev)
> +               return -EINVAL;
> +
> +       /* need to have at least one empty spot in the queue */
> +       for (i = 0; i < lldev->nr_tres - 1; i++) {
> +               if (atomic_add_unless(&lldev->trepool[i].allocated, 1, 1))
> +                       break;
> +       }
> +
> +       if (i == (lldev->nr_tres - 1))
> +               return -ENOMEM;
> +
> +       tre = &lldev->trepool[i];
> +       tre->dma_sig = dma_sig;
> +       tre->dev_name = dev_name;
> +       tre->callback = callback;
> +       tre->data = data;
> +       tre->idx = i;
> +       tre->status = 0;
> +       tre->queued = 0;
> +       lldev->tx_status_list[i].err_code = 0;
> +       tre->lldev = lldev;
> +       tre_local = &tre->tre_local[0];
> +       tre_local[TRE_CFG_IDX] = TRE_MEMCPY;
> +       tre_local[TRE_CFG_IDX] |= (lldev->chidx & 0xFF) << 8;
> +       tre_local[TRE_CFG_IDX] |= BIT(16);      /* set IEOB */
> +       *tre_ch = i;
> +       if (callback)
> +               callback(data);
> +       return 0;
> +}
> +
> +/*
> + * Multiple TREs may be queued and waiting in the
> + * pending queue.
> + */
> +static void hidma_ll_tre_complete(unsigned long arg)
> +{
> +       struct hidma_lldev *lldev = (struct hidma_lldev *)arg;
> +       struct hidma_tre *tre;
> +
> +       while (kfifo_out(&lldev->handoff_fifo, &tre, 1)) {
> +               /* call the user if it has been read by the hardware */
> +               if (tre->callback)
> +                       tre->callback(tre->data);
> +       }
> +}
> +
> +/*
> + * Called to handle the interrupt for the channel.
> + * Return a positive number if TRE or EVRE were consumed on this run.
> + * Return a positive number if there are pending TREs or EVREs.
> + * Return 0 if there is nothing to consume or no pending TREs/EVREs found.
> + */
> +static int hidma_handle_tre_completion(struct hidma_lldev *lldev)
> +{
> +       struct hidma_tre *tre;
> +       u32 evre_write_off;
> +       u32 evre_ring_size = lldev->evre_ring_size;
> +       u32 tre_ring_size = lldev->tre_ring_size;
> +       u32 num_completed = 0, tre_iterator, evre_iterator;
> +       unsigned long flags;
> +
> +       evre_write_off = readl_relaxed(lldev->evca + EVCA_WRITE_PTR_OFFSET);
> +       tre_iterator = lldev->tre_processed_off;
> +       evre_iterator = lldev->evre_processed_off;
> +
> +       if ((evre_write_off > evre_ring_size) ||
> +           ((evre_write_off % EVRE_SIZE) != 0)) {
> +               dev_err(lldev->dev, "HW reports invalid EVRE write offset\n");
> +               return 0;
> +       }
> +
> +       /*
> +        * By the time control reaches here the number of EVREs and TREs
> +        * may not match. Only consume the ones that hardware told us.
> +        */
> +       while ((evre_iterator != evre_write_off)) {
> +               u32 *current_evre = lldev->evre_ring + evre_iterator;
> +               u32 cfg;
> +               u8 err_info;
> +
> +               spin_lock_irqsave(&lldev->lock, flags);
> +               tre = lldev->pending_tre_list[tre_iterator / TRE_SIZE];
> +               if (!tre) {
> +                       spin_unlock_irqrestore(&lldev->lock, flags);
> +                       dev_warn(lldev->dev,
> +                                "tre_index [%d] and tre out of sync\n",
> +                                tre_iterator / TRE_SIZE);
> +                       tre_iterator += TRE_SIZE;
> +                       if (tre_iterator >= tre_ring_size)
> +                               tre_iterator -= tre_ring_size;

Could it be refactored into a macro, e.g.
increment_iterator(iter, size, ring_size)?
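
Something like (untested sketch):

#define increment_iterator(iter, size, ring_size)       \
do {                                                     \
        (iter) += (size);                                \
        if ((iter) >= (ring_size))                       \
                (iter) -= (ring_size);                   \
} while (0)

which would replace all of the open coded wrap-arounds below.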

> +                       evre_iterator += EVRE_SIZE;
> +                       if (evre_iterator >= evre_ring_size)
> +                               evre_iterator -= evre_ring_size;

Ditto.

> +
> +                       continue;
> +               }
> +               lldev->pending_tre_list[tre->tre_index] = NULL;
> +
> +               /*
> +                * Keep track of pending TREs that SW is expecting to receive
> +                * from HW. We got one now. Decrement our counter.
> +                */
> +               lldev->pending_tre_count--;
> +               if (lldev->pending_tre_count < 0) {
> +                       dev_warn(lldev->dev,
> +                                "tre count mismatch on completion");
> +                       lldev->pending_tre_count = 0;
> +               }
> +
> +               spin_unlock_irqrestore(&lldev->lock, flags);
> +
> +               cfg = current_evre[EVRE_CFG_IDX];
> +               err_info = cfg >> EVRE_ERRINFO_BIT_POS;
> +               err_info &= EVRE_ERRINFO_MASK;
> +               lldev->tx_status_list[tre->idx].err_info = err_info;
> +               lldev->tx_status_list[tre->idx].err_code =
> +                   (cfg >> EVRE_CODE_BIT_POS) & EVRE_CODE_MASK;
> +               tre->queued = 0;
> +
> +               kfifo_put(&lldev->handoff_fifo, tre);
> +               tasklet_schedule(&lldev->task);
> +
> +               tre_iterator += TRE_SIZE;
> +               if (tre_iterator >= tre_ring_size)
> +                       tre_iterator -= tre_ring_size;

Ditto.

> +               evre_iterator += EVRE_SIZE;
> +               if (evre_iterator >= evre_ring_size)
> +                       evre_iterator -= evre_ring_size;

Ditto.

> +
> +               /*
> +                * Read the new event descriptor written by the HW.
> +                * As we are processing the delivered events, other events
> +                * get queued to the SW for processing.
> +                */
> +               evre_write_off =
> +                   readl_relaxed(lldev->evca + EVCA_WRITE_PTR_OFFSET);
> +               num_completed++;
> +       }
> +
> +       if (num_completed) {
> +               u32 evre_read_off = (lldev->evre_processed_off +
> +                                    EVRE_SIZE * num_completed);
> +               u32 tre_read_off = (lldev->tre_processed_off +
> +                                   TRE_SIZE * num_completed);
> +
> +               evre_read_off = evre_read_off % evre_ring_size;
> +               tre_read_off = tre_read_off % tre_ring_size;
> +
> +               writel(evre_read_off, lldev->evca + EVCA_DOORBELL_OFFSET);
> +
> +               /* record the last processed tre offset */
> +               lldev->tre_processed_off = tre_read_off;
> +               lldev->evre_processed_off = evre_read_off;
> +       }
> +
> +       return num_completed;
> +}
> +
> +void hidma_cleanup_pending_tre(struct hidma_lldev *lldev, u8 err_info,
> +                              u8 err_code)
> +{
> +       u32 tre_iterator;
> +       struct hidma_tre *tre;
> +       u32 tre_ring_size = lldev->tre_ring_size;
> +       int num_completed = 0;
> +       u32 tre_read_off;
> +       unsigned long flags;
> +
> +       tre_iterator = lldev->tre_processed_off;
> +       while (lldev->pending_tre_count) {
> +               int tre_index = tre_iterator / TRE_SIZE;
> +
> +               spin_lock_irqsave(&lldev->lock, flags);
> +               tre = lldev->pending_tre_list[tre_index];
> +               if (!tre) {
> +                       spin_unlock_irqrestore(&lldev->lock, flags);
> +                       tre_iterator += TRE_SIZE;
> +                       if (tre_iterator >= tre_ring_size)
> +                               tre_iterator -= tre_ring_size;

Ditto.

> +                       continue;
> +               }
> +               lldev->pending_tre_list[tre_index] = NULL;
> +               lldev->pending_tre_count--;
> +               if (lldev->pending_tre_count < 0) {
> +                       dev_warn(lldev->dev,
> +                                "tre count mismatch on completion");
> +                       lldev->pending_tre_count = 0;
> +               }
> +               spin_unlock_irqrestore(&lldev->lock, flags);
> +
> +               lldev->tx_status_list[tre->idx].err_info = err_info;
> +               lldev->tx_status_list[tre->idx].err_code = err_code;
> +               tre->queued = 0;
> +
> +               kfifo_put(&lldev->handoff_fifo, tre);
> +               tasklet_schedule(&lldev->task);
> +
> +               tre_iterator += TRE_SIZE;
> +               if (tre_iterator >= tre_ring_size)
> +                       tre_iterator -= tre_ring_size;

Ditto.

> +
> +               num_completed++;
> +       }
> +       tre_read_off = (lldev->tre_processed_off + TRE_SIZE * num_completed);
> +
> +       tre_read_off = tre_read_off % tre_ring_size;
> +
> +       /* record the last processed tre offset */
> +       lldev->tre_processed_off = tre_read_off;
> +}
> +
> +static int hidma_ll_reset(struct hidma_lldev *lldev)
> +{
> +       u32 val;
> +       int ret;
> +
> +       val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
> +       val &= ~(CH_CONTROL_MASK << 16);
> +       val |= CH_RESET << 16;
> +       writel(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
> +
> +       /*
> +        * Delay 10ms after reset to allow DMA logic to quiesce.
> +        * Do a polled read up to 1ms and 10ms maximum.
> +        */
> +       ret = readl_poll_timeout(lldev->trca + TRCA_CTRLSTS_OFFSET, val,
> +                                (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK) ==
> +                                 CH_DISABLED), 1000, 10000);
> +       if (ret) {
> +               dev_err(lldev->dev, "transfer channel did not reset\n");
> +               return ret;
> +       }
> +
> +       val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
> +       val &= ~(CH_CONTROL_MASK << 16);
> +       val |= CH_RESET << 16;
> +       writel(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
> +
> +       /*
> +        * Delay 10ms after reset to allow DMA logic to quiesce.
> +        * Do a polled read up to 1ms and 10ms maximum.
> +        */
> +       ret = readl_poll_timeout(lldev->evca + EVCA_CTRLSTS_OFFSET, val,
> +                                (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK) ==
> +                                 CH_DISABLED), 1000, 10000);
> +       if (ret)
> +               return ret;
> +
> +       lldev->trch_state = CH_DISABLED;
> +       lldev->evch_state = CH_DISABLED;
> +       return 0;
> +}
> +
> +static void hidma_ll_enable_irq(struct hidma_lldev *lldev, u32 irq_bits)
> +{
> +       writel(irq_bits, lldev->evca + EVCA_IRQ_EN_OFFSET);
> +}
> +
> +/*
> + * The interrupt handler for HIDMA will try to consume as many pending
> + * EVRE from the event queue as possible. Each EVRE has an associated
> + * TRE that holds the user interface parameters. EVRE reports the
> + * result of the transaction. Hardware guarantees ordering between EVREs
> + * and TREs. We use last processed offset to figure out which TRE is
> + * associated with which EVRE. If two TREs are consumed by HW, the EVREs
> + * are in order in the event ring.
> + *
> + * This handler will do a one pass for consuming EVREs. Other EVREs may
> + * be delivered while we are working. It will try to consume incoming
> + * EVREs one more time and return.
> + *
> + * For unprocessed EVREs, hardware will trigger another interrupt until
> + * all the interrupt bits are cleared.
> + *
> + * Hardware guarantees that by the time interrupt is observed, all data
> + * transactions in flight are delivered to their respective places and
> + * are visible to the CPU.
> + *
> + * On demand paging for IOMMU is only supported for PCIe via PRI
> + * (Page Request Interface) not for HIDMA. All other hardware instances
> + * including HIDMA work on pinned DMA addresses.
> + *
> + * HIDMA is not aware of IOMMU presence since it follows the DMA API. All
> + * IOMMU latency will be built into the data movement time. By the time
> + * interrupt happens, IOMMU lookups + data movement has already taken place.
> + *
> + * While the first read in a typical PCI endpoint ISR flushes all outstanding
> + * requests traditionally to the destination, this concept does not apply
> + * here for this HW.
> + */
> +static void hidma_ll_int_handler_internal(struct hidma_lldev *lldev)
> +{
> +       u32 status;
> +       u32 enable;
> +       u32 cause;
> +       int repeat = 2;
> +       unsigned long timeout;
> +
> +       /*
> +        * Fine tuned for this HW...
> +        *
> +        * This ISR has been designed for this particular hardware. Relaxed
> +        * read and write accessors are used for performance reasons due to
> +        * interrupt delivery guarantees. Do not copy this code blindly and
> +        * expect that to work.
> +        */
> +       status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
> +       enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
> +       cause = status & enable;
> +
> +       if ((cause & (BIT(IRQ_TR_CH_INVALID_TRE_BIT_POS))) ||
> +           (cause & BIT(IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS)) ||
> +           (cause & BIT(IRQ_EV_CH_WR_RESP_BIT_POS)) ||
> +           (cause & BIT(IRQ_TR_CH_DATA_RD_ER_BIT_POS)) ||
> +           (cause & BIT(IRQ_TR_CH_DATA_WR_ER_BIT_POS))) {
> +               u8 err_code = EVRE_STATUS_ERROR;
> +               u8 err_info = 0xFF;
> +
> +               /* Clear out pending interrupts */
> +               writel(cause, lldev->evca + EVCA_IRQ_CLR_OFFSET);
> +
> +               dev_err(lldev->dev, "error 0x%x, resetting...\n", cause);
> +
> +               hidma_cleanup_pending_tre(lldev, err_info, err_code);
> +
> +               /* reset the channel for recovery */
> +               if (hidma_ll_setup(lldev)) {
> +                       dev_err(lldev->dev,
> +                               "channel reinitialize failed after error\n");
> +                       return;
> +               }
> +               hidma_ll_enable_irq(lldev, ENABLE_IRQS);
> +               return;
> +       }
> +
> +       /*
> +        * Try to consume as many EVREs as possible.
> +        * skip this loop if the interrupt is spurious.
> +        */
> +       while (cause && repeat) {
> +               unsigned long start = jiffies;
> +
> +               /* This timeout should be sufficent for core to finish */
> +               timeout = start + msecs_to_jiffies(500);
> +
> +               while (lldev->pending_tre_count) {
> +                       hidma_handle_tre_completion(lldev);
> +                       if (time_is_before_jiffies(timeout)) {
> +                               dev_warn(lldev->dev,
> +                                        "ISR timeout %lx-%lx from %lx [%d]\n",
> +                                        jiffies, timeout, start,
> +                                        lldev->pending_tre_count);
> +                               break;
> +                       }
> +               }
> +
> +               /* We consumed TREs or there are pending TREs or EVREs. */
> +               writel_relaxed(cause, lldev->evca + EVCA_IRQ_CLR_OFFSET);
> +
> +               /*
> +                * Another interrupt might have arrived while we are
> +                * processing this one. Read the new cause.
> +                */
> +               status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
> +               enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
> +               cause = status & enable;
> +
> +               repeat--;
> +       }
> +}
> +
> +static int hidma_ll_enable(struct hidma_lldev *lldev)
> +{
> +       u32 val;
> +       int ret;
> +
> +       val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
> +       val &= ~(CH_CONTROL_MASK << 16);
> +       val |= CH_ENABLE << 16;
> +       writel(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
> +
> +       ret = readl_poll_timeout(lldev->evca + EVCA_CTRLSTS_OFFSET, val,
> +                                ((((val >> CH_STATE_BIT_POS) & CH_STATE_MASK)
> +                                  == CH_ENABLED)
> +                                 ||
> +                                 (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK)
> +                                  == CH_RUNNING)), 1000, 10000);
> +       if (ret) {
> +               dev_err(lldev->dev, "event channel did not get enabled\n");
> +               return ret;
> +       }
> +
> +       val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
> +       val &= ~(CH_CONTROL_MASK << 16);
> +       val |= CH_ENABLE << 16;
> +       writel(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
> +
> +       ret = readl_poll_timeout(lldev->trca + TRCA_CTRLSTS_OFFSET, val,
> +                                ((((val >> CH_STATE_BIT_POS) & CH_STATE_MASK)
> +                                  == CH_ENABLED)
> +                                 ||
> +                                 (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK)
> +                                  == CH_RUNNING)), 1000, 10000);

u32 val, state;

state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;

...

And above.

> +       if (ret) {
> +               dev_err(lldev->dev, "transfer channel did not get enabled\n");
> +               return ret;
> +       }
> +
> +       lldev->trch_state = CH_ENABLED;
> +       lldev->evch_state = CH_ENABLED;
> +
> +       return 0;
> +}
> +
> +int hidma_ll_resume(struct hidma_lldev *lldev)
> +{
> +       return hidma_ll_enable(lldev);
> +}
> +
> +static void hidma_ll_hw_start(struct hidma_lldev *lldev)
> +{
> +       unsigned long irqflags;
> +
> +       spin_lock_irqsave(&lldev->lock, irqflags);
> +       writel(lldev->tre_write_offset, lldev->trca + TRCA_DOORBELL_OFFSET);
> +       spin_unlock_irqrestore(&lldev->lock, irqflags);
> +}
> +
> +bool hidma_ll_isenabled(struct hidma_lldev *lldev)
> +{
> +       u32 val;
> +
> +       val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
> +       lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
> +       val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
> +       lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;

Even a macro, e.g.

#define CH_STATE(v) ...
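
i.e. something like (sketch):

#define CH_STATE(val)  (((val) >> CH_STATE_BIT_POS) & CH_STATE_MASK)

so the two reads above become lldev->trch_state = CH_STATE(val); and
lldev->evch_state = CH_STATE(val);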

> +
> +       /* both channels have to be enabled before calling this function */
> +       if (((lldev->trch_state == CH_ENABLED) ||
> +            (lldev->trch_state == CH_RUNNING)) &&
> +           ((lldev->evch_state == CH_ENABLED) ||
> +            (lldev->evch_state == CH_RUNNING)))
> +               return true;
> +
> +       return false;
> +}
> +
> +void hidma_ll_queue_request(struct hidma_lldev *lldev, u32 tre_ch)
> +{
> +       struct hidma_tre *tre;
> +       unsigned long flags;
> +
> +       tre = &lldev->trepool[tre_ch];
> +
> +       /* copy the TRE into its location in the TRE ring */
> +       spin_lock_irqsave(&lldev->lock, flags);
> +       tre->tre_index = lldev->tre_write_offset / TRE_SIZE;
> +       lldev->pending_tre_list[tre->tre_index] = tre;
> +       memcpy(lldev->tre_ring + lldev->tre_write_offset, &tre->tre_local[0],
> +              TRE_SIZE);
> +       lldev->tx_status_list[tre->idx].err_code = 0;
> +       lldev->tx_status_list[tre->idx].err_info = 0;
> +       tre->queued = 1;
> +       lldev->pending_tre_count++;
> +       lldev->tre_write_offset = (lldev->tre_write_offset + TRE_SIZE)
> +           % lldev->tre_ring_size;
> +       spin_unlock_irqrestore(&lldev->lock, flags);
> +}
> +
> +void hidma_ll_start(struct hidma_lldev *lldev)
> +{
> +       hidma_ll_hw_start(lldev);
> +}
> +
> +/*
> + * Note that even though we stop this channel
> + * if there is a pending transaction in flight
> + * it will complete and follow the callback.
> + * This request will prevent further requests
> + * to be made.
> + */
> +int hidma_ll_pause(struct hidma_lldev *lldev)
> +{
> +       u32 val;
> +       int ret;
> +
> +       val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
> +       lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
> +       val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
> +       lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;


Ditto.

> +
> +       /* already suspended by this OS */
> +       if ((lldev->trch_state == CH_SUSPENDED) ||
> +           (lldev->evch_state == CH_SUSPENDED))
> +               return 0;
> +
> +       /* already stopped by the manager */
> +       if ((lldev->trch_state == CH_STOPPED) ||
> +           (lldev->evch_state == CH_STOPPED))
> +               return 0;
> +
> +       val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
> +       val &= ~(CH_CONTROL_MASK << 16);
> +       val |= CH_SUSPEND << 16;
> +       writel(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
> +
> +       /*
> +        * Start the wait right after the suspend is confirmed.
> +        * Do a polled read up to 1ms and 10ms maximum.
> +        */
> +       ret = readl_poll_timeout(lldev->trca + TRCA_CTRLSTS_OFFSET, val,
> +                                (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK) ==

Ditto. And everywhere else.

> +                                 CH_SUSPENDED), 1000, 10000);
> +       if (ret)
> +               return ret;
> +
> +       val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
> +       val &= ~(CH_CONTROL_MASK << 16);
> +       val |= CH_SUSPEND << 16;
> +       writel(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
> +
> +       /*
> +        * Start the wait right after the suspend is confirmed
> +        * Delay up to 10ms after reset to allow DMA logic to quiesce.
> +        */
> +       ret = readl_poll_timeout(lldev->evca + EVCA_CTRLSTS_OFFSET, val,
> +                                (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK) ==
> +                                 CH_SUSPENDED), 1000, 10000);
> +       if (ret)
> +               return ret;
> +
> +       lldev->trch_state = CH_SUSPENDED;
> +       lldev->evch_state = CH_SUSPENDED;
> +       return 0;
> +}
> +
> +void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch,
> +                                 dma_addr_t src, dma_addr_t dest, u32 len,
> +                                 u32 flags)
> +{
> +       struct hidma_tre *tre;
> +       u32 *tre_local;
> +
> +       if (tre_ch >= lldev->nr_tres) {
> +               dev_err(lldev->dev,
> +                       "invalid TRE number in transfer params:%d", tre_ch);
> +               return;
> +       }
> +
> +       tre = &lldev->trepool[tre_ch];
> +       if (atomic_read(&tre->allocated) != true) {
> +               dev_err(lldev->dev,
> +                       "trying to set params on an unused TRE:%d", tre_ch);
> +               return;
> +       }
> +
> +       tre_local = &tre->tre_local[0];
> +       tre_local[TRE_LEN_IDX] = len;
> +       tre_local[TRE_SRC_LOW_IDX] = lower_32_bits(src);
> +       tre_local[TRE_SRC_HI_IDX] = upper_32_bits(src);
> +       tre_local[TRE_DEST_LOW_IDX] = lower_32_bits(dest);
> +       tre_local[TRE_DEST_HI_IDX] = upper_32_bits(dest);
> +       tre->int_flags = flags;
> +}
> +
> +/*
> + * Called during initialization and after an error condition
> + * to restore hardware state.
> + */
> +int hidma_ll_setup(struct hidma_lldev *lldev)
> +{
> +       int rc;
> +       u64 addr;
> +       u32 val;
> +       u32 nr_tres = lldev->nr_tres;
> +
> +       lldev->pending_tre_count = 0;
> +       lldev->tre_processed_off = 0;
> +       lldev->evre_processed_off = 0;
> +       lldev->tre_write_offset = 0;
> +
> +       /* disable interrupts */
> +       hidma_ll_enable_irq(lldev, 0);
> +
> +       /* clear all pending interrupts */
> +       val = readl(lldev->evca + EVCA_IRQ_STAT_OFFSET);
> +       writel(val, lldev->evca + EVCA_IRQ_CLR_OFFSET);
> +
> +       rc = hidma_ll_reset(lldev);
> +       if (rc)
> +               return rc;
> +
> +       /*
> +        * Clear all pending interrupts again.
> +        * Otherwise, we observe reset complete interrupts.
> +        */
> +       val = readl(lldev->evca + EVCA_IRQ_STAT_OFFSET);
> +       writel(val, lldev->evca + EVCA_IRQ_CLR_OFFSET);
> +
> +       /* disable interrupts again after reset */
> +       hidma_ll_enable_irq(lldev, 0);
> +
> +       addr = lldev->tre_ring_handle;
> +       writel(lower_32_bits(addr), lldev->trca + TRCA_RING_LOW_OFFSET);
> +       writel(upper_32_bits(addr), lldev->trca + TRCA_RING_HIGH_OFFSET);
> +       writel(lldev->tre_ring_size, lldev->trca + TRCA_RING_LEN_OFFSET);
> +
> +       addr = lldev->evre_ring_handle;
> +       writel(lower_32_bits(addr), lldev->evca + EVCA_RING_LOW_OFFSET);
> +       writel(upper_32_bits(addr), lldev->evca + EVCA_RING_HIGH_OFFSET);
> +       writel(EVRE_SIZE * nr_tres, lldev->evca + EVCA_RING_LEN_OFFSET);
> +
> +       /* support IRQ only for now */
> +       val = readl(lldev->evca + EVCA_INTCTRL_OFFSET);
> +       val &= ~0xF;
> +       val |= 0x1;
> +       writel(val, lldev->evca + EVCA_INTCTRL_OFFSET);
> +
> +       /* clear all pending interrupts and enable them */
> +       writel(ENABLE_IRQS, lldev->evca + EVCA_IRQ_CLR_OFFSET);
> +       hidma_ll_enable_irq(lldev, ENABLE_IRQS);
> +
> +       rc = hidma_ll_enable(lldev);
> +       if (rc)
> +               return rc;
> +
> +       return rc;
> +}
> +
> +struct hidma_lldev *hidma_ll_init(struct device *dev, u32 nr_tres,
> +                                 void __iomem *trca, void __iomem *evca,
> +                                 u8 chidx)
> +{
> +       u32 required_bytes;
> +       struct hidma_lldev *lldev;
> +       int rc;
> +
> +       if (!trca || !evca || !dev || !nr_tres)
> +               return NULL;
> +
> +       /* need at least four TREs */
> +       if (nr_tres < 4)
> +               return NULL;
> +
> +       /* need an extra space */
> +       nr_tres += 1;
> +
> +       lldev = devm_kzalloc(dev, sizeof(struct hidma_lldev), GFP_KERNEL);
> +       if (!lldev)
> +               return NULL;
> +
> +       lldev->evca = evca;
> +       lldev->trca = trca;
> +       lldev->dev = dev;
> +       required_bytes = sizeof(struct hidma_tre) * nr_tres;
> +       lldev->trepool = devm_kzalloc(lldev->dev, required_bytes, GFP_KERNEL);
> +       if (!lldev->trepool)
> +               return NULL;
> +
> +       required_bytes = sizeof(lldev->pending_tre_list[0]) * nr_tres;
> +       lldev->pending_tre_list = devm_kzalloc(dev, required_bytes, GFP_KERNEL);

devm_kcalloc for each?
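
i.e. (sketch):

        lldev->pending_tre_list = devm_kcalloc(dev, nr_tres,
                                        sizeof(lldev->pending_tre_list[0]),
                                        GFP_KERNEL);

and the same pattern for trepool and tx_status_list.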

> +       if (!lldev->pending_tre_list)
> +               return NULL;
> +
> +       required_bytes = sizeof(lldev->tx_status_list[0]) * nr_tres;
> +       lldev->tx_status_list = devm_kzalloc(dev, required_bytes, GFP_KERNEL);

Ditto.

> +       if (!lldev->tx_status_list)
> +               return NULL;
> +
> +       lldev->tre_ring = dmam_alloc_coherent(dev, (TRE_SIZE + 1) * nr_tres,
> +                                             &lldev->tre_ring_handle,
> +                                             GFP_KERNEL);
> +       if (!lldev->tre_ring)
> +               return NULL;
> +
> +       memset(lldev->tre_ring, 0, (TRE_SIZE + 1) * nr_tres);
> +       lldev->tre_ring_size = TRE_SIZE * nr_tres;
> +       lldev->nr_tres = nr_tres;
> +
> +       /* the TRE ring has to be TRE_SIZE aligned */
> +       if (!IS_ALIGNED(lldev->tre_ring_handle, TRE_SIZE)) {
> +               u8 tre_ring_shift;
> +
> +               tre_ring_shift = lldev->tre_ring_handle % TRE_SIZE;
> +               tre_ring_shift = TRE_SIZE - tre_ring_shift;
> +               lldev->tre_ring_handle += tre_ring_shift;
> +               lldev->tre_ring += tre_ring_shift;
> +       }
> +
> +       lldev->evre_ring = dmam_alloc_coherent(dev, (EVRE_SIZE + 1) * nr_tres,
> +                                              &lldev->evre_ring_handle,
> +                                              GFP_KERNEL);
> +       if (!lldev->evre_ring)
> +               return NULL;
> +
> +       memset(lldev->evre_ring, 0, (EVRE_SIZE + 1) * nr_tres);
> +       lldev->evre_ring_size = EVRE_SIZE * nr_tres;
> +
> +       /* the EVRE ring has to be EVRE_SIZE aligned */
> +       if (!IS_ALIGNED(lldev->evre_ring_handle, EVRE_SIZE)) {
> +               u8 evre_ring_shift;
> +
> +               evre_ring_shift = lldev->evre_ring_handle % EVRE_SIZE;
> +               evre_ring_shift = EVRE_SIZE - evre_ring_shift;
> +               lldev->evre_ring_handle += evre_ring_shift;
> +               lldev->evre_ring += evre_ring_shift;
> +       }
> +       lldev->nr_tres = nr_tres;
> +       lldev->chidx = chidx;
> +
> +       rc = kfifo_alloc(&lldev->handoff_fifo,
> +                        nr_tres * sizeof(struct hidma_tre *), GFP_KERNEL);
> +       if (rc)
> +               return NULL;
> +
> +       rc = hidma_ll_setup(lldev);
> +       if (rc)
> +               return NULL;
> +
> +       spin_lock_init(&lldev->lock);
> +       tasklet_init(&lldev->task, hidma_ll_tre_complete, (unsigned long)lldev);
> +       lldev->initialized = 1;
> +       hidma_ll_enable_irq(lldev, ENABLE_IRQS);
> +       return lldev;
> +}
> +
> +int hidma_ll_uninit(struct hidma_lldev *lldev)
> +{
> +       int rc = 0;
> +       u32 val;
> +
> +       if (!lldev)
> +               return -ENODEV;
> +
> +       if (lldev->initialized) {
> +               u32 required_bytes;
> +
> +               lldev->initialized = 0;
> +
> +               required_bytes = sizeof(struct hidma_tre) * lldev->nr_tres;

> +               tasklet_kill(&lldev->task);

Can this be moved up? Or are you afraid of racing?

> +               memset(lldev->trepool, 0, required_bytes);
> +               lldev->trepool = NULL;
> +               lldev->pending_tre_count = 0;
> +               lldev->tre_write_offset = 0;
> +
> +               rc = hidma_ll_reset(lldev);
> +
> +               /*
> +                * Clear all pending interrupts again.
> +                * Otherwise, we observe reset complete interrupts.
> +                */
> +               val = readl(lldev->evca + EVCA_IRQ_STAT_OFFSET);
> +               writel(val, lldev->evca + EVCA_IRQ_CLR_OFFSET);
> +               hidma_ll_enable_irq(lldev, 0);
> +       }
> +       return rc;
> +}
> +
> +irqreturn_t hidma_ll_inthandler(int chirq, void *arg)
> +{
> +       struct hidma_lldev *lldev = arg;
> +
> +       hidma_ll_int_handler_internal(lldev);
> +       return IRQ_HANDLED;
> +}
> +
> +enum dma_status hidma_ll_status(struct hidma_lldev *lldev, u32 tre_ch)
> +{
> +       enum dma_status ret = DMA_ERROR;
> +       unsigned long flags;
> +       u8 err_code;
> +
> +       spin_lock_irqsave(&lldev->lock, flags);
> +       err_code = lldev->tx_status_list[tre_ch].err_code;
> +
> +       if (err_code & EVRE_STATUS_COMPLETE)
> +               ret = DMA_COMPLETE;
> +       else if (err_code & EVRE_STATUS_ERROR)
> +               ret = DMA_ERROR;
> +       else
> +               ret = DMA_IN_PROGRESS;
> +       spin_unlock_irqrestore(&lldev->lock, flags);
> +
> +       return ret;
> +}
> --
> Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
>
Sinan Kaya Jan. 10, 2016, 2:03 p.m. UTC | #2
On 1/4/2016 2:01 PM, Andy Shevchenko wrote:
> On Mon, Jan 4, 2016 at 2:06 AM, Sinan Kaya <okaya@codeaurora.org> wrote:
>> +
>> +#define EVRE_SIZE                      16      /* each EVRE is 16 bytes */
>> +
>> +#define TRCA_CTRLSTS_OFFSET            0x0
>> +#define TRCA_RING_LOW_OFFSET           0x8
>> +#define TRCA_RING_HIGH_OFFSET          0xC
>> +#define TRCA_RING_LEN_OFFSET           0x10
>> +#define TRCA_READ_PTR_OFFSET           0x18
>> +#define TRCA_WRITE_PTR_OFFSET          0x20
>> +#define TRCA_DOORBELL_OFFSET           0x400
> 
> I would rather have same precision for all offsets
> like
> ...CTRLSTS_OFFSET  0x000
> and so on
> 

ok

>> +
>> +#define EVCA_CTRLSTS_OFFSET            0x0
>> +#define EVCA_INTCTRL_OFFSET            0x4
>> +#define EVCA_RING_LOW_OFFSET           0x8
>> +#define EVCA_RING_HIGH_OFFSET          0xC
>> +#define EVCA_RING_LEN_OFFSET           0x10
>> +#define EVCA_READ_PTR_OFFSET           0x18
>> +#define EVCA_WRITE_PTR_OFFSET          0x20
>> +#define EVCA_DOORBELL_OFFSET           0x400
> 
> Ditto.
> 
ok

>> +
>> +#define EVCA_IRQ_STAT_OFFSET           0x100
>> +#define EVCA_IRQ_CLR_OFFSET            0x108
>> +#define EVCA_IRQ_EN_OFFSET             0x110
>> +
>> +#define EVRE_CFG_IDX                   0
>> +#define EVRE_LEN_IDX                   1
>> +#define EVRE_DEST_LOW_IDX              2
>> +#define EVRE_DEST_HI_IDX               3
>> +
>> +#define EVRE_ERRINFO_BIT_POS           24
>> +#define EVRE_CODE_BIT_POS              28
>> +
>> +#define EVRE_ERRINFO_MASK              GENMASK(3, 0)
>> +#define EVRE_CODE_MASK                 GENMASK(3, 0)
>> +
>> +#define CH_CONTROL_MASK                GENMASK(7, 0)
>> +#define CH_STATE_MASK                  GENMASK(7, 0)
>> +#define CH_STATE_BIT_POS               0x8
>> +
>> +#define IRQ_EV_CH_EOB_IRQ_BIT_POS      0
>> +#define IRQ_EV_CH_WR_RESP_BIT_POS      1
>> +#define IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS 9
>> +#define IRQ_TR_CH_DATA_RD_ER_BIT_POS   10
>> +#define IRQ_TR_CH_DATA_WR_ER_BIT_POS   11
>> +#define IRQ_TR_CH_INVALID_TRE_BIT_POS  14
>> +
>> +#define        ENABLE_IRQS (BIT(IRQ_EV_CH_EOB_IRQ_BIT_POS)     | \
>> +               BIT(IRQ_EV_CH_WR_RESP_BIT_POS)          | \
>> +               BIT(IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS)    | \
>> +               BIT(IRQ_TR_CH_DATA_RD_ER_BIT_POS)       | \
>> +               BIT(IRQ_TR_CH_DATA_WR_ER_BIT_POS)       | \
>> +               BIT(IRQ_TR_CH_INVALID_TRE_BIT_POS))
>> +
>> +enum ch_command {
>> +       CH_DISABLE = 0,
>> +       CH_ENABLE = 1,
>> +       CH_SUSPEND = 2,
>> +       CH_RESET = 9,
>> +};
>> +
>> +enum ch_state {
>> +       CH_DISABLED = 0,
>> +       CH_ENABLED = 1,
>> +       CH_RUNNING = 2,
>> +       CH_SUSPENDED = 3,
>> +       CH_STOPPED = 4,
>> +       CH_ERROR = 5,
>> +       CH_IN_RESET = 9,
>> +};
>> +
>> +enum tre_type {
>> +       TRE_MEMCPY = 3,
>> +       TRE_MEMSET = 4,
>> +};
>> +
>> +enum evre_type {
>> +       EVRE_DMA_COMPLETE = 0x23,
>> +       EVRE_IMM_DATA = 0x24,
>> +};
>> +
>> +enum err_code {
>> +       EVRE_STATUS_COMPLETE = 1,
>> +       EVRE_STATUS_ERROR = 4,
>> +};
>> +
>> +void hidma_ll_free(struct hidma_lldev *lldev, u32 tre_ch)
>> +{
>> +       struct hidma_tre *tre;
>> +
>> +       if (tre_ch >= lldev->nr_tres) {
>> +               dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch);
>> +               return;
>> +       }
>> +
>> +       tre = &lldev->trepool[tre_ch];
>> +       if (atomic_read(&tre->allocated) != true) {
>> +               dev_err(lldev->dev, "trying to free an unused TRE:%d", tre_ch);
>> +               return;
>> +       }
>> +
>> +       atomic_set(&tre->allocated, 0);
>> +}
>> +
>> +int hidma_ll_request(struct hidma_lldev *lldev, u32 dma_sig,
>> +                    const char *dev_name,
>> +                    void (*callback)(void *data), void *data, u32 *tre_ch)
>> +{
>> +       unsigned int i;
>> +       struct hidma_tre *tre;
>> +       u32 *tre_local;
>> +
>> +       if (!tre_ch || !lldev)
>> +               return -EINVAL;
>> +
>> +       /* need to have at least one empty spot in the queue */
>> +       for (i = 0; i < lldev->nr_tres - 1; i++) {
>> +               if (atomic_add_unless(&lldev->trepool[i].allocated, 1, 1))
>> +                       break;
>> +       }
>> +
>> +       if (i == (lldev->nr_tres - 1))
>> +               return -ENOMEM;
>> +
>> +       tre = &lldev->trepool[i];
>> +       tre->dma_sig = dma_sig;
>> +       tre->dev_name = dev_name;
>> +       tre->callback = callback;
>> +       tre->data = data;
>> +       tre->idx = i;
>> +       tre->status = 0;
>> +       tre->queued = 0;
>> +       lldev->tx_status_list[i].err_code = 0;
>> +       tre->lldev = lldev;
>> +       tre_local = &tre->tre_local[0];
>> +       tre_local[TRE_CFG_IDX] = TRE_MEMCPY;
>> +       tre_local[TRE_CFG_IDX] |= (lldev->chidx & 0xFF) << 8;
>> +       tre_local[TRE_CFG_IDX] |= BIT(16);      /* set IEOB */
>> +       *tre_ch = i;
>> +       if (callback)
>> +               callback(data);
>> +       return 0;
>> +}
>> +
>> +/*
>> + * Multiple TREs may be queued and waiting in the
>> + * pending queue.
>> + */
>> +static void hidma_ll_tre_complete(unsigned long arg)
>> +{
>> +       struct hidma_lldev *lldev = (struct hidma_lldev *)arg;
>> +       struct hidma_tre *tre;
>> +
>> +       while (kfifo_out(&lldev->handoff_fifo, &tre, 1)) {
>> +               /* call the user if it has been read by the hardware */
>> +               if (tre->callback)
>> +                       tre->callback(tre->data);
>> +       }
>> +}
>> +
>> +/*
>> + * Called to handle the interrupt for the channel.
>> + * Return a positive number if TRE or EVRE were consumed on this run.
>> + * Return a positive number if there are pending TREs or EVREs.
>> + * Return 0 if there is nothing to consume or no pending TREs/EVREs found.
>> + */
>> +static int hidma_handle_tre_completion(struct hidma_lldev *lldev)
>> +{
>> +       struct hidma_tre *tre;
>> +       u32 evre_write_off;
>> +       u32 evre_ring_size = lldev->evre_ring_size;
>> +       u32 tre_ring_size = lldev->tre_ring_size;
>> +       u32 num_completed = 0, tre_iterator, evre_iterator;
>> +       unsigned long flags;
>> +
>> +       evre_write_off = readl_relaxed(lldev->evca + EVCA_WRITE_PTR_OFFSET);
>> +       tre_iterator = lldev->tre_processed_off;
>> +       evre_iterator = lldev->evre_processed_off;
>> +
>> +       if ((evre_write_off > evre_ring_size) ||
>> +           ((evre_write_off % EVRE_SIZE) != 0)) {
>> +               dev_err(lldev->dev, "HW reports invalid EVRE write offset\n");
>> +               return 0;
>> +       }
>> +
>> +       /*
>> +        * By the time control reaches here the number of EVREs and TREs
>> +        * may not match. Only consume the ones that hardware told us.
>> +        */
>> +       while ((evre_iterator != evre_write_off)) {
>> +               u32 *current_evre = lldev->evre_ring + evre_iterator;
>> +               u32 cfg;
>> +               u8 err_info;
>> +
>> +               spin_lock_irqsave(&lldev->lock, flags);
>> +               tre = lldev->pending_tre_list[tre_iterator / TRE_SIZE];
>> +               if (!tre) {
>> +                       spin_unlock_irqrestore(&lldev->lock, flags);
>> +                       dev_warn(lldev->dev,
>> +                                "tre_index [%d] and tre out of sync\n",
>> +                                tre_iterator / TRE_SIZE);
>> +                       tre_iterator += TRE_SIZE;
>> +                       if (tre_iterator >= tre_ring_size)
>> +                               tre_iterator -= tre_ring_size;
> 
> Could it be refactored to macro
> increment_iterator(iter,size,ring_size) ?
> 
OK

>> +                       evre_iterator += EVRE_SIZE;
>> +                       if (evre_iterator >= evre_ring_size)
>> +                               evre_iterator -= evre_ring_size;
> 
> Ditto.
ok
> 
>> +
>> +                       continue;
>> +               }
>> +               lldev->pending_tre_list[tre->tre_index] = NULL;
>> +
>> +               /*
>> +                * Keep track of pending TREs that SW is expecting to receive
>> +                * from HW. We got one now. Decrement our counter.
>> +                */
>> +               lldev->pending_tre_count--;
>> +               if (lldev->pending_tre_count < 0) {
>> +                       dev_warn(lldev->dev,
>> +                                "tre count mismatch on completion");
>> +                       lldev->pending_tre_count = 0;
>> +               }
>> +
>> +               spin_unlock_irqrestore(&lldev->lock, flags);
>> +
>> +               cfg = current_evre[EVRE_CFG_IDX];
>> +               err_info = cfg >> EVRE_ERRINFO_BIT_POS;
>> +               err_info &= EVRE_ERRINFO_MASK;
>> +               lldev->tx_status_list[tre->idx].err_info = err_info;
>> +               lldev->tx_status_list[tre->idx].err_code =
>> +                   (cfg >> EVRE_CODE_BIT_POS) & EVRE_CODE_MASK;
>> +               tre->queued = 0;
>> +
>> +               kfifo_put(&lldev->handoff_fifo, tre);
>> +               tasklet_schedule(&lldev->task);
>> +
>> +               tre_iterator += TRE_SIZE;
>> +               if (tre_iterator >= tre_ring_size)
>> +                       tre_iterator -= tre_ring_size;
> 
> Ditto.

done

> 
>> +               evre_iterator += EVRE_SIZE;
>> +               if (evre_iterator >= evre_ring_size)
>> +                       evre_iterator -= evre_ring_size;
> 
> Ditto.

done
> 
>> +
>> +
>> +       tre_iterator = lldev->tre_processed_off;
>> +       while (lldev->pending_tre_count) {
>> +               int tre_index = tre_iterator / TRE_SIZE;
>> +
>> +               spin_lock_irqsave(&lldev->lock, flags);
>> +               tre = lldev->pending_tre_list[tre_index];
>> +               if (!tre) {
>> +                       spin_unlock_irqrestore(&lldev->lock, flags);
>> +                       tre_iterator += TRE_SIZE;
>> +                       if (tre_iterator >= tre_ring_size)
>> +                               tre_iterator -= tre_ring_size;
> 
> Ditto.

done

> 
>> +                       continue;
>> +               }
>> +               lldev->pending_tre_list[tre_index] = NULL;
>> +               lldev->pending_tre_count--;
>> +               if (lldev->pending_tre_count < 0) {
>> +                       dev_warn(lldev->dev,
>> +                                "tre count mismatch on completion");
>> +                       lldev->pending_tre_count = 0;
>> +               }
>> +               spin_unlock_irqrestore(&lldev->lock, flags);
>> +
>> +               lldev->tx_status_list[tre->idx].err_info = err_info;
>> +               lldev->tx_status_list[tre->idx].err_code = err_code;
>> +               tre->queued = 0;
>> +
>> +               kfifo_put(&lldev->handoff_fifo, tre);
>> +               tasklet_schedule(&lldev->task);
>> +
>> +               tre_iterator += TRE_SIZE;
>> +               if (tre_iterator >= tre_ring_size)
>> +                       tre_iterator -= tre_ring_size;
> 
> Ditto.
> 
done

>> +
>> +               num_completed++;
>> +       }
>> +       tre_read_off = (lldev->tre_processed_off + TRE_SIZE * num_completed);
>> +
>> +       tre_read_off = tre_read_off % tre_ring_size;
>> +
>> +       /* record the last processed tre offset */
>> +       lldev->tre_processed_off = tre_read_off;
>> +}
>> +
>> +static int hidma_ll_reset(struct hidma_lldev *lldev)
>> +{
>> +       u32 val;
>> +       int ret;
>> +
>> +       val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +       val &= ~(CH_CONTROL_MASK << 16);
>> +       val |= CH_RESET << 16;
>> +       writel(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +
>> +       /*
>> +        * Delay 10ms after reset to allow DMA logic to quiesce.
>> +        * Do a polled read up to 1ms and 10ms maximum.
>> +        */
>> +       ret = readl_poll_timeout(lldev->trca + TRCA_CTRLSTS_OFFSET, val,
>> +                                (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK) ==
>> +                                 CH_DISABLED), 1000, 10000);
>> +       if (ret) {
>> +               dev_err(lldev->dev, "transfer channel did not reset\n");
>> +               return ret;
>> +       }
>> +
>> +       val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
>> +       val &= ~(CH_CONTROL_MASK << 16);
>> +       val |= CH_RESET << 16;
>> +       writel(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
>> +
>> +       /*
>> +        * Delay 10ms after reset to allow DMA logic to quiesce.
>> +        * Do a polled read up to 1ms and 10ms maximum.
>> +        */
>> +       ret = readl_poll_timeout(lldev->evca + EVCA_CTRLSTS_OFFSET, val,
>> +                                (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK) ==
>> +                                 CH_DISABLED), 1000, 10000);
>> +       if (ret)
>> +               return ret;
>> +
>> +       lldev->trch_state = CH_DISABLED;
>> +       lldev->evch_state = CH_DISABLED;
>> +       return 0;
>> +}
>> +
>> +static void hidma_ll_enable_irq(struct hidma_lldev *lldev, u32 irq_bits)
>> +{
>> +       writel(irq_bits, lldev->evca + EVCA_IRQ_EN_OFFSET);
>> +}
>> +
>> +/*
>> + * The interrupt handler for HIDMA will try to consume as many pending
>> + * EVRE from the event queue as possible. Each EVRE has an associated
>> + * TRE that holds the user interface parameters. EVRE reports the
>> + * result of the transaction. Hardware guarantees ordering between EVREs
>> + * and TREs. We use last processed offset to figure out which TRE is
>> + * associated with which EVRE. If two TREs are consumed by HW, the EVREs
>> + * are in order in the event ring.
>> + *
>> + * This handler will do a one pass for consuming EVREs. Other EVREs may
>> + * be delivered while we are working. It will try to consume incoming
>> + * EVREs one more time and return.
>> + *
>> + * For unprocessed EVREs, hardware will trigger another interrupt until
>> + * all the interrupt bits are cleared.
>> + *
>> + * Hardware guarantees that by the time interrupt is observed, all data
>> + * transactions in flight are delivered to their respective places and
>> + * are visible to the CPU.
>> + *
>> + * On demand paging for IOMMU is only supported for PCIe via PRI
>> + * (Page Request Interface) not for HIDMA. All other hardware instances
>> + * including HIDMA work on pinned DMA addresses.
>> + *
>> + * HIDMA is not aware of IOMMU presence since it follows the DMA API. All
>> + * IOMMU latency will be built into the data movement time. By the time
>> + * interrupt happens, IOMMU lookups + data movement has already taken place.
>> + *
>> + * While the first read in a typical PCI endpoint ISR flushes all outstanding
>> + * requests traditionally to the destination, this concept does not apply
>> + * here for this HW.
>> + */
>> +static void hidma_ll_int_handler_internal(struct hidma_lldev *lldev)
>> +{
>> +       u32 status;
>> +       u32 enable;
>> +       u32 cause;
>> +       int repeat = 2;
>> +       unsigned long timeout;
>> +
>> +       /*
>> +        * Fine tuned for this HW...
>> +        *
>> +        * This ISR has been designed for this particular hardware. Relaxed
>> +        * read and write accessors are used for performance reasons due to
>> +        * interrupt delivery guarantees. Do not copy this code blindly and
>> +        * expect that to work.
>> +        */
>> +       status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
>> +       enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
>> +       cause = status & enable;
>> +
>> +       if ((cause & (BIT(IRQ_TR_CH_INVALID_TRE_BIT_POS))) ||
>> +           (cause & BIT(IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS)) ||
>> +           (cause & BIT(IRQ_EV_CH_WR_RESP_BIT_POS)) ||
>> +           (cause & BIT(IRQ_TR_CH_DATA_RD_ER_BIT_POS)) ||
>> +           (cause & BIT(IRQ_TR_CH_DATA_WR_ER_BIT_POS))) {
>> +               u8 err_code = EVRE_STATUS_ERROR;
>> +               u8 err_info = 0xFF;
>> +
>> +               /* Clear out pending interrupts */
>> +               writel(cause, lldev->evca + EVCA_IRQ_CLR_OFFSET);
>> +
>> +               dev_err(lldev->dev, "error 0x%x, resetting...\n", cause);
>> +
>> +               hidma_cleanup_pending_tre(lldev, err_info, err_code);
>> +
>> +               /* reset the channel for recovery */
>> +               if (hidma_ll_setup(lldev)) {
>> +                       dev_err(lldev->dev,
>> +                               "channel reinitialize failed after error\n");
>> +                       return;
>> +               }
>> +               hidma_ll_enable_irq(lldev, ENABLE_IRQS);
>> +               return;
>> +       }
>> +
>> +       /*
>> +        * Try to consume as many EVREs as possible.
>> +        * skip this loop if the interrupt is spurious.
>> +        */
>> +       while (cause && repeat) {
>> +               unsigned long start = jiffies;
>> +
>> +               /* This timeout should be sufficent for core to finish */
>> +               timeout = start + msecs_to_jiffies(500);
>> +
>> +               while (lldev->pending_tre_count) {
>> +                       hidma_handle_tre_completion(lldev);
>> +                       if (time_is_before_jiffies(timeout)) {
>> +                               dev_warn(lldev->dev,
>> +                                        "ISR timeout %lx-%lx from %lx [%d]\n",
>> +                                        jiffies, timeout, start,
>> +                                        lldev->pending_tre_count);
>> +                               break;
>> +                       }
>> +               }
>> +
>> +               /* We consumed TREs or there are pending TREs or EVREs. */
>> +               writel_relaxed(cause, lldev->evca + EVCA_IRQ_CLR_OFFSET);
>> +
>> +               /*
>> +                * Another interrupt might have arrived while we are
>> +                * processing this one. Read the new cause.
>> +                */
>> +               status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
>> +               enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
>> +               cause = status & enable;
>> +
>> +               repeat--;
>> +       }
>> +}
>> +
>> +static int hidma_ll_enable(struct hidma_lldev *lldev)
>> +{
>> +       u32 val;
>> +       int ret;
>> +
>> +       val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
>> +       val &= ~(CH_CONTROL_MASK << 16);
>> +       val |= CH_ENABLE << 16;
>> +       writel(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
>> +
>> +       ret = readl_poll_timeout(lldev->evca + EVCA_CTRLSTS_OFFSET, val,
>> +                                ((((val >> CH_STATE_BIT_POS) & CH_STATE_MASK)
>> +                                  == CH_ENABLED)
>> +                                 ||
>> +                                 (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK)
>> +                                  == CH_RUNNING)), 1000, 10000);
>> +       if (ret) {
>> +               dev_err(lldev->dev, "event channel did not get enabled\n");
>> +               return ret;
>> +       }
>> +
>> +       val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +       val &= ~(CH_CONTROL_MASK << 16);
>> +       val |= CH_ENABLE << 16;
>> +       writel(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +
>> +       ret = readl_poll_timeout(lldev->trca + TRCA_CTRLSTS_OFFSET, val,
>> +                                ((((val >> CH_STATE_BIT_POS) & CH_STATE_MASK)
>> +                                  == CH_ENABLED)
>> +                                 ||
>> +                                 (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK)
>> +                                  == CH_RUNNING)), 1000, 10000);
> 
> u32 val, state;
> 
> state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
> 
> ...
> 
> And above.

The third argument to readl_poll_timeout is the condition. That condition is re-evaluated
every sleep interval (the 4th argument) until the timeout (the 5th argument) is reached.
I'll add a macro to make the code a little more readable. Something like this:

#define HIDMA_CH_STATE(val)	\
(((val) >> CH_STATE_BIT_POS) & CH_STATE_MASK)
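
For illustration, the event channel poll in hidma_ll_enable would then read roughly like
this (just a sketch, assuming the HIDMA_CH_STATE macro above):

	ret = readl_poll_timeout(lldev->evca + EVCA_CTRLSTS_OFFSET, val,
				 HIDMA_CH_STATE(val) == CH_ENABLED ||
				 HIDMA_CH_STATE(val) == CH_RUNNING,
				 1000, 10000);

The transfer channel poll would be rewritten the same way.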


> 
>> +       if (ret) {
>> +               dev_err(lldev->dev, "transfer channel did not get enabled\n");
>> +               return ret;
>> +       }
>> +
>> +       lldev->trch_state = CH_ENABLED;
>> +       lldev->evch_state = CH_ENABLED;
>> +
>> +       return 0;
>> +}
>> +
>> +int hidma_ll_resume(struct hidma_lldev *lldev)
>> +{
>> +       return hidma_ll_enable(lldev);
>> +}
>> +
>> +static void hidma_ll_hw_start(struct hidma_lldev *lldev)
>> +{
>> +       unsigned long irqflags;
>> +
>> +       spin_lock_irqsave(&lldev->lock, irqflags);
>> +       writel(lldev->tre_write_offset, lldev->trca + TRCA_DOORBELL_OFFSET);
>> +       spin_unlock_irqrestore(&lldev->lock, irqflags);
>> +}
>> +
>> +bool hidma_ll_isenabled(struct hidma_lldev *lldev)
>> +{
>> +       u32 val;
>> +
>> +       val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +       lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
>> +       val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
>> +       lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
> 
> Even macro
> 
> #define CH_STATE(v) ...
> 
ok
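
With such a macro, the state reads in hidma_ll_isenabled could become something like this
(sketch only):

	val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
	lldev->trch_state = HIDMA_CH_STATE(val);
	val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
	lldev->evch_state = HIDMA_CH_STATE(val);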

>> +
>> +       /* both channels have to be enabled before calling this function */
>> +       if (((lldev->trch_state == CH_ENABLED) ||
>> +            (lldev->trch_state == CH_RUNNING)) &&
>> +           ((lldev->evch_state == CH_ENABLED) ||
>> +            (lldev->evch_state == CH_RUNNING)))
>> +               return true;
>> +
>> +       return false;
>> +}
>> +
>> +void hidma_ll_queue_request(struct hidma_lldev *lldev, u32 tre_ch)
>> +{
>> +       struct hidma_tre *tre;
>> +       unsigned long flags;
>> +
>> +       tre = &lldev->trepool[tre_ch];
>> +
>> +       /* copy the TRE into its location in the TRE ring */
>> +       spin_lock_irqsave(&lldev->lock, flags);
>> +       tre->tre_index = lldev->tre_write_offset / TRE_SIZE;
>> +       lldev->pending_tre_list[tre->tre_index] = tre;
>> +       memcpy(lldev->tre_ring + lldev->tre_write_offset, &tre->tre_local[0],
>> +              TRE_SIZE);
>> +       lldev->tx_status_list[tre->idx].err_code = 0;
>> +       lldev->tx_status_list[tre->idx].err_info = 0;
>> +       tre->queued = 1;
>> +       lldev->pending_tre_count++;
>> +       lldev->tre_write_offset = (lldev->tre_write_offset + TRE_SIZE)
>> +           % lldev->tre_ring_size;
>> +       spin_unlock_irqrestore(&lldev->lock, flags);
>> +}
>> +
>> +void hidma_ll_start(struct hidma_lldev *lldev)
>> +{
>> +       hidma_ll_hw_start(lldev);
>> +}
>> +
>> +/*
>> + * Note that even though we stop this channel,
>> + * any pending transaction in flight will still
>> + * complete and its callback will follow.
>> + * This request prevents further requests from
>> + * being made.
>> + */
>> +int hidma_ll_pause(struct hidma_lldev *lldev)
>> +{
>> +       u32 val;
>> +       int ret;
>> +
>> +       val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
>> +       lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
>> +       val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +       lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
> 
> 
> Ditto.
> 
yes, done

>> +
>> +       /* already suspended by this OS */
>> +       if ((lldev->trch_state == CH_SUSPENDED) ||
>> +           (lldev->evch_state == CH_SUSPENDED))
>> +               return 0;
>> +
>> +       /* already stopped by the manager */
>> +       if ((lldev->trch_state == CH_STOPPED) ||
>> +           (lldev->evch_state == CH_STOPPED))
>> +               return 0;
>> +
>> +       val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +       val &= ~(CH_CONTROL_MASK << 16);
>> +       val |= CH_SUSPEND << 16;
>> +       writel(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +
>> +       /*
>> +        * Start the wait right after the suspend is confirmed.
>> +        * Do a polled read up to 1ms and 10ms maximum.
>> +        */
>> +       ret = readl_poll_timeout(lldev->trca + TRCA_CTRLSTS_OFFSET, val,
>> +                                (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK) ==
> 
> Ditto. And everywhere else.
> 
yep

>> +
>> +struct hidma_lldev *hidma_ll_init(struct device *dev, u32 nr_tres,
>> +                                 void __iomem *trca, void __iomem *evca,
>> +                                 u8 chidx)
>> +{
>> +       u32 required_bytes;
>> +       struct hidma_lldev *lldev;
>> +       int rc;
>> +
>> +       if (!trca || !evca || !dev || !nr_tres)
>> +               return NULL;
>> +
>> +       /* need at least four TREs */
>> +       if (nr_tres < 4)
>> +               return NULL;
>> +
>> +       /* need an extra space */
>> +       nr_tres += 1;
>> +
>> +       lldev = devm_kzalloc(dev, sizeof(struct hidma_lldev), GFP_KERNEL);
>> +       if (!lldev)
>> +               return NULL;
>> +
>> +       lldev->evca = evca;
>> +       lldev->trca = trca;
>> +       lldev->dev = dev;
>> +       required_bytes = sizeof(struct hidma_tre) * nr_tres;
>> +       lldev->trepool = devm_kzalloc(lldev->dev, required_bytes, GFP_KERNEL);
>> +       if (!lldev->trepool)
>> +               return NULL;
>> +
>> +       required_bytes = sizeof(lldev->pending_tre_list[0]) * nr_tres;
>> +       lldev->pending_tre_list = devm_kzalloc(dev, required_bytes, GFP_KERNEL);
> 
> devm_kcalloc for each?

By the time the code was implemented, around 3.15, devm_kcalloc did not exist. That's why
I used devm_kzalloc. Now is a good time to change it.
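
For example, the TRE pool allocation could then look like this (sketch only):

	lldev->trepool = devm_kcalloc(lldev->dev, nr_tres,
				      sizeof(*lldev->trepool), GFP_KERNEL);
	if (!lldev->trepool)
		return NULL;

The pending_tre_list and tx_status_list allocations would follow the same pattern.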

> 
>> +       if (!lldev->pending_tre_list)
>> +               return NULL;
>> +
>> +       required_bytes = sizeof(lldev->tx_status_list[0]) * nr_tres;
>> +       lldev->tx_status_list = devm_kzalloc(dev, required_bytes, GFP_KERNEL);
> 
> Ditto.

ok

> 
>> +       if (!lldev->tx_status_list)
>> +               return NULL;
>> +
>> +       lldev->tre_ring = dmam_alloc_coherent(dev, (TRE_SIZE + 1) * nr_tres,
>> +                                             &lldev->tre_ring_handle,
>> +                                             GFP_KERNEL);
>> +       if (!lldev->tre_ring)
>> +               return NULL;
>> +
>> +       memset(lldev->tre_ring, 0, (TRE_SIZE + 1) * nr_tres);
>> +       lldev->tre_ring_size = TRE_SIZE * nr_tres;
>> +       lldev->nr_tres = nr_tres;
>> +
>> +       /* the TRE ring has to be TRE_SIZE aligned */
>> +       if (!IS_ALIGNED(lldev->tre_ring_handle, TRE_SIZE)) {
>> +               u8 tre_ring_shift;
>> +
>> +               tre_ring_shift = lldev->tre_ring_handle % TRE_SIZE;
>> +               tre_ring_shift = TRE_SIZE - tre_ring_shift;
>> +               lldev->tre_ring_handle += tre_ring_shift;
>> +               lldev->tre_ring += tre_ring_shift;
>> +       }
>> +
>> +       lldev->evre_ring = dmam_alloc_coherent(dev, (EVRE_SIZE + 1) * nr_tres,
>> +                                              &lldev->evre_ring_handle,
>> +                                              GFP_KERNEL);
>> +       if (!lldev->evre_ring)
>> +               return NULL;
>> +
>> +       memset(lldev->evre_ring, 0, (EVRE_SIZE + 1) * nr_tres);
>> +       lldev->evre_ring_size = EVRE_SIZE * nr_tres;
>> +
>> +       /* the EVRE ring has to be EVRE_SIZE aligned */
>> +       if (!IS_ALIGNED(lldev->evre_ring_handle, EVRE_SIZE)) {
>> +               u8 evre_ring_shift;
>> +
>> +               evre_ring_shift = lldev->evre_ring_handle % EVRE_SIZE;
>> +               evre_ring_shift = EVRE_SIZE - evre_ring_shift;
>> +               lldev->evre_ring_handle += evre_ring_shift;
>> +               lldev->evre_ring += evre_ring_shift;
>> +       }
>> +       lldev->nr_tres = nr_tres;
>> +       lldev->chidx = chidx;
>> +
>> +       rc = kfifo_alloc(&lldev->handoff_fifo,
>> +                        nr_tres * sizeof(struct hidma_tre *), GFP_KERNEL);
>> +       if (rc)
>> +               return NULL;
>> +
>> +       rc = hidma_ll_setup(lldev);
>> +       if (rc)
>> +               return NULL;
>> +
>> +       spin_lock_init(&lldev->lock);
>> +       tasklet_init(&lldev->task, hidma_ll_tre_complete, (unsigned long)lldev);
>> +       lldev->initialized = 1;
>> +       hidma_ll_enable_irq(lldev, ENABLE_IRQS);
>> +       return lldev;
>> +}
>> +
>> +int hidma_ll_uninit(struct hidma_lldev *lldev)
>> +{
>> +       int rc = 0;
>> +       u32 val;
>> +
>> +       if (!lldev)
>> +               return -ENODEV;
>> +
>> +       if (lldev->initialized) {
>> +               u32 required_bytes;
>> +
>> +               lldev->initialized = 0;
>> +
>> +               required_bytes = sizeof(struct hidma_tre) * lldev->nr_tres;
> 
>> +               tasklet_kill(&lldev->task);
> 
> Can this be moved up? Or you afraid of racing?

This function only gets called from the driver remove function. I'll keep it like this, as
moving it around doesn't gain anything.

> 
>> +               memset(lldev->trepool, 0, required_bytes);
>> +               lldev->trepool = NULL;
>> +               lldev->pending_tre_count = 0;
>> +               lldev->tre_write_offset = 0;
>> +
>> +               rc = hidma_ll_reset(lldev);
>> +
>> +               /*
>> +                * Clear all pending interrupts again.
>> +                * Otherwise, we observe reset complete interrupts.
>> +                */
>> +               val = readl(lldev->evca + EVCA_IRQ_STAT_OFFSET);
>> +               writel(val, lldev->evca + EVCA_IRQ_CLR_OFFSET);
>> +               hidma_ll_enable_irq(lldev, 0);
>> +       }
>> +       return rc;
>> +}
>> +
>> +irqreturn_t hidma_ll_inthandler(int chirq, void *arg)
>> +{
>> +       struct hidma_lldev *lldev = arg;
>> +
>> +       hidma_ll_int_handler_internal(lldev);
>> +       return IRQ_HANDLED;
>> +}
>> +
>> +enum dma_status hidma_ll_status(struct hidma_lldev *lldev, u32 tre_ch)
>> +{
>> +       enum dma_status ret = DMA_ERROR;
>> +       unsigned long flags;
>> +       u8 err_code;
>> +
>> +       spin_lock_irqsave(&lldev->lock, flags);
>> +       err_code = lldev->tx_status_list[tre_ch].err_code;
>> +
>> +       if (err_code & EVRE_STATUS_COMPLETE)
>> +               ret = DMA_COMPLETE;
>> +       else if (err_code & EVRE_STATUS_ERROR)
>> +               ret = DMA_ERROR;
>> +       else
>> +               ret = DMA_IN_PROGRESS;
>> +       spin_unlock_irqrestore(&lldev->lock, flags);
>> +
>> +       return ret;
>> +}
>> --
>> Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
>> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
>>
diff mbox

Patch

diff --git a/drivers/dma/qcom/Makefile b/drivers/dma/qcom/Makefile
index bfea699..6bf9267 100644
--- a/drivers/dma/qcom/Makefile
+++ b/drivers/dma/qcom/Makefile
@@ -1,3 +1,5 @@ 
 obj-$(CONFIG_QCOM_BAM_DMA) += bam_dma.o
 obj-$(CONFIG_QCOM_HIDMA_MGMT) += hdma_mgmt.o
 hdma_mgmt-objs	 := hidma_mgmt.o hidma_mgmt_sys.o
+obj-$(CONFIG_QCOM_HIDMA) +=  hdma.o
+hdma-objs        := hidma_ll.o hidma.o
diff --git a/drivers/dma/qcom/hidma.h b/drivers/dma/qcom/hidma.h
index 231e306..1e09d7c 100644
--- a/drivers/dma/qcom/hidma.h
+++ b/drivers/dma/qcom/hidma.h
@@ -37,7 +37,7 @@  struct hidma_tre {
 	atomic_t allocated;		/* if this channel is allocated	    */
 	bool queued;			/* flag whether this is pending     */
 	u16 status;			/* status			    */
-	u32 chidx;			/* index of the tre		    */
+	u32 idx;			/* index of the tre		    */
 	u32 dma_sig;			/* signature of the tre		    */
 	const char *dev_name;		/* name of the device		    */
 	void (*callback)(void *data);	/* requester callback		    */
diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c
new file mode 100644
index 0000000..0cd8d70
--- /dev/null
+++ b/drivers/dma/qcom/hidma_ll.c
@@ -0,0 +1,927 @@ 
+/*
+ * Qualcomm Technologies HIDMA DMA engine low level code
+ *
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/atomic.h>
+#include <linux/iopoll.h>
+#include <linux/kfifo.h>
+#include <linux/bitops.h>
+
+#include "hidma.h"
+
+#define EVRE_SIZE			16	/* each EVRE is 16 bytes */
+
+#define TRCA_CTRLSTS_OFFSET		0x0
+#define TRCA_RING_LOW_OFFSET		0x8
+#define TRCA_RING_HIGH_OFFSET		0xC
+#define TRCA_RING_LEN_OFFSET		0x10
+#define TRCA_READ_PTR_OFFSET		0x18
+#define TRCA_WRITE_PTR_OFFSET		0x20
+#define TRCA_DOORBELL_OFFSET		0x400
+
+#define EVCA_CTRLSTS_OFFSET		0x0
+#define EVCA_INTCTRL_OFFSET		0x4
+#define EVCA_RING_LOW_OFFSET		0x8
+#define EVCA_RING_HIGH_OFFSET		0xC
+#define EVCA_RING_LEN_OFFSET		0x10
+#define EVCA_READ_PTR_OFFSET		0x18
+#define EVCA_WRITE_PTR_OFFSET		0x20
+#define EVCA_DOORBELL_OFFSET		0x400
+
+#define EVCA_IRQ_STAT_OFFSET		0x100
+#define EVCA_IRQ_CLR_OFFSET		0x108
+#define EVCA_IRQ_EN_OFFSET		0x110
+
+#define EVRE_CFG_IDX			0
+#define EVRE_LEN_IDX			1
+#define EVRE_DEST_LOW_IDX		2
+#define EVRE_DEST_HI_IDX		3
+
+#define EVRE_ERRINFO_BIT_POS		24
+#define EVRE_CODE_BIT_POS		28
+
+#define EVRE_ERRINFO_MASK		GENMASK(3, 0)
+#define EVRE_CODE_MASK			GENMASK(3, 0)
+
+#define CH_CONTROL_MASK		GENMASK(7, 0)
+#define CH_STATE_MASK			GENMASK(7, 0)
+#define CH_STATE_BIT_POS		0x8
+
+#define IRQ_EV_CH_EOB_IRQ_BIT_POS	0
+#define IRQ_EV_CH_WR_RESP_BIT_POS	1
+#define IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS 9
+#define IRQ_TR_CH_DATA_RD_ER_BIT_POS	10
+#define IRQ_TR_CH_DATA_WR_ER_BIT_POS	11
+#define IRQ_TR_CH_INVALID_TRE_BIT_POS	14
+
+#define	ENABLE_IRQS (BIT(IRQ_EV_CH_EOB_IRQ_BIT_POS)	| \
+		BIT(IRQ_EV_CH_WR_RESP_BIT_POS)		| \
+		BIT(IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS)	| \
+		BIT(IRQ_TR_CH_DATA_RD_ER_BIT_POS)	| \
+		BIT(IRQ_TR_CH_DATA_WR_ER_BIT_POS)	| \
+		BIT(IRQ_TR_CH_INVALID_TRE_BIT_POS))
+
+enum ch_command {
+	CH_DISABLE = 0,
+	CH_ENABLE = 1,
+	CH_SUSPEND = 2,
+	CH_RESET = 9,
+};
+
+enum ch_state {
+	CH_DISABLED = 0,
+	CH_ENABLED = 1,
+	CH_RUNNING = 2,
+	CH_SUSPENDED = 3,
+	CH_STOPPED = 4,
+	CH_ERROR = 5,
+	CH_IN_RESET = 9,
+};
+
+enum tre_type {
+	TRE_MEMCPY = 3,
+	TRE_MEMSET = 4,
+};
+
+enum evre_type {
+	EVRE_DMA_COMPLETE = 0x23,
+	EVRE_IMM_DATA = 0x24,
+};
+
+enum err_code {
+	EVRE_STATUS_COMPLETE = 1,
+	EVRE_STATUS_ERROR = 4,
+};
+
+void hidma_ll_free(struct hidma_lldev *lldev, u32 tre_ch)
+{
+	struct hidma_tre *tre;
+
+	if (tre_ch >= lldev->nr_tres) {
+		dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch);
+		return;
+	}
+
+	tre = &lldev->trepool[tre_ch];
+	if (atomic_read(&tre->allocated) != true) {
+		dev_err(lldev->dev, "trying to free an unused TRE:%d", tre_ch);
+		return;
+	}
+
+	atomic_set(&tre->allocated, 0);
+}
+
+int hidma_ll_request(struct hidma_lldev *lldev, u32 dma_sig,
+		     const char *dev_name,
+		     void (*callback)(void *data), void *data, u32 *tre_ch)
+{
+	unsigned int i;
+	struct hidma_tre *tre;
+	u32 *tre_local;
+
+	if (!tre_ch || !lldev)
+		return -EINVAL;
+
+	/* need to have at least one empty spot in the queue */
+	for (i = 0; i < lldev->nr_tres - 1; i++) {
+		if (atomic_add_unless(&lldev->trepool[i].allocated, 1, 1))
+			break;
+	}
+
+	if (i == (lldev->nr_tres - 1))
+		return -ENOMEM;
+
+	tre = &lldev->trepool[i];
+	tre->dma_sig = dma_sig;
+	tre->dev_name = dev_name;
+	tre->callback = callback;
+	tre->data = data;
+	tre->idx = i;
+	tre->status = 0;
+	tre->queued = 0;
+	lldev->tx_status_list[i].err_code = 0;
+	tre->lldev = lldev;
+	tre_local = &tre->tre_local[0];
+	tre_local[TRE_CFG_IDX] = TRE_MEMCPY;
+	tre_local[TRE_CFG_IDX] |= (lldev->chidx & 0xFF) << 8;
+	tre_local[TRE_CFG_IDX] |= BIT(16);	/* set IEOB */
+	*tre_ch = i;
+	if (callback)
+		callback(data);
+	return 0;
+}
+
+/*
+ * Multiple TREs may be queued and waiting in the
+ * pending queue.
+ */
+static void hidma_ll_tre_complete(unsigned long arg)
+{
+	struct hidma_lldev *lldev = (struct hidma_lldev *)arg;
+	struct hidma_tre *tre;
+
+	while (kfifo_out(&lldev->handoff_fifo, &tre, 1)) {
+		/* call the user if it has been read by the hardware */
+		if (tre->callback)
+			tre->callback(tre->data);
+	}
+}
+
+/*
+ * Called to handle the interrupt for the channel.
+ * Return a positive number if TRE or EVRE were consumed on this run.
+ * Return a positive number if there are pending TREs or EVREs.
+ * Return 0 if there is nothing to consume or no pending TREs/EVREs found.
+ */
+static int hidma_handle_tre_completion(struct hidma_lldev *lldev)
+{
+	struct hidma_tre *tre;
+	u32 evre_write_off;
+	u32 evre_ring_size = lldev->evre_ring_size;
+	u32 tre_ring_size = lldev->tre_ring_size;
+	u32 num_completed = 0, tre_iterator, evre_iterator;
+	unsigned long flags;
+
+	evre_write_off = readl_relaxed(lldev->evca + EVCA_WRITE_PTR_OFFSET);
+	tre_iterator = lldev->tre_processed_off;
+	evre_iterator = lldev->evre_processed_off;
+
+	if ((evre_write_off > evre_ring_size) ||
+	    ((evre_write_off % EVRE_SIZE) != 0)) {
+		dev_err(lldev->dev, "HW reports invalid EVRE write offset\n");
+		return 0;
+	}
+
+	/*
+	 * By the time control reaches here the number of EVREs and TREs
+	 * may not match. Only consume the ones that hardware told us.
+	 */
+	while ((evre_iterator != evre_write_off)) {
+		u32 *current_evre = lldev->evre_ring + evre_iterator;
+		u32 cfg;
+		u8 err_info;
+
+		spin_lock_irqsave(&lldev->lock, flags);
+		tre = lldev->pending_tre_list[tre_iterator / TRE_SIZE];
+		if (!tre) {
+			spin_unlock_irqrestore(&lldev->lock, flags);
+			dev_warn(lldev->dev,
+				 "tre_index [%d] and tre out of sync\n",
+				 tre_iterator / TRE_SIZE);
+			tre_iterator += TRE_SIZE;
+			if (tre_iterator >= tre_ring_size)
+				tre_iterator -= tre_ring_size;
+			evre_iterator += EVRE_SIZE;
+			if (evre_iterator >= evre_ring_size)
+				evre_iterator -= evre_ring_size;
+
+			continue;
+		}
+		lldev->pending_tre_list[tre->tre_index] = NULL;
+
+		/*
+		 * Keep track of pending TREs that SW is expecting to receive
+		 * from HW. We got one now. Decrement our counter.
+		 */
+		lldev->pending_tre_count--;
+		if (lldev->pending_tre_count < 0) {
+			dev_warn(lldev->dev,
+				 "tre count mismatch on completion");
+			lldev->pending_tre_count = 0;
+		}
+
+		spin_unlock_irqrestore(&lldev->lock, flags);
+
+		cfg = current_evre[EVRE_CFG_IDX];
+		err_info = cfg >> EVRE_ERRINFO_BIT_POS;
+		err_info &= EVRE_ERRINFO_MASK;
+		lldev->tx_status_list[tre->idx].err_info = err_info;
+		lldev->tx_status_list[tre->idx].err_code =
+		    (cfg >> EVRE_CODE_BIT_POS) & EVRE_CODE_MASK;
+		tre->queued = 0;
+
+		kfifo_put(&lldev->handoff_fifo, tre);
+		tasklet_schedule(&lldev->task);
+
+		tre_iterator += TRE_SIZE;
+		if (tre_iterator >= tre_ring_size)
+			tre_iterator -= tre_ring_size;
+		evre_iterator += EVRE_SIZE;
+		if (evre_iterator >= evre_ring_size)
+			evre_iterator -= evre_ring_size;
+
+		/*
+		 * Read the new event descriptor written by the HW.
+		 * As we are processing the delivered events, other events
+		 * get queued to the SW for processing.
+		 */
+		evre_write_off =
+		    readl_relaxed(lldev->evca + EVCA_WRITE_PTR_OFFSET);
+		num_completed++;
+	}
+
+	if (num_completed) {
+		u32 evre_read_off = (lldev->evre_processed_off +
+				     EVRE_SIZE * num_completed);
+		u32 tre_read_off = (lldev->tre_processed_off +
+				    TRE_SIZE * num_completed);
+
+		evre_read_off = evre_read_off % evre_ring_size;
+		tre_read_off = tre_read_off % tre_ring_size;
+
+		writel(evre_read_off, lldev->evca + EVCA_DOORBELL_OFFSET);
+
+		/* record the last processed tre offset */
+		lldev->tre_processed_off = tre_read_off;
+		lldev->evre_processed_off = evre_read_off;
+	}
+
+	return num_completed;
+}
+
+void hidma_cleanup_pending_tre(struct hidma_lldev *lldev, u8 err_info,
+			       u8 err_code)
+{
+	u32 tre_iterator;
+	struct hidma_tre *tre;
+	u32 tre_ring_size = lldev->tre_ring_size;
+	int num_completed = 0;
+	u32 tre_read_off;
+	unsigned long flags;
+
+	tre_iterator = lldev->tre_processed_off;
+	while (lldev->pending_tre_count) {
+		int tre_index = tre_iterator / TRE_SIZE;
+
+		spin_lock_irqsave(&lldev->lock, flags);
+		tre = lldev->pending_tre_list[tre_index];
+		if (!tre) {
+			spin_unlock_irqrestore(&lldev->lock, flags);
+			tre_iterator += TRE_SIZE;
+			if (tre_iterator >= tre_ring_size)
+				tre_iterator -= tre_ring_size;
+			continue;
+		}
+		lldev->pending_tre_list[tre_index] = NULL;
+		lldev->pending_tre_count--;
+		if (lldev->pending_tre_count < 0) {
+			dev_warn(lldev->dev,
+				 "tre count mismatch on completion");
+			lldev->pending_tre_count = 0;
+		}
+		spin_unlock_irqrestore(&lldev->lock, flags);
+
+		lldev->tx_status_list[tre->idx].err_info = err_info;
+		lldev->tx_status_list[tre->idx].err_code = err_code;
+		tre->queued = 0;
+
+		kfifo_put(&lldev->handoff_fifo, tre);
+		tasklet_schedule(&lldev->task);
+
+		tre_iterator += TRE_SIZE;
+		if (tre_iterator >= tre_ring_size)
+			tre_iterator -= tre_ring_size;
+
+		num_completed++;
+	}
+	tre_read_off = (lldev->tre_processed_off + TRE_SIZE * num_completed);
+
+	tre_read_off = tre_read_off % tre_ring_size;
+
+	/* record the last processed tre offset */
+	lldev->tre_processed_off = tre_read_off;
+}
+
+static int hidma_ll_reset(struct hidma_lldev *lldev)
+{
+	u32 val;
+	int ret;
+
+	val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	val &= ~(CH_CONTROL_MASK << 16);
+	val |= CH_RESET << 16;
+	writel(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
+
+	/*
+	 * Delay 10ms after reset to allow DMA logic to quiesce.
+	 * Do a polled read up to 1ms and 10ms maximum.
+	 */
+	ret = readl_poll_timeout(lldev->trca + TRCA_CTRLSTS_OFFSET, val,
+				 (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK) ==
+				  CH_DISABLED), 1000, 10000);
+	if (ret) {
+		dev_err(lldev->dev, "transfer channel did not reset\n");
+		return ret;
+	}
+
+	val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	val &= ~(CH_CONTROL_MASK << 16);
+	val |= CH_RESET << 16;
+	writel(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
+
+	/*
+	 * Delay 10ms after reset to allow DMA logic to quiesce.
+	 * Do a polled read up to 1ms and 10ms maximum.
+	 */
+	ret = readl_poll_timeout(lldev->evca + EVCA_CTRLSTS_OFFSET, val,
+				 (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK) ==
+				  CH_DISABLED), 1000, 10000);
+	if (ret)
+		return ret;
+
+	lldev->trch_state = CH_DISABLED;
+	lldev->evch_state = CH_DISABLED;
+	return 0;
+}
+
+static void hidma_ll_enable_irq(struct hidma_lldev *lldev, u32 irq_bits)
+{
+	writel(irq_bits, lldev->evca + EVCA_IRQ_EN_OFFSET);
+}
+
+/*
+ * The interrupt handler for HIDMA will try to consume as many pending
+ * EVRE from the event queue as possible. Each EVRE has an associated
+ * TRE that holds the user interface parameters. EVRE reports the
+ * result of the transaction. Hardware guarantees ordering between EVREs
+ * and TREs. We use last processed offset to figure out which TRE is
+ * associated with which EVRE. If two TREs are consumed by HW, the EVREs
+ * are in order in the event ring.
+ *
+ * This handler will do a one pass for consuming EVREs. Other EVREs may
+ * be delivered while we are working. It will try to consume incoming
+ * EVREs one more time and return.
+ *
+ * For unprocessed EVREs, hardware will trigger another interrupt until
+ * all the interrupt bits are cleared.
+ *
+ * Hardware guarantees that by the time interrupt is observed, all data
+ * transactions in flight are delivered to their respective places and
+ * are visible to the CPU.
+ *
+ * On demand paging for IOMMU is only supported for PCIe via PRI
+ * (Page Request Interface), not for HIDMA. All other hardware instances,
+ * including HIDMA, work on pinned DMA addresses.
+ *
+ * HIDMA is not aware of IOMMU presence since it follows the DMA API. All
+ * IOMMU latency will be built into the data movement time. By the time the
+ * interrupt happens, IOMMU lookups and data movement have already taken place.
+ *
+ * While the first read in a typical PCI endpoint ISR traditionally flushes
+ * all outstanding requests to the destination, this concept does not apply
+ * to this HW.
+ */
+static void hidma_ll_int_handler_internal(struct hidma_lldev *lldev)
+{
+	u32 status;
+	u32 enable;
+	u32 cause;
+	int repeat = 2;
+	unsigned long timeout;
+
+	/*
+	 * Fine tuned for this HW...
+	 *
+	 * This ISR has been designed for this particular hardware. Relaxed
+	 * read and write accessors are used for performance reasons due to
+	 * interrupt delivery guarantees. Do not copy this code blindly and
+	 * expect that to work.
+	 */
+	status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
+	enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
+	cause = status & enable;
+
+	if ((cause & (BIT(IRQ_TR_CH_INVALID_TRE_BIT_POS))) ||
+	    (cause & BIT(IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS)) ||
+	    (cause & BIT(IRQ_EV_CH_WR_RESP_BIT_POS)) ||
+	    (cause & BIT(IRQ_TR_CH_DATA_RD_ER_BIT_POS)) ||
+	    (cause & BIT(IRQ_TR_CH_DATA_WR_ER_BIT_POS))) {
+		u8 err_code = EVRE_STATUS_ERROR;
+		u8 err_info = 0xFF;
+
+		/* Clear out pending interrupts */
+		writel(cause, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+
+		dev_err(lldev->dev, "error 0x%x, resetting...\n", cause);
+
+		hidma_cleanup_pending_tre(lldev, err_info, err_code);
+
+		/* reset the channel for recovery */
+		if (hidma_ll_setup(lldev)) {
+			dev_err(lldev->dev,
+				"channel reinitialize failed after error\n");
+			return;
+		}
+		hidma_ll_enable_irq(lldev, ENABLE_IRQS);
+		return;
+	}
+
+	/*
+	 * Try to consume as many EVREs as possible.
+	 * skip this loop if the interrupt is spurious.
+	 */
+	while (cause && repeat) {
+		unsigned long start = jiffies;
+
+		/* This timeout should be sufficient for the core to finish */
+		timeout = start + msecs_to_jiffies(500);
+
+		while (lldev->pending_tre_count) {
+			hidma_handle_tre_completion(lldev);
+			if (time_is_before_jiffies(timeout)) {
+				dev_warn(lldev->dev,
+					 "ISR timeout %lx-%lx from %lx [%d]\n",
+					 jiffies, timeout, start,
+					 lldev->pending_tre_count);
+				break;
+			}
+		}
+
+		/* We consumed TREs or there are pending TREs or EVREs. */
+		writel_relaxed(cause, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+
+		/*
+		 * Another interrupt might have arrived while we are
+		 * processing this one. Read the new cause.
+		 */
+		status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
+		enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
+		cause = status & enable;
+
+		repeat--;
+	}
+}
+
+static int hidma_ll_enable(struct hidma_lldev *lldev)
+{
+	u32 val;
+	int ret;
+
+	val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	val &= ~(CH_CONTROL_MASK << 16);
+	val |= CH_ENABLE << 16;
+	writel(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
+
+	ret = readl_poll_timeout(lldev->evca + EVCA_CTRLSTS_OFFSET, val,
+				 ((((val >> CH_STATE_BIT_POS) & CH_STATE_MASK)
+				   == CH_ENABLED)
+				  ||
+				  (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK)
+				   == CH_RUNNING)), 1000, 10000);
+	if (ret) {
+		dev_err(lldev->dev, "event channel did not get enabled\n");
+		return ret;
+	}
+
+	val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	val &= ~(CH_CONTROL_MASK << 16);
+	val |= CH_ENABLE << 16;
+	writel(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
+
+	ret = readl_poll_timeout(lldev->trca + TRCA_CTRLSTS_OFFSET, val,
+				 ((((val >> CH_STATE_BIT_POS) & CH_STATE_MASK)
+				   == CH_ENABLED)
+				  ||
+				  (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK)
+				   == CH_RUNNING)), 1000, 10000);
+	if (ret) {
+		dev_err(lldev->dev, "transfer channel did not get enabled\n");
+		return ret;
+	}
+
+	lldev->trch_state = CH_ENABLED;
+	lldev->evch_state = CH_ENABLED;
+
+	return 0;
+}
+
+int hidma_ll_resume(struct hidma_lldev *lldev)
+{
+	return hidma_ll_enable(lldev);
+}
+
+static void hidma_ll_hw_start(struct hidma_lldev *lldev)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&lldev->lock, irqflags);
+	writel(lldev->tre_write_offset, lldev->trca + TRCA_DOORBELL_OFFSET);
+	spin_unlock_irqrestore(&lldev->lock, irqflags);
+}
+
+bool hidma_ll_isenabled(struct hidma_lldev *lldev)
+{
+	u32 val;
+
+	val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+	val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+
+	/* both channels have to be enabled before calling this function */
+	if (((lldev->trch_state == CH_ENABLED) ||
+	     (lldev->trch_state == CH_RUNNING)) &&
+	    ((lldev->evch_state == CH_ENABLED) ||
+	     (lldev->evch_state == CH_RUNNING)))
+		return true;
+
+	return false;
+}
+
+void hidma_ll_queue_request(struct hidma_lldev *lldev, u32 tre_ch)
+{
+	struct hidma_tre *tre;
+	unsigned long flags;
+
+	tre = &lldev->trepool[tre_ch];
+
+	/* copy the TRE into its location in the TRE ring */
+	spin_lock_irqsave(&lldev->lock, flags);
+	tre->tre_index = lldev->tre_write_offset / TRE_SIZE;
+	lldev->pending_tre_list[tre->tre_index] = tre;
+	memcpy(lldev->tre_ring + lldev->tre_write_offset, &tre->tre_local[0],
+	       TRE_SIZE);
+	lldev->tx_status_list[tre->idx].err_code = 0;
+	lldev->tx_status_list[tre->idx].err_info = 0;
+	tre->queued = 1;
+	lldev->pending_tre_count++;
+	lldev->tre_write_offset = (lldev->tre_write_offset + TRE_SIZE)
+	    % lldev->tre_ring_size;
+	spin_unlock_irqrestore(&lldev->lock, flags);
+}
+
+void hidma_ll_start(struct hidma_lldev *lldev)
+{
+	hidma_ll_hw_start(lldev);
+}
+
+/*
+ * Note that even though we stop this channel,
+ * any pending transaction in flight will still
+ * complete and its callback will follow.
+ * This request prevents further requests from
+ * being made.
+ */
+int hidma_ll_pause(struct hidma_lldev *lldev)
+{
+	u32 val;
+	int ret;
+
+	val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+	val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+
+	/* already suspended by this OS */
+	if ((lldev->trch_state == CH_SUSPENDED) ||
+	    (lldev->evch_state == CH_SUSPENDED))
+		return 0;
+
+	/* already stopped by the manager */
+	if ((lldev->trch_state == CH_STOPPED) ||
+	    (lldev->evch_state == CH_STOPPED))
+		return 0;
+
+	val = readl(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	val &= ~(CH_CONTROL_MASK << 16);
+	val |= CH_SUSPEND << 16;
+	writel(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
+
+	/*
+	 * Start the wait right after the suspend is confirmed.
+	 * Do a polled read up to 1ms and 10ms maximum.
+	 */
+	ret = readl_poll_timeout(lldev->trca + TRCA_CTRLSTS_OFFSET, val,
+				 (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK) ==
+				  CH_SUSPENDED), 1000, 10000);
+	if (ret)
+		return ret;
+
+	val = readl(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	val &= ~(CH_CONTROL_MASK << 16);
+	val |= CH_SUSPEND << 16;
+	writel(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
+
+	/*
+	 * Start the wait right after the suspend is confirmed
+	 * Delay up to 10ms after reset to allow DMA logic to quiesce.
+	 */
+	ret = readl_poll_timeout(lldev->evca + EVCA_CTRLSTS_OFFSET, val,
+				 (((val >> CH_STATE_BIT_POS) & CH_STATE_MASK) ==
+				  CH_SUSPENDED), 1000, 10000);
+	if (ret)
+		return ret;
+
+	lldev->trch_state = CH_SUSPENDED;
+	lldev->evch_state = CH_SUSPENDED;
+	return 0;
+}
+
+void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch,
+				  dma_addr_t src, dma_addr_t dest, u32 len,
+				  u32 flags)
+{
+	struct hidma_tre *tre;
+	u32 *tre_local;
+
+	if (tre_ch >= lldev->nr_tres) {
+		dev_err(lldev->dev,
+			"invalid TRE number in transfer params:%d", tre_ch);
+		return;
+	}
+
+	tre = &lldev->trepool[tre_ch];
+	if (atomic_read(&tre->allocated) != true) {
+		dev_err(lldev->dev,
+			"trying to set params on an unused TRE:%d", tre_ch);
+		return;
+	}
+
+	tre_local = &tre->tre_local[0];
+	tre_local[TRE_LEN_IDX] = len;
+	tre_local[TRE_SRC_LOW_IDX] = lower_32_bits(src);
+	tre_local[TRE_SRC_HI_IDX] = upper_32_bits(src);
+	tre_local[TRE_DEST_LOW_IDX] = lower_32_bits(dest);
+	tre_local[TRE_DEST_HI_IDX] = upper_32_bits(dest);
+	tre->int_flags = flags;
+}
+
+/*
+ * Called during initialization and after an error condition
+ * to restore hardware state.
+ */
+int hidma_ll_setup(struct hidma_lldev *lldev)
+{
+	int rc;
+	u64 addr;
+	u32 val;
+	u32 nr_tres = lldev->nr_tres;
+
+	lldev->pending_tre_count = 0;
+	lldev->tre_processed_off = 0;
+	lldev->evre_processed_off = 0;
+	lldev->tre_write_offset = 0;
+
+	/* disable interrupts */
+	hidma_ll_enable_irq(lldev, 0);
+
+	/* clear all pending interrupts */
+	val = readl(lldev->evca + EVCA_IRQ_STAT_OFFSET);
+	writel(val, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+
+	rc = hidma_ll_reset(lldev);
+	if (rc)
+		return rc;
+
+	/*
+	 * Clear all pending interrupts again.
+	 * Otherwise, we observe reset complete interrupts.
+	 */
+	val = readl(lldev->evca + EVCA_IRQ_STAT_OFFSET);
+	writel(val, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+
+	/* disable interrupts again after reset */
+	hidma_ll_enable_irq(lldev, 0);
+
+	addr = lldev->tre_ring_handle;
+	writel(lower_32_bits(addr), lldev->trca + TRCA_RING_LOW_OFFSET);
+	writel(upper_32_bits(addr), lldev->trca + TRCA_RING_HIGH_OFFSET);
+	writel(lldev->tre_ring_size, lldev->trca + TRCA_RING_LEN_OFFSET);
+
+	addr = lldev->evre_ring_handle;
+	writel(lower_32_bits(addr), lldev->evca + EVCA_RING_LOW_OFFSET);
+	writel(upper_32_bits(addr), lldev->evca + EVCA_RING_HIGH_OFFSET);
+	writel(EVRE_SIZE * nr_tres, lldev->evca + EVCA_RING_LEN_OFFSET);
+
+	/* support IRQ only for now */
+	val = readl(lldev->evca + EVCA_INTCTRL_OFFSET);
+	val &= ~0xF;
+	val |= 0x1;
+	writel(val, lldev->evca + EVCA_INTCTRL_OFFSET);
+
+	/* clear all pending interrupts and enable them */
+	writel(ENABLE_IRQS, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+	hidma_ll_enable_irq(lldev, ENABLE_IRQS);
+
+	rc = hidma_ll_enable(lldev);
+	if (rc)
+		return rc;
+
+	return rc;
+}
+
+struct hidma_lldev *hidma_ll_init(struct device *dev, u32 nr_tres,
+				  void __iomem *trca, void __iomem *evca,
+				  u8 chidx)
+{
+	u32 required_bytes;
+	struct hidma_lldev *lldev;
+	int rc;
+
+	if (!trca || !evca || !dev || !nr_tres)
+		return NULL;
+
+	/* need at least four TREs */
+	if (nr_tres < 4)
+		return NULL;
+
+	/* need an extra space */
+	nr_tres += 1;
+
+	lldev = devm_kzalloc(dev, sizeof(struct hidma_lldev), GFP_KERNEL);
+	if (!lldev)
+		return NULL;
+
+	lldev->evca = evca;
+	lldev->trca = trca;
+	lldev->dev = dev;
+	required_bytes = sizeof(struct hidma_tre) * nr_tres;
+	lldev->trepool = devm_kzalloc(lldev->dev, required_bytes, GFP_KERNEL);
+	if (!lldev->trepool)
+		return NULL;
+
+	required_bytes = sizeof(lldev->pending_tre_list[0]) * nr_tres;
+	lldev->pending_tre_list = devm_kzalloc(dev, required_bytes, GFP_KERNEL);
+	if (!lldev->pending_tre_list)
+		return NULL;
+
+	required_bytes = sizeof(lldev->tx_status_list[0]) * nr_tres;
+	lldev->tx_status_list = devm_kzalloc(dev, required_bytes, GFP_KERNEL);
+	if (!lldev->tx_status_list)
+		return NULL;
+
+	lldev->tre_ring = dmam_alloc_coherent(dev, (TRE_SIZE + 1) * nr_tres,
+					      &lldev->tre_ring_handle,
+					      GFP_KERNEL);
+	if (!lldev->tre_ring)
+		return NULL;
+
+	memset(lldev->tre_ring, 0, (TRE_SIZE + 1) * nr_tres);
+	lldev->tre_ring_size = TRE_SIZE * nr_tres;
+	lldev->nr_tres = nr_tres;
+
+	/* the TRE ring has to be TRE_SIZE aligned */
+	if (!IS_ALIGNED(lldev->tre_ring_handle, TRE_SIZE)) {
+		u8 tre_ring_shift;
+
+		tre_ring_shift = lldev->tre_ring_handle % TRE_SIZE;
+		tre_ring_shift = TRE_SIZE - tre_ring_shift;
+		lldev->tre_ring_handle += tre_ring_shift;
+		lldev->tre_ring += tre_ring_shift;
+	}
+
+	lldev->evre_ring = dmam_alloc_coherent(dev, (EVRE_SIZE + 1) * nr_tres,
+					       &lldev->evre_ring_handle,
+					       GFP_KERNEL);
+	if (!lldev->evre_ring)
+		return NULL;
+
+	memset(lldev->evre_ring, 0, (EVRE_SIZE + 1) * nr_tres);
+	lldev->evre_ring_size = EVRE_SIZE * nr_tres;
+
+	/* the EVRE ring has to be EVRE_SIZE aligned */
+	if (!IS_ALIGNED(lldev->evre_ring_handle, EVRE_SIZE)) {
+		u8 evre_ring_shift;
+
+		evre_ring_shift = lldev->evre_ring_handle % EVRE_SIZE;
+		evre_ring_shift = EVRE_SIZE - evre_ring_shift;
+		lldev->evre_ring_handle += evre_ring_shift;
+		lldev->evre_ring += evre_ring_shift;
+	}
+	lldev->nr_tres = nr_tres;
+	lldev->chidx = chidx;
+
+	rc = kfifo_alloc(&lldev->handoff_fifo,
+			 nr_tres * sizeof(struct hidma_tre *), GFP_KERNEL);
+	if (rc)
+		return NULL;
+
+	rc = hidma_ll_setup(lldev);
+	if (rc)
+		return NULL;
+
+	spin_lock_init(&lldev->lock);
+	tasklet_init(&lldev->task, hidma_ll_tre_complete, (unsigned long)lldev);
+	lldev->initialized = 1;
+	hidma_ll_enable_irq(lldev, ENABLE_IRQS);
+	return lldev;
+}
+
+int hidma_ll_uninit(struct hidma_lldev *lldev)
+{
+	int rc = 0;
+	u32 val;
+
+	if (!lldev)
+		return -ENODEV;
+
+	if (lldev->initialized) {
+		u32 required_bytes;
+
+		lldev->initialized = 0;
+
+		required_bytes = sizeof(struct hidma_tre) * lldev->nr_tres;
+		tasklet_kill(&lldev->task);
+		memset(lldev->trepool, 0, required_bytes);
+		lldev->trepool = NULL;
+		lldev->pending_tre_count = 0;
+		lldev->tre_write_offset = 0;
+
+		rc = hidma_ll_reset(lldev);
+
+		/*
+		 * Clear all pending interrupts again.
+		 * Otherwise, we observe reset complete interrupts.
+		 */
+		val = readl(lldev->evca + EVCA_IRQ_STAT_OFFSET);
+		writel(val, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+		hidma_ll_enable_irq(lldev, 0);
+	}
+	return rc;
+}
+
+irqreturn_t hidma_ll_inthandler(int chirq, void *arg)
+{
+	struct hidma_lldev *lldev = arg;
+
+	hidma_ll_int_handler_internal(lldev);
+	return IRQ_HANDLED;
+}
+
+enum dma_status hidma_ll_status(struct hidma_lldev *lldev, u32 tre_ch)
+{
+	enum dma_status ret = DMA_ERROR;
+	unsigned long flags;
+	u8 err_code;
+
+	spin_lock_irqsave(&lldev->lock, flags);
+	err_code = lldev->tx_status_list[tre_ch].err_code;
+
+	if (err_code & EVRE_STATUS_COMPLETE)
+		ret = DMA_COMPLETE;
+	else if (err_code & EVRE_STATUS_ERROR)
+		ret = DMA_ERROR;
+	else
+		ret = DMA_IN_PROGRESS;
+	spin_unlock_irqrestore(&lldev->lock, flags);
+
+	return ret;
+}