diff mbox series

[v2,5/7] dmaengine: ae4dma: Register AE4DMA using pt_dmaengine_register

Message ID 20240617100359.2550541-6-Basavaraj.Natikar@amd.com (mailing list archive)
State Superseded
Headers show
Series Add support of AMD AE4DMA DMA Engine | expand

Commit Message

Basavaraj Natikar June 17, 2024, 10:03 a.m. UTC
Use the pt_dmaengine_register function to register an AE4DMA DMA engine.

Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com>
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
---
 drivers/dma/amd/ae4dma/Makefile     |  2 +-
 drivers/dma/amd/ae4dma/ae4dma-dev.c | 73 +++++++++++++++++++++++++++++
 drivers/dma/amd/ae4dma/ae4dma-pci.c |  1 +
 drivers/dma/amd/ae4dma/ae4dma.h     |  2 +
 4 files changed, 77 insertions(+), 1 deletion(-)

Comments

Philipp Stanner June 19, 2024, 7:17 a.m. UTC | #1
On Mon, 2024-06-17 at 15:33 +0530, Basavaraj Natikar wrote:
> Use the pt_dmaengine_register function to register a AE4DMA DMA
> engine.
> 
> Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com>
> Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> ---
>  drivers/dma/amd/ae4dma/Makefile     |  2 +-
>  drivers/dma/amd/ae4dma/ae4dma-dev.c | 73
> +++++++++++++++++++++++++++++
>  drivers/dma/amd/ae4dma/ae4dma-pci.c |  1 +
>  drivers/dma/amd/ae4dma/ae4dma.h     |  2 +
>  4 files changed, 77 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/dma/amd/ae4dma/Makefile
> b/drivers/dma/amd/ae4dma/Makefile
> index e918f85a80ec..165d1c74b732 100644
> --- a/drivers/dma/amd/ae4dma/Makefile
> +++ b/drivers/dma/amd/ae4dma/Makefile
> @@ -5,6 +5,6 @@
>  
>  obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
>  
> -ae4dma-objs := ae4dma-dev.o
> +ae4dma-objs := ae4dma-dev.o  ../ptdma/ptdma-dmaengine.o
> ../common/amd_dma.o
>  
>  ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
> diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c
> b/drivers/dma/amd/ae4dma/ae4dma-dev.c
> index 958bdab8db59..77c37649d8d1 100644
> --- a/drivers/dma/amd/ae4dma/ae4dma-dev.c
> +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c
> @@ -60,6 +60,15 @@ static void ae4_check_status_error(struct
> ae4_cmd_queue *ae4cmd_q, int idx)
>         }
>  }
>  
> +void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue
> *cmd_q)
> +{
> +       struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct
> ae4_cmd_queue, cmd_q);
> +       int i;
> +
> +       for (i = 0; i < CMD_Q_LEN; i++)
> +               ae4_check_status_error(ae4cmd_q, i);
> +}
> +
>  static void ae4_pending_work(struct work_struct *work)
>  {
>         struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct
> ae4_cmd_queue, p_work.work);
> @@ -123,6 +132,66 @@ static irqreturn_t ae4_core_irq_handler(int irq,
> void *data)
>         return IRQ_HANDLED;
>  }
>  
> +static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct
> ae4_cmd_queue *ae4cmd_q)
> +{

Hi,

The memory ordering in this function seems to be addressed through
several different mechanisms simultaneously?

> +       bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0);
> +       struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
> +       u32 tail_wi;
> +
> +       if (soc) {
> +               desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc-
> >dwouv.dw0);
> +               desc->dwouv.dw0 &= ~DWORD0_SOC;
> +       }
> +
> +       mutex_lock(&ae4cmd_q->cmd_lock);
> +
> +       tail_wi = atomic_read(&ae4cmd_q->tail_wi);
> +       memcpy(&cmd_q->qbase[tail_wi], desc, sizeof(struct
> ae4dma_desc));
> +
> +       atomic64_inc(&ae4cmd_q->q_cmd_count);
> +
> +       tail_wi = (tail_wi + 1) % CMD_Q_LEN;
> +
> +       atomic_set(&ae4cmd_q->tail_wi, tail_wi);
> +       /* Synchronize ordering */
> +       mb();

This mb() should be surplus because writel() should have its own
barrier already.

> +
> +       writel(tail_wi, cmd_q->reg_control + 0x10);
> +       /* Synchronize ordering */
> +       mb();
> +
> +       mutex_unlock(&ae4cmd_q->cmd_lock);

Same here – the compiler can't change the order of writel() and the
subsequent mutex_unlock(). If that were the case the entire kernel
would explode.

So it seems there are three mechanisms in action here:
   1. mutex
   2. atomics
   3. memory barriers

Can't the ordering be ensured by the mutex alone?


Regards,
P.


> +
> +       wake_up(&ae4cmd_q->q_w);
> +
> +       return 0;
> +}
> +
> +int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
> +                            struct pt_passthru_engine *pt_engine)
> +{
> +       struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct
> ae4_cmd_queue, cmd_q);
> +       struct ae4dma_desc desc;
> +
> +       cmd_q->cmd_error = 0;
> +       cmd_q->total_pt_ops++;
> +       memset(&desc, 0, sizeof(desc));
> +       desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL;
> +
> +       desc.dw1.status = 0;
> +       desc.dw1.err_code = 0;
> +       desc.dw1.desc_id = 0;
> +
> +       desc.length = pt_engine->src_len;
> +
> +       desc.src_lo = upper_32_bits(pt_engine->src_dma);
> +       desc.src_hi = lower_32_bits(pt_engine->src_dma);
> +       desc.dst_lo = upper_32_bits(pt_engine->dst_dma);
> +       desc.dst_hi = lower_32_bits(pt_engine->dst_dma);
> +
> +       return ae4_core_execute_cmd(&desc, ae4cmd_q);
> +}
> +
>  void ae4_destroy_work(struct ae4_device *ae4)
>  {
>         struct ae4_cmd_queue *ae4cmd_q;
> @@ -202,5 +271,9 @@ int ae4_core_init(struct ae4_device *ae4)
>                 init_completion(&ae4cmd_q->cmp);
>         }
>  
> +       ret = pt_dmaengine_register(pt);
> +       if (ret)
> +               ae4_destroy_work(ae4);
> +
>         return ret;
>  }
> diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c
> b/drivers/dma/amd/ae4dma/ae4dma-pci.c
> index ddebf0609c4d..5450fa551eea 100644
> --- a/drivers/dma/amd/ae4dma/ae4dma-pci.c
> +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
> @@ -131,6 +131,7 @@ static int ae4_pci_probe(struct pci_dev *pdev,
> const struct pci_device_id *id)
>  
>         pt = &ae4->pt;
>         pt->dev = dev;
> +       pt->ver = AE4_DMA_VERSION;
>  
>         pt->io_regs = pcim_iomap_table(pdev)[0];
>         if (!pt->io_regs) {
> diff --git a/drivers/dma/amd/ae4dma/ae4dma.h
> b/drivers/dma/amd/ae4dma/ae4dma.h
> index 4e4584e152a1..f1b6dcc1d8c3 100644
> --- a/drivers/dma/amd/ae4dma/ae4dma.h
> +++ b/drivers/dma/amd/ae4dma/ae4dma.h
> @@ -16,6 +16,7 @@
>  
>  #define AE4_DESC_COMPLETED             0x3
>  #define AE4_DMA_VERSION                        4
> +#define CMD_AE4_DESC_DW0_VAL           2
>  
>  struct ae4_msix {
>         int msix_count;
> @@ -36,6 +37,7 @@ struct ae4_cmd_queue {
>         atomic64_t done_cnt;
>         atomic64_t q_cmd_count;
>         atomic_t dridx;
> +       atomic_t tail_wi;
>         unsigned int id;
>  };
>
Philipp Stanner June 19, 2024, 7:29 a.m. UTC | #2
On Mon, 2024-06-17 at 15:33 +0530, Basavaraj Natikar wrote:
> Use the pt_dmaengine_register function to register a AE4DMA DMA
> engine.
> 
> Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com>
> Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> ---
>  drivers/dma/amd/ae4dma/Makefile     |  2 +-
>  drivers/dma/amd/ae4dma/ae4dma-dev.c | 73
> +++++++++++++++++++++++++++++
>  drivers/dma/amd/ae4dma/ae4dma-pci.c |  1 +
>  drivers/dma/amd/ae4dma/ae4dma.h     |  2 +
>  4 files changed, 77 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/dma/amd/ae4dma/Makefile
> b/drivers/dma/amd/ae4dma/Makefile
> index e918f85a80ec..165d1c74b732 100644
> --- a/drivers/dma/amd/ae4dma/Makefile
> +++ b/drivers/dma/amd/ae4dma/Makefile
> @@ -5,6 +5,6 @@
>  
>  obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
>  
> -ae4dma-objs := ae4dma-dev.o
> +ae4dma-objs := ae4dma-dev.o  ../ptdma/ptdma-dmaengine.o
> ../common/amd_dma.o
>  
>  ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
> diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c
> b/drivers/dma/amd/ae4dma/ae4dma-dev.c
> index 958bdab8db59..77c37649d8d1 100644
> --- a/drivers/dma/amd/ae4dma/ae4dma-dev.c
> +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c
> @@ -60,6 +60,15 @@ static void ae4_check_status_error(struct
> ae4_cmd_queue *ae4cmd_q, int idx)
>         }
>  }
>  
> +void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue
> *cmd_q)
> +{
> +       struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct
> ae4_cmd_queue, cmd_q);
> +       int i;
> +
> +       for (i = 0; i < CMD_Q_LEN; i++)
> +               ae4_check_status_error(ae4cmd_q, i);
> +}
> +
>  static void ae4_pending_work(struct work_struct *work)
>  {
>         struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct
> ae4_cmd_queue, p_work.work);
> @@ -123,6 +132,66 @@ static irqreturn_t ae4_core_irq_handler(int irq,
> void *data)
>         return IRQ_HANDLED;
>  }
>  
> +static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct
> ae4_cmd_queue *ae4cmd_q)
> +{
> +       bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0);
> +       struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
> +       u32 tail_wi;
> +
> +       if (soc) {
> +               desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc-
> >dwouv.dw0);
> +               desc->dwouv.dw0 &= ~DWORD0_SOC;
> +       }
> +
> +       mutex_lock(&ae4cmd_q->cmd_lock);
> +
> +       tail_wi = atomic_read(&ae4cmd_q->tail_wi);
> +       memcpy(&cmd_q->qbase[tail_wi], desc, sizeof(struct
> ae4dma_desc));
> +
> +       atomic64_inc(&ae4cmd_q->q_cmd_count);
> +
> +       tail_wi = (tail_wi + 1) % CMD_Q_LEN;
> +
> +       atomic_set(&ae4cmd_q->tail_wi, tail_wi);
> +       /* Synchronize ordering */
> +       mb();
> +
> +       writel(tail_wi, cmd_q->reg_control + 0x10);
> +       /* Synchronize ordering */
> +       mb();

Same here as in patch №2, I think writel() and mutex can't change their
relative order.

> +
> +       mutex_unlock(&ae4cmd_q->cmd_lock);

Same question: can't everything be done by the mutex alone?


P.

> +
> +       wake_up(&ae4cmd_q->q_w);
> +
> +       return 0;
> +}
> +
> +int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
> +                            struct pt_passthru_engine *pt_engine)
> +{
> +       struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct
> ae4_cmd_queue, cmd_q);
> +       struct ae4dma_desc desc;
> +
> +       cmd_q->cmd_error = 0;
> +       cmd_q->total_pt_ops++;
> +       memset(&desc, 0, sizeof(desc));
> +       desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL;
> +
> +       desc.dw1.status = 0;
> +       desc.dw1.err_code = 0;
> +       desc.dw1.desc_id = 0;
> +
> +       desc.length = pt_engine->src_len;
> +
> +       desc.src_lo = upper_32_bits(pt_engine->src_dma);
> +       desc.src_hi = lower_32_bits(pt_engine->src_dma);
> +       desc.dst_lo = upper_32_bits(pt_engine->dst_dma);
> +       desc.dst_hi = lower_32_bits(pt_engine->dst_dma);
> +
> +       return ae4_core_execute_cmd(&desc, ae4cmd_q);
> +}
> +
>  void ae4_destroy_work(struct ae4_device *ae4)
>  {
>         struct ae4_cmd_queue *ae4cmd_q;
> @@ -202,5 +271,9 @@ int ae4_core_init(struct ae4_device *ae4)
>                 init_completion(&ae4cmd_q->cmp);
>         }
>  
> +       ret = pt_dmaengine_register(pt);
> +       if (ret)
> +               ae4_destroy_work(ae4);
> +
>         return ret;
>  }
> diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c
> b/drivers/dma/amd/ae4dma/ae4dma-pci.c
> index ddebf0609c4d..5450fa551eea 100644
> --- a/drivers/dma/amd/ae4dma/ae4dma-pci.c
> +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
> @@ -131,6 +131,7 @@ static int ae4_pci_probe(struct pci_dev *pdev,
> const struct pci_device_id *id)
>  
>         pt = &ae4->pt;
>         pt->dev = dev;
> +       pt->ver = AE4_DMA_VERSION;
>  
>         pt->io_regs = pcim_iomap_table(pdev)[0];
>         if (!pt->io_regs) {
> diff --git a/drivers/dma/amd/ae4dma/ae4dma.h
> b/drivers/dma/amd/ae4dma/ae4dma.h
> index 4e4584e152a1..f1b6dcc1d8c3 100644
> --- a/drivers/dma/amd/ae4dma/ae4dma.h
> +++ b/drivers/dma/amd/ae4dma/ae4dma.h
> @@ -16,6 +16,7 @@
>  
>  #define AE4_DESC_COMPLETED             0x3
>  #define AE4_DMA_VERSION                        4
> +#define CMD_AE4_DESC_DW0_VAL           2
>  
>  struct ae4_msix {
>         int msix_count;
> @@ -36,6 +37,7 @@ struct ae4_cmd_queue {
>         atomic64_t done_cnt;
>         atomic64_t q_cmd_count;
>         atomic_t dridx;
> +       atomic_t tail_wi;
>         unsigned int id;
>  };
>
Basavaraj Natikar June 19, 2024, 7:52 a.m. UTC | #3
On 6/19/2024 12:59 PM, Philipp Stanner wrote:
> On Mon, 2024-06-17 at 15:33 +0530, Basavaraj Natikar wrote:
>> Use the pt_dmaengine_register function to register a AE4DMA DMA
>> engine.
>>
>> Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com>
>> Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
>> ---
>>  drivers/dma/amd/ae4dma/Makefile     |  2 +-
>>  drivers/dma/amd/ae4dma/ae4dma-dev.c | 73
>> +++++++++++++++++++++++++++++
>>  drivers/dma/amd/ae4dma/ae4dma-pci.c |  1 +
>>  drivers/dma/amd/ae4dma/ae4dma.h     |  2 +
>>  4 files changed, 77 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/dma/amd/ae4dma/Makefile
>> b/drivers/dma/amd/ae4dma/Makefile
>> index e918f85a80ec..165d1c74b732 100644
>> --- a/drivers/dma/amd/ae4dma/Makefile
>> +++ b/drivers/dma/amd/ae4dma/Makefile
>> @@ -5,6 +5,6 @@
>>  
>>  obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
>>  
>> -ae4dma-objs := ae4dma-dev.o
>> +ae4dma-objs := ae4dma-dev.o  ../ptdma/ptdma-dmaengine.o
>> ../common/amd_dma.o
>>  
>>  ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
>> diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c
>> b/drivers/dma/amd/ae4dma/ae4dma-dev.c
>> index 958bdab8db59..77c37649d8d1 100644
>> --- a/drivers/dma/amd/ae4dma/ae4dma-dev.c
>> +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c
>> @@ -60,6 +60,15 @@ static void ae4_check_status_error(struct
>> ae4_cmd_queue *ae4cmd_q, int idx)
>>         }
>>  }
>>  
>> +void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue
>> *cmd_q)
>> +{
>> +       struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct
>> ae4_cmd_queue, cmd_q);
>> +       int i;
>> +
>> +       for (i = 0; i < CMD_Q_LEN; i++)
>> +               ae4_check_status_error(ae4cmd_q, i);
>> +}
>> +
>>  static void ae4_pending_work(struct work_struct *work)
>>  {
>>         struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct
>> ae4_cmd_queue, p_work.work);
>> @@ -123,6 +132,66 @@ static irqreturn_t ae4_core_irq_handler(int irq,
>> void *data)
>>         return IRQ_HANDLED;
>>  }
>>  
>> +static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct
>> ae4_cmd_queue *ae4cmd_q)
>> +{
>> +       bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0);
>> +       struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
>> +       u32 tail_wi;
>> +
>> +       if (soc) {
>> +               desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc-
>>> dwouv.dw0);
>> +               desc->dwouv.dw0 &= ~DWORD0_SOC;
>> +       }
>> +
>> +       mutex_lock(&ae4cmd_q->cmd_lock);
>> +
>> +       tail_wi = atomic_read(&ae4cmd_q->tail_wi);
>> +       memcpy(&cmd_q->qbase[tail_wi], desc, sizeof(struct
>> ae4dma_desc));
>> +
>> +       atomic64_inc(&ae4cmd_q->q_cmd_count);
>> +
>> +       tail_wi = (tail_wi + 1) % CMD_Q_LEN;
>> +
>> +       atomic_set(&ae4cmd_q->tail_wi, tail_wi);
>> +       /* Synchronize ordering */
>> +       mb();
>> +
>> +       writel(tail_wi, cmd_q->reg_control + 0x10);
>> +       /* Synchronize ordering */
>> +       mb();
> Same here as in patch №2, I think writel() and mutex can't change their
> relative order.
>
>> +
>> +       mutex_unlock(&ae4cmd_q->cmd_lock);
> Same question: can't everything be done by the mutex alone?

Sure, I will remove it in all applicable places.

Thanks,
--
Basavaraj

>
>
> P.
>
>> +
>> +       wake_up(&ae4cmd_q->q_w);
>> +
>> +       return 0;
>> +}
>> +
>> +int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
>> +                            struct pt_passthru_engine *pt_engine)
>> +{
>> +       struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct
>> ae4_cmd_queue, cmd_q);
>> +       struct ae4dma_desc desc;
>> +
>> +       cmd_q->cmd_error = 0;
>> +       cmd_q->total_pt_ops++;
>> +       memset(&desc, 0, sizeof(desc));
>> +       desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL;
>> +
>> +       desc.dw1.status = 0;
>> +       desc.dw1.err_code = 0;
>> +       desc.dw1.desc_id = 0;
>> +
>> +       desc.length = pt_engine->src_len;
>> +
>> +       desc.src_lo = upper_32_bits(pt_engine->src_dma);
>> +       desc.src_hi = lower_32_bits(pt_engine->src_dma);
>> +       desc.dst_lo = upper_32_bits(pt_engine->dst_dma);
>> +       desc.dst_hi = lower_32_bits(pt_engine->dst_dma);
>> +
>> +       return ae4_core_execute_cmd(&desc, ae4cmd_q);
>> +}
>> +
>>  void ae4_destroy_work(struct ae4_device *ae4)
>>  {
>>         struct ae4_cmd_queue *ae4cmd_q;
>> @@ -202,5 +271,9 @@ int ae4_core_init(struct ae4_device *ae4)
>>                 init_completion(&ae4cmd_q->cmp);
>>         }
>>  
>> +       ret = pt_dmaengine_register(pt);
>> +       if (ret)
>> +               ae4_destroy_work(ae4);
>> +
>>         return ret;
>>  }
>> diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c
>> b/drivers/dma/amd/ae4dma/ae4dma-pci.c
>> index ddebf0609c4d..5450fa551eea 100644
>> --- a/drivers/dma/amd/ae4dma/ae4dma-pci.c
>> +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
>> @@ -131,6 +131,7 @@ static int ae4_pci_probe(struct pci_dev *pdev,
>> const struct pci_device_id *id)
>>  
>>         pt = &ae4->pt;
>>         pt->dev = dev;
>> +       pt->ver = AE4_DMA_VERSION;
>>  
>>         pt->io_regs = pcim_iomap_table(pdev)[0];
>>         if (!pt->io_regs) {
>> diff --git a/drivers/dma/amd/ae4dma/ae4dma.h
>> b/drivers/dma/amd/ae4dma/ae4dma.h
>> index 4e4584e152a1..f1b6dcc1d8c3 100644
>> --- a/drivers/dma/amd/ae4dma/ae4dma.h
>> +++ b/drivers/dma/amd/ae4dma/ae4dma.h
>> @@ -16,6 +16,7 @@
>>  
>>  #define AE4_DESC_COMPLETED             0x3
>>  #define AE4_DMA_VERSION                        4
>> +#define CMD_AE4_DESC_DW0_VAL           2
>>  
>>  struct ae4_msix {
>>         int msix_count;
>> @@ -36,6 +37,7 @@ struct ae4_cmd_queue {
>>         atomic64_t done_cnt;
>>         atomic64_t q_cmd_count;
>>         atomic_t dridx;
>> +       atomic_t tail_wi;
>>         unsigned int id;
>>  };
>>
kernel test robot June 23, 2024, 4:45 p.m. UTC | #4
Hi Basavaraj,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.10-rc4]
[cannot apply to vkoul-dmaengine/next next-20240621]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Basavaraj-Natikar/dmaengine-Move-AMD-DMA-driver-to-separate-directory/20240617-184320
base:   linus/master
patch link:    https://lore.kernel.org/r/20240617100359.2550541-6-Basavaraj.Natikar%40amd.com
patch subject: [PATCH v2 5/7] dmaengine: ae4dma: Register AE4DMA using pt_dmaengine_register
config: x86_64-randconfig-103-20240623 (https://download.01.org/0day-ci/archive/20240624/202406240021.ytiS3jV6-lkp@intel.com/config)
compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240624/202406240021.ytiS3jV6-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406240021.ytiS3jV6-lkp@intel.com/

All errors (new ones prefixed by >>):

>> ld.lld: error: duplicate symbol: pt_dmaengine_register
   >>> defined at ptdma-dmaengine.c:364 (drivers/dma/amd/ae4dma/../ptdma/ptdma-dmaengine.c:364)
   >>>            drivers/dma/amd/ptdma/ptdma-dmaengine.o:(pt_dmaengine_register) in archive vmlinux.a
   >>> defined at ptdma-dmaengine.c:364 (drivers/dma/amd/ae4dma/../ptdma/ptdma-dmaengine.c:364)
   >>>            drivers/dma/amd/ptdma/ptdma-dmaengine.o:(.text+0x0) in archive vmlinux.a
--
>> ld.lld: error: duplicate symbol: pt_dmaengine_unregister
   >>> defined at ptdma-dmaengine.c:467 (drivers/dma/amd/ae4dma/../ptdma/ptdma-dmaengine.c:467)
   >>>            drivers/dma/amd/ptdma/ptdma-dmaengine.o:(pt_dmaengine_unregister) in archive vmlinux.a
   >>> defined at ptdma-dmaengine.c:467 (drivers/dma/amd/ae4dma/../ptdma/ptdma-dmaengine.c:467)
   >>>            drivers/dma/amd/ptdma/ptdma-dmaengine.o:(.text+0x1ED0) in archive vmlinux.a
--
>> ld.lld: error: duplicate symbol: pt_start_queue
   >>> defined at amd_dma.c:14 (drivers/dma/amd/ae4dma/../common/amd_dma.c:14)
   >>>            drivers/dma/amd/common/amd_dma.o:(pt_start_queue) in archive vmlinux.a
   >>> defined at amd_dma.c:14 (drivers/dma/amd/ae4dma/../common/amd_dma.c:14)
   >>>            drivers/dma/amd/common/amd_dma.o:(.text+0x0) in archive vmlinux.a
--
>> ld.lld: error: duplicate symbol: pt_stop_queue
   >>> defined at amd_dma.c:20 (drivers/dma/amd/ae4dma/../common/amd_dma.c:20)
   >>>            drivers/dma/amd/common/amd_dma.o:(pt_stop_queue) in archive vmlinux.a
   >>> defined at amd_dma.c:20 (drivers/dma/amd/ae4dma/../common/amd_dma.c:20)
   >>>            drivers/dma/amd/common/amd_dma.o:(.text+0x70) in archive vmlinux.a
--
>> ld.lld: error: duplicate symbol: pt_check_status_trans
   >>> defined at ptdma-dev.c:133 (drivers/dma/amd/ptdma/ptdma-dev.c:133)
   >>>            drivers/dma/amd/ptdma/ptdma-dev.o:(pt_check_status_trans) in archive vmlinux.a
   >>> defined at ae4dma-dev.c:64 (drivers/dma/amd/ae4dma/ae4dma-dev.c:64)
   >>>            drivers/dma/amd/ae4dma/ae4dma-dev.o:(.text+0x0) in archive vmlinux.a
--
>> ld.lld: error: duplicate symbol: pt_core_perform_passthru
   >>> defined at ptdma-dev.c:90 (drivers/dma/amd/ptdma/ptdma-dev.c:90)
   >>>            drivers/dma/amd/ptdma/ptdma-dev.o:(pt_core_perform_passthru) in archive vmlinux.a
   >>> defined at ae4dma-dev.c:172 (drivers/dma/amd/ae4dma/ae4dma-dev.c:172)
   >>>            drivers/dma/amd/ae4dma/ae4dma-dev.o:(.text+0x350) in archive vmlinux.a
diff mbox series

Patch

diff --git a/drivers/dma/amd/ae4dma/Makefile b/drivers/dma/amd/ae4dma/Makefile
index e918f85a80ec..165d1c74b732 100644
--- a/drivers/dma/amd/ae4dma/Makefile
+++ b/drivers/dma/amd/ae4dma/Makefile
@@ -5,6 +5,6 @@ 
 
 obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
 
-ae4dma-objs := ae4dma-dev.o
+ae4dma-objs := ae4dma-dev.o  ../ptdma/ptdma-dmaengine.o ../common/amd_dma.o
 
 ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c b/drivers/dma/amd/ae4dma/ae4dma-dev.c
index 958bdab8db59..77c37649d8d1 100644
--- a/drivers/dma/amd/ae4dma/ae4dma-dev.c
+++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c
@@ -60,6 +60,15 @@  static void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx)
 	}
 }
 
+void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue *cmd_q)
+{
+	struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct ae4_cmd_queue, cmd_q);
+	int i;
+	/* Run the status/error check on every slot index 0..CMD_Q_LEN-1; pt is not used here */
+	for (i = 0; i < CMD_Q_LEN; i++)
+		ae4_check_status_error(ae4cmd_q, i);
+}
+
 static void ae4_pending_work(struct work_struct *work)
 {
 	struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct ae4_cmd_queue, p_work.work);
@@ -123,6 +132,66 @@  static irqreturn_t ae4_core_irq_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct ae4_cmd_queue *ae4cmd_q)
+{
+	bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0);
+	struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
+	u32 tail_wi;
+
+	if (soc) {
+		desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc->dwouv.dw0);
+		desc->dwouv.dw0 &= ~DWORD0_SOC;
+	}
+
+	mutex_lock(&ae4cmd_q->cmd_lock);
+	/* Under cmd_lock: copy the descriptor into the queue slot at the current tail index */
+	tail_wi = atomic_read(&ae4cmd_q->tail_wi);
+	memcpy(&cmd_q->qbase[tail_wi], desc, sizeof(struct ae4dma_desc));
+
+	atomic64_inc(&ae4cmd_q->q_cmd_count);
+
+	tail_wi = (tail_wi + 1) % CMD_Q_LEN;
+
+	atomic_set(&ae4cmd_q->tail_wi, tail_wi);
+	/* Full barrier: order the descriptor copy and tail_wi update before the writel() below */
+	mb();
+
+	writel(tail_wi, cmd_q->reg_control + 0x10);
+	/* Full barrier after the register write, before cmd_lock is released */
+	mb();
+
+	mutex_unlock(&ae4cmd_q->cmd_lock);
+
+	wake_up(&ae4cmd_q->q_w);
+
+	return 0;
+}
+
+int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
+			     struct pt_passthru_engine *pt_engine)
+{
+	struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct ae4_cmd_queue, cmd_q);
+	struct ae4dma_desc desc;
+
+	cmd_q->cmd_error = 0;
+	cmd_q->total_pt_ops++;
+	memset(&desc, 0, sizeof(desc));
+	desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL;
+	/* These dw1 fields are already zero from the memset() above; set again explicitly */
+	desc.dw1.status = 0;
+	desc.dw1.err_code = 0;
+	desc.dw1.desc_id = 0;
+
+	desc.length = pt_engine->src_len;
+	/* NOTE(review): *_lo takes upper_32_bits() and *_hi takes lower_32_bits(); confirm vs. descriptor layout */
+	desc.src_lo = upper_32_bits(pt_engine->src_dma);
+	desc.src_hi = lower_32_bits(pt_engine->src_dma);
+	desc.dst_lo = upper_32_bits(pt_engine->dst_dma);
+	desc.dst_hi = lower_32_bits(pt_engine->dst_dma);
+
+	return ae4_core_execute_cmd(&desc, ae4cmd_q);
+}
+
 void ae4_destroy_work(struct ae4_device *ae4)
 {
 	struct ae4_cmd_queue *ae4cmd_q;
@@ -202,5 +271,9 @@  int ae4_core_init(struct ae4_device *ae4)
 		init_completion(&ae4cmd_q->cmp);
 	}
 
+	ret = pt_dmaengine_register(pt);
+	if (ret)
+		ae4_destroy_work(ae4);
+
 	return ret;
 }
diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c
index ddebf0609c4d..5450fa551eea 100644
--- a/drivers/dma/amd/ae4dma/ae4dma-pci.c
+++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
@@ -131,6 +131,7 @@  static int ae4_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	pt = &ae4->pt;
 	pt->dev = dev;
+	pt->ver = AE4_DMA_VERSION;
 
 	pt->io_regs = pcim_iomap_table(pdev)[0];
 	if (!pt->io_regs) {
diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
index 4e4584e152a1..f1b6dcc1d8c3 100644
--- a/drivers/dma/amd/ae4dma/ae4dma.h
+++ b/drivers/dma/amd/ae4dma/ae4dma.h
@@ -16,6 +16,7 @@ 
 
 #define AE4_DESC_COMPLETED		0x3
 #define AE4_DMA_VERSION			4
+#define CMD_AE4_DESC_DW0_VAL		2
 
 struct ae4_msix {
 	int msix_count;
@@ -36,6 +37,7 @@  struct ae4_cmd_queue {
 	atomic64_t done_cnt;
 	atomic64_t q_cmd_count;
 	atomic_t dridx;
+	atomic_t tail_wi;
 	unsigned int id;
 };