
[v2,2/7] dmaengine: ae4dma: Add AMD ae4dma controller driver

Message ID 20240617100359.2550541-3-Basavaraj.Natikar@amd.com (mailing list archive)
State Superseded
Series Add support of AMD AE4DMA DMA Engine

Commit Message

Basavaraj Natikar June 17, 2024, 10:03 a.m. UTC
Add support for the AMD AE4DMA controller. It performs high-bandwidth
memory-to-memory and IO copy operations. Device commands are managed
via a circular queue of 'descriptors', each of which specifies the
source and destination addresses for copying a single buffer of data.

Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com>
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
---
 MAINTAINERS                         |   6 +
 drivers/dma/amd/Kconfig             |   1 +
 drivers/dma/amd/Makefile            |   1 +
 drivers/dma/amd/ae4dma/Kconfig      |  13 ++
 drivers/dma/amd/ae4dma/Makefile     |  10 ++
 drivers/dma/amd/ae4dma/ae4dma-dev.c | 206 ++++++++++++++++++++++++++++
 drivers/dma/amd/ae4dma/ae4dma-pci.c | 190 +++++++++++++++++++++++++
 drivers/dma/amd/ae4dma/ae4dma.h     |  77 +++++++++++
 drivers/dma/amd/common/amd_dma.h    |  26 ++++
 9 files changed, 530 insertions(+)
 create mode 100644 drivers/dma/amd/ae4dma/Kconfig
 create mode 100644 drivers/dma/amd/ae4dma/Makefile
 create mode 100644 drivers/dma/amd/ae4dma/ae4dma-dev.c
 create mode 100644 drivers/dma/amd/ae4dma/ae4dma-pci.c
 create mode 100644 drivers/dma/amd/ae4dma/ae4dma.h
 create mode 100644 drivers/dma/amd/common/amd_dma.h

Comments

Bjorn Helgaas June 17, 2024, 4:40 p.m. UTC | #1
On Mon, Jun 17, 2024 at 03:33:54PM +0530, Basavaraj Natikar wrote:
> Add support for AMD AE4DMA controller. It performs high-bandwidth
> memory to memory and IO copy operation. Device commands are managed
> via a circular queue of 'descriptors', each of which specifies source
> and destination addresses for copying a single buffer of data.

> +++ b/drivers/dma/amd/ae4dma/Kconfig
> @@ -0,0 +1,13 @@
> +# SPDX-License-Identifier: GPL-2.0
> +config AMD_AE4DMA
> +	tristate  "AMD AE4DMA Engine"
> +	depends on X86_64 && PCI

Possible "(X86_64 || COMPILE_TEST)"?

> +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c

> +static int ae4_get_irqs(struct ae4_device *ae4)
> +{
> +	struct pt_device *pt = &ae4->pt;
> +	struct device *dev = pt->dev;
> +	int ret;
> +
> +	ret = ae4_get_msix_irqs(ae4);
> +	if (!ret)
> +		return 0;
> +
> +	/* Couldn't get MSI-X vectors, try MSI */
> +	dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
> +	ret = ae4_get_msi_irq(ae4);
> +	if (!ret)
> +		return 0;

Consider pci_alloc_irq_vectors() and pci_free_irq_vectors() here.

> +	/* Couldn't get MSI interrupt */
> +	dev_err(dev, "could not enable MSI (%d)\n", ret);
> +
> +	return ret;
> +}
Bjorn Helgaas June 17, 2024, 4:50 p.m. UTC | #2
On Mon, Jun 17, 2024 at 03:33:54PM +0530, Basavaraj Natikar wrote:
> Add support for AMD AE4DMA controller. It performs high-bandwidth
> memory to memory and IO copy operation. Device commands are managed
> via a circular queue of 'descriptors', each of which specifies source
> and destination addresses for copying a single buffer of data.

> +static void ae4_free_irqs(struct ae4_device *ae4)
> +{
> +	struct ae4_msix *ae4_msix;
> +	struct pci_dev *pdev;
> +	struct pt_device *pt;
> +	struct device *dev;
> +	int i;
> +
> +	if (ae4) {

I don't think this test is necessary.  I don't think it's possible to
get here with ae4==0.

> +		pt = &ae4->pt;
> +		dev = pt->dev;
> +		pdev = to_pci_dev(dev);
> +
> +		ae4_msix = ae4->ae4_msix;
> +		if (ae4_msix && ae4_msix->msix_count)
> +			pci_disable_msix(pdev);
> +		else if (pdev->irq)
> +			pci_disable_msi(pdev);
> +
> +		for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
> +			ae4->ae4_irq[i] = 0;

Clearing ae4_irq[] also doesn't seem necessary, since this is only
used in .remove(), and ae4 should never be used again.  If this path
becomes used in some future patch that depends on ae4_irq[] being
cleared, perhaps the clearing could be moved to that patch.

> +	}
> +}
> +
> +static void ae4_deinit(struct ae4_device *ae4)
> +{
> +	ae4_free_irqs(ae4);
> +}

> +static void ae4_pci_remove(struct pci_dev *pdev)
> +{
> +	struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev);
> +
> +	ae4_destroy_work(ae4);
> +	ae4_deinit(ae4);
> +}
Philipp Stanner June 18, 2024, 8:14 a.m. UTC | #3
On Mon, 2024-06-17 at 15:33 +0530, Basavaraj Natikar wrote:
> Add support for AMD AE4DMA controller. It performs high-bandwidth
> memory to memory and IO copy operation. Device commands are managed
> via a circular queue of 'descriptors', each of which specifies source
> and destination addresses for copying a single buffer of data.
> 
> Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com>
> Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> ---
>  MAINTAINERS                         |   6 +
>  drivers/dma/amd/Kconfig             |   1 +
>  drivers/dma/amd/Makefile            |   1 +
>  drivers/dma/amd/ae4dma/Kconfig      |  13 ++
>  drivers/dma/amd/ae4dma/Makefile     |  10 ++
>  drivers/dma/amd/ae4dma/ae4dma-dev.c | 206
> ++++++++++++++++++++++++++++
>  drivers/dma/amd/ae4dma/ae4dma-pci.c | 190 +++++++++++++++++++++++++
>  drivers/dma/amd/ae4dma/ae4dma.h     |  77 +++++++++++
>  drivers/dma/amd/common/amd_dma.h    |  26 ++++
>  9 files changed, 530 insertions(+)
>  create mode 100644 drivers/dma/amd/ae4dma/Kconfig
>  create mode 100644 drivers/dma/amd/ae4dma/Makefile
>  create mode 100644 drivers/dma/amd/ae4dma/ae4dma-dev.c
>  create mode 100644 drivers/dma/amd/ae4dma/ae4dma-pci.c
>  create mode 100644 drivers/dma/amd/ae4dma/ae4dma.h
>  create mode 100644 drivers/dma/amd/common/amd_dma.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index c500c0567779..0d222d1ca83f 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -947,6 +947,12 @@ L: linux-edac@vger.kernel.org
>  S:     Supported
>  F:     drivers/ras/amd/atl/*
>  
> +AMD AE4DMA DRIVER
> +M:     Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> +L:     dmaengine@vger.kernel.org
> +S:     Maintained
> +F:     drivers/dma/amd/ae4dma/
> +
>  AMD AXI W1 DRIVER
>  M:     Kris Chaplin <kris.chaplin@amd.com>
>  R:     Thomas Delev <thomas.delev@amd.com>
> diff --git a/drivers/dma/amd/Kconfig b/drivers/dma/amd/Kconfig
> index 8246b463bcf7..8c25a3ed6b94 100644
> --- a/drivers/dma/amd/Kconfig
> +++ b/drivers/dma/amd/Kconfig
> @@ -3,3 +3,4 @@
>  # AMD DMA Drivers
>  
>  source "drivers/dma/amd/ptdma/Kconfig"
> +source "drivers/dma/amd/ae4dma/Kconfig"
> diff --git a/drivers/dma/amd/Makefile b/drivers/dma/amd/Makefile
> index dd7257ba7e06..8049b06a9ff5 100644
> --- a/drivers/dma/amd/Makefile
> +++ b/drivers/dma/amd/Makefile
> @@ -4,3 +4,4 @@
>  #
>  
>  obj-$(CONFIG_AMD_PTDMA) += ptdma/
> +obj-$(CONFIG_AMD_AE4DMA) += ae4dma/
> diff --git a/drivers/dma/amd/ae4dma/Kconfig
> b/drivers/dma/amd/ae4dma/Kconfig
> new file mode 100644
> index 000000000000..cf8db4dac98d
> --- /dev/null
> +++ b/drivers/dma/amd/ae4dma/Kconfig
> @@ -0,0 +1,13 @@
> +# SPDX-License-Identifier: GPL-2.0
> +config AMD_AE4DMA
> +       tristate  "AMD AE4DMA Engine"
> +       depends on X86_64 && PCI
> +       select DMA_ENGINE
> +       select DMA_VIRTUAL_CHANNELS
> +       help
> +         Enable support for the AMD AE4DMA controller. This
> controller
> +         provides DMA capabilities to perform high bandwidth memory
> to
> +         memory and IO copy operations. It performs DMA transfer
> through
> +         queue-based descriptor management. This DMA controller is
> intended
> +         to be used with AMD Non-Transparent Bridge devices and not
> for
> +         general purpose peripheral DMA.
> diff --git a/drivers/dma/amd/ae4dma/Makefile
> b/drivers/dma/amd/ae4dma/Makefile
> new file mode 100644
> index 000000000000..e918f85a80ec
> --- /dev/null
> +++ b/drivers/dma/amd/ae4dma/Makefile
> @@ -0,0 +1,10 @@
> +# SPDX-License-Identifier: GPL-2.0
> +#
> +# AMD AE4DMA driver
> +#
> +
> +obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
> +
> +ae4dma-objs := ae4dma-dev.o
> +
> +ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
> diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c
> b/drivers/dma/amd/ae4dma/ae4dma-dev.c
> new file mode 100644
> index 000000000000..958bdab8db59
> --- /dev/null
> +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c
> @@ -0,0 +1,206 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * AMD AE4DMA driver
> + *
> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> + */
> +
> +#include "ae4dma.h"
> +
> +static unsigned int max_hw_q = 1;
> +module_param(max_hw_q, uint, 0444);
> +MODULE_PARM_DESC(max_hw_q, "max hw queues supported by engine (any
> non-zero value, default: 1)");
> +
> +static char *ae4_error_codes[] = {
> +       "",
> +       "ERR 01: INVALID HEADER DW0",
> +       "ERR 02: INVALID STATUS",
> +       "ERR 03: INVALID LENGTH - 4 BYTE ALIGNMENT",
> +       "ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT",
> +       "ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT",
> +       "ERR 06: INVALID ALIGNMENT",
> +       "ERR 07: INVALID DESCRIPTOR",
> +};
> +
> +static void ae4_log_error(struct pt_device *d, int e)
> +{
> +       if (e <= 7)
> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n",
> ae4_error_codes[e], e);
> +       else if (e > 7 && e <= 15)
> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n",
> "INVALID DESCRIPTOR", e);
> +       else if (e > 15 && e <= 31)
> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n",
> "INVALID DESCRIPTOR", e);
> +       else if (e > 31 && e <= 63)
> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n",
> "INVALID DESCRIPTOR", e);
> +       else if (e > 63 && e <= 127)
> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE
> ERROR", e);
> +       else if (e > 127 && e <= 255)
> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE
> ERROR", e);
> +       else
> +               dev_info(d->dev, "Unknown AE4DMA error");
> +}
> +
> +static void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q,
> int idx)
> +{
> +       struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
> +       struct ae4dma_desc desc;
> +       u8 status;
> +
> +       memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct
> ae4dma_desc));
> +       /* Synchronize ordering */
> +       dma_rmb();
> +       status = desc.dw1.status;
> +       if (status && status != AE4_DESC_COMPLETED) {
> +               cmd_q->cmd_error = desc.dw1.err_code;
> +               if (cmd_q->cmd_error)
> +                       ae4_log_error(cmd_q->pt, cmd_q->cmd_error);
> +       }
> +}
> +
> +static void ae4_pending_work(struct work_struct *work)
> +{
> +       struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct
> ae4_cmd_queue, p_work.work);
> +       struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
> +       struct pt_cmd *cmd;
> +       u32 cridx, dridx;
> +
> +       while (true) {
> +               wait_event_interruptible(ae4cmd_q->q_w,
> +                                        ((atomic64_read(&ae4cmd_q-
> >done_cnt)) <
> +                                          atomic64_read(&ae4cmd_q-
> >intr_cnt)));
> +
> +               atomic64_inc(&ae4cmd_q->done_cnt);
> +
> +               mutex_lock(&ae4cmd_q->cmd_lock);
> +
> +               cridx = readl(cmd_q->reg_control + 0x0C);
> +               dridx = atomic_read(&ae4cmd_q->dridx);
> +
> +               while ((dridx != cridx) && !list_empty(&ae4cmd_q-
> >cmd)) {
> +                       cmd = list_first_entry(&ae4cmd_q->cmd, struct
> pt_cmd, entry);
> +                       list_del(&cmd->entry);
> +
> +                       ae4_check_status_error(ae4cmd_q, dridx);
> +                       cmd->pt_cmd_callback(cmd->data, cmd->ret);
> +
> +                       atomic64_dec(&ae4cmd_q->q_cmd_count);
> +                       dridx = (dridx + 1) % CMD_Q_LEN;
> +                       atomic_set(&ae4cmd_q->dridx, dridx);
> +                       /* Synchronize ordering */
> +                       dma_mb();
> +
> +                       complete_all(&ae4cmd_q->cmp);
> +               }
> +
> +               mutex_unlock(&ae4cmd_q->cmd_lock);
> +       }
> +}
> +
> +static irqreturn_t ae4_core_irq_handler(int irq, void *data)
> +{
> +       struct ae4_cmd_queue *ae4cmd_q = data;
> +       struct pt_cmd_queue *cmd_q;
> +       struct pt_device *pt;
> +       u32 status;
> +
> +       cmd_q = &ae4cmd_q->cmd_q;
> +       pt = cmd_q->pt;
> +
> +       pt->total_interrupts++;
> +       atomic64_inc(&ae4cmd_q->intr_cnt);
> +
> +       wake_up(&ae4cmd_q->q_w);
> +
> +       status = readl(cmd_q->reg_control + 0x14);
> +       if (status & BIT(0)) {
> +               status &= GENMASK(31, 1);
> +               writel(status, cmd_q->reg_control + 0x14);
> +       }
> +
> +       return IRQ_HANDLED;
> +}
> +
> +void ae4_destroy_work(struct ae4_device *ae4)
> +{
> +       struct ae4_cmd_queue *ae4cmd_q;
> +       int i;
> +
> +       for (i = 0; i < ae4->cmd_q_count; i++) {
> +               ae4cmd_q = &ae4->ae4cmd_q[i];
> +
> +               if (!ae4cmd_q->pws)
> +                       break;
> +
> +               cancel_delayed_work_sync(&ae4cmd_q->p_work);
> +               destroy_workqueue(ae4cmd_q->pws);
> +       }
> +}
> +
> +int ae4_core_init(struct ae4_device *ae4)
> +{
> +       struct pt_device *pt = &ae4->pt;
> +       struct ae4_cmd_queue *ae4cmd_q;
> +       struct device *dev = pt->dev;
> +       struct pt_cmd_queue *cmd_q;
> +       int i, ret = 0;
> +
> +       writel(max_hw_q, pt->io_regs);
> +
> +       for (i = 0; i < max_hw_q; i++) {
> +               ae4cmd_q = &ae4->ae4cmd_q[i];
> +               ae4cmd_q->id = ae4->cmd_q_count;
> +               ae4->cmd_q_count++;
> +
> +               cmd_q = &ae4cmd_q->cmd_q;
> +               cmd_q->pt = pt;
> +
> +               /* Preset some register values (Q size is 32byte
> (0x20)) */
> +               cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);
> +
> +               ret = devm_request_irq(dev, ae4->ae4_irq[i],
> ae4_core_irq_handler, 0,
> +                                      dev_name(pt->dev), ae4cmd_q);
> +               if (ret)
> +                       return ret;
> +
> +               cmd_q->qsize = Q_SIZE(sizeof(struct ae4dma_desc));
> +
> +               cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize,
> &cmd_q->qbase_dma,
> +                                                  GFP_KERNEL);
> +               if (!cmd_q->qbase)
> +                       return -ENOMEM;
> +       }
> +
> +       for (i = 0; i < ae4->cmd_q_count; i++) {
> +               ae4cmd_q = &ae4->ae4cmd_q[i];
> +
> +               cmd_q = &ae4cmd_q->cmd_q;
> +
> +               /* Preset some register values (Q size is 32byte
> (0x20)) */
> +               cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);
> +
> +               /* Update the device registers with queue
> information. */
> +               writel(CMD_Q_LEN, cmd_q->reg_control + 0x08);
> +
> +               cmd_q->qdma_tail = cmd_q->qbase_dma;
> +               writel(lower_32_bits(cmd_q->qdma_tail), cmd_q-
> >reg_control + 0x18);
> +               writel(upper_32_bits(cmd_q->qdma_tail), cmd_q-
> >reg_control + 0x1C);
> +
> +               INIT_LIST_HEAD(&ae4cmd_q->cmd);
> +               init_waitqueue_head(&ae4cmd_q->q_w);
> +
> +               ae4cmd_q->pws = alloc_ordered_workqueue("ae4dma_%d",
> WQ_MEM_RECLAIM, ae4cmd_q->id);
> +               if (!ae4cmd_q->pws) {
> +                       ae4_destroy_work(ae4);
> +                       return -ENOMEM;
> +               }
> +               INIT_DELAYED_WORK(&ae4cmd_q->p_work,
> ae4_pending_work);
> +               queue_delayed_work(ae4cmd_q->pws, &ae4cmd_q->p_work, 
> usecs_to_jiffies(100));
> +
> +               init_completion(&ae4cmd_q->cmp);
> +       }
> +
> +       return ret;
> +}
> diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c
> b/drivers/dma/amd/ae4dma/ae4dma-pci.c
> new file mode 100644
> index 000000000000..ddebf0609c4d
> --- /dev/null
> +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
> @@ -0,0 +1,190 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * AMD AE4DMA driver
> + *
> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> + */
> +
> +#include "ae4dma.h"
> +
> +static int ae4_get_msi_irq(struct ae4_device *ae4)
> +{
> +       struct pt_device *pt = &ae4->pt;
> +       struct device *dev = pt->dev;
> +       struct pci_dev *pdev;
> +       int ret, i;
> +
> +       pdev = to_pci_dev(dev);
> +       ret = pci_enable_msi(pdev);
> +       if (ret)
> +               return ret;
> +
> +       for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
> +               ae4->ae4_irq[i] = pdev->irq;
> +
> +       return 0;
> +}
> +
> +static int ae4_get_msix_irqs(struct ae4_device *ae4)
> +{
> +       struct ae4_msix *ae4_msix = ae4->ae4_msix;
> +       struct pt_device *pt = &ae4->pt;
> +       struct device *dev = pt->dev;
> +       struct pci_dev *pdev;
> +       int v, i, ret;
> +
> +       pdev = to_pci_dev(dev);
> +
> +       for (v = 0; v < ARRAY_SIZE(ae4_msix->msix_entry); v++)
> +               ae4_msix->msix_entry[v].entry = v;
> +
> +       ret = pci_enable_msix_range(pdev, ae4_msix->msix_entry, 1,
> v);
> +       if (ret < 0)
> +               return ret;
> +
> +       ae4_msix->msix_count = ret;
> +
> +       for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
> +               ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector;
> +
> +       return 0;
> +}
> +
> +static int ae4_get_irqs(struct ae4_device *ae4)
> +{
> +       struct pt_device *pt = &ae4->pt;
> +       struct device *dev = pt->dev;
> +       int ret;
> +
> +       ret = ae4_get_msix_irqs(ae4);
> +       if (!ret)
> +               return 0;
> +
> +       /* Couldn't get MSI-X vectors, try MSI */
> +       dev_err(dev, "could not enable MSI-X (%d), trying MSI\n",
> ret);
> +       ret = ae4_get_msi_irq(ae4);
> +       if (!ret)
> +               return 0;
> +
> +       /* Couldn't get MSI interrupt */
> +       dev_err(dev, "could not enable MSI (%d)\n", ret);
> +
> +       return ret;
> +}
> +
> +static void ae4_free_irqs(struct ae4_device *ae4)
> +{
> +       struct ae4_msix *ae4_msix;
> +       struct pci_dev *pdev;
> +       struct pt_device *pt;
> +       struct device *dev;
> +       int i;
> +
> +       if (ae4) {
> +               pt = &ae4->pt;
> +               dev = pt->dev;
> +               pdev = to_pci_dev(dev);
> +
> +               ae4_msix = ae4->ae4_msix;
> +               if (ae4_msix && ae4_msix->msix_count)
> +                       pci_disable_msix(pdev);
> +               else if (pdev->irq)
> +                       pci_disable_msi(pdev);
> +
> +               for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
> +                       ae4->ae4_irq[i] = 0;
> +       }
> +}
> +
> +static void ae4_deinit(struct ae4_device *ae4)
> +{
> +       ae4_free_irqs(ae4);
> +}
> +
> +static int ae4_pci_probe(struct pci_dev *pdev, const struct
> pci_device_id *id)
> +{
> +       struct device *dev = &pdev->dev;
> +       struct ae4_device *ae4;
> +       struct pt_device *pt;
> +       int bar_mask;
> +       int ret = 0;
> +
> +       ae4 = devm_kzalloc(dev, sizeof(*ae4), GFP_KERNEL);
> +       if (!ae4)
> +               return -ENOMEM;
> +
> +       ae4->ae4_msix = devm_kzalloc(dev, sizeof(struct ae4_msix),
> GFP_KERNEL);
> +       if (!ae4->ae4_msix)
> +               return -ENOMEM;
> +
> +       ret = pcim_enable_device(pdev);
> +       if (ret)
> +               goto ae4_error;
> +
> +       bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
> +       ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma");
> +       if (ret)
> +               goto ae4_error;
> +
> +       pt = &ae4->pt;
> +       pt->dev = dev;
> +
> +       pt->io_regs = pcim_iomap_table(pdev)[0];
> +       if (!pt->io_regs) {
> +               ret = -ENOMEM;
> +               goto ae4_error;
> +       }

Please note that we are about to deprecate pcim_iomap_regions() and
pcim_iomap_table().

The details are in this series [1]. Code is currently in this branch
[2].

That should go mainline in v6.11, hopefully. So you might want to
consider waiting a bit with your series so you won't end up with a
deprecated API in your new code?

The new way of coding it would then also be a bit simpler:

pt->io_regs = pcim_iomap_region(pdev, 0, "ae4dma");
if (IS_ERR(pt->io_regs)) {

We'd just have to make pcim_iomap_region() public for you, since you'd
be the first external user.
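
For illustration, the BAR setup in ae4_pci_probe() might then look roughly
like this (untested, and assuming pcim_iomap_region() is exported with the
signature above):

	ret = pcim_enable_device(pdev);
	if (ret)
		goto ae4_error;

	pt = &ae4->pt;
	pt->dev = dev;

	/* Request and ioremap BAR 0 in one step; no pci_select_bars()/iomap table */
	pt->io_regs = pcim_iomap_region(pdev, 0, "ae4dma");
	if (IS_ERR(pt->io_regs)) {
		ret = PTR_ERR(pt->io_regs);
		goto ae4_error;
	}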

Cheers,
P.


[1] https://lore.kernel.org/all/20240613115032.29098-1-pstanner@redhat.com/
[2] https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git/log/?h=devres

> +
> +       ret = ae4_get_irqs(ae4);
> +       if (ret)
> +               goto ae4_error;
> +
> +       pci_set_master(pdev);
> +
> +       dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
> +
> +       dev_set_drvdata(dev, ae4);
> +
> +       ret = ae4_core_init(ae4);
> +       if (ret)
> +               goto ae4_error;
> +
> +       return 0;
> +
> +ae4_error:
> +       ae4_deinit(ae4);
> +
> +       return ret;
> +}
> +
> +static void ae4_pci_remove(struct pci_dev *pdev)
> +{
> +       struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev);
> +
> +       ae4_destroy_work(ae4);
> +       ae4_deinit(ae4);
> +}
> +
> +static const struct pci_device_id ae4_pci_table[] = {
> +       { PCI_VDEVICE(AMD, 0x14C8), },
> +       { PCI_VDEVICE(AMD, 0x14DC), },
> +       { PCI_VDEVICE(AMD, 0x149B), },
> +       /* Last entry must be zero */
> +       { 0, }
> +};
> +MODULE_DEVICE_TABLE(pci, ae4_pci_table);
> +
> +static struct pci_driver ae4_pci_driver = {
> +       .name = "ae4dma",
> +       .id_table = ae4_pci_table,
> +       .probe = ae4_pci_probe,
> +       .remove = ae4_pci_remove,
> +};
> +
> +module_pci_driver(ae4_pci_driver);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_DESCRIPTION("AMD AE4DMA driver");
> diff --git a/drivers/dma/amd/ae4dma/ae4dma.h
> b/drivers/dma/amd/ae4dma/ae4dma.h
> new file mode 100644
> index 000000000000..24b1253ad570
> --- /dev/null
> +++ b/drivers/dma/amd/ae4dma/ae4dma.h
> @@ -0,0 +1,77 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * AMD AE4DMA driver
> + *
> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> + */
> +#ifndef __AE4DMA_H__
> +#define __AE4DMA_H__
> +
> +#include "../common/amd_dma.h"
> +
> +#define MAX_AE4_HW_QUEUES              16
> +
> +#define AE4_DESC_COMPLETED             0x3
> +
> +struct ae4_msix {
> +       int msix_count;
> +       struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
> +};
> +
> +struct ae4_cmd_queue {
> +       struct ae4_device *ae4;
> +       struct pt_cmd_queue cmd_q;
> +       struct list_head cmd;
> +       /* protect command operations */
> +       struct mutex cmd_lock;
> +       struct delayed_work p_work;
> +       struct workqueue_struct *pws;
> +       struct completion cmp;
> +       wait_queue_head_t q_w;
> +       atomic64_t intr_cnt;
> +       atomic64_t done_cnt;
> +       atomic64_t q_cmd_count;
> +       atomic_t dridx;
> +       unsigned int id;
> +};
> +
> +union dwou {
> +       u32 dw0;
> +       struct dword0 {
> +       u8      byte0;
> +       u8      byte1;
> +       u16     timestamp;
> +       } dws;
> +};
> +
> +struct dword1 {
> +       u8      status;
> +       u8      err_code;
> +       u16     desc_id;
> +};
> +
> +struct ae4dma_desc {
> +       union dwou dwouv;
> +       struct dword1 dw1;
> +       u32 length;
> +       u32 rsvd;
> +       u32 src_hi;
> +       u32 src_lo;
> +       u32 dst_hi;
> +       u32 dst_lo;
> +};
> +
> +struct ae4_device {
> +       struct pt_device pt;
> +       struct ae4_msix *ae4_msix;
> +       struct ae4_cmd_queue ae4cmd_q[MAX_AE4_HW_QUEUES];
> +       unsigned int ae4_irq[MAX_AE4_HW_QUEUES];
> +       unsigned int cmd_q_count;
> +};
> +
> +int ae4_core_init(struct ae4_device *ae4);
> +void ae4_destroy_work(struct ae4_device *ae4);
> +#endif
> diff --git a/drivers/dma/amd/common/amd_dma.h
> b/drivers/dma/amd/common/amd_dma.h
> new file mode 100644
> index 000000000000..f9f396cd4371
> --- /dev/null
> +++ b/drivers/dma/amd/common/amd_dma.h
> @@ -0,0 +1,26 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * AMD DMA Driver common
> + *
> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> + */
> +
> +#ifndef AMD_DMA_H
> +#define AMD_DMA_H
> +
> +#include <linux/device.h>
> +#include <linux/dmaengine.h>
> +#include <linux/dmapool.h>
> +#include <linux/list.h>
> +#include <linux/mutex.h>
> +#include <linux/pci.h>
> +#include <linux/spinlock.h>
> +#include <linux/wait.h>
> +
> +#include "../ptdma/ptdma.h"
> +#include "../../virt-dma.h"
> +
> +#endif
Basavaraj Natikar June 18, 2024, 1:23 p.m. UTC | #4
On 6/17/2024 10:10 PM, Bjorn Helgaas wrote:
> On Mon, Jun 17, 2024 at 03:33:54PM +0530, Basavaraj Natikar wrote:
>> Add support for AMD AE4DMA controller. It performs high-bandwidth
>> memory to memory and IO copy operation. Device commands are managed
>> via a circular queue of 'descriptors', each of which specifies source
>> and destination addresses for copying a single buffer of data.
>> +++ b/drivers/dma/amd/ae4dma/Kconfig
>> @@ -0,0 +1,13 @@
>> +# SPDX-License-Identifier: GPL-2.0
>> +config AMD_AE4DMA
>> +	tristate  "AMD AE4DMA Engine"
>> +	depends on X86_64 && PCI
> Possible "(X86_64 || COMPILE_TEST)"?

Sure, I will change this to "depends on (X86_64 || COMPILE_TEST) && PCI".

>
>> +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
>> +static int ae4_get_irqs(struct ae4_device *ae4)
>> +{
>> +	struct pt_device *pt = &ae4->pt;
>> +	struct device *dev = pt->dev;
>> +	int ret;
>> +
>> +	ret = ae4_get_msix_irqs(ae4);
>> +	if (!ret)
>> +		return 0;
>> +
>> +	/* Couldn't get MSI-X vectors, try MSI */
>> +	dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
>> +	ret = ae4_get_msi_irq(ae4);
>> +	if (!ret)
>> +		return 0;
> Consider pci_alloc_irq_vectors() and pci_free_irq_vectors() here.

Sure, I will change all of the code to use pci_alloc_irq_vectors().
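
Roughly like this (untested sketch; MAX_AE4_HW_QUEUES, ae4_msix and ae4_irq[]
are the existing fields, and the MSI-X to MSI fallback is left to the PCI core):

static int ae4_get_irqs(struct ae4_device *ae4)
{
	struct ae4_msix *ae4_msix = ae4->ae4_msix;
	struct pt_device *pt = &ae4->pt;
	struct device *dev = pt->dev;
	struct pci_dev *pdev = to_pci_dev(dev);
	int i, ret;

	/* Try MSI-X first; the core falls back to a single MSI vector */
	ret = pci_alloc_irq_vectors(pdev, 1, MAX_AE4_HW_QUEUES,
				    PCI_IRQ_MSIX | PCI_IRQ_MSI);
	if (ret < 0) {
		dev_err(dev, "could not enable MSI-X/MSI (%d)\n", ret);
		return ret;
	}

	ae4_msix->msix_count = ret;

	/* Queues beyond the allocated vector count share vector 0 */
	for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
		ae4->ae4_irq[i] = pci_irq_vector(pdev, i < ret ? i : 0);

	return 0;
}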

Thanks,
--
Basavaraj

>
>> +	/* Couldn't get MSI interrupt */
>> +	dev_err(dev, "could not enable MSI (%d)\n", ret);
>> +
>> +	return ret;
>> +}
Basavaraj Natikar June 18, 2024, 1:28 p.m. UTC | #5
On 6/17/2024 10:20 PM, Bjorn Helgaas wrote:
> On Mon, Jun 17, 2024 at 03:33:54PM +0530, Basavaraj Natikar wrote:
>> Add support for AMD AE4DMA controller. It performs high-bandwidth
>> memory to memory and IO copy operation. Device commands are managed
>> via a circular queue of 'descriptors', each of which specifies source
>> and destination addresses for copying a single buffer of data.
>> +static void ae4_free_irqs(struct ae4_device *ae4)
>> +{
>> +	struct ae4_msix *ae4_msix;
>> +	struct pci_dev *pdev;
>> +	struct pt_device *pt;
>> +	struct device *dev;
>> +	int i;
>> +
>> +	if (ae4) {
> I don't think this test is necessary.  I don't think it's possible to
> get here with ae4==0.

Yes, I will change it accordingly.

>
>> +		pt = &ae4->pt;
>> +		dev = pt->dev;
>> +		pdev = to_pci_dev(dev);
>> +
>> +		ae4_msix = ae4->ae4_msix;
>> +		if (ae4_msix && ae4_msix->msix_count)
>> +			pci_disable_msix(pdev);
>> +		else if (pdev->irq)
>> +			pci_disable_msi(pdev);
>> +
>> +		for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
>> +			ae4->ae4_irq[i] = 0;
> Clearing ae4_irq[] also doesn't seem necessary, since this is only
> used in .remove(), and ae4 should never be used again.  If this path
> becomes used in some future patch that depends on ae4_irq[] being
> cleared, perhaps the clearing could be moved to that patch.

Sure, I will change the code to be like this:

       if (ae4_msix && (ae4_msix->msix_count || ae4->ae4_irq[MAX_AE4_HW_QUEUES - 1]))
               pci_free_irq_vectors(pdev);
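
With the ae4 NULL check and the ae4_irq[] clearing dropped as suggested, the
whole function would then reduce to roughly this (untested):

static void ae4_free_irqs(struct ae4_device *ae4)
{
	struct ae4_msix *ae4_msix = ae4->ae4_msix;
	struct pci_dev *pdev = to_pci_dev(ae4->pt.dev);

	/* Release whichever vectors (MSI-X or MSI) were allocated */
	if (ae4_msix && (ae4_msix->msix_count || ae4->ae4_irq[MAX_AE4_HW_QUEUES - 1]))
		pci_free_irq_vectors(pdev);
}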

Thanks,
--
Basavaraj 

>
>> +	}
>> +}
>> +
>> +static void ae4_deinit(struct ae4_device *ae4)
>> +{
>> +	ae4_free_irqs(ae4);
>> +}
>> +static void ae4_pci_remove(struct pci_dev *pdev)
>> +{
>> +	struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev);
>> +
>> +	ae4_destroy_work(ae4);
>> +	ae4_deinit(ae4);
>> +}
Basavaraj Natikar June 18, 2024, 1:32 p.m. UTC | #6
On 6/18/2024 1:44 PM, Philipp Stanner wrote:
> On Mon, 2024-06-17 at 15:33 +0530, Basavaraj Natikar wrote:
>> Add support for AMD AE4DMA controller. It performs high-bandwidth
>> memory to memory and IO copy operation. Device commands are managed
>> via a circular queue of 'descriptors', each of which specifies source
>> and destination addresses for copying a single buffer of data.
>>
>> Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com>
>> Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
>> ---
>>  MAINTAINERS                         |   6 +
>>  drivers/dma/amd/Kconfig             |   1 +
>>  drivers/dma/amd/Makefile            |   1 +
>>  drivers/dma/amd/ae4dma/Kconfig      |  13 ++
>>  drivers/dma/amd/ae4dma/Makefile     |  10 ++
>>  drivers/dma/amd/ae4dma/ae4dma-dev.c | 206
>> ++++++++++++++++++++++++++++
>>  drivers/dma/amd/ae4dma/ae4dma-pci.c | 190 +++++++++++++++++++++++++
>>  drivers/dma/amd/ae4dma/ae4dma.h     |  77 +++++++++++
>>  drivers/dma/amd/common/amd_dma.h    |  26 ++++
>>  9 files changed, 530 insertions(+)
>>  create mode 100644 drivers/dma/amd/ae4dma/Kconfig
>>  create mode 100644 drivers/dma/amd/ae4dma/Makefile
>>  create mode 100644 drivers/dma/amd/ae4dma/ae4dma-dev.c
>>  create mode 100644 drivers/dma/amd/ae4dma/ae4dma-pci.c
>>  create mode 100644 drivers/dma/amd/ae4dma/ae4dma.h
>>  create mode 100644 drivers/dma/amd/common/amd_dma.h
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index c500c0567779..0d222d1ca83f 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -947,6 +947,12 @@ L: linux-edac@vger.kernel.org
>>  S:     Supported
>>  F:     drivers/ras/amd/atl/*
>>  
>> +AMD AE4DMA DRIVER
>> +M:     Basavaraj Natikar <Basavaraj.Natikar@amd.com>
>> +L:     dmaengine@vger.kernel.org
>> +S:     Maintained
>> +F:     drivers/dma/amd/ae4dma/
>> +
>>  AMD AXI W1 DRIVER
>>  M:     Kris Chaplin <kris.chaplin@amd.com>
>>  R:     Thomas Delev <thomas.delev@amd.com>
>> diff --git a/drivers/dma/amd/Kconfig b/drivers/dma/amd/Kconfig
>> index 8246b463bcf7..8c25a3ed6b94 100644
>> --- a/drivers/dma/amd/Kconfig
>> +++ b/drivers/dma/amd/Kconfig
>> @@ -3,3 +3,4 @@
>>  # AMD DMA Drivers
>>  
>>  source "drivers/dma/amd/ptdma/Kconfig"
>> +source "drivers/dma/amd/ae4dma/Kconfig"
>> diff --git a/drivers/dma/amd/Makefile b/drivers/dma/amd/Makefile
>> index dd7257ba7e06..8049b06a9ff5 100644
>> --- a/drivers/dma/amd/Makefile
>> +++ b/drivers/dma/amd/Makefile
>> @@ -4,3 +4,4 @@
>>  #
>>  
>>  obj-$(CONFIG_AMD_PTDMA) += ptdma/
>> +obj-$(CONFIG_AMD_AE4DMA) += ae4dma/
>> diff --git a/drivers/dma/amd/ae4dma/Kconfig
>> b/drivers/dma/amd/ae4dma/Kconfig
>> new file mode 100644
>> index 000000000000..cf8db4dac98d
>> --- /dev/null
>> +++ b/drivers/dma/amd/ae4dma/Kconfig
>> @@ -0,0 +1,13 @@
>> +# SPDX-License-Identifier: GPL-2.0
>> +config AMD_AE4DMA
>> +       tristate  "AMD AE4DMA Engine"
>> +       depends on X86_64 && PCI
>> +       select DMA_ENGINE
>> +       select DMA_VIRTUAL_CHANNELS
>> +       help
>> +         Enable support for the AMD AE4DMA controller. This
>> controller
>> +         provides DMA capabilities to perform high bandwidth memory
>> to
>> +         memory and IO copy operations. It performs DMA transfer
>> through
>> +         queue-based descriptor management. This DMA controller is
>> intended
>> +         to be used with AMD Non-Transparent Bridge devices and not
>> for
>> +         general purpose peripheral DMA.
>> diff --git a/drivers/dma/amd/ae4dma/Makefile
>> b/drivers/dma/amd/ae4dma/Makefile
>> new file mode 100644
>> index 000000000000..e918f85a80ec
>> --- /dev/null
>> +++ b/drivers/dma/amd/ae4dma/Makefile
>> @@ -0,0 +1,10 @@
>> +# SPDX-License-Identifier: GPL-2.0
>> +#
>> +# AMD AE4DMA driver
>> +#
>> +
>> +obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
>> +
>> +ae4dma-objs := ae4dma-dev.o
>> +
>> +ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
>> diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c
>> b/drivers/dma/amd/ae4dma/ae4dma-dev.c
>> new file mode 100644
>> index 000000000000..958bdab8db59
>> --- /dev/null
>> +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c
>> @@ -0,0 +1,206 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * AMD AE4DMA driver
>> + *
>> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
>> + * All Rights Reserved.
>> + *
>> + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
>> + */
>> +
>> +#include "ae4dma.h"
>> +
>> +static unsigned int max_hw_q = 1;
>> +module_param(max_hw_q, uint, 0444);
>> +MODULE_PARM_DESC(max_hw_q, "max hw queues supported by engine (any
>> non-zero value, default: 1)");
>> +
>> +static char *ae4_error_codes[] = {
>> +       "",
>> +       "ERR 01: INVALID HEADER DW0",
>> +       "ERR 02: INVALID STATUS",
>> +       "ERR 03: INVALID LENGTH - 4 BYTE ALIGNMENT",
>> +       "ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT",
>> +       "ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT",
>> +       "ERR 06: INVALID ALIGNMENT",
>> +       "ERR 07: INVALID DESCRIPTOR",
>> +};
>> +
>> +static void ae4_log_error(struct pt_device *d, int e)
>> +{
>> +       if (e <= 7)
>> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n",
>> ae4_error_codes[e], e);
>> +       else if (e > 7 && e <= 15)
>> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n",
>> "INVALID DESCRIPTOR", e);
>> +       else if (e > 15 && e <= 31)
>> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n",
>> "INVALID DESCRIPTOR", e);
>> +       else if (e > 31 && e <= 63)
>> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n",
>> "INVALID DESCRIPTOR", e);
>> +       else if (e > 63 && e <= 127)
>> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE
>> ERROR", e);
>> +       else if (e > 127 && e <= 255)
>> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE
>> ERROR", e);
>> +       else
>> +               dev_info(d->dev, "Unknown AE4DMA error");
>> +}
>> +
>> +static void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q,
>> int idx)
>> +{
>> +       struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
>> +       struct ae4dma_desc desc;
>> +       u8 status;
>> +
>> +       memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct
>> ae4dma_desc));
>> +       /* Synchronize ordering */
>> +       dma_rmb();
>> +       status = desc.dw1.status;
>> +       if (status && status != AE4_DESC_COMPLETED) {
>> +               cmd_q->cmd_error = desc.dw1.err_code;
>> +               if (cmd_q->cmd_error)
>> +                       ae4_log_error(cmd_q->pt, cmd_q->cmd_error);
>> +       }
>> +}
>> +
>> +static void ae4_pending_work(struct work_struct *work)
>> +{
>> +       struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct
>> ae4_cmd_queue, p_work.work);
>> +       struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
>> +       struct pt_cmd *cmd;
>> +       u32 cridx, dridx;
>> +
>> +       while (true) {
>> +               wait_event_interruptible(ae4cmd_q->q_w,
>> +                                        ((atomic64_read(&ae4cmd_q-
>>> done_cnt)) <
>> +                                          atomic64_read(&ae4cmd_q-
>>> intr_cnt)));
>> +
>> +               atomic64_inc(&ae4cmd_q->done_cnt);
>> +
>> +               mutex_lock(&ae4cmd_q->cmd_lock);
>> +
>> +               cridx = readl(cmd_q->reg_control + 0x0C);
>> +               dridx = atomic_read(&ae4cmd_q->dridx);
>> +
>> +               while ((dridx != cridx) && !list_empty(&ae4cmd_q-
>>> cmd)) {
>> +                       cmd = list_first_entry(&ae4cmd_q->cmd, struct
>> pt_cmd, entry);
>> +                       list_del(&cmd->entry);
>> +
>> +                       ae4_check_status_error(ae4cmd_q, dridx);
>> +                       cmd->pt_cmd_callback(cmd->data, cmd->ret);
>> +
>> +                       atomic64_dec(&ae4cmd_q->q_cmd_count);
>> +                       dridx = (dridx + 1) % CMD_Q_LEN;
>> +                       atomic_set(&ae4cmd_q->dridx, dridx);
>> +                       /* Synchronize ordering */
>> +                       dma_mb();
>> +
>> +                       complete_all(&ae4cmd_q->cmp);
>> +               }
>> +
>> +               mutex_unlock(&ae4cmd_q->cmd_lock);
>> +       }
>> +}
>> +
>> +static irqreturn_t ae4_core_irq_handler(int irq, void *data)
>> +{
>> +       struct ae4_cmd_queue *ae4cmd_q = data;
>> +       struct pt_cmd_queue *cmd_q;
>> +       struct pt_device *pt;
>> +       u32 status;
>> +
>> +       cmd_q = &ae4cmd_q->cmd_q;
>> +       pt = cmd_q->pt;
>> +
>> +       pt->total_interrupts++;
>> +       atomic64_inc(&ae4cmd_q->intr_cnt);
>> +
>> +       wake_up(&ae4cmd_q->q_w);
>> +
>> +       status = readl(cmd_q->reg_control + 0x14);
>> +       if (status & BIT(0)) {
>> +               status &= GENMASK(31, 1);
>> +               writel(status, cmd_q->reg_control + 0x14);
>> +       }
>> +
>> +       return IRQ_HANDLED;
>> +}
>> +
>> +void ae4_destroy_work(struct ae4_device *ae4)
>> +{
>> +       struct ae4_cmd_queue *ae4cmd_q;
>> +       int i;
>> +
>> +       for (i = 0; i < ae4->cmd_q_count; i++) {
>> +               ae4cmd_q = &ae4->ae4cmd_q[i];
>> +
>> +               if (!ae4cmd_q->pws)
>> +                       break;
>> +
>> +               cancel_delayed_work_sync(&ae4cmd_q->p_work);
>> +               destroy_workqueue(ae4cmd_q->pws);
>> +       }
>> +}
>> +
>> +int ae4_core_init(struct ae4_device *ae4)
>> +{
>> +       struct pt_device *pt = &ae4->pt;
>> +       struct ae4_cmd_queue *ae4cmd_q;
>> +       struct device *dev = pt->dev;
>> +       struct pt_cmd_queue *cmd_q;
>> +       int i, ret = 0;
>> +
>> +       writel(max_hw_q, pt->io_regs);
>> +
>> +       for (i = 0; i < max_hw_q; i++) {
>> +               ae4cmd_q = &ae4->ae4cmd_q[i];
>> +               ae4cmd_q->id = ae4->cmd_q_count;
>> +               ae4->cmd_q_count++;
>> +
>> +               cmd_q = &ae4cmd_q->cmd_q;
>> +               cmd_q->pt = pt;
>> +
>> +               /* Preset some register values (Q size is 32byte
>> (0x20)) */
>> +               cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);
>> +
>> +               ret = devm_request_irq(dev, ae4->ae4_irq[i],
>> ae4_core_irq_handler, 0,
>> +                                      dev_name(pt->dev), ae4cmd_q);
>> +               if (ret)
>> +                       return ret;
>> +
>> +               cmd_q->qsize = Q_SIZE(sizeof(struct ae4dma_desc));
>> +
>> +               cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize,
>> &cmd_q->qbase_dma,
>> +                                                  GFP_KERNEL);
>> +               if (!cmd_q->qbase)
>> +                       return -ENOMEM;
>> +       }
>> +
>> +       for (i = 0; i < ae4->cmd_q_count; i++) {
>> +               ae4cmd_q = &ae4->ae4cmd_q[i];
>> +
>> +               cmd_q = &ae4cmd_q->cmd_q;
>> +
>> +               /* Preset some register values (Q size is 32byte
>> (0x20)) */
>> +               cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);
>> +
>> +               /* Update the device registers with queue
>> information. */
>> +               writel(CMD_Q_LEN, cmd_q->reg_control + 0x08);
>> +
>> +               cmd_q->qdma_tail = cmd_q->qbase_dma;
>> +               writel(lower_32_bits(cmd_q->qdma_tail), cmd_q-
>>> reg_control + 0x18);
>> +               writel(upper_32_bits(cmd_q->qdma_tail), cmd_q-
>>> reg_control + 0x1C);
>> +
>> +               INIT_LIST_HEAD(&ae4cmd_q->cmd);
>> +               init_waitqueue_head(&ae4cmd_q->q_w);
>> +
>> +               ae4cmd_q->pws = alloc_ordered_workqueue("ae4dma_%d",
>> WQ_MEM_RECLAIM, ae4cmd_q->id);
>> +               if (!ae4cmd_q->pws) {
>> +                       ae4_destroy_work(ae4);
>> +                       return -ENOMEM;
>> +               }
>> +               INIT_DELAYED_WORK(&ae4cmd_q->p_work,
>> ae4_pending_work);
>> +               queue_delayed_work(ae4cmd_q->pws, &ae4cmd_q->p_work, 
>> usecs_to_jiffies(100));
>> +
>> +               init_completion(&ae4cmd_q->cmp);
>> +       }
>> +
>> +       return ret;
>> +}
>> diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c
>> b/drivers/dma/amd/ae4dma/ae4dma-pci.c
>> new file mode 100644
>> index 000000000000..ddebf0609c4d
>> --- /dev/null
>> +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
>> @@ -0,0 +1,190 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * AMD AE4DMA driver
>> + *
>> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
>> + * All Rights Reserved.
>> + *
>> + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
>> + */
>> +
>> +#include "ae4dma.h"
>> +
>> +static int ae4_get_msi_irq(struct ae4_device *ae4)
>> +{
>> +       struct pt_device *pt = &ae4->pt;
>> +       struct device *dev = pt->dev;
>> +       struct pci_dev *pdev;
>> +       int ret, i;
>> +
>> +       pdev = to_pci_dev(dev);
>> +       ret = pci_enable_msi(pdev);
>> +       if (ret)
>> +               return ret;
>> +
>> +       for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
>> +               ae4->ae4_irq[i] = pdev->irq;
>> +
>> +       return 0;
>> +}
>> +
>> +static int ae4_get_msix_irqs(struct ae4_device *ae4)
>> +{
>> +       struct ae4_msix *ae4_msix = ae4->ae4_msix;
>> +       struct pt_device *pt = &ae4->pt;
>> +       struct device *dev = pt->dev;
>> +       struct pci_dev *pdev;
>> +       int v, i, ret;
>> +
>> +       pdev = to_pci_dev(dev);
>> +
>> +       for (v = 0; v < ARRAY_SIZE(ae4_msix->msix_entry); v++)
>> +               ae4_msix->msix_entry[v].entry = v;
>> +
>> +       ret = pci_enable_msix_range(pdev, ae4_msix->msix_entry, 1,
>> v);
>> +       if (ret < 0)
>> +               return ret;
>> +
>> +       ae4_msix->msix_count = ret;
>> +
>> +       for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
>> +               ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector;
>> +
>> +       return 0;
>> +}
>> +
>> +static int ae4_get_irqs(struct ae4_device *ae4)
>> +{
>> +       struct pt_device *pt = &ae4->pt;
>> +       struct device *dev = pt->dev;
>> +       int ret;
>> +
>> +       ret = ae4_get_msix_irqs(ae4);
>> +       if (!ret)
>> +               return 0;
>> +
>> +       /* Couldn't get MSI-X vectors, try MSI */
>> +       dev_err(dev, "could not enable MSI-X (%d), trying MSI\n",
>> ret);
>> +       ret = ae4_get_msi_irq(ae4);
>> +       if (!ret)
>> +               return 0;
>> +
>> +       /* Couldn't get MSI interrupt */
>> +       dev_err(dev, "could not enable MSI (%d)\n", ret);
>> +
>> +       return ret;
>> +}
>> +
>> +static void ae4_free_irqs(struct ae4_device *ae4)
>> +{
>> +       struct ae4_msix *ae4_msix;
>> +       struct pci_dev *pdev;
>> +       struct pt_device *pt;
>> +       struct device *dev;
>> +       int i;
>> +
>> +       if (ae4) {
>> +               pt = &ae4->pt;
>> +               dev = pt->dev;
>> +               pdev = to_pci_dev(dev);
>> +
>> +               ae4_msix = ae4->ae4_msix;
>> +               if (ae4_msix && ae4_msix->msix_count)
>> +                       pci_disable_msix(pdev);
>> +               else if (pdev->irq)
>> +                       pci_disable_msi(pdev);
>> +
>> +               for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
>> +                       ae4->ae4_irq[i] = 0;
>> +       }
>> +}
>> +
>> +static void ae4_deinit(struct ae4_device *ae4)
>> +{
>> +       ae4_free_irqs(ae4);
>> +}
>> +
>> +static int ae4_pci_probe(struct pci_dev *pdev, const struct
>> pci_device_id *id)
>> +{
>> +       struct device *dev = &pdev->dev;
>> +       struct ae4_device *ae4;
>> +       struct pt_device *pt;
>> +       int bar_mask;
>> +       int ret = 0;
>> +
>> +       ae4 = devm_kzalloc(dev, sizeof(*ae4), GFP_KERNEL);
>> +       if (!ae4)
>> +               return -ENOMEM;
>> +
>> +       ae4->ae4_msix = devm_kzalloc(dev, sizeof(struct ae4_msix),
>> GFP_KERNEL);
>> +       if (!ae4->ae4_msix)
>> +               return -ENOMEM;
>> +
>> +       ret = pcim_enable_device(pdev);
>> +       if (ret)
>> +               goto ae4_error;
>> +
>> +       bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
>> +       ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma");
>> +       if (ret)
>> +               goto ae4_error;
>> +
>> +       pt = &ae4->pt;
>> +       pt->dev = dev;
>> +
>> +       pt->io_regs = pcim_iomap_table(pdev)[0];
>> +       if (!pt->io_regs) {
>> +               ret = -ENOMEM;
>> +               goto ae4_error;
>> +       }
> Please note that we are about to deprecate pcim_iomap_regions() and
> pcim_iomap_table().
>
> The details are in this series [1]. Code is currently in this branch
> [2].
>
> That should go mainline in v6.11, hopefully. So you might want to
> consider waiting a bit with your series so you won't end up with a
> deprecated API in your new code?
>
> The new way of coding it would then also be a bit simpler:
>
> pt->io_regs = pcim_iomap_region(pdev, 0, "ae4dma");
> if (IS_ERR(pt->io_regs)) {
>
> We'd just have to make pcim_iomap_region() public for you, since you'd
> be the first external user.
>
> Cheers,
> P.
>
>
> [1] https://lore.kernel.org/all/20240613115032.29098-1-pstanner@redhat.com/
> [2] https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git/log/?h=devres

Thank you for the info.
I will update it accordingly in the future once all the changes are available
in mainline.

Thanks,
--
Basavaraj

>
>> +
>> +       ret = ae4_get_irqs(ae4);
>> +       if (ret)
>> +               goto ae4_error;
>> +
>> +       pci_set_master(pdev);
>> +
>> +       dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
>> +
>> +       dev_set_drvdata(dev, ae4);
>> +
>> +       ret = ae4_core_init(ae4);
>> +       if (ret)
>> +               goto ae4_error;
>> +
>> +       return 0;
>> +
>> +ae4_error:
>> +       ae4_deinit(ae4);
>> +
>> +       return ret;
>> +}
>> +
>> +static void ae4_pci_remove(struct pci_dev *pdev)
>> +{
>> +       struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev);
>> +
>> +       ae4_destroy_work(ae4);
>> +       ae4_deinit(ae4);
>> +}
>> +
>> +static const struct pci_device_id ae4_pci_table[] = {
>> +       { PCI_VDEVICE(AMD, 0x14C8), },
>> +       { PCI_VDEVICE(AMD, 0x14DC), },
>> +       { PCI_VDEVICE(AMD, 0x149B), },
>> +       /* Last entry must be zero */
>> +       { 0, }
>> +};
>> +MODULE_DEVICE_TABLE(pci, ae4_pci_table);
>> +
>> +static struct pci_driver ae4_pci_driver = {
>> +       .name = "ae4dma",
>> +       .id_table = ae4_pci_table,
>> +       .probe = ae4_pci_probe,
>> +       .remove = ae4_pci_remove,
>> +};
>> +
>> +module_pci_driver(ae4_pci_driver);
>> +
>> +MODULE_LICENSE("GPL");
>> +MODULE_DESCRIPTION("AMD AE4DMA driver");
>> diff --git a/drivers/dma/amd/ae4dma/ae4dma.h
>> b/drivers/dma/amd/ae4dma/ae4dma.h
>> new file mode 100644
>> index 000000000000..24b1253ad570
>> --- /dev/null
>> +++ b/drivers/dma/amd/ae4dma/ae4dma.h
>> @@ -0,0 +1,77 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * AMD AE4DMA driver
>> + *
>> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
>> + * All Rights Reserved.
>> + *
>> + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
>> + */
>> +#ifndef __AE4DMA_H__
>> +#define __AE4DMA_H__
>> +
>> +#include "../common/amd_dma.h"
>> +
>> +#define MAX_AE4_HW_QUEUES              16
>> +
>> +#define AE4_DESC_COMPLETED             0x3
>> +
>> +struct ae4_msix {
>> +       int msix_count;
>> +       struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
>> +};
>> +
>> +struct ae4_cmd_queue {
>> +       struct ae4_device *ae4;
>> +       struct pt_cmd_queue cmd_q;
>> +       struct list_head cmd;
>> +       /* protect command operations */
>> +       struct mutex cmd_lock;
>> +       struct delayed_work p_work;
>> +       struct workqueue_struct *pws;
>> +       struct completion cmp;
>> +       wait_queue_head_t q_w;
>> +       atomic64_t intr_cnt;
>> +       atomic64_t done_cnt;
>> +       atomic64_t q_cmd_count;
>> +       atomic_t dridx;
>> +       unsigned int id;
>> +};
>> +
>> +union dwou {
>> +       u32 dw0;
>> +       struct dword0 {
>> +       u8      byte0;
>> +       u8      byte1;
>> +       u16     timestamp;
>> +       } dws;
>> +};
>> +
>> +struct dword1 {
>> +       u8      status;
>> +       u8      err_code;
>> +       u16     desc_id;
>> +};
>> +
>> +struct ae4dma_desc {
>> +       union dwou dwouv;
>> +       struct dword1 dw1;
>> +       u32 length;
>> +       u32 rsvd;
>> +       u32 src_hi;
>> +       u32 src_lo;
>> +       u32 dst_hi;
>> +       u32 dst_lo;
>> +};
>> +
>> +struct ae4_device {
>> +       struct pt_device pt;
>> +       struct ae4_msix *ae4_msix;
>> +       struct ae4_cmd_queue ae4cmd_q[MAX_AE4_HW_QUEUES];
>> +       unsigned int ae4_irq[MAX_AE4_HW_QUEUES];
>> +       unsigned int cmd_q_count;
>> +};
>> +
>> +int ae4_core_init(struct ae4_device *ae4);
>> +void ae4_destroy_work(struct ae4_device *ae4);
>> +#endif
>> diff --git a/drivers/dma/amd/common/amd_dma.h
>> b/drivers/dma/amd/common/amd_dma.h
>> new file mode 100644
>> index 000000000000..f9f396cd4371
>> --- /dev/null
>> +++ b/drivers/dma/amd/common/amd_dma.h
>> @@ -0,0 +1,26 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * AMD DMA Driver common
>> + *
>> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
>> + * All Rights Reserved.
>> + *
>> + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
>> + */
>> +
>> +#ifndef AMD_DMA_H
>> +#define AMD_DMA_H
>> +
>> +#include <linux/device.h>
>> +#include <linux/dmaengine.h>
>> +#include <linux/dmapool.h>
>> +#include <linux/list.h>
>> +#include <linux/mutex.h>
>> +#include <linux/pci.h>
>> +#include <linux/spinlock.h>
>> +#include <linux/wait.h>
>> +
>> +#include "../ptdma/ptdma.h"
>> +#include "../../virt-dma.h"
>> +
>> +#endif

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index c500c0567779..0d222d1ca83f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -947,6 +947,12 @@  L:	linux-edac@vger.kernel.org
 S:	Supported
 F:	drivers/ras/amd/atl/*
 
+AMD AE4DMA DRIVER
+M:	Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+L:	dmaengine@vger.kernel.org
+S:	Maintained
+F:	drivers/dma/amd/ae4dma/
+
 AMD AXI W1 DRIVER
 M:	Kris Chaplin <kris.chaplin@amd.com>
 R:	Thomas Delev <thomas.delev@amd.com>
diff --git a/drivers/dma/amd/Kconfig b/drivers/dma/amd/Kconfig
index 8246b463bcf7..8c25a3ed6b94 100644
--- a/drivers/dma/amd/Kconfig
+++ b/drivers/dma/amd/Kconfig
@@ -3,3 +3,4 @@ 
 # AMD DMA Drivers
 
 source "drivers/dma/amd/ptdma/Kconfig"
+source "drivers/dma/amd/ae4dma/Kconfig"
diff --git a/drivers/dma/amd/Makefile b/drivers/dma/amd/Makefile
index dd7257ba7e06..8049b06a9ff5 100644
--- a/drivers/dma/amd/Makefile
+++ b/drivers/dma/amd/Makefile
@@ -4,3 +4,4 @@ 
 #
 
 obj-$(CONFIG_AMD_PTDMA) += ptdma/
+obj-$(CONFIG_AMD_AE4DMA) += ae4dma/
diff --git a/drivers/dma/amd/ae4dma/Kconfig b/drivers/dma/amd/ae4dma/Kconfig
new file mode 100644
index 000000000000..cf8db4dac98d
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/Kconfig
@@ -0,0 +1,13 @@ 
+# SPDX-License-Identifier: GPL-2.0
+config AMD_AE4DMA
+	tristate  "AMD AE4DMA Engine"
+	depends on X86_64 && PCI
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the AMD AE4DMA controller. This controller
+	  provides DMA capabilities to perform high bandwidth memory to
+	  memory and IO copy operations. It performs DMA transfer through
+	  queue-based descriptor management. This DMA controller is intended
+	  to be used with AMD Non-Transparent Bridge devices and not for
+	  general purpose peripheral DMA.
diff --git a/drivers/dma/amd/ae4dma/Makefile b/drivers/dma/amd/ae4dma/Makefile
new file mode 100644
index 000000000000..e918f85a80ec
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/Makefile
@@ -0,0 +1,10 @@ 
+# SPDX-License-Identifier: GPL-2.0
+#
+# AMD AE4DMA driver
+#
+
+obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
+
+ae4dma-objs := ae4dma-dev.o
+
+ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c b/drivers/dma/amd/ae4dma/ae4dma-dev.c
new file mode 100644
index 000000000000..958bdab8db59
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c
@@ -0,0 +1,206 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD AE4DMA driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+
+#include "ae4dma.h"
+
+static unsigned int max_hw_q = 1;
+module_param(max_hw_q, uint, 0444);
+MODULE_PARM_DESC(max_hw_q, "max hw queues supported by engine (any non-zero value, default: 1)");
+
+static char *ae4_error_codes[] = {
+	"",
+	"ERR 01: INVALID HEADER DW0",
+	"ERR 02: INVALID STATUS",
+	"ERR 03: INVALID LENGTH - 4 BYTE ALIGNMENT",
+	"ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT",
+	"ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT",
+	"ERR 06: INVALID ALIGNMENT",
+	"ERR 07: INVALID DESCRIPTOR",
+};
+
+static void ae4_log_error(struct pt_device *d, int e)
+{
+	if (e <= 7)
+		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", ae4_error_codes[e], e);
+	else if (e > 7 && e <= 15)
+		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
+	else if (e > 15 && e <= 31)
+		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
+	else if (e > 31 && e <= 63)
+		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
+	else if (e > 63 && e <= 127)
+		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e);
+	else if (e > 127 && e <= 255)
+		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e);
+	else
+		dev_info(d->dev, "Unknown AE4DMA error");
+}
+
+static void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx)
+{
+	struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
+	struct ae4dma_desc desc;
+	u8 status;
+
+	memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct ae4dma_desc));
+	/* Synchronize ordering */
+	dma_rmb();
+	status = desc.dw1.status;
+	if (status && status != AE4_DESC_COMPLETED) {
+		cmd_q->cmd_error = desc.dw1.err_code;
+		if (cmd_q->cmd_error)
+			ae4_log_error(cmd_q->pt, cmd_q->cmd_error);
+	}
+}
+
+static void ae4_pending_work(struct work_struct *work)
+{
+	struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct ae4_cmd_queue, p_work.work);
+	struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
+	struct pt_cmd *cmd;
+	u32 cridx, dridx;
+
+	while (true) {
+		wait_event_interruptible(ae4cmd_q->q_w,
+					 ((atomic64_read(&ae4cmd_q->done_cnt)) <
+					   atomic64_read(&ae4cmd_q->intr_cnt)));
+
+		atomic64_inc(&ae4cmd_q->done_cnt);
+
+		mutex_lock(&ae4cmd_q->cmd_lock);
+
+		cridx = readl(cmd_q->reg_control + 0x0C);
+		dridx = atomic_read(&ae4cmd_q->dridx);
+
+		while ((dridx != cridx) && !list_empty(&ae4cmd_q->cmd)) {
+			cmd = list_first_entry(&ae4cmd_q->cmd, struct pt_cmd, entry);
+			list_del(&cmd->entry);
+
+			ae4_check_status_error(ae4cmd_q, dridx);
+			cmd->pt_cmd_callback(cmd->data, cmd->ret);
+
+			atomic64_dec(&ae4cmd_q->q_cmd_count);
+			dridx = (dridx + 1) % CMD_Q_LEN;
+			atomic_set(&ae4cmd_q->dridx, dridx);
+			/* Synchronize ordering */
+			dma_mb();
+
+			complete_all(&ae4cmd_q->cmp);
+		}
+
+		mutex_unlock(&ae4cmd_q->cmd_lock);
+	}
+}
+
+static irqreturn_t ae4_core_irq_handler(int irq, void *data)
+{
+	struct ae4_cmd_queue *ae4cmd_q = data;
+	struct pt_cmd_queue *cmd_q;
+	struct pt_device *pt;
+	u32 status;
+
+	cmd_q = &ae4cmd_q->cmd_q;
+	pt = cmd_q->pt;
+
+	pt->total_interrupts++;
+	atomic64_inc(&ae4cmd_q->intr_cnt);
+
+	wake_up(&ae4cmd_q->q_w);
+
+	status = readl(cmd_q->reg_control + 0x14);
+	if (status & BIT(0)) {
+		status &= GENMASK(31, 1);
+		writel(status, cmd_q->reg_control + 0x14);
+	}
+
+	return IRQ_HANDLED;
+}
+
+void ae4_destroy_work(struct ae4_device *ae4)
+{
+	struct ae4_cmd_queue *ae4cmd_q;
+	int i;
+
+	for (i = 0; i < ae4->cmd_q_count; i++) {
+		ae4cmd_q = &ae4->ae4cmd_q[i];
+
+		if (!ae4cmd_q->pws)
+			break;
+
+		cancel_delayed_work_sync(&ae4cmd_q->p_work);
+		destroy_workqueue(ae4cmd_q->pws);
+	}
+}
+
+int ae4_core_init(struct ae4_device *ae4)
+{
+	struct pt_device *pt = &ae4->pt;
+	struct ae4_cmd_queue *ae4cmd_q;
+	struct device *dev = pt->dev;
+	struct pt_cmd_queue *cmd_q;
+	int i, ret = 0;
+
+	writel(max_hw_q, pt->io_regs);
+
+	for (i = 0; i < max_hw_q; i++) {
+		ae4cmd_q = &ae4->ae4cmd_q[i];
+		ae4cmd_q->id = ae4->cmd_q_count;
+		ae4->cmd_q_count++;
+
+		cmd_q = &ae4cmd_q->cmd_q;
+		cmd_q->pt = pt;
+
+		/* Preset some register values (Q size is 32byte (0x20)) */
+		cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);
+
+		ret = devm_request_irq(dev, ae4->ae4_irq[i], ae4_core_irq_handler, 0,
+				       dev_name(pt->dev), ae4cmd_q);
+		if (ret)
+			return ret;
+
+		cmd_q->qsize = Q_SIZE(sizeof(struct ae4dma_desc));
+
+		cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize, &cmd_q->qbase_dma,
+						   GFP_KERNEL);
+		if (!cmd_q->qbase)
+			return -ENOMEM;
+	}
+
+	for (i = 0; i < ae4->cmd_q_count; i++) {
+		ae4cmd_q = &ae4->ae4cmd_q[i];
+
+		cmd_q = &ae4cmd_q->cmd_q;
+
+		/* Preset some register values (Q size is 32byte (0x20)) */
+		cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);
+
+		/* Update the device registers with queue information. */
+		writel(CMD_Q_LEN, cmd_q->reg_control + 0x08);
+
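+		/*
+		 * Program the descriptor ring DMA base address across
+		 * offsets 0x18 (low 32 bits) and 0x1C (high 32 bits).
+		 */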
+		cmd_q->qdma_tail = cmd_q->qbase_dma;
+		writel(lower_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + 0x18);
+		writel(upper_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + 0x1C);
+
+		INIT_LIST_HEAD(&ae4cmd_q->cmd);
+		init_waitqueue_head(&ae4cmd_q->q_w);
+
+		ae4cmd_q->pws = alloc_ordered_workqueue("ae4dma_%d", WQ_MEM_RECLAIM, ae4cmd_q->id);
+		if (!ae4cmd_q->pws) {
+			ae4_destroy_work(ae4);
+			return -ENOMEM;
+		}
+		INIT_DELAYED_WORK(&ae4cmd_q->p_work, ae4_pending_work);
+		queue_delayed_work(ae4cmd_q->pws, &ae4cmd_q->p_work, usecs_to_jiffies(100));
+
+		init_completion(&ae4cmd_q->cmp);
+	}
+
+	return ret;
+}
diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c
new file mode 100644
index 000000000000..ddebf0609c4d
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
@@ -0,0 +1,190 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD AE4DMA driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+
+#include "ae4dma.h"
+
+static int ae4_get_msi_irq(struct ae4_device *ae4)
+{
+	struct pt_device *pt = &ae4->pt;
+	struct device *dev = pt->dev;
+	struct pci_dev *pdev;
+	int ret, i;
+
+	pdev = to_pci_dev(dev);
+	ret = pci_enable_msi(pdev);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
+		ae4->ae4_irq[i] = pdev->irq;
+
+	return 0;
+}
+
+static int ae4_get_msix_irqs(struct ae4_device *ae4)
+{
+	struct ae4_msix *ae4_msix = ae4->ae4_msix;
+	struct pt_device *pt = &ae4->pt;
+	struct device *dev = pt->dev;
+	struct pci_dev *pdev;
+	int v, i, ret;
+
+	pdev = to_pci_dev(dev);
+
+	for (v = 0; v < ARRAY_SIZE(ae4_msix->msix_entry); v++)
+		ae4_msix->msix_entry[v].entry = v;
+
+	ret = pci_enable_msix_range(pdev, ae4_msix->msix_entry, 1, v);
+	if (ret < 0)
+		return ret;
+
+	ae4_msix->msix_count = ret;
+
+	for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
+		ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector;
+
+	return 0;
+}
+
+static int ae4_get_irqs(struct ae4_device *ae4)
+{
+	struct pt_device *pt = &ae4->pt;
+	struct device *dev = pt->dev;
+	int ret;
+
+	ret = ae4_get_msix_irqs(ae4);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get MSI-X vectors, try MSI */
+	dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
+	ret = ae4_get_msi_irq(ae4);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get MSI interrupt */
+	dev_err(dev, "could not enable MSI (%d)\n", ret);
+
+	return ret;
+}
+
+static void ae4_free_irqs(struct ae4_device *ae4)
+{
+	struct ae4_msix *ae4_msix;
+	struct pci_dev *pdev;
+	struct pt_device *pt;
+	struct device *dev;
+	int i;
+
+	if (ae4) {
+		pt = &ae4->pt;
+		dev = pt->dev;
+		pdev = to_pci_dev(dev);
+
+		ae4_msix = ae4->ae4_msix;
+		if (ae4_msix && ae4_msix->msix_count)
+			pci_disable_msix(pdev);
+		else if (pdev->irq)
+			pci_disable_msi(pdev);
+
+		for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
+			ae4->ae4_irq[i] = 0;
+	}
+}
+
+static void ae4_deinit(struct ae4_device *ae4)
+{
+	ae4_free_irqs(ae4);
+}
+
+static int ae4_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct device *dev = &pdev->dev;
+	struct ae4_device *ae4;
+	struct pt_device *pt;
+	int bar_mask;
+	int ret = 0;
+
+	ae4 = devm_kzalloc(dev, sizeof(*ae4), GFP_KERNEL);
+	if (!ae4)
+		return -ENOMEM;
+
+	ae4->ae4_msix = devm_kzalloc(dev, sizeof(struct ae4_msix), GFP_KERNEL);
+	if (!ae4->ae4_msix)
+		return -ENOMEM;
+
+	ret = pcim_enable_device(pdev);
+	if (ret)
+		goto ae4_error;
+
+	bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+	ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma");
+	if (ret)
+		goto ae4_error;
+
+	pt = &ae4->pt;
+	pt->dev = dev;
+
+	pt->io_regs = pcim_iomap_table(pdev)[0];
+	if (!pt->io_regs) {
+		ret = -ENOMEM;
+		goto ae4_error;
+	}
+
+	ret = ae4_get_irqs(ae4);
+	if (ret)
+		goto ae4_error;
+
+	pci_set_master(pdev);
+
+	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+
+	dev_set_drvdata(dev, ae4);
+
+	ret = ae4_core_init(ae4);
+	if (ret)
+		goto ae4_error;
+
+	return 0;
+
+ae4_error:
+	ae4_deinit(ae4);
+
+	return ret;
+}
+
+static void ae4_pci_remove(struct pci_dev *pdev)
+{
+	struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev);
+
+	ae4_destroy_work(ae4);
+	ae4_deinit(ae4);
+}
+
+static const struct pci_device_id ae4_pci_table[] = {
+	{ PCI_VDEVICE(AMD, 0x14C8), },
+	{ PCI_VDEVICE(AMD, 0x14DC), },
+	{ PCI_VDEVICE(AMD, 0x149B), },
+	/* Last entry must be zero */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, ae4_pci_table);
+
+static struct pci_driver ae4_pci_driver = {
+	.name = "ae4dma",
+	.id_table = ae4_pci_table,
+	.probe = ae4_pci_probe,
+	.remove = ae4_pci_remove,
+};
+
+module_pci_driver(ae4_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AMD AE4DMA driver");
diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
new file mode 100644
index 000000000000..24b1253ad570
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/ae4dma.h
@@ -0,0 +1,77 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD AE4DMA driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+#ifndef __AE4DMA_H__
+#define __AE4DMA_H__
+
+#include "../common/amd_dma.h"
+
+#define MAX_AE4_HW_QUEUES		16
+
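+/* Descriptor status value indicating the engine completed the transfer */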
+#define AE4_DESC_COMPLETED		0x3
+
+struct ae4_msix {
+	int msix_count;
+	struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
+};
+
+struct ae4_cmd_queue {
+	struct ae4_device *ae4;
+	struct pt_cmd_queue cmd_q;
+	struct list_head cmd;
+	/* protect command operations */
+	struct mutex cmd_lock;
+	struct delayed_work p_work;
+	struct workqueue_struct *pws;
+	struct completion cmp;
+	wait_queue_head_t q_w;
+	atomic64_t intr_cnt;
+	atomic64_t done_cnt;
+	atomic64_t q_cmd_count;
+	atomic_t dridx;
+	unsigned int id;
+};
+
+union dwou {
+	u32 dw0;
+	struct dword0 {
+		u8	byte0;
+		u8	byte1;
+		u16	timestamp;
+	} dws;
+};
+
+struct dword1 {
+	u8	status;
+	u8	err_code;
+	u16	desc_id;
+};
+
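+/*
+ * 32-byte hardware descriptor: control/status dwords, transfer length,
+ * and source/destination addresses split into high/low 32-bit halves.
+ */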
+struct ae4dma_desc {
+	union dwou dwouv;
+	struct dword1 dw1;
+	u32 length;
+	u32 rsvd;
+	u32 src_hi;
+	u32 src_lo;
+	u32 dst_hi;
+	u32 dst_lo;
+};
+
+struct ae4_device {
+	struct pt_device pt;
+	struct ae4_msix *ae4_msix;
+	struct ae4_cmd_queue ae4cmd_q[MAX_AE4_HW_QUEUES];
+	unsigned int ae4_irq[MAX_AE4_HW_QUEUES];
+	unsigned int cmd_q_count;
+};
+
+int ae4_core_init(struct ae4_device *ae4);
+void ae4_destroy_work(struct ae4_device *ae4);
+#endif
diff --git a/drivers/dma/amd/common/amd_dma.h b/drivers/dma/amd/common/amd_dma.h
new file mode 100644
index 000000000000..f9f396cd4371
--- /dev/null
+++ b/drivers/dma/amd/common/amd_dma.h
@@ -0,0 +1,26 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD DMA Driver common
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+
+#ifndef AMD_DMA_H
+#define AMD_DMA_H
+
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+
+#include "../ptdma/ptdma.h"
+#include "../../virt-dma.h"
+
+#endif