diff mbox series

[v3,2/7] dmaengine: ae4dma: Add AMD ae4dma controller driver

Message ID 20240624075610.1659502-3-Basavaraj.Natikar@amd.com (mailing list archive)
State New
Headers show
Series Add support of AMD AE4DMA DMA Engine | expand

Commit Message

Basavaraj Natikar June 24, 2024, 7:56 a.m. UTC
Add support for AMD AE4DMA controller. It performs high-bandwidth
memory to memory and IO copy operation. Device commands are managed
via a circular queue of 'descriptors', each of which specifies source
and destination addresses for copying a single buffer of data.

Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com>
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
---
 MAINTAINERS                         |   6 +
 drivers/dma/amd/Kconfig             |   1 +
 drivers/dma/amd/Makefile            |   1 +
 drivers/dma/amd/ae4dma/Kconfig      |  14 ++
 drivers/dma/amd/ae4dma/Makefile     |  10 ++
 drivers/dma/amd/ae4dma/ae4dma-dev.c | 200 ++++++++++++++++++++++++++++
 drivers/dma/amd/ae4dma/ae4dma-pci.c | 157 ++++++++++++++++++++++
 drivers/dma/amd/ae4dma/ae4dma.h     |  77 +++++++++++
 drivers/dma/amd/common/amd_dma.h    |  26 ++++
 9 files changed, 492 insertions(+)
 create mode 100644 drivers/dma/amd/ae4dma/Kconfig
 create mode 100644 drivers/dma/amd/ae4dma/Makefile
 create mode 100644 drivers/dma/amd/ae4dma/ae4dma-dev.c
 create mode 100644 drivers/dma/amd/ae4dma/ae4dma-pci.c
 create mode 100644 drivers/dma/amd/ae4dma/ae4dma.h
 create mode 100644 drivers/dma/amd/common/amd_dma.h

Comments

Philipp Stanner June 24, 2024, 8:20 a.m. UTC | #1
Hi,

thx for reworking the memory ordering.

I just found few more nits:

On Mon, 2024-06-24 at 13:26 +0530, Basavaraj Natikar wrote:
> Add support for AMD AE4DMA controller. It performs high-bandwidth
> memory to memory and IO copy operation. Device commands are managed
> via a circular queue of 'descriptors', each of which specifies source
> and destination addresses for copying a single buffer of data.
> 
> Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com>
> Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> ---
>  MAINTAINERS                         |   6 +
>  drivers/dma/amd/Kconfig             |   1 +
>  drivers/dma/amd/Makefile            |   1 +
>  drivers/dma/amd/ae4dma/Kconfig      |  14 ++
>  drivers/dma/amd/ae4dma/Makefile     |  10 ++
>  drivers/dma/amd/ae4dma/ae4dma-dev.c | 200
> ++++++++++++++++++++++++++++
>  drivers/dma/amd/ae4dma/ae4dma-pci.c | 157 ++++++++++++++++++++++
>  drivers/dma/amd/ae4dma/ae4dma.h     |  77 +++++++++++
>  drivers/dma/amd/common/amd_dma.h    |  26 ++++
>  9 files changed, 492 insertions(+)
>  create mode 100644 drivers/dma/amd/ae4dma/Kconfig
>  create mode 100644 drivers/dma/amd/ae4dma/Makefile
>  create mode 100644 drivers/dma/amd/ae4dma/ae4dma-dev.c
>  create mode 100644 drivers/dma/amd/ae4dma/ae4dma-pci.c
>  create mode 100644 drivers/dma/amd/ae4dma/ae4dma.h
>  create mode 100644 drivers/dma/amd/common/amd_dma.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 537964afef55..42436e1cf1e2 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -947,6 +947,12 @@ L: linux-edac@vger.kernel.org
>  S:     Supported
>  F:     drivers/ras/amd/atl/*
>  
> +AMD AE4DMA DRIVER
> +M:     Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> +L:     dmaengine@vger.kernel.org
> +S:     Maintained
> +F:     drivers/dma/amd/ae4dma/
> +
>  AMD AXI W1 DRIVER
>  M:     Kris Chaplin <kris.chaplin@amd.com>
>  R:     Thomas Delev <thomas.delev@amd.com>
> diff --git a/drivers/dma/amd/Kconfig b/drivers/dma/amd/Kconfig
> index 8246b463bcf7..8c25a3ed6b94 100644
> --- a/drivers/dma/amd/Kconfig
> +++ b/drivers/dma/amd/Kconfig
> @@ -3,3 +3,4 @@
>  # AMD DMA Drivers
>  
>  source "drivers/dma/amd/ptdma/Kconfig"
> +source "drivers/dma/amd/ae4dma/Kconfig"
> diff --git a/drivers/dma/amd/Makefile b/drivers/dma/amd/Makefile
> index dd7257ba7e06..8049b06a9ff5 100644
> --- a/drivers/dma/amd/Makefile
> +++ b/drivers/dma/amd/Makefile
> @@ -4,3 +4,4 @@
>  #
>  
>  obj-$(CONFIG_AMD_PTDMA) += ptdma/
> +obj-$(CONFIG_AMD_AE4DMA) += ae4dma/
> diff --git a/drivers/dma/amd/ae4dma/Kconfig
> b/drivers/dma/amd/ae4dma/Kconfig
> new file mode 100644
> index 000000000000..ea8a7fe68df5
> --- /dev/null
> +++ b/drivers/dma/amd/ae4dma/Kconfig
> @@ -0,0 +1,14 @@
> +# SPDX-License-Identifier: GPL-2.0
> +config AMD_AE4DMA
> +       tristate  "AMD AE4DMA Engine"
> +       depends on (X86_64 || COMPILE_TEST) && PCI
> +       depends on AMD_PTDMA
> +       select DMA_ENGINE
> +       select DMA_VIRTUAL_CHANNELS
> +       help
> +         Enable support for the AMD AE4DMA controller. This
> controller
> +         provides DMA capabilities to perform high bandwidth memory
> to
> +         memory and IO copy operations. It performs DMA transfer
> through
> +         queue-based descriptor management. This DMA controller is
> intended
> +         to be used with AMD Non-Transparent Bridge devices and not
> for
> +         general purpose peripheral DMA.
> diff --git a/drivers/dma/amd/ae4dma/Makefile
> b/drivers/dma/amd/ae4dma/Makefile
> new file mode 100644
> index 000000000000..e918f85a80ec
> --- /dev/null
> +++ b/drivers/dma/amd/ae4dma/Makefile
> @@ -0,0 +1,10 @@
> +# SPDX-License-Identifier: GPL-2.0
> +#
> +# AMD AE4DMA driver
> +#
> +
> +obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
> +
> +ae4dma-objs := ae4dma-dev.o
> +
> +ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
> diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c
> b/drivers/dma/amd/ae4dma/ae4dma-dev.c
> new file mode 100644
> index 000000000000..cb05fcb47987
> --- /dev/null
> +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c
> @@ -0,0 +1,200 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * AMD AE4DMA driver
> + *
> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> + */
> +
> +#include "ae4dma.h"
> +
> +static unsigned int max_hw_q = 1;
> +module_param(max_hw_q, uint, 0444);
> +MODULE_PARM_DESC(max_hw_q, "max hw queues supported by engine (any
> non-zero value, default: 1)");
> +
> +static char *ae4_error_codes[] = {
> +       "",
> +       "ERR 01: INVALID HEADER DW0",
> +       "ERR 02: INVALID STATUS",
> +       "ERR 03: INVALID LENGTH - 4 BYTE ALIGNMENT",
> +       "ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT",
> +       "ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT",
> +       "ERR 06: INVALID ALIGNMENT",
> +       "ERR 07: INVALID DESCRIPTOR",
> +};
> +
> +static void ae4_log_error(struct pt_device *d, int e)
> +{
> +       if (e <= 7)
> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n",
> ae4_error_codes[e], e);
> +       else if (e > 7 && e <= 15)
> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n",
> "INVALID DESCRIPTOR", e);
> +       else if (e > 15 && e <= 31)
> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n",
> "INVALID DESCRIPTOR", e);
> +       else if (e > 31 && e <= 63)
> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n",
> "INVALID DESCRIPTOR", e);
> +       else if (e > 63 && e <= 127)
> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE
> ERROR", e);
> +       else if (e > 127 && e <= 255)
> +               dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE
> ERROR", e);

Those numbers are hardware-specific I guess? Would it make sense to
define constants for them? Or have a short comment where they come
from?

> +       else
> +               dev_info(d->dev, "Unknown AE4DMA error");
> +}
> +
> +static void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q,
> int idx)
> +{
> +       struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
> +       struct ae4dma_desc desc;
> +       u8 status;
> +
> +       memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct
> ae4dma_desc));
> +       status = desc.dw1.status;
> +       if (status && status != AE4_DESC_COMPLETED) {
> +               cmd_q->cmd_error = desc.dw1.err_code;
> +               if (cmd_q->cmd_error)
> +                       ae4_log_error(cmd_q->pt, cmd_q->cmd_error);
> +       }
> +}
> +
> +static void ae4_pending_work(struct work_struct *work)
> +{
> +       struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct
> ae4_cmd_queue, p_work.work);
> +       struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
> +       struct pt_cmd *cmd;
> +       u32 cridx;
> +
> +       while (true) {

Nit: I think

for (;;) {

is more idiomatic in the kernel

> +               wait_event_interruptible(ae4cmd_q->q_w,
> +                                        ((atomic64_read(&ae4cmd_q-
> >done_cnt)) <
> +                                          atomic64_read(&ae4cmd_q-
> >intr_cnt)));
> +
> +               atomic64_inc(&ae4cmd_q->done_cnt);
> +
> +               mutex_lock(&ae4cmd_q->cmd_lock);
> +
> +               cridx = readl(cmd_q->reg_control + 0x0C);

Magic number 0x0C – what does it mean? I'd define a constant.

> +
> +               while ((ae4cmd_q->dridx != cridx) &&
> !list_empty(&ae4cmd_q->cmd)) {
> +                       cmd = list_first_entry(&ae4cmd_q->cmd, struct
> pt_cmd, entry);
> +                       list_del(&cmd->entry);
> +
> +                       ae4_check_status_error(ae4cmd_q, ae4cmd_q-
> >dridx);
> +                       cmd->pt_cmd_callback(cmd->data, cmd->ret);
> +
> +                       ae4cmd_q->q_cmd_count--;
> +                       ae4cmd_q->dridx = (ae4cmd_q->dridx + 1) %
> CMD_Q_LEN;
> +
> +                       complete_all(&ae4cmd_q->cmp);
> +               }
> +
> +               mutex_unlock(&ae4cmd_q->cmd_lock);
> +       }
> +}
> +
> +static irqreturn_t ae4_core_irq_handler(int irq, void *data)
> +{
> +       struct ae4_cmd_queue *ae4cmd_q = data;
> +       struct pt_cmd_queue *cmd_q;
> +       struct pt_device *pt;
> +       u32 status;
> +
> +       cmd_q = &ae4cmd_q->cmd_q;
> +       pt = cmd_q->pt;
> +
> +       pt->total_interrupts++;
> +       atomic64_inc(&ae4cmd_q->intr_cnt);
> +
> +       wake_up(&ae4cmd_q->q_w);
> +
> +       status = readl(cmd_q->reg_control + 0x14);
> +       if (status & BIT(0)) {
> +               status &= GENMASK(31, 1);
> +               writel(status, cmd_q->reg_control + 0x14);

same with 0x14

> +       }
> +
> +       return IRQ_HANDLED;
> +}
> +
> +void ae4_destroy_work(struct ae4_device *ae4)
> +{
> +       struct ae4_cmd_queue *ae4cmd_q;
> +       int i;
> +
> +       for (i = 0; i < ae4->cmd_q_count; i++) {
> +               ae4cmd_q = &ae4->ae4cmd_q[i];
> +
> +               if (!ae4cmd_q->pws)
> +                       break;
> +
> +               cancel_delayed_work_sync(&ae4cmd_q->p_work);
> +               destroy_workqueue(ae4cmd_q->pws);
> +       }
> +}
> +
> +int ae4_core_init(struct ae4_device *ae4)
> +{
> +       struct pt_device *pt = &ae4->pt;
> +       struct ae4_cmd_queue *ae4cmd_q;
> +       struct device *dev = pt->dev;
> +       struct pt_cmd_queue *cmd_q;
> +       int i, ret = 0;
> +
> +       writel(max_hw_q, pt->io_regs);
> +
> +       for (i = 0; i < max_hw_q; i++) {
> +               ae4cmd_q = &ae4->ae4cmd_q[i];
> +               ae4cmd_q->id = ae4->cmd_q_count;
> +               ae4->cmd_q_count++;
> +
> +               cmd_q = &ae4cmd_q->cmd_q;
> +               cmd_q->pt = pt;
> +
> +               /* Preset some register values (Q size is 32byte
> (0x20)) */
> +               cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);

OK, here's a comment. Could think about doing:

#define Q_SIZE 0x20

> +
> +               ret = devm_request_irq(dev, ae4->ae4_irq[i],
> ae4_core_irq_handler, 0,
> +                                      dev_name(pt->dev), ae4cmd_q);
> +               if (ret)
> +                       return ret;
> +
> +               cmd_q->qsize = Q_SIZE(sizeof(struct ae4dma_desc));
> +
> +               cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize,
> &cmd_q->qbase_dma,
> +                                                  GFP_KERNEL);
> +               if (!cmd_q->qbase)
> +                       return -ENOMEM;
> +       }
> +
> +       for (i = 0; i < ae4->cmd_q_count; i++) {
> +               ae4cmd_q = &ae4->ae4cmd_q[i];
> +
> +               cmd_q = &ae4cmd_q->cmd_q;
> +
> +               /* Preset some register values (Q size is 32byte
> (0x20)) */
> +               cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);

same

> +
> +               /* Update the device registers with queue
> information. */
> +               writel(CMD_Q_LEN, cmd_q->reg_control + 0x08);

same

> +
> +               cmd_q->qdma_tail = cmd_q->qbase_dma;
> +               writel(lower_32_bits(cmd_q->qdma_tail), cmd_q-
> >reg_control + 0x18);
> +               writel(upper_32_bits(cmd_q->qdma_tail), cmd_q-
> >reg_control + 0x1C);

same with those two

> +
> +               INIT_LIST_HEAD(&ae4cmd_q->cmd);
> +               init_waitqueue_head(&ae4cmd_q->q_w);
> +
> +               ae4cmd_q->pws = alloc_ordered_workqueue("ae4dma_%d",
> WQ_MEM_RECLAIM, ae4cmd_q->id);
> +               if (!ae4cmd_q->pws) {
> +                       ae4_destroy_work(ae4);
> +                       return -ENOMEM;
> +               }
> +               INIT_DELAYED_WORK(&ae4cmd_q->p_work,
> ae4_pending_work);
> +               queue_delayed_work(ae4cmd_q->pws, &ae4cmd_q->p_work, 
> usecs_to_jiffies(100));
> +
> +               init_completion(&ae4cmd_q->cmp);
> +       }
> +
> +       return ret;
> +}
> diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c
> b/drivers/dma/amd/ae4dma/ae4dma-pci.c
> new file mode 100644
> index 000000000000..43d36e9d1efb
> --- /dev/null
> +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
> @@ -0,0 +1,157 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * AMD AE4DMA driver
> + *
> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> + */
> +
> +#include "ae4dma.h"
> +
> +static int ae4_get_irqs(struct ae4_device *ae4)
> +{
> +       struct ae4_msix *ae4_msix = ae4->ae4_msix;
> +       struct pt_device *pt = &ae4->pt;
> +       struct device *dev = pt->dev;
> +       struct pci_dev *pdev;
> +       int i, v, ret;
> +
> +       pdev = to_pci_dev(dev);
> +
> +       for (v = 0; v < ARRAY_SIZE(ae4_msix->msix_entry); v++)
> +               ae4_msix->msix_entry[v].entry = v;
> +
> +       ret = pci_alloc_irq_vectors(pdev, v, v, PCI_IRQ_MSIX);
> +       if (ret != v) {
> +               if (ret > 0)
> +                       pci_free_irq_vectors(pdev);
> +
> +               dev_err(dev, "could not enable MSI-X (%d), trying
> MSI\n", ret);
> +               ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
> +               if (ret < 0) {
> +                       dev_err(dev, "could not enable MSI (%d)\n",
> ret);
> +                       return ret;
> +               }
> +
> +               ret = pci_irq_vector(pdev, 0);
> +               if (ret < 0) {
> +                       pci_free_irq_vectors(pdev);
> +                       return ret;
> +               }
> +
> +               for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
> +                       ae4->ae4_irq[i] = ret;
> +
> +       } else {
> +               ae4_msix->msix_count = ret;
> +               for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
> +                       ae4->ae4_irq[i] = ae4_msix-
> >msix_entry[i].vector;
> +       }
> +
> +       return ret;
> +}
> +
> +static void ae4_free_irqs(struct ae4_device *ae4)
> +{
> +       struct ae4_msix *ae4_msix = ae4->ae4_msix;
> +       struct pt_device *pt = &ae4->pt;
> +       struct device *dev = pt->dev;
> +       struct pci_dev *pdev;
> +
> +       pdev = to_pci_dev(dev);
> +
> +       if (ae4_msix && (ae4_msix->msix_count || ae4-
> >ae4_irq[MAX_AE4_HW_QUEUES - 1]))
> +               pci_free_irq_vectors(pdev);
> +}
> +
> +static void ae4_deinit(struct ae4_device *ae4)
> +{
> +       ae4_free_irqs(ae4);
> +}
> +
> +static int ae4_pci_probe(struct pci_dev *pdev, const struct
> pci_device_id *id)
> +{
> +       struct device *dev = &pdev->dev;
> +       struct ae4_device *ae4;
> +       struct pt_device *pt;
> +       int bar_mask;
> +       int ret = 0;
> +
> +       ae4 = devm_kzalloc(dev, sizeof(*ae4), GFP_KERNEL);
> +       if (!ae4)
> +               return -ENOMEM;
> +
> +       ae4->ae4_msix = devm_kzalloc(dev, sizeof(struct ae4_msix),
> GFP_KERNEL);
> +       if (!ae4->ae4_msix)
> +               return -ENOMEM;
> +
> +       ret = pcim_enable_device(pdev);
> +       if (ret)
> +               goto ae4_error;
> +
> +       bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
> +       ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma");
> +       if (ret)
> +               goto ae4_error;
> +
> +       pt = &ae4->pt;
> +       pt->dev = dev;
> +
> +       pt->io_regs = pcim_iomap_table(pdev)[0];
> +       if (!pt->io_regs) {
> +               ret = -ENOMEM;
> +               goto ae4_error;
> +       }
> +
> +       ret = ae4_get_irqs(ae4);
> +       if (ret < 0)
> +               goto ae4_error;
> +
> +       pci_set_master(pdev);
> +
> +       dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
> +
> +       dev_set_drvdata(dev, ae4);
> +
> +       ret = ae4_core_init(ae4);
> +       if (ret)
> +               goto ae4_error;
> +
> +       return 0;
> +
> +ae4_error:
> +       ae4_deinit(ae4);
> +
> +       return ret;
> +}
> +
> +static void ae4_pci_remove(struct pci_dev *pdev)
> +{
> +       struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev);
> +
> +       ae4_destroy_work(ae4);
> +       ae4_deinit(ae4);
> +}
> +
> +static const struct pci_device_id ae4_pci_table[] = {
> +       { PCI_VDEVICE(AMD, 0x14C8), },
> +       { PCI_VDEVICE(AMD, 0x14DC), },
> +       { PCI_VDEVICE(AMD, 0x149B), },
> +       /* Last entry must be zero */
> +       { 0, }
> +};
> +MODULE_DEVICE_TABLE(pci, ae4_pci_table);
> +
> +static struct pci_driver ae4_pci_driver = {
> +       .name = "ae4dma",
> +       .id_table = ae4_pci_table,
> +       .probe = ae4_pci_probe,
> +       .remove = ae4_pci_remove,
> +};
> +
> +module_pci_driver(ae4_pci_driver);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_DESCRIPTION("AMD AE4DMA driver");
> diff --git a/drivers/dma/amd/ae4dma/ae4dma.h
> b/drivers/dma/amd/ae4dma/ae4dma.h
> new file mode 100644
> index 000000000000..59a0ea3293cb
> --- /dev/null
> +++ b/drivers/dma/amd/ae4dma/ae4dma.h
> @@ -0,0 +1,77 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * AMD AE4DMA driver
> + *
> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> + */
> +#ifndef __AE4DMA_H__
> +#define __AE4DMA_H__
> +
> +#include "../common/amd_dma.h"
> +
> +#define MAX_AE4_HW_QUEUES              16
> +
> +#define AE4_DESC_COMPLETED             0x3

Here you define some :)


Thanks,
P.

> +
> +struct ae4_msix {
> +       int msix_count;
> +       struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
> +};
> +
> +struct ae4_cmd_queue {
> +       struct ae4_device *ae4;
> +       struct pt_cmd_queue cmd_q;
> +       struct list_head cmd;
> +       /* protect command operations */
> +       struct mutex cmd_lock;
> +       struct delayed_work p_work;
> +       struct workqueue_struct *pws;
> +       struct completion cmp;
> +       wait_queue_head_t q_w;
> +       atomic64_t intr_cnt;
> +       atomic64_t done_cnt;
> +       u64 q_cmd_count;
> +       u32 dridx;
> +       u32 id;
> +};
> +
> +union dwou {
> +       u32 dw0;
> +       struct dword0 {
> +       u8      byte0;
> +       u8      byte1;
> +       u16     timestamp;
> +       } dws;
> +};
> +
> +struct dword1 {
> +       u8      status;
> +       u8      err_code;
> +       u16     desc_id;
> +};
> +
> +struct ae4dma_desc {
> +       union dwou dwouv;
> +       struct dword1 dw1;
> +       u32 length;
> +       u32 rsvd;
> +       u32 src_hi;
> +       u32 src_lo;
> +       u32 dst_hi;
> +       u32 dst_lo;
> +};
> +
> +struct ae4_device {
> +       struct pt_device pt;
> +       struct ae4_msix *ae4_msix;
> +       struct ae4_cmd_queue ae4cmd_q[MAX_AE4_HW_QUEUES];
> +       unsigned int ae4_irq[MAX_AE4_HW_QUEUES];
> +       unsigned int cmd_q_count;
> +};
> +
> +int ae4_core_init(struct ae4_device *ae4);
> +void ae4_destroy_work(struct ae4_device *ae4);
> +#endif
> diff --git a/drivers/dma/amd/common/amd_dma.h
> b/drivers/dma/amd/common/amd_dma.h
> new file mode 100644
> index 000000000000..f9f396cd4371
> --- /dev/null
> +++ b/drivers/dma/amd/common/amd_dma.h
> @@ -0,0 +1,26 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * AMD DMA Driver common
> + *
> + * Copyright (c) 2024, Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
> + */
> +
> +#ifndef AMD_DMA_H
> +#define AMD_DMA_H
> +
> +#include <linux/device.h>
> +#include <linux/dmaengine.h>
> +#include <linux/dmapool.h>
> +#include <linux/list.h>
> +#include <linux/mutex.h>
> +#include <linux/pci.h>
> +#include <linux/spinlock.h>
> +#include <linux/wait.h>
> +
> +#include "../ptdma/ptdma.h"
> +#include "../../virt-dma.h"
> +
> +#endif
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 537964afef55..42436e1cf1e2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -947,6 +947,12 @@  L:	linux-edac@vger.kernel.org
 S:	Supported
 F:	drivers/ras/amd/atl/*
 
+AMD AE4DMA DRIVER
+M:	Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+L:	dmaengine@vger.kernel.org
+S:	Maintained
+F:	drivers/dma/amd/ae4dma/
+
 AMD AXI W1 DRIVER
 M:	Kris Chaplin <kris.chaplin@amd.com>
 R:	Thomas Delev <thomas.delev@amd.com>
diff --git a/drivers/dma/amd/Kconfig b/drivers/dma/amd/Kconfig
index 8246b463bcf7..8c25a3ed6b94 100644
--- a/drivers/dma/amd/Kconfig
+++ b/drivers/dma/amd/Kconfig
@@ -3,3 +3,4 @@ 
 # AMD DMA Drivers
 
 source "drivers/dma/amd/ptdma/Kconfig"
+source "drivers/dma/amd/ae4dma/Kconfig"
diff --git a/drivers/dma/amd/Makefile b/drivers/dma/amd/Makefile
index dd7257ba7e06..8049b06a9ff5 100644
--- a/drivers/dma/amd/Makefile
+++ b/drivers/dma/amd/Makefile
@@ -4,3 +4,4 @@ 
 #
 
 obj-$(CONFIG_AMD_PTDMA) += ptdma/
+obj-$(CONFIG_AMD_AE4DMA) += ae4dma/
diff --git a/drivers/dma/amd/ae4dma/Kconfig b/drivers/dma/amd/ae4dma/Kconfig
new file mode 100644
index 000000000000..ea8a7fe68df5
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/Kconfig
@@ -0,0 +1,14 @@ 
+# SPDX-License-Identifier: GPL-2.0
+config AMD_AE4DMA
+	tristate  "AMD AE4DMA Engine"
+	depends on (X86_64 || COMPILE_TEST) && PCI
+	depends on AMD_PTDMA
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the AMD AE4DMA controller. This controller
+	  provides DMA capabilities to perform high bandwidth memory to
+	  memory and IO copy operations. It performs DMA transfer through
+	  queue-based descriptor management. This DMA controller is intended
+	  to be used with AMD Non-Transparent Bridge devices and not for
+	  general purpose peripheral DMA.
diff --git a/drivers/dma/amd/ae4dma/Makefile b/drivers/dma/amd/ae4dma/Makefile
new file mode 100644
index 000000000000..e918f85a80ec
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/Makefile
@@ -0,0 +1,10 @@ 
+# SPDX-License-Identifier: GPL-2.0
+#
+# AMD AE4DMA driver
+#
+
+obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
+
+ae4dma-objs := ae4dma-dev.o
+
+ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c b/drivers/dma/amd/ae4dma/ae4dma-dev.c
new file mode 100644
index 000000000000..cb05fcb47987
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c
@@ -0,0 +1,200 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD AE4DMA driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+
+#include "ae4dma.h"
+
+static unsigned int max_hw_q = 1;
+module_param(max_hw_q, uint, 0444);
+MODULE_PARM_DESC(max_hw_q, "max hw queues supported by engine (any non-zero value, default: 1)");
+
+static char *ae4_error_codes[] = {
+	"",
+	"ERR 01: INVALID HEADER DW0",
+	"ERR 02: INVALID STATUS",
+	"ERR 03: INVALID LENGTH - 4 BYTE ALIGNMENT",
+	"ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT",
+	"ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT",
+	"ERR 06: INVALID ALIGNMENT",
+	"ERR 07: INVALID DESCRIPTOR",
+};
+
+static void ae4_log_error(struct pt_device *d, int e)
+{
+	if (e <= 7)
+		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", ae4_error_codes[e], e);
+	else if (e > 7 && e <= 15)
+		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
+	else if (e > 15 && e <= 31)
+		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
+	else if (e > 31 && e <= 63)
+		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
+	else if (e > 63 && e <= 127)
+		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e);
+	else if (e > 127 && e <= 255)
+		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e);
+	else
+		dev_info(d->dev, "Unknown AE4DMA error");
+}
+
+static void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx)
+{
+	struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
+	struct ae4dma_desc desc;
+	u8 status;
+
+	memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct ae4dma_desc));
+	status = desc.dw1.status;
+	if (status && status != AE4_DESC_COMPLETED) {
+		cmd_q->cmd_error = desc.dw1.err_code;
+		if (cmd_q->cmd_error)
+			ae4_log_error(cmd_q->pt, cmd_q->cmd_error);
+	}
+}
+
+static void ae4_pending_work(struct work_struct *work)
+{
+	struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct ae4_cmd_queue, p_work.work);
+	struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
+	struct pt_cmd *cmd;
+	u32 cridx;
+
+	while (true) {
+		wait_event_interruptible(ae4cmd_q->q_w,
+					 ((atomic64_read(&ae4cmd_q->done_cnt)) <
+					   atomic64_read(&ae4cmd_q->intr_cnt)));
+
+		atomic64_inc(&ae4cmd_q->done_cnt);
+
+		mutex_lock(&ae4cmd_q->cmd_lock);
+
+		cridx = readl(cmd_q->reg_control + 0x0C);
+
+		while ((ae4cmd_q->dridx != cridx) && !list_empty(&ae4cmd_q->cmd)) {
+			cmd = list_first_entry(&ae4cmd_q->cmd, struct pt_cmd, entry);
+			list_del(&cmd->entry);
+
+			ae4_check_status_error(ae4cmd_q, ae4cmd_q->dridx);
+			cmd->pt_cmd_callback(cmd->data, cmd->ret);
+
+			ae4cmd_q->q_cmd_count--;
+			ae4cmd_q->dridx = (ae4cmd_q->dridx + 1) % CMD_Q_LEN;
+
+			complete_all(&ae4cmd_q->cmp);
+		}
+
+		mutex_unlock(&ae4cmd_q->cmd_lock);
+	}
+}
+
+static irqreturn_t ae4_core_irq_handler(int irq, void *data)
+{
+	struct ae4_cmd_queue *ae4cmd_q = data;
+	struct pt_cmd_queue *cmd_q;
+	struct pt_device *pt;
+	u32 status;
+
+	cmd_q = &ae4cmd_q->cmd_q;
+	pt = cmd_q->pt;
+
+	pt->total_interrupts++;
+	atomic64_inc(&ae4cmd_q->intr_cnt);
+
+	wake_up(&ae4cmd_q->q_w);
+
+	status = readl(cmd_q->reg_control + 0x14);
+	if (status & BIT(0)) {
+		status &= GENMASK(31, 1);
+		writel(status, cmd_q->reg_control + 0x14);
+	}
+
+	return IRQ_HANDLED;
+}
+
+void ae4_destroy_work(struct ae4_device *ae4)
+{
+	struct ae4_cmd_queue *ae4cmd_q;
+	int i;
+
+	for (i = 0; i < ae4->cmd_q_count; i++) {
+		ae4cmd_q = &ae4->ae4cmd_q[i];
+
+		if (!ae4cmd_q->pws)
+			break;
+
+		cancel_delayed_work_sync(&ae4cmd_q->p_work);
+		destroy_workqueue(ae4cmd_q->pws);
+	}
+}
+
+int ae4_core_init(struct ae4_device *ae4)
+{
+	struct pt_device *pt = &ae4->pt;
+	struct ae4_cmd_queue *ae4cmd_q;
+	struct device *dev = pt->dev;
+	struct pt_cmd_queue *cmd_q;
+	int i, ret = 0;
+
+	writel(max_hw_q, pt->io_regs);
+
+	for (i = 0; i < max_hw_q; i++) {
+		ae4cmd_q = &ae4->ae4cmd_q[i];
+		ae4cmd_q->id = ae4->cmd_q_count;
+		ae4->cmd_q_count++;
+
+		cmd_q = &ae4cmd_q->cmd_q;
+		cmd_q->pt = pt;
+
+		/* Preset some register values (Q size is 32byte (0x20)) */
+		cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);
+
+		ret = devm_request_irq(dev, ae4->ae4_irq[i], ae4_core_irq_handler, 0,
+				       dev_name(pt->dev), ae4cmd_q);
+		if (ret)
+			return ret;
+
+		cmd_q->qsize = Q_SIZE(sizeof(struct ae4dma_desc));
+
+		cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize, &cmd_q->qbase_dma,
+						   GFP_KERNEL);
+		if (!cmd_q->qbase)
+			return -ENOMEM;
+	}
+
+	for (i = 0; i < ae4->cmd_q_count; i++) {
+		ae4cmd_q = &ae4->ae4cmd_q[i];
+
+		cmd_q = &ae4cmd_q->cmd_q;
+
+		/* Preset some register values (Q size is 32byte (0x20)) */
+		cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);
+
+		/* Update the device registers with queue information. */
+		writel(CMD_Q_LEN, cmd_q->reg_control + 0x08);
+
+		cmd_q->qdma_tail = cmd_q->qbase_dma;
+		writel(lower_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + 0x18);
+		writel(upper_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + 0x1C);
+
+		INIT_LIST_HEAD(&ae4cmd_q->cmd);
+		init_waitqueue_head(&ae4cmd_q->q_w);
+
+		ae4cmd_q->pws = alloc_ordered_workqueue("ae4dma_%d", WQ_MEM_RECLAIM, ae4cmd_q->id);
+		if (!ae4cmd_q->pws) {
+			ae4_destroy_work(ae4);
+			return -ENOMEM;
+		}
+		INIT_DELAYED_WORK(&ae4cmd_q->p_work, ae4_pending_work);
+		queue_delayed_work(ae4cmd_q->pws, &ae4cmd_q->p_work,  usecs_to_jiffies(100));
+
+		init_completion(&ae4cmd_q->cmp);
+	}
+
+	return ret;
+}
diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c
new file mode 100644
index 000000000000..43d36e9d1efb
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
@@ -0,0 +1,157 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD AE4DMA driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+
+#include "ae4dma.h"
+
+static int ae4_get_irqs(struct ae4_device *ae4)
+{
+	struct ae4_msix *ae4_msix = ae4->ae4_msix;
+	struct pt_device *pt = &ae4->pt;
+	struct device *dev = pt->dev;
+	struct pci_dev *pdev;
+	int i, v, ret;
+
+	pdev = to_pci_dev(dev);
+
+	for (v = 0; v < ARRAY_SIZE(ae4_msix->msix_entry); v++)
+		ae4_msix->msix_entry[v].entry = v;
+
+	ret = pci_alloc_irq_vectors(pdev, v, v, PCI_IRQ_MSIX);
+	if (ret != v) {
+		if (ret > 0)
+			pci_free_irq_vectors(pdev);
+
+		dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
+		ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+		if (ret < 0) {
+			dev_err(dev, "could not enable MSI (%d)\n", ret);
+			return ret;
+		}
+
+		ret = pci_irq_vector(pdev, 0);
+		if (ret < 0) {
+			pci_free_irq_vectors(pdev);
+			return ret;
+		}
+
+		for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
+			ae4->ae4_irq[i] = ret;
+
+	} else {
+		ae4_msix->msix_count = ret;
+		for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
+			ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector;
+	}
+
+	return ret;
+}
+
+static void ae4_free_irqs(struct ae4_device *ae4)
+{
+	struct ae4_msix *ae4_msix = ae4->ae4_msix;
+	struct pt_device *pt = &ae4->pt;
+	struct device *dev = pt->dev;
+	struct pci_dev *pdev;
+
+	pdev = to_pci_dev(dev);
+
+	if (ae4_msix && (ae4_msix->msix_count || ae4->ae4_irq[MAX_AE4_HW_QUEUES - 1]))
+		pci_free_irq_vectors(pdev);
+}
+
+static void ae4_deinit(struct ae4_device *ae4)
+{
+	ae4_free_irqs(ae4);
+}
+
+static int ae4_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct device *dev = &pdev->dev;
+	struct ae4_device *ae4;
+	struct pt_device *pt;
+	int bar_mask;
+	int ret = 0;
+
+	ae4 = devm_kzalloc(dev, sizeof(*ae4), GFP_KERNEL);
+	if (!ae4)
+		return -ENOMEM;
+
+	ae4->ae4_msix = devm_kzalloc(dev, sizeof(struct ae4_msix), GFP_KERNEL);
+	if (!ae4->ae4_msix)
+		return -ENOMEM;
+
+	ret = pcim_enable_device(pdev);
+	if (ret)
+		goto ae4_error;
+
+	bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+	ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma");
+	if (ret)
+		goto ae4_error;
+
+	pt = &ae4->pt;
+	pt->dev = dev;
+
+	pt->io_regs = pcim_iomap_table(pdev)[0];
+	if (!pt->io_regs) {
+		ret = -ENOMEM;
+		goto ae4_error;
+	}
+
+	ret = ae4_get_irqs(ae4);
+	if (ret < 0)
+		goto ae4_error;
+
+	pci_set_master(pdev);
+
+	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+
+	dev_set_drvdata(dev, ae4);
+
+	ret = ae4_core_init(ae4);
+	if (ret)
+		goto ae4_error;
+
+	return 0;
+
+ae4_error:
+	ae4_deinit(ae4);
+
+	return ret;
+}
+
+static void ae4_pci_remove(struct pci_dev *pdev)
+{
+	struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev);
+
+	ae4_destroy_work(ae4);
+	ae4_deinit(ae4);
+}
+
+static const struct pci_device_id ae4_pci_table[] = {
+	{ PCI_VDEVICE(AMD, 0x14C8), },
+	{ PCI_VDEVICE(AMD, 0x14DC), },
+	{ PCI_VDEVICE(AMD, 0x149B), },
+	/* Last entry must be zero */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, ae4_pci_table);
+
+static struct pci_driver ae4_pci_driver = {
+	.name = "ae4dma",
+	.id_table = ae4_pci_table,
+	.probe = ae4_pci_probe,
+	.remove = ae4_pci_remove,
+};
+
+module_pci_driver(ae4_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AMD AE4DMA driver");
diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
new file mode 100644
index 000000000000..59a0ea3293cb
--- /dev/null
+++ b/drivers/dma/amd/ae4dma/ae4dma.h
@@ -0,0 +1,77 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD AE4DMA driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+#ifndef __AE4DMA_H__
+#define __AE4DMA_H__
+
+#include "../common/amd_dma.h"
+
+#define MAX_AE4_HW_QUEUES		16
+
+#define AE4_DESC_COMPLETED		0x3
+
+struct ae4_msix {
+	int msix_count;
+	struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
+};
+
+struct ae4_cmd_queue {
+	struct ae4_device *ae4;
+	struct pt_cmd_queue cmd_q;
+	struct list_head cmd;
+	/* protect command operations */
+	struct mutex cmd_lock;
+	struct delayed_work p_work;
+	struct workqueue_struct *pws;
+	struct completion cmp;
+	wait_queue_head_t q_w;
+	atomic64_t intr_cnt;
+	atomic64_t done_cnt;
+	u64 q_cmd_count;
+	u32 dridx;
+	u32 id;
+};
+
+union dwou {
+	u32 dw0;
+	struct dword0 {
+	u8	byte0;
+	u8	byte1;
+	u16	timestamp;
+	} dws;
+};
+
+struct dword1 {
+	u8	status;
+	u8	err_code;
+	u16	desc_id;
+};
+
+struct ae4dma_desc {
+	union dwou dwouv;
+	struct dword1 dw1;
+	u32 length;
+	u32 rsvd;
+	u32 src_hi;
+	u32 src_lo;
+	u32 dst_hi;
+	u32 dst_lo;
+};
+
+struct ae4_device {
+	struct pt_device pt;
+	struct ae4_msix *ae4_msix;
+	struct ae4_cmd_queue ae4cmd_q[MAX_AE4_HW_QUEUES];
+	unsigned int ae4_irq[MAX_AE4_HW_QUEUES];
+	unsigned int cmd_q_count;
+};
+
+int ae4_core_init(struct ae4_device *ae4);
+void ae4_destroy_work(struct ae4_device *ae4);
+#endif
diff --git a/drivers/dma/amd/common/amd_dma.h b/drivers/dma/amd/common/amd_dma.h
new file mode 100644
index 000000000000..f9f396cd4371
--- /dev/null
+++ b/drivers/dma/amd/common/amd_dma.h
@@ -0,0 +1,26 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD DMA Driver common
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+
+#ifndef AMD_DMA_H
+#define AMD_DMA_H
+
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+
+#include "../ptdma/ptdma.h"
+#include "../../virt-dma.h"
+
+#endif