diff mbox series

[V2,01/10] accel/amdxdna: Add a new driver for AMD AI Engine

Message ID 20240805173959.3181199-2-lizhi.hou@amd.com (mailing list archive)
State New, archived
Headers show
Series AMD XDNA driver | expand

Commit Message

Lizhi Hou Aug. 5, 2024, 5:39 p.m. UTC
AMD AI Engine forms the core of AMD NPU and can be used for accelerating
machine learning applications.

Add the driver to support AI Engine integrated to AMD CPU.
Only very basic functionalities are added.
  - module and PCI device initialization
  - firmware load
  - power up
  - low level hardware initialization

Co-developed-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
Signed-off-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
Co-developed-by: George Yang <George.Yang@amd.com>
Signed-off-by: George Yang <George.Yang@amd.com>
Co-developed-by: Min Ma <min.ma@amd.com>
Signed-off-by: Min Ma <min.ma@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 MAINTAINERS                             |   8 ++
 drivers/accel/Kconfig                   |   1 +
 drivers/accel/Makefile                  |   1 +
 drivers/accel/amdxdna/Kconfig           |  15 ++
 drivers/accel/amdxdna/Makefile          |  14 ++
 drivers/accel/amdxdna/TODO              |   4 +
 drivers/accel/amdxdna/aie2_pci.c        | 182 ++++++++++++++++++++++++
 drivers/accel/amdxdna/aie2_pci.h        | 144 +++++++++++++++++++
 drivers/accel/amdxdna/aie2_psp.c        | 137 ++++++++++++++++++
 drivers/accel/amdxdna/aie2_smu.c        | 112 +++++++++++++++
 drivers/accel/amdxdna/amdxdna_drm.c     |  20 +++
 drivers/accel/amdxdna/amdxdna_drm.h     |  65 +++++++++
 drivers/accel/amdxdna/amdxdna_pci_drv.c | 118 +++++++++++++++
 drivers/accel/amdxdna/amdxdna_pci_drv.h |  31 ++++
 drivers/accel/amdxdna/amdxdna_sysfs.c   |  47 ++++++
 drivers/accel/amdxdna/npu1_regs.c       |  94 ++++++++++++
 drivers/accel/amdxdna/npu2_regs.c       | 111 +++++++++++++++
 drivers/accel/amdxdna/npu4_regs.c       | 111 +++++++++++++++
 drivers/accel/amdxdna/npu5_regs.c       | 111 +++++++++++++++
 include/uapi/drm/amdxdna_accel.h        |  27 ++++
 20 files changed, 1353 insertions(+)
 create mode 100644 drivers/accel/amdxdna/Kconfig
 create mode 100644 drivers/accel/amdxdna/Makefile
 create mode 100644 drivers/accel/amdxdna/TODO
 create mode 100644 drivers/accel/amdxdna/aie2_pci.c
 create mode 100644 drivers/accel/amdxdna/aie2_pci.h
 create mode 100644 drivers/accel/amdxdna/aie2_psp.c
 create mode 100644 drivers/accel/amdxdna/aie2_smu.c
 create mode 100644 drivers/accel/amdxdna/amdxdna_drm.c
 create mode 100644 drivers/accel/amdxdna/amdxdna_drm.h
 create mode 100644 drivers/accel/amdxdna/amdxdna_pci_drv.c
 create mode 100644 drivers/accel/amdxdna/amdxdna_pci_drv.h
 create mode 100644 drivers/accel/amdxdna/amdxdna_sysfs.c
 create mode 100644 drivers/accel/amdxdna/npu1_regs.c
 create mode 100644 drivers/accel/amdxdna/npu2_regs.c
 create mode 100644 drivers/accel/amdxdna/npu4_regs.c
 create mode 100644 drivers/accel/amdxdna/npu5_regs.c
 create mode 100644 include/uapi/drm/amdxdna_accel.h

Comments

Markus Elfring Aug. 7, 2024, 11:06 a.m. UTC | #1
If you temporarily find the circumstances too challenging for applications
of scope-based resource management, I suggest to use the following statements instead
(so that a bit of redundant code can be avoided).



…
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -0,0 +1,182 @@> +static int aie2_init(struct amdxdna_dev *xdna)
> +{> +	release_firmware(fw);
> +	return 0;

	ret = 0;
	goto release_fw;

…
> +release_fw:
> +	release_firmware(fw);
> +
> +	return ret;
> +}
…


Otherwise (in case further collateral evolution will become more desirable):
…
> +static int aie2_init(struct amdxdna_dev *xdna)
> +{> +	const struct firmware *fw;

I propose to take another software design option better into account.

* You may reduce the scope of such a local variable.

* How do you think about to use the attribute “__free(firmware)”?
  https://elixir.bootlin.com/linux/v6.11-rc2/source/include/linux/firmware.h#L214

…
> +	ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
…


Regards,
Markus
Lizhi Hou Aug. 7, 2024, 4:07 p.m. UTC | #2
On 8/7/24 04:06, Markus Elfring wrote:
> If you temporarily find the circumstances too challenging for applications
> of scope-based resource management, I suggest to use the following statements instead
> (so that a bit of redundant code can be avoided).
>
>
>
> …
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -0,0 +1,182 @@
> …
>> +static int aie2_init(struct amdxdna_dev *xdna)
>> +{
> …
>> +	release_firmware(fw);
>> +	return 0;
> 	ret = 0;
> 	goto release_fw;

I believe compiler will do the optimization. And I prefer current code 
which only error branch jumps to the corresponding cleanup.


Thanks,

Lizhi

>
> …
>> +release_fw:
>> +	release_firmware(fw);
>> +
>> +	return ret;
>> +}
> …
>
>
> Otherwise (in case further collateral evolution will become more desirable):
> …
>> +static int aie2_init(struct amdxdna_dev *xdna)
>> +{
> …
>> +	const struct firmware *fw;
> I propose to take another software design option better into account.
>
> * You may reduce the scope of such a local variable.
>
> * How do you think about to use the attribute “__free(firmware)”?
>    https://elixir.bootlin.com/linux/v6.11-rc2/source/include/linux/firmware.h#L214
>
> …
>> +	ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
> …
>
>
> Regards,
> Markus
Carl Vanderlip Aug. 9, 2024, 3:24 p.m. UTC | #3
On 8/5/2024 10:39 AM, Lizhi Hou wrote:
 > +static int aie2_init(struct amdxdna_dev *xdna)
 > +{
 > +    struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
 > +    struct amdxdna_dev_hdl *ndev;
 > +    struct psp_config psp_conf;
 > +    const struct firmware *fw;
 > +    void __iomem * const *tbl;
 > +    int i, bars, nvec, ret;
 > +
 > +    ndev = devm_kzalloc(&pdev->dev, sizeof(*ndev), GFP_KERNEL);
 > +    if (!ndev)
 > +        return -ENOMEM;
 > +
 > +    ndev->priv = xdna->dev_info->dev_priv;
 > +    ndev->xdna = xdna;
 > +
 > +    ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
 > +    if (ret) {
 > +        XDNA_ERR(xdna, "failed to request_firmware %s, ret %d",
 > +             ndev->priv->fw_path, ret);
 > +        return ret;
 > +    }
 > +
 > +    ret = pcim_enable_device(pdev);


Does request_firmware need to be the first thing here? Its not used 
until the end of init. Likewise, fw image is copied in *_create, but 
then not released until after *_hw_start; could release_firmware more 
closely wrap where it is used? I could see it being checked first 
because if the fw isn't there, what's the point, but that could be said 
about any of the other resources here.

On 8/5/2024 10:39 AM, Lizhi Hou wrote:
 > +enum aie2_smu_reg_idx {
 > +    SMU_CMD_REG = 0,
 > +    SMU_ARG_REG,
 > +    SMU_INTR_REG,
 > +    SMU_RESP_REG,
 > +    SMU_OUT_REG,
 > +    SMU_MAX_REGS /* Kepp this at the end */
 > +};

*Keep


-Carl V.

PS Sorry for double email Lizhi, forgot to send to list.
Jeffrey Hugo Aug. 9, 2024, 4:11 p.m. UTC | #4
On 8/5/2024 11:39 AM, Lizhi Hou wrote:
> AMD AI Engine forms the core of AMD NPU and can be used for accelerating
> machine learning applications.
> 
> Add the driver to support AI Engine integrated to AMD CPU.
> Only very basic functionalities are added.
>    - module and PCI device initialization
>    - firmware load
>    - power up
>    - low level hardware initialization
> 
> Co-developed-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
> Signed-off-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
> Co-developed-by: George Yang <George.Yang@amd.com>
> Signed-off-by: George Yang <George.Yang@amd.com>
> Co-developed-by: Min Ma <min.ma@amd.com>
> Signed-off-by: Min Ma <min.ma@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
>   MAINTAINERS                             |   8 ++
>   drivers/accel/Kconfig                   |   1 +
>   drivers/accel/Makefile                  |   1 +
>   drivers/accel/amdxdna/Kconfig           |  15 ++
>   drivers/accel/amdxdna/Makefile          |  14 ++
>   drivers/accel/amdxdna/TODO              |   4 +
>   drivers/accel/amdxdna/aie2_pci.c        | 182 ++++++++++++++++++++++++
>   drivers/accel/amdxdna/aie2_pci.h        | 144 +++++++++++++++++++
>   drivers/accel/amdxdna/aie2_psp.c        | 137 ++++++++++++++++++
>   drivers/accel/amdxdna/aie2_smu.c        | 112 +++++++++++++++
>   drivers/accel/amdxdna/amdxdna_drm.c     |  20 +++
>   drivers/accel/amdxdna/amdxdna_drm.h     |  65 +++++++++
>   drivers/accel/amdxdna/amdxdna_pci_drv.c | 118 +++++++++++++++
>   drivers/accel/amdxdna/amdxdna_pci_drv.h |  31 ++++
>   drivers/accel/amdxdna/amdxdna_sysfs.c   |  47 ++++++
>   drivers/accel/amdxdna/npu1_regs.c       |  94 ++++++++++++
>   drivers/accel/amdxdna/npu2_regs.c       | 111 +++++++++++++++
>   drivers/accel/amdxdna/npu4_regs.c       | 111 +++++++++++++++
>   drivers/accel/amdxdna/npu5_regs.c       | 111 +++++++++++++++
>   include/uapi/drm/amdxdna_accel.h        |  27 ++++
>   20 files changed, 1353 insertions(+)
>   create mode 100644 drivers/accel/amdxdna/Kconfig
>   create mode 100644 drivers/accel/amdxdna/Makefile
>   create mode 100644 drivers/accel/amdxdna/TODO
>   create mode 100644 drivers/accel/amdxdna/aie2_pci.c
>   create mode 100644 drivers/accel/amdxdna/aie2_pci.h
>   create mode 100644 drivers/accel/amdxdna/aie2_psp.c
>   create mode 100644 drivers/accel/amdxdna/aie2_smu.c
>   create mode 100644 drivers/accel/amdxdna/amdxdna_drm.c
>   create mode 100644 drivers/accel/amdxdna/amdxdna_drm.h
>   create mode 100644 drivers/accel/amdxdna/amdxdna_pci_drv.c
>   create mode 100644 drivers/accel/amdxdna/amdxdna_pci_drv.h
>   create mode 100644 drivers/accel/amdxdna/amdxdna_sysfs.c
>   create mode 100644 drivers/accel/amdxdna/npu1_regs.c
>   create mode 100644 drivers/accel/amdxdna/npu2_regs.c
>   create mode 100644 drivers/accel/amdxdna/npu4_regs.c
>   create mode 100644 drivers/accel/amdxdna/npu5_regs.c
>   create mode 100644 include/uapi/drm/amdxdna_accel.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 36d66b141352..3d2b9f1f1a07 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1146,6 +1146,14 @@ M:	Sanjay R Mehta <sanju.mehta@amd.com>
>   S:	Maintained
>   F:	drivers/spi/spi-amd.c
>   
> +AMD XDNA DRIVER
> +M:	Min Ma <min.ma@amd.com>
> +M:	Lizhi Hou <lizhi.hou@amd.com>
> +L:	dri-devel@lists.freedesktop.org
> +S:	Supported
> +F:	drivers/accel/amdxdna

Trailing "/"?

> +F:	include/uapi/drm/amdxdna_accel.h

T: ?

> +
>   AMD XGBE DRIVER
>   M:	"Shyam Sundar S K" <Shyam-sundar.S-k@amd.com>
>   L:	netdev@vger.kernel.org

> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> new file mode 100644
> index 000000000000..3660967c00e6
> --- /dev/null
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -0,0 +1,182 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
> + */
> +
> +#include <linux/amd-iommu.h>
> +#include <linux/errno.h>
> +#include <linux/firmware.h>
> +#include <linux/iommu.h>

You are clearly missing linux/pci.h and I suspect many more.

> +
> +#include "aie2_pci.h"
> +
> +static void aie2_hw_stop(struct amdxdna_dev *xdna)
> +{
> +	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> +	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> +
> +	aie2_psp_stop(ndev->psp_hdl);
> +	aie2_smu_fini(ndev);
> +	pci_clear_master(pdev);
> +	pci_disable_device(pdev);

pci_clear_master() is redundant with pci_disable_device(), no?

> +}
> +
> +static int aie2_hw_start(struct amdxdna_dev *xdna)
> +{
> +	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> +	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> +	int ret;
> +
> +	ret = pci_enable_device(pdev);
> +	if (ret) {
> +		XDNA_ERR(xdna, "failed to enable device, ret %d", ret);
> +		return ret;
> +	}
> +	pci_set_master(pdev);
> +
> +	ret = aie2_smu_init(ndev);
> +	if (ret) {
> +		XDNA_ERR(xdna, "failed to init smu, ret %d", ret);
> +		goto disable_dev;
> +	}
> +
> +	ret = aie2_psp_start(ndev->psp_hdl);
> +	if (ret) {
> +		XDNA_ERR(xdna, "failed to start psp, ret %d", ret);
> +		goto fini_smu;
> +	}
> +
> +	return 0;
> +
> +fini_smu:
> +	aie2_smu_fini(ndev);
> +disable_dev:
> +	pci_disable_device(pdev);
> +	pci_clear_master(pdev);

Again, clear_master appears redundant

> +
> +	return ret;
> +}
> +
> +static int aie2_init(struct amdxdna_dev *xdna)
> +{
> +	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> +	struct amdxdna_dev_hdl *ndev;
> +	struct psp_config psp_conf;
> +	const struct firmware *fw;
> +	void __iomem * const *tbl;

Is there an extra "*" here?

> +	int i, bars, nvec, ret;
> +
> +	ndev = devm_kzalloc(&pdev->dev, sizeof(*ndev), GFP_KERNEL);
> +	if (!ndev)
> +		return -ENOMEM;
> +
> +	ndev->priv = xdna->dev_info->dev_priv;
> +	ndev->xdna = xdna;
> +
> +	ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
> +	if (ret) {
> +		XDNA_ERR(xdna, "failed to request_firmware %s, ret %d",
> +			 ndev->priv->fw_path, ret);
> +		return ret;
> +	}
> +
> +	ret = pcim_enable_device(pdev);
> +	if (ret) {
> +		XDNA_ERR(xdna, "pcim enable device failed, ret %d", ret);
> +		goto release_fw;
> +	}
> +
> +	bars = pci_select_bars(pdev, IORESOURCE_MEM);
> +	for (i = 0; i < PSP_MAX_REGS; i++) {
> +		if (!(BIT(PSP_REG_BAR(ndev, i)) && bars)) {
> +			XDNA_ERR(xdna, "does not get pci bar%d",
> +				 PSP_REG_BAR(ndev, i));
> +			ret = -EINVAL;
> +			goto release_fw;
> +		}
> +	}
> +
> +	ret = pcim_iomap_regions(pdev, bars, "amdxdna-npu");
> +	if (ret) {
> +		XDNA_ERR(xdna, "map regions failed, ret %d", ret);
> +		goto release_fw;
> +	}
> +
> +	tbl = pcim_iomap_table(pdev);
> +	if (!tbl) {
> +		XDNA_ERR(xdna, "Cannot get iomap table");
> +		ret = -ENOMEM;
> +		goto release_fw;
> +	}
> +	ndev->sram_base = tbl[xdna->dev_info->sram_bar];
> +	ndev->smu_base = tbl[xdna->dev_info->smu_bar];

Doesn't this throw a sparse error from directly accessing memory marked 
iomem?

> +
> +	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
> +	if (ret) {
> +		XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret);
> +		goto release_fw;
> +	}
> +
> +	nvec = pci_msix_vec_count(pdev);

This feels weird.  Can your device advertise variable number of MSI-X 
vectors?  It only works if all of the vectors are used?

> +	if (nvec <= 0) {
> +		XDNA_ERR(xdna, "does not get number of interrupt vector");
> +		ret = -EINVAL;
> +		goto release_fw;
> +	}
> +
> +	ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX);
> +	if (ret < 0) {
> +		XDNA_ERR(xdna, "failed to alloc irq vectors, ret %d", ret);
> +		goto release_fw;
> +	}
> +
> +	ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
> +	if (ret) {
> +		XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret);
> +		goto free_irq;
> +	}
> +
> +	psp_conf.fw_size = fw->size;
> +	psp_conf.fw_buf = fw->data;
> +	for (i = 0; i < PSP_MAX_REGS; i++)
> +		psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
> +	ndev->psp_hdl = aie2m_psp_create(&pdev->dev, &psp_conf);
> +	if (!ndev->psp_hdl) {
> +		XDNA_ERR(xdna, "failed to create psp");
> +		ret = -ENOMEM;
> +		goto disable_sva;
> +	}
> +	xdna->dev_handle = ndev;
> +
> +	ret = aie2_hw_start(xdna);
> +	if (ret) {
> +		XDNA_ERR(xdna, "start npu failed, ret %d", ret);
> +		goto disable_sva;
> +	}
> +
> +	release_firmware(fw);
> +	return 0;
> +
> +disable_sva:
> +	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
> +free_irq:
> +	pci_free_irq_vectors(pdev);
> +release_fw:
> +	release_firmware(fw);
> +
> +	return ret;
> +}
> +
> +static void aie2_fini(struct amdxdna_dev *xdna)
> +{
> +	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> +
> +	aie2_hw_stop(xdna);
> +	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
> +	pci_free_irq_vectors(pdev);
> +}
> +
> +const struct amdxdna_dev_ops aie2_ops = {
> +	.init           = aie2_init,
> +	.fini           = aie2_fini,
> +};
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> new file mode 100644
> index 000000000000..984bf65b7a2b
> --- /dev/null
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -0,0 +1,144 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef _AIE2_PCI_H_
> +#define _AIE2_PCI_H_
> +
> +#include <linux/device.h>
> +#include <linux/iopoll.h>
> +#include <linux/io.h>
> +
> +#include "amdxdna_pci_drv.h"
> +
> +#define AIE2_INTERVAL	20000	/* us */
> +#define AIE2_TIMEOUT	1000000	/* us */
> +
> +/* Firmware determines device memory base address and size */
> +#define AIE2_DEVM_BASE	0x4000000
> +#define AIE2_DEVM_SIZE	(48 * 1024 * 1024)

I'd expect to see some SZ_ macros used here

> +
> +#define NDEV2PDEV(ndev) \
> +	(to_pci_dev((ndev)->xdna->ddev.dev))
> +
> +#define AIE2_SRAM_OFF(ndev, addr) \
> +	((addr) - (ndev)->priv->sram_dev_addr)
> +#define AIE2_MBOX_OFF(ndev, addr) \
> +	((addr) - (ndev)->priv->mbox_dev_addr)
> +
> +#define PSP_REG_BAR(ndev, idx) \
> +	((ndev)->priv->psp_regs_off[(idx)].bar_idx)
> +#define PSP_REG_OFF(ndev, idx) \
> +	((ndev)->priv->psp_regs_off[(idx)].offset)
> +#define SRAM_REG_OFF(ndev, idx) \
> +	((ndev)->priv->sram_offs[(idx)].offset)

Really looks like these 6 macros can all fit on a single line, why split 
them up?

> +
> +#define SMU_REG(ndev, idx) \
> +({ \
> +	typeof(ndev) _ndev = ndev; \
> +	((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \
> +})
> +#define SRAM_GET_ADDR(ndev, idx) \
> +({ \
> +	typeof(ndev) _ndev = ndev; \
> +	((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \
> +})
> +
> +#define SMU_MPNPUCLK_FREQ_MAX(ndev) \
> +	((ndev)->priv->smu_mpnpuclk_freq_max)
> +#define SMU_HCLK_FREQ_MAX(ndev) \
> +	((ndev)->priv->smu_hclk_freq_max)
> +
> +enum aie2_smu_reg_idx {
> +	SMU_CMD_REG = 0,
> +	SMU_ARG_REG,
> +	SMU_INTR_REG,
> +	SMU_RESP_REG,
> +	SMU_OUT_REG,
> +	SMU_MAX_REGS /* Kepp this at the end */

"keep"

> diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c
> new file mode 100644
> index 000000000000..c24207c252a1
> --- /dev/null
> +++ b/drivers/accel/amdxdna/aie2_psp.c
> @@ -0,0 +1,137 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
> + */
> +
> +#include <linux/bitfield.h>
> +#include <linux/firmware.h>
> +#include <linux/iopoll.h>
> +#include <linux/slab.h>
> +#include "aie2_pci.h"
> +
> +#define PSP_STATUS_READY	BIT(31)
> +
> +/* PSP commands */
> +#define PSP_VALIDATE		1
> +#define PSP_START		2
> +#define PSP_RELEASE_TMR		3
> +
> +/* PSP special arguments */
> +#define PSP_START_COPY_FW	1
> +
> +/* PSP response error code */
> +#define PSP_ERROR_CANCEL	0xFFFF0002
> +#define PSP_ERROR_BAD_STATE	0xFFFF0007
> +
> +#define PSP_FW_ALIGN		0x10000
> +#define PSP_POLL_INTERVAL	20000	/* us */
> +#define PSP_POLL_TIMEOUT	1000000	/* us */
> +
> +#define PSP_REG(p, reg) \
> +	((p)->psp_regs[reg])

This is not valid with __iomem

> +struct psp_device *aie2m_psp_create(struct device *dev, struct psp_config *conf)
> +{
> +	struct psp_device *psp;
> +	u64 offset;
> +
> +	psp = devm_kzalloc(dev, sizeof(*psp), GFP_KERNEL);
> +	if (!psp)
> +		return NULL;
> +
> +	psp->dev = dev;
> +	memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
> +
> +	psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN;
> +	psp->fw_buffer = devm_kmalloc(psp->dev, psp->fw_buf_sz, GFP_KERNEL);

Feels like this (and a bunch of other instances I haven't commented on) 
should be drmm_* allocs.

> +	if (!psp->fw_buffer) {
> +		dev_err(psp->dev, "no memory for fw buffer");
> +		return NULL;
> +	}
> +
> +	psp->fw_paddr = virt_to_phys(psp->fw_buffer);

I'm pretty sure virt_to_phys() is always wrong

> +	offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
> +	psp->fw_paddr += offset;
> +	memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);
> +
> +	return psp;
> +}
> diff --git a/drivers/accel/amdxdna/amdxdna_drm.c b/drivers/accel/amdxdna/amdxdna_drm.c
> new file mode 100644
> index 000000000000..91e4f9c9dac9
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_drm.c

What is the point of this file?  Seems like all of this could just be in 
amdxdna_pci_drv.c

> @@ -0,0 +1,20 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
> + */
> +
> +#include <drm/drm_ioctl.h>
> +#include <drm/drm_accel.h>
> +
> +#include "amdxdna_drm.h"
> +
> +DEFINE_DRM_ACCEL_FOPS(amdxdna_fops);
> +
> +const struct drm_driver amdxdna_drm_drv = {
> +	.driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL,
> +	.fops = &amdxdna_fops,
> +	.name = "amdxdna_accel_driver",
> +	.desc = "AMD XDNA DRM implementation",
> +	.major = AMDXDNA_DRIVER_MAJOR,
> +	.minor = AMDXDNA_DRIVER_MINOR,

These are deprecated.  You should drop them

> +};
> diff --git a/drivers/accel/amdxdna/amdxdna_drm.h b/drivers/accel/amdxdna/amdxdna_drm.h
> new file mode 100644
> index 000000000000..2b18bcbdc23e
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_drm.h
> @@ -0,0 +1,65 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef _AMDXDNA_DRM_H_
> +#define _AMDXDNA_DRM_H_
> +
> +#include <drm/amdxdna_accel.h>
> +#include <drm/drm_drv.h>
> +#include <drm/drm_gem.h>
> +#include <drm/drm_managed.h>
> +#include <drm/drm_print.h>
> +#include <drm/drm_file.h>
> +
> +#define XDNA_INFO(xdna, fmt, args...)	drm_info(&(xdna)->ddev, fmt, ##args)
> +#define XDNA_WARN(xdna, fmt, args...)	drm_warn(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
> +#define XDNA_ERR(xdna, fmt, args...)	drm_err(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
> +#define XDNA_DBG(xdna, fmt, args...)	drm_dbg(&(xdna)->ddev, fmt, ##args)
> +#define XDNA_INFO_ONCE(xdna, fmt, args...) drm_info_once(&(xdna)->ddev, fmt, ##args)
> +
> +#define to_xdna_dev(drm_dev) \
> +	((struct amdxdna_dev *)container_of(drm_dev, struct amdxdna_dev, ddev))
> +
> +extern const struct drm_driver amdxdna_drm_drv;
> +
> +struct amdxdna_dev;
> +
> +/*
> + * struct amdxdna_dev_ops - Device hardware operation callbacks
> + */
> +struct amdxdna_dev_ops {
> +	int (*init)(struct amdxdna_dev *xdna);
> +	void (*fini)(struct amdxdna_dev *xdna);
> +};
> +
> +/*
> + * struct amdxdna_dev_info - Device hardware information
> + * Record device static information, like reg, mbox, PSP, SMU bar index,

The last "," is weird

> + */
> +struct amdxdna_dev_info {
> +	int				reg_bar;
> +	int				mbox_bar;
> +	int				sram_bar;
> +	int				psp_bar;
> +	int				smu_bar;
> +	int				device_type;
> +	int				first_col;
> +	u32				dev_mem_buf_shift;
> +	u64				dev_mem_base;
> +	size_t				dev_mem_size;
> +	char				*vbnv;
> +	const struct amdxdna_dev_priv	*dev_priv;
> +	const struct amdxdna_dev_ops	*ops;
> +};
> +
> +struct amdxdna_dev {
> +	struct drm_device		ddev;
> +	struct amdxdna_dev_hdl		*dev_handle;
> +	const struct amdxdna_dev_info	*dev_info;
> +
> +	struct mutex			dev_lock; /* per device lock */
> +};
> +
> +#endif /* _AMDXDNA_DRM_H_ */
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> new file mode 100644
> index 000000000000..7d0cfd918b0e
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -0,0 +1,118 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
> + */
> +
> +#include <linux/module.h>
> +
> +#include "amdxdna_pci_drv.h"
> +
> +/*
> + *  There are platforms which share the same PCI device ID
> + *  but have different PCI revision IDs. So, let the PCI class
> + *  determine the probe and later use the (device_id, rev_id)
> + *  pair as a key to select the devices.
> + */

Huh?  So, VID == AMD, DID == 0x17f0, rev == 0x1 is a completely 
different device?  That feels like a PCIe spec violation...

> +static const struct pci_device_id pci_ids[] = {
> +	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_ANY_ID),
> +		.class = PCI_CLASS_SP_OTHER << 8,

Weird.  I would have expected the Accelerator class to be used

> +		.class_mask = 0xFFFF00,
> +	},
> +	{0}
> +};
> +
> +MODULE_DEVICE_TABLE(pci, pci_ids);
> +
> +static const struct amdxdna_device_id amdxdna_ids[] = {
> +	{ 0x1502, 0x0,  &dev_npu1_info },
> +	{ 0x17f0, 0x0,  &dev_npu2_info },
> +	{ 0x17f0, 0x10, &dev_npu4_info },
> +	{ 0x17f0, 0x11, &dev_npu5_info },
> +	{0}
> +};
> +

> +module_pci_driver(amdxdna_pci_driver);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("XRT Team <runtimeca39d@amd.com>");
> +MODULE_VERSION("0.1");

This is redundant with the kernel version, no?  How are you going to 
have different module versions for the same kernel version?

> +MODULE_DESCRIPTION("amdxdna driver");
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> new file mode 100644
> index 000000000000..1b699464150e
> --- /dev/null
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -0,0 +1,27 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef _UAPI_AMDXDNA_ACCEL_H_
> +#define _UAPI_AMDXDNA_ACCEL_H_
> +
> +#include "drm.h"
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif
> +
> +#define AMDXDNA_DRIVER_MAJOR	1
> +#define AMDXDNA_DRIVER_MINOR	0

Drop these.

> +
> +enum amdxdna_device_type {
> +	AMDXDNA_DEV_TYPE_UNKNOWN = -1,
> +	AMDXDNA_DEV_TYPE_KMQ,
> +};
> +
> +#if defined(__cplusplus)
> +} /* extern c end */
> +#endif
> +
> +#endif /* _UAPI_AMDXDNA_ACCEL_H_ */
Lizhi Hou Aug. 12, 2024, 3:58 p.m. UTC | #5
On 8/9/24 08:24, Carl Vanderlip wrote:
> On 8/5/2024 10:39 AM, Lizhi Hou wrote:
> > +static int aie2_init(struct amdxdna_dev *xdna)
> > +{
> > +    struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> > +    struct amdxdna_dev_hdl *ndev;
> > +    struct psp_config psp_conf;
> > +    const struct firmware *fw;
> > +    void __iomem * const *tbl;
> > +    int i, bars, nvec, ret;
> > +
> > +    ndev = devm_kzalloc(&pdev->dev, sizeof(*ndev), GFP_KERNEL);
> > +    if (!ndev)
> > +        return -ENOMEM;
> > +
> > +    ndev->priv = xdna->dev_info->dev_priv;
> > +    ndev->xdna = xdna;
> > +
> > +    ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
> > +    if (ret) {
> > +        XDNA_ERR(xdna, "failed to request_firmware %s, ret %d",
> > +             ndev->priv->fw_path, ret);
> > +        return ret;
> > +    }
> > +
> > +    ret = pcim_enable_device(pdev);
>
>
> Does request_firmware need to be the first thing here? Its not used 
> until the end of init. Likewise, fw image is copied in *_create, but 
> then not released until after *_hw_start; could release_firmware more 
> closely wrap where it is used? I could see it being checked first 
> because if the fw isn't there, what's the point, but that could be 
> said about any of the other resources here.
request_firmware() will failed if user forget to install the firmware 
package. Other initialization calls (e.g. enable device, etc) are very 
unlikely to happen.  That is why request_firmware() is checked first. 
This will only hold the memory before the function exits. I think it is 
very short period and should be ok.
>
> On 8/5/2024 10:39 AM, Lizhi Hou wrote:
> > +enum aie2_smu_reg_idx {
> > +    SMU_CMD_REG = 0,
> > +    SMU_ARG_REG,
> > +    SMU_INTR_REG,
> > +    SMU_RESP_REG,
> > +    SMU_OUT_REG,
> > +    SMU_MAX_REGS /* Kepp this at the end */
> > +};
>
> *Keep

Thanks. I will fix this.


Lizhi

>
>
> -Carl V.
>
> PS Sorry for double email Lizhi, forgot to send to list.
Lizhi Hou Aug. 14, 2024, 6:16 p.m. UTC | #6
On 8/9/24 09:11, Jeffrey Hugo wrote:
> On 8/5/2024 11:39 AM, Lizhi Hou wrote:
>> AMD AI Engine forms the core of AMD NPU and can be used for accelerating
>> machine learning applications.
>>
>> Add the driver to support AI Engine integrated to AMD CPU.
>> Only very basic functionalities are added.
>>    - module and PCI device initialization
>>    - firmware load
>>    - power up
>>    - low level hardware initialization
>>
>> Co-developed-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
>> Signed-off-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
>> Co-developed-by: George Yang <George.Yang@amd.com>
>> Signed-off-by: George Yang <George.Yang@amd.com>
>> Co-developed-by: Min Ma <min.ma@amd.com>
>> Signed-off-by: Min Ma <min.ma@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>>   MAINTAINERS                             |   8 ++
>>   drivers/accel/Kconfig                   |   1 +
>>   drivers/accel/Makefile                  |   1 +
>>   drivers/accel/amdxdna/Kconfig           |  15 ++
>>   drivers/accel/amdxdna/Makefile          |  14 ++
>>   drivers/accel/amdxdna/TODO              |   4 +
>>   drivers/accel/amdxdna/aie2_pci.c        | 182 ++++++++++++++++++++++++
>>   drivers/accel/amdxdna/aie2_pci.h        | 144 +++++++++++++++++++
>>   drivers/accel/amdxdna/aie2_psp.c        | 137 ++++++++++++++++++
>>   drivers/accel/amdxdna/aie2_smu.c        | 112 +++++++++++++++
>>   drivers/accel/amdxdna/amdxdna_drm.c     |  20 +++
>>   drivers/accel/amdxdna/amdxdna_drm.h     |  65 +++++++++
>>   drivers/accel/amdxdna/amdxdna_pci_drv.c | 118 +++++++++++++++
>>   drivers/accel/amdxdna/amdxdna_pci_drv.h |  31 ++++
>>   drivers/accel/amdxdna/amdxdna_sysfs.c   |  47 ++++++
>>   drivers/accel/amdxdna/npu1_regs.c       |  94 ++++++++++++
>>   drivers/accel/amdxdna/npu2_regs.c       | 111 +++++++++++++++
>>   drivers/accel/amdxdna/npu4_regs.c       | 111 +++++++++++++++
>>   drivers/accel/amdxdna/npu5_regs.c       | 111 +++++++++++++++
>>   include/uapi/drm/amdxdna_accel.h        |  27 ++++
>>   20 files changed, 1353 insertions(+)
>>   create mode 100644 drivers/accel/amdxdna/Kconfig
>>   create mode 100644 drivers/accel/amdxdna/Makefile
>>   create mode 100644 drivers/accel/amdxdna/TODO
>>   create mode 100644 drivers/accel/amdxdna/aie2_pci.c
>>   create mode 100644 drivers/accel/amdxdna/aie2_pci.h
>>   create mode 100644 drivers/accel/amdxdna/aie2_psp.c
>>   create mode 100644 drivers/accel/amdxdna/aie2_smu.c
>>   create mode 100644 drivers/accel/amdxdna/amdxdna_drm.c
>>   create mode 100644 drivers/accel/amdxdna/amdxdna_drm.h
>>   create mode 100644 drivers/accel/amdxdna/amdxdna_pci_drv.c
>>   create mode 100644 drivers/accel/amdxdna/amdxdna_pci_drv.h
>>   create mode 100644 drivers/accel/amdxdna/amdxdna_sysfs.c
>>   create mode 100644 drivers/accel/amdxdna/npu1_regs.c
>>   create mode 100644 drivers/accel/amdxdna/npu2_regs.c
>>   create mode 100644 drivers/accel/amdxdna/npu4_regs.c
>>   create mode 100644 drivers/accel/amdxdna/npu5_regs.c
>>   create mode 100644 include/uapi/drm/amdxdna_accel.h
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 36d66b141352..3d2b9f1f1a07 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -1146,6 +1146,14 @@ M:    Sanjay R Mehta <sanju.mehta@amd.com>
>>   S:    Maintained
>>   F:    drivers/spi/spi-amd.c
>>   +AMD XDNA DRIVER
>> +M:    Min Ma <min.ma@amd.com>
>> +M:    Lizhi Hou <lizhi.hou@amd.com>
>> +L:    dri-devel@lists.freedesktop.org
>> +S:    Supported
>> +F:    drivers/accel/amdxdna
>
> Trailing "/"?
>
>> +F:    include/uapi/drm/amdxdna_accel.h
>
> T: ?
Sure. I will fix above two.
>
>> +
>>   AMD XGBE DRIVER
>>   M:    "Shyam Sundar S K" <Shyam-sundar.S-k@amd.com>
>>   L:    netdev@vger.kernel.org
>
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>> b/drivers/accel/amdxdna/aie2_pci.c
>> new file mode 100644
>> index 000000000000..3660967c00e6
>> --- /dev/null
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -0,0 +1,182 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
>> + */
>> +
>> +#include <linux/amd-iommu.h>
>> +#include <linux/errno.h>
>> +#include <linux/firmware.h>
>> +#include <linux/iommu.h>
>
> You are clearly missing linux/pci.h and I suspect many more.
Other headers are indirectly included by "aie2_pci.h" underneath.
>
>> +
>> +#include "aie2_pci.h"
>> +
>> +static void aie2_hw_stop(struct amdxdna_dev *xdna)
>> +{
>> +    struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>> +    struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>> +
>> +    aie2_psp_stop(ndev->psp_hdl);
>> +    aie2_smu_fini(ndev);
>> +    pci_clear_master(pdev);
>> +    pci_disable_device(pdev);
>
> pci_clear_master() is redundant with pci_disable_device(), no?
You are right. I will remove it.
>
>> +}
>> +
>> +static int aie2_hw_start(struct amdxdna_dev *xdna)
>> +{
>> +    struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>> +    struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>> +    int ret;
>> +
>> +    ret = pci_enable_device(pdev);
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "failed to enable device, ret %d", ret);
>> +        return ret;
>> +    }
>> +    pci_set_master(pdev);
>> +
>> +    ret = aie2_smu_init(ndev);
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "failed to init smu, ret %d", ret);
>> +        goto disable_dev;
>> +    }
>> +
>> +    ret = aie2_psp_start(ndev->psp_hdl);
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "failed to start psp, ret %d", ret);
>> +        goto fini_smu;
>> +    }
>> +
>> +    return 0;
>> +
>> +fini_smu:
>> +    aie2_smu_fini(ndev);
>> +disable_dev:
>> +    pci_disable_device(pdev);
>> +    pci_clear_master(pdev);
>
> Again, clear_master appears redundant
>
>> +
>> +    return ret;
>> +}
>> +
>> +static int aie2_init(struct amdxdna_dev *xdna)
>> +{
>> +    struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>> +    struct amdxdna_dev_hdl *ndev;
>> +    struct psp_config psp_conf;
>> +    const struct firmware *fw;
>> +    void __iomem * const *tbl;
>
> Is there an extra "*" here?
It looks a match with pcim_iomap_table() return type.
>
>> +    int i, bars, nvec, ret;
>> +
>> +    ndev = devm_kzalloc(&pdev->dev, sizeof(*ndev), GFP_KERNEL);
>> +    if (!ndev)
>> +        return -ENOMEM;
>> +
>> +    ndev->priv = xdna->dev_info->dev_priv;
>> +    ndev->xdna = xdna;
>> +
>> +    ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "failed to request_firmware %s, ret %d",
>> +             ndev->priv->fw_path, ret);
>> +        return ret;
>> +    }
>> +
>> +    ret = pcim_enable_device(pdev);
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "pcim enable device failed, ret %d", ret);
>> +        goto release_fw;
>> +    }
>> +
>> +    bars = pci_select_bars(pdev, IORESOURCE_MEM);
>> +    for (i = 0; i < PSP_MAX_REGS; i++) {
>> +        if (!(BIT(PSP_REG_BAR(ndev, i)) && bars)) {
>> +            XDNA_ERR(xdna, "does not get pci bar%d",
>> +                 PSP_REG_BAR(ndev, i));
>> +            ret = -EINVAL;
>> +            goto release_fw;
>> +        }
>> +    }
>> +
>> +    ret = pcim_iomap_regions(pdev, bars, "amdxdna-npu");
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "map regions failed, ret %d", ret);
>> +        goto release_fw;
>> +    }
>> +
>> +    tbl = pcim_iomap_table(pdev);
>> +    if (!tbl) {
>> +        XDNA_ERR(xdna, "Cannot get iomap table");
>> +        ret = -ENOMEM;
>> +        goto release_fw;
>> +    }
>> +    ndev->sram_base = tbl[xdna->dev_info->sram_bar];
>> +    ndev->smu_base = tbl[xdna->dev_info->smu_bar];
>
> Doesn't this throw a sparse error from directly accessing memory 
> marked iomem?

It does not. tbl[] returns the iomem pointer and the driver uses 
writel/readl to access.

e.g. writel(0, SMU_REG(ndev, SMU_RESP_REG));

>
>> +
>> +    ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret);
>> +        goto release_fw;
>> +    }
>> +
>> +    nvec = pci_msix_vec_count(pdev);
>
> This feels weird.  Can your device advertise variable number of MSI-X 
> vectors?  It only works if all of the vectors are used?
That is possible. the driver supports different hardware. And the fw 
assigns vector for hardware context dynamically. So the driver needs to 
allocate all vectors ahead.
>
>> +    if (nvec <= 0) {
>> +        XDNA_ERR(xdna, "does not get number of interrupt vector");
>> +        ret = -EINVAL;
>> +        goto release_fw;
>> +    }
>> +
>> +    ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX);
>> +    if (ret < 0) {
>> +        XDNA_ERR(xdna, "failed to alloc irq vectors, ret %d", ret);
>> +        goto release_fw;
>> +    }
>> +
>> +    ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret);
>> +        goto free_irq;
>> +    }
>> +
>> +    psp_conf.fw_size = fw->size;
>> +    psp_conf.fw_buf = fw->data;
>> +    for (i = 0; i < PSP_MAX_REGS; i++)
>> +        psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + 
>> PSP_REG_OFF(ndev, i);
>> +    ndev->psp_hdl = aie2m_psp_create(&pdev->dev, &psp_conf);
>> +    if (!ndev->psp_hdl) {
>> +        XDNA_ERR(xdna, "failed to create psp");
>> +        ret = -ENOMEM;
>> +        goto disable_sva;
>> +    }
>> +    xdna->dev_handle = ndev;
>> +
>> +    ret = aie2_hw_start(xdna);
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "start npu failed, ret %d", ret);
>> +        goto disable_sva;
>> +    }
>> +
>> +    release_firmware(fw);
>> +    return 0;
>> +
>> +disable_sva:
>> +    iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
>> +free_irq:
>> +    pci_free_irq_vectors(pdev);
>> +release_fw:
>> +    release_firmware(fw);
>> +
>> +    return ret;
>> +}
>> +
>> +static void aie2_fini(struct amdxdna_dev *xdna)
>> +{
>> +    struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>> +
>> +    aie2_hw_stop(xdna);
>> +    iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
>> +    pci_free_irq_vectors(pdev);
>> +}
>> +
>> +const struct amdxdna_dev_ops aie2_ops = {
>> +    .init           = aie2_init,
>> +    .fini           = aie2_fini,
>> +};
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h 
>> b/drivers/accel/amdxdna/aie2_pci.h
>> new file mode 100644
>> index 000000000000..984bf65b7a2b
>> --- /dev/null
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -0,0 +1,144 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
>> + */
>> +
>> +#ifndef _AIE2_PCI_H_
>> +#define _AIE2_PCI_H_
>> +
>> +#include <linux/device.h>
>> +#include <linux/iopoll.h>
>> +#include <linux/io.h>
>> +
>> +#include "amdxdna_pci_drv.h"
>> +
>> +#define AIE2_INTERVAL    20000    /* us */
>> +#define AIE2_TIMEOUT    1000000    /* us */
>> +
>> +/* Firmware determines device memory base address and size */
>> +#define AIE2_DEVM_BASE    0x4000000
>> +#define AIE2_DEVM_SIZE    (48 * 1024 * 1024)
>
> I'd expect to see some SZ_ macros used here
Sure.
>
>> +
>> +#define NDEV2PDEV(ndev) \
>> +    (to_pci_dev((ndev)->xdna->ddev.dev))
>> +
>> +#define AIE2_SRAM_OFF(ndev, addr) \
>> +    ((addr) - (ndev)->priv->sram_dev_addr)
>> +#define AIE2_MBOX_OFF(ndev, addr) \
>> +    ((addr) - (ndev)->priv->mbox_dev_addr)
>> +
>> +#define PSP_REG_BAR(ndev, idx) \
>> +    ((ndev)->priv->psp_regs_off[(idx)].bar_idx)
>> +#define PSP_REG_OFF(ndev, idx) \
>> +    ((ndev)->priv->psp_regs_off[(idx)].offset)
>> +#define SRAM_REG_OFF(ndev, idx) \
>> +    ((ndev)->priv->sram_offs[(idx)].offset)
>
> Really looks like these 6 macros can all fit on a single line, why 
> split them up?
I will fix these. and other places.
>
>> +
>> +#define SMU_REG(ndev, idx) \
>> +({ \
>> +    typeof(ndev) _ndev = ndev; \
>> +    ((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \
>> +})
>> +#define SRAM_GET_ADDR(ndev, idx) \
>> +({ \
>> +    typeof(ndev) _ndev = ndev; \
>> +    ((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \
>> +})
>> +
>> +#define SMU_MPNPUCLK_FREQ_MAX(ndev) \
>> +    ((ndev)->priv->smu_mpnpuclk_freq_max)
>> +#define SMU_HCLK_FREQ_MAX(ndev) \
>> +    ((ndev)->priv->smu_hclk_freq_max)
>> +
>> +enum aie2_smu_reg_idx {
>> +    SMU_CMD_REG = 0,
>> +    SMU_ARG_REG,
>> +    SMU_INTR_REG,
>> +    SMU_RESP_REG,
>> +    SMU_OUT_REG,
>> +    SMU_MAX_REGS /* Kepp this at the end */
>
> "keep"
Got it. Thanks.
>
>> diff --git a/drivers/accel/amdxdna/aie2_psp.c 
>> b/drivers/accel/amdxdna/aie2_psp.c
>> new file mode 100644
>> index 000000000000..c24207c252a1
>> --- /dev/null
>> +++ b/drivers/accel/amdxdna/aie2_psp.c
>> @@ -0,0 +1,137 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
>> + */
>> +
>> +#include <linux/bitfield.h>
>> +#include <linux/firmware.h>
>> +#include <linux/iopoll.h>
>> +#include <linux/slab.h>
>> +#include "aie2_pci.h"
>> +
>> +#define PSP_STATUS_READY    BIT(31)
>> +
>> +/* PSP commands */
>> +#define PSP_VALIDATE        1
>> +#define PSP_START        2
>> +#define PSP_RELEASE_TMR        3
>> +
>> +/* PSP special arguments */
>> +#define PSP_START_COPY_FW    1
>> +
>> +/* PSP response error code */
>> +#define PSP_ERROR_CANCEL    0xFFFF0002
>> +#define PSP_ERROR_BAD_STATE    0xFFFF0007
>> +
>> +#define PSP_FW_ALIGN        0x10000
>> +#define PSP_POLL_INTERVAL    20000    /* us */
>> +#define PSP_POLL_TIMEOUT    1000000    /* us */
>> +
>> +#define PSP_REG(p, reg) \
>> +    ((p)->psp_regs[reg])
>
> This is not valid with __iomem
Sorry, I am not fully understand the comment.

Each element of ->psp_regs[] is a void __iomem *. And readl/writel are 
used with it.

e.g. writel(reg_vals[i], PSP_REG(psp, i));

>
>> +struct psp_device *aie2m_psp_create(struct device *dev, struct 
>> psp_config *conf)
>> +{
>> +    struct psp_device *psp;
>> +    u64 offset;
>> +
>> +    psp = devm_kzalloc(dev, sizeof(*psp), GFP_KERNEL);
>> +    if (!psp)
>> +        return NULL;
>> +
>> +    psp->dev = dev;
>> +    memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
>> +
>> +    psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN;
>> +    psp->fw_buffer = devm_kmalloc(psp->dev, psp->fw_buf_sz, 
>> GFP_KERNEL);
>
> Feels like this (and a bunch of other instances I haven't commented 
> on) should be drmm_* allocs.

The PSP code is kind of low level and directly interact with hardware. 
All the PSP interfaces use struct device * instead of drm_device. I 
think it is kind make sense because PSP is not related to drm.

I will scan all other allocs and change them to drmm_* allocs for the 
code related to drm_device. Does this sound ok to you?

>
>> +    if (!psp->fw_buffer) {
>> +        dev_err(psp->dev, "no memory for fw buffer");
>> +        return NULL;
>> +    }
>> +
>> +    psp->fw_paddr = virt_to_phys(psp->fw_buffer);
>
> I'm pretty sure virt_to_phys() is always wrong

The hardware exposes several registers to communicate with platform PSP 
(AMD Platform Security Processor) to load NPU firmware. And PSP only 
accept host physical address with current hardware.

I understand usually virt_to_phys() should not be needed for device 
driver. And maybe it is ok to use if there is hardware requirement? I 
can see some drivers use it as well.

>
>> +    offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
>> +    psp->fw_paddr += offset;
>> +    memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);
>> +
>> +    return psp;
>> +}
>> diff --git a/drivers/accel/amdxdna/amdxdna_drm.c 
>> b/drivers/accel/amdxdna/amdxdna_drm.c
>> new file mode 100644
>> index 000000000000..91e4f9c9dac9
>> --- /dev/null
>> +++ b/drivers/accel/amdxdna/amdxdna_drm.c
>
> What is the point of this file?  Seems like all of this could just be 
> in amdxdna_pci_drv.c
The future product may have NPU with non-pci device. So it might be a 
amdxdna_plat_drv.c and share the same amdxdna_drm.c in the future.
>
>> @@ -0,0 +1,20 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
>> + */
>> +
>> +#include <drm/drm_ioctl.h>
>> +#include <drm/drm_accel.h>
>> +
>> +#include "amdxdna_drm.h"
>> +
>> +DEFINE_DRM_ACCEL_FOPS(amdxdna_fops);
>> +
>> +const struct drm_driver amdxdna_drm_drv = {
>> +    .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL,
>> +    .fops = &amdxdna_fops,
>> +    .name = "amdxdna_accel_driver",
>> +    .desc = "AMD XDNA DRM implementation",
>> +    .major = AMDXDNA_DRIVER_MAJOR,
>> +    .minor = AMDXDNA_DRIVER_MINOR,
>
> These are deprecated.  You should drop them
Sure. I will remove them.
>
>> +};
>> diff --git a/drivers/accel/amdxdna/amdxdna_drm.h 
>> b/drivers/accel/amdxdna/amdxdna_drm.h
>> new file mode 100644
>> index 000000000000..2b18bcbdc23e
>> --- /dev/null
>> +++ b/drivers/accel/amdxdna/amdxdna_drm.h
>> @@ -0,0 +1,65 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
>> + */
>> +
>> +#ifndef _AMDXDNA_DRM_H_
>> +#define _AMDXDNA_DRM_H_
>> +
>> +#include <drm/amdxdna_accel.h>
>> +#include <drm/drm_drv.h>
>> +#include <drm/drm_gem.h>
>> +#include <drm/drm_managed.h>
>> +#include <drm/drm_print.h>
>> +#include <drm/drm_file.h>
>> +
>> +#define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, 
>> ##args)
>> +#define XDNA_WARN(xdna, fmt, args...) drm_warn(&(xdna)->ddev, "%s: 
>> "fmt, __func__, ##args)
>> +#define XDNA_ERR(xdna, fmt, args...) drm_err(&(xdna)->ddev, "%s: 
>> "fmt, __func__, ##args)
>> +#define XDNA_DBG(xdna, fmt, args...) drm_dbg(&(xdna)->ddev, fmt, 
>> ##args)
>> +#define XDNA_INFO_ONCE(xdna, fmt, args...) 
>> drm_info_once(&(xdna)->ddev, fmt, ##args)
>> +
>> +#define to_xdna_dev(drm_dev) \
>> +    ((struct amdxdna_dev *)container_of(drm_dev, struct amdxdna_dev, 
>> ddev))
>> +
>> +extern const struct drm_driver amdxdna_drm_drv;
>> +
>> +struct amdxdna_dev;
>> +
>> +/*
>> + * struct amdxdna_dev_ops - Device hardware operation callbacks
>> + */
>> +struct amdxdna_dev_ops {
>> +    int (*init)(struct amdxdna_dev *xdna);
>> +    void (*fini)(struct amdxdna_dev *xdna);
>> +};
>> +
>> +/*
>> + * struct amdxdna_dev_info - Device hardware information
>> + * Record device static information, like reg, mbox, PSP, SMU bar 
>> index,
>
> The last "," is weird
Will fix it. Thanks.
>
>> + */
>> +struct amdxdna_dev_info {
>> +    int                reg_bar;
>> +    int                mbox_bar;
>> +    int                sram_bar;
>> +    int                psp_bar;
>> +    int                smu_bar;
>> +    int                device_type;
>> +    int                first_col;
>> +    u32                dev_mem_buf_shift;
>> +    u64                dev_mem_base;
>> +    size_t                dev_mem_size;
>> +    char                *vbnv;
>> +    const struct amdxdna_dev_priv    *dev_priv;
>> +    const struct amdxdna_dev_ops    *ops;
>> +};
>> +
>> +struct amdxdna_dev {
>> +    struct drm_device        ddev;
>> +    struct amdxdna_dev_hdl        *dev_handle;
>> +    const struct amdxdna_dev_info    *dev_info;
>> +
>> +    struct mutex            dev_lock; /* per device lock */
>> +};
>> +
>> +#endif /* _AMDXDNA_DRM_H_ */
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c 
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> new file mode 100644
>> index 000000000000..7d0cfd918b0e
>> --- /dev/null
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> @@ -0,0 +1,118 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
>> + */
>> +
>> +#include <linux/module.h>
>> +
>> +#include "amdxdna_pci_drv.h"
>> +
>> +/*
>> + *  There are platforms which share the same PCI device ID
>> + *  but have different PCI revision IDs. So, let the PCI class
>> + *  determine the probe and later use the (device_id, rev_id)
>> + *  pair as a key to select the devices.
>> + */
>
> Huh?  So, VID == AMD, DID == 0x17f0, rev == 0x1 is a completely 
> different device?  That feels like a PCIe spec violation...
Maybe the comment is misleading. The hardware with same device id 0x17f0 
uses the same commands, registers etc. And they are same device with 
different revisions.
>
>> +static const struct pci_device_id pci_ids[] = {
>> +    { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_ANY_ID),
>> +        .class = PCI_CLASS_SP_OTHER << 8,
>
> Weird.  I would have expected the Accelerator class to be used
We contacted our hardware team to figure out why accelerator class is 
not used here. Some of hardware is already released. Hopefully hardware 
team may consider to use accelerator class with new products.
>
>> +        .class_mask = 0xFFFF00,
>> +    },
>> +    {0}
>> +};
>> +
>> +MODULE_DEVICE_TABLE(pci, pci_ids);
>> +
>> +static const struct amdxdna_device_id amdxdna_ids[] = {
>> +    { 0x1502, 0x0,  &dev_npu1_info },
>> +    { 0x17f0, 0x0,  &dev_npu2_info },
>> +    { 0x17f0, 0x10, &dev_npu4_info },
>> +    { 0x17f0, 0x11, &dev_npu5_info },
>> +    {0}
>> +};
>> +
>
>> +module_pci_driver(amdxdna_pci_driver);
>> +
>> +MODULE_LICENSE("GPL");
>> +MODULE_AUTHOR("XRT Team <runtimeca39d@amd.com>");
>> +MODULE_VERSION("0.1");
>
> This is redundant with the kernel version, no?  How are you going to 
> have different module versions for the same kernel version?
This is used in out of tree driver. It is meaningless for upstream. I 
will remove it.
>
>> +MODULE_DESCRIPTION("amdxdna driver");
>> diff --git a/include/uapi/drm/amdxdna_accel.h 
>> b/include/uapi/drm/amdxdna_accel.h
>> new file mode 100644
>> index 000000000000..1b699464150e
>> --- /dev/null
>> +++ b/include/uapi/drm/amdxdna_accel.h
>> @@ -0,0 +1,27 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>> +/*
>> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
>> + */
>> +
>> +#ifndef _UAPI_AMDXDNA_ACCEL_H_
>> +#define _UAPI_AMDXDNA_ACCEL_H_
>> +
>> +#include "drm.h"
>> +
>> +#if defined(__cplusplus)
>> +extern "C" {
>> +#endif
>> +
>> +#define AMDXDNA_DRIVER_MAJOR    1
>> +#define AMDXDNA_DRIVER_MINOR    0
>
> Drop these.

Sure.


Thanks,

Lizhi

>
>> +
>> +enum amdxdna_device_type {
>> +    AMDXDNA_DEV_TYPE_UNKNOWN = -1,
>> +    AMDXDNA_DEV_TYPE_KMQ,
>> +};
>> +
>> +#if defined(__cplusplus)
>> +} /* extern c end */
>> +#endif
>> +
>> +#endif /* _UAPI_AMDXDNA_ACCEL_H_ */
>
Jeffrey Hugo Aug. 14, 2024, 6:46 p.m. UTC | #7
On 8/14/2024 12:16 PM, Lizhi Hou wrote:
> 
> On 8/9/24 09:11, Jeffrey Hugo wrote:
>> On 8/5/2024 11:39 AM, Lizhi Hou wrote:
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>>> b/drivers/accel/amdxdna/aie2_pci.c
>>> new file mode 100644
>>> index 000000000000..3660967c00e6
>>> --- /dev/null
>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>> @@ -0,0 +1,182 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
>>> + */
>>> +
>>> +#include <linux/amd-iommu.h>
>>> +#include <linux/errno.h>
>>> +#include <linux/firmware.h>
>>> +#include <linux/iommu.h>
>>
>> You are clearly missing linux/pci.h and I suspect many more.
> Other headers are indirectly included by "aie2_pci.h" underneath.

aie2_pci.h also does not directly include linux/pci.h

>>> +
>>> +    ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
>>> +    if (ret) {
>>> +        XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret);
>>> +        goto release_fw;
>>> +    }
>>> +
>>> +    nvec = pci_msix_vec_count(pdev);
>>
>> This feels weird.  Can your device advertise variable number of MSI-X 
>> vectors?  It only works if all of the vectors are used?
> That is possible. the driver supports different hardware. And the fw 
> assigns vector for hardware context dynamically. So the driver needs to 
> allocate all vectors ahead.

So, if the device requests N MSIs, but the host is only able to satisfy 
1 (or some number less than N), the fw is completely unable to function?

>>> +struct psp_device *aie2m_psp_create(struct device *dev, struct 
>>> psp_config *conf)
>>> +{
>>> +    struct psp_device *psp;
>>> +    u64 offset;
>>> +
>>> +    psp = devm_kzalloc(dev, sizeof(*psp), GFP_KERNEL);
>>> +    if (!psp)
>>> +        return NULL;
>>> +
>>> +    psp->dev = dev;
>>> +    memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
>>> +
>>> +    psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN;
>>> +    psp->fw_buffer = devm_kmalloc(psp->dev, psp->fw_buf_sz, 
>>> GFP_KERNEL);
>>
>> Feels like this (and a bunch of other instances I haven't commented 
>> on) should be drmm_* allocs.
> 
> The PSP code is kind of low level and directly interact with hardware. 
> All the PSP interfaces use struct device * instead of drm_device. I 
> think it is kind make sense because PSP is not related to drm.
> 
> I will scan all other allocs and change them to drmm_* allocs for the 
> code related to drm_device. Does this sound ok to you?

I don't think so.  Look up
drm/todo: Add TODO entry for "lints"
on the dri-devel list, and its history.

> 
>>
>>> +    if (!psp->fw_buffer) {
>>> +        dev_err(psp->dev, "no memory for fw buffer");
>>> +        return NULL;
>>> +    }
>>> +
>>> +    psp->fw_paddr = virt_to_phys(psp->fw_buffer);
>>
>> I'm pretty sure virt_to_phys() is always wrong
> 
> The hardware exposes several registers to communicate with platform PSP 
> (AMD Platform Security Processor) to load NPU firmware. And PSP only 
> accept host physical address with current hardware.
> 
> I understand usually virt_to_phys() should not be needed for device 
> driver. And maybe it is ok to use if there is hardware requirement? I 
> can see some drivers use it as well.

Eh.  I guess the PSP would never have an IOMMU in front of it or 
anything like that.

This feels similar to what Qualcomm MSM platforms do, which uses the 
remoteproc framework.  Not sure if that helps you here.

This still feels not good, but you might have a valid exception here. 
I'd suggest putting a justification comment in the code through. 
Someone looking at this in X months might raise the same question.

> 
>>
>>> +    offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
>>> +    psp->fw_paddr += offset;
>>> +    memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);
>>> +
>>> +    return psp;
>>> +}
>>> diff --git a/drivers/accel/amdxdna/amdxdna_drm.c 
>>> b/drivers/accel/amdxdna/amdxdna_drm.c
>>> new file mode 100644
>>> index 000000000000..91e4f9c9dac9
>>> --- /dev/null
>>> +++ b/drivers/accel/amdxdna/amdxdna_drm.c
>>
>> What is the point of this file?  Seems like all of this could just be 
>> in amdxdna_pci_drv.c
> The future product may have NPU with non-pci device. So it might be a 
> amdxdna_plat_drv.c and share the same amdxdna_drm.c in the future.

This seems like a weak justification.  "may" is not definitive.  If such 
hardware appears, you could refactor the driver at that time.

>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c 
>>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>> new file mode 100644
>>> index 000000000000..7d0cfd918b0e
>>> --- /dev/null
>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>> @@ -0,0 +1,118 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
>>> + */
>>> +
>>> +#include <linux/module.h>
>>> +
>>> +#include "amdxdna_pci_drv.h"
>>> +
>>> +/*
>>> + *  There are platforms which share the same PCI device ID
>>> + *  but have different PCI revision IDs. So, let the PCI class
>>> + *  determine the probe and later use the (device_id, rev_id)
>>> + *  pair as a key to select the devices.
>>> + */
>>
>> Huh?  So, VID == AMD, DID == 0x17f0, rev == 0x1 is a completely 
>> different device?  That feels like a PCIe spec violation...
> Maybe the comment is misleading. The hardware with same device id 0x17f0 
> uses the same commands, registers etc. And they are same device with 
> different revisions.

Then I don't understand why you need to do the class matching.  Match on 
PCI_VENDOR_ID_AMD with the Device IDs you need to support like a 
"normal" PCI(e) driver?

>>
>>> +static const struct pci_device_id pci_ids[] = {
>>> +    { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_ANY_ID),
>>> +        .class = PCI_CLASS_SP_OTHER << 8,
>>
>> Weird.  I would have expected the Accelerator class to be used
> We contacted our hardware team to figure out why accelerator class is 
> not used here. Some of hardware is already released. Hopefully hardware 
> team may consider to use accelerator class with new products.
Lizhi Hou Aug. 14, 2024, 8:24 p.m. UTC | #8
On 8/14/24 11:46, Jeffrey Hugo wrote:
> On 8/14/2024 12:16 PM, Lizhi Hou wrote:
>>
>> On 8/9/24 09:11, Jeffrey Hugo wrote:
>>> On 8/5/2024 11:39 AM, Lizhi Hou wrote:
>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>>>> b/drivers/accel/amdxdna/aie2_pci.c
>>>> new file mode 100644
>>>> index 000000000000..3660967c00e6
>>>> --- /dev/null
>>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>>> @@ -0,0 +1,182 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +/*
>>>> + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
>>>> + */
>>>> +
>>>> +#include <linux/amd-iommu.h>
>>>> +#include <linux/errno.h>
>>>> +#include <linux/firmware.h>
>>>> +#include <linux/iommu.h>
>>>
>>> You are clearly missing linux/pci.h and I suspect many more.
>> Other headers are indirectly included by "aie2_pci.h" underneath.
>
> aie2_pci.h also does not directly include linux/pci.h

it is aie2_pci.h --> amdxdna_pci_drv.h --> linux/pci.h.

It compiles without any issue.

>
>>>> +
>>>> +    ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
>>>> +    if (ret) {
>>>> +        XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret);
>>>> +        goto release_fw;
>>>> +    }
>>>> +
>>>> +    nvec = pci_msix_vec_count(pdev);
>>>
>>> This feels weird.  Can your device advertise variable number of 
>>> MSI-X vectors?  It only works if all of the vectors are used?
>> That is possible. the driver supports different hardware. And the fw 
>> assigns vector for hardware context dynamically. So the driver needs 
>> to allocate all vectors ahead.
>
> So, if the device requests N MSIs, but the host is only able to 
> satisfy 1 (or some number less than N), the fw is completely unable to 
> function?
The fw may return interrupt 2 is assigned to hardware context. Then the 
driver may not deal with it in this case. I think it is ok to fail if 
the system has very limited resource.
>
>
>>>> +struct psp_device *aie2m_psp_create(struct device *dev, struct 
>>>> psp_config *conf)
>>>> +{
>>>> +    struct psp_device *psp;
>>>> +    u64 offset;
>>>> +
>>>> +    psp = devm_kzalloc(dev, sizeof(*psp), GFP_KERNEL);
>>>> +    if (!psp)
>>>> +        return NULL;
>>>> +
>>>> +    psp->dev = dev;
>>>> +    memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
>>>> +
>>>> +    psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + 
>>>> PSP_FW_ALIGN;
>>>> +    psp->fw_buffer = devm_kmalloc(psp->dev, psp->fw_buf_sz, 
>>>> GFP_KERNEL);
>>>
>>> Feels like this (and a bunch of other instances I haven't commented 
>>> on) should be drmm_* allocs.
>>
>> The PSP code is kind of low level and directly interact with 
>> hardware. All the PSP interfaces use struct device * instead of 
>> drm_device. I think it is kind make sense because PSP is not related 
>> to drm.
>>
>> I will scan all other allocs and change them to drmm_* allocs for the 
>> code related to drm_device. Does this sound ok to you?
>
> I don't think so.  Look up
> drm/todo: Add TODO entry for "lints"
> on the dri-devel list, and its history.
Ok, I will replace them with drm_*alloc.
>
>>
>>>
>>>> +    if (!psp->fw_buffer) {
>>>> +        dev_err(psp->dev, "no memory for fw buffer");
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    psp->fw_paddr = virt_to_phys(psp->fw_buffer);
>>>
>>> I'm pretty sure virt_to_phys() is always wrong
>>
>> The hardware exposes several registers to communicate with platform 
>> PSP (AMD Platform Security Processor) to load NPU firmware. And PSP 
>> only accept host physical address with current hardware.
>>
>> I understand usually virt_to_phys() should not be needed for device 
>> driver. And maybe it is ok to use if there is hardware requirement? I 
>> can see some drivers use it as well.
>
> Eh.  I guess the PSP would never have an IOMMU in front of it or 
> anything like that.
>
> This feels similar to what Qualcomm MSM platforms do, which uses the 
> remoteproc framework.  Not sure if that helps you here.
>
> This still feels not good, but you might have a valid exception here. 
> I'd suggest putting a justification comment in the code through. 
> Someone looking at this in X months might raise the same question.
Sure. I will add a justification.
>
>>
>>>
>>>> +    offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
>>>> +    psp->fw_paddr += offset;
>>>> +    memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);
>>>> +
>>>> +    return psp;
>>>> +}
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_drm.c 
>>>> b/drivers/accel/amdxdna/amdxdna_drm.c
>>>> new file mode 100644
>>>> index 000000000000..91e4f9c9dac9
>>>> --- /dev/null
>>>> +++ b/drivers/accel/amdxdna/amdxdna_drm.c
>>>
>>> What is the point of this file?  Seems like all of this could just 
>>> be in amdxdna_pci_drv.c
>> The future product may have NPU with non-pci device. So it might be a 
>> amdxdna_plat_drv.c and share the same amdxdna_drm.c in the future.
>
> This seems like a weak justification.  "may" is not definitive. If 
> such hardware appears, you could refactor the driver at that time.
Ok, I will merge them.
>
>
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c 
>>>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>>> new file mode 100644
>>>> index 000000000000..7d0cfd918b0e
>>>> --- /dev/null
>>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>>> @@ -0,0 +1,118 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +/*
>>>> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
>>>> + */
>>>> +
>>>> +#include <linux/module.h>
>>>> +
>>>> +#include "amdxdna_pci_drv.h"
>>>> +
>>>> +/*
>>>> + *  There are platforms which share the same PCI device ID
>>>> + *  but have different PCI revision IDs. So, let the PCI class
>>>> + *  determine the probe and later use the (device_id, rev_id)
>>>> + *  pair as a key to select the devices.
>>>> + */
>>>
>>> Huh?  So, VID == AMD, DID == 0x17f0, rev == 0x1 is a completely 
>>> different device?  That feels like a PCIe spec violation...
>> Maybe the comment is misleading. The hardware with same device id 
>> 0x17f0 uses the same commands, registers etc. And they are same 
>> device with different revisions.
>
> Then I don't understand why you need to do the class matching. Match 
> on PCI_VENDOR_ID_AMD with the Device IDs you need to support like a 
> "normal" PCI(e) driver?

ok. I will used device id to bind.


Thanks,

Lizhi

>
>>>
>>>> +static const struct pci_device_id pci_ids[] = {
>>>> +    { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_ANY_ID),
>>>> +        .class = PCI_CLASS_SP_OTHER << 8,
>>>
>>> Weird.  I would have expected the Accelerator class to be used
>> We contacted our hardware team to figure out why accelerator class is 
>> not used here. Some of hardware is already released. Hopefully 
>> hardware team may consider to use accelerator class with new products.
Jeffrey Hugo Aug. 14, 2024, 9:53 p.m. UTC | #9
On 8/14/2024 2:24 PM, Lizhi Hou wrote:
> 
> On 8/14/24 11:46, Jeffrey Hugo wrote:
>> On 8/14/2024 12:16 PM, Lizhi Hou wrote:
>>>
>>> On 8/9/24 09:11, Jeffrey Hugo wrote:
>>>> On 8/5/2024 11:39 AM, Lizhi Hou wrote:
>>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>>>>> b/drivers/accel/amdxdna/aie2_pci.c
>>>>> new file mode 100644
>>>>> index 000000000000..3660967c00e6
>>>>> --- /dev/null
>>>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>>>> @@ -0,0 +1,182 @@
>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>> +/*
>>>>> + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
>>>>> + */
>>>>> +
>>>>> +#include <linux/amd-iommu.h>
>>>>> +#include <linux/errno.h>
>>>>> +#include <linux/firmware.h>
>>>>> +#include <linux/iommu.h>
>>>>
>>>> You are clearly missing linux/pci.h and I suspect many more.
>>> Other headers are indirectly included by "aie2_pci.h" underneath.
>>
>> aie2_pci.h also does not directly include linux/pci.h
> 
> it is aie2_pci.h --> amdxdna_pci_drv.h --> linux/pci.h.
> 
> It compiles without any issue.

I did not doubt that it compiled.  To be clear, I'm pointing out what I 
believe is poor style - relying on implicit includes.

There is no reason that amdxdna_pci_drv.h needs to include linux/pci.h 
(at-least in this patch).  That header file doesn't use any of the 
content from pci.h.  If this were merged, I suspect it would be valid 
for someone to post a cleanup patch that removes pci.h from 
amdxdna_pci_drv.h.

The problem comes when someone does a tree wide refactor of some header 
- perhaps moving a function out of pci.h into something else.  If they 
grep the source tree, they'll find amdxdna_pci_drv.h includes pci.h but 
really doesn't use it.  They likely won't see aie2_pci.c which may break 
because of the refactor.  Even more problematic is if pci.h is including 
something that you need, and you are not including it anywhere.  The 
code will still compile, but maybe in the next kernel cycle pci.h no 
longer includes that thing.  Your code will break.

The 4 includes you have here seems entirely too little, and I'm not 
clearly seeing the logic of what gets explicitly included vs what is 
implicitly included.  firmware.h is explicitly included, but pci.h is 
not, yet it seems like you use a lot more from pci.h.

There is the include what you use project that attempts to automate 
this, although I don't know how well it works with kernel code - 
https://github.com/include-what-you-use/include-what-you-use

> 
>>
>>>>> +
>>>>> +    ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
>>>>> +    if (ret) {
>>>>> +        XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret);
>>>>> +        goto release_fw;
>>>>> +    }
>>>>> +
>>>>> +    nvec = pci_msix_vec_count(pdev);
>>>>
>>>> This feels weird.  Can your device advertise variable number of 
>>>> MSI-X vectors?  It only works if all of the vectors are used?
>>> That is possible. the driver supports different hardware. And the fw 
>>> assigns vector for hardware context dynamically. So the driver needs 
>>> to allocate all vectors ahead.
>>
>> So, if the device requests N MSIs, but the host is only able to 
>> satisfy 1 (or some number less than N), the fw is completely unable to 
>> function?
> The fw may return interrupt 2 is assigned to hardware context. Then the 
> driver may not deal with it in this case. I think it is ok to fail if 
> the system has very limited resource.

Ok.  I suspect you'll want to change that behavior in the future with a 
fw update, but if this is how things work today, then this is how the 
driver must be.
Lizhi Hou Aug. 14, 2024, 9:59 p.m. UTC | #10
On 8/14/24 14:53, Jeffrey Hugo wrote:
> On 8/14/2024 2:24 PM, Lizhi Hou wrote:
>>
>> On 8/14/24 11:46, Jeffrey Hugo wrote:
>>> On 8/14/2024 12:16 PM, Lizhi Hou wrote:
>>>>
>>>> On 8/9/24 09:11, Jeffrey Hugo wrote:
>>>>> On 8/5/2024 11:39 AM, Lizhi Hou wrote:
>>>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>>>>>> b/drivers/accel/amdxdna/aie2_pci.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..3660967c00e6
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>>>>> @@ -0,0 +1,182 @@
>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>> +/*
>>>>>> + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
>>>>>> + */
>>>>>> +
>>>>>> +#include <linux/amd-iommu.h>
>>>>>> +#include <linux/errno.h>
>>>>>> +#include <linux/firmware.h>
>>>>>> +#include <linux/iommu.h>
>>>>>
>>>>> You are clearly missing linux/pci.h and I suspect many more.
>>>> Other headers are indirectly included by "aie2_pci.h" underneath.
>>>
>>> aie2_pci.h also does not directly include linux/pci.h
>>
>> it is aie2_pci.h --> amdxdna_pci_drv.h --> linux/pci.h.
>>
>> It compiles without any issue.
>
> I did not doubt that it compiled.  To be clear, I'm pointing out what 
> I believe is poor style - relying on implicit includes.
>
> There is no reason that amdxdna_pci_drv.h needs to include linux/pci.h 
> (at-least in this patch).  That header file doesn't use any of the 
> content from pci.h.  If this were merged, I suspect it would be valid 
> for someone to post a cleanup patch that removes pci.h from 
> amdxdna_pci_drv.h.
>
> The problem comes when someone does a tree wide refactor of some 
> header - perhaps moving a function out of pci.h into something else.  
> If they grep the source tree, they'll find amdxdna_pci_drv.h includes 
> pci.h but really doesn't use it.  They likely won't see aie2_pci.c 
> which may break because of the refactor.  Even more problematic is if 
> pci.h is including something that you need, and you are not including 
> it anywhere. The code will still compile, but maybe in the next kernel 
> cycle pci.h no longer includes that thing.  Your code will break.
>
> The 4 includes you have here seems entirely too little, and I'm not 
> clearly seeing the logic of what gets explicitly included vs what is 
> implicitly included.  firmware.h is explicitly included, but pci.h is 
> not, yet it seems like you use a lot more from pci.h.
>
> There is the include what you use project that attempts to automate 
> this, although I don't know how well it works with kernel code - 
> https://github.com/include-what-you-use/include-what-you-use

Ok. got your point and I will cleanup include.


Lizhi

>
>>
>>>
>>>>>> +
>>>>>> +    ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
>>>>>> +    if (ret) {
>>>>>> +        XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret);
>>>>>> +        goto release_fw;
>>>>>> +    }
>>>>>> +
>>>>>> +    nvec = pci_msix_vec_count(pdev);
>>>>>
>>>>> This feels weird.  Can your device advertise variable number of 
>>>>> MSI-X vectors?  It only works if all of the vectors are used?
>>>> That is possible. the driver supports different hardware. And the 
>>>> fw assigns vector for hardware context dynamically. So the driver 
>>>> needs to allocate all vectors ahead.
>>>
>>> So, if the device requests N MSIs, but the host is only able to 
>>> satisfy 1 (or some number less than N), the fw is completely unable 
>>> to function?
>> The fw may return interrupt 2 is assigned to hardware context. Then 
>> the driver may not deal with it in this case. I think it is ok to 
>> fail if the system has very limited resource.
>
> Ok.  I suspect you'll want to change that behavior in the future with 
> a fw update, but if this is how things work today, then this is how 
> the driver must be.
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 36d66b141352..3d2b9f1f1a07 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1146,6 +1146,14 @@  M:	Sanjay R Mehta <sanju.mehta@amd.com>
 S:	Maintained
 F:	drivers/spi/spi-amd.c
 
+AMD XDNA DRIVER
+M:	Min Ma <min.ma@amd.com>
+M:	Lizhi Hou <lizhi.hou@amd.com>
+L:	dri-devel@lists.freedesktop.org
+S:	Supported
+F:	drivers/accel/amdxdna
+F:	include/uapi/drm/amdxdna_accel.h
+
 AMD XGBE DRIVER
 M:	"Shyam Sundar S K" <Shyam-sundar.S-k@amd.com>
 L:	netdev@vger.kernel.org
diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig
index 64065fb8922b..5b9490367a39 100644
--- a/drivers/accel/Kconfig
+++ b/drivers/accel/Kconfig
@@ -24,6 +24,7 @@  menuconfig DRM_ACCEL
 	  different device files, called accel/accel* (in /dev, sysfs
 	  and debugfs).
 
+source "drivers/accel/amdxdna/Kconfig"
 source "drivers/accel/habanalabs/Kconfig"
 source "drivers/accel/ivpu/Kconfig"
 source "drivers/accel/qaic/Kconfig"
diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
index ab3df932937f..a301fb6089d4 100644
--- a/drivers/accel/Makefile
+++ b/drivers/accel/Makefile
@@ -1,5 +1,6 @@ 
 # SPDX-License-Identifier: GPL-2.0-only
 
+obj-$(CONFIG_DRM_ACCEL_AMDXDNA)		+= amdxdna/
 obj-$(CONFIG_DRM_ACCEL_HABANALABS)	+= habanalabs/
 obj-$(CONFIG_DRM_ACCEL_IVPU)		+= ivpu/
 obj-$(CONFIG_DRM_ACCEL_QAIC)		+= qaic/
diff --git a/drivers/accel/amdxdna/Kconfig b/drivers/accel/amdxdna/Kconfig
new file mode 100644
index 000000000000..b6f4c364dd6b
--- /dev/null
+++ b/drivers/accel/amdxdna/Kconfig
@@ -0,0 +1,15 @@ 
+# SPDX-License-Identifier: GPL-2.0-only
+
+config DRM_ACCEL_AMDXDNA
+	tristate "AMD AI Engine"
+	depends on AMD_IOMMU
+	depends on DRM_ACCEL
+	depends on PCI && HAS_IOMEM
+	depends on X86_64
+	select FW_LOADER
+	help
+	  Choose this option to enable support for NPU integrated into AMD
+	  client CPUs like AMD Ryzen AI 300 Series. AMD NPU can be used to
+	  accelerate machine learning applications.
+
+	  If "M" is selected, the driver module will be amdxdna.
diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
new file mode 100644
index 000000000000..eb3de5a144b5
--- /dev/null
+++ b/drivers/accel/amdxdna/Makefile
@@ -0,0 +1,14 @@ 
+# SPDX-License-Identifier: GPL-2.0-only
+
+amdxdna-y := \
+	aie2_pci.o \
+	aie2_psp.o \
+	aie2_smu.o \
+	amdxdna_drm.o \
+	amdxdna_pci_drv.o \
+	amdxdna_sysfs.o \
+	npu1_regs.o \
+	npu2_regs.o \
+	npu4_regs.o \
+	npu5_regs.o
+obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
new file mode 100644
index 000000000000..7d5c48d0daf8
--- /dev/null
+++ b/drivers/accel/amdxdna/TODO
@@ -0,0 +1,4 @@ 
+- Add import and export BO support
+- Add debugfs support
+- Improve debug BO support
+- Improve power management
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
new file mode 100644
index 000000000000..3660967c00e6
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -0,0 +1,182 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <linux/amd-iommu.h>
+#include <linux/errno.h>
+#include <linux/firmware.h>
+#include <linux/iommu.h>
+
+#include "aie2_pci.h"
+
+static void aie2_hw_stop(struct amdxdna_dev *xdna)
+{
+	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+
+	aie2_psp_stop(ndev->psp_hdl);
+	aie2_smu_fini(ndev);
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+}
+
+static int aie2_hw_start(struct amdxdna_dev *xdna)
+{
+	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+	struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+	int ret;
+
+	ret = pci_enable_device(pdev);
+	if (ret) {
+		XDNA_ERR(xdna, "failed to enable device, ret %d", ret);
+		return ret;
+	}
+	pci_set_master(pdev);
+
+	ret = aie2_smu_init(ndev);
+	if (ret) {
+		XDNA_ERR(xdna, "failed to init smu, ret %d", ret);
+		goto disable_dev;
+	}
+
+	ret = aie2_psp_start(ndev->psp_hdl);
+	if (ret) {
+		XDNA_ERR(xdna, "failed to start psp, ret %d", ret);
+		goto fini_smu;
+	}
+
+	return 0;
+
+fini_smu:
+	aie2_smu_fini(ndev);
+disable_dev:
+	pci_disable_device(pdev);
+	pci_clear_master(pdev);
+
+	return ret;
+}
+
+static int aie2_init(struct amdxdna_dev *xdna)
+{
+	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+	struct amdxdna_dev_hdl *ndev;
+	struct psp_config psp_conf;
+	const struct firmware *fw;
+	void __iomem * const *tbl;
+	int i, bars, nvec, ret;
+
+	ndev = devm_kzalloc(&pdev->dev, sizeof(*ndev), GFP_KERNEL);
+	if (!ndev)
+		return -ENOMEM;
+
+	ndev->priv = xdna->dev_info->dev_priv;
+	ndev->xdna = xdna;
+
+	ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
+	if (ret) {
+		XDNA_ERR(xdna, "failed to request_firmware %s, ret %d",
+			 ndev->priv->fw_path, ret);
+		return ret;
+	}
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		XDNA_ERR(xdna, "pcim enable device failed, ret %d", ret);
+		goto release_fw;
+	}
+
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	for (i = 0; i < PSP_MAX_REGS; i++) {
+		if (!(BIT(PSP_REG_BAR(ndev, i)) && bars)) {
+			XDNA_ERR(xdna, "does not get pci bar%d",
+				 PSP_REG_BAR(ndev, i));
+			ret = -EINVAL;
+			goto release_fw;
+		}
+	}
+
+	ret = pcim_iomap_regions(pdev, bars, "amdxdna-npu");
+	if (ret) {
+		XDNA_ERR(xdna, "map regions failed, ret %d", ret);
+		goto release_fw;
+	}
+
+	tbl = pcim_iomap_table(pdev);
+	if (!tbl) {
+		XDNA_ERR(xdna, "Cannot get iomap table");
+		ret = -ENOMEM;
+		goto release_fw;
+	}
+	ndev->sram_base = tbl[xdna->dev_info->sram_bar];
+	ndev->smu_base = tbl[xdna->dev_info->smu_bar];
+
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (ret) {
+		XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret);
+		goto release_fw;
+	}
+
+	nvec = pci_msix_vec_count(pdev);
+	if (nvec <= 0) {
+		XDNA_ERR(xdna, "does not get number of interrupt vector");
+		ret = -EINVAL;
+		goto release_fw;
+	}
+
+	ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX);
+	if (ret < 0) {
+		XDNA_ERR(xdna, "failed to alloc irq vectors, ret %d", ret);
+		goto release_fw;
+	}
+
+	ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+	if (ret) {
+		XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret);
+		goto free_irq;
+	}
+
+	psp_conf.fw_size = fw->size;
+	psp_conf.fw_buf = fw->data;
+	for (i = 0; i < PSP_MAX_REGS; i++)
+		psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
+	ndev->psp_hdl = aie2m_psp_create(&pdev->dev, &psp_conf);
+	if (!ndev->psp_hdl) {
+		XDNA_ERR(xdna, "failed to create psp");
+		ret = -ENOMEM;
+		goto disable_sva;
+	}
+	xdna->dev_handle = ndev;
+
+	ret = aie2_hw_start(xdna);
+	if (ret) {
+		XDNA_ERR(xdna, "start npu failed, ret %d", ret);
+		goto disable_sva;
+	}
+
+	release_firmware(fw);
+	return 0;
+
+disable_sva:
+	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+free_irq:
+	pci_free_irq_vectors(pdev);
+release_fw:
+	release_firmware(fw);
+
+	return ret;
+}
+
+static void aie2_fini(struct amdxdna_dev *xdna)
+{
+	struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+
+	aie2_hw_stop(xdna);
+	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+	pci_free_irq_vectors(pdev);
+}
+
+const struct amdxdna_dev_ops aie2_ops = {
+	.init           = aie2_init,
+	.fini           = aie2_fini,
+};
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
new file mode 100644
index 000000000000..984bf65b7a2b
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -0,0 +1,144 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AIE2_PCI_H_
+#define _AIE2_PCI_H_
+
+#include <linux/device.h>
+#include <linux/iopoll.h>
+#include <linux/io.h>
+
+#include "amdxdna_pci_drv.h"
+
+#define AIE2_INTERVAL	20000	/* us */
+#define AIE2_TIMEOUT	1000000	/* us */
+
+/* Firmware determines device memory base address and size */
+#define AIE2_DEVM_BASE	0x4000000
+#define AIE2_DEVM_SIZE	(48 * 1024 * 1024)
+
+#define NDEV2PDEV(ndev) \
+	(to_pci_dev((ndev)->xdna->ddev.dev))
+
+#define AIE2_SRAM_OFF(ndev, addr) \
+	((addr) - (ndev)->priv->sram_dev_addr)
+#define AIE2_MBOX_OFF(ndev, addr) \
+	((addr) - (ndev)->priv->mbox_dev_addr)
+
+#define PSP_REG_BAR(ndev, idx) \
+	((ndev)->priv->psp_regs_off[(idx)].bar_idx)
+#define PSP_REG_OFF(ndev, idx) \
+	((ndev)->priv->psp_regs_off[(idx)].offset)
+#define SRAM_REG_OFF(ndev, idx) \
+	((ndev)->priv->sram_offs[(idx)].offset)
+
+#define SMU_REG(ndev, idx) \
+({ \
+	typeof(ndev) _ndev = ndev; \
+	((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \
+})
+#define SRAM_GET_ADDR(ndev, idx) \
+({ \
+	typeof(ndev) _ndev = ndev; \
+	((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \
+})
+
+#define SMU_MPNPUCLK_FREQ_MAX(ndev) \
+	((ndev)->priv->smu_mpnpuclk_freq_max)
+#define SMU_HCLK_FREQ_MAX(ndev) \
+	((ndev)->priv->smu_hclk_freq_max)
+
+enum aie2_smu_reg_idx {
+	SMU_CMD_REG = 0,
+	SMU_ARG_REG,
+	SMU_INTR_REG,
+	SMU_RESP_REG,
+	SMU_OUT_REG,
+	SMU_MAX_REGS /* Kepp this at the end */
+};
+
+enum aie2_sram_reg_idx {
+	MBOX_CHANN_OFF = 0,
+	FW_ALIVE_OFF,
+	SRAM_MAX_INDEX /* Keep this at the end */
+};
+
+enum psp_reg_idx {
+	PSP_CMD_REG = 0,
+	PSP_ARG0_REG,
+	PSP_ARG1_REG,
+	PSP_ARG2_REG,
+	PSP_NUM_IN_REGS, /* number of input registers */
+	PSP_INTR_REG = PSP_NUM_IN_REGS,
+	PSP_STATUS_REG,
+	PSP_RESP_REG,
+	PSP_MAX_REGS /* Keep this at the end */
+};
+
+struct psp_config {
+	const void	*fw_buf;
+	u32		fw_size;
+	void __iomem	*psp_regs[PSP_MAX_REGS];
+};
+
+struct clock_entry {
+	char name[16];
+	u32 freq_mhz;
+};
+
+struct rt_config {
+	u32	type;
+	u32	value;
+};
+
+struct amdxdna_dev_hdl {
+	struct amdxdna_dev		*xdna;
+	const struct amdxdna_dev_priv	*priv;
+	void			__iomem *sram_base;
+	void			__iomem *smu_base;
+	struct psp_device		*psp_hdl;
+	struct clock_entry		mp_npu_clock;
+	struct clock_entry		h_clock;
+};
+
+#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
+	[reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE}
+
+struct aie2_bar_off_pair {
+	int	bar_idx;
+	u32	offset;
+};
+
+struct amdxdna_dev_priv {
+	const char			*fw_path;
+	u64				protocol_major;
+	u64				protocol_minor;
+	struct rt_config		rt_config;
+#define COL_ALIGN_NONE   0
+#define COL_ALIGN_NATURE 1
+	u32				col_align;
+	u32				mbox_dev_addr;
+	/* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
+	u32				mbox_size;
+	u32				sram_dev_addr;
+	struct aie2_bar_off_pair	sram_offs[SRAM_MAX_INDEX];
+	struct aie2_bar_off_pair	psp_regs_off[PSP_MAX_REGS];
+	struct aie2_bar_off_pair	smu_regs_off[SMU_MAX_REGS];
+	u32				smu_mpnpuclk_freq_max;
+	u32				smu_hclk_freq_max;
+};
+
+extern const struct amdxdna_dev_ops aie2_ops;
+
+/* aie2_smu.c */
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
+void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
+
+/* aie2_psp.c */
+struct psp_device *aie2m_psp_create(struct device *dev, struct psp_config *conf);
+int aie2_psp_start(struct psp_device *psp);
+void aie2_psp_stop(struct psp_device *psp);
+
+#endif /* _AIE2_PCI_H_ */
diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c
new file mode 100644
index 000000000000..c24207c252a1
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_psp.c
@@ -0,0 +1,137 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/firmware.h>
+#include <linux/iopoll.h>
+#include <linux/slab.h>
+#include "aie2_pci.h"
+
+#define PSP_STATUS_READY	BIT(31)
+
+/* PSP commands */
+#define PSP_VALIDATE		1
+#define PSP_START		2
+#define PSP_RELEASE_TMR		3
+
+/* PSP special arguments */
+#define PSP_START_COPY_FW	1
+
+/* PSP response error code */
+#define PSP_ERROR_CANCEL	0xFFFF0002
+#define PSP_ERROR_BAD_STATE	0xFFFF0007
+
+#define PSP_FW_ALIGN		0x10000
+#define PSP_POLL_INTERVAL	20000	/* us */
+#define PSP_POLL_TIMEOUT	1000000	/* us */
+
+#define PSP_REG(p, reg) \
+	((p)->psp_regs[reg])
+
+struct psp_device {
+	struct device	  *dev;
+	struct psp_config conf;
+	u32		  fw_buf_sz;
+	u64		  fw_paddr;
+	void		  *fw_buffer;
+	void __iomem	  *psp_regs[PSP_MAX_REGS];
+};
+
+static int psp_exec(struct psp_device *psp, u32 *reg_vals)
+{
+	u32 resp_code;
+	int ret, i;
+	u32 ready;
+
+	/* Write command and argument registers */
+	for (i = 0; i < PSP_NUM_IN_REGS; i++)
+		writel(reg_vals[i], PSP_REG(psp, i));
+
+	/* clear and set PSP INTR register to kick off */
+	writel(0, PSP_REG(psp, PSP_INTR_REG));
+	writel(1, PSP_REG(psp, PSP_INTR_REG));
+
+	/* PSP should be busy. Wait for ready, so we know task is done. */
+	ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready,
+				 FIELD_GET(PSP_STATUS_READY, ready),
+				 PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT);
+	if (ret) {
+		dev_err(psp->dev, "PSP is not ready, ret 0x%x", ret);
+		return ret;
+	}
+
+	resp_code = readl(PSP_REG(psp, PSP_RESP_REG));
+	if (resp_code) {
+		dev_err(psp->dev, "fw return error 0x%x", resp_code);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+void aie2_psp_stop(struct psp_device *psp)
+{
+	u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, };
+	int ret;
+
+	ret = psp_exec(psp, reg_vals);
+	if (ret)
+		dev_err(psp->dev, "release tmr failed, ret %d", ret);
+}
+
+int aie2_psp_start(struct psp_device *psp)
+{
+	u32 reg_vals[PSP_NUM_IN_REGS];
+	int ret;
+
+	reg_vals[0] = PSP_VALIDATE;
+	reg_vals[1] = lower_32_bits(psp->fw_paddr);
+	reg_vals[2] = upper_32_bits(psp->fw_paddr);
+	reg_vals[3] = psp->fw_buf_sz;
+
+	ret = psp_exec(psp, reg_vals);
+	if (ret) {
+		dev_err(psp->dev, "failed to validate fw, ret %d", ret);
+		return ret;
+	}
+
+	memset(reg_vals, 0, sizeof(reg_vals));
+	reg_vals[0] = PSP_START;
+	reg_vals[1] = PSP_START_COPY_FW;
+	ret = psp_exec(psp, reg_vals);
+	if (ret) {
+		dev_err(psp->dev, "failed to start fw, ret %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+struct psp_device *aie2m_psp_create(struct device *dev, struct psp_config *conf)
+{
+	struct psp_device *psp;
+	u64 offset;
+
+	psp = devm_kzalloc(dev, sizeof(*psp), GFP_KERNEL);
+	if (!psp)
+		return NULL;
+
+	psp->dev = dev;
+	memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
+
+	psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN;
+	psp->fw_buffer = devm_kmalloc(psp->dev, psp->fw_buf_sz, GFP_KERNEL);
+	if (!psp->fw_buffer) {
+		dev_err(psp->dev, "no memory for fw buffer");
+		return NULL;
+	}
+
+	psp->fw_paddr = virt_to_phys(psp->fw_buffer);
+	offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
+	psp->fw_paddr += offset;
+	memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);
+
+	return psp;
+}
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
new file mode 100644
index 000000000000..fa8e08af6e30
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -0,0 +1,112 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include "aie2_pci.h"
+
+#define SMU_RESULT_OK		1
+
+/* SMU commands */
+#define AIE2_SMU_POWER_ON		0x3
+#define AIE2_SMU_POWER_OFF		0x4
+#define AIE2_SMU_SET_MPNPUCLK_FREQ	0x5
+#define AIE2_SMU_SET_HCLK_FREQ		0x6
+
+static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
+{
+	u32 resp;
+	int ret;
+
+	writel(0, SMU_REG(ndev, SMU_RESP_REG));
+	writel(reg_arg, SMU_REG(ndev, SMU_ARG_REG));
+	writel(reg_cmd, SMU_REG(ndev, SMU_CMD_REG));
+
+	/* Clear and set SMU_INTR_REG to kick off */
+	writel(0, SMU_REG(ndev, SMU_INTR_REG));
+	writel(1, SMU_REG(ndev, SMU_INTR_REG));
+
+	ret = readx_poll_timeout(readl, SMU_REG(ndev, SMU_RESP_REG), resp,
+				 resp, AIE2_INTERVAL, AIE2_TIMEOUT);
+	if (ret) {
+		XDNA_ERR(ndev->xdna, "smu cmd %d timed out", reg_cmd);
+		return ret;
+	}
+
+	if (resp != SMU_RESULT_OK) {
+		XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int aie2_smu_set_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz)
+{
+	int ret;
+
+	if (!freq_mhz || freq_mhz > SMU_MPNPUCLK_FREQ_MAX(ndev)) {
+		XDNA_ERR(ndev->xdna, "invalid mpnpu clock freq %d", freq_mhz);
+		return -EINVAL;
+	}
+
+	ndev->mp_npu_clock.freq_mhz = freq_mhz;
+	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, freq_mhz);
+	if (!ret)
+		XDNA_INFO_ONCE(ndev->xdna, "set mpnpu_clock = %d mhz", freq_mhz);
+
+	return ret;
+}
+
+static int aie2_smu_set_hclock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz)
+{
+	int ret;
+
+	if (!freq_mhz || freq_mhz > SMU_HCLK_FREQ_MAX(ndev)) {
+		XDNA_ERR(ndev->xdna, "invalid hclock freq %d", freq_mhz);
+		return -EINVAL;
+	}
+
+	ndev->h_clock.freq_mhz = freq_mhz;
+	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, freq_mhz);
+	if (!ret)
+		XDNA_INFO_ONCE(ndev->xdna, "set npu_hclock = %d mhz", freq_mhz);
+
+	return ret;
+}
+
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+{
+	int ret;
+
+	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0);
+	if (ret) {
+		XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
+		return ret;
+	}
+
+	ret = aie2_smu_set_mpnpu_clock_freq(ndev, SMU_MPNPUCLK_FREQ_MAX(ndev));
+	if (ret) {
+		XDNA_ERR(ndev->xdna, "Set mpnpu clk freq failed, ret %d", ret);
+		return ret;
+	}
+	snprintf(ndev->mp_npu_clock.name, sizeof(ndev->mp_npu_clock.name), "MP-NPU Clock");
+
+	ret = aie2_smu_set_hclock_freq(ndev, SMU_HCLK_FREQ_MAX(ndev));
+	if (ret) {
+		XDNA_ERR(ndev->xdna, "Set hclk freq failed, ret %d", ret);
+		return ret;
+	}
+	snprintf(ndev->h_clock.name, sizeof(ndev->h_clock.name), "H Clock");
+
+	return 0;
+}
+
+void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
+{
+	int ret;
+
+	ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0);
+	if (ret)
+		XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
+}
diff --git a/drivers/accel/amdxdna/amdxdna_drm.c b/drivers/accel/amdxdna/amdxdna_drm.c
new file mode 100644
index 000000000000..91e4f9c9dac9
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_drm.c
@@ -0,0 +1,20 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_ioctl.h>
+#include <drm/drm_accel.h>
+
+#include "amdxdna_drm.h"
+
+DEFINE_DRM_ACCEL_FOPS(amdxdna_fops);
+
+const struct drm_driver amdxdna_drm_drv = {
+	.driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL,
+	.fops = &amdxdna_fops,
+	.name = "amdxdna_accel_driver",
+	.desc = "AMD XDNA DRM implementation",
+	.major = AMDXDNA_DRIVER_MAJOR,
+	.minor = AMDXDNA_DRIVER_MINOR,
+};
diff --git a/drivers/accel/amdxdna/amdxdna_drm.h b/drivers/accel/amdxdna/amdxdna_drm.h
new file mode 100644
index 000000000000..2b18bcbdc23e
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_drm.h
@@ -0,0 +1,65 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_DRM_H_
+#define _AMDXDNA_DRM_H_
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
+#include <drm/drm_file.h>
+
+#define XDNA_INFO(xdna, fmt, args...)	drm_info(&(xdna)->ddev, fmt, ##args)
+#define XDNA_WARN(xdna, fmt, args...)	drm_warn(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
+#define XDNA_ERR(xdna, fmt, args...)	drm_err(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
+#define XDNA_DBG(xdna, fmt, args...)	drm_dbg(&(xdna)->ddev, fmt, ##args)
+#define XDNA_INFO_ONCE(xdna, fmt, args...) drm_info_once(&(xdna)->ddev, fmt, ##args)
+
+#define to_xdna_dev(drm_dev) \
+	((struct amdxdna_dev *)container_of(drm_dev, struct amdxdna_dev, ddev))
+
+extern const struct drm_driver amdxdna_drm_drv;
+
+struct amdxdna_dev;
+
+/*
+ * struct amdxdna_dev_ops - Device hardware operation callbacks
+ */
+struct amdxdna_dev_ops {
+	int (*init)(struct amdxdna_dev *xdna);
+	void (*fini)(struct amdxdna_dev *xdna);
+};
+
+/*
+ * struct amdxdna_dev_info - Device hardware information
+ * Record device static information, like reg, mbox, PSP, SMU bar index,
+ */
+struct amdxdna_dev_info {
+	int				reg_bar;
+	int				mbox_bar;
+	int				sram_bar;
+	int				psp_bar;
+	int				smu_bar;
+	int				device_type;
+	int				first_col;
+	u32				dev_mem_buf_shift;
+	u64				dev_mem_base;
+	size_t				dev_mem_size;
+	char				*vbnv;
+	const struct amdxdna_dev_priv	*dev_priv;
+	const struct amdxdna_dev_ops	*ops;
+};
+
+struct amdxdna_dev {
+	struct drm_device		ddev;
+	struct amdxdna_dev_hdl		*dev_handle;
+	const struct amdxdna_dev_info	*dev_info;
+
+	struct mutex			dev_lock; /* per device lock */
+};
+
+#endif /* _AMDXDNA_DRM_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
new file mode 100644
index 000000000000..7d0cfd918b0e
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -0,0 +1,118 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#include <linux/module.h>
+
+#include "amdxdna_pci_drv.h"
+
+/*
+ *  There are platforms which share the same PCI device ID
+ *  but have different PCI revision IDs. So, let the PCI class
+ *  determine the probe and later use the (device_id, rev_id)
+ *  pair as a key to select the devices.
+ */
+static const struct pci_device_id pci_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_ANY_ID),
+		.class = PCI_CLASS_SP_OTHER << 8,
+		.class_mask = 0xFFFF00,
+	},
+	{0}
+};
+
+MODULE_DEVICE_TABLE(pci, pci_ids);
+
+static const struct amdxdna_device_id amdxdna_ids[] = {
+	{ 0x1502, 0x0,  &dev_npu1_info },
+	{ 0x17f0, 0x0,  &dev_npu2_info },
+	{ 0x17f0, 0x10, &dev_npu4_info },
+	{ 0x17f0, 0x11, &dev_npu5_info },
+	{0}
+};
+
+static const struct amdxdna_dev_info *
+amdxdna_get_dev_info(struct pci_dev *pdev)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(amdxdna_ids); i++) {
+		if (pdev->device == amdxdna_ids[i].device &&
+		    pdev->revision == amdxdna_ids[i].revision)
+			return amdxdna_ids[i].dev_info;
+	}
+	return NULL;
+}
+
+static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct amdxdna_dev *xdna;
+	int ret;
+
+	xdna = devm_drm_dev_alloc(&pdev->dev, &amdxdna_drm_drv, typeof(*xdna), ddev);
+	if (IS_ERR(xdna))
+		return PTR_ERR(xdna);
+
+	xdna->dev_info = amdxdna_get_dev_info(pdev);
+	if (!xdna->dev_info)
+		return -ENODEV;
+
+	drmm_mutex_init(&xdna->ddev, &xdna->dev_lock);
+	pci_set_drvdata(pdev, xdna);
+
+	mutex_lock(&xdna->dev_lock);
+	ret = xdna->dev_info->ops->init(xdna);
+	mutex_unlock(&xdna->dev_lock);
+	if (ret) {
+		XDNA_ERR(xdna, "Hardware init failed, ret %d", ret);
+		return ret;
+	}
+
+	ret = amdxdna_sysfs_init(xdna);
+	if (ret) {
+		XDNA_ERR(xdna, "Create amdxdna attrs failed: %d", ret);
+		goto failed_dev_fini;
+	}
+
+	ret = drm_dev_register(&xdna->ddev, 0);
+	if (ret) {
+		XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
+		goto failed_sysfs_fini;
+	}
+
+	return 0;
+
+failed_sysfs_fini:
+	amdxdna_sysfs_fini(xdna);
+failed_dev_fini:
+	mutex_lock(&xdna->dev_lock);
+	xdna->dev_info->ops->fini(xdna);
+	mutex_unlock(&xdna->dev_lock);
+	return ret;
+}
+
+static void amdxdna_remove(struct pci_dev *pdev)
+{
+	struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
+
+	drm_dev_unplug(&xdna->ddev);
+	amdxdna_sysfs_fini(xdna);
+
+	mutex_lock(&xdna->dev_lock);
+	xdna->dev_info->ops->fini(xdna);
+	mutex_unlock(&xdna->dev_lock);
+}
+
+static struct pci_driver amdxdna_pci_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = pci_ids,
+	.probe = amdxdna_probe,
+	.remove = amdxdna_remove,
+};
+
+module_pci_driver(amdxdna_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("XRT Team <runtimeca39d@amd.com>");
+MODULE_VERSION("0.1");
+MODULE_DESCRIPTION("amdxdna driver");
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
new file mode 100644
index 000000000000..ec80efc679b6
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -0,0 +1,31 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_PCI_DRV_H_
+#define _AMDXDNA_PCI_DRV_H_
+
+#include <linux/pci.h>
+
+#include "amdxdna_drm.h"
+
+/*
+ * struct amdxdna_device_id - PCI device info
+ */
+struct amdxdna_device_id {
+	unsigned short device;
+	u8 revision;
+	const struct amdxdna_dev_info *dev_info;
+};
+
+/* Add device info below */
+extern const struct amdxdna_dev_info dev_npu1_info;
+extern const struct amdxdna_dev_info dev_npu2_info;
+extern const struct amdxdna_dev_info dev_npu4_info;
+extern const struct amdxdna_dev_info dev_npu5_info;
+
+int amdxdna_sysfs_init(struct amdxdna_dev *xdna);
+void amdxdna_sysfs_fini(struct amdxdna_dev *xdna);
+
+#endif /* _AMDXDNA_PCI_DRV_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_sysfs.c b/drivers/accel/amdxdna/amdxdna_sysfs.c
new file mode 100644
index 000000000000..95b5b7352cf3
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_sysfs.c
@@ -0,0 +1,47 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+#include "amdxdna_pci_drv.h"
+
+static ssize_t vbnv_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct amdxdna_dev *xdna = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", xdna->dev_info->vbnv);
+}
+static DEVICE_ATTR_RO(vbnv);
+
+static ssize_t device_type_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct amdxdna_dev *xdna = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", xdna->dev_info->device_type);
+}
+static DEVICE_ATTR_RO(device_type);
+
+static struct attribute *amdxdna_attrs[] = {
+	&dev_attr_device_type.attr,
+	&dev_attr_vbnv.attr,
+	NULL,
+};
+
+static struct attribute_group amdxdna_attr_group = {
+	.attrs = amdxdna_attrs,
+};
+
+int amdxdna_sysfs_init(struct amdxdna_dev *xdna)
+{
+	int ret;
+
+	ret = sysfs_create_group(&xdna->ddev.dev->kobj, &amdxdna_attr_group);
+	if (ret)
+		XDNA_ERR(xdna, "Create attr group failed");
+
+	return ret;
+}
+
+void amdxdna_sysfs_fini(struct amdxdna_dev *xdna)
+{
+	sysfs_remove_group(&xdna->ddev.dev->kobj, &amdxdna_attr_group);
+}
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
new file mode 100644
index 000000000000..822f69be0a23
--- /dev/null
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -0,0 +1,94 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include "aie2_pci.h"
+
+/* Address definition from NPU1 docs */
+#define MPNPU_PUB_SEC_INTR		0x3010090
+#define MPNPU_PUB_PWRMGMT_INTR		0x3010094
+#define MPNPU_PUB_SCRATCH2		0x30100A0
+#define MPNPU_PUB_SCRATCH3		0x30100A4
+#define MPNPU_PUB_SCRATCH4		0x30100A8
+#define MPNPU_PUB_SCRATCH5		0x30100AC
+#define MPNPU_PUB_SCRATCH6		0x30100B0
+#define MPNPU_PUB_SCRATCH7		0x30100B4
+#define MPNPU_PUB_SCRATCH9		0x30100BC
+
+#define MPNPU_SRAM_X2I_MAILBOX_0	0x30A0000
+#define MPNPU_SRAM_X2I_MAILBOX_1	0x30A2000
+#define MPNPU_SRAM_I2X_MAILBOX_15	0x30BF000
+
+#define MPNPU_APERTURE0_BASE		0x3000000
+#define MPNPU_APERTURE1_BASE		0x3080000
+#define MPNPU_APERTURE2_BASE		0x30C0000
+
+/* PCIe BAR Index for NPU1 */
+#define NPU1_REG_BAR_INDEX  0
+#define NPU1_MBOX_BAR_INDEX 4
+#define NPU1_PSP_BAR_INDEX  0
+#define NPU1_SMU_BAR_INDEX  0
+#define NPU1_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU1_REG_BAR_BASE  MPNPU_APERTURE0_BASE
+#define NPU1_MBOX_BAR_BASE MPNPU_APERTURE2_BASE
+#define NPU1_PSP_BAR_BASE  MPNPU_APERTURE0_BASE
+#define NPU1_SMU_BAR_BASE  MPNPU_APERTURE0_BASE
+#define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE
+
+#define NPU1_RT_CFG_TYPE_PDI_LOAD 2
+#define NPU1_RT_CFG_VAL_PDI_LOAD_MGMT 0
+#define NPU1_RT_CFG_VAL_PDI_LOAD_APP 1
+
+#define NPU1_MPNPUCLK_FREQ_MAX  600
+#define NPU1_HCLK_FREQ_MAX      1024
+
+const struct amdxdna_dev_priv npu1_dev_priv = {
+	.fw_path        = "amdnpu/1502_00/npu.sbin",
+	.protocol_major = 0x5,
+	.protocol_minor = 0x1,
+	.rt_config	= {NPU1_RT_CFG_TYPE_PDI_LOAD, NPU1_RT_CFG_VAL_PDI_LOAD_APP},
+	.col_align	= COL_ALIGN_NONE,
+	.mbox_dev_addr  = NPU1_MBOX_BAR_BASE,
+	.mbox_size      = 0, /* Use BAR size */
+	.sram_dev_addr  = NPU1_SRAM_BAR_BASE,
+	.sram_offs      = {
+		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU1_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU1_SRAM, MPNPU_SRAM_I2X_MAILBOX_15),
+	},
+	.psp_regs_off   = {
+		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU1_PSP, MPNPU_PUB_SCRATCH2),
+		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU1_PSP, MPNPU_PUB_SCRATCH3),
+		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU1_PSP, MPNPU_PUB_SCRATCH4),
+		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU1_PSP, MPNPU_PUB_SCRATCH9),
+		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU1_PSP, MPNPU_PUB_SEC_INTR),
+		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2),
+		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU1_PSP, MPNPU_PUB_SCRATCH3),
+	},
+	.smu_regs_off   = {
+		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH5),
+		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
+		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU1_SMU, MPNPU_PUB_PWRMGMT_INTR),
+		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
+		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
+	},
+	.smu_mpnpuclk_freq_max = NPU1_MPNPUCLK_FREQ_MAX,
+	.smu_hclk_freq_max     = NPU1_HCLK_FREQ_MAX,
+};
+
+const struct amdxdna_dev_info dev_npu1_info = {
+	.reg_bar           = NPU1_REG_BAR_INDEX,
+	.mbox_bar          = NPU1_MBOX_BAR_INDEX,
+	.sram_bar          = NPU1_SRAM_BAR_INDEX,
+	.psp_bar           = NPU1_PSP_BAR_INDEX,
+	.smu_bar           = NPU1_SMU_BAR_INDEX,
+	.first_col         = 1,
+	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
+	.dev_mem_base      = AIE2_DEVM_BASE,
+	.dev_mem_size      = AIE2_DEVM_SIZE,
+	.vbnv              = "RyzenAI-npu1",
+	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
+	.dev_priv          = &npu1_dev_priv,
+	.ops               = &aie2_ops,
+};
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
new file mode 100644
index 000000000000..ba41bfa3fc7e
--- /dev/null
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -0,0 +1,111 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include "aie2_pci.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR             0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR         0x3010064
+#define MPNPU_PUB_SCRATCH0             0x301006C
+#define MPNPU_PUB_SCRATCH1             0x3010070
+#define MPNPU_PUB_SCRATCH2             0x3010074
+#define MPNPU_PUB_SCRATCH3             0x3010078
+#define MPNPU_PUB_SCRATCH4             0x301007C
+#define MPNPU_PUB_SCRATCH5             0x3010080
+#define MPNPU_PUB_SCRATCH6             0x3010084
+#define MPNPU_PUB_SCRATCH7             0x3010088
+#define MPNPU_PUB_SCRATCH8             0x301008C
+#define MPNPU_PUB_SCRATCH9             0x3010090
+#define MPNPU_PUB_SCRATCH10            0x3010094
+#define MPNPU_PUB_SCRATCH11            0x3010098
+#define MPNPU_PUB_SCRATCH12            0x301009C
+#define MPNPU_PUB_SCRATCH13            0x30100A0
+#define MPNPU_PUB_SCRATCH14            0x30100A4
+#define MPNPU_PUB_SCRATCH15            0x30100A8
+#define MP0_C2PMSG_73                  0x3810A24
+#define MP0_C2PMSG_123                 0x3810AEC
+
+#define MP1_C2PMSG_0                   0x3B10900
+#define MP1_C2PMSG_60                  0x3B109F0
+#define MP1_C2PMSG_61                  0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0       0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15      0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31      0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31      0x363F000
+
+#define MMNPU_APERTURE0_BASE           0x3000000
+#define MMNPU_APERTURE1_BASE           0x3600000
+#define MMNPU_APERTURE3_BASE           0x3810000
+#define MMNPU_APERTURE4_BASE           0x3B10000
+
+/* PCIe BAR Index for NPU2 */
+#define NPU2_REG_BAR_INDEX	0
+#define NPU2_MBOX_BAR_INDEX	0
+#define NPU2_PSP_BAR_INDEX	4
+#define NPU2_SMU_BAR_INDEX	5
+#define NPU2_SRAM_BAR_INDEX	2
+/* Associated BARs and Apertures */
+#define NPU2_REG_BAR_BASE	MMNPU_APERTURE0_BASE
+#define NPU2_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE
+#define NPU2_PSP_BAR_BASE	MMNPU_APERTURE3_BASE
+#define NPU2_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
+#define NPU2_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
+
+#define NPU2_RT_CFG_TYPE_PDI_LOAD 5
+#define NPU2_RT_CFG_VAL_PDI_LOAD_MGMT 0
+#define NPU2_RT_CFG_VAL_PDI_LOAD_APP 1
+
+#define NPU2_MPNPUCLK_FREQ_MAX  1267
+#define NPU2_HCLK_FREQ_MAX      1800
+
+const struct amdxdna_dev_priv npu2_dev_priv = {
+	.fw_path        = "amdnpu/17f0_00/npu.sbin",
+	.protocol_major = 0x6,
+	.protocol_minor = 0x1,
+	.rt_config	= {NPU2_RT_CFG_TYPE_PDI_LOAD, NPU2_RT_CFG_VAL_PDI_LOAD_APP},
+	.col_align	= COL_ALIGN_NATURE,
+	.mbox_dev_addr  = NPU2_MBOX_BAR_BASE,
+	.mbox_size      = 0, /* Use BAR size */
+	.sram_dev_addr  = NPU2_SRAM_BAR_BASE,
+	.sram_offs      = {
+		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU2_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+	},
+	.psp_regs_off   = {
+		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU2_PSP, MP0_C2PMSG_123),
+		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU2_REG, MPNPU_PUB_SCRATCH3),
+		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU2_REG, MPNPU_PUB_SCRATCH4),
+		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU2_REG, MPNPU_PUB_SCRATCH9),
+		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU2_PSP, MP0_C2PMSG_73),
+		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU2_PSP, MP0_C2PMSG_123),
+		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU2_REG, MPNPU_PUB_SCRATCH3),
+	},
+	.smu_regs_off   = {
+		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU2_SMU, MP1_C2PMSG_0),
+		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU2_SMU, MP1_C2PMSG_60),
+		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU2_SMU, MMNPU_APERTURE4_BASE),
+		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
+		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU2_SMU, MP1_C2PMSG_60),
+	},
+	.smu_mpnpuclk_freq_max = NPU2_MPNPUCLK_FREQ_MAX,
+	.smu_hclk_freq_max     = NPU2_HCLK_FREQ_MAX,
+};
+
+const struct amdxdna_dev_info dev_npu2_info = {
+	.reg_bar           = NPU2_REG_BAR_INDEX,
+	.mbox_bar          = NPU2_MBOX_BAR_INDEX,
+	.sram_bar          = NPU2_SRAM_BAR_INDEX,
+	.psp_bar           = NPU2_PSP_BAR_INDEX,
+	.smu_bar           = NPU2_SMU_BAR_INDEX,
+	.first_col         = 0,
+	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
+	.dev_mem_base      = AIE2_DEVM_BASE,
+	.dev_mem_size      = AIE2_DEVM_SIZE,
+	.vbnv              = "RyzenAI-npu2",
+	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
+	.dev_priv          = &npu2_dev_priv,
+	.ops               = &aie2_ops, /* NPU2 can share NPU1's callback */
+};
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
new file mode 100644
index 000000000000..7d82e9d39824
--- /dev/null
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -0,0 +1,111 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
+ */
+
+#include "aie2_pci.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR             0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR         0x3010064
+#define MPNPU_PUB_SCRATCH0             0x301006C
+#define MPNPU_PUB_SCRATCH1             0x3010070
+#define MPNPU_PUB_SCRATCH2             0x3010074
+#define MPNPU_PUB_SCRATCH3             0x3010078
+#define MPNPU_PUB_SCRATCH4             0x301007C
+#define MPNPU_PUB_SCRATCH5             0x3010080
+#define MPNPU_PUB_SCRATCH6             0x3010084
+#define MPNPU_PUB_SCRATCH7             0x3010088
+#define MPNPU_PUB_SCRATCH8             0x301008C
+#define MPNPU_PUB_SCRATCH9             0x3010090
+#define MPNPU_PUB_SCRATCH10            0x3010094
+#define MPNPU_PUB_SCRATCH11            0x3010098
+#define MPNPU_PUB_SCRATCH12            0x301009C
+#define MPNPU_PUB_SCRATCH13            0x30100A0
+#define MPNPU_PUB_SCRATCH14            0x30100A4
+#define MPNPU_PUB_SCRATCH15            0x30100A8
+#define MP0_C2PMSG_73                  0x3810A24
+#define MP0_C2PMSG_123                 0x3810AEC
+
+#define MP1_C2PMSG_0                   0x3B10900
+#define MP1_C2PMSG_60                  0x3B109F0
+#define MP1_C2PMSG_61                  0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0       0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15      0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31      0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31      0x363F000
+
+#define MMNPU_APERTURE0_BASE           0x3000000
+#define MMNPU_APERTURE1_BASE           0x3600000
+#define MMNPU_APERTURE3_BASE           0x3810000
+#define MMNPU_APERTURE4_BASE           0x3B10000
+
+/* PCIe BAR Index for NPU4 */
+#define NPU4_REG_BAR_INDEX	0
+#define NPU4_MBOX_BAR_INDEX	0
+#define NPU4_PSP_BAR_INDEX	4
+#define NPU4_SMU_BAR_INDEX	5
+#define NPU4_SRAM_BAR_INDEX	2
+/* Associated BARs and Apertures */
+#define NPU4_REG_BAR_BASE	MMNPU_APERTURE0_BASE
+#define NPU4_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE
+#define NPU4_PSP_BAR_BASE	MMNPU_APERTURE3_BASE
+#define NPU4_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
+#define NPU4_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
+
+#define NPU4_RT_CFG_TYPE_PDI_LOAD 5
+#define NPU4_RT_CFG_VAL_PDI_LOAD_MGMT 0
+#define NPU4_RT_CFG_VAL_PDI_LOAD_APP 1
+
+#define NPU4_MPNPUCLK_FREQ_MAX  1267
+#define NPU4_HCLK_FREQ_MAX      1800
+
+const struct amdxdna_dev_priv npu4_dev_priv = {
+	.fw_path        = "amdnpu/17f0_10/npu.sbin",
+	.protocol_major = 0x6,
+	.protocol_minor = 0x1,
+	.rt_config	= {NPU4_RT_CFG_TYPE_PDI_LOAD, NPU4_RT_CFG_VAL_PDI_LOAD_APP},
+	.col_align	= COL_ALIGN_NATURE,
+	.mbox_dev_addr  = NPU4_MBOX_BAR_BASE,
+	.mbox_size      = 0, /* Use BAR size */
+	.sram_dev_addr  = NPU4_SRAM_BAR_BASE,
+	.sram_offs      = {
+		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+	},
+	.psp_regs_off   = {
+		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU4_PSP, MP0_C2PMSG_123),
+		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU4_REG, MPNPU_PUB_SCRATCH3),
+		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU4_REG, MPNPU_PUB_SCRATCH4),
+		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU4_REG, MPNPU_PUB_SCRATCH9),
+		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU4_PSP, MP0_C2PMSG_73),
+		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123),
+		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU4_REG, MPNPU_PUB_SCRATCH3),
+	},
+	.smu_regs_off   = {
+		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU4_SMU, MP1_C2PMSG_0),
+		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU4_SMU, MP1_C2PMSG_60),
+		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE),
+		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
+		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
+	},
+	.smu_mpnpuclk_freq_max = NPU4_MPNPUCLK_FREQ_MAX,
+	.smu_hclk_freq_max     = NPU4_HCLK_FREQ_MAX,
+};
+
+const struct amdxdna_dev_info dev_npu4_info = {
+	.reg_bar           = NPU4_REG_BAR_INDEX,
+	.mbox_bar          = NPU4_MBOX_BAR_INDEX,
+	.sram_bar          = NPU4_SRAM_BAR_INDEX,
+	.psp_bar           = NPU4_PSP_BAR_INDEX,
+	.smu_bar           = NPU4_SMU_BAR_INDEX,
+	.first_col         = 0,
+	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
+	.dev_mem_base      = AIE2_DEVM_BASE,
+	.dev_mem_size      = AIE2_DEVM_SIZE,
+	.vbnv              = "RyzenAI-npu4",
+	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
+	.dev_priv          = &npu4_dev_priv,
+	.ops               = &aie2_ops, /* NPU4 can share NPU1's callback */
+};
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
new file mode 100644
index 000000000000..6d7f4aea0e6f
--- /dev/null
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -0,0 +1,111 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include "aie2_pci.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR             0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR         0x3010064
+#define MPNPU_PUB_SCRATCH0             0x301006C
+#define MPNPU_PUB_SCRATCH1             0x3010070
+#define MPNPU_PUB_SCRATCH2             0x3010074
+#define MPNPU_PUB_SCRATCH3             0x3010078
+#define MPNPU_PUB_SCRATCH4             0x301007C
+#define MPNPU_PUB_SCRATCH5             0x3010080
+#define MPNPU_PUB_SCRATCH6             0x3010084
+#define MPNPU_PUB_SCRATCH7             0x3010088
+#define MPNPU_PUB_SCRATCH8             0x301008C
+#define MPNPU_PUB_SCRATCH9             0x3010090
+#define MPNPU_PUB_SCRATCH10            0x3010094
+#define MPNPU_PUB_SCRATCH11            0x3010098
+#define MPNPU_PUB_SCRATCH12            0x301009C
+#define MPNPU_PUB_SCRATCH13            0x30100A0
+#define MPNPU_PUB_SCRATCH14            0x30100A4
+#define MPNPU_PUB_SCRATCH15            0x30100A8
+#define MP0_C2PMSG_73                  0x3810A24
+#define MP0_C2PMSG_123                 0x3810AEC
+
+#define MP1_C2PMSG_0                   0x3B10900
+#define MP1_C2PMSG_60                  0x3B109F0
+#define MP1_C2PMSG_61                  0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0       0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15      0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31      0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31      0x363F000
+
+#define MMNPU_APERTURE0_BASE           0x3000000
+#define MMNPU_APERTURE1_BASE           0x3600000
+#define MMNPU_APERTURE3_BASE           0x3810000
+#define MMNPU_APERTURE4_BASE           0x3B10000
+
+/* PCIe BAR Index for NPU5 */
+#define NPU5_REG_BAR_INDEX	0
+#define NPU5_MBOX_BAR_INDEX	0
+#define NPU5_PSP_BAR_INDEX	4
+#define NPU5_SMU_BAR_INDEX	5
+#define NPU5_SRAM_BAR_INDEX	2
+/* Associated BARs and Apertures */
+#define NPU5_REG_BAR_BASE	MMNPU_APERTURE0_BASE
+#define NPU5_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE
+#define NPU5_PSP_BAR_BASE	MMNPU_APERTURE3_BASE
+#define NPU5_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
+#define NPU5_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
+
+#define NPU5_RT_CFG_TYPE_PDI_LOAD 5
+#define NPU5_RT_CFG_VAL_PDI_LOAD_MGMT 0
+#define NPU5_RT_CFG_VAL_PDI_LOAD_APP 1
+
+#define NPU5_MPNPUCLK_FREQ_MAX  1267
+#define NPU5_HCLK_FREQ_MAX      1800
+
+const struct amdxdna_dev_priv npu5_dev_priv = {
+	.fw_path        = "amdnpu/17f0_11/npu.sbin",
+	.protocol_major = 0x6,
+	.protocol_minor = 0x1,
+	.rt_config	= {NPU5_RT_CFG_TYPE_PDI_LOAD, NPU5_RT_CFG_VAL_PDI_LOAD_APP},
+	.col_align	= COL_ALIGN_NATURE,
+	.mbox_dev_addr  = NPU5_MBOX_BAR_BASE,
+	.mbox_size      = 0, /* Use BAR size */
+	.sram_dev_addr  = NPU5_SRAM_BAR_BASE,
+	.sram_offs      = {
+		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU5_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+	},
+	.psp_regs_off   = {
+		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU5_PSP, MP0_C2PMSG_123),
+		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU5_REG, MPNPU_PUB_SCRATCH3),
+		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU5_REG, MPNPU_PUB_SCRATCH4),
+		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU5_REG, MPNPU_PUB_SCRATCH9),
+		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU5_PSP, MP0_C2PMSG_73),
+		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU5_PSP, MP0_C2PMSG_123),
+		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU5_REG, MPNPU_PUB_SCRATCH3),
+	},
+	.smu_regs_off   = {
+		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU5_SMU, MP1_C2PMSG_0),
+		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU5_SMU, MP1_C2PMSG_60),
+		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU5_SMU, MMNPU_APERTURE4_BASE),
+		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
+		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
+	},
+	.smu_mpnpuclk_freq_max = NPU5_MPNPUCLK_FREQ_MAX,
+	.smu_hclk_freq_max     = NPU5_HCLK_FREQ_MAX,
+};
+
+const struct amdxdna_dev_info dev_npu5_info = {
+	.reg_bar           = NPU5_REG_BAR_INDEX,
+	.mbox_bar          = NPU5_MBOX_BAR_INDEX,
+	.sram_bar          = NPU5_SRAM_BAR_INDEX,
+	.psp_bar           = NPU5_PSP_BAR_INDEX,
+	.smu_bar           = NPU5_SMU_BAR_INDEX,
+	.first_col         = 0,
+	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
+	.dev_mem_base      = AIE2_DEVM_BASE,
+	.dev_mem_size      = AIE2_DEVM_SIZE,
+	.vbnv              = "RyzenAI-npu5",
+	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
+	.dev_priv          = &npu5_dev_priv,
+	.ops               = &aie2_ops,
+};
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
new file mode 100644
index 000000000000..1b699464150e
--- /dev/null
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -0,0 +1,27 @@ 
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _UAPI_AMDXDNA_ACCEL_H_
+#define _UAPI_AMDXDNA_ACCEL_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define AMDXDNA_DRIVER_MAJOR	1
+#define AMDXDNA_DRIVER_MINOR	0
+
+enum amdxdna_device_type {
+	AMDXDNA_DEV_TYPE_UNKNOWN = -1,
+	AMDXDNA_DEV_TYPE_KMQ,
+};
+
+#if defined(__cplusplus)
+} /* extern c end */
+#endif
+
+#endif /* _UAPI_AMDXDNA_ACCEL_H_ */