diff mbox series

[v2,2/3] accel/amdpk: add driver for AMD PKI accelerator

Message ID 20250409173033.2261755-2-nipun.gupta@amd.com (mailing list archive)
State New, archived
Series [v2,1/3] dt-bindings: accel: add device tree for AMD PKI accelerator

Commit Message

Gupta, Nipun April 9, 2025, 5:30 p.m. UTC
The AMD PKI accelerator driver provides a accel interface to interact
with the device for offloading and accelerating asymmetric crypto
operations.

Signed-off-by: Nipun Gupta <nipun.gupta@amd.com>
---

Changes RFC->v2:
- moved from misc to accel
- added architecture and compile test dependency in Kconfig
- removed sysfs (and added debugfs in new patch 3/3)
- fixed platform compat
- removed redundant resource index 1 configuration (which was there in
  RFC patch)

 MAINTAINERS                     |   2 +
 drivers/accel/Kconfig           |   1 +
 drivers/accel/Makefile          |   1 +
 drivers/accel/amdpk/Kconfig     |  18 +
 drivers/accel/amdpk/Makefile    |   8 +
 drivers/accel/amdpk/amdpk_drv.c | 736 ++++++++++++++++++++++++++++++++
 drivers/accel/amdpk/amdpk_drv.h | 271 ++++++++++++
 include/uapi/drm/amdpk.h        |  49 +++
 8 files changed, 1086 insertions(+)
 create mode 100644 drivers/accel/amdpk/Kconfig
 create mode 100644 drivers/accel/amdpk/Makefile
 create mode 100644 drivers/accel/amdpk/amdpk_drv.c
 create mode 100644 drivers/accel/amdpk/amdpk_drv.h
 create mode 100644 include/uapi/drm/amdpk.h

--
2.34.1

Comments

Krzysztof Kozlowski April 10, 2025, 7:36 a.m. UTC | #1
On Wed, Apr 09, 2025 at 11:00:32PM GMT, Nipun Gupta wrote:
> The AMD PKI accelerator driver provides a accel interface to interact
> with the device for offloading and accelerating asymmetric crypto
> operations.
> 

For me this is clearly a crypto driver and you are supposed to:
1. Cc crypto maintainers,
2. Put it actually into crypto and use crypto API.


Limited review follows.

> Signed-off-by: Nipun Gupta <nipun.gupta@amd.com>
> ---
> 
> Changes RFC->v2:
> - moved from misc to accel
> - added architecture and compile test dependency in Kconfig
> - removed sysfs (and added debugfs in new patch 3/3)
> - fixed platform compat
> - removed redundant resource index 1 configuration (which was there in
>   RFC patch)
> 
>  MAINTAINERS                     |   2 +
>  drivers/accel/Kconfig           |   1 +
>  drivers/accel/Makefile          |   1 +
>  drivers/accel/amdpk/Kconfig     |  18 +
>  drivers/accel/amdpk/Makefile    |   8 +
>  drivers/accel/amdpk/amdpk_drv.c | 736 ++++++++++++++++++++++++++++++++
>  drivers/accel/amdpk/amdpk_drv.h | 271 ++++++++++++
>  include/uapi/drm/amdpk.h        |  49 +++
>  8 files changed, 1086 insertions(+)
>  create mode 100644 drivers/accel/amdpk/Kconfig
>  create mode 100644 drivers/accel/amdpk/Makefile
>  create mode 100644 drivers/accel/amdpk/amdpk_drv.c
>  create mode 100644 drivers/accel/amdpk/amdpk_drv.h
>  create mode 100644 include/uapi/drm/amdpk.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 11f8815daa77..cdc305a206aa 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1161,6 +1161,8 @@ L:	dri-devel@lists.freedesktop.org
>  S:	Maintained
>  T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
>  F:	Documentation/devicetree/bindings/accel/amd,versal-net-pki.yaml
> +F:	drivers/accel/amdpk/
> +F:	include/uapi/drm/amdpk.h
> 
>  AMD PMC DRIVER
>  M:	Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
> diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig
> index 5b9490367a39..5632c6c62c15 100644
> --- a/drivers/accel/Kconfig
> +++ b/drivers/accel/Kconfig
> @@ -28,5 +28,6 @@ source "drivers/accel/amdxdna/Kconfig"
>  source "drivers/accel/habanalabs/Kconfig"
>  source "drivers/accel/ivpu/Kconfig"
>  source "drivers/accel/qaic/Kconfig"
> +source "drivers/accel/amdpk/Kconfig"

Why place it at the end?

> 
>  endif
> diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
> index a301fb6089d4..caea6d636ac8 100644
> --- a/drivers/accel/Makefile
> +++ b/drivers/accel/Makefile
> @@ -4,3 +4,4 @@ obj-$(CONFIG_DRM_ACCEL_AMDXDNA)		+= amdxdna/
>  obj-$(CONFIG_DRM_ACCEL_HABANALABS)	+= habanalabs/
>  obj-$(CONFIG_DRM_ACCEL_IVPU)		+= ivpu/
>  obj-$(CONFIG_DRM_ACCEL_QAIC)		+= qaic/
> +obj-$(CONFIG_DRM_ACCEL_AMDPK)		+= amdpk/

Did you just add it to the end, breaking the ordering? Look, there is
already an AMD entry in the context.

> diff --git a/drivers/accel/amdpk/Kconfig b/drivers/accel/amdpk/Kconfig
> new file mode 100644
> index 000000000000..c0b459bb66a7
> --- /dev/null
> +++ b/drivers/accel/amdpk/Kconfig
> @@ -0,0 +1,18 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +#
> +# Kconfig for AMD PKI accelerator for versal-net
> +#
> +
> +config DRM_ACCEL_AMDPK
> +	tristate "AMD PKI accelerator for versal-net"
> +	depends on DRM_ACCEL
> +	depends on ARM64 || COMPILE_TEST

What do you need from arm64? I don't see it in the code at all.

> +	help
> +	  Enables the platform driver for the AMD PKI accelerator, designed
> +	  for high performance Public Key asymmetric crypto operations on AMD
> +	  versal-net.
> +
> +	  If unsure, say N.
> +
> +	  To compile this driver as a module, choose M here: the
> +	  module will be called amdpk.

...

> +static void amdpk_remove_device(struct amdpk_dev *pkdev)
> +{
> +	drm_dev_unplug(&pkdev->ddev);
> +	pk_wrreg(pkdev->regs, REG_IRQ_ENABLE, 0);
> +	ida_destroy(&pkdev->avail_queues);
> +}
> +
> +static int amdpk_probe(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct amdpk_dev *pkdev;
> +	struct resource *memres;
> +	int irq, ret;
> +
> +	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
> +	if (ret < 0)
> +		return ret;
> +
> +	pkdev = devm_drm_dev_alloc(dev, &amdpk_accel_driver, typeof(*pkdev), ddev);
> +	if (IS_ERR(pkdev))
> +		return PTR_ERR(pkdev);
> +	pkdev->dev = dev;
> +
> +	memres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	pkdev->regs = devm_ioremap_resource(dev, memres);

Use wrapper for these two.
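
E.g. (an untested sketch; the combined helper also hands back the
resource, so regsphys can still be filled in):

  pkdev->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &memres);
  if (IS_ERR(pkdev->regs))
          return PTR_ERR(pkdev->regs);
  pkdev->regsphys = memres->start;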

> +	if (IS_ERR(pkdev->regs))
> +		return PTR_ERR(pkdev->regs);
> +	pkdev->regsphys = memres->start;
> +	platform_set_drvdata(pdev, pkdev);
> +
> +	if (platform_irq_count(pdev) != 1)

Drop, what's the benefit? DT schema ensures you have only one entry.

> +		return -ENODEV;
> +
> +	irq = platform_get_irq(pdev, 0);
> +	if (irq < 0)
> +		return -ENODEV;
> +
> +	ret = drm_dev_register(&pkdev->ddev, 0);
> +	if (ret) {
> +		dev_err(&pdev->dev, "DRM register failed, ret %d", ret);
> +		return ret;
> +	}
> +
> +	return amdpk_create_device(pkdev, dev, irq);
> +}
> +
> +static void amdpk_remove(struct platform_device *pdev)
> +{
> +	struct amdpk_dev *pkdev = platform_get_drvdata(pdev);
> +
> +	amdpk_remove_device(pkdev);
> +}
> +
> +static void amdpk_shutdown(struct platform_device *pdev)
> +{
> +	amdpk_remove(pdev);

I am not sure why this is necessary. Please provide a comment explaining
WHY you need it. IOW, why do you need to disable the IRQ manually here if
the entire system is shutting down?

> +}
> +
> +static const struct of_device_id amdpk_match_table[] = {
> +	{ .compatible = "amd,versal-net-pki" },
> +	{ },
> +};
> +MODULE_DEVICE_TABLE(of, amdpk_match_table);
> +
> +static struct platform_driver amdpk_pdrv = {
> +	.probe = amdpk_probe,
> +	.remove = amdpk_remove,
> +	.shutdown = amdpk_shutdown,
> +	.driver = {
> +		.name = DRIVER_NAME,
> +		.of_match_table = amdpk_match_table,
> +	},
> +};
> +
> +static int __init amdpk_init(void)
> +{
> +	int ret;
> +
> +	ret = platform_driver_register(&amdpk_pdrv);
> +	if (ret) {
> +		pr_err("can't register platform driver\n");
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static void __exit amdpk_exit(void)
> +{
> +	platform_driver_unregister(&amdpk_pdrv);
> +}
> +
> +module_init(amdpk_init);
> +module_exit(amdpk_exit);

Why do you need to open code module_platform_driver?
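
I.e. the whole init/exit block collapses to one line (dropping only the
extra pr_err()):

  module_platform_driver(amdpk_pdrv);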

Best regards,
Krzysztof
Gupta, Nipun April 11, 2025, 4:51 a.m. UTC | #2
On 10-04-2025 13:06, Krzysztof Kozlowski wrote:
> On Wed, Apr 09, 2025 at 11:00:32PM GMT, Nipun Gupta wrote:
>> The AMD PKI accelerator driver provides a accel interface to interact
>> with the device for offloading and accelerating asymmetric crypto
>> operations.
>>
> 
> For me this is clearly a crypto driver and you are supposed to:
> 1. Cc crypto maintainers,
> 2. Put it actually into crypto and use crypto API.

added crypto maintainers for comments.
IMO, as the accel framework is designed to support any type of compute
accelerator, the PKI crypto accelerator is not completely out of place
here, as also suggested at:
https://lore.kernel.org/all/2025031352-gyration-deceit-5563@gregkh/

Having the crypto accelerator as part of accel also makes it possible to
extract the most performance from the HW PKI engines, given that queue
assignment is handled per drm device open call.

Regards,
Nipun
Herbert Xu April 11, 2025, 5:17 a.m. UTC | #3
On Fri, Apr 11, 2025 at 10:21:03AM +0530, Gupta, Nipun wrote:
>
> added crypto maintainers for comments.
> IMO, as the accel framework is designed to support any type of compute
> accelerator, the PKI crypto accelerator is not completely out of place
> here, as also suggested at:
> https://lore.kernel.org/all/2025031352-gyration-deceit-5563@gregkh/
> 
> Having the crypto accelerator as part of accel also makes it possible to
> extract the most performance from the HW PKI engines, given that queue
> assignment is handled per drm device open call.

There is actually a user-space interface for asymmetric crypto
through the keyring subsystem.  Adding the maintainers of those
in case they wish to comment on your driver.

Thanks,
Jeff Hugo April 11, 2025, 4:34 p.m. UTC | #4
On 4/9/2025 11:30 AM, Nipun Gupta wrote:
> The AMD PKI accelerator driver provides a accel interface to interact

"an accel"

> with the device for offloading and accelerating asymmetric crypto
> operations.
> 
> Signed-off-by: Nipun Gupta <nipun.gupta@amd.com>
> ---
> 
> Changes RFC->v2:
> - moved from misc to accel
> - added architecture and compile test dependency in Kconfig
> - removed sysfs (and added debugfs in new patch 3/3)
> - fixed platform compat
> - removed redundant resource index 1 configuration (which was there in
>    RFC patch)
> 
>   MAINTAINERS                     |   2 +
>   drivers/accel/Kconfig           |   1 +
>   drivers/accel/Makefile          |   1 +
>   drivers/accel/amdpk/Kconfig     |  18 +
>   drivers/accel/amdpk/Makefile    |   8 +
>   drivers/accel/amdpk/amdpk_drv.c | 736 ++++++++++++++++++++++++++++++++
>   drivers/accel/amdpk/amdpk_drv.h | 271 ++++++++++++
>   include/uapi/drm/amdpk.h        |  49 +++
>   8 files changed, 1086 insertions(+)
>   create mode 100644 drivers/accel/amdpk/Kconfig
>   create mode 100644 drivers/accel/amdpk/Makefile
>   create mode 100644 drivers/accel/amdpk/amdpk_drv.c
>   create mode 100644 drivers/accel/amdpk/amdpk_drv.h
>   create mode 100644 include/uapi/drm/amdpk.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 11f8815daa77..cdc305a206aa 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1161,6 +1161,8 @@ L:	dri-devel@lists.freedesktop.org
>   S:	Maintained
>   T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
>   F:	Documentation/devicetree/bindings/accel/amd,versal-net-pki.yaml
> +F:	drivers/accel/amdpk/
> +F:	include/uapi/drm/amdpk.h
> 
>   AMD PMC DRIVER
>   M:	Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
> diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig
> index 5b9490367a39..5632c6c62c15 100644
> --- a/drivers/accel/Kconfig
> +++ b/drivers/accel/Kconfig
> @@ -28,5 +28,6 @@ source "drivers/accel/amdxdna/Kconfig"
>   source "drivers/accel/habanalabs/Kconfig"
>   source "drivers/accel/ivpu/Kconfig"
>   source "drivers/accel/qaic/Kconfig"
> +source "drivers/accel/amdpk/Kconfig"

Alphabetical order
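
I.e. (amdpk sorts before amdxdna):

  source "drivers/accel/amdpk/Kconfig"
  source "drivers/accel/amdxdna/Kconfig"
  source "drivers/accel/habanalabs/Kconfig"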

> 
>   endif
> diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
> index a301fb6089d4..caea6d636ac8 100644
> --- a/drivers/accel/Makefile
> +++ b/drivers/accel/Makefile
> @@ -4,3 +4,4 @@ obj-$(CONFIG_DRM_ACCEL_AMDXDNA)		+= amdxdna/
>   obj-$(CONFIG_DRM_ACCEL_HABANALABS)	+= habanalabs/
>   obj-$(CONFIG_DRM_ACCEL_IVPU)		+= ivpu/
>   obj-$(CONFIG_DRM_ACCEL_QAIC)		+= qaic/
> +obj-$(CONFIG_DRM_ACCEL_AMDPK)		+= amdpk/

Alphabetical order

> diff --git a/drivers/accel/amdpk/amdpk_drv.c b/drivers/accel/amdpk/amdpk_drv.c
> new file mode 100644
> index 000000000000..17c328d03db8
> --- /dev/null
> +++ b/drivers/accel/amdpk/amdpk_drv.c
> @@ -0,0 +1,736 @@
> +// SPDX-License-Identifier: GPL-2.0

Deprecated SPDX tag.  Checkpatch will catch this.
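
I.e., assuming GPL-2.0-only is what is intended, the current identifier
would be:

  // SPDX-License-Identifier: GPL-2.0-only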

> +/*
> + * Copyright (c) 2018-2021 Silex Insight sa
> + * Copyright (c) 2018-2021 Beerten Engineering scs
> + * Copyright (c) 2025 Advanced Micro Devices, Inc.
> + */
> +
> +/*
> + * Device Overview
> + * ===============
> + * The AMD PKI accelerator is a device on AMD versal-net that executes public
> + * key asymmetric crypto operations such as ECDSA, ECDH and RSA with high
> + * performance. The driver provides an accel interface to applications for
> + * configuring the device and performing the required operations. The AMD PKI
> + * device comprises multiple Barco Silex ba414 PKI engines bundled together,
> + * providing a queue based interface to interact with these devices on AMD
> + * versal-net.
> + *
> + * The following figure provides a brief overview of the device interface with
> + * the software:
> + *
> + * +------------------+
> + * |    Software      |
> + * +------------------+
> + *     |          |
> + *     |          v
> + *     |     +-----------------------------------------------------------+
> + *     |     |                     RAM                                   |
> + *     |     |  +----------------------------+   +---------------------+ |
> + *     |     |  |           RQ pages         |   |       CQ pages      | |
> + *     |     |  | +------------------------+ |   | +-----------------+ | |
> + *     |     |  | |   START (cmd)          | |   | | req_id | status | | |
> + *     |     |  | |   TFRI (addr, sz)---+  | |   | | req_id | status | | |
> + *     |     |  | | +-TFRO (addr, sz)   |  | |   | | ...             | | |
> + *     |     |  | | | NTFY (req_id)     |  | |   | +-----------------+ | |
> + *     |     |  | +-|-------------------|--+ |   |                     | |
> + *     |     |  |   |                   v    |   +---------------------+ |
> + *     |     |  |   |         +-----------+  |                           |
> + *     |     |  |   |         | input     |  |                           |
> + *     |     |  |   |         | data      |  |                           |
> + *     |     |  |   v         +-----------+  |                           |
> + *     |     |  |  +----------------+        |                           |
> + *     |     |  |  |  output data   |        |                           |
> + *     |     |  |  +----------------+        |                           |
> + *     |     |  +----------------------------+                           |
> + *     |     |                                                           |
> + *     |     +-----------------------------------------------------------+
> + *     |
> + *     |
> + * +---|----------------------------------------------------+
> + * |   v                AMD PKI device                      |
> + * |  +-------------------+     +------------------------+  |
> + * |  | New request FIFO  | --> |       PK engines       |  |
> + * |  +-------------------+     +------------------------+  |
> + * +--------------------------------------------------------+
> + *
> + * To perform a crypto operation, the software writes a sequence of descriptors
> + * into the RQ memory. This includes the input data and a designated location
> + * for the output data. After preparing the request, the request offset (from
> + * the RQ memory region) is written into the NEW_REQUEST register. The request
> + * is then stored in a common hardware FIFO shared among all RQs.
> + *
> + * When a PK engine becomes available, the device pops the request from the
> + * FIFO and fetches the descriptors. It DMAs the input data from RQ memory and
> + * executes the necessary computations. After computation is complete, the
> + * device writes the output data back to RAM via DMA. The device then writes a
> + * new entry in the CQ ring buffer in RAM, indicating completion of the
> + * request. The device also generates an interrupt to notify the software of
> + * completion.

Feels like this would be better served in Documentation

> +
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/of.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/interrupt.h>
> +#include <linux/delay.h>
> +#include <linux/eventfd.h>
> +#include <drm/drm_accel.h>
> +#include <drm/drm_ioctl.h>

Alphabetical order

> +
> +#include "amdpk_drv.h"
> +
> +#define DRIVER_NAME "amdpk"
> +
> +static void amdpk_init_cq(struct amdpk_dev *pkdev, struct amdpk_cq *cq,
> +			  int szcode, char *base)
> +{
> +	cq->pkdev = pkdev;
> +	cq->generation = 1;
> +	cq->szcode = szcode;
> +	cq->base = (u32 *)base;
> +	cq->tail = 0;
> +}
> +
> +static int amdpk_pop_cq(struct amdpk_cq *cq, int *rid)
> +{
> +	u32 status = CQ_STATUS_VALID;
> +	unsigned int sz;
> +	u32 completion;
> +
> +	completion = cq->base[cq->tail + 1];
> +	if ((completion & CQ_GENERATION_BIT) != cq->generation)
> +		return CQ_STATUS_INVALID;
> +
> +	*rid = (completion >> 16) & 0xffff;
> +	/* read memory barrier: to avoid a race condition, the status field should
> +	 * not be read before the completion generation bit. Otherwise we could
> +	 * get stale outdated status data.
> +	 */

Incorrect comment format.
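
I.e. the usual multi-line style, with the opening /* on its own line:

  /*
   * Read memory barrier: to avoid a race condition, the status field
   * should not be read before the completion generation bit. Otherwise
   * we could get stale outdated status data.
   */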

> +	rmb();

Shouldn't you be using readl()?

> +	status |= cq->base[cq->tail];
> +	/* advance completion queue tail */
> +	cq->tail += 2;
> +	sz = 1 << (cq->szcode - 2);
> +	if (cq->tail >= sz) {
> +		cq->tail = 0;
> +		cq->generation ^= 1; /* invert generation bit */
> +	}
> +
> +	/* evaluate status from the completion queue */
> +	if (completion & CQ_COMPLETION_BIT)
> +		status |= CQ_COMPLETION_ERROR;
> +
> +	return status;
> +}
> +
> +static const struct file_operations amdpk_accel_fops = {
> +	.owner		= THIS_MODULE,
> +	.open		= accel_open,
> +	.release	= drm_release,
> +	.unlocked_ioctl	= drm_ioctl,
> +	.compat_ioctl	= drm_compat_ioctl,
> +	.llseek		= noop_llseek,
> +	.mmap		= amdpk_accel_mmap,
> +};

DEFINE_DRM_ACCEL_FOPS ?
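
For reference, the macro would collapse this to a single line, but note
that (if I read drm_accel.h right) it wires .mmap to drm_gem_mmap, so it
only fits if the custom amdpk_accel_mmap() can go away:

  DEFINE_DRM_ACCEL_FOPS(amdpk_accel_fops);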

> diff --git a/include/uapi/drm/amdpk.h b/include/uapi/drm/amdpk.h
> new file mode 100644
> index 000000000000..e5e18fdbc2c4
> --- /dev/null
> +++ b/include/uapi/drm/amdpk.h
> @@ -0,0 +1,49 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2025 Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef __AMDPK_H__
> +#define __AMDPK_H__
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif
> +
> +#define MAX_PK_REQS		256
> +
> +struct amdpk_info {
> +	/** maximum available queue depth */

This doesn't look like valid kerneldoc

> +	unsigned int avail_qdepth;
> +};

Doesn't look like this handles compat correctly.  Did you read the 
documentation on creating ioctls?
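
A sketch of what a compat-clean version could look like (field name kept
from the patch; the UAPI header also needs fixed-width types and the
syscall-note license):

  /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */

  /**
   * struct amdpk_info - info returned by the AMDPK_GET_INFO ioctl
   * @avail_qdepth: maximum available queue depth
   */
  struct amdpk_info {
          __u32 avail_qdepth;
  };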
Gupta, Nipun April 11, 2025, 6:20 p.m. UTC | #5
On 11-04-2025 10:47, Herbert Xu wrote:
> On Fri, Apr 11, 2025 at 10:21:03AM +0530, Gupta, Nipun wrote:
>>
>> added crypto maintainers for comments.
>> IMO, as the accel framework is designed to support any type of compute
>> accelerator, the PKI crypto accelerator is not completely out of place
>> here, as also suggested at:
>> https://lore.kernel.org/all/2025031352-gyration-deceit-5563@gregkh/
>>
>> Having the crypto accelerator as part of accel also makes it possible to
>> extract the most performance from the HW PKI engines, given that queue
>> assignment is handled per drm device open call.
> 
> There is actually a user-space interface for asymmetric crypto
> through the keyring subsystem.  Adding the maintainers of those
> in case they wish to comment on your driver.

AFAIU after looking into it, the keyring subsystem is not meant to perform
the data operations, but to manage keys for those operations. Kindly
correct me if I am wrong here.

Thanks,
Nipun
Herbert Xu April 12, 2025, 1:23 a.m. UTC | #6
On Fri, Apr 11, 2025 at 11:50:54PM +0530, Gupta, Nipun wrote:
>
> AFAIU after looking into it, the keyring subsystem is not meant to perform
> the data operations, but to manage keys for those operations. Kindly
> correct me if I am wrong here.

Have a look at

security/keys/keyctl_pkey.c

Cheers,
Lukas Wunner April 13, 2025, 6:52 p.m. UTC | #7
On Fri, Apr 11, 2025 at 10:21:03AM +0530, Gupta, Nipun wrote:
> On 10-04-2025 13:06, Krzysztof Kozlowski wrote:
> > On Wed, Apr 09, 2025 at 11:00:32PM GMT, Nipun Gupta wrote:
> > > The AMD PKI accelerator driver provides a accel interface to interact
> > > with the device for offloading and accelerating asymmetric crypto
> > > operations.
> > > 
> > 
> > For me this is clearly a crypto driver and you are supposed to:
> > 1. Cc crypto maintainers,
> > 2. Put it actually into crypto and use crypto API.
> 
> added crypto maintainers for comments.
> IMO, as the accel framework is designed to support any type of compute
> accelerator, the PKI crypto accelerator is not completely out of place
> here, as also suggested at:
> https://lore.kernel.org/all/2025031352-gyration-deceit-5563@gregkh/

To be fair, Greg did suggest drivers/crypto/ as an alternative... :)

  "Great, then why isn't this in drivers/accel/ or drivers/crypto/ ?"
  https://lore.kernel.org/r/2025031236-siamese-graffiti-5b98@gregkh/

There are already six drivers for crypto accelerators in drivers/crypto/,
so that would seem to be a natural fit for your driver:

  aspeed/aspeed-acry.c
  caam/caampkc.c
  ccp/ccp-crypto-rsa.c                 <-- from AMD no less!
  hisilicon/hpre/hpre_crypto.c
  intel/qat/qat_common/qat_asym_algs.c
  starfive/jh7110-rsa.c

You can find these in the tree with:

  git grep 'cra_name = "rsa"'

So far there are only drivers to accelerate RSA encryption/decryption.
The kernel supports a single padding scheme, PKCS1, which is implemented
by crypto/rsa-pkcs1pad.c.  There is no support yet for OAEP.

So the padding of the hash (which is cheap) happens in software and then
crypto/rsa-pkcs1pad.c performs an RSA encrypt/decrypt operation which is
either performed in software by crypto/rsa.c, or in hardware if a crypto
accelerator is present.  Drivers for crypto accelerators register the
"rsa" algorithm with a higher cra_priority than the software
implementation, hence are generally preferred.
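
For illustration, registration in such a driver looks roughly like this
(names invented here; see ccp/ccp-crypto-rsa.c for a real example):

  static struct akcipher_alg amdpk_rsa = {
          .encrypt      = amdpk_rsa_encrypt,
          .decrypt      = amdpk_rsa_decrypt,
          .set_pub_key  = amdpk_rsa_set_pub_key,
          .set_priv_key = amdpk_rsa_set_priv_key,
          .max_size     = amdpk_rsa_max_size,
          .base = {
                  .cra_name        = "rsa",
                  .cra_driver_name = "rsa-amdpk",
                  .cra_priority    = 3000, /* above the software rsa */
                  .cra_module      = THIS_MODULE,
          },
  };

  ret = crypto_register_akcipher(&amdpk_rsa);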

One benefit that you get from implementing a proper akcipher_alg in your
driver is that virtual machines may take advantage of the hardware
accelerator through the virtio support implemented by:
drivers/crypto/virtio/virtio_crypto_akcipher_algs.c

Note that the crypto subsystem currently does not support hardware
acceleration of signature generation/verification (crypto_sig),
but only encryption/decryption (crypto_akcipher).  One reason is
that signature generation/verification is generally a synchronous
operation and doesn't benefit as much from hardware acceleration
due to the overhead of interacting with the hardware.

So there's no support e.g. for generating or verifying ECDSA signatures
in hardware.  I think that would only really make sense if private keys
are kept in hardware and cannot be retrieved.  So the use case wouldn't
be acceleration, but security of private keys.

That said, for RSA specifically, signature generation/verification does
involve an encrypt/decrypt operation internally.  The padding is once
again done in software (by crypto/rsassa-pkcs1.c -- no PSS support yet).
But the actual encrypt/decrypt operation will be performed in
hardware if a crypto accelerator is present.

The user space interface Herbert referred to is a set of system calls
which are usable e.g. via the keyutils library and command line utility:
https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/keyutils.git/

HTH,

Lukas
kernel test robot April 16, 2025, 10:41 a.m. UTC | #8
Hi Nipun,

kernel test robot noticed the following build errors:

[auto build test ERROR on robh/for-next]
[also build test ERROR on staging/staging-testing staging/staging-next staging/staging-linus linus/master drm-misc/drm-misc-next drm-tip/drm-tip v6.15-rc2 next-20250416]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Nipun-Gupta/accel-amdpk-add-driver-for-AMD-PKI-accelerator/20250410-013224
base:   https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git for-next
patch link:    https://lore.kernel.org/r/20250409173033.2261755-2-nipun.gupta%40amd.com
patch subject: [PATCH v2 2/3] accel/amdpk: add driver for AMD PKI accelerator
config: x86_64-randconfig-003-20250416 (https://download.01.org/0day-ci/archive/20250416/202504161842.xI2wcOdf-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250416/202504161842.xI2wcOdf-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202504161842.xI2wcOdf-lkp@intel.com/

All errors (new ones prefixed by >>):

>> error: include/uapi/drm/amdpk.h: missing "WITH Linux-syscall-note" for SPDX-License-Identifier
   make[3]: *** [scripts/Makefile.headersinst:63: usr/include/drm/amdpk.h] Error 1 shuffle=3983912090
   make[3]: Target '__headers' not remade because of errors.
   make[2]: *** [Makefile:1375: headers] Error 2 shuffle=3983912090
   scripts/kernel-doc: 1: kernel-doc.py: not found
   make[3]: *** [scripts/Makefile.build:203: scripts/mod/empty.o] Error 127 shuffle=3983912090
   make[3]: *** Deleting file 'scripts/mod/empty.o'
   make[3]: Target 'scripts/mod/' not remade because of errors.
   make[2]: *** [Makefile:1276: prepare0] Error 2 shuffle=3983912090
   make[2]: Target 'prepare' not remade because of errors.
   make[1]: *** [Makefile:248: __sub-make] Error 2 shuffle=3983912090
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:248: __sub-make] Error 2 shuffle=3983912090
   make: Target 'prepare' not remade because of errors.
Gupta, Nipun April 16, 2025, 3:11 p.m. UTC | #9
On 14-04-2025 00:22, Lukas Wunner wrote:
> On Fri, Apr 11, 2025 at 10:21:03AM +0530, Gupta, Nipun wrote:
>> On 10-04-2025 13:06, Krzysztof Kozlowski wrote:
>>> On Wed, Apr 09, 2025 at 11:00:32PM GMT, Nipun Gupta wrote:
>>>> The AMD PKI accelerator driver provides a accel interface to interact
>>>> with the device for offloading and accelerating asymmetric crypto
>>>> operations.
>>>>
>>>
>>> For me this is clearly a crypto driver and you are supposed to:
>>> 1. Cc crypto maintainers,
>>> 2. Put it actually into crypto and use crypto API.
>>
>> added crypto maintainers for comments.
>> IMO, as the accel framework is designed to support any type of compute
>> accelerator, the PKI crypto accelerator is not completely out of place
>> here, as also suggested at:
>> https://lore.kernel.org/all/2025031352-gyration-deceit-5563@gregkh/
> 
> To be fair, Greg did suggest drivers/crypto/ as an alternative... :)
> 
>    "Great, then why isn't this in drivers/accel/ or drivers/crypto/ ?"
>    https://lore.kernel.org/r/2025031236-siamese-graffiti-5b98@gregkh/
> 
> There are already six drivers for crypto accelerators in drivers/crypto/,
> so that would seem to be a natural fit for your driver:
> 
>    aspeed/aspeed-acry.c
>    caam/caampkc.c
>    ccp/ccp-crypto-rsa.c                 <-- from AMD no less!
>    hisilicon/hpre/hpre_crypto.c
>    intel/qat/qat_common/qat_asym_algs.c
>    starfive/jh7110-rsa.c
> 
> You can find these in the tree with:
> 
>    git grep 'cra_name = "rsa"'
> 
> So far there are only drivers to accelerate RSA encryption/decryption.
> The kernel supports a single padding scheme, PKCS1, which is implemented
> by crypto/rsa-pkcs1pad.c.  There is no support yet for OAEP.
> 
> So the padding of the hash (which is cheap) happens in software and then
> crypto/rsa-pkcs1pad.c performs an RSA encrypt/decrypt operation which is
> either performed in software by crypto/rsa.c, or in hardware if a crypto
> accelerator is present.  Drivers for crypto accelerators register the
> "rsa" algorithm with a higher cra_priority than the software
> implementation, hence are generally preferred.
> 
> One benefit that you get from implementing a proper akcipher_alg in your
> driver is that virtual machines may take advantage of the hardware
> accelerator through the virtio support implemented by:
> drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
> 
> Note that the crypto subsystem currently does not support hardware
> acceleration of signature generation/verification (crypto_sig),
> but only encryption/decryption (crypto_akcipher).  One reason is
> that signature generation/verification is generally a synchronous
> operation and doesn't benefit as much from hardware acceleration
> due to the overhead of interacting with the hardware.

Thank you for the feedback.

When establishing TLS connections, OpenSSL requires signature 
generation/verification. In scenarios where multiple connections occur 
simultaneously, asynchronous operations are beneficial as they lead to 
much improved CPU utilization. OpenSSL ASYNC support can very well
utilize the asynchronous operations while establishing multiple TLS
connections.

> 
> So there's no support e.g. for generating or verifying ECDSA signatures
> in hardware.  I think that would only really make sense if private keys
> are kept in hardware and cannot be retrieved.  So the use case wouldn't
> be acceleration, but security of private keys.

While ECDSA signature generation and verification enhance the security
of private keys when stored in hardware, they also play an important
role in the establishment of TLS connections. Offloading these
operations to hardware frees up the CPU, enhancing performance during
TLS handshakes.

> 
> That said, for RSA specifically, signature generation/verification does
> involve an encrypt/decrypt operation internally.  The padding is once
> again done in software (by crypto/rsassa-pkcs1.c -- no PSS support yet).
> But the actual encrypt/decrypt operation will be performed in
> hardware if a crypto accelerator is present.
> 
> The user space interface Herbert referred to is a set of system calls
> which are usable e.g. via the keyutils library and command line utility:
> https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/keyutils.git/

These system calls support only synchronous operations. This
limitation prevents the use of keyutils here.

Thanks,
Nipun
Gupta, Nipun April 17, 2025, 3:32 p.m. UTC | #10
On 12-04-2025 06:53, Herbert Xu wrote:
> On Fri, Apr 11, 2025 at 11:50:54PM +0530, Gupta, Nipun wrote:
>>
>> AFAIU after looking into it, the keyring subsystem is not meant to perform
>> the data operations, but to manage keys for those operations. Kindly
>> correct me if I am wrong here.
> 
> Have a look at
> 
> security/keys/keyctl_pkey.c

Thanks for pointing out the C file, but these system calls support only
synchronous operations, which precludes their use here. In TLS
handshakes, where multiple connections occur simultaneously,
asynchronous operations are beneficial. OpenSSL ASYNC support can very
well utilize the asynchronous operations while establishing multiple
TLS connections.

Thanks,
Nipun
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 11f8815daa77..cdc305a206aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1161,6 +1161,8 @@  L:	dri-devel@lists.freedesktop.org
 S:	Maintained
 T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
 F:	Documentation/devicetree/bindings/accel/amd,versal-net-pki.yaml
+F:	drivers/accel/amdpk/
+F:	include/uapi/drm/amdpk.h

 AMD PMC DRIVER
 M:	Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig
index 5b9490367a39..5632c6c62c15 100644
--- a/drivers/accel/Kconfig
+++ b/drivers/accel/Kconfig
@@ -28,5 +28,6 @@  source "drivers/accel/amdxdna/Kconfig"
 source "drivers/accel/habanalabs/Kconfig"
 source "drivers/accel/ivpu/Kconfig"
 source "drivers/accel/qaic/Kconfig"
+source "drivers/accel/amdpk/Kconfig"

 endif
diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
index a301fb6089d4..caea6d636ac8 100644
--- a/drivers/accel/Makefile
+++ b/drivers/accel/Makefile
@@ -4,3 +4,4 @@  obj-$(CONFIG_DRM_ACCEL_AMDXDNA)		+= amdxdna/
 obj-$(CONFIG_DRM_ACCEL_HABANALABS)	+= habanalabs/
 obj-$(CONFIG_DRM_ACCEL_IVPU)		+= ivpu/
 obj-$(CONFIG_DRM_ACCEL_QAIC)		+= qaic/
+obj-$(CONFIG_DRM_ACCEL_AMDPK)		+= amdpk/
diff --git a/drivers/accel/amdpk/Kconfig b/drivers/accel/amdpk/Kconfig
new file mode 100644
index 000000000000..c0b459bb66a7
--- /dev/null
+++ b/drivers/accel/amdpk/Kconfig
@@ -0,0 +1,18 @@ 
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Kconfig for AMD PKI accelerator for versal-net
+#
+
+config DRM_ACCEL_AMDPK
+	tristate "AMD PKI accelerator for versal-net"
+	depends on DRM_ACCEL
+	depends on ARM64 || COMPILE_TEST
+	help
+	  Enables the platform driver for the AMD PKI accelerator, designed
+	  for high performance Public Key asymmetric crypto operations on AMD
+	  versal-net.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called amdpk.
diff --git a/drivers/accel/amdpk/Makefile b/drivers/accel/amdpk/Makefile
new file mode 100644
index 000000000000..826f43ccebdf
--- /dev/null
+++ b/drivers/accel/amdpk/Makefile
@@ -0,0 +1,8 @@ 
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for AMD PKI accelerator for versal-net
+#
+
+obj-$(CONFIG_DRM_ACCEL_AMDPK) := amdpk.o
+
+amdpk-y := amdpk_drv.o
diff --git a/drivers/accel/amdpk/amdpk_drv.c b/drivers/accel/amdpk/amdpk_drv.c
new file mode 100644
index 000000000000..17c328d03db8
--- /dev/null
+++ b/drivers/accel/amdpk/amdpk_drv.c
@@ -0,0 +1,736 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018-2021 Silex Insight sa
+ * Copyright (c) 2018-2021 Beerten Engineering scs
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
+ */
+
+/*
+ * Device Overview
+ * ===============
+ * The AMD PKI accelerator is a device on AMD versal-net that executes public
+ * key asymmetric crypto operations such as ECDSA, ECDH and RSA with high
+ * performance. The driver provides an accel interface to applications for
+ * configuring the device and performing the required operations. The AMD PKI
+ * device comprises multiple Barco Silex ba414 PKI engines bundled together,
+ * providing a queue based interface to interact with these devices on AMD
+ * versal-net.
+ *
+ * The following figure provides a brief overview of the device interface with
+ * the software:
+ *
+ * +------------------+
+ * |    Software      |
+ * +------------------+
+ *     |          |
+ *     |          v
+ *     |     +-----------------------------------------------------------+
+ *     |     |                     RAM                                   |
+ *     |     |  +----------------------------+   +---------------------+ |
+ *     |     |  |           RQ pages         |   |       CQ pages      | |
+ *     |     |  | +------------------------+ |   | +-----------------+ | |
+ *     |     |  | |   START (cmd)          | |   | | req_id | status | | |
+ *     |     |  | |   TFRI (addr, sz)---+  | |   | | req_id | status | | |
+ *     |     |  | | +-TFRO (addr, sz)   |  | |   | | ...             | | |
+ *     |     |  | | | NTFY (req_id)     |  | |   | +-----------------+ | |
+ *     |     |  | +-|-------------------|--+ |   |                     | |
+ *     |     |  |   |                   v    |   +---------------------+ |
+ *     |     |  |   |         +-----------+  |                           |
+ *     |     |  |   |         | input     |  |                           |
+ *     |     |  |   |         | data      |  |                           |
+ *     |     |  |   v         +-----------+  |                           |
+ *     |     |  |  +----------------+        |                           |
+ *     |     |  |  |  output data   |        |                           |
+ *     |     |  |  +----------------+        |                           |
+ *     |     |  +----------------------------+                           |
+ *     |     |                                                           |
+ *     |     +-----------------------------------------------------------+
+ *     |
+ *     |
+ * +---|----------------------------------------------------+
+ * |   v                AMD PKI device                      |
+ * |  +-------------------+     +------------------------+  |
+ * |  | New request FIFO  | --> |       PK engines       |  |
+ * |  +-------------------+     +------------------------+  |
+ * +--------------------------------------------------------+
+ *
+ * To perform a crypto operation, the software writes a sequence of descriptors
+ * into the RQ memory. This includes the input data and a designated location
+ * for the output data. After preparing the request, the request offset (from
+ * the RQ memory region) is written into the NEW_REQUEST register. The request
+ * is then stored in a common hardware FIFO shared among all RQs.
+ *
+ * When a PK engine becomes available, the device pops the request from the
+ * FIFO and fetches the descriptors. It DMAs the input data from RQ memory and
+ * executes the necessary computations. After computation is complete, the
+ * device writes the output data back to RAM via DMA. The device then writes a
+ * new entry in the CQ ring buffer in RAM, indicating completion of the
+ * request. The device also generates an interrupt to notify the software of
+ * completion.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/eventfd.h>
+#include <drm/drm_accel.h>
+#include <drm/drm_ioctl.h>
+
+#include "amdpk_drv.h"
+
+#define DRIVER_NAME "amdpk"
+
+static void amdpk_init_cq(struct amdpk_dev *pkdev, struct amdpk_cq *cq,
+			  int szcode, char *base)
+{
+	cq->pkdev = pkdev;
+	cq->generation = 1;
+	cq->szcode = szcode;
+	cq->base = (u32 *)base;
+	cq->tail = 0;
+}
+
+static int amdpk_pop_cq(struct amdpk_cq *cq, int *rid)
+{
+	u32 status = CQ_STATUS_VALID;
+	unsigned int sz;
+	u32 completion;
+
+	completion = cq->base[cq->tail + 1];
+	if ((completion & CQ_GENERATION_BIT) != cq->generation)
+		return CQ_STATUS_INVALID;
+
+	*rid = (completion >> 16) & 0xffff;
+	/* read memory barrier: to avoid a race condition, the status field should
+	 * not be read before the completion generation bit. Otherwise we could
+	 * get stale outdated status data.
+	 */
+	rmb();
+	status |= cq->base[cq->tail];
+	/* advance completion queue tail */
+	cq->tail += 2;
+	sz = 1 << (cq->szcode - 2);
+	if (cq->tail >= sz) {
+		cq->tail = 0;
+		cq->generation ^= 1; /* invert generation bit */
+	}
+
+	/* evaluate status from the completion queue */
+	if (completion & CQ_COMPLETION_BIT)
+		status |= CQ_COMPLETION_ERROR;
+
+	return status;
+}
+
+static int amdpk_trigpos(struct amdpk_cq *cq)
+{
+	int trigpos;
+
+	/* Set trigger position on next completed operation */
+	trigpos = cq->tail / 2 + (cq->generation << (cq->szcode - 3));
+	trigpos++;
+	trigpos &= (1 << (cq->szcode - 2)) - 1;
+
+	return trigpos;
+}
+
+static void amdpk_cq_workfn(struct kthread_work *work)
+{
+	struct amdpk_work *pkwork;
+	struct amdpk_dev *pkdev;
+	struct amdpk_user *user;
+	int qid, rid, trigpos;
+	u32 status;
+
+	pkwork = to_amdpk_work(work);
+	pkdev = pkwork->pkdev;
+	qid = pkwork->qid;
+
+	user = pkwork->user;
+	status = amdpk_pop_cq(&pkdev->work[qid]->pk_cq, &rid);
+	if (rid < user->rq_entries && status != CQ_STATUS_INVALID) {
+		u32 *status_mem;
+
+		status_mem = (u32 *)user->stmem;
+		status_mem[rid] = status;
+		eventfd_signal(user->evfd_ctx[rid]);
+	}
+
+	trigpos = amdpk_trigpos(&pkdev->work[qid]->pk_cq);
+	pk_wrreg(pkdev->regs, REG_CTL_CQ_NTFY(user->qid), trigpos);
+}
+
+static irqreturn_t amdpk_cq_irq(int irq, void *dev)
+{
+	struct amdpk_dev *pkdev = (struct amdpk_dev *)dev;
+	u64 active = 0;
+	int i;
+
+	active = pk_rdreg(pkdev->regs, REG_PK_IRQ_STATUS);
+	pk_wrreg(pkdev->regs, REG_PK_IRQ_RESET, active);
+
+	for (i = 0; i < pkdev->max_queues && active; i++, active >>= 1) {
+		if (!(active & 1))
+			continue;
+		if (!pkdev->users[i])
+			continue;
+		kthread_queue_work(pkdev->work[i]->cq_wq, &pkdev->work[i]->cq_work);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void amdpk_free_rqmem(struct amdpk_dev *pkdev, struct amdpk_user *user)
+{
+	int pages = user->rq_pages;
+	int pagemult = pages / 4;
+	int i;
+
+	for (i = 0; i < pages / pagemult; i++) {
+		if (!user->rqmem[i])
+			continue;
+		dma_free_coherent(pkdev->dev, PAGE_SIZE * pagemult,
+				  user->rqmem[i], user->physrq[i]);
+		user->rqmem[i] = NULL;
+	}
+}
+
+static int amdpk_accel_get_info(struct drm_device *dev, void *data, struct drm_file *fp)
+{
+	struct amdpk_user *user = fp->driver_priv;
+	struct amdpk_dev *pkdev = user->pkdev;
+	struct amdpk_info *info = data;
+
+	info->avail_qdepth = atomic_read(&pkdev->avail_qdepth);
+	return 0;
+}
+
+static int amdpk_accel_configure(struct amdpk_user *user, struct amdpk_conf *conf)
+{
+	struct amdpk_dev *pkdev = user->pkdev;
+	struct amdpk_work *pkwork = NULL;
+	int qid = user->qid;
+	int trigpos, ret, i;
+	char wq_name[32];
+
+	i = atomic_sub_return(conf->qdepth, &pkdev->avail_qdepth);
+	if (i < 0) {
+		/* If enough entries are not present, give back the reserved entries. */
+		dev_err(user->pkdev->dev, "Out of descriptors\n");
+		atomic_add(conf->qdepth, &pkdev->avail_qdepth);
+		return -ENOSPC;
+	}
+	user->rq_entries = conf->qdepth;
+
+	for (i = 0; i < user->rq_entries; i++) {
+		if (conf->eventfd[i] <= 0) {
+			dev_err(user->pkdev->dev, "Invalid eventfd: %d\n", conf->eventfd[i]);
+			ret = -EINVAL;
+			goto fail;
+		}
+
+		user->evfd_ctx[i] = eventfd_ctx_fdget(conf->eventfd[i]);
+		if (IS_ERR(user->evfd_ctx[i])) {
+			dev_err(user->pkdev->dev, "Invalid eventfd: %d\n", conf->eventfd[i]);
+			ret = PTR_ERR(user->evfd_ctx[i]);
+			goto fail;
+		}
+	}
+
+	user->cqmem = dma_alloc_coherent(pkdev->dev, PAGE_SIZE, &user->physcq, GFP_KERNEL);
+	if (!user->cqmem) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	/* Initialize completion queue handler */
+	pkwork = pkdev->work[qid];
+	amdpk_init_cq(pkdev, &pkwork->pk_cq, __builtin_ctz(PAGE_SIZE), user->cqmem);
+
+	snprintf(wq_name, sizeof(wq_name), "cq_worker_%d", qid);
+	pkwork->cq_wq = kthread_create_worker(0, wq_name);
+	if (IS_ERR(pkwork->cq_wq)) {
+		ret = PTR_ERR(pkwork->cq_wq);
+		pkwork->cq_wq = NULL;
+		goto fail;
+	}
+	kthread_init_work(&pkwork->cq_work, amdpk_cq_workfn);
+
+	pk_wrreg(pkdev->regs, REG_CQ_CFG_IRQ_NR(qid), qid);
+	pk_wrreg(pkdev->regs, REG_CQ_CFG_ADDR(qid), user->physcq);
+	pk_wrreg(pkdev->regs, REG_CQ_CFG_SIZE(qid), PAGE_SHIFT);
+	pk_wrreg(pkdev->regs, REG_RQ_CFG_CQID(qid), qid);
+	pk_wrreg(pkdev->regs, REG_RQ_CFG_DEPTH(qid), user->rq_entries);
+
+	/* set trigger position for notifications */
+	trigpos = amdpk_trigpos(&pkwork->pk_cq);
+	pk_wrreg(pkdev->regs, REG_CTL_CQ_NTFY(qid), trigpos);
+
+	return 0;
+fail:
+	if (pkwork->cq_wq) {
+		kthread_destroy_worker(pkwork->cq_wq);
+		pkwork->cq_wq = NULL;
+	}
+	if (user->cqmem) {
+		dma_free_coherent(pkdev->dev, PAGE_SIZE, user->cqmem, user->physcq);
+		user->cqmem = NULL;
+	}
+	atomic_add(user->rq_entries, &pkdev->avail_qdepth);
+	user->rq_entries = 0;
+
+	return ret;
+}
+
+static int amdpk_accel_set_conf(struct drm_device *dev, void *data, struct drm_file *fp)
+{
+	struct amdpk_user *user = fp->driver_priv;
+	struct amdpk_conf *conf = data;
+	int ret;
+
+	if (conf->qdepth == 0 || conf->qdepth > MAX_CQ_ENTRIES_ON_PAGE) {
+		dev_err(user->pkdev->dev, "Invalid qdepth: %d\n", conf->qdepth);
+		return -EINVAL;
+	}
+
+	if (user->configured) {
+		dev_err(user->pkdev->dev, "User already configured\n");
+		return -EEXIST;
+	}
+
+	ret = amdpk_accel_configure(user, conf);
+	if (ret)
+		return ret;
+
+	user->configured = true;
+	return 0;
+}
+
+static int amdpk_mmap_regs(struct vm_area_struct *vma)
+{
+	struct amdpk_user *user = vma->vm_private_data;
+	struct amdpk_dev *pkdev = user->pkdev;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	return io_remap_pfn_range(vma, vma->vm_start,
+				  (pkdev->regsphys + REG_CTL_BASE(user->qid)) >> PAGE_SHIFT,
+				  vma->vm_end - vma->vm_start, vma->vm_page_prot);
+}
+
+static int mmap_dmamem(struct vm_area_struct *vma, struct amdpk_dev *pkdev,
+		       void *addr, dma_addr_t phys, off_t offset, size_t sz)
+{
+	unsigned long vmstart = vma->vm_start;
+	unsigned long pgoff = vma->vm_pgoff;
+	int ret;
+
+	vma->vm_pgoff = 0;
+	vma->vm_start = vmstart + offset;
+	vma->vm_end = vma->vm_start + sz;
+	ret = dma_mmap_coherent(pkdev->dev, vma, addr, phys, sz);
+	vma->vm_pgoff = pgoff;
+	vma->vm_start = vmstart;
+
+	return ret;
+}
+
+static int amdpk_mmap_mem(struct vm_area_struct *vma)
+{
+	struct amdpk_user *user = vma->vm_private_data;
+	struct amdpk_dev *pkdev = user->pkdev;
+	int pagemult, pagemultshift;
+	int requested_pages;
+	int qid = user->qid;
+	int ret, i;
+
+	if (!user->configured) {
+		dev_err(pkdev->dev, "configuration not found!");
+		return -ENODEV;
+	}
+	/* Mapping already done */
+	if (user->stmem) {
+		dev_err(pkdev->dev, "memory already mapped\n");
+		return -EINVAL;
+	}
+
+	requested_pages = vma_pages(vma);
+	/* As the last page is reserved for the status and the starting ones are for
+	 * the rq, the mmap must be at least 2 pages big.
+	 */
+	if (requested_pages < 2) {
+		dev_err(pkdev->dev, "Invalid request pages: %d\n", requested_pages);
+		return -EINVAL;
+	}
+	/* Store number of rq pages. 1 page is reserved for status */
+	user->rq_pages = requested_pages - 1;
+	/* Requests memory can have up to 4 hardware pages. All hardware pages have the
+	 * same size. If requesting more than 4 OS pages, the hardware pages will use
+	 * the same multiple (pagemult) of OS pages. Thus the requested size for the
+	 * request queue must be a multiple of pagemult.
+	 */
+	pagemult = (requested_pages - 1 + 3) / 4;
+	if ((requested_pages - 1) % pagemult != 0) {
+		dev_err(pkdev->dev, "requested pages: %d not multiple of page multiplier: %d\n",
+			requested_pages, pagemult);
+		return -EINVAL;
+	}
+	/* hardware page size must be a power of 2, and as a consequence pagemult too. */
+	if ((pagemult & (pagemult - 1)) != 0) {
+		dev_err(pkdev->dev, "page multiplier: %d is not power of 2\n", pagemult);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < (requested_pages - 1) / pagemult; i++) {
+		user->rqmem[i] = dma_alloc_coherent(pkdev->dev, PAGE_SIZE * pagemult,
+						    &user->physrq[i], GFP_KERNEL);
+		if (!user->rqmem[i]) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+		pk_wrreg(pkdev->regs, REG_RQ_CFG_PAGE(qid, i), user->physrq[i]);
+	}
+
+	user->stmem = dma_alloc_coherent(pkdev->dev, PAGE_SIZE, &user->physst, GFP_KERNEL);
+	if (!user->stmem) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	/* Configure unused rq pages with the start of the allocated shared mem.
+	 * Those should not be accessed, but if a (malicious) user writes
+	 * descriptors for those pages, it will not break the rest of the system.
+	 */
+	for (i = (requested_pages - 1) / pagemult; i < MAX_RQMEM_PER_QUEUE; i++)
+		pk_wrreg(pkdev->regs, REG_RQ_CFG_PAGE(qid, i), user->physrq[0]);
+
+	pagemultshift = pagemult - 1;
+	pagemultshift = (pagemultshift & 5) + ((pagemultshift & 0xa) >> 1);
+	pagemultshift = (pagemultshift & 3) + ((pagemultshift >> 2) & 3);
+	pk_wrreg(pkdev->regs, REG_RQ_CFG_PAGE_SIZE(qid), PAGE_SHIFT + pagemultshift);
+	pk_wrreg(pkdev->regs, REG_RQ_CFG_PAGES_WREN(qid),
+		 (1 << ((requested_pages - 1) / pagemult)));
+
+	ret = mmap_dmamem(vma, pkdev, user->stmem, user->physst, 0, PAGE_SIZE);
+	if (ret)
+		goto fail;
+	for (i = 0; i < (requested_pages - 1) / pagemult; i++) {
+		ret = mmap_dmamem(vma, pkdev, user->rqmem[i], user->physrq[i],
+				  (i * pagemult + 1) * PAGE_SIZE, PAGE_SIZE * pagemult);
+		if (ret)
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	amdpk_free_rqmem(pkdev, user);
+	if (user->stmem) {
+		dma_free_coherent(pkdev->dev, PAGE_SIZE, user->stmem, user->physst);
+		user->stmem = NULL;
+	}
+	return ret;
+}
+
+static int amdpk_accel_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+	struct drm_file *dfp = fp->private_data;
+	struct amdpk_user *user;
+	int ret = 0;
+
+	user = dfp->driver_priv;
+	if (vma->vm_end < vma->vm_start)
+		return -EINVAL;
+
+	vma->vm_private_data = user;
+
+	switch (vma->vm_pgoff) {
+	case AMDPK_MMAP_REGS:
+		ret = amdpk_mmap_regs(vma);
+		break;
+	case AMDPK_MMAP_MEM:
+		ret = amdpk_mmap_mem(vma);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int amdpk_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct amdpk_work *pkwork = NULL;
+	struct amdpk_user *user = NULL;
+	struct amdpk_dev *pkdev;
+	int ret, qid;
+
+	pkdev = to_amdpk_dev(dev);
+	qid = ida_alloc_range(&pkdev->avail_queues, 0, pkdev->max_queues - 1, GFP_KERNEL);
+	if (qid < 0)
+		return -ENOSPC;
+
+	get_device(pkdev->dev);
+
+	user = kzalloc(sizeof(*user), GFP_KERNEL);
+	if (!user) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	user->pkdev = pkdev;
+	user->qid = qid;
+	user->rq_entries = 0;
+	file->driver_priv = user;
+	pkdev->users[qid] = user;
+
+	pkwork = kzalloc(sizeof(*pkwork), GFP_KERNEL);
+	if (!pkwork) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	pkwork->qid = qid;
+	pkwork->pkdev = pkdev;
+	pkwork->user = user;
+	pkdev->work[qid] = pkwork;
+
+	return 0;
+
+fail:
+	kfree(user);
+	kfree(pkwork);
+	ida_free(&pkdev->avail_queues, qid);
+	put_device(pkdev->dev);
+	return ret;
+}
+
+static void amdpk_postclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct amdpk_user *user = file->driver_priv;
+	struct amdpk_dev *pkdev = user->pkdev;
+	char __iomem *regs = pkdev->regs;
+
+	/* Set pkdev->users[qid] to NULL first, so that the completion interrupt
+	 * handler knows that this user no longer exists and does not schedule any
+	 * completion work on the cq worker kthread.
+	 */
+	pkdev->users[user->qid] = NULL;
+
+	if (user->configured) {
+		unsigned int attempts = 0;
+
+		/* Disable RQCQ pages to help the hardware finish potential
+		 * pending requests sooner.
+		 */
+		pk_wrreg(regs, REG_RQ_CFG_PAGE_SIZE(user->qid), 0);
+		pk_wrreg(regs, REG_RQ_CFG_PAGES_WREN(user->qid), 0);
+		pk_wrreg(regs, REG_CQ_CFG_SIZE(user->qid), 0);
+
+		/* The hardware does not have a flush mechanism for the requests pending
+		 * in the RQ. Instead check periodically with REG_CTL_PENDING_REQS if the
+		 * user still has requests going on. If the hardware never completes the
+		 * requests, abort after MAX_FLUSH_WAIT_ATTEMPTS and don't free resources.
+		 */
+		while (pk_rdreg(regs, REG_CTL_BASE(user->qid) + REG_CTL_PENDING_REQS)) {
+			attempts++;
+			if (attempts > MAX_FLUSH_WAIT_ATTEMPTS) {
+				dev_err(pkdev->dev,
+					"Time out waiting for hw completions. Resources leaked.\n");
+				goto abort_cleanup;
+			}
+			msleep(20);
+		}
+
+		if (pkdev->work[user->qid]->cq_wq) {
+			kthread_cancel_work_sync(&pkdev->work[user->qid]->cq_work);
+			kthread_destroy_worker(pkdev->work[user->qid]->cq_wq);
+		}
+
+		amdpk_free_rqmem(pkdev, user);
+		if (user->cqmem) {
+			dma_free_coherent(pkdev->dev, PAGE_SIZE, user->cqmem, user->physcq);
+			user->cqmem = NULL;
+		}
+		if (user->stmem) {
+			dma_free_coherent(pkdev->dev, PAGE_SIZE, user->stmem, user->physst);
+			user->stmem = NULL;
+		}
+
+		atomic_add(user->rq_entries, &pkdev->avail_qdepth);
+	}
+	ida_free(&pkdev->avail_queues, user->qid);
+
+abort_cleanup:
+	put_device(pkdev->dev);
+	kfree(pkdev->work[user->qid]);
+	pkdev->work[user->qid] = NULL;
+	kfree(user);
+}
+
+static const struct drm_ioctl_desc amdpk_accel_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(AMDPK_GET_INFO, amdpk_accel_get_info, 0),
+	DRM_IOCTL_DEF_DRV(AMDPK_SET_CONF, amdpk_accel_set_conf, 0),
+};
+
+static const struct file_operations amdpk_accel_fops = {
+	.owner		= THIS_MODULE,
+	.open		= accel_open,
+	.release	= drm_release,
+	.unlocked_ioctl	= drm_ioctl,
+	.compat_ioctl	= drm_compat_ioctl,
+	.llseek		= noop_llseek,
+	.mmap		= amdpk_accel_mmap,
+};
+
+static const struct drm_driver amdpk_accel_driver = {
+	.driver_features	= DRIVER_COMPUTE_ACCEL,
+
+	.name			= "amdpk_accel_driver",
+	.desc			= "AMD PKI Accelerator for versal-net",
+
+	.fops			= &amdpk_accel_fops,
+	.open			= amdpk_open,
+	.postclose		= amdpk_postclose,
+
+	.ioctls			= amdpk_accel_ioctls,
+	.num_ioctls		= ARRAY_SIZE(amdpk_accel_ioctls),
+};
+
+static int amdpk_create_device(struct amdpk_dev *pkdev, struct device *dev, int irq)
+{
+	u64 qdepth, ver;
+	long magic;
+	int ret;
+
+	magic = pk_rdreg(pkdev->regs, REG_MAGIC);
+	if (magic != AMDPK_MAGIC) {
+		dev_err(dev, "Invalid magic constant %08lx !\n", magic);
+		return -ENODEV;
+	}
+	ver = pk_rdreg(pkdev->regs, REG_SEMVER);
+	if (AMDPK_SEMVER_MAJOR(ver) != 1 || AMDPK_SEMVER_MINOR(ver) < 1) {
+		dev_err(dev, "Hardware version (%d.%d) not supported.\n",
+			(int)AMDPK_SEMVER_MAJOR(ver), (int)AMDPK_SEMVER_MINOR(ver));
+		return -ENODEV;
+	}
+
+	/* Reset all accelerators and the hw scheduler */
+	pk_wrreg(pkdev->regs, REG_PK_GLOBAL_STATE, 0x1);
+	pk_wrreg(pkdev->regs, REG_PK_GLOBAL_STATE, 0x0);
+
+	pkdev->max_queues = (int)pk_rdreg(pkdev->regs, REG_CFG_REQ_QUEUES_CNT);
+	qdepth = pk_rdreg(pkdev->regs, REG_CFG_MAX_PENDING_REQ);
+	atomic_set(&pkdev->avail_qdepth, qdepth);
+
+	pk_wrreg(pkdev->regs, REG_IRQ_ENABLE, 0);
+	pk_wrreg(pkdev->regs, REG_PK_IRQ_RESET, ~0);
+	pk_wrreg(pkdev->regs, REG_IRQ_ENABLE, (1 << pkdev->max_queues) - 1);
+
+	ret = devm_request_irq(dev, irq, amdpk_cq_irq, 0, "amdpk", pkdev);
+	if (ret)
+		return ret;
+
+	ida_init(&pkdev->avail_queues);
+
+	return 0;
+}
+
+static void amdpk_remove_device(struct amdpk_dev *pkdev)
+{
+	drm_dev_unplug(&pkdev->ddev);
+	pk_wrreg(pkdev->regs, REG_IRQ_ENABLE, 0);
+	ida_destroy(&pkdev->avail_queues);
+}
+
+static int amdpk_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct amdpk_dev *pkdev;
+	struct resource *memres;
+	int irq, ret;
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (ret < 0)
+		return ret;
+
+	pkdev = devm_drm_dev_alloc(dev, &amdpk_accel_driver, typeof(*pkdev), ddev);
+	if (IS_ERR(pkdev))
+		return PTR_ERR(pkdev);
+	pkdev->dev = dev;
+
+	memres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pkdev->regs = devm_ioremap_resource(dev, memres);
+	if (IS_ERR(pkdev->regs))
+		return PTR_ERR(pkdev->regs);
+	pkdev->regsphys = memres->start;
+	platform_set_drvdata(pdev, pkdev);
+
+	if (platform_irq_count(pdev) != 1)
+		return -ENODEV;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -ENODEV;
+
+	ret = drm_dev_register(&pkdev->ddev, 0);
+	if (ret) {
+		dev_err(&pdev->dev, "DRM register failed, ret %d", ret);
+		return ret;
+	}
+
+	return amdpk_create_device(pkdev, dev, irq);
+}
+
+static void amdpk_remove(struct platform_device *pdev)
+{
+	struct amdpk_dev *pkdev = platform_get_drvdata(pdev);
+
+	amdpk_remove_device(pkdev);
+}
+
+static void amdpk_shutdown(struct platform_device *pdev)
+{
+	amdpk_remove(pdev);
+}
+
+static const struct of_device_id amdpk_match_table[] = {
+	{ .compatible = "amd,versal-net-pki" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, amdpk_match_table);
+
+static struct platform_driver amdpk_pdrv = {
+	.probe = amdpk_probe,
+	.remove = amdpk_remove,
+	.shutdown = amdpk_shutdown,
+	.driver = {
+		.name = DRIVER_NAME,
+		.of_match_table = amdpk_match_table,
+	},
+};
+
+static int __init amdpk_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&amdpk_pdrv);
+	if (ret) {
+		pr_err("can't register platform driver\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void __exit amdpk_exit(void)
+{
+	platform_driver_unregister(&amdpk_pdrv);
+}
+
+module_init(amdpk_init);
+module_exit(amdpk_exit);
+
+MODULE_AUTHOR("AMD");
+MODULE_DESCRIPTION("AMD PKI accelerator for versal-net");
+MODULE_LICENSE("GPL");
diff --git a/drivers/accel/amdpk/amdpk_drv.h b/drivers/accel/amdpk/amdpk_drv.h
new file mode 100644
index 000000000000..c14c10db5d97
--- /dev/null
+++ b/drivers/accel/amdpk/amdpk_drv.h
@@ -0,0 +1,271 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018-2021 Silex Insight sa
+ * Copyright (c) 2018-2021 Beerten Engineering scs
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
+ */
+
+#ifndef __AMDPK_DRV_H__
+#define __AMDPK_DRV_H__
+
+#include <linux/types.h>
+#include <linux/kthread.h>
+#include <linux/io.h>
+#include <drm/drm_drv.h>
+#include <uapi/drm/amdpk.h>
+
+/* Magic number in the AMD PKI device, required to validate hardware access. */
+#define AMDPK_MAGIC 0x5113C50C
+
+/* Contains magic number 0x5113C50C.
+ * Used to validate access to the hardware registers.
+ */
+#define REG_MAGIC (0x00)
+
+/* Contains the version of the hardware interface as a semantic version:
+ * major 8 bits, minor 8 bits, patch 16 bits, in little endian order.
+ */
+#define REG_SEMVER (0x08)
+
+/* The number of request queues available in the hardware. */
+#define REG_CFG_REQ_QUEUES_CNT 0x10
+
+/* The maximum number of pending requests from all request queues combined. */
+#define REG_CFG_MAX_PENDING_REQ 0x18
+
+/* The maximum number of pending requests in a single request queue. */
+#define REG_CFG_MAX_REQ_QUEUE_ENTRIES 0x0020
+
+/* The first 16 bits give the number of PK core instances with 64 multipliers.
+ * The next 16 bits give the number of PK core instances with 256 multipliers.
+ */
+#define REG_CFG_PK_INST 0x28
+
+/* Writing 0x1 puts all pkcore accelerators and scheduler in reset.
+ * Writing 0x0 makes all pkcore accelerators and scheduler leave reset
+ * and become operational.
+ */
+#define REG_PK_GLOBAL_STATE 0x38
+
+/* The hardware implementation version: major 8 bits, minor 8 bits,
+ * SVN id 16 bits, in little endian order.
+ */
+#define REG_HW_VERSION (0x40)
+
+/* Bitmask of which CQ interrupts are raised. */
+#define REG_PK_IRQ_STATUS 0x88
+
+/* Bitmask of which CQ may trigger interrupts. */
+#define REG_IRQ_ENABLE 0x90
+
+/* Bitmask of CQ interrupts to reset. */
+#define REG_PK_IRQ_RESET 0xA0
+
+/* Bus address of page 'pageidx' for the given request queue.
+ * The address must be aligned on the page size.
+ */
+#define REG_RQ_CFG_PAGE(qid, pageidx) (0x00100 + (qid) * 0x80 + (pageidx) * 0x8)
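+
+/* For example, the register for qid 1, page 2 sits at
+ * 0x100 + 1 * 0x80 + 2 * 0x8 = 0x190.
+ */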
+
+/* Size in bytes of the pages represented as a power of 2.
+ *
+ * Allowed values :
+ * ================ ==============
+ *  register value   size in bytes
+ * ================ ==============
+ *     7               128
+ *     8               256
+ *     9               512
+ *    10              1024
+ *    11              2048
+ *    12              4096
+ *    13              8192
+ *    14             16384
+ *    15             32768
+ *    16             65536
+ * ================ ==============
+ */
+#define REG_RQ_CFG_PAGE_SIZE(qid) (0x00120 + (qid) * 0x80)
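+
+/* Illustrative: a standard 4 KiB page is programmed as 12, since 2^12 ==
+ * 4096; e.g. pk_wrreg(regs, REG_RQ_CFG_PAGE_SIZE(qid), ilog2(PAGE_SIZE))
+ * (assuming a 4096-byte PAGE_SIZE).
+ */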
+
+/* Index of the associated completion queue. */
+#define REG_RQ_CFG_CQID(qid) (0x00128 + (qid) * 0x80)
+
+/* Bit field of pages where descriptor can write to.
+ * When a bit is 1, a descriptor can write to the corresponding page.
+ */
+#define REG_RQ_CFG_PAGES_WREN(qid) (0x00138 + (qid) * 0x80)
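+
+/* For example, a (hypothetical) value of 0xF permits descriptor writes to
+ * pages 0 through 3.
+ */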
+
+/* Maximum number of entries which can be written into this request queue. */
+#define REG_RQ_CFG_DEPTH(qid) (0x00140 + (qid) * 0x80)
+
+/* Bus address of the ring base of completion queue n.
+ * The address must be aligned on 64 bits.
+ */
+#define REG_CQ_CFG_ADDR(qid) (0x1100 + (qid) * 0x80)
+
+/* CQ notification trigger position. */
+#define REG_CTL_CQ_NTFY(qid) (0x2028 + (qid) * 0x1000)
+
+/* Size in bytes of the completion ring represented as a power of 2.
+ *
+ * Allowed sizes :
+ * ================ ============== ==============
+ *  register value   size in bytes  max entries
+ * ================ ============== ==============
+ *      7             128             16
+ *      8             256             32
+ *      9             512             64
+ *     10            1024            128
+ *     11            2048            256
+ *     12            4096            512
+ *     13            8192           1024
+ *     14           16384           2048
+ * ================ ============== ==============
+ */
+#define REG_CQ_CFG_SIZE(qid) (0x1108 + (qid) * 0x80)
+
+/* Interrupt number for this completion queue. */
+#define REG_CQ_CFG_IRQ_NR(qid) (0x1110 + (qid) * 0x80)
+
+/* Control registers base address for the given request completion queue pair. */
+#define REG_CTL_BASE(qid) (0x2000 + (qid) * 0x1000)
+
+/* Count of how many requests are queued at a given time for this RQ/CQ pair.
+ * When this count reaches 0, the resources of the request and
+ * completion queues can be deleted.
+ */
+#define REG_CTL_PENDING_REQS  0x18
+
+/* Busy cycle count register address. */
+#define REG_PK_BUSY_CYCLES 0x2108
+/* Idle cycle count register address. */
+#define REG_PK_IDLE_CYCLES 0x2110
+
+/* Hardware interface versions. */
+#define AMDPK_SEMVER_MAJOR(v) (((v) >> 24) & 0xff)
+#define AMDPK_SEMVER_MINOR(v) (((v) >> 16) & 0xff)
+#define AMDPK_SEMVER_PATCH(v) ((v) & 0xffff)
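+
+/* Illustrative: a REG_SEMVER value of 0x01020003 decodes to major 1,
+ * minor 2, patch 3 with the macros above.
+ */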
+
+/* Hardware implementation versions. */
+#define AMDPK_HWVER_MAJOR(v)  (((v) >> 24) & 0xff)
+#define AMDPK_HWVER_MINOR(v)  (((v) >> 16) & 0xff)
+#define AMDPK_HWVER_SVN(v)    ((v) & 0xffff)
+
+/* Maximum number of queues supported by the driver. */
+#define MAX_QUEUES 4
+
+/* Number of RQ memory addresses for each queue. */
+#define MAX_RQMEM_PER_QUEUE 4
+
+/* Wait attempts for HW to flush all requests before close. */
+#define MAX_FLUSH_WAIT_ATTEMPTS 500
+
+/* Bit 0 (0x1) is the Generation bit. */
+#define CQ_GENERATION_BIT BIT(0)
+
+/* Bit 1 (0x2) is set when completion is valid. */
+#define CQ_COMPLETION_BIT BIT(1)
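+
+/* Sketch of the intended consumer check (illustrative, not the driver's
+ * verbatim code): an entry is ready once its completion bit is set and
+ * its generation bit matches the queue's current generation:
+ *
+ *	u32 status = READ_ONCE(cq->base[cq->tail]);
+ *
+ *	if ((status & CQ_COMPLETION_BIT) &&
+ *	    (status & CQ_GENERATION_BIT) == cq->generation)
+ *		consume_entry(cq);	// hypothetical helper
+ */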
+
+/* The maximum value of rq_entries is 512: the single 4 KiB CQ page holds
+ * 8-byte completion statuses, so only 4096 / 8 = 512 entries can be
+ * pending at any time.
+ */
+#define MAX_CQ_ENTRIES_ON_PAGE (PAGE_SIZE / 8)
+
+/* Forward declaration */
+struct amdpk_dev;
+struct amdpk_user;
+
+/* structure to hold completion queue information */
+struct amdpk_cq {
+	/* PKI device */
+	struct amdpk_dev *pkdev;
+	/* Base address of the completion queue */
+	u32 *base;
+	/* tail representing last completion */
+	unsigned int tail;
+	/* generation bit which toggles as per the device */
+	unsigned int generation;
+	/* size code as configured in REG_RQ_CFG_PAGE_SIZE */
+	u16 szcode;
+};
+
+/* represents PKI work context */
+struct amdpk_work {
+	/* PKI device */
+	struct amdpk_dev *pkdev;
+	/* PKI user */
+	struct amdpk_user *user;
+	/* Completion queue */
+	struct amdpk_cq pk_cq;
+	/* Kthread work associated with the PKI work */
+	struct kthread_work cq_work;
+	/* Kthread worker to handle completions */
+	struct kthread_worker *cq_wq;
+	/* Associated queue ID */
+	u16 qid;
+};
+
+/* AMD PKI device */
+struct amdpk_dev {
+	/* DRM device associated with PKI device */
+	struct drm_device ddev;
+	/* Core device */
+	struct device *dev;
+	/* PKI register space address */
+	char __iomem *regs;
+	/* PKI register space physical address */
+	resource_size_t regsphys;
+	/* Maximum queues supported by device. */
+	u16 max_queues;
+	/* Available queues */
+	struct ida avail_queues;
+	/* Total available queue depth across all queues */
+	atomic_t avail_qdepth;
+	/* List of all the AMD users */
+	struct amdpk_user *users[MAX_QUEUES];
+	/* PKI work for each queue */
+	struct amdpk_work *work[MAX_QUEUES];
+};
+
+/* AMD PKI user */
+struct amdpk_user {
+	/* PKI device */
+	struct amdpk_dev *pkdev;
+	/* Indicates if user has been configured */
+	bool configured;
+	/* Queue ID allocated for the user */
+	u16 qid;
+	/* Number of pages allocated on request queue */
+	u16 rq_pages;
+	/* RQ entries reserved for this user */
+	size_t rq_entries;
+	/* DMA address for RQ pages */
+	dma_addr_t physrq[MAX_RQMEM_PER_QUEUE];
+	/* RQ pages addresses */
+	u8 *rqmem[MAX_RQMEM_PER_QUEUE];
+	/* DMA address for CQ page */
+	dma_addr_t physcq;
+	/* CQ page address */
+	u8 *cqmem;
+	/* DMA address for status page */
+	dma_addr_t physst;
+	/* Status page address */
+	u8 *stmem;
+	/* Eventfd context for each request */
+	struct eventfd_ctx *evfd_ctx[MAX_PK_REQS];
+};
+
+#define to_amdpk_dev(dev) container_of(dev, struct amdpk_dev, ddev)
+#define to_amdpk_work(work) container_of(work, struct amdpk_work, cq_work)
+
+static inline void pk_wrreg(char __iomem *regs, int addr, u64 val)
+{
+	iowrite64(val, regs + addr);
+}
+
+static inline u64 pk_rdreg(char __iomem *regs, int addr)
+{
+	return ioread64(regs + addr);
+}
+
+#endif /* __AMDPK_DRV_H__ */
diff --git a/include/uapi/drm/amdpk.h b/include/uapi/drm/amdpk.h
new file mode 100644
index 000000000000..e5e18fdbc2c4
--- /dev/null
+++ b/include/uapi/drm/amdpk.h
@@ -0,0 +1,49 @@ 
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
+ */
+
+#ifndef __AMDPK_H__
+#define __AMDPK_H__
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define MAX_PK_REQS		256
+
+struct amdpk_info {
+	/** maximum available queue depth */
+	__u32 avail_qdepth;
+};
+
+struct amdpk_conf {
+	/** queue depth to configure */
+	__u32 qdepth;
+	/** eventfds associated with the descriptors */
+	__s32 eventfd[MAX_PK_REQS];
+};
+
+/* IOCTL */
+#define DRM_AMDPK_GET_INFO	0x0
+#define DRM_AMDPK_SET_CONF	0x1
+
+#define DRM_IOCTL_AMDPK_GET_INFO	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDPK_GET_INFO, \
+						 struct amdpk_info)
+#define DRM_IOCTL_AMDPK_SET_CONF	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDPK_SET_CONF, \
+						 struct amdpk_conf)
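+
+/* Illustrative userspace flow (assumes an accel node such as
+ * /dev/accel/accel0; error handling omitted):
+ *
+ *	struct amdpk_info info = { 0 };
+ *	struct amdpk_conf conf = { 0 };
+ *	int fd = open("/dev/accel/accel0", O_RDWR);
+ *
+ *	ioctl(fd, DRM_IOCTL_AMDPK_GET_INFO, &info);
+ *	conf.qdepth = info.avail_qdepth;
+ *	ioctl(fd, DRM_IOCTL_AMDPK_SET_CONF, &conf);
+ */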
+
+/* MMAP */
+#define AMDPK_MMAP_REGS		0
+#define AMDPK_MMAP_MEM		1
+
+/* Completion Status */
+#define CQ_STATUS_INVALID	0x0
+#define CQ_STATUS_VALID		0x80000000
+#define CQ_COMPLETION_ERROR	0x40000000
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* __AMDPK_H__ */