diff mbox series

[v3,07/10] fwctl/mlx5: Support for communicating with mlx5 fw

Message ID 7-v3-960f17f90f17+516-fwctl_jgg@nvidia.com
State New
Headers show
Series Introduce fwctl subystem | expand

Commit Message

Jason Gunthorpe Aug. 21, 2024, 6:10 p.m. UTC
From: Saeed Mahameed <saeedm@nvidia.com>

mlx5's fw has long provided a User Context concept. This has a long
history in RDMA as part of the devx extended verbs programming
interface. A User Context is a security envelope that contains objects and
controls access. It contains the Protection Domain object from the
InfiniBand Architecture and both togther provide the OS with the necessary
tools to bind a security context like a process to the device.

The security context is restricted to not be able to touch the kernel or
other processes. In the RDMA verbs case it is also restricted to not touch
global device resources.

The fwctl_mlx5 takes this approach and builds a User Context per fwctl
file descriptor and uses a FW security capability on the User Context to
enable access to global device resources. This makes the context useful
for provisioning and debugging the global device state.

mlx5 already has a robust infrastructure for delivering RPC messages to
fw. Trivially connect fwctl's RPC mechanism to mlx5_cmd_do(). Enforce the
User Context ID in every RPC header so the FW knows the security context
of the issuing ID.

Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 MAINTAINERS                 |   7 +
 drivers/fwctl/Kconfig       |  14 ++
 drivers/fwctl/Makefile      |   1 +
 drivers/fwctl/mlx5/Makefile |   4 +
 drivers/fwctl/mlx5/main.c   | 337 ++++++++++++++++++++++++++++++++++++
 include/uapi/fwctl/fwctl.h  |   1 +
 include/uapi/fwctl/mlx5.h   |  36 ++++
 7 files changed, 400 insertions(+)
 create mode 100644 drivers/fwctl/mlx5/Makefile
 create mode 100644 drivers/fwctl/mlx5/main.c
 create mode 100644 include/uapi/fwctl/mlx5.h

Comments

Jonathan Cameron Aug. 23, 2024, 2:48 p.m. UTC | #1
On Wed, 21 Aug 2024 15:10:59 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> From: Saeed Mahameed <saeedm@nvidia.com>
> 
> mlx5's fw has long provided a User Context concept. This has a long
> history in RDMA as part of the devx extended verbs programming
> interface. A User Context is a security envelope that contains objects and
> controls access. It contains the Protection Domain object from the
> InfiniBand Architecture and both togther provide the OS with the necessary
> tools to bind a security context like a process to the device.
> 
> The security context is restricted to not be able to touch the kernel or
> other processes. In the RDMA verbs case it is also restricted to not touch
> global device resources.
> 
> The fwctl_mlx5 takes this approach and builds a User Context per fwctl
> file descriptor and uses a FW security capability on the User Context to
> enable access to global device resources. This makes the context useful
> for provisioning and debugging the global device state.
> 
> mlx5 already has a robust infrastructure for delivering RPC messages to
> fw. Trivially connect fwctl's RPC mechanism to mlx5_cmd_do(). Enforce the
> User Context ID in every RPC header so the FW knows the security context
> of the issuing ID.
> 
> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Trivial stuff only. Feel free to ignore if you really like the code
the way it is.  I don't know the MLX5 parts, but based on just what
is visible here and in this series.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> diff --git a/drivers/fwctl/mlx5/main.c b/drivers/fwctl/mlx5/main.c
> new file mode 100644
> index 00000000000000..8839770fbe7ba5
> --- /dev/null
> +++ b/drivers/fwctl/mlx5/main.c


> +
> +static void *mlx5ctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
> +			    void *rpc_in, size_t in_len, size_t *out_len)
> +{
> +	struct mlx5ctl_dev *mcdev =
> +		container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
> +	struct mlx5ctl_uctx *mfd =
> +		container_of(uctx, struct mlx5ctl_uctx, uctx);
> +	void *rpc_alloc __free(kvfree) = NULL;

Whilst you can't completely pair this with destructor, I'd still
move this as locally as possible.

> +	void *rpc_out;
> +	int ret;
> +
> +	if (in_len < MLX5_ST_SZ_BYTES(mbox_in_hdr) ||
> +	    *out_len < MLX5_ST_SZ_BYTES(mbox_out_hdr))
> +		return ERR_PTR(-EMSGSIZE);
> +
> +	/* FIXME: Requires device support for more scopes */
> +	if (scope != FWCTL_RPC_CONFIGURATION &&
> +	    scope != FWCTL_RPC_DEBUG_READ_ONLY)
> +		return ERR_PTR(-EOPNOTSUPP);
> +
> +	mlx5ctl_dbg(mcdev, "[UID %d] cmdif: opcode 0x%x inlen %zu outlen %zu\n",
> +		    mfd->uctx_uid, MLX5_GET(mbox_in_hdr, rpc_in, opcode),
> +		    in_len, *out_len);
> +
> +	if (!mlx5ctl_validate_rpc(rpc_in, scope))
> +		return ERR_PTR(-EBADMSG);
> +
> +	/*
> +	 * mlx5_cmd_do() copies the input message to its own buffer before
> +	 * executing it, so we can reuse the allocation for the output.
> +	 */
> +	if (*out_len <= in_len) {
> +		rpc_out = rpc_in;
> +	} else {
> +		rpc_out = rpc_alloc = kvzalloc(*out_len, GFP_KERNEL);
> +		if (!rpc_alloc)
> +			return ERR_PTR(-ENOMEM);
> +	}
> +
> +	/* Enforce the user context for the command */
> +	MLX5_SET(mbox_in_hdr, rpc_in, uid, mfd->uctx_uid);
> +	ret = mlx5_cmd_do(mcdev->mdev, rpc_in, in_len, rpc_out, *out_len);
> +
> +	mlx5ctl_dbg(mcdev,
> +		    "[UID %d] cmdif: opcode 0x%x status 0x%x retval %pe\n",
> +		    mfd->uctx_uid, MLX5_GET(mbox_in_hdr, rpc_in, opcode),
> +		    MLX5_GET(mbox_out_hdr, rpc_out, status), ERR_PTR(ret));
> +
> +	/*
> +	 * -EREMOTEIO means execution succeeded and the out is valid,
> +	 * but an error code was returned inside out. Everything else
> +	 * means the RPC did not make it to the device.
> +	 */
> +	if (ret && ret != -EREMOTEIO)
> +		return ERR_PTR(ret);
> +	if (rpc_out == rpc_in)
> +		return rpc_in;
> +	return_ptr(rpc_alloc);
> +}
> +

> +static void mlx5ctl_remove(struct auxiliary_device *adev)
> +{
> +	struct mlx5ctl_dev *mcdev __free(mlx5ctl) = auxiliary_get_drvdata(adev);

I'm not keen on the non constructor being paired with destructor
but it's your code so you get keep the confusion if you really
like it.

I'd just have an explicit put.

> +
> +	fwctl_unregister(&mcdev->fwctl);
> +}
Jason Gunthorpe Aug. 27, 2024, 3:07 p.m. UTC | #2
On Fri, Aug 23, 2024 at 03:48:30PM +0100, Jonathan Cameron wrote:
> On Wed, 21 Aug 2024 15:10:59 -0300
> Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> > From: Saeed Mahameed <saeedm@nvidia.com>
> > 
> > mlx5's fw has long provided a User Context concept. This has a long
> > history in RDMA as part of the devx extended verbs programming
> > interface. A User Context is a security envelope that contains objects and
> > controls access. It contains the Protection Domain object from the
> > InfiniBand Architecture and both togther provide the OS with the necessary
> > tools to bind a security context like a process to the device.
> > 
> > The security context is restricted to not be able to touch the kernel or
> > other processes. In the RDMA verbs case it is also restricted to not touch
> > global device resources.
> > 
> > The fwctl_mlx5 takes this approach and builds a User Context per fwctl
> > file descriptor and uses a FW security capability on the User Context to
> > enable access to global device resources. This makes the context useful
> > for provisioning and debugging the global device state.
> > 
> > mlx5 already has a robust infrastructure for delivering RPC messages to
> > fw. Trivially connect fwctl's RPC mechanism to mlx5_cmd_do(). Enforce the
> > User Context ID in every RPC header so the FW knows the security context
> > of the issuing ID.
> > 
> > Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> 
> Trivial stuff only. Feel free to ignore if you really like the code
> the way it is.  I don't know the MLX5 parts, but based on just what
> is visible here and in this series.
> 
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> 
> > diff --git a/drivers/fwctl/mlx5/main.c b/drivers/fwctl/mlx5/main.c
> > new file mode 100644
> > index 00000000000000..8839770fbe7ba5
> > --- /dev/null
> > +++ b/drivers/fwctl/mlx5/main.c
> 
> 
> > +
> > +static void *mlx5ctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
> > +			    void *rpc_in, size_t in_len, size_t *out_len)
> > +{
> > +	struct mlx5ctl_dev *mcdev =
> > +		container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
> > +	struct mlx5ctl_uctx *mfd =
> > +		container_of(uctx, struct mlx5ctl_uctx, uctx);
> > +	void *rpc_alloc __free(kvfree) = NULL;
> 
> Whilst you can't completely pair this with destructor, I'd still
> move this as locally as possible.

Yeah, this is a troubling area for cleanup.h here.

I can't really move it as locally as possible because the assignment
is in a scope:

	} else {
		rpc_out = rpc_alloc = kvzalloc(*out_len, GFP_KERNEL);
		if (!rpc_alloc)
			return ERR_PTR(-ENOMEM);
	}

So given the choice of putting it at the top or put a NULL initialized
variable above the if, I'm feeling the top is more kernely?

Or this is just the wrong place to use a cleanup.h technique??

--- a/drivers/fwctl/mlx5/main.c
+++ b/drivers/fwctl/mlx5/main.c
@@ -226,7 +226,6 @@ static void *mlx5ctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
                container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
        struct mlx5ctl_uctx *mfd =
                container_of(uctx, struct mlx5ctl_uctx, uctx);
-       void *rpc_alloc __free(kvfree) = NULL;
        void *rpc_out;
        int ret;
 
@@ -253,8 +252,8 @@ static void *mlx5ctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
        if (*out_len <= in_len) {
                rpc_out = rpc_in;
        } else {
-               rpc_out = rpc_alloc = kvzalloc(*out_len, GFP_KERNEL);
-               if (!rpc_alloc)
+               rpc_out = kvzalloc(*out_len, GFP_KERNEL);
+               if (!rpc_out)
                        return ERR_PTR(-ENOMEM);
        }
 
@@ -272,11 +271,12 @@ static void *mlx5ctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
         * but an error code was returned inside out. Everything else
         * means the RPC did not make it to the device.
         */
-       if (ret && ret != -EREMOTEIO)
+       if (ret && ret != -EREMOTEIO) {
+               if (rpc_out != rpc_in)
+                       kfree(rpc_out);
                return ERR_PTR(ret);
-       if (rpc_out == rpc_in)
-               return rpc_in;
-       return_ptr(rpc_alloc);
+       }
+       return rpc_out;
 }

Arguably it is clearer like above.. Let's go with the above, I think
this was too clever a use of cleanup.h, it seems to work alot better
with simpler cases.

> > +static void mlx5ctl_remove(struct auxiliary_device *adev)
> > +{
> > +	struct mlx5ctl_dev *mcdev __free(mlx5ctl) = auxiliary_get_drvdata(adev);
> 
> I'm not keen on the non constructor being paired with destructor
> but it's your code so you get keep the confusion if you really
> like it.
> 
> I'd just have an explicit put.

Yes, I thought I did that already.. Hum must have just thought about it

Jason
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 97945ca04b1108..d7d12adc521be1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9250,6 +9250,13 @@  F:	drivers/fwctl/
 F:	include/linux/fwctl.h
 F:	include/uapi/fwctl/
 
+FWCTL MLX5 DRIVER
+M:	Saeed Mahameed <saeedm@nvidia.com>
+R:	Itay Avraham <itayavr@nvidia.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/fwctl/mlx5/
+
 GALAXYCORE GC0308 CAMERA SENSOR DRIVER
 M:	Sebastian Reichel <sre@kernel.org>
 L:	linux-media@vger.kernel.org
diff --git a/drivers/fwctl/Kconfig b/drivers/fwctl/Kconfig
index 37147a695add9a..e5ee2d46d43126 100644
--- a/drivers/fwctl/Kconfig
+++ b/drivers/fwctl/Kconfig
@@ -7,3 +7,17 @@  menuconfig FWCTL
 	  support a wide range of lockdown compatible device behaviors including
 	  manipulating device FLASH, debugging, and other activities that don't
 	  fit neatly into an existing subsystem.
+
+if FWCTL
+config FWCTL_MLX5
+	tristate "mlx5 ConnectX control fwctl driver"
+	depends on MLX5_CORE
+	help
+	  MLX5CTL provides interface for the user process to access the debug and
+	  configuration registers of the ConnectX hardware family
+	  (NICs, PCI switches and SmartNIC SoCs).
+	  This will allow configuration and debug tools to work out of the box on
+	  mainstream kernel.
+
+	  If you don't know what to do here, say N.
+endif
diff --git a/drivers/fwctl/Makefile b/drivers/fwctl/Makefile
index 1cad210f6ba580..1c535f694d7fe4 100644
--- a/drivers/fwctl/Makefile
+++ b/drivers/fwctl/Makefile
@@ -1,4 +1,5 @@ 
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_FWCTL) += fwctl.o
+obj-$(CONFIG_FWCTL_MLX5) += mlx5/
 
 fwctl-y += main.o
diff --git a/drivers/fwctl/mlx5/Makefile b/drivers/fwctl/mlx5/Makefile
new file mode 100644
index 00000000000000..139a23e3c7c517
--- /dev/null
+++ b/drivers/fwctl/mlx5/Makefile
@@ -0,0 +1,4 @@ 
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_FWCTL_MLX5) += mlx5_fwctl.o
+
+mlx5_fwctl-y += main.o
diff --git a/drivers/fwctl/mlx5/main.c b/drivers/fwctl/mlx5/main.c
new file mode 100644
index 00000000000000..8839770fbe7ba5
--- /dev/null
+++ b/drivers/fwctl/mlx5/main.c
@@ -0,0 +1,337 @@ 
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/fwctl.h>
+#include <linux/auxiliary_bus.h>
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/driver.h>
+#include <uapi/fwctl/mlx5.h>
+
+#define mlx5ctl_err(mcdev, format, ...) \
+	dev_err(&mcdev->fwctl.dev, format, ##__VA_ARGS__)
+
+#define mlx5ctl_dbg(mcdev, format, ...)                             \
+	dev_dbg(&mcdev->fwctl.dev, "PID %u: " format, current->pid, \
+		##__VA_ARGS__)
+
+struct mlx5ctl_uctx {
+	struct fwctl_uctx uctx;
+	u32 uctx_caps;
+	u32 uctx_uid;
+};
+
+struct mlx5ctl_dev {
+	struct fwctl_device fwctl;
+	struct mlx5_core_dev *mdev;
+};
+DEFINE_FREE(mlx5ctl, struct mlx5ctl_dev *, if (_T) fwctl_put(&_T->fwctl));
+
+struct mlx5_ifc_mbox_in_hdr_bits {
+	u8 opcode[0x10];
+	u8 uid[0x10];
+
+	u8 reserved_at_20[0x10];
+	u8 op_mod[0x10];
+
+	u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_mbox_out_hdr_bits {
+	u8 status[0x8];
+	u8 reserved_at_8[0x18];
+
+	u8 syndrome[0x20];
+
+	u8 reserved_at_40[0x40];
+};
+
+enum {
+	MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES = 0x4,
+};
+
+enum {
+	MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS = 0x819,
+	MLX5_CMD_OP_SET_DIAGNOSTIC_PARAMS = 0x820,
+	MLX5_CMD_OP_QUERY_DIAGNOSTIC_COUNTERS = 0x821,
+	MLX5_CMD_OP_POSTPONE_CONNECTED_QP_TIMEOUT = 0xb2e,
+};
+
+static int mlx5ctl_alloc_uid(struct mlx5ctl_dev *mcdev, u32 cap)
+{
+	u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
+	void *uctx;
+	int ret;
+	u16 uid;
+
+	uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
+
+	mlx5ctl_dbg(mcdev, "%s: caps 0x%x\n", __func__, cap);
+	MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
+	MLX5_SET(uctx, uctx, cap, cap);
+
+	ret = mlx5_cmd_exec(mcdev->mdev, in, sizeof(in), out, sizeof(out));
+	if (ret)
+		return ret;
+
+	uid = MLX5_GET(create_uctx_out, out, uid);
+	mlx5ctl_dbg(mcdev, "allocated uid %u with caps 0x%x\n", uid, cap);
+	return uid;
+}
+
+static void mlx5ctl_release_uid(struct mlx5ctl_dev *mcdev, u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {};
+	struct mlx5_core_dev *mdev = mcdev->mdev;
+	int ret;
+
+	MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
+	MLX5_SET(destroy_uctx_in, in, uid, uid);
+
+	ret = mlx5_cmd_exec_in(mdev, destroy_uctx, in);
+	mlx5ctl_dbg(mcdev, "released uid %u %pe\n", uid, ERR_PTR(ret));
+}
+
+static int mlx5ctl_open_uctx(struct fwctl_uctx *uctx)
+{
+	struct mlx5ctl_uctx *mfd =
+		container_of(uctx, struct mlx5ctl_uctx, uctx);
+	struct mlx5ctl_dev *mcdev =
+		container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
+	int uid;
+
+	/*
+	 * New FW supports the TOOLS_RESOURCES uid security label
+	 * which allows commands to manipulate the global device state.
+	 * Otherwise only basic existing RDMA devx privilege are allowed.
+	 */
+	if (MLX5_CAP_GEN(mcdev->mdev, uctx_cap) &
+	    MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES)
+		mfd->uctx_caps |= MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES;
+
+	uid = mlx5ctl_alloc_uid(mcdev, mfd->uctx_caps);
+	if (uid < 0)
+		return uid;
+
+	mfd->uctx_uid = uid;
+	return 0;
+}
+
+static void mlx5ctl_close_uctx(struct fwctl_uctx *uctx)
+{
+	struct mlx5ctl_dev *mcdev =
+		container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
+	struct mlx5ctl_uctx *mfd =
+		container_of(uctx, struct mlx5ctl_uctx, uctx);
+
+	mlx5ctl_release_uid(mcdev, mfd->uctx_uid);
+}
+
+static void *mlx5ctl_info(struct fwctl_uctx *uctx, size_t *length)
+{
+	struct mlx5ctl_uctx *mfd =
+		container_of(uctx, struct mlx5ctl_uctx, uctx);
+	struct fwctl_info_mlx5 *info;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	info->uid = mfd->uctx_uid;
+	info->uctx_caps = mfd->uctx_caps;
+	*length = sizeof(*info);
+	return info;
+}
+
+static bool mlx5ctl_validate_rpc(const void *in, enum fwctl_rpc_scope scope)
+{
+	u16 opcode = MLX5_GET(mbox_in_hdr, in, opcode);
+
+	/*
+	 * Currently the driver can't keep track of commands that allocate
+	 * objects in the FW, these commands are safe from a security
+	 * perspective but nothing will free the memory when the FD is closed.
+	 * For now permit only query commands. Also the caps for the scope have
+	 * not been defined yet, filter commands manually for now.
+	 */
+	switch (opcode) {
+	case MLX5_CMD_OP_POSTPONE_CONNECTED_QP_TIMEOUT:
+	case MLX5_CMD_OP_QUERY_ADAPTER:
+	case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
+	case MLX5_CMD_OP_QUERY_HCA_CAP:
+	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+	case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+		return scope <= FWCTL_RPC_CONFIGURATION;
+
+	case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+	case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+	case MLX5_CMD_OP_QUERY_CONG_STATUS:
+	case MLX5_CMD_OP_QUERY_CQ:
+	case MLX5_CMD_OP_QUERY_DCT:
+	case MLX5_CMD_OP_QUERY_DIAGNOSTIC_COUNTERS:
+	case MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS:
+	case MLX5_CMD_OP_QUERY_EQ:
+	case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_QUERY_ISSI:
+	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_QUERY_LAG:
+	case MLX5_CMD_OP_QUERY_MAD_DEMUX:
+	case MLX5_CMD_OP_QUERY_MKEY:
+	case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+	case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
+	case MLX5_CMD_OP_QUERY_PAGES:
+	case MLX5_CMD_OP_QUERY_Q_COUNTER:
+	case MLX5_CMD_OP_QUERY_QP:
+	case MLX5_CMD_OP_QUERY_RMP:
+	case MLX5_CMD_OP_QUERY_RQ:
+	case MLX5_CMD_OP_QUERY_RQT:
+	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
+	case MLX5_CMD_OP_QUERY_SQ:
+	case MLX5_CMD_OP_QUERY_SRQ:
+	case MLX5_CMD_OP_QUERY_TIR:
+	case MLX5_CMD_OP_QUERY_TIS:
+	case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE:
+	case MLX5_CMD_OP_QUERY_VNIC_ENV:
+	case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+	case MLX5_CMD_OP_QUERY_VPORT_STATE:
+	case MLX5_CMD_OP_QUERY_WOL_ROL:
+	case MLX5_CMD_OP_QUERY_XRC_SRQ:
+	case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+	case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
+	case MLX5_CMD_OP_QUERY_XRQ:
+		return scope <= FWCTL_RPC_DEBUG_READ_ONLY;
+
+	case MLX5_CMD_OP_SET_DIAGNOSTIC_PARAMS:
+		return scope <= FWCTL_RPC_DEBUG_WRITE;
+
+	case MLX5_CMD_OP_ACCESS_REG:
+		return scope <= FWCTL_RPC_DEBUG_WRITE_FULL;
+	default:
+		return false;
+	}
+}
+
+static void *mlx5ctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
+			    void *rpc_in, size_t in_len, size_t *out_len)
+{
+	struct mlx5ctl_dev *mcdev =
+		container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
+	struct mlx5ctl_uctx *mfd =
+		container_of(uctx, struct mlx5ctl_uctx, uctx);
+	void *rpc_alloc __free(kvfree) = NULL;
+	void *rpc_out;
+	int ret;
+
+	if (in_len < MLX5_ST_SZ_BYTES(mbox_in_hdr) ||
+	    *out_len < MLX5_ST_SZ_BYTES(mbox_out_hdr))
+		return ERR_PTR(-EMSGSIZE);
+
+	/* FIXME: Requires device support for more scopes */
+	if (scope != FWCTL_RPC_CONFIGURATION &&
+	    scope != FWCTL_RPC_DEBUG_READ_ONLY)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	mlx5ctl_dbg(mcdev, "[UID %d] cmdif: opcode 0x%x inlen %zu outlen %zu\n",
+		    mfd->uctx_uid, MLX5_GET(mbox_in_hdr, rpc_in, opcode),
+		    in_len, *out_len);
+
+	if (!mlx5ctl_validate_rpc(rpc_in, scope))
+		return ERR_PTR(-EBADMSG);
+
+	/*
+	 * mlx5_cmd_do() copies the input message to its own buffer before
+	 * executing it, so we can reuse the allocation for the output.
+	 */
+	if (*out_len <= in_len) {
+		rpc_out = rpc_in;
+	} else {
+		rpc_out = rpc_alloc = kvzalloc(*out_len, GFP_KERNEL);
+		if (!rpc_alloc)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	/* Enforce the user context for the command */
+	MLX5_SET(mbox_in_hdr, rpc_in, uid, mfd->uctx_uid);
+	ret = mlx5_cmd_do(mcdev->mdev, rpc_in, in_len, rpc_out, *out_len);
+
+	mlx5ctl_dbg(mcdev,
+		    "[UID %d] cmdif: opcode 0x%x status 0x%x retval %pe\n",
+		    mfd->uctx_uid, MLX5_GET(mbox_in_hdr, rpc_in, opcode),
+		    MLX5_GET(mbox_out_hdr, rpc_out, status), ERR_PTR(ret));
+
+	/*
+	 * -EREMOTEIO means execution succeeded and the out is valid,
+	 * but an error code was returned inside out. Everything else
+	 * means the RPC did not make it to the device.
+	 */
+	if (ret && ret != -EREMOTEIO)
+		return ERR_PTR(ret);
+	if (rpc_out == rpc_in)
+		return rpc_in;
+	return_ptr(rpc_alloc);
+}
+
+static const struct fwctl_ops mlx5ctl_ops = {
+	.device_type = FWCTL_DEVICE_TYPE_MLX5,
+	.uctx_size = sizeof(struct mlx5ctl_uctx),
+	.open_uctx = mlx5ctl_open_uctx,
+	.close_uctx = mlx5ctl_close_uctx,
+	.info = mlx5ctl_info,
+	.fw_rpc = mlx5ctl_fw_rpc,
+};
+
+static int mlx5ctl_probe(struct auxiliary_device *adev,
+			 const struct auxiliary_device_id *id)
+
+{
+	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
+	struct mlx5_core_dev *mdev = madev->mdev;
+	struct mlx5ctl_dev *mcdev __free(mlx5ctl) = fwctl_alloc_device(
+		&mdev->pdev->dev, &mlx5ctl_ops, struct mlx5ctl_dev, fwctl);
+	int ret;
+
+	if (!mcdev)
+		return -ENOMEM;
+
+	mcdev->mdev = mdev;
+
+	ret = fwctl_register(&mcdev->fwctl);
+	if (ret)
+		return ret;
+	auxiliary_set_drvdata(adev, no_free_ptr(mcdev));
+	return 0;
+}
+
+static void mlx5ctl_remove(struct auxiliary_device *adev)
+{
+	struct mlx5ctl_dev *mcdev __free(mlx5ctl) = auxiliary_get_drvdata(adev);
+
+	fwctl_unregister(&mcdev->fwctl);
+}
+
+static const struct auxiliary_device_id mlx5ctl_id_table[] = {
+	{.name = MLX5_ADEV_NAME ".fwctl",},
+	{}
+};
+MODULE_DEVICE_TABLE(auxiliary, mlx5ctl_id_table);
+
+static struct auxiliary_driver mlx5ctl_driver = {
+	.name = "mlx5_fwctl",
+	.probe = mlx5ctl_probe,
+	.remove = mlx5ctl_remove,
+	.id_table = mlx5ctl_id_table,
+};
+
+module_auxiliary_driver(mlx5ctl_driver);
+
+MODULE_IMPORT_NS(FWCTL);
+MODULE_DESCRIPTION("mlx5 ConnectX fwctl driver");
+MODULE_AUTHOR("Saeed Mahameed <saeedm@nvidia.com>");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/include/uapi/fwctl/fwctl.h b/include/uapi/fwctl/fwctl.h
index 3af9f9eb9b1878..f9b27fb5c1618c 100644
--- a/include/uapi/fwctl/fwctl.h
+++ b/include/uapi/fwctl/fwctl.h
@@ -42,6 +42,7 @@  enum {
 
 enum fwctl_device_type {
 	FWCTL_DEVICE_TYPE_ERROR = 0,
+	FWCTL_DEVICE_TYPE_MLX5 = 1,
 };
 
 /**
diff --git a/include/uapi/fwctl/mlx5.h b/include/uapi/fwctl/mlx5.h
new file mode 100644
index 00000000000000..bcb4602ffdeee4
--- /dev/null
+++ b/include/uapi/fwctl/mlx5.h
@@ -0,0 +1,36 @@ 
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ *
+ * These are definitions for the command interface for mlx5 HW. mlx5 FW has a
+ * User Context mechanism which allows the FW to understand a security scope.
+ * FWCTL binds each FD to a FW user context and then places the User Context ID
+ * (UID) in each command header. The created User Context has a capability set
+ * that is appropriate for FWCTL's security model.
+ *
+ * Command formation should use a copy of the structs in mlx5_ifc.h following
+ * the Programmers Reference Manual. A open release is available here:
+ *
+ *  https://network.nvidia.com/files/doc-2020/ethernet-adapters-programming-manual.pdf
+ *
+ * The device_type for this file is FWCTL_DEVICE_TYPE_MLX5.
+ */
+#ifndef _UAPI_FWCTL_MLX5_H
+#define _UAPI_FWCTL_MLX5_H
+
+#include <linux/types.h>
+
+/**
+ * struct fwctl_info_mlx5 - ioctl(FWCTL_INFO) out_device_data
+ * @uid: The FW UID this FD is bound to. Each command header will force
+ *	this value.
+ * @uctx_caps: The FW capabilities that are enabled for the uid.
+ *
+ * Return basic information about the FW interface available.
+ */
+struct fwctl_info_mlx5 {
+	__u32 uid;
+	__u32 uctx_caps;
+};
+
+#endif