diff mbox series

[v2,5/8] fwctl: FWCTL_RPC to execute a Remote Procedure Call to device firmware

Message ID 5-v2-940e479ceba9+3821-fwctl_jgg@nvidia.com
State Superseded
Headers show
Series Introduce fwctl subystem | expand

Commit Message

Jason Gunthorpe June 24, 2024, 10:47 p.m. UTC
Add the FWCTL_RPC ioctl which allows a request/response RPC call to device
firmware. Drivers implementing this call must follow the security
guidelines under Documentation/userspace-api/fwctl.rst

The core code provides some memory management helpers to get the messages
copied from and back to userspace. The driver is responsible for
allocating the output message memory and delivering the message to the
device.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/fwctl/main.c       | 62 +++++++++++++++++++++++++++++++++++
 include/linux/fwctl.h      |  5 +++
 include/uapi/fwctl/fwctl.h | 66 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 133 insertions(+)

Comments

Jonathan Cameron July 26, 2024, 3:30 p.m. UTC | #1
On Mon, 24 Jun 2024 19:47:29 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> Add the FWCTL_RPC ioctl which allows a request/response RPC call to device
> firmware. Drivers implementing this call must follow the security
> guidelines under Documentation/userspace-api/fwctl.rst
> 
> The core code provides some memory management helpers to get the messages
> copied from and back to userspace. The driver is responsible for
> allocating the output message memory and delivering the message to the
> device.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
A few minor things inline.

> ---
>  drivers/fwctl/main.c       | 62 +++++++++++++++++++++++++++++++++++
>  include/linux/fwctl.h      |  5 +++
>  include/uapi/fwctl/fwctl.h | 66 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 133 insertions(+)
> 
> diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c
> index f1dec0b590aee4..9506b993a1a56d 100644
> --- a/drivers/fwctl/main.c
> +++ b/drivers/fwctl/main.c
> @@ -8,16 +8,20 @@
>  #include <linux/slab.h>
>  #include <linux/container_of.h>
>  #include <linux/fs.h>
> +#include <linux/sizes.h>
>  
>  #include <uapi/fwctl/fwctl.h>
>  
>  enum {
>  	FWCTL_MAX_DEVICES = 256,
> +	MAX_RPC_LEN = SZ_2M,
>  };

In what way is that usefully handled as an enum?
I'd just use #defines

>  static dev_t fwctl_dev;
>  static DEFINE_IDA(fwctl_ida);
> +static unsigned long fwctl_tainted;
>  
>  DEFINE_FREE(kfree_errptr, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T));
> +DEFINE_FREE(kvfree_errptr, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T));
kvfree define free already defined as this since 6.9


>  
>  struct fwctl_ucmd {
>  	struct fwctl_uctx *uctx;
> @@ -75,9 +79,66 @@ static int fwctl_cmd_info(struct fwctl_ucmd *ucmd)
>  	return ucmd_respond(ucmd, sizeof(*cmd));
>  }
>  
> +static int fwctl_cmd_rpc(struct fwctl_ucmd *ucmd)
> +{
> +	struct fwctl_device *fwctl = ucmd->uctx->fwctl;
> +	struct fwctl_rpc *cmd = ucmd->cmd;
> +	size_t out_len;
> +
> +	if (cmd->in_len > MAX_RPC_LEN || cmd->out_len > MAX_RPC_LEN)
> +		return -EMSGSIZE;
> +
> +	switch (cmd->scope) {
> +	case FWCTL_RPC_CONFIGURATION:
> +	case FWCTL_RPC_DEBUG_READ_ONLY:
> +		break;
> +
> +	case FWCTL_RPC_DEBUG_WRITE_FULL:
> +		if (!capable(CAP_SYS_RAWIO))
> +			return -EPERM;
> +		fallthrough;
> +	case FWCTL_RPC_DEBUG_WRITE:
> +		if (!test_and_set_bit(0, &fwctl_tainted)) {
> +			dev_warn(
> +				&fwctl->dev,
> +				"%s(%d): has requested full access to the physical device device",
> +				current->comm, task_pid_nr(current));
> +			add_taint(TAINT_FWCTL, LOCKDEP_STILL_OK);
> +		}
> +		break;
> +	default:
> +		return -EOPNOTSUPP;
> +	};
> +
> +	void *inbuf __free(kvfree) =
> +		kvzalloc(cmd->in_len, GFP_KERNEL | GFP_KERNEL_ACCOUNT);

As before
#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)
so don't need both.

> +	if (!inbuf)
> +		return -ENOMEM;
> +	if (copy_from_user(inbuf, u64_to_user_ptr(cmd->in), cmd->in_len))
> +		return -EFAULT;
> +
> +	out_len = cmd->out_len;
> +	void *outbuf __free(kvfree_errptr) = fwctl->ops->fw_rpc(

> +		ucmd->uctx, cmd->scope, inbuf, cmd->in_len, &out_len);
> +	if (IS_ERR(outbuf))
> +		return PTR_ERR(outbuf);
> +	if (outbuf == inbuf) {
> +		/* The driver can re-use inbuf as outbuf */
> +		inbuf = NULL;
I wish no_free_ptr() didn't have __must_check. Can do something ugly
like
		outbuf = no_free_ptr(inbuf);
probably but maybe just setting it NULL is simpler.

> +	}
> +
> +	if (copy_to_user(u64_to_user_ptr(cmd->out), outbuf,
> +			 min(cmd->out_len, out_len)))
> +		return -EFAULT;
> +
> +	cmd->out_len = out_len;
> +	return ucmd_respond(ucmd, sizeof(*cmd));
> +}

> diff --git a/include/uapi/fwctl/fwctl.h b/include/uapi/fwctl/fwctl.h
> index 39db9f09f8068e..8bde0d4416fd55 100644
> --- a/include/uapi/fwctl/fwctl.h
> +++ b/include/uapi/fwctl/fwctl.h
> @@ -67,4 +67,70 @@ struct fwctl_info {
>  };
>  #define FWCTL_INFO _IO(FWCTL_TYPE, FWCTL_CMD_INFO)
>  
> +/**
> + * enum fwctl_rpc_scope - Scope of access for the RPC
> + */
> +enum fwctl_rpc_scope {
...

> +	/**
> +	 * @FWCTL_RPC_DEBUG_READ_ONLY: Read only access to debug information
> +	 *
> +	 * Readable debug information. Debug information is compatible with
> +	 * kernel lockdown, and does not disclose any sensitive information. For
> +	 * instance exposing any encryption secrets from this information is
> +	 * forbidden.
> +	 */
> +	FWCTL_RPC_DEBUG_READ_ONLY = 1,
> +	/**
> +	 * @FWCTL_RPC_DEBUG_WRITE: Writable access to lockdown compatible debug information

Write access
probably rather than writeable.

> +	 *
> +	 * Allows write access to data in the device which may leave a fully
> +	 * supported state. This is intended to permit intensive and possibly
> +	 * invasive debugging. This scope will taint the kernel.
> +	 */


> +};
Jason Gunthorpe July 29, 2024, 4:28 p.m. UTC | #2
On Fri, Jul 26, 2024 at 04:30:09PM +0100, Jonathan Cameron wrote:
> > @@ -8,16 +8,20 @@
> >  #include <linux/slab.h>
> >  #include <linux/container_of.h>
> >  #include <linux/fs.h>
> > +#include <linux/sizes.h>
> >  
> >  #include <uapi/fwctl/fwctl.h>
> >  
> >  enum {
> >  	FWCTL_MAX_DEVICES = 256,
> > +	MAX_RPC_LEN = SZ_2M,
> >  };
> 
> In what way is that usefully handled as an enum?
> I'd just use #defines

I generally am not so keen on defines for constants.. There is some
advantage with clangd and gdb, for instance. Enum is the only other
option even though it is a bit of abuse to use it like this.

> >  DEFINE_FREE(kfree_errptr, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T));
> > +DEFINE_FREE(kvfree_errptr, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T));
> kvfree define free already defined as this since 6.9

Ok
 
> > +	void *inbuf __free(kvfree) =
> > +		kvzalloc(cmd->in_len, GFP_KERNEL | GFP_KERNEL_ACCOUNT);
> 
> As before
> #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)
> so don't need both.

Yep
 
> > +	if (outbuf == inbuf) {
> > +		/* The driver can re-use inbuf as outbuf */
> > +		inbuf = NULL;
> I wish no_free_ptr() didn't have __must_check. Can do something ugly
> like
> 		outbuf = no_free_ptr(inbuf);
> probably but maybe just setting it NULL is simpler.

Yeah NULL seems clearer, the outbuf assignment is a bit too odd, IMHO

> > +	/**
> > +	 * @FWCTL_RPC_DEBUG_READ_ONLY: Read only access to debug information
> > +	 *
> > +	 * Readable debug information. Debug information is compatible with
> > +	 * kernel lockdown, and does not disclose any sensitive information. For
> > +	 * instance exposing any encryption secrets from this information is
> > +	 * forbidden.
> > +	 */
> > +	FWCTL_RPC_DEBUG_READ_ONLY = 1,
> > +	/**
> > +	 * @FWCTL_RPC_DEBUG_WRITE: Writable access to lockdown compatible debug information
> 
> Write access
> probably rather than writeable.

Sure

Thanks,
Jason
Leon Romanovsky July 30, 2024, 8 a.m. UTC | #3
On Mon, Jun 24, 2024 at 07:47:29PM -0300, Jason Gunthorpe wrote:
> Add the FWCTL_RPC ioctl which allows a request/response RPC call to device
> firmware. Drivers implementing this call must follow the security
> guidelines under Documentation/userspace-api/fwctl.rst
> 
> The core code provides some memory management helpers to get the messages
> copied from and back to userspace. The driver is responsible for
> allocating the output message memory and delivering the message to the
> device.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/fwctl/main.c       | 62 +++++++++++++++++++++++++++++++++++
>  include/linux/fwctl.h      |  5 +++
>  include/uapi/fwctl/fwctl.h | 66 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 133 insertions(+)
> 
> diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c
> index f1dec0b590aee4..9506b993a1a56d 100644
> --- a/drivers/fwctl/main.c
> +++ b/drivers/fwctl/main.c
> @@ -8,16 +8,20 @@
>  #include <linux/slab.h>
>  #include <linux/container_of.h>
>  #include <linux/fs.h>
> +#include <linux/sizes.h>
>  
>  #include <uapi/fwctl/fwctl.h>
>  
>  enum {
>  	FWCTL_MAX_DEVICES = 256,
> +	MAX_RPC_LEN = SZ_2M,
>  };
>  static dev_t fwctl_dev;
>  static DEFINE_IDA(fwctl_ida);
> +static unsigned long fwctl_tainted;
>  
>  DEFINE_FREE(kfree_errptr, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T));
> +DEFINE_FREE(kvfree_errptr, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T));
>  
>  struct fwctl_ucmd {
>  	struct fwctl_uctx *uctx;
> @@ -75,9 +79,66 @@ static int fwctl_cmd_info(struct fwctl_ucmd *ucmd)
>  	return ucmd_respond(ucmd, sizeof(*cmd));
>  }
>  
> +static int fwctl_cmd_rpc(struct fwctl_ucmd *ucmd)
> +{
> +	struct fwctl_device *fwctl = ucmd->uctx->fwctl;
> +	struct fwctl_rpc *cmd = ucmd->cmd;
> +	size_t out_len;
> +
> +	if (cmd->in_len > MAX_RPC_LEN || cmd->out_len > MAX_RPC_LEN)
> +		return -EMSGSIZE;
> +
> +	switch (cmd->scope) {
> +	case FWCTL_RPC_CONFIGURATION:
> +	case FWCTL_RPC_DEBUG_READ_ONLY:
> +		break;
> +
> +	case FWCTL_RPC_DEBUG_WRITE_FULL:
> +		if (!capable(CAP_SYS_RAWIO))
> +			return -EPERM;
> +		fallthrough;
> +	case FWCTL_RPC_DEBUG_WRITE:
> +		if (!test_and_set_bit(0, &fwctl_tainted)) {
> +			dev_warn(
> +				&fwctl->dev,
> +				"%s(%d): has requested full access to the physical device device",
> +				current->comm, task_pid_nr(current));
> +			add_taint(TAINT_FWCTL, LOCKDEP_STILL_OK);
> +		}
> +		break;
> +	default:
> +		return -EOPNOTSUPP;
> +	};
> +
> +	void *inbuf __free(kvfree) =
> +		kvzalloc(cmd->in_len, GFP_KERNEL | GFP_KERNEL_ACCOUNT);


<...>

> +	out_len = cmd->out_len;
> +	void *outbuf __free(kvfree_errptr) = fwctl->ops->fw_rpc(
> +		ucmd->uctx, cmd->scope, inbuf, cmd->in_len, &out_len);

I was under impression that declaration of variables in C should be at the beginning
of block. Was it changed for the kernel?

Thanks
Jason Gunthorpe Aug. 1, 2024, 12:58 p.m. UTC | #4
On Tue, Jul 30, 2024 at 11:00:38AM +0300, Leon Romanovsky wrote:
> > +
> > +	void *inbuf __free(kvfree) =
> > +		kvzalloc(cmd->in_len, GFP_KERNEL | GFP_KERNEL_ACCOUNT);
> 
> 
> <...>
> 
> > +	out_len = cmd->out_len;
> > +	void *outbuf __free(kvfree_errptr) = fwctl->ops->fw_rpc(
> > +		ucmd->uctx, cmd->scope, inbuf, cmd->in_len, &out_len);
> 
> I was under impression that declaration of variables in C should be at the beginning
> of block. Was it changed for the kernel?

Yes, the compiler check blocking variables in the body was disabled to
allow cleanup.h

Jonathan said this is the agreed coding style to use for this

Jason
Leon Romanovsky Aug. 1, 2024, 5:26 p.m. UTC | #5
On Thu, Aug 01, 2024 at 09:58:29AM -0300, Jason Gunthorpe wrote:
> On Tue, Jul 30, 2024 at 11:00:38AM +0300, Leon Romanovsky wrote:
> > > +
> > > +	void *inbuf __free(kvfree) =
> > > +		kvzalloc(cmd->in_len, GFP_KERNEL | GFP_KERNEL_ACCOUNT);
> > 
> > 
> > <...>
> > 
> > > +	out_len = cmd->out_len;
> > > +	void *outbuf __free(kvfree_errptr) = fwctl->ops->fw_rpc(
> > > +		ucmd->uctx, cmd->scope, inbuf, cmd->in_len, &out_len);
> > 
> > I was under impression that declaration of variables in C should be at the beginning
> > of block. Was it changed for the kernel?
> 
> Yes, the compiler check blocking variables in the body was disabled to
> allow cleanup.h
> 
> Jonathan said this is the agreed coding style to use for this

I'm said to hear that.

Thanks

> 
> Jason
Jonathan Cameron Aug. 2, 2024, 1:59 p.m. UTC | #6
On Thu, 1 Aug 2024 20:26:31 +0300
Leon Romanovsky <leon@kernel.org> wrote:

> On Thu, Aug 01, 2024 at 09:58:29AM -0300, Jason Gunthorpe wrote:
> > On Tue, Jul 30, 2024 at 11:00:38AM +0300, Leon Romanovsky wrote:  
> > > > +
> > > > +	void *inbuf __free(kvfree) =
> > > > +		kvzalloc(cmd->in_len, GFP_KERNEL | GFP_KERNEL_ACCOUNT);  
> > > 
> > > 
> > > <...>
> > >   
> > > > +	out_len = cmd->out_len;
> > > > +	void *outbuf __free(kvfree_errptr) = fwctl->ops->fw_rpc(
> > > > +		ucmd->uctx, cmd->scope, inbuf, cmd->in_len, &out_len);  
> > > 
> > > I was under impression that declaration of variables in C should be at the beginning
> > > of block. Was it changed for the kernel?  
> > 
> > Yes, the compiler check blocking variables in the body was disabled to
> > allow cleanup.h
> > 
> > Jonathan said this is the agreed coding style to use for this  
> 
> I'm said to hear that.

Was passing on a statement Linus made (not digging it out right now)
that he really wanted to be able see constructors and destructors
together.

The other part is that in some cases you can end up with non
obvious ordering bugs because the cleanup is the reverse of the
declarations, not the constructors being called.
Whilst it is fairly easy to review for this, future code reorganization
may well lead to subtle bugs, typically in error paths etc.

Putting the declaration inline avoids this potential problem

Dan wrote a style guide proposal.
https://lore.kernel.org/all/171175585714.2192972.12661675876300167762.stgit@dwillia2-xfh.jf.intel.com/
[PATCH v3] cleanup: Add usage and style documentation

seems it died out without anyone applying it.  I've poked.

Jonathan

> 
> Thanks
> 
> > 
> > Jason  
>
Leon Romanovsky Aug. 2, 2024, 3:57 p.m. UTC | #7
On Fri, Aug 02, 2024 at 02:59:46PM +0100, Jonathan Cameron wrote:
> On Thu, 1 Aug 2024 20:26:31 +0300
> Leon Romanovsky <leon@kernel.org> wrote:
> 
> > On Thu, Aug 01, 2024 at 09:58:29AM -0300, Jason Gunthorpe wrote:
> > > On Tue, Jul 30, 2024 at 11:00:38AM +0300, Leon Romanovsky wrote:  
> > > > > +
> > > > > +	void *inbuf __free(kvfree) =
> > > > > +		kvzalloc(cmd->in_len, GFP_KERNEL | GFP_KERNEL_ACCOUNT);  
> > > > 
> > > > 
> > > > <...>
> > > >   
> > > > > +	out_len = cmd->out_len;
> > > > > +	void *outbuf __free(kvfree_errptr) = fwctl->ops->fw_rpc(
> > > > > +		ucmd->uctx, cmd->scope, inbuf, cmd->in_len, &out_len);  
> > > > 
> > > > I was under impression that declaration of variables in C should be at the beginning
> > > > of block. Was it changed for the kernel?  
> > > 
> > > Yes, the compiler check blocking variables in the body was disabled to
> > > allow cleanup.h
> > > 
> > > Jonathan said this is the agreed coding style to use for this  
> > 
> > I'm said to hear that.
> 
> Was passing on a statement Linus made (not digging it out right now)
> that he really wanted to be able see constructors and destructors
> together.

The thing is that we are talking about the same thing. I and Linus want
to keep locality of variables declaration and initialization. I don't
know the Linus's stance on it, but I'm sad that to achieve that for
cleanup.h, very useful feature of GCC (keep variables at the beginning
of the block) was disabled.

Right now, you can declare variables in any place and it is harder to
review the code now. It is a matter of time when we will see code like
this and start to chase bugs introduced by this pattern:

int f()
{
	<some code>
	int i;
	<some code>
	return something;
}

Thanks

> 
> The other part is that in some cases you can end up with non
> obvious ordering bugs because the cleanup is the reverse of the
> declarations, not the constructors being called.
> Whilst it is fairly easy to review for this, future code reorganization
> may well lead to subtle bugs, typically in error paths etc.
> 
> Putting the declaration inline avoids this potential problem
> 
> Dan wrote a style guide proposal.
> https://lore.kernel.org/all/171175585714.2192972.12661675876300167762.stgit@dwillia2-xfh.jf.intel.com/
> [PATCH v3] cleanup: Add usage and style documentation
> 
> seems it died out without anyone applying it.  I've poked.
> 
> Jonathan
> 
> > 
> > Thanks
> > 
> > > 
> > > Jason  
> > 
> 
>
Oded Gabbay Aug. 7, 2024, 7:44 a.m. UTC | #8
On Mon, Jun 24, 2024 at 07:47:29PM -0300, Jason Gunthorpe wrote:
> Add the FWCTL_RPC ioctl which allows a request/response RPC call to device
> firmware. Drivers implementing this call must follow the security
> guidelines under Documentation/userspace-api/fwctl.rst
>
> The core code provides some memory management helpers to get the messages
> copied from and back to userspace. The driver is responsible for
> allocating the output message memory and delivering the message to the
> device.
>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/fwctl/main.c       | 62 +++++++++++++++++++++++++++++++++++
>  include/linux/fwctl.h      |  5 +++
>  include/uapi/fwctl/fwctl.h | 66 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 133 insertions(+)
>
> diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c
> index f1dec0b590aee4..9506b993a1a56d 100644
> --- a/drivers/fwctl/main.c
> +++ b/drivers/fwctl/main.c
> @@ -8,16 +8,20 @@
>  #include <linux/slab.h>
>  #include <linux/container_of.h>
>  #include <linux/fs.h>
> +#include <linux/sizes.h>
>
>  #include <uapi/fwctl/fwctl.h>
>
>  enum {
>  	FWCTL_MAX_DEVICES = 256,
> +	MAX_RPC_LEN = SZ_2M,
>  };
>  static dev_t fwctl_dev;
>  static DEFINE_IDA(fwctl_ida);
> +static unsigned long fwctl_tainted;
>
>  DEFINE_FREE(kfree_errptr, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T));
> +DEFINE_FREE(kvfree_errptr, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T));
>
>  struct fwctl_ucmd {
>  	struct fwctl_uctx *uctx;
> @@ -75,9 +79,66 @@ static int fwctl_cmd_info(struct fwctl_ucmd *ucmd)
>  	return ucmd_respond(ucmd, sizeof(*cmd));
>  }
>
> +static int fwctl_cmd_rpc(struct fwctl_ucmd *ucmd)
> +{
> +	struct fwctl_device *fwctl = ucmd->uctx->fwctl;
> +	struct fwctl_rpc *cmd = ucmd->cmd;
> +	size_t out_len;
> +
> +	if (cmd->in_len > MAX_RPC_LEN || cmd->out_len > MAX_RPC_LEN)
> +		return -EMSGSIZE;
> +
> +	switch (cmd->scope) {
> +	case FWCTL_RPC_CONFIGURATION:
> +	case FWCTL_RPC_DEBUG_READ_ONLY:
> +		break;
> +
> +	case FWCTL_RPC_DEBUG_WRITE_FULL:
> +		if (!capable(CAP_SYS_RAWIO))
> +			return -EPERM;
> +		fallthrough;
> +	case FWCTL_RPC_DEBUG_WRITE:
> +		if (!test_and_set_bit(0, &fwctl_tainted)) {
> +			dev_warn(
> +				&fwctl->dev,
> +				"%s(%d): has requested full access to the physical device device",
> +				current->comm, task_pid_nr(current));
> +			add_taint(TAINT_FWCTL, LOCKDEP_STILL_OK);
> +		}
> +		break;
> +	default:
> +		return -EOPNOTSUPP;
> +	};
> +
> +	void *inbuf __free(kvfree) =
> +		kvzalloc(cmd->in_len, GFP_KERNEL | GFP_KERNEL_ACCOUNT);
> +	if (!inbuf)
> +		return -ENOMEM;
> +	if (copy_from_user(inbuf, u64_to_user_ptr(cmd->in), cmd->in_len))
> +		return -EFAULT;
> +
> +	out_len = cmd->out_len;
> +	void *outbuf __free(kvfree_errptr) = fwctl->ops->fw_rpc(
> +		ucmd->uctx, cmd->scope, inbuf, cmd->in_len, &out_len);
Hi Jason,
First of all, great work. I fully support this direction. Although I'm not
working anymore in Habana, I would have definitely moved to this interface
instead of the custom one we implemented in our driver. I believe this can
be of use for other accel drivers as well.

Complex devices which contains multiple IPs and FWs, like Habana's Gaudi,
have some RPCs to the firmware which are not related to the funcationality
of the IP drivers (the compute and networking drivers in our case).

Spreading the RPCs between the drivers required to separate it also among
the different user-spaces libraries, as each subsystem has its own user-space.

Disconnecting the RPCs from the drivers and providing a generic interface will
allow to have a single user-space library which will be able to communicate
with the firmware for all the IPs in the device. In Habana's case, it was
mainly for monitoring and debugging purposes.

I do have one question about the rpc ioctl. Are you assuming that the rpc
is synchronous (we send a message to the firmware and block until we get the
reply)? If so, what happen if we have an async RPC implementation
inside the driver? How would you recommend to handle it?

Thanks,
Oded

> +	if (IS_ERR(outbuf))
> +		return PTR_ERR(outbuf);
> +	if (outbuf == inbuf) {
> +		/* The driver can re-use inbuf as outbuf */
> +		inbuf = NULL;
> +	}
> +
> +	if (copy_to_user(u64_to_user_ptr(cmd->out), outbuf,
> +			 min(cmd->out_len, out_len)))
> +		return -EFAULT;
> +
> +	cmd->out_len = out_len;
> +	return ucmd_respond(ucmd, sizeof(*cmd));
> +}
> +
>  /* On stack memory for the ioctl structs */
>  union ucmd_buffer {
>  	struct fwctl_info info;
> +	struct fwctl_rpc rpc;
>  };
>
>  struct fwctl_ioctl_op {
> @@ -98,6 +159,7 @@ struct fwctl_ioctl_op {
>  	}
>  static const struct fwctl_ioctl_op fwctl_ioctl_ops[] = {
>  	IOCTL_OP(FWCTL_INFO, fwctl_cmd_info, struct fwctl_info, out_device_data),
> +	IOCTL_OP(FWCTL_RPC, fwctl_cmd_rpc, struct fwctl_rpc, out),
>  };
>
>  static long fwctl_fops_ioctl(struct file *filp, unsigned int cmd,
> diff --git a/include/linux/fwctl.h b/include/linux/fwctl.h
> index 9a906b861acf3a..294cfbf63306a2 100644
> --- a/include/linux/fwctl.h
> +++ b/include/linux/fwctl.h
> @@ -26,6 +26,9 @@ struct fwctl_uctx;
>   *	out_device_data. On input length indicates the size of the user buffer
>   *	on output it indicates the size of the memory. The driver can ignore
>   *	length on input, the core code will handle everything.
> + * @fw_rpc: Implement FWCTL_RPC. Deliver rpc_in/in_len to the FW and return
> + *	the response and set out_len. rpc_in can be returned as the response
> + *	pointer. Otherwise the returned pointer is freed with kvfree().
>   */
>  struct fwctl_ops {
>  	enum fwctl_device_type device_type;
> @@ -33,6 +36,8 @@ struct fwctl_ops {
>  	int (*open_uctx)(struct fwctl_uctx *uctx);
>  	void (*close_uctx)(struct fwctl_uctx *uctx);
>  	void *(*info)(struct fwctl_uctx *uctx, size_t *length);
> +	void *(*fw_rpc)(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
> +			void *rpc_in, size_t in_len, size_t *out_len);
>  };
>
>  /**
> diff --git a/include/uapi/fwctl/fwctl.h b/include/uapi/fwctl/fwctl.h
> index 39db9f09f8068e..8bde0d4416fd55 100644
> --- a/include/uapi/fwctl/fwctl.h
> +++ b/include/uapi/fwctl/fwctl.h
> @@ -67,4 +67,70 @@ struct fwctl_info {
>  };
>  #define FWCTL_INFO _IO(FWCTL_TYPE, FWCTL_CMD_INFO)
>
> +/**
> + * enum fwctl_rpc_scope - Scope of access for the RPC
> + */
> +enum fwctl_rpc_scope {
> +	/**
> +	 * @FWCTL_RPC_CONFIGURATION: Device configuration access scope
> +	 *
> +	 * Read/write access to device configuration. When configuration
> +	 * is written to the device remains in a fully supported state.
> +	 */
> +	FWCTL_RPC_CONFIGURATION = 0,
> +	/**
> +	 * @FWCTL_RPC_DEBUG_READ_ONLY: Read only access to debug information
> +	 *
> +	 * Readable debug information. Debug information is compatible with
> +	 * kernel lockdown, and does not disclose any sensitive information. For
> +	 * instance exposing any encryption secrets from this information is
> +	 * forbidden.
> +	 */
> +	FWCTL_RPC_DEBUG_READ_ONLY = 1,
> +	/**
> +	 * @FWCTL_RPC_DEBUG_WRITE: Writable access to lockdown compatible debug information
> +	 *
> +	 * Allows write access to data in the device which may leave a fully
> +	 * supported state. This is intended to permit intensive and possibly
> +	 * invasive debugging. This scope will taint the kernel.
> +	 */
> +	FWCTL_RPC_DEBUG_WRITE = 2,
> +	/**
> +	 * @FWCTL_RPC_DEBUG_WRITE_FULL: Writable access to all debug information
> +	 *
> +	 * Allows read/write access to everything. Requires CAP_SYS_RAW_IO, so
> +	 * it is not required to follow lockdown principals. If in doubt
> +	 * debugging should be placed in this scope. This scope will taint the
> +	 * kernel.
> +	 */
> +	FWCTL_RPC_DEBUG_WRITE_FULL = 3,
> +};
> +
> +/**
> + * struct fwctl_rpc - ioctl(FWCTL_RPC)
> + * @size: sizeof(struct fwctl_rpc)
> + * @scope: One of enum fwctl_rpc_scope, required scope for the RPC
> + * @in_len: Length of the in memory
> + * @out_len: Length of the out memory
> + * @in: Request message in device specific format
> + * @out: Response message in device specific format
> + *
> + * Deliver a Remote Procedure Call to the device FW and return the response. The
> + * call's parameters and return are marshaled into linear buffers of memory. Any
> + * errno indicates that delivery of the RPC to the device failed. Return status
> + * originating in the device during a successful delivery must be encoded into
> + * out.
> + *
> + * The format of the buffers matches the out_device_type from FWCTL_INFO.
> + */
> +struct fwctl_rpc {
> +	__u32 size;
> +	__u32 scope;
> +	__u32 in_len;
> +	__u32 out_len;
> +	__aligned_u64 in;
> +	__aligned_u64 out;
> +};
> +#define FWCTL_RPC _IO(FWCTL_TYPE, FWCTL_CMD_RPC)
> +
>  #endif
> --
> 2.45.2
>
>
Jason Gunthorpe Aug. 8, 2024, 11:46 a.m. UTC | #9
On Wed, Aug 07, 2024 at 10:44:21AM +0300, Oded Gabbay wrote:
> Disconnecting the RPCs from the drivers and providing a generic interface will
> allow to have a single user-space library which will be able to communicate
> with the firmware for all the IPs in the device. In Habana's case, it was
> mainly for monitoring and debugging purposes.

Yeah, monitoring and debugging is definately a key use case.
 
> I do have one question about the rpc ioctl. Are you assuming that the rpc
> is synchronous (we send a message to the firmware and block until we get the
> reply)? If so, what happen if we have an async RPC implementation
> inside the driver? How would you recommend to handle it?

Yes, this is all simplified so the ioctl system call is
synchronous. mlx5 is async under the hood too, it just launches an
async work and waits on a completion before returning from the system
call.

Userspace could multi-thread it, and If people were really keen maybe
there is some io_uring approach.

Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c
index f1dec0b590aee4..9506b993a1a56d 100644
--- a/drivers/fwctl/main.c
+++ b/drivers/fwctl/main.c
@@ -8,16 +8,20 @@ 
 #include <linux/slab.h>
 #include <linux/container_of.h>
 #include <linux/fs.h>
+#include <linux/sizes.h>
 
 #include <uapi/fwctl/fwctl.h>
 
 enum {
 	FWCTL_MAX_DEVICES = 256,
+	MAX_RPC_LEN = SZ_2M,
 };
 static dev_t fwctl_dev;
 static DEFINE_IDA(fwctl_ida);
+static unsigned long fwctl_tainted;
 
 DEFINE_FREE(kfree_errptr, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T));
+DEFINE_FREE(kvfree_errptr, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T));
 
 struct fwctl_ucmd {
 	struct fwctl_uctx *uctx;
@@ -75,9 +79,66 @@  static int fwctl_cmd_info(struct fwctl_ucmd *ucmd)
 	return ucmd_respond(ucmd, sizeof(*cmd));
 }
 
+static int fwctl_cmd_rpc(struct fwctl_ucmd *ucmd)
+{
+	struct fwctl_device *fwctl = ucmd->uctx->fwctl;
+	struct fwctl_rpc *cmd = ucmd->cmd;
+	size_t out_len;
+
+	if (cmd->in_len > MAX_RPC_LEN || cmd->out_len > MAX_RPC_LEN)
+		return -EMSGSIZE;
+
+	switch (cmd->scope) {
+	case FWCTL_RPC_CONFIGURATION:
+	case FWCTL_RPC_DEBUG_READ_ONLY:
+		break;
+
+	case FWCTL_RPC_DEBUG_WRITE_FULL:
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+		fallthrough;
+	case FWCTL_RPC_DEBUG_WRITE:
+		if (!test_and_set_bit(0, &fwctl_tainted)) {
+			dev_warn(
+				&fwctl->dev,
+				"%s(%d): has requested full access to the physical device device",
+				current->comm, task_pid_nr(current));
+			add_taint(TAINT_FWCTL, LOCKDEP_STILL_OK);
+		}
+		break;
+	default:
+		return -EOPNOTSUPP;
+	};
+
+	void *inbuf __free(kvfree) =
+		kvzalloc(cmd->in_len, GFP_KERNEL | GFP_KERNEL_ACCOUNT);
+	if (!inbuf)
+		return -ENOMEM;
+	if (copy_from_user(inbuf, u64_to_user_ptr(cmd->in), cmd->in_len))
+		return -EFAULT;
+
+	out_len = cmd->out_len;
+	void *outbuf __free(kvfree_errptr) = fwctl->ops->fw_rpc(
+		ucmd->uctx, cmd->scope, inbuf, cmd->in_len, &out_len);
+	if (IS_ERR(outbuf))
+		return PTR_ERR(outbuf);
+	if (outbuf == inbuf) {
+		/* The driver can re-use inbuf as outbuf */
+		inbuf = NULL;
+	}
+
+	if (copy_to_user(u64_to_user_ptr(cmd->out), outbuf,
+			 min(cmd->out_len, out_len)))
+		return -EFAULT;
+
+	cmd->out_len = out_len;
+	return ucmd_respond(ucmd, sizeof(*cmd));
+}
+
 /* On stack memory for the ioctl structs */
 union ucmd_buffer {
 	struct fwctl_info info;
+	struct fwctl_rpc rpc;
 };
 
 struct fwctl_ioctl_op {
@@ -98,6 +159,7 @@  struct fwctl_ioctl_op {
 	}
 static const struct fwctl_ioctl_op fwctl_ioctl_ops[] = {
 	IOCTL_OP(FWCTL_INFO, fwctl_cmd_info, struct fwctl_info, out_device_data),
+	IOCTL_OP(FWCTL_RPC, fwctl_cmd_rpc, struct fwctl_rpc, out),
 };
 
 static long fwctl_fops_ioctl(struct file *filp, unsigned int cmd,
diff --git a/include/linux/fwctl.h b/include/linux/fwctl.h
index 9a906b861acf3a..294cfbf63306a2 100644
--- a/include/linux/fwctl.h
+++ b/include/linux/fwctl.h
@@ -26,6 +26,9 @@  struct fwctl_uctx;
  *	out_device_data. On input length indicates the size of the user buffer
  *	on output it indicates the size of the memory. The driver can ignore
  *	length on input, the core code will handle everything.
+ * @fw_rpc: Implement FWCTL_RPC. Deliver rpc_in/in_len to the FW and return
+ *	the response and set out_len. rpc_in can be returned as the response
+ *	pointer. Otherwise the returned pointer is freed with kvfree().
  */
 struct fwctl_ops {
 	enum fwctl_device_type device_type;
@@ -33,6 +36,8 @@  struct fwctl_ops {
 	int (*open_uctx)(struct fwctl_uctx *uctx);
 	void (*close_uctx)(struct fwctl_uctx *uctx);
 	void *(*info)(struct fwctl_uctx *uctx, size_t *length);
+	void *(*fw_rpc)(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
+			void *rpc_in, size_t in_len, size_t *out_len);
 };
 
 /**
diff --git a/include/uapi/fwctl/fwctl.h b/include/uapi/fwctl/fwctl.h
index 39db9f09f8068e..8bde0d4416fd55 100644
--- a/include/uapi/fwctl/fwctl.h
+++ b/include/uapi/fwctl/fwctl.h
@@ -67,4 +67,70 @@  struct fwctl_info {
 };
 #define FWCTL_INFO _IO(FWCTL_TYPE, FWCTL_CMD_INFO)
 
+/**
+ * enum fwctl_rpc_scope - Scope of access for the RPC
+ */
+enum fwctl_rpc_scope {
+	/**
+	 * @FWCTL_RPC_CONFIGURATION: Device configuration access scope
+	 *
+	 * Read/write access to device configuration. When configuration
+	 * is written to the device remains in a fully supported state.
+	 */
+	FWCTL_RPC_CONFIGURATION = 0,
+	/**
+	 * @FWCTL_RPC_DEBUG_READ_ONLY: Read only access to debug information
+	 *
+	 * Readable debug information. Debug information is compatible with
+	 * kernel lockdown, and does not disclose any sensitive information. For
+	 * instance exposing any encryption secrets from this information is
+	 * forbidden.
+	 */
+	FWCTL_RPC_DEBUG_READ_ONLY = 1,
+	/**
+	 * @FWCTL_RPC_DEBUG_WRITE: Writable access to lockdown compatible debug information
+	 *
+	 * Allows write access to data in the device which may leave a fully
+	 * supported state. This is intended to permit intensive and possibly
+	 * invasive debugging. This scope will taint the kernel.
+	 */
+	FWCTL_RPC_DEBUG_WRITE = 2,
+	/**
+	 * @FWCTL_RPC_DEBUG_WRITE_FULL: Writable access to all debug information
+	 *
+	 * Allows read/write access to everything. Requires CAP_SYS_RAW_IO, so
+	 * it is not required to follow lockdown principals. If in doubt
+	 * debugging should be placed in this scope. This scope will taint the
+	 * kernel.
+	 */
+	FWCTL_RPC_DEBUG_WRITE_FULL = 3,
+};
+
+/**
+ * struct fwctl_rpc - ioctl(FWCTL_RPC)
+ * @size: sizeof(struct fwctl_rpc)
+ * @scope: One of enum fwctl_rpc_scope, required scope for the RPC
+ * @in_len: Length of the in memory
+ * @out_len: Length of the out memory
+ * @in: Request message in device specific format
+ * @out: Response message in device specific format
+ *
+ * Deliver a Remote Procedure Call to the device FW and return the response. The
+ * call's parameters and return are marshaled into linear buffers of memory. Any
+ * errno indicates that delivery of the RPC to the device failed. Return status
+ * originating in the device during a successful delivery must be encoded into
+ * out.
+ *
+ * The format of the buffers matches the out_device_type from FWCTL_INFO.
+ */
+struct fwctl_rpc {
+	__u32 size;
+	__u32 scope;
+	__u32 in_len;
+	__u32 out_len;
+	__aligned_u64 in;
+	__aligned_u64 out;
+};
+#define FWCTL_RPC _IO(FWCTL_TYPE, FWCTL_CMD_RPC)
+
 #endif