[v2,5/6] pds_fwctl: add rpc and query support

Message ID 20250301013554.49511-6-shannon.nelson@amd.com (mailing list archive)
State Superseded
Series pds_fwctl: fwctl for AMD/Pensando core devices

Commit Message

Nelson, Shannon March 1, 2025, 1:35 a.m. UTC
From: Brett Creeley <brett.creeley@amd.com>

The pds_fwctl driver doesn't know what RPC operations are available
in the firmware, so also doesn't know what scope they might have.  The
userland utility supplies the firmware "endpoint" and "operation" id values
and this driver queries the firmware for endpoints and their available
operations.  The operation descriptions include the scope information
which the driver uses for scope testing.

Signed-off-by: Brett Creeley <brett.creeley@amd.com>
Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
---
 drivers/fwctl/pds/main.c       | 343 ++++++++++++++++++++++++++++++++-
 include/linux/pds/pds_adminq.h | 187 ++++++++++++++++++
 include/uapi/fwctl/pds.h       |  16 ++
 3 files changed, 544 insertions(+), 2 deletions(-)
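
As a usage sketch (illustrative only; names like ep_id, op_id and req_buf are
made up, and the generic fwctl ioctl plumbing around it is omitted), a userland
caller would fill the new header from include/uapi/fwctl/pds.h roughly like so:

	struct fwctl_rpc_pds rpc = {
		.in = {
			.ep      = ep_id,   /* endpoint id learned from the firmware query */
			.op      = op_id,   /* operation id learned from the firmware query */
			.len     = req_len,
			.payload = (__u64)(uintptr_t)req_buf,
		},
		.out = {
			.len     = resp_len,
			.payload = (__u64)(uintptr_t)resp_buf,
		},
	};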

Comments

Jonathan Cameron March 4, 2025, 9:08 a.m. UTC | #1
On Fri, 28 Feb 2025 17:35:53 -0800
Shannon Nelson <shannon.nelson@amd.com> wrote:

> From: Brett Creeley <brett.creeley@amd.com>
> 
> The pds_fwctl driver doesn't know what RPC operations are available
> in the firmware, so also doesn't know what scope they might have.  The
> userland utility supplies the firmware "endpoint" and "operation" id values
> and this driver queries the firmware for endpoints and their available
> operations.  The operation descriptions include the scope information
> which the driver uses for scope testing.
> 
> Signed-off-by: Brett Creeley <brett.creeley@amd.com>
> Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Hi

A few minor suggestions inline,

Jonathan




> +static int pdsfc_validate_rpc(struct pdsfc_dev *pdsfc,
> +			      struct fwctl_rpc_pds *rpc,
> +			      enum fwctl_rpc_scope scope)
> +{
> +	struct pds_fwctl_query_data_operation *op_entry;
> +	struct pdsfc_rpc_endpoint_info *ep_info = NULL;
> +	struct device *dev = &pdsfc->fwctl.dev;
> +	int i;
> +
> +	/* validate rpc in_len & out_len based
> +	 * on ident.max_req_sz & max_resp_sz
> +	 */
> +	if (rpc->in.len > pdsfc->ident.max_req_sz) {
> +		dev_err(dev, "Invalid request size %u, max %u\n",
> +			rpc->in.len, pdsfc->ident.max_req_sz);
> +		return -EINVAL;
> +	}
> +
> +	if (rpc->out.len > pdsfc->ident.max_resp_sz) {
> +		dev_err(dev, "Invalid response size %u, max %u\n",
> +			rpc->out.len, pdsfc->ident.max_resp_sz);
> +		return -EINVAL;
> +	}
> +
> +	for (i = 0; i < pdsfc->endpoints->num_entries; i++) {
> +		if (pdsfc->endpoint_info[i].endpoint == rpc->in.ep) {
> +			ep_info = &pdsfc->endpoint_info[i];
> +			break;
> +		}
> +	}
> +	if (!ep_info) {
> +		dev_err(dev, "Invalid endpoint %d\n", rpc->in.ep);
> +		return -EINVAL;
> +	}
> +
> +	/* query and cache this endpoint's operations */
> +	mutex_lock(&ep_info->lock);
> +	if (!ep_info->operations) {
> +		ep_info->operations = pdsfc_get_operations(pdsfc,
> +							   &ep_info->operations_pa,
> +							   rpc->in.ep);
> +		if (!ep_info->operations) {
> +			mutex_unlock(&ep_info->lock);
> +			dev_err(dev, "Failed to allocate operations list\n");
> +			return -ENOMEM;
> +		}
> +	}
> +	mutex_unlock(&ep_info->lock);
> +
> +	/* reject unsupported and/or out of scope commands */
> +	op_entry = (struct pds_fwctl_query_data_operation *)ep_info->operations->entries;
> +	for (i = 0; i < ep_info->operations->num_entries; i++) {
> +		if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, op_entry[i].id)) {
> +			if (scope < op_entry[i].scope)
> +				return -EPERM;
> +			return 0;
> +		}
> +	}
> +
> +	dev_err(dev, "Invalid operation %d for endpoint %d\n", rpc->in.op, rpc->in.ep);

Perhaps a little noisy as I think userspace can trigger this easily.  dev_dbg()
might be better.  -EINVAL should be all userspace needs under most circumstances.
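Something like this, perhaps (sketch only):

	dev_dbg(dev, "Invalid operation %d for endpoint %d\n",
		rpc->in.op, rpc->in.ep);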

> +
> +	return -EINVAL;
> +}
> +
>  static void *pdsfc_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
>  			  void *in, size_t in_len, size_t *out_len)
>  {
> -	return NULL;
> +	struct pdsfc_dev *pdsfc = container_of(uctx->fwctl, struct pdsfc_dev, fwctl);
> +	struct fwctl_rpc_pds *rpc = (struct fwctl_rpc_pds *)in;

in is a void * and the C spec always allows implicit conversion from void *,
so no cast is needed here.
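i.e. simply

	struct fwctl_rpc_pds *rpc = in;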

> +	struct device *dev = &uctx->fwctl->dev;
> +	union pds_core_adminq_comp comp = {0};
> +	dma_addr_t out_payload_dma_addr = 0;
> +	dma_addr_t in_payload_dma_addr = 0;
> +	union pds_core_adminq_cmd cmd;
> +	void *out_payload = NULL;
> +	void *in_payload = NULL;
> +	void *out = NULL;
> +	int err;
> +
> +	err = pdsfc_validate_rpc(pdsfc, rpc, scope);
> +	if (err) {
> +		dev_err(dev, "Invalid RPC request\n");
> +		return ERR_PTR(err);
> +	}
> +
> +	if (rpc->in.len > 0) {
> +		in_payload = kzalloc(rpc->in.len, GFP_KERNEL);
> +		if (!in_payload) {
> +			dev_err(dev, "Failed to allocate in_payload\n");
> +			out = ERR_PTR(-ENOMEM);
> +			goto done;
> +		}
> +
> +		if (copy_from_user(in_payload, u64_to_user_ptr(rpc->in.payload),
> +				   rpc->in.len)) {
> +			dev_err(dev, "Failed to copy in_payload from user\n");
> +			out = ERR_PTR(-EFAULT);
> +			goto done;
> +		}
> +
> +		in_payload_dma_addr = dma_map_single(dev->parent, in_payload,
> +						     rpc->in.len, DMA_TO_DEVICE);
> +		err = dma_mapping_error(dev->parent, in_payload_dma_addr);
> +		if (err) {
> +			dev_err(dev, "Failed to map in_payload\n");
> +			in_payload_dma_addr = 0;

See below for the comment on ordering and why ideally this zero setting should
not be needed in the error path.  Really small point though!

> +			out = ERR_PTR(err);
> +			goto done;
> +		}
> +	}
> +
> +	if (rpc->out.len > 0) {
> +		out_payload = kzalloc(rpc->out.len, GFP_KERNEL);
> +		if (!out_payload) {
> +			dev_err(dev, "Failed to allocate out_payload\n");
> +			out = ERR_PTR(-ENOMEM);
> +			goto done;
> +		}
> +
> +		out_payload_dma_addr = dma_map_single(dev->parent, out_payload,
> +						      rpc->out.len, DMA_FROM_DEVICE);
> +		err = dma_mapping_error(dev->parent, out_payload_dma_addr);
> +		if (err) {
> +			dev_err(dev, "Failed to map out_payload\n");
> +			out_payload_dma_addr = 0;

As above. Slight ordering tweaks and more labels would avoid the need
to zero this on error, as we would never use it again.

> +			out = ERR_PTR(err);
> +			goto done;
> +		}
> +	}
> +
> +	cmd = (union pds_core_adminq_cmd) {
> +		.fwctl_rpc = {
> +			.opcode = PDS_FWCTL_CMD_RPC,
> +			.flags = PDS_FWCTL_RPC_IND_REQ | PDS_FWCTL_RPC_IND_RESP,
> +			.ep = cpu_to_le32(rpc->in.ep),
> +			.op = cpu_to_le32(rpc->in.op),
> +			.req_pa = cpu_to_le64(in_payload_dma_addr),
> +			.req_sz = cpu_to_le32(rpc->in.len),
> +			.resp_pa = cpu_to_le64(out_payload_dma_addr),
> +			.resp_sz = cpu_to_le32(rpc->out.len),
> +		}
> +	};
> +
> +	err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
> +	if (err) {
> +		dev_err(dev, "%s: ep %d op %x req_pa %llx req_sz %d req_sg %d resp_pa %llx resp_sz %d resp_sg %d err %d\n",
> +			__func__, rpc->in.ep, rpc->in.op,
> +			cmd.fwctl_rpc.req_pa, cmd.fwctl_rpc.req_sz, cmd.fwctl_rpc.req_sg_elems,
> +			cmd.fwctl_rpc.resp_pa, cmd.fwctl_rpc.resp_sz, cmd.fwctl_rpc.resp_sg_elems,
> +			err);
> +		out = ERR_PTR(err);
> +		goto done;
> +	}
> +
> +	dynamic_hex_dump("out ", DUMP_PREFIX_OFFSET, 16, 1, out_payload, rpc->out.len, true);
> +
> +	if (copy_to_user(u64_to_user_ptr(rpc->out.payload), out_payload, rpc->out.len)) {
> +		dev_err(dev, "Failed to copy out_payload to user\n");
> +		out = ERR_PTR(-EFAULT);
> +		goto done;
> +	}
> +
> +	rpc->out.retval = le32_to_cpu(comp.fwctl_rpc.err);
> +	*out_len = in_len;
> +	out = in;
> +
> +done:
> +	if (in_payload_dma_addr)
> +		dma_unmap_single(dev->parent, in_payload_dma_addr,
> +				 rpc->in.len, DMA_TO_DEVICE);
> +
> +	if (out_payload_dma_addr)

Can we do these in reverse order of the setup path?
It obviously makes limited difference in practice.
With them in strict reverse order you could avoid the need to
zero out_payload_dma_addr and similar in error paths by
using multiple labels.
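Roughly this shape, for instance (untested sketch, label names made up, only
one error site shown; the len checks take the place of zeroing the dma addrs):

	err = dma_mapping_error(dev->parent, out_payload_dma_addr);
	if (err) {
		out = ERR_PTR(err);
		goto free_out_payload;
	}
	...
	out = in;

unmap_out_payload:
	if (rpc->out.len > 0)
		dma_unmap_single(dev->parent, out_payload_dma_addr,
				 rpc->out.len, DMA_FROM_DEVICE);
free_out_payload:
	kfree(out_payload);
unmap_in_payload:
	if (rpc->in.len > 0)
		dma_unmap_single(dev->parent, in_payload_dma_addr,
				 rpc->in.len, DMA_TO_DEVICE);
free_in_payload:
	kfree(in_payload);

	return out;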

> +		dma_unmap_single(dev->parent, out_payload_dma_addr,
> +				 rpc->out.len, DMA_FROM_DEVICE);
> +
> +	kfree(in_payload);
> +	kfree(out_payload);
> +
> +	return out;

At least some cases are simplified if you do
	if (err)
		return ERR_PTR(err);

	return in;

and handle all errors via err integer until here.
>  }

>  static const struct auxiliary_device_id pdsfc_id_table[] = {
> diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h
> index ae6886ebc841..03bca2d27de0 100644
> --- a/include/linux/pds/pds_adminq.h
> +++ b/include/linux/pds/pds_adminq.h

> +/**
> + * struct pds_fwctl_query_data - query data structure
> + * @version:     Version of the query data structure
> + * @rsvd:        Word boundary padding
> + * @num_entries: Number of entries in the union
> + * @entries:     Array of query data entries, depending on the entity type.
> + */
> +struct pds_fwctl_query_data {
> +	u8      version;
> +	u8      rsvd[3];
> +	__le32  num_entries;
> +	uint8_t entries[];

__counted_by_le() probably a nice to have here.
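i.e. something like (sketch, also moving the flex array to the usual u8):

	struct pds_fwctl_query_data {
		u8     version;
		u8     rsvd[3];
		__le32 num_entries;
		u8     entries[] __counted_by_le(num_entries);
	} __packed;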


> +} __packed;

> +/**
> + * struct pds_sg_elem - Transmit scatter-gather (SG) descriptor element
> + * @addr:	DMA address of SG element data buffer
> + * @len:	Length of SG element data buffer, in bytes
> + * @rsvd:	Word boundary padding
> + */
> +struct pds_sg_elem {
> +	__le64 addr;
> +	__le32 len;
> +	__le16 rsvd[2];

__le32 rsvd; ?  Or stick to u8 only.
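e.g. (sketch)

	u8     rsvd[4];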


> +} __packed;
> +
> +/**
> + * struct pds_fwctl_rpc_comp - Completion of a firmware control RPC.
> + * @status:     Status of the command
> + * @rsvd:       Word boundary padding
> + * @comp_index: Completion index of the command
> + * @err:        Error code, if any, from the RPC.

Odd mix of full stop and not.  Make it more consistent.

> + * @resp_sz:    Size of the response.
> + * @rsvd2:      Word boundary padding

words changing size in one structure?  Just stick to "Reserved" for
the docs. Never any reason to explicitly talk about 'why' things
are reserved.

> + * @color:      Color bit indicating the state of the completion.
> + */
> +struct pds_fwctl_rpc_comp {
> +	u8     status;
> +	u8     rsvd;
> +	__le16 comp_index;
> +	__le32 err;
> +	__le32 resp_sz;
> +	u8     rsvd2[3];
> +	u8     color;
> +} __packed;
> +
>  union pds_core_adminq_cmd {
>  	u8     opcode;
>  	u8     bytes[64];
> @@ -1291,6 +1474,8 @@ union pds_core_adminq_cmd {
>  
>  	struct pds_fwctl_cmd		  fwctl;
>  	struct pds_fwctl_ident_cmd	  fwctl_ident;
> +	struct pds_fwctl_rpc_cmd	  fwctl_rpc;
> +	struct pds_fwctl_query_cmd	  fwctl_query;
>  };
>  
>  union pds_core_adminq_comp {
> @@ -1320,6 +1505,8 @@ union pds_core_adminq_comp {
>  	struct pds_lm_dirty_status_comp	  lm_dirty_status;
>  
>  	struct pds_fwctl_comp		  fwctl;
> +	struct pds_fwctl_rpc_comp	  fwctl_rpc;
> +	struct pds_fwctl_query_comp	  fwctl_query;
>  };
>  
>  #ifndef __CHECKER__
> diff --git a/include/uapi/fwctl/pds.h b/include/uapi/fwctl/pds.h
> index a01b032cbdb1..da6cd2d1c6fa 100644
> --- a/include/uapi/fwctl/pds.h
> +++ b/include/uapi/fwctl/pds.h
> @@ -24,4 +24,20 @@ enum pds_fwctl_capabilities {
>  	PDS_FWCTL_QUERY_CAP = 0,
>  	PDS_FWCTL_SEND_CAP,
>  };
> +
> +struct fwctl_rpc_pds {
> +	struct {
> +		__u32 op;
> +		__u32 ep;
> +		__u32 rsvd;
> +		__u32 len;
> +		__u64 payload;
> +	} in;
> +	struct {
> +		__u32 retval;
> +		__u32 rsvd[2];
> +		__u32 len;
> +		__u64 payload;
> +	} out;
> +};
>  #endif /* _UAPI_FWCTL_PDS_H_ */
Jason Gunthorpe March 4, 2025, 5:24 p.m. UTC | #2
On Tue, Mar 04, 2025 at 05:08:08PM +0800, Jonathan Cameron wrote:
> > +	dev_err(dev, "Invalid operation %d for endpoint %d\n", rpc->in.op, rpc->in.ep);
> 
> Perhaps a little noisy as I think userspace can trigger this easily.  dev_dbg()
> might be better.  -EINVAL should be all userspace needs under most circumstances.

Yes, please remove or degrade to dbg all the prints that userspace
could trigger.

> > +	if (rpc->in.len > 0) {
> > +		in_payload = kzalloc(rpc->in.len, GFP_KERNEL);
> > +		if (!in_payload) {
> > +			dev_err(dev, "Failed to allocate in_payload\n");

kzalloc is already super noisy if it fails

> > +			out = ERR_PTR(-ENOMEM);
> > +			goto done;
> > +		}
> > +
> > +		if (copy_from_user(in_payload, u64_to_user_ptr(rpc->in.payload),
> > +				   rpc->in.len)) {
> > +			dev_err(dev, "Failed to copy in_payload from user\n");
> > +			out = ERR_PTR(-EFAULT);
> > +			goto done;
> > +		}
> > +
> > +		in_payload_dma_addr = dma_map_single(dev->parent, in_payload,
> > +						     rpc->in.len, DMA_TO_DEVICE);
> > +		err = dma_mapping_error(dev->parent, in_payload_dma_addr);
> > +		if (err) {
> > +			dev_err(dev, "Failed to map in_payload\n");
> > +			in_payload_dma_addr = 0;

etc

> > +	err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
> > +	if (err) {
> > +		dev_err(dev, "%s: ep %d op %x req_pa %llx req_sz %d req_sg %d resp_pa %llx resp_sz %d resp_sg %d err %d\n",
> > +			__func__, rpc->in.ep, rpc->in.op,
> > +			cmd.fwctl_rpc.req_pa, cmd.fwctl_rpc.req_sz, cmd.fwctl_rpc.req_sg_elems,
> > +			cmd.fwctl_rpc.resp_pa, cmd.fwctl_rpc.resp_sz, cmd.fwctl_rpc.resp_sg_elems,
> > +			err);

Triggerable by a malformed RPC?

> > +		out = ERR_PTR(err);
> > +		goto done;
> > +	}
> > +
> > +	dynamic_hex_dump("out ", DUMP_PREFIX_OFFSET, 16, 1, out_payload, rpc->out.len, true);
> > +
> > +	if (copy_to_user(u64_to_user_ptr(rpc->out.payload), out_payload, rpc->out.len)) {
> > +		dev_err(dev, "Failed to copy out_payload to user\n");

Triggerable by a malformed user provided pointer

Jason
Jason Gunthorpe March 4, 2025, 7:52 p.m. UTC | #3
On Fri, Feb 28, 2025 at 05:35:53PM -0800, Shannon Nelson wrote:

> +	/* reject unsupported and/or out of scope commands */
> +	op_entry = (struct pds_fwctl_query_data_operation *)ep_info->operations->entries;
> +	for (i = 0; i < ep_info->operations->num_entries; i++) {
> +		if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, op_entry[i].id)) {
> +			if (scope < op_entry[i].scope)
> +				return -EPERM;
> +			return 0;

Oh I see, you are doing the scope like CXL with a "command intent's
report". That is a good idea.

> +	if (rpc->in.len > 0) {
> +		in_payload = kzalloc(rpc->in.len, GFP_KERNEL);
> +		if (!in_payload) {
> +			dev_err(dev, "Failed to allocate in_payload\n");
> +			out = ERR_PTR(-ENOMEM);
> +			goto done;
> +		}
> +
> +		if (copy_from_user(in_payload, u64_to_user_ptr(rpc->in.payload),
> +				   rpc->in.len)) {
> +			dev_err(dev, "Failed to copy in_payload from user\n");
> +			out = ERR_PTR(-EFAULT);
> +			goto done;
> +		}

This wasn't really the intention to have a payload pointer inside the
existing payload pointer, is it the same issue as CXL where you wanted
a header?

I see your FW API also can't take a scatterlist, so it makes sense it
needs to be linearized like this.

I don't think it makes a difference to have the indirection or not, it
would be a bunch of stuff to keep the header separate from the payload
and then still also end up with memcpy in the driver, so leave it.

> +#define PDS_FWCTL_RPC_OPCODE_CMD_SHIFT	0
> +#define PDS_FWCTL_RPC_OPCODE_CMD_MASK	GENMASK(15, PDS_FWCTL_RPC_OPCODE_CMD_SHIFT)
> +#define PDS_FWCTL_RPC_OPCODE_VER_SHIFT	16
> +#define PDS_FWCTL_RPC_OPCODE_VER_MASK	GENMASK(23, PDS_FWCTL_RPC_OPCODE_VER_SHIFT)
> +
> +#define PDS_FWCTL_RPC_OPCODE_GET_CMD(op)	\
> +	(((op) & PDS_FWCTL_RPC_OPCODE_CMD_MASK) >> PDS_FWCTL_RPC_OPCODE_CMD_SHIFT)

Isn't that FIELD_GET?
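i.e. something like (sketch; needs linux/bitfield.h, and the _SHIFT defines
can then be folded directly into the GENMASK()s):

#define PDS_FWCTL_RPC_OPCODE_GET_CMD(op)	\
	FIELD_GET(PDS_FWCTL_RPC_OPCODE_CMD_MASK, op)
#define PDS_FWCTL_RPC_OPCODE_GET_VER(op)	\
	FIELD_GET(PDS_FWCTL_RPC_OPCODE_VER_MASK, op)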

> +struct fwctl_rpc_pds {
> +	struct {
> +		__u32 op;
> +		__u32 ep;
> +		__u32 rsvd;
> +		__u32 len;
> +		__u64 payload;
> +	} in;
> +	struct {
> +		__u32 retval;
> +		__u32 rsvd[2];
> +		__u32 len;
> +		__u64 payload;
> +	} out;
> +};

Use __aligned_u64 in the uapi headers
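i.e. (sketch)

		__aligned_u64 payload;

for both the in and out payload members.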

Jason
Nelson, Shannon March 6, 2025, 1:55 a.m. UTC | #4
On 3/4/2025 1:08 AM, Jonathan Cameron wrote:
> On Fri, 28 Feb 2025 17:35:53 -0800
> Shannon Nelson <shannon.nelson@amd.com> wrote:
> 
>> From: Brett Creeley <brett.creeley@amd.com>
>>
>> The pds_fwctl driver doesn't know what RPC operations are available
>> in the firmware, so also doesn't know what scope they might have.  The
>> userland utility supplies the firmware "endpoint" and "operation" id values
>> and this driver queries the firmware for endpoints and their available
>> operations.  The operation descriptions include the scope information
>> which the driver uses for scope testing.
>>
>> Signed-off-by: Brett Creeley <brett.creeley@amd.com>
>> Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
> Hi
> 
> A few minor suggestions inline,
> 
> Jonathan
> 
> 
> 
> 
>> +static int pdsfc_validate_rpc(struct pdsfc_dev *pdsfc,
>> +                           struct fwctl_rpc_pds *rpc,
>> +                           enum fwctl_rpc_scope scope)
>> +{
>> +     struct pds_fwctl_query_data_operation *op_entry;
>> +     struct pdsfc_rpc_endpoint_info *ep_info = NULL;
>> +     struct device *dev = &pdsfc->fwctl.dev;
>> +     int i;
>> +
>> +     /* validate rpc in_len & out_len based
>> +      * on ident.max_req_sz & max_resp_sz
>> +      */
>> +     if (rpc->in.len > pdsfc->ident.max_req_sz) {
>> +             dev_err(dev, "Invalid request size %u, max %u\n",
>> +                     rpc->in.len, pdsfc->ident.max_req_sz);
>> +             return -EINVAL;
>> +     }
>> +
>> +     if (rpc->out.len > pdsfc->ident.max_resp_sz) {
>> +             dev_err(dev, "Invalid response size %u, max %u\n",
>> +                     rpc->out.len, pdsfc->ident.max_resp_sz);
>> +             return -EINVAL;
>> +     }
>> +
>> +     for (i = 0; i < pdsfc->endpoints->num_entries; i++) {
>> +             if (pdsfc->endpoint_info[i].endpoint == rpc->in.ep) {
>> +                     ep_info = &pdsfc->endpoint_info[i];
>> +                     break;
>> +             }
>> +     }
>> +     if (!ep_info) {
>> +             dev_err(dev, "Invalid endpoint %d\n", rpc->in.ep);
>> +             return -EINVAL;
>> +     }
>> +
>> +     /* query and cache this endpoint's operations */
>> +     mutex_lock(&ep_info->lock);
>> +     if (!ep_info->operations) {
>> +             ep_info->operations = pdsfc_get_operations(pdsfc,
>> +                                                        &ep_info->operations_pa,
>> +                                                        rpc->in.ep);
>> +             if (!ep_info->operations) {
>> +                     mutex_unlock(&ep_info->lock);
>> +                     dev_err(dev, "Failed to allocate operations list\n");
>> +                     return -ENOMEM;
>> +             }
>> +     }
>> +     mutex_unlock(&ep_info->lock);
>> +
>> +     /* reject unsupported and/or out of scope commands */
>> +     op_entry = (struct pds_fwctl_query_data_operation *)ep_info->operations->entries;
>> +     for (i = 0; i < ep_info->operations->num_entries; i++) {
>> +             if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, op_entry[i].id)) {
>> +                     if (scope < op_entry[i].scope)
>> +                             return -EPERM;
>> +                     return 0;
>> +             }
>> +     }
>> +
>> +     dev_err(dev, "Invalid operation %d for endpoint %d\n", rpc->in.op, rpc->in.ep);
> 
> Perhaps a little noisy as I think userspace can trigger this easily.  dev_dbg()
> might be better.  -EINVAL should be all userspace needs under most circumstances.

Sure

> 
>> +
>> +     return -EINVAL;
>> +}
>> +
>>   static void *pdsfc_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
>>                          void *in, size_t in_len, size_t *out_len)
>>   {
>> -     return NULL;
>> +     struct pdsfc_dev *pdsfc = container_of(uctx->fwctl, struct pdsfc_dev, fwctl);
>> +     struct fwctl_rpc_pds *rpc = (struct fwctl_rpc_pds *)in;
> 
> in is a void * and the C spec always allows implicit conversion from void *,
> so no cast is needed here.

Will fix

> 
>> +     struct device *dev = &uctx->fwctl->dev;
>> +     union pds_core_adminq_comp comp = {0};
>> +     dma_addr_t out_payload_dma_addr = 0;
>> +     dma_addr_t in_payload_dma_addr = 0;
>> +     union pds_core_adminq_cmd cmd;
>> +     void *out_payload = NULL;
>> +     void *in_payload = NULL;
>> +     void *out = NULL;
>> +     int err;
>> +
>> +     err = pdsfc_validate_rpc(pdsfc, rpc, scope);
>> +     if (err) {
>> +             dev_err(dev, "Invalid RPC request\n");
>> +             return ERR_PTR(err);
>> +     }
>> +
>> +     if (rpc->in.len > 0) {
>> +             in_payload = kzalloc(rpc->in.len, GFP_KERNEL);
>> +             if (!in_payload) {
>> +                     dev_err(dev, "Failed to allocate in_payload\n");
>> +                     out = ERR_PTR(-ENOMEM);
>> +                     goto done;
>> +             }
>> +
>> +             if (copy_from_user(in_payload, u64_to_user_ptr(rpc->in.payload),
>> +                                rpc->in.len)) {
>> +                     dev_err(dev, "Failed to copy in_payload from user\n");
>> +                     out = ERR_PTR(-EFAULT);
>> +                     goto done;
>> +             }
>> +
>> +             in_payload_dma_addr = dma_map_single(dev->parent, in_payload,
>> +                                                  rpc->in.len, DMA_TO_DEVICE);
>> +             err = dma_mapping_error(dev->parent, in_payload_dma_addr);
>> +             if (err) {
>> +                     dev_err(dev, "Failed to map in_payload\n");
>> +                     in_payload_dma_addr = 0;
> 
> See below for the comment on ordering and why ideally this zero setting should
> not be needed in the error path.  Really small point though!
> 
>> +                     out = ERR_PTR(err);
>> +                     goto done;
>> +             }
>> +     }
>> +
>> +     if (rpc->out.len > 0) {
>> +             out_payload = kzalloc(rpc->out.len, GFP_KERNEL);
>> +             if (!out_payload) {
>> +                     dev_err(dev, "Failed to allocate out_payload\n");
>> +                     out = ERR_PTR(-ENOMEM);
>> +                     goto done;
>> +             }
>> +
>> +             out_payload_dma_addr = dma_map_single(dev->parent, out_payload,
>> +                                                   rpc->out.len, DMA_FROM_DEVICE);
>> +             err = dma_mapping_error(dev->parent, out_payload_dma_addr);
>> +             if (err) {
>> +                     dev_err(dev, "Failed to map out_payload\n");
>> +                     out_payload_dma_addr = 0;
> 
> As above. Slight ordering tweaks and more labels would avoid the need
> to zero this on error, as we would never use it again.
> 
>> +                     out = ERR_PTR(err);
>> +                     goto done;
>> +             }
>> +     }
>> +
>> +     cmd = (union pds_core_adminq_cmd) {
>> +             .fwctl_rpc = {
>> +                     .opcode = PDS_FWCTL_CMD_RPC,
>> +                     .flags = PDS_FWCTL_RPC_IND_REQ | PDS_FWCTL_RPC_IND_RESP,
>> +                     .ep = cpu_to_le32(rpc->in.ep),
>> +                     .op = cpu_to_le32(rpc->in.op),
>> +                     .req_pa = cpu_to_le64(in_payload_dma_addr),
>> +                     .req_sz = cpu_to_le32(rpc->in.len),
>> +                     .resp_pa = cpu_to_le64(out_payload_dma_addr),
>> +                     .resp_sz = cpu_to_le32(rpc->out.len),
>> +             }
>> +     };
>> +
>> +     err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
>> +     if (err) {
>> +             dev_err(dev, "%s: ep %d op %x req_pa %llx req_sz %d req_sg %d resp_pa %llx resp_sz %d resp_sg %d err %d\n",
>> +                     __func__, rpc->in.ep, rpc->in.op,
>> +                     cmd.fwctl_rpc.req_pa, cmd.fwctl_rpc.req_sz, cmd.fwctl_rpc.req_sg_elems,
>> +                     cmd.fwctl_rpc.resp_pa, cmd.fwctl_rpc.resp_sz, cmd.fwctl_rpc.resp_sg_elems,
>> +                     err);
>> +             out = ERR_PTR(err);
>> +             goto done;
>> +     }
>> +
>> +     dynamic_hex_dump("out ", DUMP_PREFIX_OFFSET, 16, 1, out_payload, rpc->out.len, true);
>> +
>> +     if (copy_to_user(u64_to_user_ptr(rpc->out.payload), out_payload, rpc->out.len)) {
>> +             dev_err(dev, "Failed to copy out_payload to user\n");
>> +             out = ERR_PTR(-EFAULT);
>> +             goto done;
>> +     }
>> +
>> +     rpc->out.retval = le32_to_cpu(comp.fwctl_rpc.err);
>> +     *out_len = in_len;
>> +     out = in;
>> +
>> +done:
>> +     if (in_payload_dma_addr)
>> +             dma_unmap_single(dev->parent, in_payload_dma_addr,
>> +                              rpc->in.len, DMA_TO_DEVICE);
>> +
>> +     if (out_payload_dma_addr)
> 
> Can we do these in reverse order of the setup path?
> It obviously makes limited difference in practice.
> With them in strict reverse order you could avoid the need to
> zero out_payload_dma_addr and similar in error paths by
> using multiple labels.

Yeah, I'll reorder this with a couple more labels.

> 
>> +             dma_unmap_single(dev->parent, out_payload_dma_addr,
>> +                              rpc->out.len, DMA_FROM_DEVICE);
>> +
>> +     kfree(in_payload);
>> +     kfree(out_payload);
>> +
>> +     return out;
> 
> At least some cases are simplified if you do
>          if (err)
>                  return ERR_PTR(err);
> 
>          return in;
> 
> and handle all errors via err integer until here.
>>   }
> 
>>   static const struct auxiliary_device_id pdsfc_id_table[] = {
>> diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h
>> index ae6886ebc841..03bca2d27de0 100644
>> --- a/include/linux/pds/pds_adminq.h
>> +++ b/include/linux/pds/pds_adminq.h
> 
>> +/**
>> + * struct pds_fwctl_query_data - query data structure
>> + * @version:     Version of the query data structure
>> + * @rsvd:        Word boundary padding
>> + * @num_entries: Number of entries in the union
>> + * @entries:     Array of query data entries, depending on the entity type.
>> + */
>> +struct pds_fwctl_query_data {
>> +     u8      version;
>> +     u8      rsvd[3];
>> +     __le32  num_entries;
>> +     uint8_t entries[];
> 
> __counted_by_le() probably a nice to have here.

Yes.


> 
> 
>> +} __packed;
> 
>> +/**
>> + * struct pds_sg_elem - Transmit scatter-gather (SG) descriptor element
>> + * @addr:    DMA address of SG element data buffer
>> + * @len:     Length of SG element data buffer, in bytes
>> + * @rsvd:    Word boundary padding
>> + */
>> +struct pds_sg_elem {
>> +     __le64 addr;
>> +     __le32 len;
>> +     __le16 rsvd[2];
> 
> __le32 rsvd; ?  Or stick to u8 only.

I'll use u8

> 
> 
>> +} __packed;
>> +
>> +/**
>> + * struct pds_fwctl_rpc_comp - Completion of a firmware control RPC.
>> + * @status:     Status of the command
>> + * @rsvd:       Word boundary padding
>> + * @comp_index: Completion index of the command
>> + * @err:        Error code, if any, from the RPC.
> 
> Odd mix of full stop and not.  Make it more consistent.

Sure


> 
>> + * @resp_sz:    Size of the response.
>> + * @rsvd2:      Word boundary padding
> 
> words changing size in one structure?  Just stick to "Reserved" for
> the docs. Never any reason to explicitly talk about 'why' things
> are reserved.

Sure

Thanks,
sln

> 
>> + * @color:      Color bit indicating the state of the completion.
>> + */
>> +struct pds_fwctl_rpc_comp {
>> +     u8     status;
>> +     u8     rsvd;
>> +     __le16 comp_index;
>> +     __le32 err;
>> +     __le32 resp_sz;
>> +     u8     rsvd2[3];
>> +     u8     color;
>> +} __packed;
>> +
>>   union pds_core_adminq_cmd {
>>        u8     opcode;
>>        u8     bytes[64];
>> @@ -1291,6 +1474,8 @@ union pds_core_adminq_cmd {
>>
>>        struct pds_fwctl_cmd              fwctl;
>>        struct pds_fwctl_ident_cmd        fwctl_ident;
>> +     struct pds_fwctl_rpc_cmd          fwctl_rpc;
>> +     struct pds_fwctl_query_cmd        fwctl_query;
>>   };
>>
>>   union pds_core_adminq_comp {
>> @@ -1320,6 +1505,8 @@ union pds_core_adminq_comp {
>>        struct pds_lm_dirty_status_comp   lm_dirty_status;
>>
>>        struct pds_fwctl_comp             fwctl;
>> +     struct pds_fwctl_rpc_comp         fwctl_rpc;
>> +     struct pds_fwctl_query_comp       fwctl_query;
>>   };
>>
>>   #ifndef __CHECKER__
>> diff --git a/include/uapi/fwctl/pds.h b/include/uapi/fwctl/pds.h
>> index a01b032cbdb1..da6cd2d1c6fa 100644
>> --- a/include/uapi/fwctl/pds.h
>> +++ b/include/uapi/fwctl/pds.h
>> @@ -24,4 +24,20 @@ enum pds_fwctl_capabilities {
>>        PDS_FWCTL_QUERY_CAP = 0,
>>        PDS_FWCTL_SEND_CAP,
>>   };
>> +
>> +struct fwctl_rpc_pds {
>> +     struct {
>> +             __u32 op;
>> +             __u32 ep;
>> +             __u32 rsvd;
>> +             __u32 len;
>> +             __u64 payload;
>> +     } in;
>> +     struct {
>> +             __u32 retval;
>> +             __u32 rsvd[2];
>> +             __u32 len;
>> +             __u64 payload;
>> +     } out;
>> +};
>>   #endif /* _UAPI_FWCTL_PDS_H_ */
>
Nelson, Shannon March 6, 2025, 2:02 a.m. UTC | #5
On 3/4/2025 9:24 AM, Jason Gunthorpe wrote:
> On Tue, Mar 04, 2025 at 05:08:08PM +0800, Jonathan Cameron wrote:
>>> +   dev_err(dev, "Invalid operation %d for endpoint %d\n", rpc->in.op, rpc->in.ep);
>>
>> Perhaps a little noisy as I think userspace can trigger this easily.  dev_dbg()
>> might be better.  -EINVAL should be all userspace needs under most circumstances.
> 
> Yes, please remove or degrade to dbg all the prints that userspace
> could trigger.

Sure

> 
>>> +   if (rpc->in.len > 0) {
>>> +           in_payload = kzalloc(rpc->in.len, GFP_KERNEL);
>>> +           if (!in_payload) {
>>> +                   dev_err(dev, "Failed to allocate in_payload\n");
> 
> kzalloc is already super noisy if it fails

I know ... I get dinged on this all the time, but I still add these 
because there are multiple allocs in this one function and it isn't 
immediately obvious in the trace which alloc failed.  I could push each 
alloc off to a separate function to get that info in the trace, but that 
seems like extra unnecessary code just to get one more line of detail.


> 
>>> +                   out = ERR_PTR(-ENOMEM);
>>> +                   goto done;
>>> +           }
>>> +
>>> +           if (copy_from_user(in_payload, u64_to_user_ptr(rpc->in.payload),
>>> +                              rpc->in.len)) {
>>> +                   dev_err(dev, "Failed to copy in_payload from user\n");
>>> +                   out = ERR_PTR(-EFAULT);
>>> +                   goto done;
>>> +           }
>>> +
>>> +           in_payload_dma_addr = dma_map_single(dev->parent, in_payload,
>>> +                                                rpc->in.len, DMA_TO_DEVICE);
>>> +           err = dma_mapping_error(dev->parent, in_payload_dma_addr);
>>> +           if (err) {
>>> +                   dev_err(dev, "Failed to map in_payload\n");
>>> +                   in_payload_dma_addr = 0;
> 
> etc
> 
>>> +   err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
>>> +   if (err) {
>>> +           dev_err(dev, "%s: ep %d op %x req_pa %llx req_sz %d req_sg %d resp_pa %llx resp_sz %d resp_sg %d err %d\n",
>>> +                   __func__, rpc->in.ep, rpc->in.op,
>>> +                   cmd.fwctl_rpc.req_pa, cmd.fwctl_rpc.req_sz, cmd.fwctl_rpc.req_sg_elems,
>>> +                   cmd.fwctl_rpc.resp_pa, cmd.fwctl_rpc.resp_sz, cmd.fwctl_rpc.resp_sg_elems,
>>> +                   err);
> 
> Triggerable by a malformed RPC?

That or misbehaving firmware.

> 
>>> +           out = ERR_PTR(err);
>>> +           goto done;
>>> +   }
>>> +
>>> +   dynamic_hex_dump("out ", DUMP_PREFIX_OFFSET, 16, 1, out_payload, rpc->out.len, true);
>>> +
>>> +   if (copy_to_user(u64_to_user_ptr(rpc->out.payload), out_payload, rpc->out.len)) {
>>> +           dev_err(dev, "Failed to copy out_payload to user\n");
> 
> Triggerable by a malformed user provided pointer

yes

> 
> Jason

Thanks,
sln
Nelson, Shannon March 6, 2025, 2:07 a.m. UTC | #6
On 3/4/2025 11:52 AM, Jason Gunthorpe wrote:
> On Fri, Feb 28, 2025 at 05:35:53PM -0800, Shannon Nelson wrote:
> 
>> +     /* reject unsupported and/or out of scope commands */
>> +     op_entry = (struct pds_fwctl_query_data_operation *)ep_info->operations->entries;
>> +     for (i = 0; i < ep_info->operations->num_entries; i++) {
>> +             if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, op_entry[i].id)) {
>> +                     if (scope < op_entry[i].scope)
>> +                             return -EPERM;
>> +                     return 0;
> 
> Oh I see, you are doing the scope like CXL with a "command intent's
> report". That is a good idea.
> 
>> +     if (rpc->in.len > 0) {
>> +             in_payload = kzalloc(rpc->in.len, GFP_KERNEL);
>> +             if (!in_payload) {
>> +                     dev_err(dev, "Failed to allocate in_payload\n");
>> +                     out = ERR_PTR(-ENOMEM);
>> +                     goto done;
>> +             }
>> +
>> +             if (copy_from_user(in_payload, u64_to_user_ptr(rpc->in.payload),
>> +                                rpc->in.len)) {
>> +                     dev_err(dev, "Failed to copy in_payload from user\n");
>> +                     out = ERR_PTR(-EFAULT);
>> +                     goto done;
>> +             }
> 
> This wasn't really the intention to have a payload pointer inside the
> existing payload pointer, is it the same issue as CXL where you wanted
> a header?
> 
> I see your FW API also can't take a scatterlist, so it makes sense it
> needs to be linearized like this.
> 
> I don't think it makes a difference to have the indirection or not, it
> would be a bunch of stuff to keep the header separate from the payload
> and then still also end up with memcpy in the driver, so leave it.
> 
>> +#define PDS_FWCTL_RPC_OPCODE_CMD_SHIFT       0
>> +#define PDS_FWCTL_RPC_OPCODE_CMD_MASK        GENMASK(15, PDS_FWCTL_RPC_OPCODE_CMD_SHIFT)
>> +#define PDS_FWCTL_RPC_OPCODE_VER_SHIFT       16
>> +#define PDS_FWCTL_RPC_OPCODE_VER_MASK        GENMASK(23, PDS_FWCTL_RPC_OPCODE_VER_SHIFT)
>> +
>> +#define PDS_FWCTL_RPC_OPCODE_GET_CMD(op)     \
>> +     (((op) & PDS_FWCTL_RPC_OPCODE_CMD_MASK) >> PDS_FWCTL_RPC_OPCODE_CMD_SHIFT)
> 
> Isn't that FIELD_GET?

Hmm... I think so.  I'll poke at that.

> 
>> +struct fwctl_rpc_pds {
>> +     struct {
>> +             __u32 op;
>> +             __u32 ep;
>> +             __u32 rsvd;
>> +             __u32 len;
>> +             __u64 payload;
>> +     } in;
>> +     struct {
>> +             __u32 retval;
>> +             __u32 rsvd[2];
>> +             __u32 len;
>> +             __u64 payload;
>> +     } out;
>> +};
> 
> Use __aligned_u64 in the uapi headers

Sure.

Thanks,
sln



> 
> Jason

Patch

diff --git a/drivers/fwctl/pds/main.c b/drivers/fwctl/pds/main.c
index afc7dc283ad5..bb2ac0ca3963 100644
--- a/drivers/fwctl/pds/main.c
+++ b/drivers/fwctl/pds/main.c
@@ -21,12 +21,22 @@  struct pdsfc_uctx {
 	u32 uctx_uid;
 };
 
+struct pdsfc_rpc_endpoint_info {
+	u32 endpoint;
+	dma_addr_t operations_pa;
+	struct pds_fwctl_query_data *operations;
+	struct mutex lock;	/* lock for endpoint info management */
+};
+
 struct pdsfc_dev {
 	struct fwctl_device fwctl;
 	struct pds_auxiliary_dev *padev;
 	struct pdsc *pdsc;
 	u32 caps;
 	struct pds_fwctl_ident ident;
+	dma_addr_t endpoints_pa;
+	struct pds_fwctl_query_data *endpoints;
+	struct pdsfc_rpc_endpoint_info *endpoint_info;
 };
 DEFINE_FREE(pdsfc_dev, struct pdsfc_dev *, if (_T) fwctl_put(&_T->fwctl));
 
@@ -95,10 +105,331 @@  static int pdsfc_identify(struct pdsfc_dev *pdsfc)
 	return 0;
 }
 
+static void pdsfc_free_endpoints(struct pdsfc_dev *pdsfc)
+{
+	struct device *dev = &pdsfc->fwctl.dev;
+	int i;
+
+	if (!pdsfc->endpoints)
+		return;
+
+	for (i = 0; pdsfc->endpoint_info && i < pdsfc->endpoints->num_entries; i++)
+		mutex_destroy(&pdsfc->endpoint_info[i].lock);
+	vfree(pdsfc->endpoint_info);
+	pdsfc->endpoint_info = NULL;
+	dma_free_coherent(dev->parent, PAGE_SIZE,
+			  pdsfc->endpoints, pdsfc->endpoints_pa);
+	pdsfc->endpoints = NULL;
+	pdsfc->endpoints_pa = DMA_MAPPING_ERROR;
+}
+
+static void pdsfc_free_operations(struct pdsfc_dev *pdsfc)
+{
+	struct device *dev = &pdsfc->fwctl.dev;
+	u32 num_endpoints;
+	int i;
+
+	num_endpoints = le32_to_cpu(pdsfc->endpoints->num_entries);
+	for (i = 0; i < num_endpoints; i++) {
+		struct pdsfc_rpc_endpoint_info *ei = &pdsfc->endpoint_info[i];
+
+		if (ei->operations) {
+			dma_free_coherent(dev->parent, PAGE_SIZE,
+					  ei->operations, ei->operations_pa);
+			ei->operations = NULL;
+			ei->operations_pa = DMA_MAPPING_ERROR;
+		}
+	}
+}
+
+static struct pds_fwctl_query_data *pdsfc_get_endpoints(struct pdsfc_dev *pdsfc,
+							dma_addr_t *pa)
+{
+	struct device *dev = &pdsfc->fwctl.dev;
+	union pds_core_adminq_comp comp = {0};
+	struct pds_fwctl_query_data *data;
+	union pds_core_adminq_cmd cmd;
+	dma_addr_t data_pa;
+	int err;
+
+	data = dma_alloc_coherent(dev->parent, PAGE_SIZE, &data_pa, GFP_KERNEL);
+	err = dma_mapping_error(dev, data_pa);
+	if (err) {
+		dev_err(dev, "Failed to map endpoint list\n");
+		return ERR_PTR(err);
+	}
+
+	cmd = (union pds_core_adminq_cmd) {
+		.fwctl_query = {
+			.opcode = PDS_FWCTL_CMD_QUERY,
+			.entity = PDS_FWCTL_RPC_ROOT,
+			.version = 0,
+			.query_data_buf_len = cpu_to_le32(PAGE_SIZE),
+			.query_data_buf_pa = cpu_to_le64(data_pa),
+		}
+	};
+
+	err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
+	if (err) {
+		dev_err(dev, "Failed to send adminq cmd opcode: %u entity: %u err: %d\n",
+			cmd.fwctl_query.opcode, cmd.fwctl_query.entity, err);
+		dma_free_coherent(dev->parent, PAGE_SIZE, data, data_pa);
+		return ERR_PTR(err);
+	}
+
+	*pa = data_pa;
+
+	return data;
+}
+
+static int pdsfc_init_endpoints(struct pdsfc_dev *pdsfc)
+{
+	struct pds_fwctl_query_data_endpoint *ep_entry;
+	u32 num_endpoints;
+	int i;
+
+	pdsfc->endpoints = pdsfc_get_endpoints(pdsfc, &pdsfc->endpoints_pa);
+	if (IS_ERR(pdsfc->endpoints))
+		return PTR_ERR(pdsfc->endpoints);
+
+	num_endpoints = le32_to_cpu(pdsfc->endpoints->num_entries);
+	pdsfc->endpoint_info = vcalloc(num_endpoints,
+				       sizeof(*pdsfc->endpoint_info));
+	if (!pdsfc->endpoint_info) {
+		pdsfc_free_endpoints(pdsfc);
+		return -ENOMEM;
+	}
+
+	ep_entry = (struct pds_fwctl_query_data_endpoint *)pdsfc->endpoints->entries;
+	for (i = 0; i < num_endpoints; i++) {
+		mutex_init(&pdsfc->endpoint_info[i].lock);
+		pdsfc->endpoint_info[i].endpoint = ep_entry[i].id;
+	}
+
+	return 0;
+}
+
+static struct pds_fwctl_query_data *pdsfc_get_operations(struct pdsfc_dev *pdsfc,
+							 dma_addr_t *pa, u32 ep)
+{
+	struct device *dev = &pdsfc->fwctl.dev;
+	union pds_core_adminq_comp comp = {0};
+	struct pds_fwctl_query_data *data;
+	union pds_core_adminq_cmd cmd;
+	dma_addr_t data_pa;
+	int err;
+
+	/* Query the operations list for the given endpoint */
+	data = dma_alloc_coherent(dev->parent, PAGE_SIZE, &data_pa, GFP_KERNEL);
+	err = dma_mapping_error(dev->parent, data_pa);
+	if (err) {
+		dev_err(dev, "Failed to map operations list\n");
+		return ERR_PTR(err);
+	}
+
+	cmd = (union pds_core_adminq_cmd) {
+		.fwctl_query = {
+			.opcode = PDS_FWCTL_CMD_QUERY,
+			.entity = PDS_FWCTL_RPC_ENDPOINT,
+			.version = 0,
+			.query_data_buf_len = cpu_to_le32(PAGE_SIZE),
+			.query_data_buf_pa = cpu_to_le64(data_pa),
+			.ep = cpu_to_le32(ep),
+		}
+	};
+
+	err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
+	if (err) {
+		dev_err(dev, "Failed to send adminq cmd opcode: %u entity: %u err: %d\n",
+			cmd.fwctl_query.opcode, cmd.fwctl_query.entity, err);
+		dma_free_coherent(dev->parent, PAGE_SIZE, data, data_pa);
+		return ERR_PTR(err);
+	}
+
+	*pa = data_pa;
+
+	return data;
+}
+
+static int pdsfc_validate_rpc(struct pdsfc_dev *pdsfc,
+			      struct fwctl_rpc_pds *rpc,
+			      enum fwctl_rpc_scope scope)
+{
+	struct pds_fwctl_query_data_operation *op_entry;
+	struct pdsfc_rpc_endpoint_info *ep_info = NULL;
+	struct device *dev = &pdsfc->fwctl.dev;
+	int i;
+
+	/* validate rpc in_len & out_len based
+	 * on ident.max_req_sz & max_resp_sz
+	 */
+	if (rpc->in.len > pdsfc->ident.max_req_sz) {
+		dev_err(dev, "Invalid request size %u, max %u\n",
+			rpc->in.len, pdsfc->ident.max_req_sz);
+		return -EINVAL;
+	}
+
+	if (rpc->out.len > pdsfc->ident.max_resp_sz) {
+		dev_err(dev, "Invalid response size %u, max %u\n",
+			rpc->out.len, pdsfc->ident.max_resp_sz);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < pdsfc->endpoints->num_entries; i++) {
+		if (pdsfc->endpoint_info[i].endpoint == rpc->in.ep) {
+			ep_info = &pdsfc->endpoint_info[i];
+			break;
+		}
+	}
+	if (!ep_info) {
+		dev_err(dev, "Invalid endpoint %d\n", rpc->in.ep);
+		return -EINVAL;
+	}
+
+	/* query and cache this endpoint's operations */
+	mutex_lock(&ep_info->lock);
+	if (!ep_info->operations) {
+		ep_info->operations = pdsfc_get_operations(pdsfc,
+							   &ep_info->operations_pa,
+							   rpc->in.ep);
+		if (!ep_info->operations) {
+			mutex_unlock(&ep_info->lock);
+			dev_err(dev, "Failed to allocate operations list\n");
+			return -ENOMEM;
+		}
+	}
+	mutex_unlock(&ep_info->lock);
+
+	/* reject unsupported and/or out of scope commands */
+	op_entry = (struct pds_fwctl_query_data_operation *)ep_info->operations->entries;
+	for (i = 0; i < ep_info->operations->num_entries; i++) {
+		if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, op_entry[i].id)) {
+			if (scope < op_entry[i].scope)
+				return -EPERM;
+			return 0;
+		}
+	}
+
+	dev_err(dev, "Invalid operation %d for endpoint %d\n", rpc->in.op, rpc->in.ep);
+
+	return -EINVAL;
+}
+
 static void *pdsfc_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
 			  void *in, size_t in_len, size_t *out_len)
 {
-	return NULL;
+	struct pdsfc_dev *pdsfc = container_of(uctx->fwctl, struct pdsfc_dev, fwctl);
+	struct fwctl_rpc_pds *rpc = (struct fwctl_rpc_pds *)in;
+	struct device *dev = &uctx->fwctl->dev;
+	union pds_core_adminq_comp comp = {0};
+	dma_addr_t out_payload_dma_addr = 0;
+	dma_addr_t in_payload_dma_addr = 0;
+	union pds_core_adminq_cmd cmd;
+	void *out_payload = NULL;
+	void *in_payload = NULL;
+	void *out = NULL;
+	int err;
+
+	err = pdsfc_validate_rpc(pdsfc, rpc, scope);
+	if (err) {
+		dev_err(dev, "Invalid RPC request\n");
+		return ERR_PTR(err);
+	}
+
+	if (rpc->in.len > 0) {
+		in_payload = kzalloc(rpc->in.len, GFP_KERNEL);
+		if (!in_payload) {
+			dev_err(dev, "Failed to allocate in_payload\n");
+			out = ERR_PTR(-ENOMEM);
+			goto done;
+		}
+
+		if (copy_from_user(in_payload, u64_to_user_ptr(rpc->in.payload),
+				   rpc->in.len)) {
+			dev_err(dev, "Failed to copy in_payload from user\n");
+			out = ERR_PTR(-EFAULT);
+			goto done;
+		}
+
+		in_payload_dma_addr = dma_map_single(dev->parent, in_payload,
+						     rpc->in.len, DMA_TO_DEVICE);
+		err = dma_mapping_error(dev->parent, in_payload_dma_addr);
+		if (err) {
+			dev_err(dev, "Failed to map in_payload\n");
+			in_payload_dma_addr = 0;
+			out = ERR_PTR(err);
+			goto done;
+		}
+	}
+
+	if (rpc->out.len > 0) {
+		out_payload = kzalloc(rpc->out.len, GFP_KERNEL);
+		if (!out_payload) {
+			dev_err(dev, "Failed to allocate out_payload\n");
+			out = ERR_PTR(-ENOMEM);
+			goto done;
+		}
+
+		out_payload_dma_addr = dma_map_single(dev->parent, out_payload,
+						      rpc->out.len, DMA_FROM_DEVICE);
+		err = dma_mapping_error(dev->parent, out_payload_dma_addr);
+		if (err) {
+			dev_err(dev, "Failed to map out_payload\n");
+			out_payload_dma_addr = 0;
+			out = ERR_PTR(err);
+			goto done;
+		}
+	}
+
+	cmd = (union pds_core_adminq_cmd) {
+		.fwctl_rpc = {
+			.opcode = PDS_FWCTL_CMD_RPC,
+			.flags = PDS_FWCTL_RPC_IND_REQ | PDS_FWCTL_RPC_IND_RESP,
+			.ep = cpu_to_le32(rpc->in.ep),
+			.op = cpu_to_le32(rpc->in.op),
+			.req_pa = cpu_to_le64(in_payload_dma_addr),
+			.req_sz = cpu_to_le32(rpc->in.len),
+			.resp_pa = cpu_to_le64(out_payload_dma_addr),
+			.resp_sz = cpu_to_le32(rpc->out.len),
+		}
+	};
+
+	err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
+	if (err) {
+		dev_err(dev, "%s: ep %d op %x req_pa %llx req_sz %d req_sg %d resp_pa %llx resp_sz %d resp_sg %d err %d\n",
+			__func__, rpc->in.ep, rpc->in.op,
+			cmd.fwctl_rpc.req_pa, cmd.fwctl_rpc.req_sz, cmd.fwctl_rpc.req_sg_elems,
+			cmd.fwctl_rpc.resp_pa, cmd.fwctl_rpc.resp_sz, cmd.fwctl_rpc.resp_sg_elems,
+			err);
+		out = ERR_PTR(err);
+		goto done;
+	}
+
+	dynamic_hex_dump("out ", DUMP_PREFIX_OFFSET, 16, 1, out_payload, rpc->out.len, true);
+
+	if (copy_to_user(u64_to_user_ptr(rpc->out.payload), out_payload, rpc->out.len)) {
+		dev_err(dev, "Failed to copy out_payload to user\n");
+		out = ERR_PTR(-EFAULT);
+		goto done;
+	}
+
+	rpc->out.retval = le32_to_cpu(comp.fwctl_rpc.err);
+	*out_len = in_len;
+	out = in;
+
+done:
+	if (in_payload_dma_addr)
+		dma_unmap_single(dev->parent, in_payload_dma_addr,
+				 rpc->in.len, DMA_TO_DEVICE);
+
+	if (out_payload_dma_addr)
+		dma_unmap_single(dev->parent, out_payload_dma_addr,
+				 rpc->out.len, DMA_FROM_DEVICE);
+
+	kfree(in_payload);
+	kfree(out_payload);
+
+	return out;
 }
 
 static const struct fwctl_ops pdsfc_ops = {
@@ -131,9 +462,15 @@  static int pdsfc_probe(struct auxiliary_device *adev,
 	if (err)
 		return dev_err_probe(dev, err, "Failed to identify device\n");
 
-	err = fwctl_register(&pdsfc->fwctl);
+	err = pdsfc_init_endpoints(pdsfc);
 	if (err)
+		return dev_err_probe(dev, err, "Failed to init endpoints\n");
+
+	err = fwctl_register(&pdsfc->fwctl);
+	if (err) {
+		pdsfc_free_endpoints(pdsfc);
 		return dev_err_probe(dev, err, "Failed to register device\n");
+	}
 
 	auxiliary_set_drvdata(adev, no_free_ptr(pdsfc));
 
@@ -145,6 +482,8 @@  static void pdsfc_remove(struct auxiliary_device *adev)
 	struct pdsfc_dev *pdsfc __free(pdsfc_dev) = auxiliary_get_drvdata(adev);
 
 	fwctl_unregister(&pdsfc->fwctl);
+	pdsfc_free_operations(pdsfc);
+	pdsfc_free_endpoints(pdsfc);
 }
 
 static const struct auxiliary_device_id pdsfc_id_table[] = {
diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h
index ae6886ebc841..03bca2d27de0 100644
--- a/include/linux/pds/pds_adminq.h
+++ b/include/linux/pds/pds_adminq.h
@@ -1181,6 +1181,8 @@  struct pds_lm_host_vf_status_cmd {
 
 enum pds_fwctl_cmd_opcode {
 	PDS_FWCTL_CMD_IDENT = 70,
+	PDS_FWCTL_CMD_RPC   = 71,
+	PDS_FWCTL_CMD_QUERY = 72,
 };
 
 /**
@@ -1251,6 +1253,187 @@  struct pds_fwctl_ident {
 	u8     max_resp_sg_elems;
 } __packed;
 
+enum pds_fwctl_query_entity {
+	PDS_FWCTL_RPC_ROOT	= 0,
+	PDS_FWCTL_RPC_ENDPOINT	= 1,
+	PDS_FWCTL_RPC_OPERATION	= 2,
+};
+
+#define PDS_FWCTL_RPC_OPCODE_CMD_SHIFT	0
+#define PDS_FWCTL_RPC_OPCODE_CMD_MASK	GENMASK(15, PDS_FWCTL_RPC_OPCODE_CMD_SHIFT)
+#define PDS_FWCTL_RPC_OPCODE_VER_SHIFT	16
+#define PDS_FWCTL_RPC_OPCODE_VER_MASK	GENMASK(23, PDS_FWCTL_RPC_OPCODE_VER_SHIFT)
+
+#define PDS_FWCTL_RPC_OPCODE_GET_CMD(op)	\
+	(((op) & PDS_FWCTL_RPC_OPCODE_CMD_MASK) >> PDS_FWCTL_RPC_OPCODE_CMD_SHIFT)
+#define PDS_FWCTL_RPC_OPCODE_GET_VER(op)	\
+	(((op) & PDS_FWCTL_RPC_OPCODE_VER_MASK) >> PDS_FWCTL_RPC_OPCODE_VER_SHIFT)
+
+#define PDS_FWCTL_RPC_OPCODE_CMP(op1, op2) \
+	(PDS_FWCTL_RPC_OPCODE_GET_CMD(op1) == PDS_FWCTL_RPC_OPCODE_GET_CMD(op2) && \
+	 PDS_FWCTL_RPC_OPCODE_GET_VER(op1) <= PDS_FWCTL_RPC_OPCODE_GET_VER(op2))
+
+/**
+ * struct pds_fwctl_query_cmd - Firmware control query command structure
+ * @opcode: Operation code for the command
+ * @entity:  Entity type to query (enum pds_fwctl_query_entity)
+ * @version: Version of the query data structure supported by the driver
+ * @rsvd:    Word boundary padding
+ * @query_data_buf_len: Length of the query data buffer
+ * @query_data_buf_pa:  Physical address of the query data buffer
+ * @ep:      Endpoint identifier to query  (when entity is PDS_FWCTL_RPC_ENDPOINT)
+ * @op:      Operation identifier to query (when entity is PDS_FWCTL_RPC_OPERATION)
+ *
+ * This structure is used to send a query command to the firmware control
+ * interface. The structure is packed to ensure there is no padding between
+ * the fields.
+ */
+struct pds_fwctl_query_cmd {
+	u8     opcode;
+	u8     entity;
+	u8     version;
+	u8     rsvd;
+	__le32 query_data_buf_len;
+	__le64 query_data_buf_pa;
+	union {
+		__le32 ep;
+		__le32 op;
+	};
+} __packed;
+
+/**
+ * struct pds_fwctl_query_comp - Firmware control query completion structure
+ * @status:     Status of the query command
+ * @rsvd:       Word boundary padding
+ * @comp_index: Completion index in little-endian format
+ * @version:    Version of the query data structure returned by firmware. This
+ *		 should be less than or equal to the version supported by the driver.
+ * @rsvd2:      Word boundary padding
+ * @color:      Color bit indicating the state of the completion
+ */
+struct pds_fwctl_query_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	u8     version;
+	u8     rsvd2[2];
+	u8     color;
+} __packed;
+
+/**
+ * struct pds_fwctl_query_data_endpoint - query data for entity PDS_FWCTL_RPC_ROOT
+ * @id: The identifier for the data endpoint.
+ */
+struct pds_fwctl_query_data_endpoint {
+	__le32 id;
+} __packed;
+
+/**
+ * struct pds_fwctl_query_data_operation - query data for entity PDS_FWCTL_RPC_ENDPOINT
+ * @id:    Operation identifier.
+ * @scope: Scope of the operation (enum fwctl_rpc_scope).
+ * @rsvd:  Word boundary padding
+ */
+struct pds_fwctl_query_data_operation {
+	__le32 id;
+	u8     scope;
+	u8     rsvd[3];
+} __packed;
+
+/**
+ * struct pds_fwctl_query_data - query data structure
+ * @version:     Version of the query data structure
+ * @rsvd:        Word boundary padding
+ * @num_entries: Number of entries in the union
+ * @entries:     Array of query data entries, depending on the entity type.
+ */
+struct pds_fwctl_query_data {
+	u8      version;
+	u8      rsvd[3];
+	__le32  num_entries;
+	uint8_t entries[];
+} __packed;
+
+/**
+ * struct pds_fwctl_rpc_cmd - Firmware control RPC command.
+ * @opcode:        opcode PDS_FWCTL_CMD_RPC
+ * @rsvd:          Word boundary padding
+ * @flags:         Indicates indirect request and/or response handling
+ * @ep:            Endpoint identifier.
+ * @op:            Operation identifier.
+ * @inline_req0:   Buffer for inline request
+ * @inline_req1:   Buffer for inline request
+ * @req_pa:        Physical address of request data.
+ * @req_sz:        Size of the request.
+ * @req_sg_elems:  Number of request SGs
+ * @req_rsvd:      Word boundary padding
+ * @inline_req2:   Buffer for inline request
+ * @resp_pa:       Physical address of response data.
+ * @resp_sz:       Size of the response.
+ * @resp_sg_elems: Number of response SGs
+ * @resp_rsvd:     Word boundary padding
+ */
+struct pds_fwctl_rpc_cmd {
+	u8     opcode;
+	u8     rsvd;
+	__le16 flags;
+#define PDS_FWCTL_RPC_IND_REQ		0x1
+#define PDS_FWCTL_RPC_IND_RESP		0x2
+	__le32 ep;
+	__le32 op;
+	u8 inline_req0[16];
+	union {
+		u8 inline_req1[16];
+		struct {
+			__le64 req_pa;
+			__le32 req_sz;
+			u8     req_sg_elems;
+			u8     req_rsvd[3];
+		};
+	};
+	union {
+		u8 inline_req2[16];
+		struct {
+			__le64 resp_pa;
+			__le32 resp_sz;
+			u8     resp_sg_elems;
+			u8     resp_rsvd[3];
+		};
+	};
+} __packed;
+
+/**
+ * struct pds_sg_elem - Transmit scatter-gather (SG) descriptor element
+ * @addr:	DMA address of SG element data buffer
+ * @len:	Length of SG element data buffer, in bytes
+ * @rsvd:	Word boundary padding
+ */
+struct pds_sg_elem {
+	__le64 addr;
+	__le32 len;
+	__le16 rsvd[2];
+} __packed;
+
+/**
+ * struct pds_fwctl_rpc_comp - Completion of a firmware control RPC.
+ * @status:     Status of the command
+ * @rsvd:       Word boundary padding
+ * @comp_index: Completion index of the command
+ * @err:        Error code, if any, from the RPC.
+ * @resp_sz:    Size of the response.
+ * @rsvd2:      Word boundary padding
+ * @color:      Color bit indicating the state of the completion.
+ */
+struct pds_fwctl_rpc_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	__le32 err;
+	__le32 resp_sz;
+	u8     rsvd2[3];
+	u8     color;
+} __packed;
+
 union pds_core_adminq_cmd {
 	u8     opcode;
 	u8     bytes[64];
@@ -1291,6 +1474,8 @@  union pds_core_adminq_cmd {
 
 	struct pds_fwctl_cmd		  fwctl;
 	struct pds_fwctl_ident_cmd	  fwctl_ident;
+	struct pds_fwctl_rpc_cmd	  fwctl_rpc;
+	struct pds_fwctl_query_cmd	  fwctl_query;
 };
 
 union pds_core_adminq_comp {
@@ -1320,6 +1505,8 @@  union pds_core_adminq_comp {
 	struct pds_lm_dirty_status_comp	  lm_dirty_status;
 
 	struct pds_fwctl_comp		  fwctl;
+	struct pds_fwctl_rpc_comp	  fwctl_rpc;
+	struct pds_fwctl_query_comp	  fwctl_query;
 };
 
 #ifndef __CHECKER__
diff --git a/include/uapi/fwctl/pds.h b/include/uapi/fwctl/pds.h
index a01b032cbdb1..da6cd2d1c6fa 100644
--- a/include/uapi/fwctl/pds.h
+++ b/include/uapi/fwctl/pds.h
@@ -24,4 +24,20 @@  enum pds_fwctl_capabilities {
 	PDS_FWCTL_QUERY_CAP = 0,
 	PDS_FWCTL_SEND_CAP,
 };
+
+struct fwctl_rpc_pds {
+	struct {
+		__u32 op;
+		__u32 ep;
+		__u32 rsvd;
+		__u32 len;
+		__u64 payload;
+	} in;
+	struct {
+		__u32 retval;
+		__u32 rsvd[2];
+		__u32 len;
+		__u64 payload;
+	} out;
+};
 #endif /* _UAPI_FWCTL_PDS_H_ */