diff mbox series

[v3,2/3] cxl/pci: Get rid of pointer arithmetic reading CDAT table

Message ID 20240209192647.163042-3-rrichter@amd.com
State Superseded
Headers show
Series CDAT updates and fixes | expand

Commit Message

Robert Richter Feb. 9, 2024, 7:26 p.m. UTC
Reading the CDAT table using DOE requires a Table Access Response
Header in addition to the CDAT entry. In current implementation this
has caused offsets with sizeof(__le32) to the actual buffers. This led
to hard-to-read code and even bugs. E.g., see the fix of devm_kfree()
in read_cdat_data():

 c65efe3685f5 cxl/cdat: Free correct buffer on checksum error

Rework code to avoid calculations with sizeof(__le32). Introduce
struct cdat_doe_rsp for this which contains the Table Access Response
Header and a variable payload size for various data structures
afterwards to access the CDAT table and its CDAT Data Structures
without recalculating buffer offsets.

Cc: Lukas Wunner <lukas@wunner.de>
Cc: Fan Ni <nifan.cxl@gmail.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Robert Richter <rrichter@amd.com>
---
 drivers/cxl/core/pci.c | 75 ++++++++++++++++++++++--------------------
 drivers/cxl/cxlpci.h   | 20 +++++++++++
 2 files changed, 59 insertions(+), 36 deletions(-)

Comments

Jonathan Cameron Feb. 14, 2024, 5:31 p.m. UTC | #1
On Fri, 9 Feb 2024 20:26:46 +0100
Robert Richter <rrichter@amd.com> wrote:

> Reading the CDAT table using DOE requires a Table Access Response
> Header in addition to the CDAT entry. In current implementation this
> has caused offsets with sizeof(__le32) to the actual buffers. This led
> to hardly readable code and even bugs. E.g., see fix of devm_kfree()
> in read_cdat_data():
> 
>  c65efe3685f5 cxl/cdat: Free correct buffer on checksum error
> 
> Rework code to avoid calculations with sizeof(__le32). Introduce
> struct cdat_doe_rsp for this which contains the Table Access Response
> Header and a variable payload size for various data structures
> afterwards to access the CDAT table and its CDAT Data Structures
> without recalculating buffer offsets.
> 
> Cc: Lukas Wunner <lukas@wunner.de>
> Cc: Fan Ni <nifan.cxl@gmail.com>
> Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> Signed-off-by: Robert Richter <rrichter@amd.com>

Hi Robert,

I like this in general.  A few comments inline though.

> ---
>  drivers/cxl/core/pci.c | 75 ++++++++++++++++++++++--------------------
>  drivers/cxl/cxlpci.h   | 20 +++++++++++
>  2 files changed, 59 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 39366ce94985..569354a5536f 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -544,55 +544,55 @@ static int cxl_cdat_get_length(struct device *dev,
>  
>  static int cxl_cdat_read_table(struct device *dev,
>  			       struct pci_doe_mb *doe_mb,
> -			       void *cdat_table, size_t *cdat_length)
> +			       struct cdat_doe_rsp *rsp, size_t *length)

Nitpick, but rsp isn't a response, it's the whole table.
Maybe it's worth a 
#define cdat_doe_table cdat_doe_rsp
or a typedef so the two are different in name at least whilst sharing
same structure definition?

>  {
> -	size_t length = *cdat_length + sizeof(__le32);
> -	__le32 *data = cdat_table;
> -	int entry_handle = 0;
> +	size_t received, remaining = *length;
> +	unsigned int entry_handle = 0;
> +	union cdat_data *data;
>  	__le32 saved_dw = 0;
>  
>  	do {
>  		__le32 request = CDAT_DOE_REQ(entry_handle);
> -		struct cdat_entry_header *entry;
> -		size_t entry_dw;
>  		int rc;
>  
>  		rc = pci_doe(doe_mb, PCI_DVSEC_VENDOR_ID_CXL,
>  			     CXL_DOE_PROTOCOL_TABLE_ACCESS,
>  			     &request, sizeof(request),
> -			     data, length);
> +			     rsp, sizeof(*rsp) + remaining);

I guess it's not really worth using struct_size here.
Its main advantage is making it clear we are dealing with a
trailing [] 

>  		if (rc < 0) {
>  			dev_err(dev, "DOE failed: %d", rc);
>  			return rc;
>  		}
>  
> -		/* 1 DW Table Access Response Header + CDAT entry */
> -		entry = (struct cdat_entry_header *)(data + 1);
> -		if ((entry_handle == 0 &&
> -		     rc != sizeof(__le32) + sizeof(struct cdat_header)) ||
> -		    (entry_handle > 0 &&
> -		     (rc < sizeof(__le32) + sizeof(*entry) ||
> -		      rc != sizeof(__le32) + le16_to_cpu(entry->length))))
> +		if (rc < sizeof(*rsp))
> +			return -EIO;
> +
> +		data = (void *)rsp->data;

Nicer to cast to (union cdat_data *) than rely on bounce via a void *

> +		received = rc - sizeof(*rsp);
> +
> +		if ((!entry_handle &&

Prefer == 0 for this because 0 is a magic value here.

> +		     received != sizeof(data->header)) ||
> +		    (entry_handle &&
> +		     (received < sizeof(data->entry) ||
> +		      received != le16_to_cpu(data->entry.length))))
>  			return -EIO;

Given it's two rather involved conditions maybe better to do.

		if (entry_handle == 0) {
			if (received != sizeof(data->header))
				return -EIO;
		} else {
			if (received < sizeof(data->entry) ||
			    received != le16_to_cpu(data->entry.length))
				return -EIO;
		}

More code but easier to see the header vs entry checks.
Could even define a little utility function / macro.

		cdat_is_head_handle(val) entry_handle == 0
so you get somewhat more self documenting code.

		if (cdat_is_head_handle(entry_handle)) {
		} else {
		}

>  
>  		/* Get the CXL table access header entry handle */
>  		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
> -					 le32_to_cpu(data[0]));
> -		entry_dw = rc / sizeof(__le32);
> -		/* Skip Header */
> -		entry_dw -= 1;
> +					 le32_to_cpu(rsp->doe_header));
> +
>  		/*
>  		 * Table Access Response Header overwrote the last DW of
>  		 * previous entry, so restore that DW
>  		 */
> -		*data = saved_dw;
> -		length -= entry_dw * sizeof(__le32);
> -		data += entry_dw;
> -		saved_dw = *data;
> +		rsp->doe_header = saved_dw;

I'm not keen on this looking like we are writing the doe header
as we are writing the tail of the last response.

Maybe the comment is enough.  I don't have a better idea on how
to make this more obvious.

> +		remaining -= received;
> +		rsp = (void *)rsp + received;

Was a potential problem with previous code, but this could
in theory become unaligned and we should be using unaligned accessors
for it as a result, or maybe adding a check that it doesn't ever become so.
The check is probably the easier path given CDAT entries are thankfully
(I think) all dword multiples as are the two headers.

> +		saved_dw = rsp->doe_header;
>  	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
>  
>  	/* Length in CDAT header may exceed concatenation of CDAT entries */
> -	*cdat_length -= length - sizeof(__le32);
> +	*length -= remaining;
>  
>  	return 0;
>  }
Robert Richter Feb. 16, 2024, 12:10 p.m. UTC | #2
Hi Jonathan,

thanks for your review.

On 14.02.24 17:31:58, Jonathan Cameron wrote:
> On Fri, 9 Feb 2024 20:26:46 +0100
> Robert Richter <rrichter@amd.com> wrote:
> 
> > Reading the CDAT table using DOE requires a Table Access Response
> > Header in addition to the CDAT entry. In current implementation this
> > has caused offsets with sizeof(__le32) to the actual buffers. This led
> > to hardly readable code and even bugs. E.g., see fix of devm_kfree()
> > in read_cdat_data():
> > 
> >  c65efe3685f5 cxl/cdat: Free correct buffer on checksum error
> > 
> > Rework code to avoid calculations with sizeof(__le32). Introduce
> > struct cdat_doe_rsp for this which contains the Table Access Response
> > Header and a variable payload size for various data structures
> > afterwards to access the CDAT table and its CDAT Data Structures
> > without recalculating buffer offsets.
> > 
> > Cc: Lukas Wunner <lukas@wunner.de>
> > Cc: Fan Ni <nifan.cxl@gmail.com>
> > Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> > Signed-off-by: Robert Richter <rrichter@amd.com>
> 
> Hi Robert,
> 
> I like this in general.  A few comments inline though.
> 
> > ---
> >  drivers/cxl/core/pci.c | 75 ++++++++++++++++++++++--------------------
> >  drivers/cxl/cxlpci.h   | 20 +++++++++++
> >  2 files changed, 59 insertions(+), 36 deletions(-)
> > 
> > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> > index 39366ce94985..569354a5536f 100644
> > --- a/drivers/cxl/core/pci.c
> > +++ b/drivers/cxl/core/pci.c
> > @@ -544,55 +544,55 @@ static int cxl_cdat_get_length(struct device *dev,
> >  
> >  static int cxl_cdat_read_table(struct device *dev,
> >  			       struct pci_doe_mb *doe_mb,
> > -			       void *cdat_table, size_t *cdat_length)
> > +			       struct cdat_doe_rsp *rsp, size_t *length)
> 
> Nitpick, but rsp isn't a response, it's the whole table.
> Maybe it's worth a 
> #define cdat_doe_table cdat_doe_rsp
> or a typedef so the two are different in name at least whilst sharing
> same structure definition?

There is a comment near the kzalloc of buf. I think introducing
another type here for single use will just add confusion.

I will also update the description of cdat_doe_rsp.

> 
> >  {
> > -	size_t length = *cdat_length + sizeof(__le32);
> > -	__le32 *data = cdat_table;
> > -	int entry_handle = 0;
> > +	size_t received, remaining = *length;
> > +	unsigned int entry_handle = 0;
> > +	union cdat_data *data;
> >  	__le32 saved_dw = 0;
> >  
> >  	do {
> >  		__le32 request = CDAT_DOE_REQ(entry_handle);
> > -		struct cdat_entry_header *entry;
> > -		size_t entry_dw;
> >  		int rc;
> >  
> >  		rc = pci_doe(doe_mb, PCI_DVSEC_VENDOR_ID_CXL,
> >  			     CXL_DOE_PROTOCOL_TABLE_ACCESS,
> >  			     &request, sizeof(request),
> > -			     data, length);
> > +			     rsp, sizeof(*rsp) + remaining);
> 
> I guess it's not really worth using struct_size here.
> It's main advantage is making it clear we are dealing with a
> trailing [] 

Yes, will keep it as is. Since it's a u8 array, the count is equal to
the size of the remaining data and we do not need struct_size() here.

> 
> >  		if (rc < 0) {
> >  			dev_err(dev, "DOE failed: %d", rc);
> >  			return rc;
> >  		}
> >  
> > -		/* 1 DW Table Access Response Header + CDAT entry */
> > -		entry = (struct cdat_entry_header *)(data + 1);
> > -		if ((entry_handle == 0 &&
> > -		     rc != sizeof(__le32) + sizeof(struct cdat_header)) ||
> > -		    (entry_handle > 0 &&
> > -		     (rc < sizeof(__le32) + sizeof(*entry) ||
> > -		      rc != sizeof(__le32) + le16_to_cpu(entry->length))))
> > +		if (rc < sizeof(*rsp))
> > +			return -EIO;
> > +
> > +		data = (void *)rsp->data;
> 
> Nicer to cast to (union cdat_data *) than rely on bounce via a void *

Will change.

> 
> > +		received = rc - sizeof(*rsp);
> > +
> > +		if ((!entry_handle &&
> 
> Prefer == 0 for this because 0 is a magic value here.
> 
> > +		     received != sizeof(data->header)) ||
> > +		    (entry_handle &&
> > +		     (received < sizeof(data->entry) ||
> > +		      received != le16_to_cpu(data->entry.length))))
> >  			return -EIO;
> 
> Given it's two rather involved conditions maybe better to do.
> 
> 		if (entry_handle == 0) {
> 			if (received != sizeof(data->header)
> 				return -EIO;
> 		} else {
> 			if (received < sizeof(data->entry) ||
> 			    received != le16_to_cpu(data->entry.length))
> 				return -EIO;
> 		}
> 
> More code but easier to see the header vs entry checks.
> Could even define a little utility function / macro.
> 
> 		cdat_is_head_handle(val) entry_handle == 0
> so you get somewhat more self documenting code.
> 
> 		if (cdat_is_head_handle(entry_handle)) {
> 		} else {
> 		}

I will take this but without the macro.

> 
> >  
> >  		/* Get the CXL table access header entry handle */
> >  		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
> > -					 le32_to_cpu(data[0]));
> > -		entry_dw = rc / sizeof(__le32);
> > -		/* Skip Header */
> > -		entry_dw -= 1;
> > +					 le32_to_cpu(rsp->doe_header));
> > +
> >  		/*
> >  		 * Table Access Response Header overwrote the last DW of
> >  		 * previous entry, so restore that DW
> >  		 */
> > -		*data = saved_dw;
> > -		length -= entry_dw * sizeof(__le32);
> > -		data += entry_dw;
> > -		saved_dw = *data;
> > +		rsp->doe_header = saved_dw;
> 
> I'm not keen on this looking like we are writing the doe header
> as we are writing the tail of the last response.
> 
> Maybe the comment is enough.  I don't have a better idea on how
> to make this more obvious.

I think the comment is good enough here.

> 
> > +		remaining -= received;
> > +		rsp = (void *)rsp + received;
> 
> Was a potential problem with previous code, but this could
> in theory become unaligned and we should be using unaligned accessors
> for it as a result, or maybe adding a check that it doesn't ever become so.
> The check is probably the easier path given CDAT entries are thankfully
> (I think) all dword multiples as are the two headers.

Yes, buffers are dwords. In any case, pci_doe_recv_resp() is safe to
be used unaligned anyway.

Thanks for your review, will prepare a v4.

-Robert

> 
> > +		saved_dw = rsp->doe_header;
> >  	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
> >  
> >  	/* Length in CDAT header may exceed concatenation of CDAT entries */
> > -	*cdat_length -= length - sizeof(__le32);
> > +	*length -= remaining;
> >  
> >  	return 0;
> >  }
> 
>
diff mbox series

Patch

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 39366ce94985..569354a5536f 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -544,55 +544,55 @@  static int cxl_cdat_get_length(struct device *dev,
 
 static int cxl_cdat_read_table(struct device *dev,
 			       struct pci_doe_mb *doe_mb,
-			       void *cdat_table, size_t *cdat_length)
+			       struct cdat_doe_rsp *rsp, size_t *length)
 {
-	size_t length = *cdat_length + sizeof(__le32);
-	__le32 *data = cdat_table;
-	int entry_handle = 0;
+	size_t received, remaining = *length;
+	unsigned int entry_handle = 0;
+	union cdat_data *data;
 	__le32 saved_dw = 0;
 
 	do {
 		__le32 request = CDAT_DOE_REQ(entry_handle);
-		struct cdat_entry_header *entry;
-		size_t entry_dw;
 		int rc;
 
 		rc = pci_doe(doe_mb, PCI_DVSEC_VENDOR_ID_CXL,
 			     CXL_DOE_PROTOCOL_TABLE_ACCESS,
 			     &request, sizeof(request),
-			     data, length);
+			     rsp, sizeof(*rsp) + remaining);
 		if (rc < 0) {
 			dev_err(dev, "DOE failed: %d", rc);
 			return rc;
 		}
 
-		/* 1 DW Table Access Response Header + CDAT entry */
-		entry = (struct cdat_entry_header *)(data + 1);
-		if ((entry_handle == 0 &&
-		     rc != sizeof(__le32) + sizeof(struct cdat_header)) ||
-		    (entry_handle > 0 &&
-		     (rc < sizeof(__le32) + sizeof(*entry) ||
-		      rc != sizeof(__le32) + le16_to_cpu(entry->length))))
+		if (rc < sizeof(*rsp))
+			return -EIO;
+
+		data = (void *)rsp->data;
+		received = rc - sizeof(*rsp);
+
+		if ((!entry_handle &&
+		     received != sizeof(data->header)) ||
+		    (entry_handle &&
+		     (received < sizeof(data->entry) ||
+		      received != le16_to_cpu(data->entry.length))))
 			return -EIO;
 
 		/* Get the CXL table access header entry handle */
 		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
-					 le32_to_cpu(data[0]));
-		entry_dw = rc / sizeof(__le32);
-		/* Skip Header */
-		entry_dw -= 1;
+					 le32_to_cpu(rsp->doe_header));
+
 		/*
 		 * Table Access Response Header overwrote the last DW of
 		 * previous entry, so restore that DW
 		 */
-		*data = saved_dw;
-		length -= entry_dw * sizeof(__le32);
-		data += entry_dw;
-		saved_dw = *data;
+		rsp->doe_header = saved_dw;
+		remaining -= received;
+		rsp = (void *)rsp + received;
+		saved_dw = rsp->doe_header;
 	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
 
 	/* Length in CDAT header may exceed concatenation of CDAT entries */
-	*cdat_length -= length - sizeof(__le32);
+	*length -= remaining;
 
 	return 0;
 }
@@ -620,8 +620,8 @@  void read_cdat_data(struct cxl_port *port)
 	struct pci_doe_mb *doe_mb;
 	struct pci_dev *pdev = NULL;
 	struct cxl_memdev *cxlmd;
-	size_t cdat_length;
-	void *cdat_table, *cdat_buf;
+	struct cdat_doe_rsp *buf;
+	size_t length;
 	int rc;
 
 	if (is_cxl_memdev(uport)) {
@@ -647,30 +647,33 @@  void read_cdat_data(struct cxl_port *port)
 
 	port->cdat_available = true;
 
-	if (cxl_cdat_get_length(dev, doe_mb, &cdat_length)) {
+	if (cxl_cdat_get_length(dev, doe_mb, &length)) {
 		dev_dbg(dev, "No CDAT length\n");
 		return;
 	}
 
-	cdat_buf = devm_kzalloc(dev, cdat_length + sizeof(__le32), GFP_KERNEL);
-	if (!cdat_buf)
-		return;
+	/*
+	 * The beginning of the CDAT buffer needs space for an additional
+	 * 4 bytes for the DOE header. Table data starts afterwards.
+	 */
+	buf = devm_kzalloc(dev, sizeof(*buf) + length, GFP_KERNEL);
+	if (!buf)
+		goto err;
 
-	rc = cxl_cdat_read_table(dev, doe_mb, cdat_buf, &cdat_length);
+	rc = cxl_cdat_read_table(dev, doe_mb, buf, &length);
 	if (rc)
 		goto err;
 
-	cdat_table = cdat_buf + sizeof(__le32);
-	if (cdat_checksum(cdat_table, cdat_length))
+	if (cdat_checksum(buf->data, length))
 		goto err;
 
-	port->cdat.table = cdat_table;
-	port->cdat.length = cdat_length;
-	return;
+	port->cdat.table = buf->data;
+	port->cdat.length = length;
 
+	return;
 err:
 	/* Don't leave table data allocated on error */
-	devm_kfree(dev, cdat_buf);
+	devm_kfree(dev, buf);
 	dev_err(dev, "Failed to read/validate CDAT.\n");
 }
 EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 711b05d9a370..152bd453c623 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -85,6 +85,26 @@  struct cdat_entry_header {
 	__le16 length;
 } __packed;
 
+union cdat_data {
+	struct cdat_header header;
+	struct cdat_entry_header entry;
+} __packed;
+
+/*
+ * Response contains the CDAT only response header of the DOE. The
+ * response payload is a CDAT structure (either CDAT header or entry),
+ * it may also mark the beginning of the CDAT table.
+ *
+ * Spec refs:
+ *
+ * CXL 3.1 Table 8-14: Read Entry Response
+ * CDAT Specification 1.03: 2 CDAT Data Structures
+ */
+struct cdat_doe_rsp {
+	__le32 doe_header;
+	u8 data[];
+} __packed;
+
 /*
  * CXL v3.0 6.2.3 Table 6-4
  * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits