diff mbox series

cxl/cdat: Free correct buffer on checksum error

Message ID 20231116-fix-cdat-devm-free-v1-1-b148b40707d7@intel.com
State Accepted
Commit c65efe3685f5d150eeca5599afeabdc85da899d1
Headers show
Series cxl/cdat: Free correct buffer on checksum error | expand

Commit Message

Ira Weiny Nov. 17, 2023, 12:03 a.m. UTC
The new 6.7-rc1 kernel now checks the checksum on CDAT data.  While
using a branch of Fan's DCD qemu work (and specifying DCD devices), the
following splat was observed.

	WARNING: CPU: 1 PID: 1384 at drivers/base/devres.c:1064 devm_kfree+0x4f/0x60
	...
	RIP: 0010:devm_kfree+0x4f/0x60
	...
 	? devm_kfree+0x4f/0x60
 	read_cdat_data+0x1a0/0x2a0 [cxl_core]
 	cxl_port_probe+0xdf/0x200 [cxl_port]
	...

The issue in qemu is still unknown but the splat is a straightforward
bug in the CDAT checksum processing code.  Use a CDAT buffer variable to
ensure the devm_kfree() works correctly on error.

Cc: jonathan.cameron@huawei.com
Cc: Fan Ni <nifan.cxl@gmail.com>
Fixes: 670e4e88f3b1 ("cxl: Add checksum verification to CDAT from CXL")
Cc: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/cxl/core/pci.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)


---
base-commit: 7475e51b87969e01a6812eac713a1c8310372e8a
change-id: 20231116-fix-cdat-devm-free-b47d32b4b833

Best regards,

Comments

Dave Jiang Nov. 17, 2023, 3:50 p.m. UTC | #1
On 11/16/23 17:03, Ira Weiny wrote:
> The new 6.7-rc1 kernel now checks the checksum on CDAT data.  While
> using a branch of Fan's DCD qemu work (and specifying DCD devices), the
> following splat was observed.
> 
> 	WARNING: CPU: 1 PID: 1384 at drivers/base/devres.c:1064 devm_kfree+0x4f/0x60
> 	...
> 	RIP: 0010:devm_kfree+0x4f/0x60
> 	...
>  	? devm_kfree+0x4f/0x60
>  	read_cdat_data+0x1a0/0x2a0 [cxl_core]
>  	cxl_port_probe+0xdf/0x200 [cxl_port]
> 	...
> 
> The issue in qemu is still unknown but the spat is a straight forward
> bug in the CDAT checksum processing code.  Use a CDAT buffer variable to
> ensure the devm_free() works correctly on error.
> 
> Cc: jonathan.cameron@huawei.com
> Cc: Fan Ni <nifan.cxl@gmail.com>
> Fixes: 670e4e88f3b1 ("cxl: Add checksum verification to CDAT from CXL")
> Cc: Dave Jiang <dave.jiang@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>

Thanks for the fix

Reviewed-by: Dave Jiang <dave.jiang@intel.com>

> ---
>  drivers/cxl/core/pci.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index eff20e83d0a6..5aaa0b36c42a 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -620,7 +620,7 @@ void read_cdat_data(struct cxl_port *port)
>  	struct pci_dev *pdev = NULL;
>  	struct cxl_memdev *cxlmd;
>  	size_t cdat_length;
> -	void *cdat_table;
> +	void *cdat_table, *cdat_buf;
>  	int rc;
>  
>  	if (is_cxl_memdev(uport)) {
> @@ -651,16 +651,16 @@ void read_cdat_data(struct cxl_port *port)
>  		return;
>  	}
>  
> -	cdat_table = devm_kzalloc(dev, cdat_length + sizeof(__le32),
> +	cdat_buf = devm_kzalloc(dev, cdat_length + sizeof(__le32),
>  				  GFP_KERNEL);
> -	if (!cdat_table)
> +	if (!cdat_buf)
>  		return;
>  
> -	rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length);
> +	rc = cxl_cdat_read_table(dev, cdat_doe, cdat_buf, &cdat_length);
>  	if (rc)
>  		goto err;
>  
> -	cdat_table = cdat_table + sizeof(__le32);
> +	cdat_table = cdat_buf + sizeof(__le32);
>  	if (cdat_checksum(cdat_table, cdat_length))
>  		goto err;
>  
> @@ -670,7 +670,7 @@ void read_cdat_data(struct cxl_port *port)
>  
>  err:
>  	/* Don't leave table data allocated on error */
> -	devm_kfree(dev, cdat_table);
> +	devm_kfree(dev, cdat_buf);
>  	dev_err(dev, "Failed to read/validate CDAT.\n");
>  }
>  EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
> 
> ---
> base-commit: 7475e51b87969e01a6812eac713a1c8310372e8a
> change-id: 20231116-fix-cdat-devm-free-b47d32b4b833
> 
> Best regards,
fan Nov. 17, 2023, 5:14 p.m. UTC | #2
On Thu, Nov 16, 2023 at 04:03:29PM -0800, Ira Weiny wrote:
> The new 6.7-rc1 kernel now checks the checksum on CDAT data.  While
> using a branch of Fan's DCD qemu work (and specifying DCD devices), the
> following splat was observed.
> 
> 	WARNING: CPU: 1 PID: 1384 at drivers/base/devres.c:1064 devm_kfree+0x4f/0x60
> 	...
> 	RIP: 0010:devm_kfree+0x4f/0x60
> 	...
>  	? devm_kfree+0x4f/0x60
>  	read_cdat_data+0x1a0/0x2a0 [cxl_core]
>  	cxl_port_probe+0xdf/0x200 [cxl_port]
> 	...
> 
> The issue in qemu is still unknown but the spat is a straight forward
> bug in the CDAT checksum processing code.  Use a CDAT buffer variable to
> ensure the devm_free() works correctly on error.
> 
> Cc: jonathan.cameron@huawei.com
> Cc: Fan Ni <nifan.cxl@gmail.com>
> Fixes: 670e4e88f3b1 ("cxl: Add checksum verification to CDAT from CXL")
> Cc: Dave Jiang <dave.jiang@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> ---

Good catch.

Reviewed-by: Fan Ni <fan.ni@samsung.com>

>  drivers/cxl/core/pci.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index eff20e83d0a6..5aaa0b36c42a 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -620,7 +620,7 @@ void read_cdat_data(struct cxl_port *port)
>  	struct pci_dev *pdev = NULL;
>  	struct cxl_memdev *cxlmd;
>  	size_t cdat_length;
> -	void *cdat_table;
> +	void *cdat_table, *cdat_buf;
>  	int rc;
>  
>  	if (is_cxl_memdev(uport)) {
> @@ -651,16 +651,16 @@ void read_cdat_data(struct cxl_port *port)
>  		return;
>  	}
>  
> -	cdat_table = devm_kzalloc(dev, cdat_length + sizeof(__le32),
> +	cdat_buf = devm_kzalloc(dev, cdat_length + sizeof(__le32),
>  				  GFP_KERNEL);
> -	if (!cdat_table)
> +	if (!cdat_buf)
>  		return;
>  
> -	rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length);
> +	rc = cxl_cdat_read_table(dev, cdat_doe, cdat_buf, &cdat_length);
>  	if (rc)
>  		goto err;
>  
> -	cdat_table = cdat_table + sizeof(__le32);
> +	cdat_table = cdat_buf + sizeof(__le32);
>  	if (cdat_checksum(cdat_table, cdat_length))
>  		goto err;
>  
> @@ -670,7 +670,7 @@ void read_cdat_data(struct cxl_port *port)
>  
>  err:
>  	/* Don't leave table data allocated on error */
> -	devm_kfree(dev, cdat_table);
> +	devm_kfree(dev, cdat_buf);
>  	dev_err(dev, "Failed to read/validate CDAT.\n");
>  }
>  EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
> 
> ---
> base-commit: 7475e51b87969e01a6812eac713a1c8310372e8a
> change-id: 20231116-fix-cdat-devm-free-b47d32b4b833
> 
> Best regards,
> -- 
> Ira Weiny <ira.weiny@intel.com>
>
Robert Richter Nov. 17, 2023, 8:09 p.m. UTC | #3
On 16.11.23 16:03:29, Ira Weiny wrote:
> The new 6.7-rc1 kernel now checks the checksum on CDAT data.  While
> using a branch of Fan's DCD qemu work (and specifying DCD devices), the
> following splat was observed.
> 
> 	WARNING: CPU: 1 PID: 1384 at drivers/base/devres.c:1064 devm_kfree+0x4f/0x60
> 	...
> 	RIP: 0010:devm_kfree+0x4f/0x60
> 	...
>  	? devm_kfree+0x4f/0x60
>  	read_cdat_data+0x1a0/0x2a0 [cxl_core]
>  	cxl_port_probe+0xdf/0x200 [cxl_port]
> 	...
> 
> The issue in qemu is still unknown but the spat is a straight forward
> bug in the CDAT checksum processing code.  Use a CDAT buffer variable to
> ensure the devm_free() works correctly on error.
> 
> Cc: jonathan.cameron@huawei.com
> Cc: Fan Ni <nifan.cxl@gmail.com>
> Fixes: 670e4e88f3b1 ("cxl: Add checksum verification to CDAT from CXL")
> Cc: Dave Jiang <dave.jiang@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>

Reviewed-by: Robert Richter <rrichter@amd.com>

I will send an on-top patch for 6.8 that reworks that code area to
remove the pointer arithmetic.

-Robert
diff mbox series

Patch

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index eff20e83d0a6..5aaa0b36c42a 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -620,7 +620,7 @@  void read_cdat_data(struct cxl_port *port)
 	struct pci_dev *pdev = NULL;
 	struct cxl_memdev *cxlmd;
 	size_t cdat_length;
-	void *cdat_table;
+	void *cdat_table, *cdat_buf;
 	int rc;
 
 	if (is_cxl_memdev(uport)) {
@@ -651,16 +651,16 @@  void read_cdat_data(struct cxl_port *port)
 		return;
 	}
 
-	cdat_table = devm_kzalloc(dev, cdat_length + sizeof(__le32),
+	cdat_buf = devm_kzalloc(dev, cdat_length + sizeof(__le32),
 				  GFP_KERNEL);
-	if (!cdat_table)
+	if (!cdat_buf)
 		return;
 
-	rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length);
+	rc = cxl_cdat_read_table(dev, cdat_doe, cdat_buf, &cdat_length);
 	if (rc)
 		goto err;
 
-	cdat_table = cdat_table + sizeof(__le32);
+	cdat_table = cdat_buf + sizeof(__le32);
 	if (cdat_checksum(cdat_table, cdat_length))
 		goto err;
 
@@ -670,7 +670,7 @@  void read_cdat_data(struct cxl_port *port)
 
 err:
 	/* Don't leave table data allocated on error */
-	devm_kfree(dev, cdat_table);
+	devm_kfree(dev, cdat_buf);
 	dev_err(dev, "Failed to read/validate CDAT.\n");
 }
 EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);