diff mbox series

[v3,12/20] efx: use acquire_endpoint when looking for free HPA

Message ID 20240907081836.5801-13-alejandro.lucero-palau@amd.com
State New
Headers show
Series cxl: add Type2 device support | expand

Commit Message

Lucero Palau, Alejandro Sept. 7, 2024, 8:18 a.m. UTC
From: Alejandro Lucero <alucerop@amd.com>

Asking for availbale HPA space is the previous step to try to obtain
an HPA range suitable to accel driver purposes.

Add this call to efx cxl initialization and use acquire_endpoint for
avoiding potential races with cxl port creation.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
---
 drivers/net/ethernet/sfc/efx.c     |  8 +++++++-
 drivers/net/ethernet/sfc/efx_cxl.c | 32 ++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

Comments

kernel test robot Sept. 7, 2024, 7:33 p.m. UTC | #1
Hi,

kernel test robot noticed the following build warnings:

[auto build test WARNING on cxl/next]
[also build test WARNING on linus/master v6.11-rc6 next-20240906]
[cannot apply to cxl/pending horms-ipvs/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/alejandro-lucero-palau-amd-com/cxl-add-type2-device-basic-support/20240907-162231
base:   https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git next
patch link:    https://lore.kernel.org/r/20240907081836.5801-13-alejandro.lucero-palau%40amd.com
patch subject: [PATCH v3 12/20] efx: use acquire_endpoint when looking for free HPA
config: powerpc-ppc6xx_defconfig (https://download.01.org/0day-ci/archive/20240908/202409080317.xjZq1ABN-lkp@intel.com/config)
compiler: powerpc-linux-gcc (GCC) 13.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240908/202409080317.xjZq1ABN-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409080317.xjZq1ABN-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from include/linux/device.h:15,
                    from include/linux/cxl/cxl.h:7,
                    from drivers/net/ethernet/sfc/efx_cxl.c:12:
   drivers/net/ethernet/sfc/efx_cxl.c: In function 'efx_cxl_init':
>> drivers/net/ethernet/sfc/efx_cxl.c:114:34: warning: format '%llu' expects argument of type 'long long unsigned int', but argument 4 has type 'resource_size_t' {aka 'unsigned int'} [-Wformat=]
     114 |                 pci_err(pci_dev, "%s: no enough free HPA space %llu < %u\n",
         |                                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/dev_printk.h:110:30: note: in definition of macro 'dev_printk_index_wrap'
     110 |                 _p_func(dev, fmt, ##__VA_ARGS__);                       \
         |                              ^~~
   include/linux/dev_printk.h:154:56: note: in expansion of macro 'dev_fmt'
     154 |         dev_printk_index_wrap(_dev_err, KERN_ERR, dev, dev_fmt(fmt), ##__VA_ARGS__)
         |                                                        ^~~~~~~
   include/linux/pci.h:2679:41: note: in expansion of macro 'dev_err'
    2679 | #define pci_err(pdev, fmt, arg...)      dev_err(&(pdev)->dev, fmt, ##arg)
         |                                         ^~~~~~~
   drivers/net/ethernet/sfc/efx_cxl.c:114:17: note: in expansion of macro 'pci_err'
     114 |                 pci_err(pci_dev, "%s: no enough free HPA space %llu < %u\n",
         |                 ^~~~~~~
   drivers/net/ethernet/sfc/efx_cxl.c:114:67: note: format string is defined here
     114 |                 pci_err(pci_dev, "%s: no enough free HPA space %llu < %u\n",
         |                                                                ~~~^
         |                                                                   |
         |                                                                   long long unsigned int
         |                                                                %u


vim +114 drivers/net/ethernet/sfc/efx_cxl.c

  > 12	#include <linux/cxl/cxl.h>
    13	#include <linux/cxl/pci.h>
    14	#include <linux/pci.h>
    15	
    16	#include "net_driver.h"
    17	#include "efx_cxl.h"
    18	
    19	#define EFX_CTPIO_BUFFER_SIZE	(1024 * 1024 * 256)
    20	
    21	int efx_cxl_init(struct efx_nic *efx)
    22	{
    23		struct pci_dev *pci_dev = efx->pci_dev;
    24		struct efx_cxl *cxl;
    25		struct resource res;
    26		resource_size_t max;
    27		u16 dvsec;
    28		int rc;
    29	
    30		efx->efx_cxl_pio_initialised = false;
    31	
    32		dvsec = pci_find_dvsec_capability(pci_dev, PCI_VENDOR_ID_CXL,
    33						  CXL_DVSEC_PCIE_DEVICE);
    34	
    35		if (!dvsec)
    36			return 0;
    37	
    38		pci_dbg(pci_dev, "CXL_DVSEC_PCIE_DEVICE capability found\n");
    39	
    40		efx->cxl = kzalloc(sizeof(*cxl), GFP_KERNEL);
    41		if (!efx->cxl)
    42			return -ENOMEM;
    43	
    44		cxl = efx->cxl;
    45	
    46		cxl->cxlds = cxl_accel_state_create(&pci_dev->dev);
    47		if (IS_ERR(cxl->cxlds)) {
    48			pci_err(pci_dev, "CXL accel device state failed");
    49			kfree(efx->cxl);
    50			return -ENOMEM;
    51		}
    52	
    53		cxl_set_dvsec(cxl->cxlds, dvsec);
    54		cxl_set_serial(cxl->cxlds, pci_dev->dev.id);
    55	
    56		res = DEFINE_RES_MEM(0, EFX_CTPIO_BUFFER_SIZE);
    57		if (cxl_set_resource(cxl->cxlds, res, CXL_ACCEL_RES_DPA)) {
    58			pci_err(pci_dev, "cxl_set_resource DPA failed\n");
    59			rc = -EINVAL;
    60			goto err;
    61		}
    62	
    63		res = DEFINE_RES_MEM_NAMED(0, EFX_CTPIO_BUFFER_SIZE, "ram");
    64		if (cxl_set_resource(cxl->cxlds, res, CXL_ACCEL_RES_RAM)) {
    65			pci_err(pci_dev, "cxl_set_resource RAM failed\n");
    66			rc = -EINVAL;
    67			goto err;
    68		}
    69	
    70		rc = cxl_pci_accel_setup_regs(pci_dev, cxl->cxlds);
    71		if (rc) {
    72			pci_err(pci_dev, "CXL accel setup regs failed");
    73			goto err;
    74		}
    75	
    76		rc = cxl_request_resource(cxl->cxlds, CXL_ACCEL_RES_RAM);
    77		if (rc) {
    78			pci_err(pci_dev, "CXL request resource failed");
    79			goto err;
    80		}
    81	
    82		/* We do not have the register about media status. Hardware design
    83		 * implies it is ready.
    84		 */
    85		cxl_set_media_ready(cxl->cxlds);
    86	
    87		cxl->cxlmd = devm_cxl_add_memdev(&pci_dev->dev, cxl->cxlds);
    88		if (IS_ERR(cxl->cxlmd)) {
    89			pci_err(pci_dev, "CXL accel memdev creation failed");
    90			rc = PTR_ERR(cxl->cxlmd);
    91			goto err;
    92		}
    93	
    94		cxl->endpoint = cxl_acquire_endpoint(cxl->cxlmd);
    95		if (IS_ERR(cxl->endpoint)) {
    96			rc = PTR_ERR(cxl->endpoint);
    97			if (rc != -EPROBE_DEFER) {
    98				pci_err(pci_dev, "CXL accel acquire endpoint failed");
    99				goto err;
   100			}
   101		}
   102	
   103		cxl->cxlrd = cxl_get_hpa_freespace(cxl->endpoint,
   104						   CXL_DECODER_F_RAM | CXL_DECODER_F_TYPE2,
   105						   &max);
   106	
   107		if (IS_ERR(cxl->cxlrd)) {
   108			pci_err(pci_dev, "cxl_get_hpa_freespace failed\n");
   109			rc = PTR_ERR(cxl->cxlrd);
   110			goto err_release;
   111		}
   112	
   113		if (max < EFX_CTPIO_BUFFER_SIZE) {
 > 114			pci_err(pci_dev, "%s: no enough free HPA space %llu < %u\n",
   115				__func__, max, EFX_CTPIO_BUFFER_SIZE);
   116			rc = -ENOSPC;
   117			goto err;
   118		}
   119	
   120		cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
   121	
   122		return 0;
   123	
   124	err_release:
   125		cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
   126	err:
   127		kfree(cxl->cxlds);
   128		kfree(cxl);
   129		efx->cxl = NULL;
   130	
   131		return rc;
   132	}
   133
Dave Jiang Sept. 12, 2024, 11:09 p.m. UTC | #2
On 9/7/24 1:18 AM, alejandro.lucero-palau@amd.com wrote:
> From: Alejandro Lucero <alucerop@amd.com>
> 
> Asking for availbale HPA space is the previous step to try to obtain
> an HPA range suitable to accel driver purposes.
> 
> Add this call to efx cxl initialization and use acquire_endpoint for
> avoiding potential races with cxl port creation.
> 
> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
> ---
>  drivers/net/ethernet/sfc/efx.c     |  8 +++++++-
>  drivers/net/ethernet/sfc/efx_cxl.c | 32 ++++++++++++++++++++++++++++++
>  2 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
> index 3a7406aa950c..08a2f527df16 100644
> --- a/drivers/net/ethernet/sfc/efx.c
> +++ b/drivers/net/ethernet/sfc/efx.c
> @@ -1117,10 +1117,16 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
>  	 * used for PIO buffers. If there is no CXL support, or initialization
>  	 * fails, efx_cxl_pio_initialised wll be false and legacy PIO buffers
>  	 * defined at specific PCI BAR regions will be used.
> +	 *
> +	 * The only error to handle is -EPROBE_DEFER happening if the root port
> +	 * is not there yet.
>  	 */
>  	rc = efx_cxl_init(efx);
> -	if (rc)
> +	if (rc) {
> +		if (rc == -EPROBE_DEFER)
> +			goto fail2;
>  		pci_err(pci_dev, "CXL initialization failed with error %d\n", rc);
> +	}
>  
>  	rc = efx_pci_probe_post_io(efx);
>  	if (rc) {
> diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
> index 899bc823a212..826759caa552 100644
> --- a/drivers/net/ethernet/sfc/efx_cxl.c
> +++ b/drivers/net/ethernet/sfc/efx_cxl.c
> @@ -23,6 +23,7 @@ int efx_cxl_init(struct efx_nic *efx)
>  	struct pci_dev *pci_dev = efx->pci_dev;
>  	struct efx_cxl *cxl;
>  	struct resource res;
> +	resource_size_t max;
>  	u16 dvsec;
>  	int rc;
>  
> @@ -90,7 +91,38 @@ int efx_cxl_init(struct efx_nic *efx)
>  		goto err;
>  	}
>  
> +	cxl->endpoint = cxl_acquire_endpoint(cxl->cxlmd);
> +	if (IS_ERR(cxl->endpoint)) {
> +		rc = PTR_ERR(cxl->endpoint);
> +		if (rc != -EPROBE_DEFER) {
> +			pci_err(pci_dev, "CXL accel acquire endpoint failed");
> +			goto err;
> +		}

What happens if (rc == -EPROBE_DEFER)? Here it drops down but you don't have a valid cxl->endpoint when cxl_get_hpa_freespace() is called.

DJ
 
> +	}
> +
> +	cxl->cxlrd = cxl_get_hpa_freespace(cxl->endpoint,
> +					   CXL_DECODER_F_RAM | CXL_DECODER_F_TYPE2,
> +					   &max);
> +
> +	if (IS_ERR(cxl->cxlrd)) {
> +		pci_err(pci_dev, "cxl_get_hpa_freespace failed\n");
> +		rc = PTR_ERR(cxl->cxlrd);
> +		goto err_release;
> +	}
> +
> +	if (max < EFX_CTPIO_BUFFER_SIZE) {
> +		pci_err(pci_dev, "%s: no enough free HPA space %llu < %u\n",
> +			__func__, max, EFX_CTPIO_BUFFER_SIZE);
> +		rc = -ENOSPC;
> +		goto err;
> +	}
> +
> +	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
> +
>  	return 0;
> +
> +err_release:
> +	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
>  err:
>  	kfree(cxl->cxlds);
>  	kfree(cxl);
Alejandro Lucero Palau Sept. 16, 2024, 10:29 a.m. UTC | #3
On 9/13/24 00:09, Dave Jiang wrote:
>
> On 9/7/24 1:18 AM, alejandro.lucero-palau@amd.com wrote:
>> From: Alejandro Lucero <alucerop@amd.com>
>>
>> Asking for availbale HPA space is the previous step to try to obtain
>> an HPA range suitable to accel driver purposes.
>>
>> Add this call to efx cxl initialization and use acquire_endpoint for
>> avoiding potential races with cxl port creation.
>>
>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>> ---
>>   drivers/net/ethernet/sfc/efx.c     |  8 +++++++-
>>   drivers/net/ethernet/sfc/efx_cxl.c | 32 ++++++++++++++++++++++++++++++
>>   2 files changed, 39 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
>> index 3a7406aa950c..08a2f527df16 100644
>> --- a/drivers/net/ethernet/sfc/efx.c
>> +++ b/drivers/net/ethernet/sfc/efx.c
>> @@ -1117,10 +1117,16 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
>>   	 * used for PIO buffers. If there is no CXL support, or initialization
>>   	 * fails, efx_cxl_pio_initialised wll be false and legacy PIO buffers
>>   	 * defined at specific PCI BAR regions will be used.
>> +	 *
>> +	 * The only error to handle is -EPROBE_DEFER happening if the root port
>> +	 * is not there yet.
>>   	 */
>>   	rc = efx_cxl_init(efx);
>> -	if (rc)
>> +	if (rc) {
>> +		if (rc == -EPROBE_DEFER)
>> +			goto fail2;
>>   		pci_err(pci_dev, "CXL initialization failed with error %d\n", rc);
>> +	}
>>   
>>   	rc = efx_pci_probe_post_io(efx);
>>   	if (rc) {
>> diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
>> index 899bc823a212..826759caa552 100644
>> --- a/drivers/net/ethernet/sfc/efx_cxl.c
>> +++ b/drivers/net/ethernet/sfc/efx_cxl.c
>> @@ -23,6 +23,7 @@ int efx_cxl_init(struct efx_nic *efx)
>>   	struct pci_dev *pci_dev = efx->pci_dev;
>>   	struct efx_cxl *cxl;
>>   	struct resource res;
>> +	resource_size_t max;
>>   	u16 dvsec;
>>   	int rc;
>>   
>> @@ -90,7 +91,38 @@ int efx_cxl_init(struct efx_nic *efx)
>>   		goto err;
>>   	}
>>   
>> +	cxl->endpoint = cxl_acquire_endpoint(cxl->cxlmd);
>> +	if (IS_ERR(cxl->endpoint)) {
>> +		rc = PTR_ERR(cxl->endpoint);
>> +		if (rc != -EPROBE_DEFER) {
>> +			pci_err(pci_dev, "CXL accel acquire endpoint failed");
>> +			goto err;
>> +		}
> What happens if (rc == -EPROBE_DEFER)? Here it drops down but you don't have a valid cxl->endpoint when cxl_get_hpa_freespace() is called.
>
> DJ
>   


I missed it!

I thought about implementing a test case for this situation what would 
have shown the bug ...

FWIW, the caller is checking that specific return.

Thanks!


>> +	}
>> +
>> +	cxl->cxlrd = cxl_get_hpa_freespace(cxl->endpoint,
>> +					   CXL_DECODER_F_RAM | CXL_DECODER_F_TYPE2,
>> +					   &max);
>> +
>> +	if (IS_ERR(cxl->cxlrd)) {
>> +		pci_err(pci_dev, "cxl_get_hpa_freespace failed\n");
>> +		rc = PTR_ERR(cxl->cxlrd);
>> +		goto err_release;
>> +	}
>> +
>> +	if (max < EFX_CTPIO_BUFFER_SIZE) {
>> +		pci_err(pci_dev, "%s: no enough free HPA space %llu < %u\n",
>> +			__func__, max, EFX_CTPIO_BUFFER_SIZE);
>> +		rc = -ENOSPC;
>> +		goto err;
>> +	}
>> +
>> +	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
>> +
>>   	return 0;
>> +
>> +err_release:
>> +	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
>>   err:
>>   	kfree(cxl->cxlds);
>>   	kfree(cxl);
diff mbox series

Patch

diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 3a7406aa950c..08a2f527df16 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1117,10 +1117,16 @@  static int efx_pci_probe(struct pci_dev *pci_dev,
 	 * used for PIO buffers. If there is no CXL support, or initialization
 	 * fails, efx_cxl_pio_initialised wll be false and legacy PIO buffers
 	 * defined at specific PCI BAR regions will be used.
+	 *
+	 * The only error to handle is -EPROBE_DEFER happening if the root port
+	 * is not there yet.
 	 */
 	rc = efx_cxl_init(efx);
-	if (rc)
+	if (rc) {
+		if (rc == -EPROBE_DEFER)
+			goto fail2;
 		pci_err(pci_dev, "CXL initialization failed with error %d\n", rc);
+	}
 
 	rc = efx_pci_probe_post_io(efx);
 	if (rc) {
diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
index 899bc823a212..826759caa552 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.c
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -23,6 +23,7 @@  int efx_cxl_init(struct efx_nic *efx)
 	struct pci_dev *pci_dev = efx->pci_dev;
 	struct efx_cxl *cxl;
 	struct resource res;
+	resource_size_t max;
 	u16 dvsec;
 	int rc;
 
@@ -90,7 +91,38 @@  int efx_cxl_init(struct efx_nic *efx)
 		goto err;
 	}
 
+	cxl->endpoint = cxl_acquire_endpoint(cxl->cxlmd);
+	if (IS_ERR(cxl->endpoint)) {
+		rc = PTR_ERR(cxl->endpoint);
+		if (rc != -EPROBE_DEFER) {
+			pci_err(pci_dev, "CXL accel acquire endpoint failed");
+			goto err;
+		}
+	}
+
+	cxl->cxlrd = cxl_get_hpa_freespace(cxl->endpoint,
+					   CXL_DECODER_F_RAM | CXL_DECODER_F_TYPE2,
+					   &max);
+
+	if (IS_ERR(cxl->cxlrd)) {
+		pci_err(pci_dev, "cxl_get_hpa_freespace failed\n");
+		rc = PTR_ERR(cxl->cxlrd);
+		goto err_release;
+	}
+
+	if (max < EFX_CTPIO_BUFFER_SIZE) {
+		pci_err(pci_dev, "%s: no enough free HPA space %llu < %u\n",
+			__func__, max, EFX_CTPIO_BUFFER_SIZE);
+		rc = -ENOSPC;
+		goto err;
+	}
+
+	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
+
 	return 0;
+
+err_release:
+	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
 err:
 	kfree(cxl->cxlds);
 	kfree(cxl);