diff mbox series

[V9,8/9] cxl/port: Retry reading CDAT on failure

Message ID 20220531152632.1397976-9-ira.weiny@intel.com (mailing list archive)
State Superseded
Headers show
Series CXL: Read CDAT and DSMAS data | expand

Commit Message

Ira Weiny May 31, 2022, 3:26 p.m. UTC
From: Ira Weiny <ira.weiny@intel.com>

The CDAT read may fail for a number of reasons, but mainly because it is
possible to get different parts of a valid state.  The checksum in the
CDAT table protects against this.

Now that the CDAT data is validated, issue retries if the CDAT read
fails.  For now, 5 retries are implemented.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes from V8
	Move code to cxl/core/pci.c

Changes from V6
	Move to pci.c
	Fix retries count
	Change to 5 retries

Changes from V5:
	New patch -- easy to push off or drop.
---
 drivers/cxl/core/pci.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

Comments

Alison Schofield May 31, 2022, 5:07 p.m. UTC | #1
On Tue, May 31, 2022 at 08:26:31AM -0700, Ira Weiny wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> The CDAT read may fail for a number of reasons but mainly it is possible
> to get different parts of a valid state.  The checksum in the CDAT table
> protects against this.
> 
> Now that the cdat data is validated issue a retries if the CDAT read
> fails.  For now 5 retries are implemented.
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---

snip

> +
> +void read_cdat_data(struct cxl_port *port)
> +{
> +	int retries = 5;
> +	int rc;
> +
> +	while (retries--) {
> +		rc = __read_cdat_data(port);
> +		if (!rc)
> +			break;
> +		dev_err(&port->dev,
> +			"CDAT data read error rc=%d (retries %d)\n",
> +			rc, retries);
> +	}

Perhaps dev_dbg() on retries and dev_err() only when retries are exhausted.


>  }
>  EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
> -- 
> 2.35.1
>
Davidlohr Bueso May 31, 2022, 7:30 p.m. UTC | #2
On Tue, 31 May 2022, ira.weiny@intel.com wrote:

>From: Ira Weiny <ira.weiny@intel.com>
>
>The CDAT read may fail for a number of reasons but mainly it is possible
>to get different parts of a valid state.  The checksum in the CDAT table
>protects against this.
>
>Now that the cdat data is validated issue a retries if the CDAT read
>fails.  For now 5 retries are implemented.
>
>Signed-off-by: Ira Weiny <ira.weiny@intel.com>
>---
>Changes from V8
>	Move code to cxl/core/pci.c
>
>Changes from V6
>	Move to pci.c
>	Fix retries count
>	Change to 5 retries
>
>Changes from V5:
>	New patch -- easy to push off or drop.
>---
> drivers/cxl/core/pci.c | 32 +++++++++++++++++++++-----------
> 1 file changed, 21 insertions(+), 11 deletions(-)
>
>diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>index bb370df1cb6c..7f7dac9030a6 100644
>--- a/drivers/cxl/core/pci.c
>+++ b/drivers/cxl/core/pci.c
>@@ -620,20 +620,18 @@ static int cxl_cdat_read_table(struct cxl_port *port,
> 	return rc;
> }
>
>-void read_cdat_data(struct cxl_port *port)
>+static int __read_cdat_data(struct cxl_port *port)
> {
> 	struct device *dev = &port->dev;
> 	size_t cdat_length;
> 	int ret;
>
> 	if (cxl_cdat_get_length(port, &cdat_length))
>-		return;
>+		return 0;
>
> 	port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
>-	if (!port->cdat.table) {
>-		ret = -ENOMEM;
>-		goto error;
>-	}
>+	if (!port->cdat.table)
>+		return -ENOMEM;
>
> 	port->cdat.length = cdat_length;
> 	ret = cxl_cdat_read_table(port, &port->cdat);
>@@ -641,12 +639,24 @@ void read_cdat_data(struct cxl_port *port)
> 		devm_kfree(dev, port->cdat.table);
> 		port->cdat.table = NULL;
> 		port->cdat.length = 0;
>-		ret = -EIO;
>-		goto error;
>+		return -EIO;
> 	}
>
>-	return;
>-error:
>-	dev_err(dev, "CDAT data read error (%d)\n", ret);
>+	return 0;
>+}
>+
>+void read_cdat_data(struct cxl_port *port)
>+{
>+	int retries = 5;
>+	int rc;
>+
>+	while (retries--) {
>+		rc = __read_cdat_data(port);
>+		if (!rc)
>+			break;
>+		dev_err(&port->dev,
>+			"CDAT data read error rc=%d (retries %d)\n",
>+			rc, retries);
>+	}
> }
> EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);


Better a single final error message after exhausting all retries
instead of each time.

Thanks,
Davidlohr
Ira Weiny June 1, 2022, 4:54 a.m. UTC | #3
On Tue, May 31, 2022 at 10:07:43AM -0700, Alison Schofield wrote:
> On Tue, May 31, 2022 at 08:26:31AM -0700, Ira Weiny wrote:
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > The CDAT read may fail for a number of reasons but mainly it is possible
> > to get different parts of a valid state.  The checksum in the CDAT table
> > protects against this.
> > 
> > Now that the cdat data is validated issue a retries if the CDAT read
> > fails.  For now 5 retries are implemented.
> > 
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > 
> > ---
> 
> snip
> 
> > +
> > +void read_cdat_data(struct cxl_port *port)
> > +{
> > +	int retries = 5;
> > +	int rc;
> > +
> > +	while (retries--) {
> > +		rc = __read_cdat_data(port);
> > +		if (!rc)
> > +			break;
> > +		dev_err(&port->dev,
> > +			"CDAT data read error rc=%d (retries %d)\n",
> > +			rc, retries);
> > +	}
> 
> Perhaps dev_dbg() on retries and dev_err() only when retries are exhausted.

Yes thanks!
Ira

> 
> 
> >  }
> >  EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
> > -- 
> > 2.35.1
> >
diff mbox series

Patch

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index bb370df1cb6c..7f7dac9030a6 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -620,20 +620,18 @@  static int cxl_cdat_read_table(struct cxl_port *port,
 	return rc;
 }
 
-void read_cdat_data(struct cxl_port *port)
+static int __read_cdat_data(struct cxl_port *port)
 {
 	struct device *dev = &port->dev;
 	size_t cdat_length;
 	int ret;
 
 	if (cxl_cdat_get_length(port, &cdat_length))
-		return;
+		return 0;
 
 	port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
-	if (!port->cdat.table) {
-		ret = -ENOMEM;
-		goto error;
-	}
+	if (!port->cdat.table)
+		return -ENOMEM;
 
 	port->cdat.length = cdat_length;
 	ret = cxl_cdat_read_table(port, &port->cdat);
@@ -641,12 +639,24 @@  void read_cdat_data(struct cxl_port *port)
 		devm_kfree(dev, port->cdat.table);
 		port->cdat.table = NULL;
 		port->cdat.length = 0;
-		ret = -EIO;
-		goto error;
+		return -EIO;
 	}
 
-	return;
-error:
-	dev_err(dev, "CDAT data read error (%d)\n", ret);
+	return 0;
+}
+
+void read_cdat_data(struct cxl_port *port)
+{
+	int retries = 5;
+	int rc;
+
+	while (retries--) {
+		rc = __read_cdat_data(port);
+		if (!rc)
+			break;
+		dev_err(&port->dev,
+			"CDAT data read error rc=%d (retries %d)\n",
+			rc, retries);
+	}
 }
 EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);