diff mbox series

[V6,09/10] cxl/mem: Retry reading CDAT on failure

Message ID 20220201071952.900068-10-ira.weiny@intel.com
State Superseded
Headers show
Series CXL: Read CDAT and DSMAS data from the device | expand

Commit Message

Ira Weiny Feb. 1, 2022, 7:19 a.m. UTC
From: Ira Weiny <ira.weiny@intel.com>

The CDAT read may fail for a number of reasons but mainly it is possible
to get different parts of a valid state.  The checksum in the CDAT table
protects against this.

Now that the checksum is validated issue a retry if the CDAT read fails.
For now 2 retries are implemented.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
NOTE: Is 2 enough?  Should this just be delayed until the time when the
data is actually needed and not there?

Changes from V5:
	New patch -- easy to push off or drop.
---
 drivers/cxl/core/memdev.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

Comments

Ben Widawsky Feb. 1, 2022, 6:59 p.m. UTC | #1
On 22-01-31 23:19:51, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> The CDAT read may fail for a number of reasons but mainly it is possible
> to get different parts of a valid state.  The checksum in the CDAT table
> protects against this.
> 
> Now that the checksum is validated issue a retry if the CDAT read fails.
> For now 2 retries are implemented.
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> NOTE: Is 2 enough?  Should this just be delayed until the time when the
> data is actually needed and not there?

I can't speak to retries at all, but one small issue below. It might make sense
if we keep this to make it a modparam.

> 
> Changes from V5:
> 	New patch -- easy to push off or drop.
> ---
>  drivers/cxl/core/memdev.c | 17 ++++++++++++++++-
>  1 file changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index a01068e98333..11d721c56f08 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -356,7 +356,8 @@ static const struct file_operations cxl_memdev_fops = {
>  	.llseek = noop_llseek,
>  };
>  
> -static int read_cdat_data(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds)
> +static int __read_cdat_data(struct cxl_memdev *cxlmd,
> +			    struct cxl_dev_state *cxlds)
>  {
>  	struct device *dev = &cxlmd->dev;
>  	size_t cdat_length;
> @@ -371,6 +372,20 @@ static int read_cdat_data(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds)
>  	return cxl_mem_cdat_read_table(cxlds, &cxlmd->cdat);
>  }
>  
> +static int read_cdat_data(struct cxl_memdev *cxlmd,
> +			  struct cxl_dev_state *cxlds)
> +{
> +	int retries = 2;
> +	int rc;
> +
> +	while (--retries) {

You either want retries--, or retries = 3...

> +		rc = __read_cdat_data(cxlmd, cxlds);
> +		if (!rc)
> +			break;
> +	}
> +	return rc;
> +}
> +
>  struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
>  {
>  	struct cxl_memdev *cxlmd;
> -- 
> 2.31.1
>
Ira Weiny Feb. 1, 2022, 10:31 p.m. UTC | #2
On Tue, Feb 01, 2022 at 10:59:28AM -0800, Widawsky, Ben wrote:
> On 22-01-31 23:19:51, ira.weiny@intel.com wrote:
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > The CDAT read may fail for a number of reasons but mainly it is possible
> > to get different parts of a valid state.  The checksum in the CDAT table
> > protects against this.
> > 
> > Now that the checksum is validated issue a retry if the CDAT read fails.
> > For now 2 retries are implemented.
> > 
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > 
> > ---
> > NOTE: Is 2 enough?  Should this just be delayed until the time when the
> > data is actually needed and not there?
> 
> I can't speak to retries at all, but one small issue below. It might make sense
> if we keep this to make it a modparam.

Not a bad idea.

> 
> > 
> > Changes from V5:
> > 	New patch -- easy to push off or drop.
> > ---
> >  drivers/cxl/core/memdev.c | 17 ++++++++++++++++-
> >  1 file changed, 16 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> > index a01068e98333..11d721c56f08 100644
> > --- a/drivers/cxl/core/memdev.c
> > +++ b/drivers/cxl/core/memdev.c
> > @@ -356,7 +356,8 @@ static const struct file_operations cxl_memdev_fops = {
> >  	.llseek = noop_llseek,
> >  };
> >  
> > -static int read_cdat_data(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds)
> > +static int __read_cdat_data(struct cxl_memdev *cxlmd,
> > +			    struct cxl_dev_state *cxlds)
> >  {
> >  	struct device *dev = &cxlmd->dev;
> >  	size_t cdat_length;
> > @@ -371,6 +372,20 @@ static int read_cdat_data(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds)
> >  	return cxl_mem_cdat_read_table(cxlds, &cxlmd->cdat);
> >  }
> >  
> > +static int read_cdat_data(struct cxl_memdev *cxlmd,
> > +			  struct cxl_dev_state *cxlds)
> > +{
> > +	int retries = 2;
> > +	int rc;
> > +
> > +	while (--retries) {
> 
> You either want retries--, or retries = 3...

Opps yea.

Thanks,
Ira

> 
> > +		rc = __read_cdat_data(cxlmd, cxlds);
> > +		if (!rc)
> > +			break;
> > +	}
> > +	return rc;
> > +}
> > +
> >  struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
> >  {
> >  	struct cxl_memdev *cxlmd;
> > -- 
> > 2.31.1
> >
Jonathan Cameron Feb. 4, 2022, 1:20 p.m. UTC | #3
On Tue, 1 Feb 2022 14:31:59 -0800
Ira Weiny <ira.weiny@intel.com> wrote:

> On Tue, Feb 01, 2022 at 10:59:28AM -0800, Widawsky, Ben wrote:
> > On 22-01-31 23:19:51, ira.weiny@intel.com wrote:  
> > > From: Ira Weiny <ira.weiny@intel.com>
> > > 
> > > The CDAT read may fail for a number of reasons but mainly it is possible
> > > to get different parts of a valid state.  The checksum in the CDAT table
> > > protects against this.
> > > 
> > > Now that the checksum is validated issue a retry if the CDAT read fails.
> > > For now 2 retries are implemented.
> > > 
> > > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > > 
> > > ---
> > > NOTE: Is 2 enough?  Should this just be delayed until the time when the
> > > data is actually needed and not there?  
> > 
> > I can't speak to retries at all, but one small issue below. It might make sense
> > if we keep this to make it a modparam.  
> 
> Not a bad idea.

Ah. Here is the retry - I should have read the rest of the thread :)

This whole cycle isn't in a hot path and is fairly cheap. I'd just
go with c. 5 and assume that is enough for anyone.  If we need a module
parameter later because this race turns out to be something that
actually happens then it is easy enough to add then.




> 
> >   
> > > 
> > > Changes from V5:
> > > 	New patch -- easy to push off or drop.
> > > ---
> > >  drivers/cxl/core/memdev.c | 17 ++++++++++++++++-
> > >  1 file changed, 16 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> > > index a01068e98333..11d721c56f08 100644
> > > --- a/drivers/cxl/core/memdev.c
> > > +++ b/drivers/cxl/core/memdev.c
> > > @@ -356,7 +356,8 @@ static const struct file_operations cxl_memdev_fops = {
> > >  	.llseek = noop_llseek,
> > >  };
> > >  
> > > -static int read_cdat_data(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds)
> > > +static int __read_cdat_data(struct cxl_memdev *cxlmd,
> > > +			    struct cxl_dev_state *cxlds)
> > >  {
> > >  	struct device *dev = &cxlmd->dev;
> > >  	size_t cdat_length;
> > > @@ -371,6 +372,20 @@ static int read_cdat_data(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds)
> > >  	return cxl_mem_cdat_read_table(cxlds, &cxlmd->cdat);
> > >  }
> > >  
> > > +static int read_cdat_data(struct cxl_memdev *cxlmd,
> > > +			  struct cxl_dev_state *cxlds)
> > > +{
> > > +	int retries = 2;
> > > +	int rc;
> > > +
> > > +	while (--retries) {  
> > 
> > You either want retries--, or retries = 3...  
> 
> Opps yea.
> 
> Thanks,
> Ira
> 
> >   
> > > +		rc = __read_cdat_data(cxlmd, cxlds);
> > > +		if (!rc)
> > > +			break;
> > > +	}
> > > +	return rc;
> > > +}
> > > +
> > >  struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
> > >  {
> > >  	struct cxl_memdev *cxlmd;
> > > -- 
> > > 2.31.1
> > >
diff mbox series

Patch

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index a01068e98333..11d721c56f08 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -356,7 +356,8 @@  static const struct file_operations cxl_memdev_fops = {
 	.llseek = noop_llseek,
 };
 
-static int read_cdat_data(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds)
+static int __read_cdat_data(struct cxl_memdev *cxlmd,
+			    struct cxl_dev_state *cxlds)
 {
 	struct device *dev = &cxlmd->dev;
 	size_t cdat_length;
@@ -371,6 +372,20 @@  static int read_cdat_data(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds)
 	return cxl_mem_cdat_read_table(cxlds, &cxlmd->cdat);
 }
 
+static int read_cdat_data(struct cxl_memdev *cxlmd,
+			  struct cxl_dev_state *cxlds)
+{
+	int retries = 2;
+	int rc;
+
+	while (--retries) {
+		rc = __read_cdat_data(cxlmd, cxlds);
+		if (!rc)
+			break;
+	}
+	return rc;
+}
+
 struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
 {
 	struct cxl_memdev *cxlmd;