diff mbox series

[v3,1/1] fpga: dfl: afu: harden port enable logic

Message ID 20210202230631.198950-1-russell.h.weight@intel.com (mailing list archive)
State Superseded, archived
Headers show
Series [v3,1/1] fpga: dfl: afu: harden port enable logic | expand

Commit Message

Russ Weight Feb. 2, 2021, 11:06 p.m. UTC
Port enable is not complete until ACK = 0. Change
__afu_port_enable() to guarantee that the enable process
is complete by polling for ACK == 0.

Signed-off-by: Russ Weight <russell.h.weight@intel.com>
---
v3:
  - afu_port_err_clear() changed to prioritize port_enable failure over
    other a detected mismatch in port errors.
  - reorganized code in port_reset() to be more readable.
v2:
  - Fixed typo in commit message
---
 drivers/fpga/dfl-afu-error.c |  8 ++++----
 drivers/fpga/dfl-afu-main.c  | 31 ++++++++++++++++++++++---------
 drivers/fpga/dfl-afu.h       |  2 +-
 3 files changed, 27 insertions(+), 14 deletions(-)

Comments

Wu, Hao Feb. 3, 2021, 9:01 a.m. UTC | #1
> Subject: [PATCH v3 1/1] fpga: dfl: afu: harden port enable logic
> 
> Port enable is not complete until ACK = 0. Change
> __afu_port_enable() to guarantee that the enable process
> is complete by polling for ACK == 0.
> 
> Signed-off-by: Russ Weight <russell.h.weight@intel.com>
> ---
> v3:
>   - afu_port_err_clear() changed to prioritize port_enable failure over
>     other a detected mismatch in port errors.
>   - reorganized code in port_reset() to be more readable.
> v2:
>   - Fixed typo in commit message
> ---
>  drivers/fpga/dfl-afu-error.c |  8 ++++----
>  drivers/fpga/dfl-afu-main.c  | 31 ++++++++++++++++++++++---------
>  drivers/fpga/dfl-afu.h       |  2 +-
>  3 files changed, 27 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/fpga/dfl-afu-error.c b/drivers/fpga/dfl-afu-error.c
> index c4691187cca9..2ced610059cc 100644
> --- a/drivers/fpga/dfl-afu-error.c
> +++ b/drivers/fpga/dfl-afu-error.c
> @@ -52,7 +52,7 @@ static int afu_port_err_clear(struct device *dev, u64 err)
>  	struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
>  	struct platform_device *pdev = to_platform_device(dev);
>  	void __iomem *base_err, *base_hdr;
> -	int ret = -EBUSY;
> +	int enable_ret = 0, ret = -EBUSY;
>  	u64 v;
> 
>  	base_err = dfl_get_feature_ioaddr_by_id(dev,
> PORT_FEATURE_ID_ERROR);
> @@ -102,12 +102,12 @@ static int afu_port_err_clear(struct device *dev, u64
> err)
>  	/* Clear mask */
>  	__afu_port_err_mask(dev, false);
> 
> -	/* Enable the Port by clear the reset */
> -	__afu_port_enable(pdev);
> +	/* Enable the Port by clearing the reset */
> +	enable_ret = __afu_port_enable(pdev);
> 
>  done:
>  	mutex_unlock(&pdata->lock);
> -	return ret;
> +	return enable_ret ? enable_ret : ret;

Maybe we should add some error message to notify user, there are more errors happened,
as some ret value is not returned.

>  }
> 
>  static ssize_t errors_show(struct device *dev, struct device_attribute *attr,
> diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
> index 753cda4b2568..729eb306062e 100644
> --- a/drivers/fpga/dfl-afu-main.c
> +++ b/drivers/fpga/dfl-afu-main.c
> @@ -21,6 +21,9 @@
> 
>  #include "dfl-afu.h"
> 
> +#define RST_POLL_INVL 10 /* us */
> +#define RST_POLL_TIMEOUT 1000 /* us */
> +
>  /**
>   * __afu_port_enable - enable a port by clear reset
>   * @pdev: port platform device.
> @@ -32,7 +35,7 @@
>   *
>   * The caller needs to hold lock for protection.
>   */
> -void __afu_port_enable(struct platform_device *pdev)
> +int __afu_port_enable(struct platform_device *pdev)
>  {
>  	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev-
> >dev);
>  	void __iomem *base;
> @@ -41,7 +44,7 @@ void __afu_port_enable(struct platform_device *pdev)
>  	WARN_ON(!pdata->disable_count);
> 
>  	if (--pdata->disable_count != 0)
> -		return;
> +		return 0;
> 
>  	base = dfl_get_feature_ioaddr_by_id(&pdev->dev,
> PORT_FEATURE_ID_HEADER);
> 
> @@ -49,10 +52,20 @@ void __afu_port_enable(struct platform_device *pdev)
>  	v = readq(base + PORT_HDR_CTRL);
>  	v &= ~PORT_CTRL_SFTRST;
>  	writeq(v, base + PORT_HDR_CTRL);
> -}
> 
> -#define RST_POLL_INVL 10 /* us */
> -#define RST_POLL_TIMEOUT 1000 /* us */
> +	/*
> +	 * HW clears the ack bit to indicate that the port is fully out
> +	 * of reset.
> +	 */
> +	if (readq_poll_timeout(base + PORT_HDR_CTRL, v,
> +			       !(v & PORT_CTRL_SFTRST_ACK),
> +			       RST_POLL_INVL, RST_POLL_TIMEOUT)) {
> +		dev_err(&pdev->dev, "timeout, failure to enable device\n");

Maybe we can change dev_err message in port disable to "disable device" as well. : )

Hao

> +		return -ETIMEDOUT;
> +	}
> +
> +	return 0;
> +}
> 
>  /**
>   * __afu_port_disable - disable a port by hold reset
> @@ -111,9 +124,9 @@ static int __port_reset(struct platform_device *pdev)
> 
>  	ret = __afu_port_disable(pdev);
>  	if (!ret)
> -		__afu_port_enable(pdev);
> +		return ret;
> 
> -	return ret;
> +	return __afu_port_enable(pdev);
>  }
> 
>  static int port_reset(struct platform_device *pdev)
> @@ -872,11 +885,11 @@ static int afu_dev_destroy(struct platform_device
> *pdev)
>  static int port_enable_set(struct platform_device *pdev, bool enable)
>  {
>  	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev-
> >dev);
> -	int ret = 0;
> +	int ret;
> 
>  	mutex_lock(&pdata->lock);
>  	if (enable)
> -		__afu_port_enable(pdev);
> +		ret = __afu_port_enable(pdev);
>  	else
>  		ret = __afu_port_disable(pdev);
>  	mutex_unlock(&pdata->lock);
> diff --git a/drivers/fpga/dfl-afu.h b/drivers/fpga/dfl-afu.h
> index 576e94960086..e5020e2b1f3d 100644
> --- a/drivers/fpga/dfl-afu.h
> +++ b/drivers/fpga/dfl-afu.h
> @@ -80,7 +80,7 @@ struct dfl_afu {
>  };
> 
>  /* hold pdata->lock when call __afu_port_enable/disable */
> -void __afu_port_enable(struct platform_device *pdev);
> +int __afu_port_enable(struct platform_device *pdev);
>  int __afu_port_disable(struct platform_device *pdev);
> 
>  void afu_mmio_region_init(struct dfl_feature_platform_data *pdata);
> --
> 2.25.1
Tom Rix Feb. 3, 2021, 3:25 p.m. UTC | #2
..snip..

On 2/2/21 3:06 PM, Russ Weight wrote:
> diff --git a/drivers/fpga/dfl-afu.h b/drivers/fpga/dfl-afu.h
> index 576e94960086..e5020e2b1f3d 100644
> --- a/drivers/fpga/dfl-afu.h
> +++ b/drivers/fpga/dfl-afu.h
> @@ -80,7 +80,7 @@ struct dfl_afu {
>  };
>  
>  /* hold pdata->lock when call __afu_port_enable/disable */
> -void __afu_port_enable(struct platform_device *pdev);
> +int __afu_port_enable(struct platform_device *pdev);
>  int __afu_port_disable(struct platform_device *pdev);
>  

Should the '__' prefix be removed from __afu_port* ?

This would make the function names consistent with the other decls

Tom

>  void afu_mmio_region_init(struct dfl_feature_platform_data *pdata);
Russ Weight Feb. 3, 2021, 10:55 p.m. UTC | #3
On 2/3/21 1:01 AM, Wu, Hao wrote:
>> Subject: [PATCH v3 1/1] fpga: dfl: afu: harden port enable logic
>>
>> Port enable is not complete until ACK = 0. Change
>> __afu_port_enable() to guarantee that the enable process
>> is complete by polling for ACK == 0.
>>
>> Signed-off-by: Russ Weight <russell.h.weight@intel.com>
>> ---
>> v3:
>>   - afu_port_err_clear() changed to prioritize port_enable failure over
>>     other a detected mismatch in port errors.
>>   - reorganized code in port_reset() to be more readable.
>> v2:
>>   - Fixed typo in commit message
>> ---
>>  drivers/fpga/dfl-afu-error.c |  8 ++++----
>>  drivers/fpga/dfl-afu-main.c  | 31 ++++++++++++++++++++++---------
>>  drivers/fpga/dfl-afu.h       |  2 +-
>>  3 files changed, 27 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/fpga/dfl-afu-error.c b/drivers/fpga/dfl-afu-error.c
>> index c4691187cca9..2ced610059cc 100644
>> --- a/drivers/fpga/dfl-afu-error.c
>> +++ b/drivers/fpga/dfl-afu-error.c
>> @@ -52,7 +52,7 @@ static int afu_port_err_clear(struct device *dev, u64 err)
>>  struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
>>  struct platform_device *pdev = to_platform_device(dev);
>>  void __iomem *base_err, *base_hdr;
>> -int ret = -EBUSY;
>> +int enable_ret = 0, ret = -EBUSY;
>>  u64 v;
>>
>>  base_err = dfl_get_feature_ioaddr_by_id(dev,
>> PORT_FEATURE_ID_ERROR);
>> @@ -102,12 +102,12 @@ static int afu_port_err_clear(struct device *dev, u64
>> err)
>>  /* Clear mask */
>>  __afu_port_err_mask(dev, false);
>>
>> -/* Enable the Port by clear the reset */
>> -__afu_port_enable(pdev);
>> +/* Enable the Port by clearing the reset */
>> +enable_ret = __afu_port_enable(pdev);
>>
>>  done:
>>  mutex_unlock(&pdata->lock);
>> -return ret;
>> +return enable_ret ? enable_ret : ret;
> Maybe we should add some error message to notify user, there are more errors happened,
> as some ret value is not returned.
It is the -EINVAL error case that would get lost if there was a double error.
This error indicates that the value written to sysfs by the user does not
correspond to the current port errors. This is not a hardware error, and could
even be a user error. Do you think a warning in the error log is needed here?

>
>>  }
>>
>>  static ssize_t errors_show(struct device *dev, struct device_attribute *attr,
>> diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
>> index 753cda4b2568..729eb306062e 100644
>> --- a/drivers/fpga/dfl-afu-main.c
>> +++ b/drivers/fpga/dfl-afu-main.c
>> @@ -21,6 +21,9 @@
>>
>>  #include "dfl-afu.h"
>>
>> +#define RST_POLL_INVL 10 /* us */
>> +#define RST_POLL_TIMEOUT 1000 /* us */
>> +
>>  /**
>>   * __afu_port_enable - enable a port by clear reset
>>   * @pdev: port platform device.
>> @@ -32,7 +35,7 @@
>>   *
>>   * The caller needs to hold lock for protection.
>>   */
>> -void __afu_port_enable(struct platform_device *pdev)
>> +int __afu_port_enable(struct platform_device *pdev)
>>  {
>>  struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev-
>>> dev);
>>  void __iomem *base;
>> @@ -41,7 +44,7 @@ void __afu_port_enable(struct platform_device *pdev)
>>  WARN_ON(!pdata->disable_count);
>>
>>  if (--pdata->disable_count != 0)
>> -return;
>> +return 0;
>>
>>  base = dfl_get_feature_ioaddr_by_id(&pdev->dev,
>> PORT_FEATURE_ID_HEADER);
>>
>> @@ -49,10 +52,20 @@ void __afu_port_enable(struct platform_device *pdev)
>>  v = readq(base + PORT_HDR_CTRL);
>>  v &= ~PORT_CTRL_SFTRST;
>>  writeq(v, base + PORT_HDR_CTRL);
>> -}
>>
>> -#define RST_POLL_INVL 10 /* us */
>> -#define RST_POLL_TIMEOUT 1000 /* us */
>> +/*
>> + * HW clears the ack bit to indicate that the port is fully out
>> + * of reset.
>> + */
>> +if (readq_poll_timeout(base + PORT_HDR_CTRL, v,
>> +       !(v & PORT_CTRL_SFTRST_ACK),
>> +       RST_POLL_INVL, RST_POLL_TIMEOUT)) {
>> +dev_err(&pdev->dev, "timeout, failure to enable device\n");
> Maybe we can change dev_err message in port disable to "disable device" as well. : )
Thank you. I'll submit a new version of the patch with this fix.

- Russ
>
> Hao
>
>> +return -ETIMEDOUT;
>> +}
>> +
>> +return 0;
>> +}
>>
>>  /**
>>   * __afu_port_disable - disable a port by hold reset
>> @@ -111,9 +124,9 @@ static int __port_reset(struct platform_device *pdev)
>>
>>  ret = __afu_port_disable(pdev);
>>  if (!ret)
>> -__afu_port_enable(pdev);
>> +return ret;
>>
>> -return ret;
>> +return __afu_port_enable(pdev);
>>  }
>>
>>  static int port_reset(struct platform_device *pdev)
>> @@ -872,11 +885,11 @@ static int afu_dev_destroy(struct platform_device
>> *pdev)
>>  static int port_enable_set(struct platform_device *pdev, bool enable)
>>  {
>>  struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev-
>>> dev);
>> -int ret = 0;
>> +int ret;
>>
>>  mutex_lock(&pdata->lock);
>>  if (enable)
>> -__afu_port_enable(pdev);
>> +ret = __afu_port_enable(pdev);
>>  else
>>  ret = __afu_port_disable(pdev);
>>  mutex_unlock(&pdata->lock);
>> diff --git a/drivers/fpga/dfl-afu.h b/drivers/fpga/dfl-afu.h
>> index 576e94960086..e5020e2b1f3d 100644
>> --- a/drivers/fpga/dfl-afu.h
>> +++ b/drivers/fpga/dfl-afu.h
>> @@ -80,7 +80,7 @@ struct dfl_afu {
>>  };
>>
>>  /* hold pdata->lock when call __afu_port_enable/disable */
>> -void __afu_port_enable(struct platform_device *pdev);
>> +int __afu_port_enable(struct platform_device *pdev);
>>  int __afu_port_disable(struct platform_device *pdev);
>>
>>  void afu_mmio_region_init(struct dfl_feature_platform_data *pdata);
>> --
>> 2.25.1
Russ Weight Feb. 3, 2021, 11:06 p.m. UTC | #4
On 2/3/21 7:25 AM, Tom Rix wrote:
> ..snip..
>
> On 2/2/21 3:06 PM, Russ Weight wrote:
>> diff --git a/drivers/fpga/dfl-afu.h b/drivers/fpga/dfl-afu.h
>> index 576e94960086..e5020e2b1f3d 100644
>> --- a/drivers/fpga/dfl-afu.h
>> +++ b/drivers/fpga/dfl-afu.h
>> @@ -80,7 +80,7 @@ struct dfl_afu {
>>  };
>>  
>>  /* hold pdata->lock when call __afu_port_enable/disable */
>> -void __afu_port_enable(struct platform_device *pdev);
>> +int __afu_port_enable(struct platform_device *pdev);
>>  int __afu_port_disable(struct platform_device *pdev);
>>  
> Should the '__' prefix be removed from __afu_port* ?
>
> This would make the function names consistent with the other decls

The '__' prefix is used here to help highlight the fact that these functions go not manage
the locking themselves and must be called while holding the port mutex. There are additional
functions, such as__port_reset(), that are following this same convention. I think these
are OK as they are.

- Russ

>
> Tom
>
>>  void afu_mmio_region_init(struct dfl_feature_platform_data *pdata);
Wu, Hao Feb. 4, 2021, 1:38 a.m. UTC | #5
> On 2/3/21 1:01 AM, Wu, Hao wrote:
> >> Subject: [PATCH v3 1/1] fpga: dfl: afu: harden port enable logic
> >>
> >> Port enable is not complete until ACK = 0. Change
> >> __afu_port_enable() to guarantee that the enable process
> >> is complete by polling for ACK == 0.
> >>
> >> Signed-off-by: Russ Weight <russell.h.weight@intel.com>
> >> ---
> >> v3:
> >>   - afu_port_err_clear() changed to prioritize port_enable failure over
> >>     other a detected mismatch in port errors.
> >>   - reorganized code in port_reset() to be more readable.
> >> v2:
> >>   - Fixed typo in commit message
> >> ---
> >>  drivers/fpga/dfl-afu-error.c |  8 ++++----
> >>  drivers/fpga/dfl-afu-main.c  | 31 ++++++++++++++++++++++---------
> >>  drivers/fpga/dfl-afu.h       |  2 +-
> >>  3 files changed, 27 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/drivers/fpga/dfl-afu-error.c b/drivers/fpga/dfl-afu-error.c
> >> index c4691187cca9..2ced610059cc 100644
> >> --- a/drivers/fpga/dfl-afu-error.c
> >> +++ b/drivers/fpga/dfl-afu-error.c
> >> @@ -52,7 +52,7 @@ static int afu_port_err_clear(struct device *dev, u64 err)
> >>  struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
> >>  struct platform_device *pdev = to_platform_device(dev);
> >>  void __iomem *base_err, *base_hdr;
> >> -int ret = -EBUSY;
> >> +int enable_ret = 0, ret = -EBUSY;
> >>  u64 v;
> >>
> >>  base_err = dfl_get_feature_ioaddr_by_id(dev,
> >> PORT_FEATURE_ID_ERROR);
> >> @@ -102,12 +102,12 @@ static int afu_port_err_clear(struct device *dev,
> u64
> >> err)
> >>  /* Clear mask */
> >>  __afu_port_err_mask(dev, false);
> >>
> >> -/* Enable the Port by clear the reset */
> >> -__afu_port_enable(pdev);
> >> +/* Enable the Port by clearing the reset */
> >> +enable_ret = __afu_port_enable(pdev);
> >>
> >>  done:
> >>  mutex_unlock(&pdata->lock);
> >> -return ret;
> >> +return enable_ret ? enable_ret : ret;
> > Maybe we should add some error message to notify user, there are more
> errors happened,
> > as some ret value is not returned.
> It is the -EINVAL error case that would get lost if there was a double error.
> This error indicates that the value written to sysfs by the user does not
> correspond to the current port errors. This is not a hardware error, and could
> even be a user error. Do you think a warning in the error log is needed here?

I think so, as there are actually two errors there, I feel it's better to let user know
their input is not accepted too than just keeping silence, right? as this error
should be triggered by user input?

Hao

> 
> >
> >>  }
> >>
> >>  static ssize_t errors_show(struct device *dev, struct device_attribute *attr,
> >> diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
> >> index 753cda4b2568..729eb306062e 100644
> >> --- a/drivers/fpga/dfl-afu-main.c
> >> +++ b/drivers/fpga/dfl-afu-main.c
> >> @@ -21,6 +21,9 @@
> >>
> >>  #include "dfl-afu.h"
> >>
> >> +#define RST_POLL_INVL 10 /* us */
> >> +#define RST_POLL_TIMEOUT 1000 /* us */
> >> +
> >>  /**
> >>   * __afu_port_enable - enable a port by clear reset
> >>   * @pdev: port platform device.
> >> @@ -32,7 +35,7 @@
> >>   *
> >>   * The caller needs to hold lock for protection.
> >>   */
> >> -void __afu_port_enable(struct platform_device *pdev)
> >> +int __afu_port_enable(struct platform_device *pdev)
> >>  {
> >>  struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev-
> >>> dev);
> >>  void __iomem *base;
> >> @@ -41,7 +44,7 @@ void __afu_port_enable(struct platform_device *pdev)
> >>  WARN_ON(!pdata->disable_count);
> >>
> >>  if (--pdata->disable_count != 0)
> >> -return;
> >> +return 0;
> >>
> >>  base = dfl_get_feature_ioaddr_by_id(&pdev->dev,
> >> PORT_FEATURE_ID_HEADER);
> >>
> >> @@ -49,10 +52,20 @@ void __afu_port_enable(struct platform_device
> *pdev)
> >>  v = readq(base + PORT_HDR_CTRL);
> >>  v &= ~PORT_CTRL_SFTRST;
> >>  writeq(v, base + PORT_HDR_CTRL);
> >> -}
> >>
> >> -#define RST_POLL_INVL 10 /* us */
> >> -#define RST_POLL_TIMEOUT 1000 /* us */
> >> +/*
> >> + * HW clears the ack bit to indicate that the port is fully out
> >> + * of reset.
> >> + */
> >> +if (readq_poll_timeout(base + PORT_HDR_CTRL, v,
> >> +       !(v & PORT_CTRL_SFTRST_ACK),
> >> +       RST_POLL_INVL, RST_POLL_TIMEOUT)) {
> >> +dev_err(&pdev->dev, "timeout, failure to enable device\n");
> > Maybe we can change dev_err message in port disable to "disable device" as
> well. : )
> Thank you. I'll submit a new version of the patch with this fix.
> 
> - Russ
> >
> > Hao
> >
> >> +return -ETIMEDOUT;
> >> +}
> >> +
> >> +return 0;
> >> +}
> >>
> >>  /**
> >>   * __afu_port_disable - disable a port by hold reset
> >> @@ -111,9 +124,9 @@ static int __port_reset(struct platform_device *pdev)
> >>
> >>  ret = __afu_port_disable(pdev);
> >>  if (!ret)
> >> -__afu_port_enable(pdev);
> >> +return ret;
> >>
> >> -return ret;
> >> +return __afu_port_enable(pdev);
> >>  }
> >>
> >>  static int port_reset(struct platform_device *pdev)
> >> @@ -872,11 +885,11 @@ static int afu_dev_destroy(struct platform_device
> >> *pdev)
> >>  static int port_enable_set(struct platform_device *pdev, bool enable)
> >>  {
> >>  struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev-
> >>> dev);
> >> -int ret = 0;
> >> +int ret;
> >>
> >>  mutex_lock(&pdata->lock);
> >>  if (enable)
> >> -__afu_port_enable(pdev);
> >> +ret = __afu_port_enable(pdev);
> >>  else
> >>  ret = __afu_port_disable(pdev);
> >>  mutex_unlock(&pdata->lock);
> >> diff --git a/drivers/fpga/dfl-afu.h b/drivers/fpga/dfl-afu.h
> >> index 576e94960086..e5020e2b1f3d 100644
> >> --- a/drivers/fpga/dfl-afu.h
> >> +++ b/drivers/fpga/dfl-afu.h
> >> @@ -80,7 +80,7 @@ struct dfl_afu {
> >>  };
> >>
> >>  /* hold pdata->lock when call __afu_port_enable/disable */
> >> -void __afu_port_enable(struct platform_device *pdev);
> >> +int __afu_port_enable(struct platform_device *pdev);
> >>  int __afu_port_disable(struct platform_device *pdev);
> >>
> >>  void afu_mmio_region_init(struct dfl_feature_platform_data *pdata);
> >> --
> >> 2.25.1
Tom Rix Feb. 4, 2021, 2:24 p.m. UTC | #6
On 2/3/21 3:06 PM, Russ Weight wrote:
>
> On 2/3/21 7:25 AM, Tom Rix wrote:
>> ..snip..
>>
>> On 2/2/21 3:06 PM, Russ Weight wrote:
>>> diff --git a/drivers/fpga/dfl-afu.h b/drivers/fpga/dfl-afu.h
>>> index 576e94960086..e5020e2b1f3d 100644
>>> --- a/drivers/fpga/dfl-afu.h
>>> +++ b/drivers/fpga/dfl-afu.h
>>> @@ -80,7 +80,7 @@ struct dfl_afu {
>>>  };
>>>  
>>>  /* hold pdata->lock when call __afu_port_enable/disable */
>>> -void __afu_port_enable(struct platform_device *pdev);
>>> +int __afu_port_enable(struct platform_device *pdev);
>>>  int __afu_port_disable(struct platform_device *pdev);
>>>  
>> Should the '__' prefix be removed from __afu_port* ?
>>
>> This would make the function names consistent with the other decls
> The '__' prefix is used here to help highlight the fact that these functions go not manage
> the locking themselves and must be called while holding the port mutex. There are additional
> functions, such as__port_reset(), that are following this same convention. I think these
> are OK as they are.

ok

Reviewed-by: Tom Rix <trix@redhat.com>

Tom

>
> - Russ
>
>> Tom
>>
>>>  void afu_mmio_region_init(struct dfl_feature_platform_data *pdata);
Russ Weight Feb. 4, 2021, 5:59 p.m. UTC | #7
On 2/3/21 5:38 PM, Wu, Hao wrote:
>> On 2/3/21 1:01 AM, Wu, Hao wrote:
>>>> Subject: [PATCH v3 1/1] fpga: dfl: afu: harden port enable logic
>>>>
>>>> Port enable is not complete until ACK = 0. Change
>>>> __afu_port_enable() to guarantee that the enable process
>>>> is complete by polling for ACK == 0.
>>>>
>>>> Signed-off-by: Russ Weight <russell.h.weight@intel.com>
>>>> ---
>>>> v3:
>>>>   - afu_port_err_clear() changed to prioritize port_enable failure over
>>>>     other a detected mismatch in port errors.
>>>>   - reorganized code in port_reset() to be more readable.
>>>> v2:
>>>>   - Fixed typo in commit message
>>>> ---
>>>>  drivers/fpga/dfl-afu-error.c |  8 ++++----
>>>>  drivers/fpga/dfl-afu-main.c  | 31 ++++++++++++++++++++++---------
>>>>  drivers/fpga/dfl-afu.h       |  2 +-
>>>>  3 files changed, 27 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/drivers/fpga/dfl-afu-error.c b/drivers/fpga/dfl-afu-error.c
>>>> index c4691187cca9..2ced610059cc 100644
>>>> --- a/drivers/fpga/dfl-afu-error.c
>>>> +++ b/drivers/fpga/dfl-afu-error.c
>>>> @@ -52,7 +52,7 @@ static int afu_port_err_clear(struct device *dev, u64 err)
>>>>  struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
>>>>  struct platform_device *pdev = to_platform_device(dev);
>>>>  void __iomem *base_err, *base_hdr;
>>>> -int ret = -EBUSY;
>>>> +int enable_ret = 0, ret = -EBUSY;
>>>>  u64 v;
>>>>
>>>>  base_err = dfl_get_feature_ioaddr_by_id(dev,
>>>> PORT_FEATURE_ID_ERROR);
>>>> @@ -102,12 +102,12 @@ static int afu_port_err_clear(struct device *dev,
>> u64
>>>> err)
>>>>  /* Clear mask */
>>>>  __afu_port_err_mask(dev, false);
>>>>
>>>> -/* Enable the Port by clear the reset */
>>>> -__afu_port_enable(pdev);
>>>> +/* Enable the Port by clearing the reset */
>>>> +enable_ret = __afu_port_enable(pdev);
>>>>
>>>>  done:
>>>>  mutex_unlock(&pdata->lock);
>>>> -return ret;
>>>> +return enable_ret ? enable_ret : ret;
>>> Maybe we should add some error message to notify user, there are more
>> errors happened,
>>> as some ret value is not returned.
>> It is the -EINVAL error case that would get lost if there was a double error.
>> This error indicates that the value written to sysfs by the user does not
>> correspond to the current port errors. This is not a hardware error, and could
>> even be a user error. Do you think a warning in the error log is needed here?
> I think so, as there are actually two errors there, I feel it's better to let user know
> their input is not accepted too than just keeping silence, right? as this error
> should be triggered by user input?
In the single error case, the EINVAL would be returned to the user-space caller. That is
the common case. In the double error case, the EINVAL would be lost. Either way, it is
helpful to know what the expected/actual values are. I have added a warning message to
the kernel log for the next version of the patch which also shows the expected/actual
values.

- Russ
>
> Hao
>
>>>>  }
>>>>
>>>>  static ssize_t errors_show(struct device *dev, struct device_attribute *attr,
>>>> diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
>>>> index 753cda4b2568..729eb306062e 100644
>>>> --- a/drivers/fpga/dfl-afu-main.c
>>>> +++ b/drivers/fpga/dfl-afu-main.c
>>>> @@ -21,6 +21,9 @@
>>>>
>>>>  #include "dfl-afu.h"
>>>>
>>>> +#define RST_POLL_INVL 10 /* us */
>>>> +#define RST_POLL_TIMEOUT 1000 /* us */
>>>> +
>>>>  /**
>>>>   * __afu_port_enable - enable a port by clear reset
>>>>   * @pdev: port platform device.
>>>> @@ -32,7 +35,7 @@
>>>>   *
>>>>   * The caller needs to hold lock for protection.
>>>>   */
>>>> -void __afu_port_enable(struct platform_device *pdev)
>>>> +int __afu_port_enable(struct platform_device *pdev)
>>>>  {
>>>>  struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev-
>>>>> dev);
>>>>  void __iomem *base;
>>>> @@ -41,7 +44,7 @@ void __afu_port_enable(struct platform_device *pdev)
>>>>  WARN_ON(!pdata->disable_count);
>>>>
>>>>  if (--pdata->disable_count != 0)
>>>> -return;
>>>> +return 0;
>>>>
>>>>  base = dfl_get_feature_ioaddr_by_id(&pdev->dev,
>>>> PORT_FEATURE_ID_HEADER);
>>>>
>>>> @@ -49,10 +52,20 @@ void __afu_port_enable(struct platform_device
>> *pdev)
>>>>  v = readq(base + PORT_HDR_CTRL);
>>>>  v &= ~PORT_CTRL_SFTRST;
>>>>  writeq(v, base + PORT_HDR_CTRL);
>>>> -}
>>>>
>>>> -#define RST_POLL_INVL 10 /* us */
>>>> -#define RST_POLL_TIMEOUT 1000 /* us */
>>>> +/*
>>>> + * HW clears the ack bit to indicate that the port is fully out
>>>> + * of reset.
>>>> + */
>>>> +if (readq_poll_timeout(base + PORT_HDR_CTRL, v,
>>>> +       !(v & PORT_CTRL_SFTRST_ACK),
>>>> +       RST_POLL_INVL, RST_POLL_TIMEOUT)) {
>>>> +dev_err(&pdev->dev, "timeout, failure to enable device\n");
>>> Maybe we can change dev_err message in port disable to "disable device" as
>> well. : )
>> Thank you. I'll submit a new version of the patch with this fix.
>>
>> - Russ
>>> Hao
>>>
>>>> +return -ETIMEDOUT;
>>>> +}
>>>> +
>>>> +return 0;
>>>> +}
>>>>
>>>>  /**
>>>>   * __afu_port_disable - disable a port by hold reset
>>>> @@ -111,9 +124,9 @@ static int __port_reset(struct platform_device *pdev)
>>>>
>>>>  ret = __afu_port_disable(pdev);
>>>>  if (!ret)
>>>> -__afu_port_enable(pdev);
>>>> +return ret;
>>>>
>>>> -return ret;
>>>> +return __afu_port_enable(pdev);
>>>>  }
>>>>
>>>>  static int port_reset(struct platform_device *pdev)
>>>> @@ -872,11 +885,11 @@ static int afu_dev_destroy(struct platform_device
>>>> *pdev)
>>>>  static int port_enable_set(struct platform_device *pdev, bool enable)
>>>>  {
>>>>  struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev-
>>>>> dev);
>>>> -int ret = 0;
>>>> +int ret;
>>>>
>>>>  mutex_lock(&pdata->lock);
>>>>  if (enable)
>>>> -__afu_port_enable(pdev);
>>>> +ret = __afu_port_enable(pdev);
>>>>  else
>>>>  ret = __afu_port_disable(pdev);
>>>>  mutex_unlock(&pdata->lock);
>>>> diff --git a/drivers/fpga/dfl-afu.h b/drivers/fpga/dfl-afu.h
>>>> index 576e94960086..e5020e2b1f3d 100644
>>>> --- a/drivers/fpga/dfl-afu.h
>>>> +++ b/drivers/fpga/dfl-afu.h
>>>> @@ -80,7 +80,7 @@ struct dfl_afu {
>>>>  };
>>>>
>>>>  /* hold pdata->lock when call __afu_port_enable/disable */
>>>> -void __afu_port_enable(struct platform_device *pdev);
>>>> +int __afu_port_enable(struct platform_device *pdev);
>>>>  int __afu_port_disable(struct platform_device *pdev);
>>>>
>>>>  void afu_mmio_region_init(struct dfl_feature_platform_data *pdata);
>>>> --
>>>> 2.25.1
diff mbox series

Patch

diff --git a/drivers/fpga/dfl-afu-error.c b/drivers/fpga/dfl-afu-error.c
index c4691187cca9..2ced610059cc 100644
--- a/drivers/fpga/dfl-afu-error.c
+++ b/drivers/fpga/dfl-afu-error.c
@@ -52,7 +52,7 @@  static int afu_port_err_clear(struct device *dev, u64 err)
 	struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
 	struct platform_device *pdev = to_platform_device(dev);
 	void __iomem *base_err, *base_hdr;
-	int ret = -EBUSY;
+	int enable_ret = 0, ret = -EBUSY;
 	u64 v;
 
 	base_err = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_ERROR);
@@ -102,12 +102,12 @@  static int afu_port_err_clear(struct device *dev, u64 err)
 	/* Clear mask */
 	__afu_port_err_mask(dev, false);
 
-	/* Enable the Port by clear the reset */
-	__afu_port_enable(pdev);
+	/* Enable the Port by clearing the reset */
+	enable_ret = __afu_port_enable(pdev);
 
 done:
 	mutex_unlock(&pdata->lock);
-	return ret;
+	return enable_ret ? enable_ret : ret;
 }
 
 static ssize_t errors_show(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
index 753cda4b2568..729eb306062e 100644
--- a/drivers/fpga/dfl-afu-main.c
+++ b/drivers/fpga/dfl-afu-main.c
@@ -21,6 +21,9 @@ 
 
 #include "dfl-afu.h"
 
+#define RST_POLL_INVL 10 /* us */
+#define RST_POLL_TIMEOUT 1000 /* us */
+
 /**
  * __afu_port_enable - enable a port by clear reset
  * @pdev: port platform device.
@@ -32,7 +35,7 @@ 
  *
  * The caller needs to hold lock for protection.
  */
-void __afu_port_enable(struct platform_device *pdev)
+int __afu_port_enable(struct platform_device *pdev)
 {
 	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	void __iomem *base;
@@ -41,7 +44,7 @@  void __afu_port_enable(struct platform_device *pdev)
 	WARN_ON(!pdata->disable_count);
 
 	if (--pdata->disable_count != 0)
-		return;
+		return 0;
 
 	base = dfl_get_feature_ioaddr_by_id(&pdev->dev, PORT_FEATURE_ID_HEADER);
 
@@ -49,10 +52,20 @@  void __afu_port_enable(struct platform_device *pdev)
 	v = readq(base + PORT_HDR_CTRL);
 	v &= ~PORT_CTRL_SFTRST;
 	writeq(v, base + PORT_HDR_CTRL);
-}
 
-#define RST_POLL_INVL 10 /* us */
-#define RST_POLL_TIMEOUT 1000 /* us */
+	/*
+	 * HW clears the ack bit to indicate that the port is fully out
+	 * of reset.
+	 */
+	if (readq_poll_timeout(base + PORT_HDR_CTRL, v,
+			       !(v & PORT_CTRL_SFTRST_ACK),
+			       RST_POLL_INVL, RST_POLL_TIMEOUT)) {
+		dev_err(&pdev->dev, "timeout, failure to enable device\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
 
 /**
  * __afu_port_disable - disable a port by hold reset
@@ -111,9 +124,9 @@  static int __port_reset(struct platform_device *pdev)
 
 	ret = __afu_port_disable(pdev);
 	if (!ret)
-		__afu_port_enable(pdev);
+		return ret;
 
-	return ret;
+	return __afu_port_enable(pdev);
 }
 
 static int port_reset(struct platform_device *pdev)
@@ -872,11 +885,11 @@  static int afu_dev_destroy(struct platform_device *pdev)
 static int port_enable_set(struct platform_device *pdev, bool enable)
 {
 	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
-	int ret = 0;
+	int ret;
 
 	mutex_lock(&pdata->lock);
 	if (enable)
-		__afu_port_enable(pdev);
+		ret = __afu_port_enable(pdev);
 	else
 		ret = __afu_port_disable(pdev);
 	mutex_unlock(&pdata->lock);
diff --git a/drivers/fpga/dfl-afu.h b/drivers/fpga/dfl-afu.h
index 576e94960086..e5020e2b1f3d 100644
--- a/drivers/fpga/dfl-afu.h
+++ b/drivers/fpga/dfl-afu.h
@@ -80,7 +80,7 @@  struct dfl_afu {
 };
 
 /* hold pdata->lock when call __afu_port_enable/disable */
-void __afu_port_enable(struct platform_device *pdev);
+int __afu_port_enable(struct platform_device *pdev);
 int __afu_port_disable(struct platform_device *pdev);
 
 void afu_mmio_region_init(struct dfl_feature_platform_data *pdata);