diff mbox series

cxl/pmem: Fix reference counting for delayed work

Message ID 163553734757.2509761.3305231863616785470.stgit@dwillia2-desk3.amr.corp.intel.com
State Accepted
Commit 08b9e0ab8af48895337192e683de44ab1e1b7427
Headers show
Series cxl/pmem: Fix reference counting for delayed work | expand

Commit Message

Dan Williams Oct. 29, 2021, 7:55 p.m. UTC
There is a potential race between queue_work() returning and the
queued-work running that could result in put_device() running before
get_device(). Introduce the cxl_nvdimm_bridge_state_work() helper that
takes the reference unconditionally, but drops it if no new work was
queued, to keep the references balanced.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/pmem.c |   17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

Comments

Ben Widawsky Oct. 31, 2021, 6:58 p.m. UTC | #1
On 21-10-29 12:55:47, Dan Williams wrote:
> There is a potential race between queue_work() returning and the
> queued-work running that could result in put_device() running before
> get_device(). Introduce the cxl_nvdimm_bridge_state_work() helper that
> takes the reference unconditionally, but drops it if no new work was
> queued, to keep the references balanced.
> 
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Arguably fixes/stable?
Reviewed-by: Ben Widawsky <ben.widawsky@intel.com>

> ---
>  drivers/cxl/pmem.c |   17 +++++++++++++----
>  1 file changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
> index ceb2115981e5..38bcbb4e9409 100644
> --- a/drivers/cxl/pmem.c
> +++ b/drivers/cxl/pmem.c
> @@ -266,14 +266,24 @@ static void cxl_nvb_update_state(struct work_struct *work)
>  	put_device(&cxl_nvb->dev);
>  }
>  
> +static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb)
> +{
> +	/*
> +	 * Take a reference that the workqueue will drop if new work
> +	 * gets queued.
> +	 */
> +	get_device(&cxl_nvb->dev);
> +	if (!queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
> +		put_device(&cxl_nvb->dev);
> +}
> +
>  static void cxl_nvdimm_bridge_remove(struct device *dev)
>  {
>  	struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
>  
>  	if (cxl_nvb->state == CXL_NVB_ONLINE)
>  		cxl_nvb->state = CXL_NVB_OFFLINE;
> -	if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
> -		get_device(&cxl_nvb->dev);
> +	cxl_nvdimm_bridge_state_work(cxl_nvb);
>  }
>  
>  static int cxl_nvdimm_bridge_probe(struct device *dev)
> @@ -294,8 +304,7 @@ static int cxl_nvdimm_bridge_probe(struct device *dev)
>  	}
>  
>  	cxl_nvb->state = CXL_NVB_ONLINE;
> -	if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
> -		get_device(&cxl_nvb->dev);
> +	cxl_nvdimm_bridge_state_work(cxl_nvb);
>  
>  	return 0;
>  }
>
Dan Williams Oct. 31, 2021, 7:27 p.m. UTC | #2
On Sun, Oct 31, 2021 at 11:58 AM Ben Widawsky <ben.widawsky@intel.com> wrote:
>
> On 21-10-29 12:55:47, Dan Williams wrote:
> > There is a potential race between queue_work() returning and the
> > queued-work running that could result in put_device() running before
> > get_device(). Introduce the cxl_nvdimm_bridge_state_work() helper that
> > takes the reference unconditionally, but drops it if no new work was
> > queued, to keep the references balanced.
> >
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>
> Arguably fixes/stable?

Whoops, yes, definitely needs a Fixes:. A stable cc is debatable, but
if autosel picked up this fix I wouldn't say no, so might as well cc
stable upfront.

> Reviewed-by: Ben Widawsky <ben.widawsky@intel.com>

Thanks!
Jonathan Cameron Nov. 1, 2021, 11:30 a.m. UTC | #3
On Fri, 29 Oct 2021 12:55:47 -0700
Dan Williams <dan.j.williams@intel.com> wrote:

> There is a potential race between queue_work() returning and the
> queued-work running that could result in put_device() running before
> get_device(). Introduce the cxl_nvdimm_bridge_state_work() helper that
> takes the reference unconditionally, but drops it if no new work was
> queued, to keep the references balanced.
> 
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Good spot, I'm guessing this was an inspection thing rather than a problem
you've managed to trigger, but either way.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
>  drivers/cxl/pmem.c |   17 +++++++++++++----
>  1 file changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
> index ceb2115981e5..38bcbb4e9409 100644
> --- a/drivers/cxl/pmem.c
> +++ b/drivers/cxl/pmem.c
> @@ -266,14 +266,24 @@ static void cxl_nvb_update_state(struct work_struct *work)
>  	put_device(&cxl_nvb->dev);
>  }
>  
> +static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb)
> +{
> +	/*
> +	 * Take a reference that the workqueue will drop if new work
> +	 * gets queued.
> +	 */
> +	get_device(&cxl_nvb->dev);
> +	if (!queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
> +		put_device(&cxl_nvb->dev);
> +}
> +
>  static void cxl_nvdimm_bridge_remove(struct device *dev)
>  {
>  	struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
>  
>  	if (cxl_nvb->state == CXL_NVB_ONLINE)
>  		cxl_nvb->state = CXL_NVB_OFFLINE;
> -	if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
> -		get_device(&cxl_nvb->dev);
> +	cxl_nvdimm_bridge_state_work(cxl_nvb);
>  }
>  
>  static int cxl_nvdimm_bridge_probe(struct device *dev)
> @@ -294,8 +304,7 @@ static int cxl_nvdimm_bridge_probe(struct device *dev)
>  	}
>  
>  	cxl_nvb->state = CXL_NVB_ONLINE;
> -	if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
> -		get_device(&cxl_nvb->dev);
> +	cxl_nvdimm_bridge_state_work(cxl_nvb);
>  
>  	return 0;
>  }
>
Dan Williams Nov. 3, 2021, 12:57 a.m. UTC | #4
On Mon, Nov 1, 2021 at 4:30 AM Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> On Fri, 29 Oct 2021 12:55:47 -0700
> Dan Williams <dan.j.williams@intel.com> wrote:
>
> > There is a potential race between queue_work() returning and the
> > queued-work running that could result in put_device() running before
> > get_device(). Introduce the cxl_nvdimm_bridge_state_work() helper that
> > takes the reference unconditionally, but drops it if no new work was
> > queued, to keep the references balanced.
> >
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>
> Good spot, I'm guessing this was an inspection thing rather than a problem
> you've managed to trigger, but either way.

Correct, it was by inspection. I expect it would be extremely
difficult to hit this race in practice.

> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Thanks!
diff mbox series

Patch

diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index ceb2115981e5..38bcbb4e9409 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -266,14 +266,24 @@  static void cxl_nvb_update_state(struct work_struct *work)
 	put_device(&cxl_nvb->dev);
 }
 
+static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb)
+{
+	/*
+	 * Take a reference that the workqueue will drop if new work
+	 * gets queued.
+	 */
+	get_device(&cxl_nvb->dev);
+	if (!queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
+		put_device(&cxl_nvb->dev);
+}
+
 static void cxl_nvdimm_bridge_remove(struct device *dev)
 {
 	struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
 
 	if (cxl_nvb->state == CXL_NVB_ONLINE)
 		cxl_nvb->state = CXL_NVB_OFFLINE;
-	if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
-		get_device(&cxl_nvb->dev);
+	cxl_nvdimm_bridge_state_work(cxl_nvb);
 }
 
 static int cxl_nvdimm_bridge_probe(struct device *dev)
@@ -294,8 +304,7 @@  static int cxl_nvdimm_bridge_probe(struct device *dev)
 	}
 
 	cxl_nvb->state = CXL_NVB_ONLINE;
-	if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
-		get_device(&cxl_nvb->dev);
+	cxl_nvdimm_bridge_state_work(cxl_nvb);
 
 	return 0;
 }