diff mbox series

[v2,2/4] dax/bus.c: fix locking for unregister_dax_dev / unregister_dax_mapping paths

Message ID 20240416-vv-dax_abi_fixes-v2-2-d5f0c8ec162e@intel.com
State Superseded
Headers show
Series dax/bus.c: Fixups for dax-bus locking | expand

Commit Message

Verma, Vishal L April 16, 2024, 9:46 p.m. UTC
Commit c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
was a bit overzealous in eliminating device_lock() usage, and ended up
removing a couple of lock acquisitions which were needed. Restore those
acquisitions and, as a result, fix some of the conditional locking missteps
that the above commit introduced in unregister_dev_dax() and
unregister_dax_mapping().

Fixes: c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
Reported-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
 drivers/dax/bus.c | 44 ++++++++++----------------------------------
 1 file changed, 10 insertions(+), 34 deletions(-)

Comments

Dan Williams April 30, 2024, 1:25 a.m. UTC | #1
Vishal Verma wrote:
> Commit c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
> was a bit overzealous in eliminating device_lock() usage, and ended up
> removing a couple of lock acquisitions which were needed, and as a
> result, fix some of the conditional locking missteps that the above
> commit introduced in unregister_dax_dev() and unregister_dax_mapping().

I think it makes sense to tell the story a bit about why the
delete_store() conversion was problematic, because the
unregister_dev_dax() changes were just a knock-on effect to fixing the
delete_store() flow.

Something like:

---
commit c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
aimed to undo device_lock() abuses for protecting changes to dax-driver
internal data-structures like the dax_region resource tree to
device-dax-instance range structures. However, the device_lock() was legitimately
enforcing that devices to be deleted were not currently actively attached
to any driver nor assigned any capacity from the region.
---

...you can fill in a couple notes about the knock-on fixups after that
was restored.

> Fixes: c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
> Reported-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
> ---
>  drivers/dax/bus.c | 44 ++++++++++----------------------------------
>  1 file changed, 10 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index 7924dd542a13..4e04b228b080 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -465,26 +465,17 @@ static void free_dev_dax_ranges(struct dev_dax *dev_dax)
>  		trim_dev_dax_range(dev_dax);
>  }
>  
> -static void __unregister_dev_dax(void *dev)
> +static void unregister_dev_dax(void *dev)
>  {
>  	struct dev_dax *dev_dax = to_dev_dax(dev);
>  
>  	dev_dbg(dev, "%s\n", __func__);
>  
> +	down_write(&dax_region_rwsem);
>  	kill_dev_dax(dev_dax);
>  	device_del(dev);
>  	free_dev_dax_ranges(dev_dax);
>  	put_device(dev);
> -}
> -
> -static void unregister_dev_dax(void *dev)
> -{
> -	if (rwsem_is_locked(&dax_region_rwsem))
> -		return __unregister_dev_dax(dev);
> -
> -	if (WARN_ON_ONCE(down_write_killable(&dax_region_rwsem) != 0))
> -		return;
> -	__unregister_dev_dax(dev);
>  	up_write(&dax_region_rwsem);
>  }
>  
> @@ -560,15 +551,12 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
>  	if (!victim)
>  		return -ENXIO;
>  
> -	rc = down_write_killable(&dax_region_rwsem);
> -	if (rc)
> -		return rc;
> -	rc = down_write_killable(&dax_dev_rwsem);
> -	if (rc) {
> -		up_write(&dax_region_rwsem);
> -		return rc;
> -	}
> +	device_lock(dev);
> +	device_lock(victim);
>  	dev_dax = to_dev_dax(victim);
> +	rc = down_write_killable(&dax_dev_rwsem);

This begs the question, why down_write_killable(), but not
device_lock_interruptible()?

I do not expect any of this to be long running so likely down_write() is
sufficient here, especially since the heaviest locks to acquire are
already held by the time rwsem is considered.

Other than that this looks good to me:

You can include my Reviewed-by on the next posting.
Verma, Vishal L April 30, 2024, 4:11 a.m. UTC | #2
On Mon, 2024-04-29 at 18:25 -0700, Dan Williams wrote:
> Vishal Verma wrote:
> > Commit c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
> > was a bit overzealous in eliminating device_lock() usage, and ended up
> > removing a couple of lock acquisitions which were needed, and as a
> > result, fix some of the conditional locking missteps that the above
> > commit introduced in unregister_dax_dev() and unregister_dax_mapping().
> 
> I think it makes sense to tell the story a bit about why the
> delete_store() conversion was problematic, because the
> unregister_dev_dax() changes were just a knock-on effect to fixing the
> delete_store() flow.
> 
> Something like:
> 
> ---
> commit c05ae9d85b47 ("dax/bus.c: replace driver-core lock usage by a local rwsem")
> aimed to undo device_lock() abuses for protecting changes to dax-driver
> internal data-structures like the dax_region resource tree to
> device-dax-instance range structures. However, the device_lock() was legitimately
> enforcing that devices to be deleted were not currently actively attached
> to any driver nor assigned any capacity from the region.
> ---
> 
> ...you can fill in a couple notes about the knock-on fixups after that
> was restored.

Sounds good, updated!

> 
> >  
> > @@ -560,15 +551,12 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
> >  	if (!victim)
> >  		return -ENXIO;
> >  
> > -	rc = down_write_killable(&dax_region_rwsem);
> > -	if (rc)
> > -		return rc;
> > -	rc = down_write_killable(&dax_dev_rwsem);
> > -	if (rc) {
> > -		up_write(&dax_region_rwsem);
> > -		return rc;
> > -	}
> > +	device_lock(dev);
> > +	device_lock(victim);
> >  	dev_dax = to_dev_dax(victim);
> > +	rc = down_write_killable(&dax_dev_rwsem);
> 
> This begs the question, why down_write_killable(), but not
> device_lock_interruptible()?

Do you mean change the device_lock()s to device_lock_interruptible() in
addition to taking the rwsem (i.e. not instead of the rwsem..)?
I guess I just restored what was there previously - but the
interruptible variant makes sense, I can make that change.

> 
> I do not expect any of this is long running so likely down_write() is
> sufficient here, especially since the heaviest locks to acquire are
> already held by the time rwsem is considered.
> 
> Other than that this looks good to me:
> 
> You can include my Reviewed-by on the next posting.

Thanks for the review Dan!
Dan Williams April 30, 2024, 5:25 a.m. UTC | #3
Verma, Vishal L wrote:
> > > @@ -560,15 +551,12 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
> > >  	if (!victim)
> > >  		return -ENXIO;
> > >  
> > > -	rc = down_write_killable(&dax_region_rwsem);
> > > -	if (rc)
> > > -		return rc;
> > > -	rc = down_write_killable(&dax_dev_rwsem);
> > > -	if (rc) {
> > > -		up_write(&dax_region_rwsem);
> > > -		return rc;
> > > -	}
> > > +	device_lock(dev);
> > > +	device_lock(victim);
> > >  	dev_dax = to_dev_dax(victim);
> > > +	rc = down_write_killable(&dax_dev_rwsem);
> > 
> > This begs the question, why down_write_killable(), but not
> > device_lock_interruptible()?
> 
> Do you mean change the device_lock()s to device_lock_interruptible() in
> addition to taking the rwsem (i.e. not instead of the rwsem..)?

I mean convert the rwsem to drop _killable.

> I guess I just restored what was there previously - but the
> interruptible variant makes sense, I can make that change.

So the original code did device_lock(), then the rework added killable
rwsem (deleted device_lock()), and now the fixes add device_lock() back.
So now that there is a mix of killable/interruptible lock usage all the
locks should agree.

Since there really is no risk of these operations being long running
there is no driving need to make them killable/interruptible, so go with
the simple option.
diff mbox series

Patch

diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 7924dd542a13..4e04b228b080 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -465,26 +465,17 @@  static void free_dev_dax_ranges(struct dev_dax *dev_dax)
 		trim_dev_dax_range(dev_dax);
 }
 
-static void __unregister_dev_dax(void *dev)
+static void unregister_dev_dax(void *dev)
 {
 	struct dev_dax *dev_dax = to_dev_dax(dev);
 
 	dev_dbg(dev, "%s\n", __func__);
 
+	down_write(&dax_region_rwsem);
 	kill_dev_dax(dev_dax);
 	device_del(dev);
 	free_dev_dax_ranges(dev_dax);
 	put_device(dev);
-}
-
-static void unregister_dev_dax(void *dev)
-{
-	if (rwsem_is_locked(&dax_region_rwsem))
-		return __unregister_dev_dax(dev);
-
-	if (WARN_ON_ONCE(down_write_killable(&dax_region_rwsem) != 0))
-		return;
-	__unregister_dev_dax(dev);
 	up_write(&dax_region_rwsem);
 }
 
@@ -560,15 +551,12 @@  static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
 	if (!victim)
 		return -ENXIO;
 
-	rc = down_write_killable(&dax_region_rwsem);
-	if (rc)
-		return rc;
-	rc = down_write_killable(&dax_dev_rwsem);
-	if (rc) {
-		up_write(&dax_region_rwsem);
-		return rc;
-	}
+	device_lock(dev);
+	device_lock(victim);
 	dev_dax = to_dev_dax(victim);
+	rc = down_write_killable(&dax_dev_rwsem);
+	if (rc)
+		return rc;
 	if (victim->driver || dev_dax_size(dev_dax))
 		rc = -EBUSY;
 	else {
@@ -589,11 +577,12 @@  static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
 			rc = -EBUSY;
 	}
 	up_write(&dax_dev_rwsem);
+	device_unlock(victim);
 
 	/* won the race to invalidate the device, clean it up */
 	if (do_del)
 		devm_release_action(dev, unregister_dev_dax, victim);
-	up_write(&dax_region_rwsem);
+	device_unlock(dev);
 	put_device(victim);
 
 	return rc;
@@ -705,7 +694,7 @@  static void dax_mapping_release(struct device *dev)
 	put_device(parent);
 }
 
-static void __unregister_dax_mapping(void *data)
+static void unregister_dax_mapping(void *data)
 {
 	struct device *dev = data;
 	struct dax_mapping *mapping = to_dax_mapping(dev);
@@ -713,25 +702,12 @@  static void __unregister_dax_mapping(void *data)
 
 	dev_dbg(dev, "%s\n", __func__);
 
-	lockdep_assert_held_write(&dax_region_rwsem);
-
 	dev_dax->ranges[mapping->range_id].mapping = NULL;
 	mapping->range_id = -1;
 
 	device_unregister(dev);
 }
 
-static void unregister_dax_mapping(void *data)
-{
-	if (rwsem_is_locked(&dax_region_rwsem))
-		return __unregister_dax_mapping(data);
-
-	if (WARN_ON_ONCE(down_write_killable(&dax_region_rwsem) != 0))
-		return;
-	__unregister_dax_mapping(data);
-	up_write(&dax_region_rwsem);
-}
-
 static struct dev_dax_range *get_dax_range(struct device *dev)
 {
 	struct dax_mapping *mapping = to_dax_mapping(dev);