diff mbox series

[7/8] vfio/mdev: Fix aborting mdev child device removal if one fails

Message ID 1553296835-37522-8-git-send-email-parav@mellanox.com (mailing list archive)
State New, archived
Headers show
Series vfio/mdev: Improve vfio/mdev core module | expand

Commit Message

Parav Pandit March 22, 2019, 11:20 p.m. UTC
device_for_each_child() stops executing callback function for remaining
child devices, if callback hits an error.
Each child mdev device is independent of each other.
While unregistering parent device, mdev core must remove all child mdev
devices.
Therefore, mdev_device_remove_cb() always returns success so that
device_for_each_child doesn't abort if one child removal hits error.

While at it, improve remove and unregister functions for below simplicity.

There isn't need to pass forced flag pointer during mdev parent
removal which invokes mdev_device_remove(). So simplify the flow.

mdev_device_remove() is called from two paths.
1. mdev_unregister_driver()
     mdev_device_remove_cb()
       mdev_device_remove()
2. remove_store()
     mdev_device_remove()

When device is removed by user using remote_store(), device under
removal is mdev device.
When device is removed during parent device removal using generic child
iterator, mdev check is already done using dev_is_mdev().

Hence, remove the unnecessary loop in mdev_device_remove().

Fixes: 7b96953bc640 ("vfio: Mediated device Core driver")
Signed-off-by: Parav Pandit <parav@mellanox.com>
---
 drivers/vfio/mdev/mdev_core.c | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

Comments

Maxim Levitsky March 25, 2019, 11:58 a.m. UTC | #1
On Fri, 2019-03-22 at 18:20 -0500, Parav Pandit wrote:
> device_for_each_child() stops executing callback function for remaining
> child devices, if callback hits an error.
> Each child mdev device is independent of each other.
> While unregistering parent device, mdev core must remove all child mdev
> devices.
> Therefore, mdev_device_remove_cb() always returns success so that
> device_for_each_child doesn't abort if one child removal hits error.
> 
> While at it, improve remove and unregister functions for below simplicity.
> 
> There isn't need to pass forced flag pointer during mdev parent
> removal which invokes mdev_device_remove(). So simplify the flow.
> 
> mdev_device_remove() is called from two paths.
> 1. mdev_unregister_driver()
>      mdev_device_remove_cb()
>        mdev_device_remove()
> 2. remove_store()
>      mdev_device_remove()
> 
> When device is removed by user using remote_store(), device under
> removal is mdev device.
> When device is removed during parent device removal using generic child
> iterator, mdev check is already done using dev_is_mdev().
> 
> Hence, remove the unnecessary loop in mdev_device_remove().
> 
> Fixes: 7b96953bc640 ("vfio: Mediated device Core driver")
> Signed-off-by: Parav Pandit <parav@mellanox.com>
> ---
>  drivers/vfio/mdev/mdev_core.c | 24 +++++-------------------
>  1 file changed, 5 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> index ab05464..944a058 100644
> --- a/drivers/vfio/mdev/mdev_core.c
> +++ b/drivers/vfio/mdev/mdev_core.c
> @@ -150,10 +150,10 @@ static int mdev_device_remove_ops(struct mdev_device
> *mdev, bool force_remove)
>  
>  static int mdev_device_remove_cb(struct device *dev, void *data)
>  {
> -	if (!dev_is_mdev(dev))
> -		return 0;
> +	if (dev_is_mdev(dev))
> +		mdev_device_remove(dev, true);
>  
> -	return mdev_device_remove(dev, data ? *(bool *)data : true);
> +	return 0;
>  }
>  
>  /*
> @@ -241,7 +241,6 @@ int mdev_register_device(struct device *dev, const struct
> mdev_parent_ops *ops)
>  void mdev_unregister_device(struct device *dev)
>  {
>  	struct mdev_parent *parent;
> -	bool force_remove = true;
>  
>  	mutex_lock(&parent_list_lock);
>  	parent = __find_parent_device(dev);
> @@ -255,8 +254,7 @@ void mdev_unregister_device(struct device *dev)
>  	list_del(&parent->next);
>  	class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
>  
> -	device_for_each_child(dev, (void *)&force_remove,
> -			      mdev_device_remove_cb);
> +	device_for_each_child(dev, NULL, mdev_device_remove_cb);
>  
>  	parent_remove_sysfs_files(parent);
>  
> @@ -346,24 +344,12 @@ int mdev_device_create(struct kobject *kobj, struct
> device *dev, uuid_le uuid)
>  
>  int mdev_device_remove(struct device *dev, bool force_remove)
>  {
> -	struct mdev_device *mdev, *tmp;
> +	struct mdev_device *mdev;
>  	struct mdev_parent *parent;
>  	struct mdev_type *type;
>  	int ret;
>  
>  	mdev = to_mdev_device(dev);
> -
> -	mutex_lock(&mdev_list_lock);
> -	list_for_each_entry(tmp, &mdev_list, next) {
> -		if (tmp == mdev)
> -			break;
> -	}
> -
> -	if (tmp != mdev) {
> -		mutex_unlock(&mdev_list_lock);
> -		return -ENODEV;
> -	}
> -
>  	if (!mdev->active) {
>  		mutex_unlock(&mdev_list_lock);
>  		return -EAGAIN;

Very nice catch and good refactoring.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Best regards,
	Maxim Levitsky
Kirti Wankhede March 25, 2019, 7:35 p.m. UTC | #2
On 3/23/2019 4:50 AM, Parav Pandit wrote:
> device_for_each_child() stops executing callback function for remaining
> child devices, if callback hits an error.
> Each child mdev device is independent of each other.
> While unregistering parent device, mdev core must remove all child mdev
> devices.
> Therefore, mdev_device_remove_cb() always returns success so that
> device_for_each_child doesn't abort if one child removal hits error.
> 

When unregistering parent device, force_remove is set to true amd
mdev_device_remove_ops() always returns success.

> While at it, improve remove and unregister functions for below simplicity.
> 
> There isn't need to pass forced flag pointer during mdev parent
> removal which invokes mdev_device_remove().

There is a need to pass the flag, pasting here the comment above
mdev_device_remove_ops() which explains why the flag is needed:

/*
 * mdev_device_remove_ops gets called from sysfs's 'remove' and when parent
 * device is being unregistered from mdev device framework.
 * - 'force_remove' is set to 'false' when called from sysfs's 'remove'
which
 *   indicates that if the mdev device is active, used by VMM or userspace
 *   application, vendor driver could return error then don't remove the
device.
 * - 'force_remove' is set to 'true' when called from
mdev_unregister_device()
 *   which indicate that parent device is being removed from mdev device
 *   framework so remove mdev device forcefully.
 */

Thanks,
Kirti

 So simplify the flow.
> 
> mdev_device_remove() is called from two paths.
> 1. mdev_unregister_driver()
>      mdev_device_remove_cb()
>        mdev_device_remove()
> 2. remove_store()
>      mdev_device_remove()
> 
> When device is removed by user using remote_store(), device under
> removal is mdev device.
> When device is removed during parent device removal using generic child
> iterator, mdev check is already done using dev_is_mdev().
> 
> Hence, remove the unnecessary loop in mdev_device_remove().
> 
> Fixes: 7b96953bc640 ("vfio: Mediated device Core driver")
> Signed-off-by: Parav Pandit <parav@mellanox.com>
> ---
>  drivers/vfio/mdev/mdev_core.c | 24 +++++-------------------
>  1 file changed, 5 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> index ab05464..944a058 100644
> --- a/drivers/vfio/mdev/mdev_core.c
> +++ b/drivers/vfio/mdev/mdev_core.c
> @@ -150,10 +150,10 @@ static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove)
>  
>  static int mdev_device_remove_cb(struct device *dev, void *data)
>  {
> -	if (!dev_is_mdev(dev))
> -		return 0;
> +	if (dev_is_mdev(dev))
> +		mdev_device_remove(dev, true);
>  
> -	return mdev_device_remove(dev, data ? *(bool *)data : true);
> +	return 0;
>  }
>  
>  /*
> @@ -241,7 +241,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
>  void mdev_unregister_device(struct device *dev)
>  {
>  	struct mdev_parent *parent;
> -	bool force_remove = true;
>  
>  	mutex_lock(&parent_list_lock);
>  	parent = __find_parent_device(dev);
> @@ -255,8 +254,7 @@ void mdev_unregister_device(struct device *dev)
>  	list_del(&parent->next);
>  	class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
>  
> -	device_for_each_child(dev, (void *)&force_remove,
> -			      mdev_device_remove_cb);
> +	device_for_each_child(dev, NULL, mdev_device_remove_cb);
>  
>  	parent_remove_sysfs_files(parent);
>  
> @@ -346,24 +344,12 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
>  
>  int mdev_device_remove(struct device *dev, bool force_remove)
>  {
> -	struct mdev_device *mdev, *tmp;
> +	struct mdev_device *mdev;
>  	struct mdev_parent *parent;
>  	struct mdev_type *type;
>  	int ret;
>  
>  	mdev = to_mdev_device(dev);
> -
> -	mutex_lock(&mdev_list_lock);
> -	list_for_each_entry(tmp, &mdev_list, next) {
> -		if (tmp == mdev)
> -			break;
> -	}
> -
> -	if (tmp != mdev) {
> -		mutex_unlock(&mdev_list_lock);
> -		return -ENODEV;
> -	}
> -
>  	if (!mdev->active) {
>  		mutex_unlock(&mdev_list_lock);
>  		return -EAGAIN;
>
Alex Williamson March 25, 2019, 8:49 p.m. UTC | #3
On Tue, 26 Mar 2019 01:05:34 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> On 3/23/2019 4:50 AM, Parav Pandit wrote:
> > device_for_each_child() stops executing callback function for remaining
> > child devices, if callback hits an error.
> > Each child mdev device is independent of each other.
> > While unregistering parent device, mdev core must remove all child mdev
> > devices.
> > Therefore, mdev_device_remove_cb() always returns success so that
> > device_for_each_child doesn't abort if one child removal hits error.
> >   
> 
> When unregistering parent device, force_remove is set to true amd
> mdev_device_remove_ops() always returns success.

Can we know that?  mdev_device_remove() doesn't guarantee to return
zero.

> > While at it, improve remove and unregister functions for below simplicity.
> > 
> > There isn't need to pass forced flag pointer during mdev parent
> > removal which invokes mdev_device_remove().  
> 
> There is a need to pass the flag, pasting here the comment above
> mdev_device_remove_ops() which explains why the flag is needed:
> 
> /*
>  * mdev_device_remove_ops gets called from sysfs's 'remove' and when parent
>  * device is being unregistered from mdev device framework.
>  * - 'force_remove' is set to 'false' when called from sysfs's 'remove'
> which
>  *   indicates that if the mdev device is active, used by VMM or userspace
>  *   application, vendor driver could return error then don't remove the
> device.
>  * - 'force_remove' is set to 'true' when called from
> mdev_unregister_device()
>  *   which indicate that parent device is being removed from mdev device
>  *   framework so remove mdev device forcefully.
>  */

I don't see that this changes the force behavior, it's simply noting
that in order to continue the device_for_each_child() iterator, we need
to return zero, regardless of what mdev_device_remove() returns, and
the parent remove path is the only caller of mdev_device_remove_cb(),
so we can assume force = true when calling mdev_device_remove().  Aside
from maybe a WARN_ON if mdev_device_remove() returns non-zero, that
much looks reasonable to me.

>  So simplify the flow.
> > 
> > mdev_device_remove() is called from two paths.
> > 1. mdev_unregister_driver()
> >      mdev_device_remove_cb()
> >        mdev_device_remove()
> > 2. remove_store()
> >      mdev_device_remove()
> > 
> > When device is removed by user using remote_store(), device under
> > removal is mdev device.
> > When device is removed during parent device removal using generic child
> > iterator, mdev check is already done using dev_is_mdev().
> > 
> > Hence, remove the unnecessary loop in mdev_device_remove().

I don't think knowing the device type is the only reason for this loop
though.  Both paths you mention above can race with each other, so we
need to serialize them and pick a winner.  The mdev_list_lock allows us
to do that.  Additionally...

> > 
> > Fixes: 7b96953bc640 ("vfio: Mediated device Core driver")
> > Signed-off-by: Parav Pandit <parav@mellanox.com>
> > ---
> >  drivers/vfio/mdev/mdev_core.c | 24 +++++-------------------
> >  1 file changed, 5 insertions(+), 19 deletions(-)
> > 
> > diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> > index ab05464..944a058 100644
> > --- a/drivers/vfio/mdev/mdev_core.c
> > +++ b/drivers/vfio/mdev/mdev_core.c
> > @@ -150,10 +150,10 @@ static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove)
> >  
> >  static int mdev_device_remove_cb(struct device *dev, void *data)
> >  {
> > -	if (!dev_is_mdev(dev))
> > -		return 0;
> > +	if (dev_is_mdev(dev))
> > +		mdev_device_remove(dev, true);
> >  
> > -	return mdev_device_remove(dev, data ? *(bool *)data : true);
> > +	return 0;
> >  }
> >  
> >  /*
> > @@ -241,7 +241,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
> >  void mdev_unregister_device(struct device *dev)
> >  {
> >  	struct mdev_parent *parent;
> > -	bool force_remove = true;
> >  
> >  	mutex_lock(&parent_list_lock);
> >  	parent = __find_parent_device(dev);
> > @@ -255,8 +254,7 @@ void mdev_unregister_device(struct device *dev)
> >  	list_del(&parent->next);
> >  	class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
> >  
> > -	device_for_each_child(dev, (void *)&force_remove,
> > -			      mdev_device_remove_cb);
> > +	device_for_each_child(dev, NULL, mdev_device_remove_cb);
> >  
> >  	parent_remove_sysfs_files(parent);
> >  
> > @@ -346,24 +344,12 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
> >  
> >  int mdev_device_remove(struct device *dev, bool force_remove)
> >  {
> > -	struct mdev_device *mdev, *tmp;
> > +	struct mdev_device *mdev;
> >  	struct mdev_parent *parent;
> >  	struct mdev_type *type;
> >  	int ret;
> >  
> >  	mdev = to_mdev_device(dev);
> > -
> > -	mutex_lock(&mdev_list_lock);

Acquiring the lock is removed, but...

> > -	list_for_each_entry(tmp, &mdev_list, next) {
> > -		if (tmp == mdev)
> > -			break;
> > -	}
> > -
> > -	if (tmp != mdev) {
> > -		mutex_unlock(&mdev_list_lock);
> > -		return -ENODEV;
> > -	}
> > -
> >  	if (!mdev->active) {
> >  		mutex_unlock(&mdev_list_lock);
> >  		return -EAGAIN;
> >   

We still release it in this path and the code below here.  If we don't
find the device on the list under lock, then we're working with a stale
device and playing with the 'active' flag of that device outside of any
sort of mutual exclusion is racy.  Thanks,

Alex
Parav Pandit March 25, 2019, 9:36 p.m. UTC | #4
> -----Original Message-----
> From: Alex Williamson <alex.williamson@redhat.com>
> Sent: Monday, March 25, 2019 3:50 PM
> To: Kirti Wankhede <kwankhede@nvidia.com>
> Cc: Parav Pandit <parav@mellanox.com>; kvm@vger.kernel.org; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH 7/8] vfio/mdev: Fix aborting mdev child device removal if
> one fails
> 
> On Tue, 26 Mar 2019 01:05:34 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
> > On 3/23/2019 4:50 AM, Parav Pandit wrote:
> > > device_for_each_child() stops executing callback function for
> > > remaining child devices, if callback hits an error.
> > > Each child mdev device is independent of each other.
> > > While unregistering parent device, mdev core must remove all child
> > > mdev devices.
> > > Therefore, mdev_device_remove_cb() always returns success so that
> > > device_for_each_child doesn't abort if one child removal hits error.
> > >
> >
> > When unregistering parent device, force_remove is set to true amd
> > mdev_device_remove_ops() always returns success.
> 
> Can we know that?  mdev_device_remove() doesn't guarantee to return
> zero.
> 
> > > While at it, improve remove and unregister functions for below
> simplicity.
> > >
> > > There isn't need to pass forced flag pointer during mdev parent
> > > removal which invokes mdev_device_remove().
> >
> > There is a need to pass the flag, pasting here the comment above
> > mdev_device_remove_ops() which explains why the flag is needed:
> >
> > /*
> >  * mdev_device_remove_ops gets called from sysfs's 'remove' and when
> > parent
> >  * device is being unregistered from mdev device framework.
> >  * - 'force_remove' is set to 'false' when called from sysfs's 'remove'
> > which
> >  *   indicates that if the mdev device is active, used by VMM or userspace
> >  *   application, vendor driver could return error then don't remove the
> > device.
> >  * - 'force_remove' is set to 'true' when called from
> > mdev_unregister_device()
> >  *   which indicate that parent device is being removed from mdev device
> >  *   framework so remove mdev device forcefully.
> >  */
> 
> I don't see that this changes the force behavior, it's simply noting that in
> order to continue the device_for_each_child() iterator, we need to return
> zero, regardless of what mdev_device_remove() returns, and the parent
> remove path is the only caller of mdev_device_remove_cb(), so we can
> assume force = true when calling mdev_device_remove().  Aside from maybe
> a WARN_ON if mdev_device_remove() returns non-zero, that much looks
> reasonable to me.
> 
> >  So simplify the flow.
> > >
> > > mdev_device_remove() is called from two paths.
> > > 1. mdev_unregister_driver()
> > >      mdev_device_remove_cb()
> > >        mdev_device_remove()
> > > 2. remove_store()
> > >      mdev_device_remove()
> > >
> > > When device is removed by user using remote_store(), device under
> > > removal is mdev device.
> > > When device is removed during parent device removal using generic
> > > child iterator, mdev check is already done using dev_is_mdev().
> > >
> > > Hence, remove the unnecessary loop in mdev_device_remove().
> 
> I don't think knowing the device type is the only reason for this loop though.
> Both paths you mention above can race with each other, so we need to
> serialize them and pick a winner.  The mdev_list_lock allows us to do that.
> Additionally...
> 
> > >
> > > Fixes: 7b96953bc640 ("vfio: Mediated device Core driver")
> > > Signed-off-by: Parav Pandit <parav@mellanox.com>
> > > ---
> > >  drivers/vfio/mdev/mdev_core.c | 24 +++++-------------------
> > >  1 file changed, 5 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/drivers/vfio/mdev/mdev_core.c
> > > b/drivers/vfio/mdev/mdev_core.c index ab05464..944a058 100644
> > > --- a/drivers/vfio/mdev/mdev_core.c
> > > +++ b/drivers/vfio/mdev/mdev_core.c
> > > @@ -150,10 +150,10 @@ static int mdev_device_remove_ops(struct
> > > mdev_device *mdev, bool force_remove)
> > >
> > >  static int mdev_device_remove_cb(struct device *dev, void *data)  {
> > > -	if (!dev_is_mdev(dev))
> > > -		return 0;
> > > +	if (dev_is_mdev(dev))
> > > +		mdev_device_remove(dev, true);
> > >
> > > -	return mdev_device_remove(dev, data ? *(bool *)data : true);
> > > +	return 0;
> > >  }
> > >
> > >  /*
> > > @@ -241,7 +241,6 @@ int mdev_register_device(struct device *dev,
> > > const struct mdev_parent_ops *ops)  void
> > > mdev_unregister_device(struct device *dev)  {
> > >  	struct mdev_parent *parent;
> > > -	bool force_remove = true;
> > >
> > >  	mutex_lock(&parent_list_lock);
> > >  	parent = __find_parent_device(dev); @@ -255,8 +254,7 @@ void
> > > mdev_unregister_device(struct device *dev)
> > >  	list_del(&parent->next);
> > >  	class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
> > >
> > > -	device_for_each_child(dev, (void *)&force_remove,
> > > -			      mdev_device_remove_cb);
> > > +	device_for_each_child(dev, NULL, mdev_device_remove_cb);
> > >
> > >  	parent_remove_sysfs_files(parent);
> > >
> > > @@ -346,24 +344,12 @@ int mdev_device_create(struct kobject *kobj,
> > > struct device *dev, uuid_le uuid)
> > >
> > >  int mdev_device_remove(struct device *dev, bool force_remove)  {
> > > -	struct mdev_device *mdev, *tmp;
> > > +	struct mdev_device *mdev;
> > >  	struct mdev_parent *parent;
> > >  	struct mdev_type *type;
> > >  	int ret;
> > >
> > >  	mdev = to_mdev_device(dev);
> > > -
> > > -	mutex_lock(&mdev_list_lock);
> 
> Acquiring the lock is removed, but...
> 
Crap. Missed the lower part.

> > > -	list_for_each_entry(tmp, &mdev_list, next) {
> > > -		if (tmp == mdev)
> > > -			break;
> > > -	}
> > > -
> > > -	if (tmp != mdev) {
> > > -		mutex_unlock(&mdev_list_lock);
> > > -		return -ENODEV;
> > > -	}
> > > -
> > >  	if (!mdev->active) {
> > >  		mutex_unlock(&mdev_list_lock);
> > >  		return -EAGAIN;
> > >
> 
> We still release it in this path and the code below here.  If we don't find the
> device on the list under lock, then we're working with a stale device and
> playing with the 'active' flag of that device outside of any sort of mutual
> exclusion is racy.  Thanks,
Subsequent patch makes the order sane.
I think I should merge this change with patch-8 in the series.
Alex Williamson March 25, 2019, 9:52 p.m. UTC | #5
On Mon, 25 Mar 2019 21:36:42 +0000
Parav Pandit <parav@mellanox.com> wrote:

> > -----Original Message-----
> > From: Alex Williamson <alex.williamson@redhat.com>
> > Sent: Monday, March 25, 2019 3:50 PM
> > To: Kirti Wankhede <kwankhede@nvidia.com>
> > Cc: Parav Pandit <parav@mellanox.com>; kvm@vger.kernel.org; linux-
> > kernel@vger.kernel.org
> > Subject: Re: [PATCH 7/8] vfio/mdev: Fix aborting mdev child device removal if
> > one fails
> > 
> > On Tue, 26 Mar 2019 01:05:34 +0530
> > Kirti Wankhede <kwankhede@nvidia.com> wrote:
> >   
> > > On 3/23/2019 4:50 AM, Parav Pandit wrote:  
> > > > device_for_each_child() stops executing callback function for
> > > > remaining child devices, if callback hits an error.
> > > > Each child mdev device is independent of each other.
> > > > While unregistering parent device, mdev core must remove all child
> > > > mdev devices.
> > > > Therefore, mdev_device_remove_cb() always returns success so that
> > > > device_for_each_child doesn't abort if one child removal hits error.
> > > >  
> > >
> > > When unregistering parent device, force_remove is set to true amd
> > > mdev_device_remove_ops() always returns success.  
> > 
> > Can we know that?  mdev_device_remove() doesn't guarantee to return
> > zero.
> >   
> > > > While at it, improve remove and unregister functions for below  
> > simplicity.  
> > > >
> > > > There isn't need to pass forced flag pointer during mdev parent
> > > > removal which invokes mdev_device_remove().  
> > >
> > > There is a need to pass the flag, pasting here the comment above
> > > mdev_device_remove_ops() which explains why the flag is needed:
> > >
> > > /*
> > >  * mdev_device_remove_ops gets called from sysfs's 'remove' and when
> > > parent
> > >  * device is being unregistered from mdev device framework.
> > >  * - 'force_remove' is set to 'false' when called from sysfs's 'remove'
> > > which
> > >  *   indicates that if the mdev device is active, used by VMM or userspace
> > >  *   application, vendor driver could return error then don't remove the
> > > device.
> > >  * - 'force_remove' is set to 'true' when called from
> > > mdev_unregister_device()
> > >  *   which indicate that parent device is being removed from mdev device
> > >  *   framework so remove mdev device forcefully.
> > >  */  
> > 
> > I don't see that this changes the force behavior, it's simply noting that in
> > order to continue the device_for_each_child() iterator, we need to return
> > zero, regardless of what mdev_device_remove() returns, and the parent
> > remove path is the only caller of mdev_device_remove_cb(), so we can
> > assume force = true when calling mdev_device_remove().  Aside from maybe
> > a WARN_ON if mdev_device_remove() returns non-zero, that much looks
> > reasonable to me.
> >   
> > >  So simplify the flow.  
> > > >
> > > > mdev_device_remove() is called from two paths.
> > > > 1. mdev_unregister_driver()
> > > >      mdev_device_remove_cb()
> > > >        mdev_device_remove()
> > > > 2. remove_store()
> > > >      mdev_device_remove()
> > > >
> > > > When device is removed by user using remote_store(), device under
> > > > removal is mdev device.
> > > > When device is removed during parent device removal using generic
> > > > child iterator, mdev check is already done using dev_is_mdev().
> > > >
> > > > Hence, remove the unnecessary loop in mdev_device_remove().  
> > 
> > I don't think knowing the device type is the only reason for this loop though.
> > Both paths you mention above can race with each other, so we need to
> > serialize them and pick a winner.  The mdev_list_lock allows us to do that.
> > Additionally...
> >   
> > > >
> > > > Fixes: 7b96953bc640 ("vfio: Mediated device Core driver")
> > > > Signed-off-by: Parav Pandit <parav@mellanox.com>
> > > > ---
> > > >  drivers/vfio/mdev/mdev_core.c | 24 +++++-------------------
> > > >  1 file changed, 5 insertions(+), 19 deletions(-)
> > > >
> > > > diff --git a/drivers/vfio/mdev/mdev_core.c
> > > > b/drivers/vfio/mdev/mdev_core.c index ab05464..944a058 100644
> > > > --- a/drivers/vfio/mdev/mdev_core.c
> > > > +++ b/drivers/vfio/mdev/mdev_core.c
> > > > @@ -150,10 +150,10 @@ static int mdev_device_remove_ops(struct
> > > > mdev_device *mdev, bool force_remove)
> > > >
> > > >  static int mdev_device_remove_cb(struct device *dev, void *data)  {
> > > > -	if (!dev_is_mdev(dev))
> > > > -		return 0;
> > > > +	if (dev_is_mdev(dev))
> > > > +		mdev_device_remove(dev, true);
> > > >
> > > > -	return mdev_device_remove(dev, data ? *(bool *)data : true);
> > > > +	return 0;
> > > >  }
> > > >
> > > >  /*
> > > > @@ -241,7 +241,6 @@ int mdev_register_device(struct device *dev,
> > > > const struct mdev_parent_ops *ops)  void
> > > > mdev_unregister_device(struct device *dev)  {
> > > >  	struct mdev_parent *parent;
> > > > -	bool force_remove = true;
> > > >
> > > >  	mutex_lock(&parent_list_lock);
> > > >  	parent = __find_parent_device(dev); @@ -255,8 +254,7 @@ void
> > > > mdev_unregister_device(struct device *dev)
> > > >  	list_del(&parent->next);
> > > >  	class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
> > > >
> > > > -	device_for_each_child(dev, (void *)&force_remove,
> > > > -			      mdev_device_remove_cb);
> > > > +	device_for_each_child(dev, NULL, mdev_device_remove_cb);
> > > >
> > > >  	parent_remove_sysfs_files(parent);
> > > >
> > > > @@ -346,24 +344,12 @@ int mdev_device_create(struct kobject *kobj,
> > > > struct device *dev, uuid_le uuid)
> > > >
> > > >  int mdev_device_remove(struct device *dev, bool force_remove)  {
> > > > -	struct mdev_device *mdev, *tmp;
> > > > +	struct mdev_device *mdev;
> > > >  	struct mdev_parent *parent;
> > > >  	struct mdev_type *type;
> > > >  	int ret;
> > > >
> > > >  	mdev = to_mdev_device(dev);
> > > > -
> > > > -	mutex_lock(&mdev_list_lock);  
> > 
> > Acquiring the lock is removed, but...
> >   
> Crap. Missed the lower part.
> 
> > > > -	list_for_each_entry(tmp, &mdev_list, next) {
> > > > -		if (tmp == mdev)
> > > > -			break;
> > > > -	}
> > > > -
> > > > -	if (tmp != mdev) {
> > > > -		mutex_unlock(&mdev_list_lock);
> > > > -		return -ENODEV;
> > > > -	}
> > > > -
> > > >  	if (!mdev->active) {
> > > >  		mutex_unlock(&mdev_list_lock);
> > > >  		return -EAGAIN;
> > > >  
> > 
> > We still release it in this path and the code below here.  If we don't find the
> > device on the list under lock, then we're working with a stale device and
> > playing with the 'active' flag of that device outside of any sort of mutual
> > exclusion is racy.  Thanks,  
> Subsequent patch makes the order sane.
> I think I should merge this change with patch-8 in the series.

Please don't incorporate more fixes into patch 8, it has too many
already.  I'd really prefer to see patch 8 split into issues you've
identified as much as possible.  Thanks,

Alex
Parav Pandit March 25, 2019, 10:07 p.m. UTC | #6
Hi Alex,

> -----Original Message-----
> From: Alex Williamson <alex.williamson@redhat.com>
> Sent: Monday, March 25, 2019 4:52 PM
> To: Parav Pandit <parav@mellanox.com>
> Cc: Kirti Wankhede <kwankhede@nvidia.com>; kvm@vger.kernel.org; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH 7/8] vfio/mdev: Fix aborting mdev child device removal if
> one fails
> 
> On Mon, 25 Mar 2019 21:36:42 +0000
> Parav Pandit <parav@mellanox.com> wrote:
> 
> > > -----Original Message-----
> > > From: Alex Williamson <alex.williamson@redhat.com>
> > > Sent: Monday, March 25, 2019 3:50 PM
> > > To: Kirti Wankhede <kwankhede@nvidia.com>
> > > Cc: Parav Pandit <parav@mellanox.com>; kvm@vger.kernel.org; linux-
> > > kernel@vger.kernel.org
> > > Subject: Re: [PATCH 7/8] vfio/mdev: Fix aborting mdev child device
> > > removal if one fails
> > >
> > > On Tue, 26 Mar 2019 01:05:34 +0530
> > > Kirti Wankhede <kwankhede@nvidia.com> wrote:
> > >
> > > > On 3/23/2019 4:50 AM, Parav Pandit wrote:
> > > > > device_for_each_child() stops executing callback function for
> > > > > remaining child devices, if callback hits an error.
> > > > > Each child mdev device is independent of each other.
> > > > > While unregistering parent device, mdev core must remove all
> > > > > child mdev devices.
> > > > > Therefore, mdev_device_remove_cb() always returns success so
> > > > > that device_for_each_child doesn't abort if one child removal hits
> error.
> > > > >
> > > >
> > > > When unregistering parent device, force_remove is set to true amd
> > > > mdev_device_remove_ops() always returns success.
> > >
> > > Can we know that?  mdev_device_remove() doesn't guarantee to return
> > > zero.
> > >
> > > > > While at it, improve remove and unregister functions for below
> > > simplicity.
> > > > >
> > > > > There isn't need to pass forced flag pointer during mdev parent
> > > > > removal which invokes mdev_device_remove().
> > > >
> > > > There is a need to pass the flag, pasting here the comment above
> > > > mdev_device_remove_ops() which explains why the flag is needed:
> > > >
> > > > /*
> > > >  * mdev_device_remove_ops gets called from sysfs's 'remove' and
> > > > when parent
> > > >  * device is being unregistered from mdev device framework.
> > > >  * - 'force_remove' is set to 'false' when called from sysfs's 'remove'
> > > > which
> > > >  *   indicates that if the mdev device is active, used by VMM or
> userspace
> > > >  *   application, vendor driver could return error then don't remove the
> > > > device.
> > > >  * - 'force_remove' is set to 'true' when called from
> > > > mdev_unregister_device()
> > > >  *   which indicate that parent device is being removed from mdev
> device
> > > >  *   framework so remove mdev device forcefully.
> > > >  */
> > >
> > > I don't see that this changes the force behavior, it's simply noting
> > > that in order to continue the device_for_each_child() iterator, we
> > > need to return zero, regardless of what mdev_device_remove()
> > > returns, and the parent remove path is the only caller of
> > > mdev_device_remove_cb(), so we can assume force = true when calling
> > > mdev_device_remove().  Aside from maybe a WARN_ON if
> > > mdev_device_remove() returns non-zero, that much looks reasonable to
> me.
> > >
> > > >  So simplify the flow.
> > > > >
> > > > > mdev_device_remove() is called from two paths.
> > > > > 1. mdev_unregister_driver()
> > > > >      mdev_device_remove_cb()
> > > > >        mdev_device_remove()
> > > > > 2. remove_store()
> > > > >      mdev_device_remove()
> > > > >
> > > > > When device is removed by user using remote_store(), device
> > > > > under removal is mdev device.
> > > > > When device is removed during parent device removal using
> > > > > generic child iterator, mdev check is already done using
> dev_is_mdev().
> > > > >
> > > > > Hence, remove the unnecessary loop in mdev_device_remove().
> > >
> > > I don't think knowing the device type is the only reason for this loop
> though.
> > > Both paths you mention above can race with each other, so we need to
> > > serialize them and pick a winner.  The mdev_list_lock allows us to do
> that.
> > > Additionally...
> > >
> > > > >
> > > > > Fixes: 7b96953bc640 ("vfio: Mediated device Core driver")
> > > > > Signed-off-by: Parav Pandit <parav@mellanox.com>
> > > > > ---
> > > > >  drivers/vfio/mdev/mdev_core.c | 24 +++++-------------------
> > > > >  1 file changed, 5 insertions(+), 19 deletions(-)
> > > > >
> > > > > diff --git a/drivers/vfio/mdev/mdev_core.c
> > > > > b/drivers/vfio/mdev/mdev_core.c index ab05464..944a058 100644
> > > > > --- a/drivers/vfio/mdev/mdev_core.c
> > > > > +++ b/drivers/vfio/mdev/mdev_core.c
> > > > > @@ -150,10 +150,10 @@ static int mdev_device_remove_ops(struct
> > > > > mdev_device *mdev, bool force_remove)
> > > > >
> > > > >  static int mdev_device_remove_cb(struct device *dev, void *data)  {
> > > > > -	if (!dev_is_mdev(dev))
> > > > > -		return 0;
> > > > > +	if (dev_is_mdev(dev))
> > > > > +		mdev_device_remove(dev, true);
> > > > >
> > > > > -	return mdev_device_remove(dev, data ? *(bool *)data :
> true);
> > > > > +	return 0;
> > > > >  }
> > > > >
> > > > >  /*
> > > > > @@ -241,7 +241,6 @@ int mdev_register_device(struct device *dev,
> > > > > const struct mdev_parent_ops *ops)  void
> > > > > mdev_unregister_device(struct device *dev)  {
> > > > >  	struct mdev_parent *parent;
> > > > > -	bool force_remove = true;
> > > > >
> > > > >  	mutex_lock(&parent_list_lock);
> > > > >  	parent = __find_parent_device(dev); @@ -255,8 +254,7 @@
> void
> > > > > mdev_unregister_device(struct device *dev)
> > > > >  	list_del(&parent->next);
> > > > >  	class_compat_remove_link(mdev_bus_compat_class, dev,
> NULL);
> > > > >
> > > > > -	device_for_each_child(dev, (void *)&force_remove,
> > > > > -			      mdev_device_remove_cb);
> > > > > +	device_for_each_child(dev, NULL, mdev_device_remove_cb);
> > > > >
> > > > >  	parent_remove_sysfs_files(parent);
> > > > >
> > > > > @@ -346,24 +344,12 @@ int mdev_device_create(struct kobject
> > > > > *kobj, struct device *dev, uuid_le uuid)
> > > > >
> > > > >  int mdev_device_remove(struct device *dev, bool force_remove)  {
> > > > > -	struct mdev_device *mdev, *tmp;
> > > > > +	struct mdev_device *mdev;
> > > > >  	struct mdev_parent *parent;
> > > > >  	struct mdev_type *type;
> > > > >  	int ret;
> > > > >
> > > > >  	mdev = to_mdev_device(dev);
> > > > > -
> > > > > -	mutex_lock(&mdev_list_lock);
> > >
> > > Acquiring the lock is removed, but...
> > >
> > Crap. Missed the lower part.
> >
> > > > > -	list_for_each_entry(tmp, &mdev_list, next) {
> > > > > -		if (tmp == mdev)
> > > > > -			break;
> > > > > -	}
> > > > > -
> > > > > -	if (tmp != mdev) {
> > > > > -		mutex_unlock(&mdev_list_lock);
> > > > > -		return -ENODEV;
> > > > > -	}
> > > > > -
> > > > >  	if (!mdev->active) {
> > > > >  		mutex_unlock(&mdev_list_lock);
> > > > >  		return -EAGAIN;
> > > > >
> > >
> > > We still release it in this path and the code below here.  If we
> > > don't find the device on the list under lock, then we're working
> > > with a stale device and playing with the 'active' flag of that
> > > device outside of any sort of mutual exclusion is racy.  Thanks,
> > Subsequent patch makes the order sane.
> > I think I should merge this change with patch-8 in the series.
> 
> Please don't incorporate more fixes into patch 8, it has too many already.  I'd
> really prefer to see patch 8 split into issues you've identified as much as
> possible.  Thanks,
> 
I tried to split into two patches.
one for user initiated race conditions, second for driver side race conditions.
But its generating more code churn as synchronization is inter-related. So dropped it.

This patch is just fine, only thing I messed up is accidental mutex lock removal.
Below is the fixup patch for patch-7 that I want to roll in v2.
Rest all stays same in patch-7 and 8.

diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index 5bd8d22..e09b94f 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -349,6 +349,7 @@ int mdev_device_remove(struct device *dev, bool force_remove)
        struct mdev_type *type;
        int ret;

+       mutex_lock(&mdev_list_lock);
        mdev = to_mdev_device(dev);
        if (!mdev->active) {
                mutex_unlock(&mdev_list_lock);


> Alex
diff mbox series

Patch

diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index ab05464..944a058 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -150,10 +150,10 @@  static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove)
 
 static int mdev_device_remove_cb(struct device *dev, void *data)
 {
-	if (!dev_is_mdev(dev))
-		return 0;
+	if (dev_is_mdev(dev))
+		mdev_device_remove(dev, true);
 
-	return mdev_device_remove(dev, data ? *(bool *)data : true);
+	return 0;
 }
 
 /*
@@ -241,7 +241,6 @@  int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
 void mdev_unregister_device(struct device *dev)
 {
 	struct mdev_parent *parent;
-	bool force_remove = true;
 
 	mutex_lock(&parent_list_lock);
 	parent = __find_parent_device(dev);
@@ -255,8 +254,7 @@  void mdev_unregister_device(struct device *dev)
 	list_del(&parent->next);
 	class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
 
-	device_for_each_child(dev, (void *)&force_remove,
-			      mdev_device_remove_cb);
+	device_for_each_child(dev, NULL, mdev_device_remove_cb);
 
 	parent_remove_sysfs_files(parent);
 
@@ -346,24 +344,12 @@  int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
 
 int mdev_device_remove(struct device *dev, bool force_remove)
 {
-	struct mdev_device *mdev, *tmp;
+	struct mdev_device *mdev;
 	struct mdev_parent *parent;
 	struct mdev_type *type;
 	int ret;
 
 	mdev = to_mdev_device(dev);
-
-	mutex_lock(&mdev_list_lock);
-	list_for_each_entry(tmp, &mdev_list, next) {
-		if (tmp == mdev)
-			break;
-	}
-
-	if (tmp != mdev) {
-		mutex_unlock(&mdev_list_lock);
-		return -ENODEV;
-	}
-
 	if (!mdev->active) {
 		mutex_unlock(&mdev_list_lock);
 		return -EAGAIN;