[01/13] driver core, libnvdimm: disable manual unbind of dimms while region active
diff mbox

Message ID 146507355828.8347.11600179233094511139.stgit@dwillia2-desk3.amr.corp.intel.com
State New, archived
Headers show

Commit Message

Dan Williams June 4, 2016, 8:52 p.m. UTC
There are scenarios where we need a middle ground between disabling all
manual bind/unbind attempts (via driver->suppress_bind_attrs) and
allowing unbind at any userspace-determined time.  Pinning modules takes
away one vector for unwanted out-of-sequence device_release_driver()
invocations, this new mechanism (via device->suppress_unbind_attr) takes
away another.

The first user of this mechanism is the libnvdimm sub-system where
manual dimm disabling should be prevented while the dimm is active in
any region.  Note that there is a 1:N dimm-to-region relationship which
is why this is implemented as a disable count rather than a flag.  This
forces userspace to disable regions before dimms when manually shutting
down a bus topology.

This does not affect any of the kernel-internal initiated invocations of
device_release_driver().

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/base/base.h             |    1 +
 drivers/base/bus.c              |   12 ++++++++++--
 drivers/base/core.c             |    1 +
 drivers/base/dd.c               |    2 +-
 drivers/nvdimm/namespace_devs.c |    1 +
 drivers/nvdimm/region_devs.c    |    4 +++-
 include/linux/device.h          |   20 ++++++++++++++++++++
 7 files changed, 37 insertions(+), 4 deletions(-)

Comments

Greg Kroah-Hartman June 4, 2016, 9:10 p.m. UTC | #1
On Sat, Jun 04, 2016 at 01:52:38PM -0700, Dan Williams wrote:
> There are scenarios where we need a middle ground between disabling all
> manual bind/unbind attempts (via driver->suppress_bind_attrs) and
> allowing unbind at any userspace-determined time.  Pinning modules takes
> away one vector for unwanted out-of-sequence device_release_driver()
> invocations, this new mechanism (via device->suppress_unbind_attr) takes
> away another.
> 
> The first user of this mechanism is the libnvdimm sub-system where
> manual dimm disabling should be prevented while the dimm is active in
> any region.  Note that there is a 1:N dimm-to-region relationship which
> is why this is implemented as a disable count rather than a flag.  This
> forces userspace to disable regions before dimms when manually shutting
> down a bus topology.
> 
> This does not affect any of the kernel-internal initiated invocations of
> device_release_driver().
> 
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/base/base.h             |    1 +
>  drivers/base/bus.c              |   12 ++++++++++--
>  drivers/base/core.c             |    1 +
>  drivers/base/dd.c               |    2 +-
>  drivers/nvdimm/namespace_devs.c |    1 +
>  drivers/nvdimm/region_devs.c    |    4 +++-
>  include/linux/device.h          |   20 ++++++++++++++++++++
>  7 files changed, 37 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/base/base.h b/drivers/base/base.h
> index e05db388bd1c..129814b17ca6 100644
> --- a/drivers/base/base.h
> +++ b/drivers/base/base.h
> @@ -109,6 +109,7 @@ extern int bus_add_driver(struct device_driver *drv);
>  extern void bus_remove_driver(struct device_driver *drv);
>  
>  extern void driver_detach(struct device_driver *drv);
> +extern void __device_release_driver(struct device *dev);
>  extern int driver_probe_device(struct device_driver *drv, struct device *dev);
>  extern void driver_deferred_probe_del(struct device *dev);
>  static inline int driver_match_device(struct device_driver *drv,
> diff --git a/drivers/base/bus.c b/drivers/base/bus.c
> index 6470eb8088f4..b48a903f2d28 100644
> --- a/drivers/base/bus.c
> +++ b/drivers/base/bus.c
> @@ -188,10 +188,18 @@ static ssize_t unbind_store(struct device_driver *drv, const char *buf,
>  	if (dev && dev->driver == drv) {
>  		if (dev->parent)	/* Needed for USB */
>  			device_lock(dev->parent);
> -		device_release_driver(dev);
> +
> +		device_lock(dev);
> +		if (atomic_read(&dev->suppress_unbind_attr) > 0)
> +			err = -EBUSY;
> +		else {
> +			__device_release_driver(dev);
> +			err = count;
> +		}
> +		device_unlock(dev);
> +
>  		if (dev->parent)
>  			device_unlock(dev->parent);
> -		err = count;
>  	}
>  	put_device(dev);
>  	bus_put(bus);
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index 0a8bdade53f2..0ea0e8560e1d 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -708,6 +708,7 @@ void device_initialize(struct device *dev)
>  	INIT_LIST_HEAD(&dev->devres_head);
>  	device_pm_init(dev);
>  	set_dev_node(dev, -1);
> +	atomic_set(&dev->suppress_unbind_attr, 0);
>  #ifdef CONFIG_GENERIC_MSI_IRQ
>  	INIT_LIST_HEAD(&dev->msi_list);
>  #endif
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 16688f50729c..9e21817ca2d6 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -756,7 +756,7 @@ EXPORT_SYMBOL_GPL(driver_attach);
>   * __device_release_driver() must be called with @dev lock held.
>   * When called for a USB interface, @dev->parent lock must be held as well.
>   */
> -static void __device_release_driver(struct device *dev)
> +void __device_release_driver(struct device *dev)
>  {
>  	struct device_driver *drv;
>  
> diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
> index c5e3196c45b0..e65572b6092c 100644
> --- a/drivers/nvdimm/namespace_devs.c
> +++ b/drivers/nvdimm/namespace_devs.c
> @@ -1950,6 +1950,7 @@ static int init_active_labels(struct nd_region *nd_region)
>  		}
>  		nd_mapping->ndd = ndd;
>  		atomic_inc(&nvdimm->busy);
> +		device_disable_unbind_attr(&nvdimm->dev);
>  		get_ndd(ndd);
>  
>  		count = nd_label_active_count(ndd);
> diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
> index 40fcfea26fbb..320f0f3ea367 100644
> --- a/drivers/nvdimm/region_devs.c
> +++ b/drivers/nvdimm/region_devs.c
> @@ -427,8 +427,10 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
>  			nd_mapping->labels = NULL;
>  			put_ndd(ndd);
>  			nd_mapping->ndd = NULL;
> -			if (ndd)
> +			if (ndd) {
>  				atomic_dec(&nvdimm->busy);
> +				device_enable_unbind_attr(&nvdimm->dev);
> +			}
>  		}
>  
>  		if (is_nd_pmem(dev))
> diff --git a/include/linux/device.h b/include/linux/device.h
> index 38f02814d53a..d9eaa85bb9cf 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -849,6 +849,7 @@ struct device {
>  
>  	void	(*release)(struct device *dev);
>  	struct iommu_group	*iommu_group;
> +	atomic_t		suppress_unbind_attr; /* disable manual unbind */
>  
>  	bool			offline_disabled:1;
>  	bool			offline:1;
> @@ -988,6 +989,25 @@ static inline void device_lock_assert(struct device *dev)
>  	lockdep_assert_held(&dev->mutex);
>  }
>  
> +static inline bool device_disable_unbind_attr(struct device *dev)
> +{
> +	bool suppressed = false;
> +
> +	device_lock(dev);
> +	if (dev->driver) {
> +		atomic_inc(&dev->suppress_unbind_attr);
> +		suppressed = true;
> +	}
> +	device_unlock(dev);
> +
> +	return suppressed;
> +}
> +
> +static inline bool device_enable_unbind_attr(struct device *dev)
> +{
> +	return atomic_dec_and_test(&dev->suppress_unbind_attr);
> +}
> +

Ick, that's a mess.

Why not just prevent the unbinding from happening in your bus when you
need it?

And as you are grabbing a lock, why is this an atomic variable?

This is just making things _really_ complex for very limited gain for
what I can tell.

thanks,

greg k-h
Dan Williams June 4, 2016, 9:39 p.m. UTC | #2
On Sat, Jun 4, 2016 at 2:10 PM, Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
> On Sat, Jun 04, 2016 at 01:52:38PM -0700, Dan Williams wrote:
>> There are scenarios where we need a middle ground between disabling all
>> manual bind/unbind attempts (via driver->suppress_bind_attrs) and
>> allowing unbind at any userspace-determined time.  Pinning modules takes
>> away one vector for unwanted out-of-sequence device_release_driver()
>> invocations, this new mechanism (via device->suppress_unbind_attr) takes
>> away another.
>>
>> The first user of this mechanism is the libnvdimm sub-system where
>> manual dimm disabling should be prevented while the dimm is active in
>> any region.  Note that there is a 1:N dimm-to-region relationship which
>> is why this is implemented as a disable count rather than a flag.  This
>> forces userspace to disable regions before dimms when manually shutting
>> down a bus topology.
>>
>> This does not affect any of the kernel-internal initiated invocations of
>> device_release_driver().
>>
>> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>> ---
>>  drivers/base/base.h             |    1 +
>>  drivers/base/bus.c              |   12 ++++++++++--
>>  drivers/base/core.c             |    1 +
>>  drivers/base/dd.c               |    2 +-
>>  drivers/nvdimm/namespace_devs.c |    1 +
>>  drivers/nvdimm/region_devs.c    |    4 +++-
>>  include/linux/device.h          |   20 ++++++++++++++++++++
>>  7 files changed, 37 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/base/base.h b/drivers/base/base.h
>> index e05db388bd1c..129814b17ca6 100644
>> --- a/drivers/base/base.h
>> +++ b/drivers/base/base.h
>> @@ -109,6 +109,7 @@ extern int bus_add_driver(struct device_driver *drv);
>>  extern void bus_remove_driver(struct device_driver *drv);
>>
>>  extern void driver_detach(struct device_driver *drv);
>> +extern void __device_release_driver(struct device *dev);
>>  extern int driver_probe_device(struct device_driver *drv, struct device *dev);
>>  extern void driver_deferred_probe_del(struct device *dev);
>>  static inline int driver_match_device(struct device_driver *drv,
>> diff --git a/drivers/base/bus.c b/drivers/base/bus.c
>> index 6470eb8088f4..b48a903f2d28 100644
>> --- a/drivers/base/bus.c
>> +++ b/drivers/base/bus.c
>> @@ -188,10 +188,18 @@ static ssize_t unbind_store(struct device_driver *drv, const char *buf,
>>       if (dev && dev->driver == drv) {
>>               if (dev->parent)        /* Needed for USB */
>>                       device_lock(dev->parent);
>> -             device_release_driver(dev);
>> +
>> +             device_lock(dev);
>> +             if (atomic_read(&dev->suppress_unbind_attr) > 0)
>> +                     err = -EBUSY;
>> +             else {
>> +                     __device_release_driver(dev);
>> +                     err = count;
>> +             }
>> +             device_unlock(dev);
>> +
>>               if (dev->parent)
>>                       device_unlock(dev->parent);
>> -             err = count;
>>       }
>>       put_device(dev);
>>       bus_put(bus);
>> diff --git a/drivers/base/core.c b/drivers/base/core.c
>> index 0a8bdade53f2..0ea0e8560e1d 100644
>> --- a/drivers/base/core.c
>> +++ b/drivers/base/core.c
>> @@ -708,6 +708,7 @@ void device_initialize(struct device *dev)
>>       INIT_LIST_HEAD(&dev->devres_head);
>>       device_pm_init(dev);
>>       set_dev_node(dev, -1);
>> +     atomic_set(&dev->suppress_unbind_attr, 0);
>>  #ifdef CONFIG_GENERIC_MSI_IRQ
>>       INIT_LIST_HEAD(&dev->msi_list);
>>  #endif
>> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
>> index 16688f50729c..9e21817ca2d6 100644
>> --- a/drivers/base/dd.c
>> +++ b/drivers/base/dd.c
>> @@ -756,7 +756,7 @@ EXPORT_SYMBOL_GPL(driver_attach);
>>   * __device_release_driver() must be called with @dev lock held.
>>   * When called for a USB interface, @dev->parent lock must be held as well.
>>   */
>> -static void __device_release_driver(struct device *dev)
>> +void __device_release_driver(struct device *dev)
>>  {
>>       struct device_driver *drv;
>>
>> diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
>> index c5e3196c45b0..e65572b6092c 100644
>> --- a/drivers/nvdimm/namespace_devs.c
>> +++ b/drivers/nvdimm/namespace_devs.c
>> @@ -1950,6 +1950,7 @@ static int init_active_labels(struct nd_region *nd_region)
>>               }
>>               nd_mapping->ndd = ndd;
>>               atomic_inc(&nvdimm->busy);
>> +             device_disable_unbind_attr(&nvdimm->dev);
>>               get_ndd(ndd);
>>
>>               count = nd_label_active_count(ndd);
>> diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
>> index 40fcfea26fbb..320f0f3ea367 100644
>> --- a/drivers/nvdimm/region_devs.c
>> +++ b/drivers/nvdimm/region_devs.c
>> @@ -427,8 +427,10 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
>>                       nd_mapping->labels = NULL;
>>                       put_ndd(ndd);
>>                       nd_mapping->ndd = NULL;
>> -                     if (ndd)
>> +                     if (ndd) {
>>                               atomic_dec(&nvdimm->busy);
>> +                             device_enable_unbind_attr(&nvdimm->dev);
>> +                     }
>>               }
>>
>>               if (is_nd_pmem(dev))
>> diff --git a/include/linux/device.h b/include/linux/device.h
>> index 38f02814d53a..d9eaa85bb9cf 100644
>> --- a/include/linux/device.h
>> +++ b/include/linux/device.h
>> @@ -849,6 +849,7 @@ struct device {
>>
>>       void    (*release)(struct device *dev);
>>       struct iommu_group      *iommu_group;
>> +     atomic_t                suppress_unbind_attr; /* disable manual unbind */
>>
>>       bool                    offline_disabled:1;
>>       bool                    offline:1;
>> @@ -988,6 +989,25 @@ static inline void device_lock_assert(struct device *dev)
>>       lockdep_assert_held(&dev->mutex);
>>  }
>>
>> +static inline bool device_disable_unbind_attr(struct device *dev)
>> +{
>> +     bool suppressed = false;
>> +
>> +     device_lock(dev);
>> +     if (dev->driver) {
>> +             atomic_inc(&dev->suppress_unbind_attr);
>> +             suppressed = true;
>> +     }
>> +     device_unlock(dev);
>> +
>> +     return suppressed;
>> +}
>> +
>> +static inline bool device_enable_unbind_attr(struct device *dev)
>> +{
>> +     return atomic_dec_and_test(&dev->suppress_unbind_attr);
>> +}
>> +
>
> Ick, that's a mess.
>
> Why not just prevent the unbinding from happening in your bus when you
> need it?

Because historically unbind never fails...

    void device_release_driver(struct device *dev);

> And as you are grabbing a lock, why is this an atomic variable?
>
> This is just making things _really_ complex for very limited gain for
> what I can tell.

I thought it was cleaner to have the disable action synchronize with
unbind, but the lock isn't needed to re-enable unbind.  I can move the
complexity to the caller to bounce the device_lock() and re-validate
that dev->driver is still set, but that does not seem cleaner.

If making device_release_driver() fail is an option I can take a look,
but that seems like a major change in policy.
Greg Kroah-Hartman June 4, 2016, 9:45 p.m. UTC | #3
On Sat, Jun 04, 2016 at 02:39:02PM -0700, Dan Williams wrote:
> On Sat, Jun 4, 2016 at 2:10 PM, Greg Kroah-Hartman
> <gregkh@linuxfoundation.org> wrote:
> > On Sat, Jun 04, 2016 at 01:52:38PM -0700, Dan Williams wrote:
> >> There are scenarios where we need a middle ground between disabling all
> >> manual bind/unbind attempts (via driver->suppress_bind_attrs) and
> >> allowing unbind at any userspace-determined time.  Pinning modules takes
> >> away one vector for unwanted out-of-sequence device_release_driver()
> >> invocations, this new mechanism (via device->suppress_unbind_attr) takes
> >> away another.
> >>
> >> The first user of this mechanism is the libnvdimm sub-system where
> >> manual dimm disabling should be prevented while the dimm is active in
> >> any region.  Note that there is a 1:N dimm-to-region relationship which
> >> is why this is implemented as a disable count rather than a flag.  This
> >> forces userspace to disable regions before dimms when manually shutting
> >> down a bus topology.
> >>
> >> This does not affect any of the kernel-internal initiated invocations of
> >> device_release_driver().
> >>
> >> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> >> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> >> ---
> >>  drivers/base/base.h             |    1 +
> >>  drivers/base/bus.c              |   12 ++++++++++--
> >>  drivers/base/core.c             |    1 +
> >>  drivers/base/dd.c               |    2 +-
> >>  drivers/nvdimm/namespace_devs.c |    1 +
> >>  drivers/nvdimm/region_devs.c    |    4 +++-
> >>  include/linux/device.h          |   20 ++++++++++++++++++++
> >>  7 files changed, 37 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/base/base.h b/drivers/base/base.h
> >> index e05db388bd1c..129814b17ca6 100644
> >> --- a/drivers/base/base.h
> >> +++ b/drivers/base/base.h
> >> @@ -109,6 +109,7 @@ extern int bus_add_driver(struct device_driver *drv);
> >>  extern void bus_remove_driver(struct device_driver *drv);
> >>
> >>  extern void driver_detach(struct device_driver *drv);
> >> +extern void __device_release_driver(struct device *dev);
> >>  extern int driver_probe_device(struct device_driver *drv, struct device *dev);
> >>  extern void driver_deferred_probe_del(struct device *dev);
> >>  static inline int driver_match_device(struct device_driver *drv,
> >> diff --git a/drivers/base/bus.c b/drivers/base/bus.c
> >> index 6470eb8088f4..b48a903f2d28 100644
> >> --- a/drivers/base/bus.c
> >> +++ b/drivers/base/bus.c
> >> @@ -188,10 +188,18 @@ static ssize_t unbind_store(struct device_driver *drv, const char *buf,
> >>       if (dev && dev->driver == drv) {
> >>               if (dev->parent)        /* Needed for USB */
> >>                       device_lock(dev->parent);
> >> -             device_release_driver(dev);
> >> +
> >> +             device_lock(dev);
> >> +             if (atomic_read(&dev->suppress_unbind_attr) > 0)
> >> +                     err = -EBUSY;
> >> +             else {
> >> +                     __device_release_driver(dev);
> >> +                     err = count;
> >> +             }
> >> +             device_unlock(dev);
> >> +
> >>               if (dev->parent)
> >>                       device_unlock(dev->parent);
> >> -             err = count;
> >>       }
> >>       put_device(dev);
> >>       bus_put(bus);
> >> diff --git a/drivers/base/core.c b/drivers/base/core.c
> >> index 0a8bdade53f2..0ea0e8560e1d 100644
> >> --- a/drivers/base/core.c
> >> +++ b/drivers/base/core.c
> >> @@ -708,6 +708,7 @@ void device_initialize(struct device *dev)
> >>       INIT_LIST_HEAD(&dev->devres_head);
> >>       device_pm_init(dev);
> >>       set_dev_node(dev, -1);
> >> +     atomic_set(&dev->suppress_unbind_attr, 0);
> >>  #ifdef CONFIG_GENERIC_MSI_IRQ
> >>       INIT_LIST_HEAD(&dev->msi_list);
> >>  #endif
> >> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> >> index 16688f50729c..9e21817ca2d6 100644
> >> --- a/drivers/base/dd.c
> >> +++ b/drivers/base/dd.c
> >> @@ -756,7 +756,7 @@ EXPORT_SYMBOL_GPL(driver_attach);
> >>   * __device_release_driver() must be called with @dev lock held.
> >>   * When called for a USB interface, @dev->parent lock must be held as well.
> >>   */
> >> -static void __device_release_driver(struct device *dev)
> >> +void __device_release_driver(struct device *dev)
> >>  {
> >>       struct device_driver *drv;
> >>
> >> diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
> >> index c5e3196c45b0..e65572b6092c 100644
> >> --- a/drivers/nvdimm/namespace_devs.c
> >> +++ b/drivers/nvdimm/namespace_devs.c
> >> @@ -1950,6 +1950,7 @@ static int init_active_labels(struct nd_region *nd_region)
> >>               }
> >>               nd_mapping->ndd = ndd;
> >>               atomic_inc(&nvdimm->busy);
> >> +             device_disable_unbind_attr(&nvdimm->dev);
> >>               get_ndd(ndd);
> >>
> >>               count = nd_label_active_count(ndd);
> >> diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
> >> index 40fcfea26fbb..320f0f3ea367 100644
> >> --- a/drivers/nvdimm/region_devs.c
> >> +++ b/drivers/nvdimm/region_devs.c
> >> @@ -427,8 +427,10 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
> >>                       nd_mapping->labels = NULL;
> >>                       put_ndd(ndd);
> >>                       nd_mapping->ndd = NULL;
> >> -                     if (ndd)
> >> +                     if (ndd) {
> >>                               atomic_dec(&nvdimm->busy);
> >> +                             device_enable_unbind_attr(&nvdimm->dev);
> >> +                     }
> >>               }
> >>
> >>               if (is_nd_pmem(dev))
> >> diff --git a/include/linux/device.h b/include/linux/device.h
> >> index 38f02814d53a..d9eaa85bb9cf 100644
> >> --- a/include/linux/device.h
> >> +++ b/include/linux/device.h
> >> @@ -849,6 +849,7 @@ struct device {
> >>
> >>       void    (*release)(struct device *dev);
> >>       struct iommu_group      *iommu_group;
> >> +     atomic_t                suppress_unbind_attr; /* disable manual unbind */
> >>
> >>       bool                    offline_disabled:1;
> >>       bool                    offline:1;
> >> @@ -988,6 +989,25 @@ static inline void device_lock_assert(struct device *dev)
> >>       lockdep_assert_held(&dev->mutex);
> >>  }
> >>
> >> +static inline bool device_disable_unbind_attr(struct device *dev)
> >> +{
> >> +     bool suppressed = false;
> >> +
> >> +     device_lock(dev);
> >> +     if (dev->driver) {
> >> +             atomic_inc(&dev->suppress_unbind_attr);
> >> +             suppressed = true;
> >> +     }
> >> +     device_unlock(dev);
> >> +
> >> +     return suppressed;
> >> +}
> >> +
> >> +static inline bool device_enable_unbind_attr(struct device *dev)
> >> +{
> >> +     return atomic_dec_and_test(&dev->suppress_unbind_attr);
> >> +}
> >> +
> >
> > Ick, that's a mess.
> >
> > Why not just prevent the unbinding from happening in your bus when you
> > need it?
> 
> Because historically unbind never fails...
> 
>     void device_release_driver(struct device *dev);

Ah, yes, forgot about that.

> > And as you are grabbing a lock, why is this an atomic variable?
> >
> > This is just making things _really_ complex for very limited gain for
> > what I can tell.
> 
> I thought it was cleaner to have the disable action synchronize with
> unbind, but the lock isn't needed to re-enable unbind.  I can move the
> complexity to the caller to bounce the device_lock() and re-validate
> that dev->driver is still set, but that does not seem cleaner.

No, if the user wants to unbind, then they can unbind, don't try to
muddy up the situation by letting it work sometimes and others not at
all.

bind/unbind is a "I really really know what I am doing here" action,
it's rare, and the user gets to keep both pieces if something fails.  I
know some busses don't like it, so we allow them to opt-out.  But to
make it "sometimes yes, sometimes no" is a mess, I don't want to support
that.

I suggest you just don't allow this if you don't want it.  It's not
anything you should ever be relying on for configuration so it shouldn't
be a big deal.

thanks,

greg k-h
Dan Williams June 4, 2016, 9:48 p.m. UTC | #4
On Sat, Jun 4, 2016 at 2:45 PM, Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
> On Sat, Jun 04, 2016 at 02:39:02PM -0700, Dan Williams wrote:
>> On Sat, Jun 4, 2016 at 2:10 PM, Greg Kroah-Hartman
>> <gregkh@linuxfoundation.org> wrote:
>> > On Sat, Jun 04, 2016 at 01:52:38PM -0700, Dan Williams wrote:
>> >> There are scenarios where we need a middle ground between disabling all
>> >> manual bind/unbind attempts (via driver->suppress_bind_attrs) and
>> >> allowing unbind at any userspace-determined time.  Pinning modules takes
>> >> away one vector for unwanted out-of-sequence device_release_driver()
>> >> invocations, this new mechanism (via device->suppress_unbind_attr) takes
>> >> away another.
>> >>
>> >> The first user of this mechanism is the libnvdimm sub-system where
>> >> manual dimm disabling should be prevented while the dimm is active in
>> >> any region.  Note that there is a 1:N dimm-to-region relationship which
>> >> is why this is implemented as a disable count rather than a flag.  This
>> >> forces userspace to disable regions before dimms when manually shutting
>> >> down a bus topology.
>> >>
>> >> This does not affect any of the kernel-internal initiated invocations of
>> >> device_release_driver().
>> >>
>> >> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>> >> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>> >> ---
>> >>  drivers/base/base.h             |    1 +
>> >>  drivers/base/bus.c              |   12 ++++++++++--
>> >>  drivers/base/core.c             |    1 +
>> >>  drivers/base/dd.c               |    2 +-
>> >>  drivers/nvdimm/namespace_devs.c |    1 +
>> >>  drivers/nvdimm/region_devs.c    |    4 +++-
>> >>  include/linux/device.h          |   20 ++++++++++++++++++++
>> >>  7 files changed, 37 insertions(+), 4 deletions(-)
>> >>
>> >> diff --git a/drivers/base/base.h b/drivers/base/base.h
>> >> index e05db388bd1c..129814b17ca6 100644
>> >> --- a/drivers/base/base.h
>> >> +++ b/drivers/base/base.h
>> >> @@ -109,6 +109,7 @@ extern int bus_add_driver(struct device_driver *drv);
>> >>  extern void bus_remove_driver(struct device_driver *drv);
>> >>
>> >>  extern void driver_detach(struct device_driver *drv);
>> >> +extern void __device_release_driver(struct device *dev);
>> >>  extern int driver_probe_device(struct device_driver *drv, struct device *dev);
>> >>  extern void driver_deferred_probe_del(struct device *dev);
>> >>  static inline int driver_match_device(struct device_driver *drv,
>> >> diff --git a/drivers/base/bus.c b/drivers/base/bus.c
>> >> index 6470eb8088f4..b48a903f2d28 100644
>> >> --- a/drivers/base/bus.c
>> >> +++ b/drivers/base/bus.c
>> >> @@ -188,10 +188,18 @@ static ssize_t unbind_store(struct device_driver *drv, const char *buf,
>> >>       if (dev && dev->driver == drv) {
>> >>               if (dev->parent)        /* Needed for USB */
>> >>                       device_lock(dev->parent);
>> >> -             device_release_driver(dev);
>> >> +
>> >> +             device_lock(dev);
>> >> +             if (atomic_read(&dev->suppress_unbind_attr) > 0)
>> >> +                     err = -EBUSY;
>> >> +             else {
>> >> +                     __device_release_driver(dev);
>> >> +                     err = count;
>> >> +             }
>> >> +             device_unlock(dev);
>> >> +
>> >>               if (dev->parent)
>> >>                       device_unlock(dev->parent);
>> >> -             err = count;
>> >>       }
>> >>       put_device(dev);
>> >>       bus_put(bus);
>> >> diff --git a/drivers/base/core.c b/drivers/base/core.c
>> >> index 0a8bdade53f2..0ea0e8560e1d 100644
>> >> --- a/drivers/base/core.c
>> >> +++ b/drivers/base/core.c
>> >> @@ -708,6 +708,7 @@ void device_initialize(struct device *dev)
>> >>       INIT_LIST_HEAD(&dev->devres_head);
>> >>       device_pm_init(dev);
>> >>       set_dev_node(dev, -1);
>> >> +     atomic_set(&dev->suppress_unbind_attr, 0);
>> >>  #ifdef CONFIG_GENERIC_MSI_IRQ
>> >>       INIT_LIST_HEAD(&dev->msi_list);
>> >>  #endif
>> >> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
>> >> index 16688f50729c..9e21817ca2d6 100644
>> >> --- a/drivers/base/dd.c
>> >> +++ b/drivers/base/dd.c
>> >> @@ -756,7 +756,7 @@ EXPORT_SYMBOL_GPL(driver_attach);
>> >>   * __device_release_driver() must be called with @dev lock held.
>> >>   * When called for a USB interface, @dev->parent lock must be held as well.
>> >>   */
>> >> -static void __device_release_driver(struct device *dev)
>> >> +void __device_release_driver(struct device *dev)
>> >>  {
>> >>       struct device_driver *drv;
>> >>
>> >> diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
>> >> index c5e3196c45b0..e65572b6092c 100644
>> >> --- a/drivers/nvdimm/namespace_devs.c
>> >> +++ b/drivers/nvdimm/namespace_devs.c
>> >> @@ -1950,6 +1950,7 @@ static int init_active_labels(struct nd_region *nd_region)
>> >>               }
>> >>               nd_mapping->ndd = ndd;
>> >>               atomic_inc(&nvdimm->busy);
>> >> +             device_disable_unbind_attr(&nvdimm->dev);
>> >>               get_ndd(ndd);
>> >>
>> >>               count = nd_label_active_count(ndd);
>> >> diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
>> >> index 40fcfea26fbb..320f0f3ea367 100644
>> >> --- a/drivers/nvdimm/region_devs.c
>> >> +++ b/drivers/nvdimm/region_devs.c
>> >> @@ -427,8 +427,10 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
>> >>                       nd_mapping->labels = NULL;
>> >>                       put_ndd(ndd);
>> >>                       nd_mapping->ndd = NULL;
>> >> -                     if (ndd)
>> >> +                     if (ndd) {
>> >>                               atomic_dec(&nvdimm->busy);
>> >> +                             device_enable_unbind_attr(&nvdimm->dev);
>> >> +                     }
>> >>               }
>> >>
>> >>               if (is_nd_pmem(dev))
>> >> diff --git a/include/linux/device.h b/include/linux/device.h
>> >> index 38f02814d53a..d9eaa85bb9cf 100644
>> >> --- a/include/linux/device.h
>> >> +++ b/include/linux/device.h
>> >> @@ -849,6 +849,7 @@ struct device {
>> >>
>> >>       void    (*release)(struct device *dev);
>> >>       struct iommu_group      *iommu_group;
>> >> +     atomic_t                suppress_unbind_attr; /* disable manual unbind */
>> >>
>> >>       bool                    offline_disabled:1;
>> >>       bool                    offline:1;
>> >> @@ -988,6 +989,25 @@ static inline void device_lock_assert(struct device *dev)
>> >>       lockdep_assert_held(&dev->mutex);
>> >>  }
>> >>
>> >> +static inline bool device_disable_unbind_attr(struct device *dev)
>> >> +{
>> >> +     bool suppressed = false;
>> >> +
>> >> +     device_lock(dev);
>> >> +     if (dev->driver) {
>> >> +             atomic_inc(&dev->suppress_unbind_attr);
>> >> +             suppressed = true;
>> >> +     }
>> >> +     device_unlock(dev);
>> >> +
>> >> +     return suppressed;
>> >> +}
>> >> +
>> >> +static inline bool device_enable_unbind_attr(struct device *dev)
>> >> +{
>> >> +     return atomic_dec_and_test(&dev->suppress_unbind_attr);
>> >> +}
>> >> +
>> >
>> > Ick, that's a mess.
>> >
>> > Why not just prevent the unbinding from happening in your bus when you
>> > need it?
>>
>> Because historically unbind never fails...
>>
>>     void device_release_driver(struct device *dev);
>
> Ah, yes, forgot about that.
>
>> > And as you are grabbing a lock, why is this an atomic variable?
>> >
>> > This is just making things _really_ complex for very limited gain for
>> > what I can tell.
>>
>> I thought it was cleaner to have the disable action synchronize with
>> unbind, but the lock isn't needed to re-enable unbind.  I can move the
>> complexity to the caller to bounce the device_lock() and re-validate
>> that dev->driver is still set, but that does not seem cleaner.
>
> No, if the user wants to unbind, then they can unbind, don't try to
> muddy up the situation by letting it work sometimes and others not at
> all.
>
> bind/unbind is a "I really really know what I am doing here" action,
> it's rare, and the user gets to keep both pieces if something fails.  I
> know some busses don't like it, so we allow them to opt-out.  But to
> make it "sometimes yes, sometimes no" is a mess, I don't want to support
> that.
>
> I suggest you just don't allow this if you don't want it.  It's not
> anything you should ever be relying on for configuration so it shouldn't
> be a big deal.

Fair enough, I'll push this down into the sub-system.
kernel test robot June 4, 2016, 9:50 p.m. UTC | #5
Hi,

[auto build test WARNING on v4.7-rc1]
[also build test WARNING on next-20160603]
[cannot apply to linux-nvdimm/libnvdimm-for-next tip/x86/core]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Dan-Williams/deprecate-pcommit/20160605-045935
reproduce: make htmldocs

All warnings (new ones prefixed by >>):

   include/linux/init.h:1: warning: no structured comments found
   kernel/sched/core.c:2079: warning: No description found for parameter 'cookie'
   kernel/sys.c:1: warning: no structured comments found
>> include/linux/device.h:856: warning: No description found for parameter 'suppress_unbind_attr'
   drivers/dma-buf/seqno-fence.c:1: warning: no structured comments found
   include/linux/fence.h:84: warning: No description found for parameter 'child_list'
   include/linux/fence.h:84: warning: No description found for parameter 'active_list'
   drivers/dma-buf/reservation.c:1: warning: no structured comments found
   include/linux/reservation.h:1: warning: no structured comments found

vim +/suppress_unbind_attr +856 include/linux/device.h

929d2fa5 Matthew Wilcox     2008-10-16  840  	dev_t			devt;	/* dev_t, creates the sysfs "dev" */
ca22e56d Kay Sievers        2011-12-14  841  	u32			id;	/* device instance */
929d2fa5 Matthew Wilcox     2008-10-16  842  
9ac7849e Tejun Heo          2007-01-20  843  	spinlock_t		devres_lock;
9ac7849e Tejun Heo          2007-01-20  844  	struct list_head	devres_head;
9ac7849e Tejun Heo          2007-01-20  845  
5a3ceb86 Tejun Heo          2008-08-25  846  	struct klist_node	knode_class;
b7a3e813 Kay Sievers        2006-10-07  847  	struct class		*class;
a4dbd674 David Brownell     2009-06-24  848  	const struct attribute_group **groups;	/* optional groups */
23681e47 Greg Kroah-Hartman 2006-06-14  849  
^1da177e Linus Torvalds     2005-04-16  850  	void	(*release)(struct device *dev);
74416e1e Alex Williamson    2012-05-30  851  	struct iommu_group	*iommu_group;
031a8908 Dan Williams       2016-06-04  852  	atomic_t		suppress_unbind_attr; /* disable manual unbind */
4f3549d7 Rafael J. Wysocki  2013-05-02  853  
4f3549d7 Rafael J. Wysocki  2013-05-02  854  	bool			offline_disabled:1;
4f3549d7 Rafael J. Wysocki  2013-05-02  855  	bool			offline:1;
^1da177e Linus Torvalds     2005-04-16 @856  };
^1da177e Linus Torvalds     2005-04-16  857  
a4232963 Lars-Peter Clausen 2012-07-03  858  static inline struct device *kobj_to_dev(struct kobject *kobj)
a4232963 Lars-Peter Clausen 2012-07-03  859  {
a4232963 Lars-Peter Clausen 2012-07-03  860  	return container_of(kobj, struct device, kobj);
a4232963 Lars-Peter Clausen 2012-07-03  861  }
a4232963 Lars-Peter Clausen 2012-07-03  862  
9a3df1f7 Alan Stern         2008-03-19  863  /* Get the wakeup routines, which depend on struct device */
9a3df1f7 Alan Stern         2008-03-19  864  #include <linux/pm_wakeup.h>

:::::: The code at line 856 was first introduced by commit
:::::: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Linux-2.6.12-rc2

:::::: TO: Linus Torvalds <torvalds@ppc970.osdl.org>
:::::: CC: Linus Torvalds <torvalds@ppc970.osdl.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Linda Knippers June 6, 2016, 7:25 p.m. UTC | #6
On 6/4/2016 4:52 PM, Dan Williams wrote:
> There are scenarios where we need a middle ground between disabling all
> manual bind/unbind attempts (via driver->suppress_bind_attrs) and
> allowing unbind at any userspace-determined time.  Pinning modules takes
> away one vector for unwanted out-of-sequence device_release_driver()
> invocations, this new mechanism (via device->suppress_unbind_attr) takes
> away another.
> 
> The first user of this mechanism is the libnvdimm sub-system where
> manual dimm disabling should be prevented while the dimm is active in
> any region.  Note that there is a 1:N dimm-to-region relationship which
> is why this is implemented as a disable count rather than a flag.  This
> forces userspace to disable regions before dimms when manually shutting
> down a bus topology.

How is this related to deprecating pcommit?

-- ljk
> 
> This does not affect any of the kernel-internal initiated invocations of
> device_release_driver().
> 
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/base/base.h             |    1 +
>  drivers/base/bus.c              |   12 ++++++++++--
>  drivers/base/core.c             |    1 +
>  drivers/base/dd.c               |    2 +-
>  drivers/nvdimm/namespace_devs.c |    1 +
>  drivers/nvdimm/region_devs.c    |    4 +++-
>  include/linux/device.h          |   20 ++++++++++++++++++++
>  7 files changed, 37 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/base/base.h b/drivers/base/base.h
> index e05db388bd1c..129814b17ca6 100644
> --- a/drivers/base/base.h
> +++ b/drivers/base/base.h
> @@ -109,6 +109,7 @@ extern int bus_add_driver(struct device_driver *drv);
>  extern void bus_remove_driver(struct device_driver *drv);
>  
>  extern void driver_detach(struct device_driver *drv);
> +extern void __device_release_driver(struct device *dev);
>  extern int driver_probe_device(struct device_driver *drv, struct device *dev);
>  extern void driver_deferred_probe_del(struct device *dev);
>  static inline int driver_match_device(struct device_driver *drv,
> diff --git a/drivers/base/bus.c b/drivers/base/bus.c
> index 6470eb8088f4..b48a903f2d28 100644
> --- a/drivers/base/bus.c
> +++ b/drivers/base/bus.c
> @@ -188,10 +188,18 @@ static ssize_t unbind_store(struct device_driver *drv, const char *buf,
>  	if (dev && dev->driver == drv) {
>  		if (dev->parent)	/* Needed for USB */
>  			device_lock(dev->parent);
> -		device_release_driver(dev);
> +
> +		device_lock(dev);
> +		if (atomic_read(&dev->suppress_unbind_attr) > 0)
> +			err = -EBUSY;
> +		else {
> +			__device_release_driver(dev);
> +			err = count;
> +		}
> +		device_unlock(dev);
> +
>  		if (dev->parent)
>  			device_unlock(dev->parent);
> -		err = count;
>  	}
>  	put_device(dev);
>  	bus_put(bus);
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index 0a8bdade53f2..0ea0e8560e1d 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -708,6 +708,7 @@ void device_initialize(struct device *dev)
>  	INIT_LIST_HEAD(&dev->devres_head);
>  	device_pm_init(dev);
>  	set_dev_node(dev, -1);
> +	atomic_set(&dev->suppress_unbind_attr, 0);
>  #ifdef CONFIG_GENERIC_MSI_IRQ
>  	INIT_LIST_HEAD(&dev->msi_list);
>  #endif
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 16688f50729c..9e21817ca2d6 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -756,7 +756,7 @@ EXPORT_SYMBOL_GPL(driver_attach);
>   * __device_release_driver() must be called with @dev lock held.
>   * When called for a USB interface, @dev->parent lock must be held as well.
>   */
> -static void __device_release_driver(struct device *dev)
> +void __device_release_driver(struct device *dev)
>  {
>  	struct device_driver *drv;
>  
> diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
> index c5e3196c45b0..e65572b6092c 100644
> --- a/drivers/nvdimm/namespace_devs.c
> +++ b/drivers/nvdimm/namespace_devs.c
> @@ -1950,6 +1950,7 @@ static int init_active_labels(struct nd_region *nd_region)
>  		}
>  		nd_mapping->ndd = ndd;
>  		atomic_inc(&nvdimm->busy);
> +		device_disable_unbind_attr(&nvdimm->dev);
>  		get_ndd(ndd);
>  
>  		count = nd_label_active_count(ndd);
> diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
> index 40fcfea26fbb..320f0f3ea367 100644
> --- a/drivers/nvdimm/region_devs.c
> +++ b/drivers/nvdimm/region_devs.c
> @@ -427,8 +427,10 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
>  			nd_mapping->labels = NULL;
>  			put_ndd(ndd);
>  			nd_mapping->ndd = NULL;
> -			if (ndd)
> +			if (ndd) {
>  				atomic_dec(&nvdimm->busy);
> +				device_enable_unbind_attr(&nvdimm->dev);
> +			}
>  		}
>  
>  		if (is_nd_pmem(dev))
> diff --git a/include/linux/device.h b/include/linux/device.h
> index 38f02814d53a..d9eaa85bb9cf 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -849,6 +849,7 @@ struct device {
>  
>  	void	(*release)(struct device *dev);
>  	struct iommu_group	*iommu_group;
> +	atomic_t		suppress_unbind_attr; /* disable manual unbind */
>  
>  	bool			offline_disabled:1;
>  	bool			offline:1;
> @@ -988,6 +989,25 @@ static inline void device_lock_assert(struct device *dev)
>  	lockdep_assert_held(&dev->mutex);
>  }
>  
> +static inline bool device_disable_unbind_attr(struct device *dev)
> +{
> +	bool suppressed = false;
> +
> +	device_lock(dev);
> +	if (dev->driver) {
> +		atomic_inc(&dev->suppress_unbind_attr);
> +		suppressed = true;
> +	}
> +	device_unlock(dev);
> +
> +	return suppressed;
> +}
> +
> +static inline bool device_enable_unbind_attr(struct device *dev)
> +{
> +	return atomic_dec_and_test(&dev->suppress_unbind_attr);
> +}
> +
>  static inline struct device_node *dev_of_node(struct device *dev)
>  {
>  	if (!IS_ENABLED(CONFIG_OF))
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
>
Dan Williams June 6, 2016, 7:31 p.m. UTC | #7
On Mon, Jun 6, 2016 at 12:25 PM, Linda Knippers <linda.knippers@hpe.com> wrote:
> On 6/4/2016 4:52 PM, Dan Williams wrote:
>> There are scenarios where we need a middle ground between disabling all
>> manual bind/unbind attempts (via driver->suppress_bind_attrs) and
>> allowing unbind at any userspace-determined time.  Pinning modules takes
>> away one vector for unwanted out-of-sequence device_release_driver()
>> invocations, this new mechanism (via device->suppress_unbind_attr) takes
>> away another.
>>
>> The first user of this mechanism is the libnvdimm sub-system where
>> manual dimm disabling should be prevented while the dimm is active in
>> any region.  Note that there is a 1:N dimm-to-region relationship which
>> is why this is implemented as a disable count rather than a flag.  This
>> forces userspace to disable regions before dimms when manually shutting
>> down a bus topology.
>
> How is this related to deprecating pcommit?

We need guarantees that the flush hint mappings are valid for the
duration of a pmem namespace being enabled.  I am going to move the
mapping of the flush hint region from per-dimm to per-region.  However
since multiple regions may reference the same dimm the mapping needs
to be reference counted and shared across regions.  This will be
similar to the arrangement we have for BLK-regions that share a
control region mapping.
Dan Williams June 6, 2016, 7:36 p.m. UTC | #8
On Mon, Jun 6, 2016 at 12:31 PM, Dan Williams <dan.j.williams@intel.com> wrote:
> On Mon, Jun 6, 2016 at 12:25 PM, Linda Knippers <linda.knippers@hpe.com> wrote:
>> On 6/4/2016 4:52 PM, Dan Williams wrote:
>>> There are scenarios where we need a middle ground between disabling all
>>> manual bind/unbind attempts (via driver->suppress_bind_attrs) and
>>> allowing unbind at any userspace-determined time.  Pinning modules takes
>>> away one vector for unwanted out-of-sequence device_release_driver()
>>> invocations, this new mechanism (via device->suppress_unbind_attr) takes
>>> away another.
>>>
>>> The first user of this mechanism is the libnvdimm sub-system where
>>> manual dimm disabling should be prevented while the dimm is active in
>>> any region.  Note that there is a 1:N dimm-to-region relationship which
>>> is why this is implemented as a disable count rather than a flag.  This
>>> forces userspace to disable regions before dimms when manually shutting
>>> down a bus topology.
>>
>> How is this related to deprecating pcommit?
>
> We need guarantees that the flush hint mappings are valid for the
> duration of a pmem namespace being enabled.  I am going to move the
> mapping of the flush hint region from per-dimm to per-region.  However
> since multiple regions may reference the same dimm the mapping needs
> to be reference counted and shared across regions.  This will be
> similar to the arrangement we have for BLK-regions that share a
> control region mapping.

The usage of the word "region" is multiplexed too many times above.

per-region => per PMEM-region-device / BLK-region-device
flush hint region => per-dimm flush hint address range
control region => address range where block window mmio control registers reside
Linda Knippers June 6, 2016, 7:36 p.m. UTC | #9
On 6/6/2016 3:31 PM, Dan Williams wrote:
> On Mon, Jun 6, 2016 at 12:25 PM, Linda Knippers <linda.knippers@hpe.com> wrote:
>> On 6/4/2016 4:52 PM, Dan Williams wrote:
>>> There are scenarios where we need a middle ground between disabling all
>>> manual bind/unbind attempts (via driver->suppress_bind_attrs) and
>>> allowing unbind at any userspace-determined time.  Pinning modules takes
>>> away one vector for unwanted out-of-sequence device_release_driver()
>>> invocations, this new mechanism (via device->suppress_unbind_attr) takes
>>> away another.
>>>
>>> The first user of this mechanism is the libnvdimm sub-system where
>>> manual dimm disabling should be prevented while the dimm is active in
>>> any region.  Note that there is a 1:N dimm-to-region relationship which
>>> is why this is implemented as a disable count rather than a flag.  This
>>> forces userspace to disable regions before dimms when manually shutting
>>> down a bus topology.
>>
>> How is this related to deprecating pcommit?
> 
> We need guarantees that the flush hint mappings are valid for the
> duration of a pmem namespace being enabled.  I am going to move the
> mapping of the flush hint region from per-dimm to per-region.  However
> since multiple regions may reference the same dimm the mapping needs
> to be reference counted and shared across regions.  This will be
> similar to the arrangement we have for BLK-regions that share a
> control region mapping.

Why are things moving around?  Aren't flush hints defined per NFIT device
handle, making them an optional per-dimm thing?

I don't understand a lot of this patch series and had the same questions
as Jeff.  How does deprecating pcommit, because it's not necessary with ADR,
change so much?

-- ljk
>
Dan Williams June 6, 2016, 7:46 p.m. UTC | #10
On Mon, Jun 6, 2016 at 12:36 PM, Linda Knippers <linda.knippers@hpe.com> wrote:
>
>
> On 6/6/2016 3:31 PM, Dan Williams wrote:
>> On Mon, Jun 6, 2016 at 12:25 PM, Linda Knippers <linda.knippers@hpe.com> wrote:
>>> On 6/4/2016 4:52 PM, Dan Williams wrote:
>>>> There are scenarios where we need a middle ground between disabling all
>>>> manual bind/unbind attempts (via driver->suppress_bind_attrs) and
>>>> allowing unbind at any userspace-determined time.  Pinning modules takes
>>>> away one vector for unwanted out-of-sequence device_release_driver()
>>>> invocations, this new mechanism (via device->suppress_unbind_attr) takes
>>>> away another.
>>>>
>>>> The first user of this mechanism is the libnvdimm sub-system where
>>>> manual dimm disabling should be prevented while the dimm is active in
>>>> any region.  Note that there is a 1:N dimm-to-region relationship which
>>>> is why this is implemented as a disable count rather than a flag.  This
>>>> forces userspace to disable regions before dimms when manually shutting
>>>> down a bus topology.
>>>
>>> How is this related to deprecating pcommit?
>>
>> We need guarantees that the flush hint mappings are valid for the
>> duration of a pmem namespace being enabled.  I am going to move the
>> mapping of the flush hint region from per-dimm to per-region.  However
>> since multiple regions may reference the same dimm the mapping needs
>> to be reference counted and shared across regions.  This will be
>> similar to the arrangement we have for BLK-regions that share a
>> control region mapping.
>
> Why are things moving around?  Aren't flush hints defined per NFIT device
> handle, making them an optional per-dimm thing?
>
> I don't understand a lot of this patch series and had the same questions
> as Jeff.  How does deprecating pcommit, because it's not necessary with ADR,
> change so much?

This patch set deprecates pcommit, and introduces the usage of flush
hints into the pmem path.  The introduction patch used the word
"usually", I should have fleshed that out to say "usually ADR, or
explicit use of flush hints".

A solution to the posted-write-queue flushing needs to be available
and a platform can choose to use flush hints or ADR.  If the NFIT
defines an NVDIMM device without hints we assume the platform must
have ADR.  If the platform NFIT neglects to define an NVDIMM to
physical address range mapping, we warn about a potentially broken
BIOS.  Hopefully we can make this clearer in future versions of the
spec.
Linda Knippers June 6, 2016, 8:20 p.m. UTC | #11
On 6/6/2016 3:46 PM, Dan Williams wrote:
> On Mon, Jun 6, 2016 at 12:36 PM, Linda Knippers <linda.knippers@hpe.com> wrote:
>>
>>
>> On 6/6/2016 3:31 PM, Dan Williams wrote:
>>> On Mon, Jun 6, 2016 at 12:25 PM, Linda Knippers <linda.knippers@hpe.com> wrote:
>>>> On 6/4/2016 4:52 PM, Dan Williams wrote:
>>>>> There are scenarios where we need a middle ground between disabling all
>>>>> manual bind/unbind attempts (via driver->suppress_bind_attrs) and
>>>>> allowing unbind at any userspace-determined time.  Pinning modules takes
>>>>> away one vector for unwanted out-of-sequence device_release_driver()
>>>>> invocations, this new mechanism (via device->suppress_unbind_attr) takes
>>>>> away another.
>>>>>
>>>>> The first user of this mechanism is the libnvdimm sub-system where
>>>>> manual dimm disabling should be prevented while the dimm is active in
>>>>> any region.  Note that there is a 1:N dimm-to-region relationship which
>>>>> is why this is implemented as a disable count rather than a flag.  This
>>>>> forces userspace to disable regions before dimms when manually shutting
>>>>> down a bus topology.
>>>>
>>>> How is this related to deprecating pcommit?
>>>
>>> We need guarantees that the flush hint mappings are valid for the
>>> duration of a pmem namespace being enabled.  I am going to move the
>>> mapping of the flush hint region from per-dimm to per-region.  However
>>> since multiple regions may reference the same dimm the mapping needs
>>> to be reference counted and shared across regions.  This will be
>>> similar to the arrangement we have for BLK-regions that share a
>>> control region mapping.
>>
>> Why are things moving around?  Aren't flush hints defined per NFIT device
>> handle, making them an optional per-dimm thing?
>>
>> I don't understand a lot of this patch series and had the same questions
>> as Jeff.  How does deprecating pcommit, because it's not necessary with ADR,
>> change so much?
> 
> This patch set deprecates pcommit, and introduces the usage of flush
> hints into the pmem path.  The introduction patch used the word
> "usually", I should have fleshed that out to say "usually ADR, or
> explicit use of flush hints".
> 
> A solution to the posted-write-queue flushing needs to be available
> and a platform can choose to use flush hints or ADR.  If the NFIT
> defines an NVDIMM device without hints we assume the platform must
> have ADR.  If the platform NFIT neglects to define an NVDIMM to
> physical address range mapping, we warn about a potentially broken
> BIOS.  Hopefully we can make this clearer in future versions of the
> spec.

You lost me on those last 2 sentences.  An NVDIMM doesn't have to have
an SPA range, but that seems to be unrelated to pcommit or flushes.

-- ljk
Dan Williams June 6, 2016, 8:36 p.m. UTC | #12
On Mon, Jun 6, 2016 at 1:20 PM, Linda Knippers <linda.knippers@hpe.com> wrote:
[..]
>> A solution to the posted-write-queue flushing needs to be available
>> and a platform can choose to use flush hints or ADR.  If the NFIT
>> defines an NVDIMM device without hints we assume the platform must
>> have ADR.  If the platform NFIT neglects to define an NVDIMM to
>> physical address range mapping, we warn about a potentially broken
>> BIOS.  Hopefully we can make this clearer in future versions of the
>> spec.
>
> You lost me on those last 2 sentences.  An NVDIMM doesn't have to have
> an SPA range, but that seems to be unrelated to pcommit or flushes.

Ok, now you've lost me...

We need a SPA range to be able to do I/O whether that SPA range is
direct access to media or a block-window aperture.  If an NFIT
inculdes a "System Physical Address (SPA) Range Structure", but
neglects to include a corresponding "NVDIMM Region Mapping Structure"
then the kernel has no idea what actual dimm device(s) back that
memory region.  Without a memory device mapping it is undefined
whether I/O to a SPA range requires flushing or not.

This patch set silences the warning about "not being able to guarantee
persistence" when the BIOS provides a "NVDIMM Region Mapping
Structure".  When that structure is present the kernel uses flush
hints when provided, but ADR otherwise.  See the implementation of
nvdimm_flush() in patch 4.
Linda Knippers June 6, 2016, 9:15 p.m. UTC | #13
On 6/6/2016 4:36 PM, Dan Williams wrote:
> On Mon, Jun 6, 2016 at 1:20 PM, Linda Knippers <linda.knippers@hpe.com> wrote:
> [..]
>>> A solution to the posted-write-queue flushing needs to be available
>>> and a platform can choose to use flush hints or ADR.  If the NFIT
>>> defines an NVDIMM device without hints we assume the platform must
>>> have ADR.  If the platform NFIT neglects to define an NVDIMM to
>>> physical address range mapping, we warn about a potentially broken
>>> BIOS.  Hopefully we can make this clearer in future versions of the
>>> spec.
>>
>> You lost me on those last 2 sentences.  An NVDIMM doesn't have to have
>> an SPA range, but that seems to be unrelated to pcommit or flushes.
> 
> Ok, now you've lost me...
> 
> We need a SPA range to be able to do I/O whether that SPA range is
> direct access to media or a block-window aperture.  If an NFIT
> inculdes a "System Physical Address (SPA) Range Structure", but
> neglects to include a corresponding "NVDIMM Region Mapping Structure"
> then the kernel has no idea what actual dimm device(s) back that
> memory region.  Without a memory device mapping it is undefined
> whether I/O to a SPA range requires flushing or not.

Right, if you have an SPA Range Structure, you need an NVDIMM Region
Mapping Structure referencing it (although now I'm wondering if that's
true if it's one of the other GUID types, like a virtual CD...).  But the
reverse isn't true.  You can have a Region mapping Structure without an
SPA range, and that's what I thought you were flagging as an error in
your previous sentence.

> This patch set silences the warning about "not being able to guarantee
> persistence" when the BIOS provides a "NVDIMM Region Mapping
> Structure".  When that structure is present the kernel uses flush
> hints when provided, but ADR otherwise.  See the implementation of
> nvdimm_flush() in patch 4.

Ok, I see that now.

Patch 0 and the commit messages could have used a bit more info.
The big picture was a bit difficult to see and patch 4 doesn't say
anything about finishing in the TODOs from the previous patch.

Thanks,

-- ljk

Patch
diff mbox

diff --git a/drivers/base/base.h b/drivers/base/base.h
index e05db388bd1c..129814b17ca6 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -109,6 +109,7 @@  extern int bus_add_driver(struct device_driver *drv);
 extern void bus_remove_driver(struct device_driver *drv);
 
 extern void driver_detach(struct device_driver *drv);
+extern void __device_release_driver(struct device *dev);
 extern int driver_probe_device(struct device_driver *drv, struct device *dev);
 extern void driver_deferred_probe_del(struct device *dev);
 static inline int driver_match_device(struct device_driver *drv,
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 6470eb8088f4..b48a903f2d28 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -188,10 +188,18 @@  static ssize_t unbind_store(struct device_driver *drv, const char *buf,
 	if (dev && dev->driver == drv) {
 		if (dev->parent)	/* Needed for USB */
 			device_lock(dev->parent);
-		device_release_driver(dev);
+
+		device_lock(dev);
+		if (atomic_read(&dev->suppress_unbind_attr) > 0)
+			err = -EBUSY;
+		else {
+			__device_release_driver(dev);
+			err = count;
+		}
+		device_unlock(dev);
+
 		if (dev->parent)
 			device_unlock(dev->parent);
-		err = count;
 	}
 	put_device(dev);
 	bus_put(bus);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 0a8bdade53f2..0ea0e8560e1d 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -708,6 +708,7 @@  void device_initialize(struct device *dev)
 	INIT_LIST_HEAD(&dev->devres_head);
 	device_pm_init(dev);
 	set_dev_node(dev, -1);
+	atomic_set(&dev->suppress_unbind_attr, 0);
 #ifdef CONFIG_GENERIC_MSI_IRQ
 	INIT_LIST_HEAD(&dev->msi_list);
 #endif
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 16688f50729c..9e21817ca2d6 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -756,7 +756,7 @@  EXPORT_SYMBOL_GPL(driver_attach);
  * __device_release_driver() must be called with @dev lock held.
  * When called for a USB interface, @dev->parent lock must be held as well.
  */
-static void __device_release_driver(struct device *dev)
+void __device_release_driver(struct device *dev)
 {
 	struct device_driver *drv;
 
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index c5e3196c45b0..e65572b6092c 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1950,6 +1950,7 @@  static int init_active_labels(struct nd_region *nd_region)
 		}
 		nd_mapping->ndd = ndd;
 		atomic_inc(&nvdimm->busy);
+		device_disable_unbind_attr(&nvdimm->dev);
 		get_ndd(ndd);
 
 		count = nd_label_active_count(ndd);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 40fcfea26fbb..320f0f3ea367 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -427,8 +427,10 @@  static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
 			nd_mapping->labels = NULL;
 			put_ndd(ndd);
 			nd_mapping->ndd = NULL;
-			if (ndd)
+			if (ndd) {
 				atomic_dec(&nvdimm->busy);
+				device_enable_unbind_attr(&nvdimm->dev);
+			}
 		}
 
 		if (is_nd_pmem(dev))
diff --git a/include/linux/device.h b/include/linux/device.h
index 38f02814d53a..d9eaa85bb9cf 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -849,6 +849,7 @@  struct device {
 
 	void	(*release)(struct device *dev);
 	struct iommu_group	*iommu_group;
+	atomic_t		suppress_unbind_attr; /* disable manual unbind */
 
 	bool			offline_disabled:1;
 	bool			offline:1;
@@ -988,6 +989,25 @@  static inline void device_lock_assert(struct device *dev)
 	lockdep_assert_held(&dev->mutex);
 }
 
+static inline bool device_disable_unbind_attr(struct device *dev)
+{
+	bool suppressed = false;
+
+	device_lock(dev);
+	if (dev->driver) {
+		atomic_inc(&dev->suppress_unbind_attr);
+		suppressed = true;
+	}
+	device_unlock(dev);
+
+	return suppressed;
+}
+
+static inline bool device_enable_unbind_attr(struct device *dev)
+{
+	return atomic_dec_and_test(&dev->suppress_unbind_attr);
+}
+
 static inline struct device_node *dev_of_node(struct device *dev)
 {
 	if (!IS_ENABLED(CONFIG_OF))