diff mbox series

cxl/core/port: Fix unregister_port() lock assertion

Message ID 164439224893.2941117.18331456248117887720.stgit@dwillia2-desk3.amr.corp.intel.com
State Accepted
Commit 5c3c067b601bcbcd381214e3a40e666fda5f3d6f
Headers show
Series cxl/core/port: Fix unregister_port() lock assertion | expand

Commit Message

Dan Williams Feb. 9, 2022, 7:37 a.m. UTC
The device_lock_assert() in unregister_port() fails to pick the right
device leading to splats like the following from:

echo "ACPI0017:00" > /sys/bus/platform/drivers/cxl_acpi/unbind

 WARNING: CPU: 32 PID: 1147 at include/linux/device.h:787 unregister_port+0x49/0x50 [cxl_c
 [..]
 RIP: 0010:unregister_port+0x49/0x50 [cxl_core]
 [..]
 Call Trace:
  <TASK>
  release_nodes+0x63/0x80
  devres_release_all+0x8b/0xc0
  __device_release_driver+0x190/0x240
  device_driver_detach+0x3e/0xa0
  unbind_store+0x113/0x130

Fix it up to assert on the device_lock() for ACPI0017 for root and 1st
level ports, and parent ports for all the rest.

Fixes: 54cdbf845cf7 ("cxl/port: Add a driver for 'struct cxl_port' objects")
Reported-by: Ben Widawsky <ben.widawsky@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/port.c |   24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

Comments

Ben Widawsky Feb. 9, 2022, 5:49 p.m. UTC | #1
On 22-02-08 23:37:28, Dan Williams wrote:
> The device_lock_assert() in unregister_port() fails to pick the right
> device leading to splats like the following from:
> 
> echo "ACPI0017:00" > /sys/bus/platform/drivers/cxl_acpi/unbind
> 
>  WARNING: CPU: 32 PID: 1147 at include/linux/device.h:787 unregister_port+0x49/0x50 [cxl_c
>  [..]
>  RIP: 0010:unregister_port+0x49/0x50 [cxl_core]
>  [..]
>  Call Trace:
>   <TASK>
>   release_nodes+0x63/0x80
>   devres_release_all+0x8b/0xc0
>   __device_release_driver+0x190/0x240
>   device_driver_detach+0x3e/0xa0
>   unbind_store+0x113/0x130
> 
> Fix it up to assert on the device_lock() for ACPI0017 for root and 1st
> level ports, and parent ports for all the rest.
> 
> Fixes: 54cdbf845cf7 ("cxl/port: Add a driver for 'struct cxl_port' objects")
> Reported-by: Ben Widawsky <ben.widawsky@intel.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Comment/question below...
Reviewed-by: Ben Widawsky <ben.widawsky@intel.com>

> ---
>  drivers/cxl/core/port.c |   24 ++++++++++++++++++++----
>  1 file changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index c3e83c624700..9b4bbd51fbaa 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -348,12 +348,28 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_port, CXL);
>  static void unregister_port(void *_port)
>  {
>  	struct cxl_port *port = _port;
> +	struct cxl_port *parent;
> +	struct device *lock_dev;
>  
> -	if (!is_cxl_root(port)) {
> -		device_lock_assert(port->dev.parent);
> -		port->uport = NULL;
> -	}
> +	if (is_cxl_root(port))
> +		parent = NULL;
> +	else
> +		parent = to_cxl_port(port->dev.parent);
> +
> +	/*
> +	 * CXL root port's and the first level of ports are unregistered
> +	 * under the platform firmware device lock, all other ports are
> +	 * unregistered while holding their parent port lock.
> +	 */
> +	if (!parent)
> +		lock_dev = port->uport;
> +	else if (is_cxl_root(parent))
> +		lock_dev = parent->uport;
> +	else
> +		lock_dev = &parent->dev;

I wonder if it would be a little cleaner if the root port and host bridge ports
had a separate unregistration callback from switches and endpoints.

>  
> +	device_lock_assert(lock_dev);
> +	port->uport = NULL;
>  	device_unregister(&port->dev);
>  }
>  
>
Dan Williams Feb. 9, 2022, 6:33 p.m. UTC | #2
On Wed, Feb 9, 2022 at 9:49 AM Ben Widawsky <ben.widawsky@intel.com> wrote:
>
> On 22-02-08 23:37:28, Dan Williams wrote:
> > The device_lock_assert() in unregister_port() fails to pick the right
> > device leading to splats like the following from:
> >
> > echo "ACPI0017:00" > /sys/bus/platform/drivers/cxl_acpi/unbind
> >
> >  WARNING: CPU: 32 PID: 1147 at include/linux/device.h:787 unregister_port+0x49/0x50 [cxl_c
> >  [..]
> >  RIP: 0010:unregister_port+0x49/0x50 [cxl_core]
> >  [..]
> >  Call Trace:
> >   <TASK>
> >   release_nodes+0x63/0x80
> >   devres_release_all+0x8b/0xc0
> >   __device_release_driver+0x190/0x240
> >   device_driver_detach+0x3e/0xa0
> >   unbind_store+0x113/0x130
> >
> > Fix it up to assert on the device_lock() for ACPI0017 for root and 1st
> > level ports, and parent ports for all the rest.
> >
> > Fixes: 54cdbf845cf7 ("cxl/port: Add a driver for 'struct cxl_port' objects")
> > Reported-by: Ben Widawsky <ben.widawsky@intel.com>
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>
> Comment/question below...
> Reviewed-by: Ben Widawsky <ben.widawsky@intel.com>
>
> > ---
> >  drivers/cxl/core/port.c |   24 ++++++++++++++++++++----
> >  1 file changed, 20 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> > index c3e83c624700..9b4bbd51fbaa 100644
> > --- a/drivers/cxl/core/port.c
> > +++ b/drivers/cxl/core/port.c
> > @@ -348,12 +348,28 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_port, CXL);
> >  static void unregister_port(void *_port)
> >  {
> >       struct cxl_port *port = _port;
> > +     struct cxl_port *parent;
> > +     struct device *lock_dev;
> >
> > -     if (!is_cxl_root(port)) {
> > -             device_lock_assert(port->dev.parent);
> > -             port->uport = NULL;
> > -     }
> > +     if (is_cxl_root(port))
> > +             parent = NULL;
> > +     else
> > +             parent = to_cxl_port(port->dev.parent);
> > +
> > +     /*
> > +      * CXL root port's and the first level of ports are unregistered
> > +      * under the platform firmware device lock, all other ports are
> > +      * unregistered while holding their parent port lock.
> > +      */
> > +     if (!parent)
> > +             lock_dev = port->uport;
> > +     else if (is_cxl_root(parent))
> > +             lock_dev = parent->uport;
> > +     else
> > +             lock_dev = &parent->dev;
>
> I wonder if it would be a little cleaner if the root port and host bridge ports
> had a separate unregistration callback from switches and endpoints.

I think it's "a pay me now or pay me later" situation to just move
this same complexity to the registration side, not sure it would be
net cleaner.
diff mbox series

Patch

diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index c3e83c624700..9b4bbd51fbaa 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -348,12 +348,28 @@  EXPORT_SYMBOL_NS_GPL(to_cxl_port, CXL);
 static void unregister_port(void *_port)
 {
 	struct cxl_port *port = _port;
+	struct cxl_port *parent;
+	struct device *lock_dev;
 
-	if (!is_cxl_root(port)) {
-		device_lock_assert(port->dev.parent);
-		port->uport = NULL;
-	}
+	if (is_cxl_root(port))
+		parent = NULL;
+	else
+		parent = to_cxl_port(port->dev.parent);
+
+	/*
+	 * CXL root port's and the first level of ports are unregistered
+	 * under the platform firmware device lock, all other ports are
+	 * unregistered while holding their parent port lock.
+	 */
+	if (!parent)
+		lock_dev = port->uport;
+	else if (is_cxl_root(parent))
+		lock_dev = parent->uport;
+	else
+		lock_dev = &parent->dev;
 
+	device_lock_assert(lock_dev);
+	port->uport = NULL;
 	device_unregister(&port->dev);
 }