diff mbox series

[v2,1/3] PCI: Revert the cfg_access_lock lockdep mechanism

Message ID 171711746402.1628941.14575335981264103013.stgit@dwillia2-xfh.jf.intel.com (mailing list archive)
State Accepted
Commit e6d1cd96b33c40a10cec434193897a2cbc8ac09c
Delegated to: Bjorn Helgaas
Headers show
Series PCI: Revert / replace the cfg_access_lock lockdep mechanism | expand

Commit Message

Dan Williams May 31, 2024, 1:04 a.m. UTC
While the experiment did reveal that there are additional places that
are missing the lock during secondary bus reset, one of the places that
needs to take cfg_access_lock (pci_bus_lock()) is not prepared for
lockdep annotation.

Specifically, pci_bus_lock() takes pci_dev_lock() recursively and is
currently dependent on the fact that the device_lock() is marked
lockdep_set_novalidate_class(&dev->mutex). Otherwise, without that
annotation, pci_bus_lock() would need to use something like a new
pci_dev_lock_nested() helper, a scheme to track a PCI device's depth in
the topology, and a hope that the depth of a PCI tree never exceeds the
max value for a lockdep subclass.

The alternative to ripping out the lockdep coverage would be to deploy a
dynamic lock key for every PCI device. Unfortunately, there is evidence
that increasing the number of keys that lockdep needs to track to be
per-PCI-device is prohibitively expensive for something like the
cfg_access_lock.

The main motivation for adding the annotation in the first place was to
catch unlocked secondary bus resets, not necessarily catch lock ordering
problems between cfg_access_lock and other locks. Solve that narrower
problem with follow-on patches, and just due to targeted revert for now.

Fixes: 7e89efc6e9e4 ("PCI: Lock upstream bridge for pci_reset_function()")
Reported-by: Imre Deak <imre.deak@intel.com>
Closes: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_134186v1/shard-dg2-1/igt@device_reset@unbind-reset-rebind.html
Cc: Jani Saarinen <jani.saarinen@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/pci/access.c    |    4 ----
 drivers/pci/pci.c       |    1 -
 drivers/pci/probe.c     |    3 ---
 include/linux/lockdep.h |    5 -----
 include/linux/pci.h     |    2 --
 5 files changed, 15 deletions(-)

Comments

Dave Jiang May 31, 2024, 6:05 p.m. UTC | #1
On 5/30/24 6:04 PM, Dan Williams wrote:
> While the experiment did reveal that there are additional places that
> are missing the lock during secondary bus reset, one of the places that
> needs to take cfg_access_lock (pci_bus_lock()) is not prepared for
> lockdep annotation.
> 
> Specifically, pci_bus_lock() takes pci_dev_lock() recursively and is
> currently dependent on the fact that the device_lock() is marked
> lockdep_set_novalidate_class(&dev->mutex). Otherwise, without that
> annotation, pci_bus_lock() would need to use something like a new
> pci_dev_lock_nested() helper, a scheme to track a PCI device's depth in
> the topology, and a hope that the depth of a PCI tree never exceeds the
> max value for a lockdep subclass.
> 
> The alternative to ripping out the lockdep coverage would be to deploy a
> dynamic lock key for every PCI device. Unfortunately, there is evidence
> that increasing the number of keys that lockdep needs to track to be
> per-PCI-device is prohibitively expensive for something like the
> cfg_access_lock.
> 
> The main motivation for adding the annotation in the first place was to
> catch unlocked secondary bus resets, not necessarily catch lock ordering
> problems between cfg_access_lock and other locks. Solve that narrower
> problem with follow-on patches, and just due to targeted revert for now.
> 
> Fixes: 7e89efc6e9e4 ("PCI: Lock upstream bridge for pci_reset_function()")
> Reported-by: Imre Deak <imre.deak@intel.com>
> Closes: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_134186v1/shard-dg2-1/igt@device_reset@unbind-reset-rebind.html
> Cc: Jani Saarinen <jani.saarinen@intel.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Bjorn Helgaas <bhelgaas@google.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Reviewed-by: Dave Jiang <dave.jiang@intel.com>

> ---
>  drivers/pci/access.c    |    4 ----
>  drivers/pci/pci.c       |    1 -
>  drivers/pci/probe.c     |    3 ---
>  include/linux/lockdep.h |    5 -----
>  include/linux/pci.h     |    2 --
>  5 files changed, 15 deletions(-)
> 
> diff --git a/drivers/pci/access.c b/drivers/pci/access.c
> index 30f031de9cfe..b123da16b63b 100644
> --- a/drivers/pci/access.c
> +++ b/drivers/pci/access.c
> @@ -289,8 +289,6 @@ void pci_cfg_access_lock(struct pci_dev *dev)
>  {
>  	might_sleep();
>  
> -	lock_map_acquire(&dev->cfg_access_lock);
> -
>  	raw_spin_lock_irq(&pci_lock);
>  	if (dev->block_cfg_access)
>  		pci_wait_cfg(dev);
> @@ -345,8 +343,6 @@ void pci_cfg_access_unlock(struct pci_dev *dev)
>  	raw_spin_unlock_irqrestore(&pci_lock, flags);
>  
>  	wake_up_all(&pci_cfg_wait);
> -
> -	lock_map_release(&dev->cfg_access_lock);
>  }
>  EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
>  
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 59e0949fb079..35fb1f17a589 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -4883,7 +4883,6 @@ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
>   */
>  int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
>  {
> -	lock_map_assert_held(&dev->cfg_access_lock);
>  	pcibios_reset_secondary_bus(dev);
>  
>  	return pci_bridge_wait_for_secondary_bus(dev, "bus reset");
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 8e696e547565..5fbabb4e3425 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -2546,9 +2546,6 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
>  	dev->dev.dma_mask = &dev->dma_mask;
>  	dev->dev.dma_parms = &dev->dma_parms;
>  	dev->dev.coherent_dma_mask = 0xffffffffull;
> -	lockdep_register_key(&dev->cfg_access_key);
> -	lockdep_init_map(&dev->cfg_access_lock, dev_name(&dev->dev),
> -			 &dev->cfg_access_key, 0);
>  
>  	dma_set_max_seg_size(&dev->dev, 65536);
>  	dma_set_seg_boundary(&dev->dev, 0xffffffff);
> diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
> index 5e51b0de4c4b..08b0d1d9d78b 100644
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -297,9 +297,6 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
>  		.wait_type_inner = _wait_type,		\
>  		.lock_type = LD_LOCK_WAIT_OVERRIDE, }
>  
> -#define lock_map_assert_held(l)		\
> -	lockdep_assert(lock_is_held(l) != LOCK_STATE_NOT_HELD)
> -
>  #else /* !CONFIG_LOCKDEP */
>  
>  static inline void lockdep_init_task(struct task_struct *task)
> @@ -391,8 +388,6 @@ extern int lockdep_is_held(const void *);
>  #define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type)	\
>  	struct lockdep_map __maybe_unused _name = {}
>  
> -#define lock_map_assert_held(l)			do { (void)(l); } while (0)
> -
>  #endif /* !LOCKDEP */
>  
>  #ifdef CONFIG_PROVE_LOCKING
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index fb004fd4e889..cafc5ab1cbcb 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -413,8 +413,6 @@ struct pci_dev {
>  	struct resource driver_exclusive_resource;	 /* driver exclusive resource ranges */
>  
>  	bool		match_driver;		/* Skip attaching driver */
> -	struct lock_class_key cfg_access_key;
> -	struct lockdep_map cfg_access_lock;
>  
>  	unsigned int	transparent:1;		/* Subtractive decode bridge */
>  	unsigned int	io_window:1;		/* Bridge has I/O window */
>
Kalle Valo June 4, 2024, 8:03 a.m. UTC | #2
Dan Williams <dan.j.williams@intel.com> writes:

> While the experiment did reveal that there are additional places that
> are missing the lock during secondary bus reset, one of the places that
> needs to take cfg_access_lock (pci_bus_lock()) is not prepared for
> lockdep annotation.
>
> Specifically, pci_bus_lock() takes pci_dev_lock() recursively and is
> currently dependent on the fact that the device_lock() is marked
> lockdep_set_novalidate_class(&dev->mutex). Otherwise, without that
> annotation, pci_bus_lock() would need to use something like a new
> pci_dev_lock_nested() helper, a scheme to track a PCI device's depth in
> the topology, and a hope that the depth of a PCI tree never exceeds the
> max value for a lockdep subclass.
>
> The alternative to ripping out the lockdep coverage would be to deploy a
> dynamic lock key for every PCI device. Unfortunately, there is evidence
> that increasing the number of keys that lockdep needs to track to be
> per-PCI-device is prohibitively expensive for something like the
> cfg_access_lock.
>
> The main motivation for adding the annotation in the first place was to
> catch unlocked secondary bus resets, not necessarily catch lock ordering
> problems between cfg_access_lock and other locks. Solve that narrower
> problem with follow-on patches, and just due to targeted revert for now.
>
> Fixes: 7e89efc6e9e4 ("PCI: Lock upstream bridge for pci_reset_function()")
> Reported-by: Imre Deak <imre.deak@intel.com>
> Closes: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_134186v1/shard-dg2-1/igt@device_reset@unbind-reset-rebind.html
> Cc: Jani Saarinen <jani.saarinen@intel.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Bjorn Helgaas <bhelgaas@google.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

In our ath11k test box commit 7e89efc6e9e4 was causing random kernel
crashes. I tested patches 1-3 and did not see anymore crashes so:

Tested-by: Kalle Valo <kvalo@kernel.org>

Unfortunately I didn't realise to test patch 1 alone but I would assume
that's enough to fix the crashes. Please prioritise this patch so that
the regression in Linus' tree is fixed.
Bjorn Helgaas June 4, 2024, 5:11 p.m. UTC | #3
On Tue, Jun 04, 2024 at 11:03:54AM +0300, Kalle Valo wrote:
> Dan Williams <dan.j.williams@intel.com> writes:
> 
> > While the experiment did reveal that there are additional places that
> > are missing the lock during secondary bus reset, one of the places that
> > needs to take cfg_access_lock (pci_bus_lock()) is not prepared for
> > lockdep annotation.
> >
> > Specifically, pci_bus_lock() takes pci_dev_lock() recursively and is
> > currently dependent on the fact that the device_lock() is marked
> > lockdep_set_novalidate_class(&dev->mutex). Otherwise, without that
> > annotation, pci_bus_lock() would need to use something like a new
> > pci_dev_lock_nested() helper, a scheme to track a PCI device's depth in
> > the topology, and a hope that the depth of a PCI tree never exceeds the
> > max value for a lockdep subclass.
> >
> > The alternative to ripping out the lockdep coverage would be to deploy a
> > dynamic lock key for every PCI device. Unfortunately, there is evidence
> > that increasing the number of keys that lockdep needs to track to be
> > per-PCI-device is prohibitively expensive for something like the
> > cfg_access_lock.
> >
> > The main motivation for adding the annotation in the first place was to
> > catch unlocked secondary bus resets, not necessarily catch lock ordering
> > problems between cfg_access_lock and other locks. Solve that narrower
> > problem with follow-on patches, and just due to targeted revert for now.
> >
> > Fixes: 7e89efc6e9e4 ("PCI: Lock upstream bridge for pci_reset_function()")
> > Reported-by: Imre Deak <imre.deak@intel.com>
> > Closes: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_134186v1/shard-dg2-1/igt@device_reset@unbind-reset-rebind.html
> > Cc: Jani Saarinen <jani.saarinen@intel.com>
> > Cc: Dave Jiang <dave.jiang@intel.com>
> > Cc: Bjorn Helgaas <bhelgaas@google.com>
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> 
> In our ath11k test box commit 7e89efc6e9e4 was causing random kernel
> crashes. I tested patches 1-3 and did not see anymore crashes so:
> 
> Tested-by: Kalle Valo <kvalo@kernel.org>

Added to commit logs, thank you!
Leon Romanovsky June 6, 2024, 10:04 a.m. UTC | #4
On Tue, Jun 04, 2024 at 12:11:21PM -0500, Bjorn Helgaas wrote:
> On Tue, Jun 04, 2024 at 11:03:54AM +0300, Kalle Valo wrote:
> > Dan Williams <dan.j.williams@intel.com> writes:
> > 
> > > While the experiment did reveal that there are additional places that
> > > are missing the lock during secondary bus reset, one of the places that
> > > needs to take cfg_access_lock (pci_bus_lock()) is not prepared for
> > > lockdep annotation.
> > >
> > > Specifically, pci_bus_lock() takes pci_dev_lock() recursively and is
> > > currently dependent on the fact that the device_lock() is marked
> > > lockdep_set_novalidate_class(&dev->mutex). Otherwise, without that
> > > annotation, pci_bus_lock() would need to use something like a new
> > > pci_dev_lock_nested() helper, a scheme to track a PCI device's depth in
> > > the topology, and a hope that the depth of a PCI tree never exceeds the
> > > max value for a lockdep subclass.
> > >
> > > The alternative to ripping out the lockdep coverage would be to deploy a
> > > dynamic lock key for every PCI device. Unfortunately, there is evidence
> > > that increasing the number of keys that lockdep needs to track to be
> > > per-PCI-device is prohibitively expensive for something like the
> > > cfg_access_lock.
> > >
> > > The main motivation for adding the annotation in the first place was to
> > > catch unlocked secondary bus resets, not necessarily catch lock ordering
> > > problems between cfg_access_lock and other locks. Solve that narrower
> > > problem with follow-on patches, and just due to targeted revert for now.
> > >
> > > Fixes: 7e89efc6e9e4 ("PCI: Lock upstream bridge for pci_reset_function()")
> > > Reported-by: Imre Deak <imre.deak@intel.com>
> > > Closes: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_134186v1/shard-dg2-1/igt@device_reset@unbind-reset-rebind.html
> > > Cc: Jani Saarinen <jani.saarinen@intel.com>
> > > Cc: Dave Jiang <dave.jiang@intel.com>
> > > Cc: Bjorn Helgaas <bhelgaas@google.com>
> > > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > 
> > In our ath11k test box commit 7e89efc6e9e4 was causing random kernel
> > crashes. I tested patches 1-3 and did not see anymore crashes so:
> > 
> > Tested-by: Kalle Valo <kvalo@kernel.org>
> 
> Added to commit logs, thank you!
> 

Thanks,
Tested-by: Leon Romanovsky <leonro@nvidia.com>
diff mbox series

Patch

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 30f031de9cfe..b123da16b63b 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -289,8 +289,6 @@  void pci_cfg_access_lock(struct pci_dev *dev)
 {
 	might_sleep();
 
-	lock_map_acquire(&dev->cfg_access_lock);
-
 	raw_spin_lock_irq(&pci_lock);
 	if (dev->block_cfg_access)
 		pci_wait_cfg(dev);
@@ -345,8 +343,6 @@  void pci_cfg_access_unlock(struct pci_dev *dev)
 	raw_spin_unlock_irqrestore(&pci_lock, flags);
 
 	wake_up_all(&pci_cfg_wait);
-
-	lock_map_release(&dev->cfg_access_lock);
 }
 EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 59e0949fb079..35fb1f17a589 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4883,7 +4883,6 @@  void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
  */
 int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
 {
-	lock_map_assert_held(&dev->cfg_access_lock);
 	pcibios_reset_secondary_bus(dev);
 
 	return pci_bridge_wait_for_secondary_bus(dev, "bus reset");
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 8e696e547565..5fbabb4e3425 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2546,9 +2546,6 @@  void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	dev->dev.dma_mask = &dev->dma_mask;
 	dev->dev.dma_parms = &dev->dma_parms;
 	dev->dev.coherent_dma_mask = 0xffffffffull;
-	lockdep_register_key(&dev->cfg_access_key);
-	lockdep_init_map(&dev->cfg_access_lock, dev_name(&dev->dev),
-			 &dev->cfg_access_key, 0);
 
 	dma_set_max_seg_size(&dev->dev, 65536);
 	dma_set_seg_boundary(&dev->dev, 0xffffffff);
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 5e51b0de4c4b..08b0d1d9d78b 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -297,9 +297,6 @@  extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
 		.wait_type_inner = _wait_type,		\
 		.lock_type = LD_LOCK_WAIT_OVERRIDE, }
 
-#define lock_map_assert_held(l)		\
-	lockdep_assert(lock_is_held(l) != LOCK_STATE_NOT_HELD)
-
 #else /* !CONFIG_LOCKDEP */
 
 static inline void lockdep_init_task(struct task_struct *task)
@@ -391,8 +388,6 @@  extern int lockdep_is_held(const void *);
 #define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type)	\
 	struct lockdep_map __maybe_unused _name = {}
 
-#define lock_map_assert_held(l)			do { (void)(l); } while (0)
-
 #endif /* !LOCKDEP */
 
 #ifdef CONFIG_PROVE_LOCKING
diff --git a/include/linux/pci.h b/include/linux/pci.h
index fb004fd4e889..cafc5ab1cbcb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -413,8 +413,6 @@  struct pci_dev {
 	struct resource driver_exclusive_resource;	 /* driver exclusive resource ranges */
 
 	bool		match_driver;		/* Skip attaching driver */
-	struct lock_class_key cfg_access_key;
-	struct lockdep_map cfg_access_lock;
 
 	unsigned int	transparent:1;		/* Subtractive decode bridge */
 	unsigned int	io_window:1;		/* Bridge has I/O window */