diff mbox

[v2,1/2] PCI: Use a local mutex instead of pci_bus_sem to avoid deadlock

Message ID 1437124592-2070-2-git-send-email-wangyijing@huawei.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Yijing Wang July 17, 2015, 9:16 a.m. UTC
Rajat Jain reported a deadlock that occurs when a hierarchical hotplug
thread and an AER recovery thread run concurrently.
https://lkml.org/lkml/2015/3/11/861

thread 1:
pciehp_enable_slot()
	pciehp_configure_device()
		pci_bus_add_devices()
			device_attach(dev)
				device_lock(dev) //acquire device mutex successfully
			...
			pciehp_probe(dev)
				__pci_hp_register()
					pci_create_slot()
						down_write(&pci_bus_sem) //deadlock here

thread 2:
aer_isr_one_error()
	aer_process_err_device()
		do_recovery()
			broadcast_error_message()
				pci_walk_bus()
					down_read(&pci_bus_sem) //acquire pci_bus_sem successfully
						report_error_detected(dev)
							device_lock(dev) // deadlock here

Currently, down_write(&pci_bus_sem) is used to protect the bus->slots list.
Because the bus->slots list is only accessed in drivers/pci/slot.c, we can
introduce a new local mutex to protect bus->slots instead, and take only
down_read(&pci_bus_sem) where slot.c walks the bus->devices list. Thread 1
then no longer needs pci_bus_sem for writing while holding the device lock,
which breaks the ABBA cycle shown above.

Signed-off-by: Yijing Wang <wangyijing@huawei.com>
---
 drivers/pci/slot.c  |   15 ++++++++++-----
 include/linux/pci.h |    3 ++-
 2 files changed, 12 insertions(+), 6 deletions(-)

Comments

Guenter Roeck July 24, 2015, 3:07 a.m. UTC | #1
On 07/17/2015 02:16 AM, Yijing Wang wrote:
> Rajat Jain reported a deadlock when a hierarchical hot plug
> thread and aer recovery thread both run.
> https://lkml.org/lkml/2015/3/11/861
>
> thread 1:
> pciehp_enable_slot()
> 	pciehp_configure_device()
> 		pci_bus_add_devices()
> 			device_attach(dev)
> 				device_lock(dev) //acquire device mutex successfully
> 			...
> 			pciehp_probe(dev)
> 				__pci_hp_register()
> 					pci_create_slot()
> 						down_write(pci_bus_sem) //deadlock here
>
> thread 2:
> aer_isr_one_error()
> 	aer_process_err_device()
> 		do_recovery()
> 			broadcast_error_message()
> 				pci_walk_bus()
> 					down_read(&pci_bus_sem) //acquire pci_bus_sem successfully
> 						report_error_detected(dev)
> 							device_lock(dev) // deadlock here
>
> We use down_write(&pci_bus_sem) to protect the bus->slots list, because the
> bus->slots list is only accessed in drivers/pci/slot.c, we could introduce
> a new local mutex to protect bus->slots, and use down_read(&pci_bus_sem)
> instead of down_write(&pci_bus_sem) to protect the bus->devices list.
>
> Signed-off-by: Yijing Wang <wangyijing@huawei.com>

I applied both patches to our system and ran a number of tests.
Works fine as far as I can see.

Tested-by: Guenter Roeck <linux@roeck-us.net>

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yijing Wang July 24, 2015, 4:08 a.m. UTC | #2
On 2015/7/24 11:07, Guenter Roeck wrote:
> On 07/17/2015 02:16 AM, Yijing Wang wrote:
>> Rajat Jain reported a deadlock when a hierarchical hot plug
>> thread and aer recovery thread both run.
>> https://lkml.org/lkml/2015/3/11/861
>>
>> thread 1:
>> pciehp_enable_slot()
>>     pciehp_configure_device()
>>         pci_bus_add_devices()
>>             device_attach(dev)
>>                 device_lock(dev) //acquire device mutex successfully
>>             ...
>>             pciehp_probe(dev)
>>                 __pci_hp_register()
>>                     pci_create_slot()
>>                         down_write(pci_bus_sem) //deadlock here
>>
>> thread 2:
>> aer_isr_one_error()
>>     aer_process_err_device()
>>         do_recovery()
>>             broadcast_error_message()
>>                 pci_walk_bus()
>>                     down_read(&pci_bus_sem) //acquire pci_bus_sem successfully
>>                         report_error_detected(dev)
>>                             device_lock(dev) // deadlock here
>>
>> We use down_write(&pci_bus_sem) to protect the bus->slots list, because the
>> bus->slots list is only accessed in drivers/pci/slot.c, we could introduce
>> a new local mutex to protect bus->slots, and use down_read(&pci_bus_sem)
>> instead of down_write(&pci_bus_sem) to protect the bus->devices list.
>>
>> Signed-off-by: Yijing Wang <wangyijing@huawei.com>
> 
> I applied both patches to our system and ran a number of tests.
> Works fine as far as I can see.
> 
> Tested-by: Guenter Roeck <linux@roeck-us.net>

Guenter, thanks very much!

Thanks!
Yijing.

> 
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 396c200..a9079d9 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -14,6 +14,7 @@ 
 
 struct kset *pci_slots_kset;
 EXPORT_SYMBOL_GPL(pci_slots_kset);
+static DEFINE_MUTEX(pci_slot_mutex);
 
 static ssize_t pci_slot_attr_show(struct kobject *kobj,
 					struct attribute *attr, char *buf)
@@ -106,9 +107,11 @@  static void pci_slot_release(struct kobject *kobj)
 	dev_dbg(&slot->bus->dev, "dev %02x, released physical slot %s\n",
 		slot->number, pci_slot_name(slot));
 
+	down_read(&pci_bus_sem);
 	list_for_each_entry(dev, &slot->bus->devices, bus_list)
 		if (PCI_SLOT(dev->devfn) == slot->number)
 			dev->slot = NULL;
+	up_read(&pci_bus_sem);
 
 	list_del(&slot->list);
 
@@ -195,7 +198,7 @@  static struct pci_slot *get_slot(struct pci_bus *parent, int slot_nr)
 {
 	struct pci_slot *slot;
 	/*
-	 * We already hold pci_bus_sem so don't worry
+	 * We already hold pci_slot_mutex so don't worry
 	 */
 	list_for_each_entry(slot, &parent->slots, list)
 		if (slot->number == slot_nr) {
@@ -253,7 +256,7 @@  struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
 	int err = 0;
 	char *slot_name = NULL;
 
-	down_write(&pci_bus_sem);
+	mutex_lock(&pci_slot_mutex);
 
 	if (slot_nr == -1)
 		goto placeholder;
@@ -301,16 +304,18 @@  placeholder:
 	INIT_LIST_HEAD(&slot->list);
 	list_add(&slot->list, &parent->slots);
 
+	down_read(&pci_bus_sem);
 	list_for_each_entry(dev, &parent->devices, bus_list)
 		if (PCI_SLOT(dev->devfn) == slot_nr)
 			dev->slot = slot;
+	up_read(&pci_bus_sem);
 
 	dev_dbg(&parent->dev, "dev %02x, created physical slot %s\n",
 		slot_nr, pci_slot_name(slot));
 
 out:
 	kfree(slot_name);
-	up_write(&pci_bus_sem);
+	mutex_unlock(&pci_slot_mutex);
 	return slot;
 err:
 	kfree(slot);
@@ -332,9 +337,9 @@  void pci_destroy_slot(struct pci_slot *slot)
 	dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n",
 		slot->number, atomic_read(&slot->kobj.kref.refcount) - 1);
 
-	down_write(&pci_bus_sem);
+	mutex_lock(&pci_slot_mutex);
 	kobject_put(&slot->kobj);
-	up_write(&pci_bus_sem);
+	mutex_unlock(&pci_slot_mutex);
 }
 EXPORT_SYMBOL_GPL(pci_destroy_slot);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8a0321a..34cc95d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -446,7 +446,8 @@  struct pci_bus {
 	struct list_head children;	/* list of child buses */
 	struct list_head devices;	/* list of devices on this bus */
 	struct pci_dev	*self;		/* bridge device as seen by parent */
-	struct list_head slots;		/* list of slots on this bus */
+	struct list_head slots;		/* list of slots on this bus; protected by the local
+					   pci_slot_mutex (not pci_bus_sem) to avoid an ABBA deadlock */
 	struct resource *resource[PCI_BRIDGE_RESOURCE_NUM];
 	struct list_head resources;	/* address space routed to this bus */
 	struct resource busn_res;	/* bus numbers routed to this bus */