Message ID | 1244776109.2560.319.camel@ymzhang (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
On Fri, 2009-06-12 at 11:08 +0800, Zhang, Yanmin wrote: > Anyone could help me test it on powerpc? Or at least a compilation > to see there is any compiling error/warning. I have no powerpc machine. > Thanks. > > Changelog V4: > Port the patches to Jesses' linux-next tree (mostly based on > 2.6.30). > > Changelog V3: > 1) Probe devices under the root port when the bus id of > the source id is equal to 0; V2 does so when device id is > equal to 0; > 2) Add more comments on critical path of finding devices. > > Changelog V2: > Version 2 adds nosourceid, a boot parameter. When > aerdriver.nosourceid=y, aerdriver doesn't use the source > id saved by root port. Instead, it searches the device > tree under the root port to find the reporter. So if hardware > has errata and root port saves a bad source id, aerdriver > still could find the reporter. > There are 2 scenarios under which aerdriver searches the > device tree under root port: > 1) nosourceid=n and error source id is equal to 0; > 2) nosourceid=y. > > Based on PCI Express AER specs, a root port might receive multiple > TLP errors while it could only save a correctable error source id > and an uncorrectable error source id at the same time. In addition, > some root port hardware might be unable to provide a correct source > id, i.e., the source id, or the bus id part of the source id provided > by root port might be equal to 0. > > The patchset implements the support in kernel by searching the device > tree under the root port. > > Patch 1 changes parameter cb of function pci_walk_bus to return a value. > When cb return non-zero, pci_walk_bus stops more searching on the > device tree. > > Signed-off-by: Zhang Yanmin <yanmin.zhang@linux.intel.com> > This one looks fine to me. Reviewed-by: Andrew Patterson <andrew.patterson@hp.com> > --- > > diff -Nraup linux-2.6_next/arch/powerpc/platforms/pseries/eeh_driver.c linux-2.6_next_pciwalk/arch/powerpc/platforms/pseries/eeh_driver.c > --- linux-2.6_next/arch/powerpc/platforms/pseries/eeh_driver.c 2009-06-12 05:25:14.000000000 +0800 > +++ linux-2.6_next_pciwalk/arch/powerpc/platforms/pseries/eeh_driver.c 2009-06-12 05:28:08.000000000 +0800 > @@ -122,7 +122,7 @@ static void eeh_enable_irq(struct pci_de > * passed back in "userdata". > */ > > -static void eeh_report_error(struct pci_dev *dev, void *userdata) > +static int eeh_report_error(struct pci_dev *dev, void *userdata) > { > enum pci_ers_result rc, *res = userdata; > struct pci_driver *driver = dev->driver; > @@ -130,19 +130,21 @@ static void eeh_report_error(struct pci_ > dev->error_state = pci_channel_io_frozen; > > if (!driver) > - return; > + return 0; > > eeh_disable_irq(dev); > > if (!driver->err_handler || > !driver->err_handler->error_detected) > - return; > + return 0; > > rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen); > > /* A driver that needs a reset trumps all others */ > if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; > if (*res == PCI_ERS_RESULT_NONE) *res = rc; > + > + return 0; > } > > /** > @@ -153,7 +155,7 @@ static void eeh_report_error(struct pci_ > * Cumulative response passed back in "userdata". > */ > > -static void eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) > +static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) > { > enum pci_ers_result rc, *res = userdata; > struct pci_driver *driver = dev->driver; > @@ -161,26 +163,28 @@ static void eeh_report_mmio_enabled(stru > if (!driver || > !driver->err_handler || > !driver->err_handler->mmio_enabled) > - return; > + return 0; > > rc = driver->err_handler->mmio_enabled (dev); > > /* A driver that needs a reset trumps all others */ > if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; > if (*res == PCI_ERS_RESULT_NONE) *res = rc; > + > + return 0; > } > > /** > * eeh_report_reset - tell device that slot has been reset > */ > > -static void eeh_report_reset(struct pci_dev *dev, void *userdata) > +static int eeh_report_reset(struct pci_dev *dev, void *userdata) > { > enum pci_ers_result rc, *res = userdata; > struct pci_driver *driver = dev->driver; > > if (!driver) > - return; > + return 0; > > dev->error_state = pci_channel_io_normal; > > @@ -188,35 +192,39 @@ static void eeh_report_reset(struct pci_ > > if (!driver->err_handler || > !driver->err_handler->slot_reset) > - return; > + return 0; > > rc = driver->err_handler->slot_reset(dev); > if ((*res == PCI_ERS_RESULT_NONE) || > (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc; > if (*res == PCI_ERS_RESULT_DISCONNECT && > rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; > + > + return 0; > } > > /** > * eeh_report_resume - tell device to resume normal operations > */ > > -static void eeh_report_resume(struct pci_dev *dev, void *userdata) > +static int eeh_report_resume(struct pci_dev *dev, void *userdata) > { > struct pci_driver *driver = dev->driver; > > dev->error_state = pci_channel_io_normal; > > if (!driver) > - return; > + return 0; > > eeh_enable_irq(dev); > > if (!driver->err_handler || > !driver->err_handler->resume) > - return; > + return 0; > > driver->err_handler->resume(dev); > + > + return 0; > } > > /** > @@ -226,22 +234,24 @@ static void eeh_report_resume(struct pci > * dead, and that no further recovery attempts will be made on it. > */ > > -static void eeh_report_failure(struct pci_dev *dev, void *userdata) > +static int eeh_report_failure(struct pci_dev *dev, void *userdata) > { > struct pci_driver *driver = dev->driver; > > dev->error_state = pci_channel_io_perm_failure; > > if (!driver) > - return; > + return 0; > > eeh_disable_irq(dev); > > if (!driver->err_handler || > !driver->err_handler->error_detected) > - return; > + return 0; > > driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); > + > + return 0; > } > > /* ------------------------------------------------------- */ > diff -Nraup linux-2.6_next/drivers/pci/bus.c linux-2.6_next_pciwalk/drivers/pci/bus.c > --- linux-2.6_next/drivers/pci/bus.c 2009-06-12 05:25:43.000000000 +0800 > +++ linux-2.6_next_pciwalk/drivers/pci/bus.c 2009-06-12 05:28:08.000000000 +0800 > @@ -206,13 +206,18 @@ void pci_enable_bridges(struct pci_bus * > * Walk the given bus, including any bridged devices > * on buses under this bus. Call the provided callback > * on each device found. > + * > + * We check the return of @cb each time. If it returns anything > + * other than 0, we break out. > + * > */ > -void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), > +void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), > void *userdata) > { > struct pci_dev *dev; > struct pci_bus *bus; > struct list_head *next; > + int retval; > > bus = top; > down_read(&pci_bus_sem); > @@ -236,8 +241,10 @@ void pci_walk_bus(struct pci_bus *top, v > > /* Run device routines with the device locked */ > down(&dev->dev.sem); > - cb(dev, userdata); > + retval = cb(dev, userdata); > up(&dev->dev.sem); > + if (retval) > + break; > } > up_read(&pci_bus_sem); > } > diff -Nraup linux-2.6_next/drivers/pci/pcie/aer/aerdrv_core.c linux-2.6_next_pciwalk/drivers/pci/pcie/aer/aerdrv_core.c > --- linux-2.6_next/drivers/pci/pcie/aer/aerdrv_core.c 2009-06-12 05:25:43.000000000 +0800 > +++ linux-2.6_next_pciwalk/drivers/pci/pcie/aer/aerdrv_core.c 2009-06-12 05:31:53.000000000 +0800 > @@ -109,7 +109,7 @@ int pci_cleanup_aer_correct_error_status > #endif /* 0 */ > > > -static void set_device_error_reporting(struct pci_dev *dev, void *data) > +static int set_device_error_reporting(struct pci_dev *dev, void *data) > { > bool enable = *((bool *)data); > > @@ -124,6 +124,8 @@ static void set_device_error_reporting(s > > if (enable) > pcie_set_ecrc_checking(dev); > + > + return 0; > } > > /** > @@ -207,7 +209,7 @@ static struct device* find_source_device > return NULL; > } > > -static void report_error_detected(struct pci_dev *dev, void *data) > +static int report_error_detected(struct pci_dev *dev, void *data) > { > pci_ers_result_t vote; > struct pci_error_handlers *err_handler; > @@ -232,16 +234,16 @@ static void report_error_detected(struct > dev->driver ? > "no AER-aware driver" : "no driver"); > } > - return; > + return 0; > } > > err_handler = dev->driver->err_handler; > vote = err_handler->error_detected(dev, result_data->state); > result_data->result = merge_result(result_data->result, vote); > - return; > + return 0; > } > > -static void report_mmio_enabled(struct pci_dev *dev, void *data) > +static int report_mmio_enabled(struct pci_dev *dev, void *data) > { > pci_ers_result_t vote; > struct pci_error_handlers *err_handler; > @@ -251,15 +253,15 @@ static void report_mmio_enabled(struct p > if (!dev->driver || > !dev->driver->err_handler || > !dev->driver->err_handler->mmio_enabled) > - return; > + return 0; > > err_handler = dev->driver->err_handler; > vote = err_handler->mmio_enabled(dev); > result_data->result = merge_result(result_data->result, vote); > - return; > + return 0; > } > > -static void report_slot_reset(struct pci_dev *dev, void *data) > +static int report_slot_reset(struct pci_dev *dev, void *data) > { > pci_ers_result_t vote; > struct pci_error_handlers *err_handler; > @@ -269,15 +271,15 @@ static void report_slot_reset(struct pci > if (!dev->driver || > !dev->driver->err_handler || > !dev->driver->err_handler->slot_reset) > - return; > + return 0; > > err_handler = dev->driver->err_handler; > vote = err_handler->slot_reset(dev); > result_data->result = merge_result(result_data->result, vote); > - return; > + return 0; > } > > -static void report_resume(struct pci_dev *dev, void *data) > +static int report_resume(struct pci_dev *dev, void *data) > { > struct pci_error_handlers *err_handler; > > @@ -286,11 +288,11 @@ static void report_resume(struct pci_dev > if (!dev->driver || > !dev->driver->err_handler || > !dev->driver->err_handler->resume) > - return; > + return 0; > > err_handler = dev->driver->err_handler; > err_handler->resume(dev); > - return; > + return 0; > } > > /** > @@ -307,7 +309,7 @@ static void report_resume(struct pci_dev > static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, > enum pci_channel_state state, > char *error_mesg, > - void (*cb)(struct pci_dev *, void *)) > + int (*cb)(struct pci_dev *, void *)) > { > struct aer_broadcast_data result_data; > > diff -Nraup linux-2.6_next/include/linux/pci.h linux-2.6_next_pciwalk/include/linux/pci.h > --- linux-2.6_next/include/linux/pci.h 2009-06-12 05:24:32.000000000 +0800 > +++ linux-2.6_next_pciwalk/include/linux/pci.h 2009-06-12 05:28:08.000000000 +0800 > @@ -789,7 +789,7 @@ const struct pci_device_id *pci_match_id > int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, > int pass); > > -void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), > +void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), > void *userdata); > int pci_cfg_space_size_ext(struct pci_dev *dev); > int pci_cfg_space_size(struct pci_dev *dev); > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-pci" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html >
On Fri, 2009-06-12 at 22:17 +0000, Andrew Patterson wrote: > On Fri, 2009-06-12 at 11:08 +0800, Zhang, Yanmin wrote: > > Anyone could help me test it on powerpc? Or at least a compilation > > to see there is any compiling error/warning. I have no powerpc machine. > > Thanks. > > > > Changelog V4: > > Port the patches to Jesses' linux-next tree (mostly based on > > 2.6.30). > > > > Changelog V3: > > 1) Probe devices under the root port when the bus id of > > the source id is equal to 0; V2 does so when device id is > > equal to 0; > > 2) Add more comments on critical path of finding devices. > > > > Changelog V2: > > Version 2 adds nosourceid, a boot parameter. When > > aerdriver.nosourceid=y, aerdriver doesn't use the source > > id saved by root port. Instead, it searches the device > > tree under the root port to find the reporter. So if hardware > > has errata and root port saves a bad source id, aerdriver > > still could find the reporter. > > There are 2 scenarios under which aerdriver searches the > > device tree under root port: > > 1) nosourceid=n and error source id is equal to 0; > > 2) nosourceid=y. > > > > Based on PCI Express AER specs, a root port might receive multiple > > TLP errors while it could only save a correctable error source id > > and an uncorrectable error source id at the same time. In addition, > > some root port hardware might be unable to provide a correct source > > id, i.e., the source id, or the bus id part of the source id provided > > by root port might be equal to 0. > > > > The patchset implements the support in kernel by searching the device > > tree under the root port. > > > > Patch 1 changes parameter cb of function pci_walk_bus to return a value. > > When cb return non-zero, pci_walk_bus stops more searching on the > > device tree. > > > > Signed-off-by: Zhang Yanmin <yanmin.zhang@linux.intel.com> > > > > This one looks fine to me. > > Reviewed-by: Andrew Patterson <andrew.patterson@hp.com> Thanks for helping review the patches. Yanmin -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff -Nraup linux-2.6_next/arch/powerpc/platforms/pseries/eeh_driver.c linux-2.6_next_pciwalk/arch/powerpc/platforms/pseries/eeh_driver.c --- linux-2.6_next/arch/powerpc/platforms/pseries/eeh_driver.c 2009-06-12 05:25:14.000000000 +0800 +++ linux-2.6_next_pciwalk/arch/powerpc/platforms/pseries/eeh_driver.c 2009-06-12 05:28:08.000000000 +0800 @@ -122,7 +122,7 @@ static void eeh_enable_irq(struct pci_de * passed back in "userdata". */ -static void eeh_report_error(struct pci_dev *dev, void *userdata) +static int eeh_report_error(struct pci_dev *dev, void *userdata) { enum pci_ers_result rc, *res = userdata; struct pci_driver *driver = dev->driver; @@ -130,19 +130,21 @@ static void eeh_report_error(struct pci_ dev->error_state = pci_channel_io_frozen; if (!driver) - return; + return 0; eeh_disable_irq(dev); if (!driver->err_handler || !driver->err_handler->error_detected) - return; + return 0; rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen); /* A driver that needs a reset trumps all others */ if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; + + return 0; } /** @@ -153,7 +155,7 @@ static void eeh_report_error(struct pci_ * Cumulative response passed back in "userdata". */ -static void eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) +static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) { enum pci_ers_result rc, *res = userdata; struct pci_driver *driver = dev->driver; @@ -161,26 +163,28 @@ static void eeh_report_mmio_enabled(stru if (!driver || !driver->err_handler || !driver->err_handler->mmio_enabled) - return; + return 0; rc = driver->err_handler->mmio_enabled (dev); /* A driver that needs a reset trumps all others */ if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; + + return 0; } /** * eeh_report_reset - tell device that slot has been reset */ -static void eeh_report_reset(struct pci_dev *dev, void *userdata) +static int eeh_report_reset(struct pci_dev *dev, void *userdata) { enum pci_ers_result rc, *res = userdata; struct pci_driver *driver = dev->driver; if (!driver) - return; + return 0; dev->error_state = pci_channel_io_normal; @@ -188,35 +192,39 @@ static void eeh_report_reset(struct pci_ if (!driver->err_handler || !driver->err_handler->slot_reset) - return; + return 0; rc = driver->err_handler->slot_reset(dev); if ((*res == PCI_ERS_RESULT_NONE) || (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc; if (*res == PCI_ERS_RESULT_DISCONNECT && rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; + + return 0; } /** * eeh_report_resume - tell device to resume normal operations */ -static void eeh_report_resume(struct pci_dev *dev, void *userdata) +static int eeh_report_resume(struct pci_dev *dev, void *userdata) { struct pci_driver *driver = dev->driver; dev->error_state = pci_channel_io_normal; if (!driver) - return; + return 0; eeh_enable_irq(dev); if (!driver->err_handler || !driver->err_handler->resume) - return; + return 0; driver->err_handler->resume(dev); + + return 0; } /** @@ -226,22 +234,24 @@ static void eeh_report_resume(struct pci * dead, and that no further recovery attempts will be made on it. */ -static void eeh_report_failure(struct pci_dev *dev, void *userdata) +static int eeh_report_failure(struct pci_dev *dev, void *userdata) { struct pci_driver *driver = dev->driver; dev->error_state = pci_channel_io_perm_failure; if (!driver) - return; + return 0; eeh_disable_irq(dev); if (!driver->err_handler || !driver->err_handler->error_detected) - return; + return 0; driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); + + return 0; } /* ------------------------------------------------------- */ diff -Nraup linux-2.6_next/drivers/pci/bus.c linux-2.6_next_pciwalk/drivers/pci/bus.c --- linux-2.6_next/drivers/pci/bus.c 2009-06-12 05:25:43.000000000 +0800 +++ linux-2.6_next_pciwalk/drivers/pci/bus.c 2009-06-12 05:28:08.000000000 +0800 @@ -206,13 +206,18 @@ void pci_enable_bridges(struct pci_bus * * Walk the given bus, including any bridged devices * on buses under this bus. Call the provided callback * on each device found. + * + * We check the return of @cb each time. If it returns anything + * other than 0, we break out. + * */ -void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), +void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) { struct pci_dev *dev; struct pci_bus *bus; struct list_head *next; + int retval; bus = top; down_read(&pci_bus_sem); @@ -236,8 +241,10 @@ void pci_walk_bus(struct pci_bus *top, v /* Run device routines with the device locked */ down(&dev->dev.sem); - cb(dev, userdata); + retval = cb(dev, userdata); up(&dev->dev.sem); + if (retval) + break; } up_read(&pci_bus_sem); } diff -Nraup linux-2.6_next/drivers/pci/pcie/aer/aerdrv_core.c linux-2.6_next_pciwalk/drivers/pci/pcie/aer/aerdrv_core.c --- linux-2.6_next/drivers/pci/pcie/aer/aerdrv_core.c 2009-06-12 05:25:43.000000000 +0800 +++ linux-2.6_next_pciwalk/drivers/pci/pcie/aer/aerdrv_core.c 2009-06-12 05:31:53.000000000 +0800 @@ -109,7 +109,7 @@ int pci_cleanup_aer_correct_error_status #endif /* 0 */ -static void set_device_error_reporting(struct pci_dev *dev, void *data) +static int set_device_error_reporting(struct pci_dev *dev, void *data) { bool enable = *((bool *)data); @@ -124,6 +124,8 @@ static void set_device_error_reporting(s if (enable) pcie_set_ecrc_checking(dev); + + return 0; } /** @@ -207,7 +209,7 @@ static struct device* find_source_device return NULL; } -static void report_error_detected(struct pci_dev *dev, void *data) +static int report_error_detected(struct pci_dev *dev, void *data) { pci_ers_result_t vote; struct pci_error_handlers *err_handler; @@ -232,16 +234,16 @@ static void report_error_detected(struct dev->driver ? "no AER-aware driver" : "no driver"); } - return; + return 0; } err_handler = dev->driver->err_handler; vote = err_handler->error_detected(dev, result_data->state); result_data->result = merge_result(result_data->result, vote); - return; + return 0; } -static void report_mmio_enabled(struct pci_dev *dev, void *data) +static int report_mmio_enabled(struct pci_dev *dev, void *data) { pci_ers_result_t vote; struct pci_error_handlers *err_handler; @@ -251,15 +253,15 @@ static void report_mmio_enabled(struct p if (!dev->driver || !dev->driver->err_handler || !dev->driver->err_handler->mmio_enabled) - return; + return 0; err_handler = dev->driver->err_handler; vote = err_handler->mmio_enabled(dev); result_data->result = merge_result(result_data->result, vote); - return; + return 0; } -static void report_slot_reset(struct pci_dev *dev, void *data) +static int report_slot_reset(struct pci_dev *dev, void *data) { pci_ers_result_t vote; struct pci_error_handlers *err_handler; @@ -269,15 +271,15 @@ static void report_slot_reset(struct pci if (!dev->driver || !dev->driver->err_handler || !dev->driver->err_handler->slot_reset) - return; + return 0; err_handler = dev->driver->err_handler; vote = err_handler->slot_reset(dev); result_data->result = merge_result(result_data->result, vote); - return; + return 0; } -static void report_resume(struct pci_dev *dev, void *data) +static int report_resume(struct pci_dev *dev, void *data) { struct pci_error_handlers *err_handler; @@ -286,11 +288,11 @@ static void report_resume(struct pci_dev if (!dev->driver || !dev->driver->err_handler || !dev->driver->err_handler->resume) - return; + return 0; err_handler = dev->driver->err_handler; err_handler->resume(dev); - return; + return 0; } /** @@ -307,7 +309,7 @@ static void report_resume(struct pci_dev static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, enum pci_channel_state state, char *error_mesg, - void (*cb)(struct pci_dev *, void *)) + int (*cb)(struct pci_dev *, void *)) { struct aer_broadcast_data result_data; diff -Nraup linux-2.6_next/include/linux/pci.h linux-2.6_next_pciwalk/include/linux/pci.h --- linux-2.6_next/include/linux/pci.h 2009-06-12 05:24:32.000000000 +0800 +++ linux-2.6_next_pciwalk/include/linux/pci.h 2009-06-12 05:28:08.000000000 +0800 @@ -789,7 +789,7 @@ const struct pci_device_id *pci_match_id int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass); -void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), +void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); int pci_cfg_space_size_ext(struct pci_dev *dev); int pci_cfg_space_size(struct pci_dev *dev);