Message ID | fc7db7de4778803a3221d7fd5a203bf971fdac61.1679892337.git.alison.schofield@intel.com
State      | Superseded
Series     | cxl: CXL Inject & Clear Poison
On Sun, 26 Mar 2023 22:03:09 -0700 alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
>
> Inject and clear poison capabilities and intended for debug usage only.
> In order to be useful in debug environments, the driver needs to allow
> inject and clear operations on DPAs mapped in regions.
>
> dev_warn_once() when either operation occurs.
>
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> ---
>  drivers/cxl/core/memdev.c | 59 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 59 insertions(+)
>
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index 0e39c3c3fb09..a83619c31f61 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -213,6 +213,50 @@ ssize_t cxl_trigger_poison_list(struct device *dev,
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);
>
> +struct cxl_dpa_to_region_context {
> +	struct cxl_region *cxlr;
> +	u64 dpa;
> +};
> +
> +static int __cxl_dpa_to_region(struct device *dev, void *arg)
> +{
> +	struct cxl_dpa_to_region_context *ctx = arg;
> +	struct cxl_endpoint_decoder *cxled;
> +	u64 dpa = ctx->dpa;
> +
> +	if (!is_endpoint_decoder(dev))
> +		return 0;
> +
> +	cxled = to_cxl_endpoint_decoder(dev);
> +	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
> +		return 0;
> +
> +	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
> +		return 0;
> +
> +	dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
> +		dev_name(&cxled->cxld.region->dev));
> +
> +	ctx->cxlr = cxled->cxld.region;
> +

If we have a match, little point in letting walk continue.

	return 1;

Also, I "think" we just know that the association has been built.
Injecting poison is probably still fine if the region / decoder hasn't yet
been committed.

Jonathan

> +	return 0;
> +}
> +
> +static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
> +{
> +	struct cxl_dpa_to_region_context ctx;
> +	struct cxl_port *port;
> +
> +	ctx = (struct cxl_dpa_to_region_context) {
> +		.dpa = dpa,
> +	};
> +	port = dev_get_drvdata(&cxlmd->dev);
> +	if (port && is_cxl_endpoint(port) && port->commit_end != -1)
> +		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
> +
> +	return ctx.cxlr;
> +}
> +
>  static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
>  {
>  	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> @@ -242,6 +286,7 @@ int cxl_inject_poison(struct device *dev, u64 dpa)
>  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>  	struct cxl_mbox_inject_poison inject;
>  	struct cxl_mbox_cmd mbox_cmd;
> +	struct cxl_region *cxlr;
>  	int rc;
>
>  	if (!IS_ENABLED(CONFIG_DEBUG_FS))
> @@ -261,6 +306,13 @@ int cxl_inject_poison(struct device *dev, u64 dpa)
>  		.payload_in = &inject,
>  	};
>  	rc = cxl_internal_send_cmd(cxlmd->cxlds, &mbox_cmd);
> +	if (rc)
> +		goto out;
> +
> +	cxlr = cxl_dpa_to_region(cxlmd, dpa);
> +	if (cxlr)
> +		dev_warn_once(dev, "poison inject dpa:0x%llx region: %s\n",
> +			      dpa, dev_name(&cxlr->dev));
>  out:
>  	up_read(&cxl_dpa_rwsem);
>
> @@ -273,6 +325,7 @@ int cxl_clear_poison(struct device *dev, u64 dpa)
>  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>  	struct cxl_mbox_clear_poison clear;
>  	struct cxl_mbox_cmd mbox_cmd;
> +	struct cxl_region *cxlr;
>  	int rc;
>
>  	if (!IS_ENABLED(CONFIG_DEBUG_FS))
> @@ -303,7 +356,13 @@ int cxl_clear_poison(struct device *dev, u64 dpa)
>  	};
>
>  	rc = cxl_internal_send_cmd(cxlmd->cxlds, &mbox_cmd);
> +	if (rc)
> +		goto out;
>
> +	cxlr = cxl_dpa_to_region(cxlmd, dpa);
> +	if (cxlr)
> +		dev_warn_once(dev, "poison clear dpa:0x%llx region: %s\n",
> +			      dpa, dev_name(&cxlr->dev));
>  out:
>  	up_read(&cxl_dpa_rwsem);
>
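[For reference, a minimal sketch of what the match path of __cxl_dpa_to_region() looks like with the early return Jonathan suggests folded in. This is illustrative only, not a posted revision of the patch; it relies on the fact that device_for_each_child() stops iterating as soon as the callback returns a nonzero value.]

static int __cxl_dpa_to_region(struct device *dev, void *arg)
{
	struct cxl_dpa_to_region_context *ctx = arg;
	struct cxl_endpoint_decoder *cxled;
	u64 dpa = ctx->dpa;

	if (!is_endpoint_decoder(dev))
		return 0;

	cxled = to_cxl_endpoint_decoder(dev);
	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
		return 0;

	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
		return 0;

	dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
		dev_name(&cxled->cxld.region->dev));

	ctx->cxlr = cxled->cxld.region;

	/* Match found: a nonzero return stops device_for_each_child() */
	return 1;
}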
On Thu, Mar 30, 2023 at 07:55:46PM +0100, Jonathan Cameron wrote:
> On Sun, 26 Mar 2023 22:03:09 -0700
> alison.schofield@intel.com wrote:
>
> > From: Alison Schofield <alison.schofield@intel.com>
> >
> > Inject and clear poison capabilities and intended for debug usage only.
> > In order to be useful in debug environments, the driver needs to allow
> > inject and clear operations on DPAs mapped in regions.
> >
> > dev_warn_once() when either operation occurs.
> >
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> > ---
> >  drivers/cxl/core/memdev.c | 59 +++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 59 insertions(+)
> >
> > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> > index 0e39c3c3fb09..a83619c31f61 100644
> > --- a/drivers/cxl/core/memdev.c
> > +++ b/drivers/cxl/core/memdev.c

snip

> > +static int __cxl_dpa_to_region(struct device *dev, void *arg)
> > +{
> > +	struct cxl_dpa_to_region_context *ctx = arg;
> > +	struct cxl_endpoint_decoder *cxled;
> > +	u64 dpa = ctx->dpa;
> > +
> > +	if (!is_endpoint_decoder(dev))
> > +		return 0;
> > +
> > +	cxled = to_cxl_endpoint_decoder(dev);
> > +	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
> > +		return 0;
> > +
> > +	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
> > +		return 0;
> > +
> > +	dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
> > +		dev_name(&cxled->cxld.region->dev));
> > +
> > +	ctx->cxlr = cxled->cxld.region;
> > +
> If we have a match, little point in letting walk continue.
>
> 	return 1;

Yes, thanks! Returning 1 now to stop the walk.

> Also, I "think" we just know that the association has been built.
> Injecting poison is probably still fine if the region / decoder hasn't yet
> been committed.

I think you are right. If we want to allow inject in the space between
mapping and commit, then this work needs to move to the region driver,
similar to how cxl_get_poison_by_endpoint() in the get poison list
series works.

I'm not seeing how injecting poison in that gap, would be an important
debug scenario. Is it?

Alison

>
> Jonathan
>
> > +	return 0;
> > +}
> > +
> > +static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
> > +{
> > +	struct cxl_dpa_to_region_context ctx;
> > +	struct cxl_port *port;
> > +
> > +	ctx = (struct cxl_dpa_to_region_context) {
> > +		.dpa = dpa,
> > +	};
> > +	port = dev_get_drvdata(&cxlmd->dev);
> > +	if (port && is_cxl_endpoint(port) && port->commit_end != -1)
> > +		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
> > +
> > +	return ctx.cxlr;
> > +}
> > +
> >  static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
> >  {
> >  	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> > @@ -242,6 +286,7 @@ int cxl_inject_poison(struct device *dev, u64 dpa)
> >  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> >  	struct cxl_mbox_inject_poison inject;
> >  	struct cxl_mbox_cmd mbox_cmd;
> > +	struct cxl_region *cxlr;
> >  	int rc;
> >
> >  	if (!IS_ENABLED(CONFIG_DEBUG_FS))
> > @@ -261,6 +306,13 @@ int cxl_inject_poison(struct device *dev, u64 dpa)
> >  		.payload_in = &inject,
> >  	};
> >  	rc = cxl_internal_send_cmd(cxlmd->cxlds, &mbox_cmd);
> > +	if (rc)
> > +		goto out;
> > +
> > +	cxlr = cxl_dpa_to_region(cxlmd, dpa);
> > +	if (cxlr)
> > +		dev_warn_once(dev, "poison inject dpa:0x%llx region: %s\n",
> > +			      dpa, dev_name(&cxlr->dev));
> >  out:
> >  	up_read(&cxl_dpa_rwsem);
> >
> > @@ -273,6 +325,7 @@ int cxl_clear_poison(struct device *dev, u64 dpa)
> >  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> >  	struct cxl_mbox_clear_poison clear;
> >  	struct cxl_mbox_cmd mbox_cmd;
> > +	struct cxl_region *cxlr;
> >  	int rc;
> >
> >  	if (!IS_ENABLED(CONFIG_DEBUG_FS))
> > @@ -303,7 +356,13 @@ int cxl_clear_poison(struct device *dev, u64 dpa)
> >  	};
> >
> >  	rc = cxl_internal_send_cmd(cxlmd->cxlds, &mbox_cmd);
> > +	if (rc)
> > +		goto out;
> >
> > +	cxlr = cxl_dpa_to_region(cxlmd, dpa);
> > +	if (cxlr)
> > +		dev_warn_once(dev, "poison clear dpa:0x%llx region: %s\n",
> > +			      dpa, dev_name(&cxlr->dev));
> >  out:
> >  	up_read(&cxl_dpa_rwsem);
> >
> > Also, I "think" we just know that the association has been built.
> > Injecting poison is probably still fine if the region / decoder hasn't yet
> > been committed.
>
> I think you are right. If we want to allow inject in the space between
> mapping and commit, then this work needs to move to the region driver,
> similar to how cxl_get_poison_by_endpoint() in the get poison list
> series works.
>
> I'm not seeing how injecting poison in that gap, would be an important
> debug scenario. Is it?
>
Probably not ;)  Maybe a comment to say that this is being conservative by
preventing it earlier than strictly necessary.  If this merged whilst I
wasn't paying attention no need to add one.
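[For illustration, the kind of comment Jonathan is asking for would sit on the commit_end check in cxl_dpa_to_region(). The wording below is a sketch only, not the author's, and assumes the region lookup stays in the memdev core rather than moving to the region driver.]

static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dpa_to_region_context ctx;
	struct cxl_port *port;

	ctx = (struct cxl_dpa_to_region_context) {
		.dpa = dpa,
	};
	port = dev_get_drvdata(&cxlmd->dev);
	/*
	 * Deliberately conservative: only report a region association for
	 * an endpoint port with a committed decode. Poison could arguably
	 * be injected in the window between region mapping and commit, but
	 * that is not an interesting debug scenario, so it is not handled
	 * here.
	 */
	if (port && is_cxl_endpoint(port) && port->commit_end != -1)
		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);

	return ctx.cxlr;
}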
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 0e39c3c3fb09..a83619c31f61 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -213,6 +213,50 @@ ssize_t cxl_trigger_poison_list(struct device *dev,
 }
 EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);
 
+struct cxl_dpa_to_region_context {
+	struct cxl_region *cxlr;
+	u64 dpa;
+};
+
+static int __cxl_dpa_to_region(struct device *dev, void *arg)
+{
+	struct cxl_dpa_to_region_context *ctx = arg;
+	struct cxl_endpoint_decoder *cxled;
+	u64 dpa = ctx->dpa;
+
+	if (!is_endpoint_decoder(dev))
+		return 0;
+
+	cxled = to_cxl_endpoint_decoder(dev);
+	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
+		return 0;
+
+	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
+		return 0;
+
+	dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
+		dev_name(&cxled->cxld.region->dev));
+
+	ctx->cxlr = cxled->cxld.region;
+
+	return 0;
+}
+
+static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
+{
+	struct cxl_dpa_to_region_context ctx;
+	struct cxl_port *port;
+
+	ctx = (struct cxl_dpa_to_region_context) {
+		.dpa = dpa,
+	};
+	port = dev_get_drvdata(&cxlmd->dev);
+	if (port && is_cxl_endpoint(port) && port->commit_end != -1)
+		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
+
+	return ctx.cxlr;
+}
+
 static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
 {
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
@@ -242,6 +286,7 @@ int cxl_inject_poison(struct device *dev, u64 dpa)
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_mbox_inject_poison inject;
 	struct cxl_mbox_cmd mbox_cmd;
+	struct cxl_region *cxlr;
 	int rc;
 
 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
@@ -261,6 +306,13 @@ int cxl_inject_poison(struct device *dev, u64 dpa)
 		.payload_in = &inject,
 	};
 	rc = cxl_internal_send_cmd(cxlmd->cxlds, &mbox_cmd);
+	if (rc)
+		goto out;
+
+	cxlr = cxl_dpa_to_region(cxlmd, dpa);
+	if (cxlr)
+		dev_warn_once(dev, "poison inject dpa:0x%llx region: %s\n",
+			      dpa, dev_name(&cxlr->dev));
 out:
 	up_read(&cxl_dpa_rwsem);
 
@@ -273,6 +325,7 @@ int cxl_clear_poison(struct device *dev, u64 dpa)
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_mbox_clear_poison clear;
 	struct cxl_mbox_cmd mbox_cmd;
+	struct cxl_region *cxlr;
 	int rc;
 
 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
@@ -303,7 +356,13 @@ int cxl_clear_poison(struct device *dev, u64 dpa)
 	};
 
 	rc = cxl_internal_send_cmd(cxlmd->cxlds, &mbox_cmd);
+	if (rc)
+		goto out;
 
+	cxlr = cxl_dpa_to_region(cxlmd, dpa);
+	if (cxlr)
+		dev_warn_once(dev, "poison clear dpa:0x%llx region: %s\n",
+			      dpa, dev_name(&cxlr->dev));
 out:
 	up_read(&cxl_dpa_rwsem);