Message ID | 37ac2e3d3580af13755edd6f8e0bbdf11d5c206b.1668227077.git.alison.schofield@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | CXL XOR Interleave Arithmetic | expand |
On Fri, 11 Nov 2022, alison.schofield@intel.com wrote: >+static int cxl_parse_cxims(union acpi_subtable_headers *header, void *arg, >+ const unsigned long end) >+{ >+ struct acpi_cedt_cxims *cxims = (struct acpi_cedt_cxims *)header; >+ struct cxl_cxims_context *ctx = arg; >+ struct cxl_root_decoder *cxlrd = ctx->cxlrd; >+ struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; >+ struct device *dev = ctx->dev; >+ struct cxims_data *cximsd; >+ unsigned int hbig, nr_maps; >+ int rc; >+ >+ rc = cxl_to_granularity(cxims->hbig, &hbig); >+ if (rc) >+ return rc; >+ >+ if (hbig == cxld->interleave_granularity) { >+ /* IW 1,3 do not use xormaps and skip this parsing entirely */ >+ >+ if (is_power_of_2(cxld->interleave_ways)) >+ /* 2, 4, 8, 16 way */ >+ nr_maps = ilog2(cxld->interleave_ways); >+ else >+ /* 6, 12 way */ >+ nr_maps = ilog2(cxld->interleave_ways / 3); >+ >+ if (cxims->nr_xormaps < nr_maps) { >+ dev_dbg(dev, "CXIMS nr_xormaps[%d] expected[%d]\n", >+ cxims->nr_xormaps, nr_maps); >+ return -ENXIO; >+ } >+ >+ cximsd = devm_kzalloc(dev, >+ struct_size(cximsd, xormaps, nr_maps), >+ GFP_KERNEL); Need to check for failed kzalloc. >+ memcpy(cximsd->xormaps, cxims->xormap_list, >+ nr_maps * sizeof(*cximsd->xormaps)); >+ cximsd->nr_maps = nr_maps; >+ cxlrd->platform_data = cximsd; >+ cxlrd->calc_hb = cxl_hb_xor; >+ } >+ return 0; >+} >+ > static unsigned long cfmws_to_decoder_flags(int restrictions) > { > unsigned long flags = CXL_DECODER_F_ENABLE; >@@ -33,11 +130,6 @@ static int cxl_acpi_cfmws_verify(struct device *dev, > int rc, expected_len; > unsigned int ways; > >- if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) { >- dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n"); >- return -EINVAL; >- } It seems more robust to keep the check and just add the ACPI_CEDT_CFMWS_ARITHMETIC_XOR case. 
>- > if (!IS_ALIGNED(cfmws->base_hpa, SZ_256M)) { > dev_err(dev, "CFMWS Base HPA not 256MB aligned\n"); > return -EINVAL; >@@ -84,6 +176,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, > struct cxl_cfmws_context *ctx = arg; > struct cxl_port *root_port = ctx->root_port; > struct resource *cxl_res = ctx->cxl_res; >+ struct cxl_cxims_context cxims_ctx; > struct cxl_root_decoder *cxlrd; > struct device *dev = ctx->dev; > struct acpi_cedt_cfmws *cfmws; >@@ -148,7 +241,33 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, > ig = CXL_DECODER_MIN_GRANULARITY; > cxld->interleave_granularity = ig; > >+ if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { >+ if (ways == 1 || ways == 3) { >+ /* Skip CXIMS parsing for IW 1 or 3. No xormaps used */ >+ cxlrd->calc_hb = cxl_hb_xor; >+ goto decoder_add; >+ } >+ >+ cxims_ctx = (struct cxl_cxims_context) { >+ .dev = dev, >+ .cxlrd = cxlrd, >+ }; >+ >+ rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CXIMS, >+ cxl_parse_cxims, &cxims_ctx); >+ if (rc < 0) >+ goto err_xormap; >+ >+ if (cxlrd->calc_hb != cxl_hb_xor) { >+ rc = -ENXIO; >+ goto err_xormap; >+ } >+ } >+ I'm not crazy about now having different layers setting the ->calc_hb callback. Normally this is done when cxl_root_decoder_alloc(), and now it gets overwritten and reassigned in both parser calls (cfmws and cxims). How about just moving it out of the cxlrd allocation call and doing it all here in cxl_parse_cfmws()? You can also avoid the decoder_add goto. if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { if (ways != 1 && ways != 3) { // arm cxims_ctx rc = acpi_table_parse_cedt(); if (rc < 0) goto err_xormap; } cxlrd->calc_hb = cxl_hb_xor; } else cxlrd->calc_hb = cxl_hb_modulo; >+decoder_add: > rc = cxl_decoder_add(cxld, target_map); >+ >+err_xormap: > if (rc) > put_device(&cxld->dev); > else
On Fri, Nov 11, 2022 at 10:28:23PM -0800, Davidlohr Bueso wrote: Thanks for the review David! > On Fri, 11 Nov 2022, alison.schofield@intel.com wrote: > > > +static int cxl_parse_cxims(union acpi_subtable_headers *header, void *arg, > > + const unsigned long end) > > +{ > > + struct acpi_cedt_cxims *cxims = (struct acpi_cedt_cxims *)header; > > + struct cxl_cxims_context *ctx = arg; > > + struct cxl_root_decoder *cxlrd = ctx->cxlrd; > > + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; > > + struct device *dev = ctx->dev; > > + struct cxims_data *cximsd; > > + unsigned int hbig, nr_maps; > > + int rc; > > + > > + rc = cxl_to_granularity(cxims->hbig, &hbig); > > + if (rc) > > + return rc; > > + > > + if (hbig == cxld->interleave_granularity) { > > + /* IW 1,3 do not use xormaps and skip this parsing entirely */ > > + > > + if (is_power_of_2(cxld->interleave_ways)) > > + /* 2, 4, 8, 16 way */ > > + nr_maps = ilog2(cxld->interleave_ways); > > + else > > + /* 6, 12 way */ > > + nr_maps = ilog2(cxld->interleave_ways / 3); > > + > > + if (cxims->nr_xormaps < nr_maps) { > > + dev_dbg(dev, "CXIMS nr_xormaps[%d] expected[%d]\n", > > + cxims->nr_xormaps, nr_maps); > > + return -ENXIO; > > + } > > + > > + cximsd = devm_kzalloc(dev, > > + struct_size(cximsd, xormaps, nr_maps), > > + GFP_KERNEL); > > Need to check for failed kzalloc. > Got it, thanks! 
> > + memcpy(cximsd->xormaps, cxims->xormap_list, > > + nr_maps * sizeof(*cximsd->xormaps)); > > + cximsd->nr_maps = nr_maps; > > + cxlrd->platform_data = cximsd; > > + cxlrd->calc_hb = cxl_hb_xor; > > + } > > + return 0; > > +} > > + > > static unsigned long cfmws_to_decoder_flags(int restrictions) > > { > > unsigned long flags = CXL_DECODER_F_ENABLE; > > @@ -33,11 +130,6 @@ static int cxl_acpi_cfmws_verify(struct device *dev, > > int rc, expected_len; > > unsigned int ways; > > > > - if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) { > > - dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n"); > > - return -EINVAL; > > - } > > It seems more robust to keep the check and just add the ACPI_CEDT_CFMWS_ARITHMETIC_XOR case. > The reasoning behind removing the check, versus expanding it, is that the check shouldn't have been there in the first place. The ACPI driver is responsible for parsing the CFMWS's but it is the responsibility of the region driver to decide which CFMWS's it can use. If you believe that premise, then we let all interleave arithmetics 'get through' to the region driver. More below on same topic... 
> > - > > if (!IS_ALIGNED(cfmws->base_hpa, SZ_256M)) { > > dev_err(dev, "CFMWS Base HPA not 256MB aligned\n"); > > return -EINVAL; > > @@ -84,6 +176,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, > > struct cxl_cfmws_context *ctx = arg; > > struct cxl_port *root_port = ctx->root_port; > > struct resource *cxl_res = ctx->cxl_res; > > + struct cxl_cxims_context cxims_ctx; > > struct cxl_root_decoder *cxlrd; > > struct device *dev = ctx->dev; > > struct acpi_cedt_cfmws *cfmws; > > @@ -148,7 +241,33 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, > > ig = CXL_DECODER_MIN_GRANULARITY; > > cxld->interleave_granularity = ig; > > > > + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { > > + if (ways == 1 || ways == 3) { > > + /* Skip CXIMS parsing for IW 1 or 3. No xormaps used */ > > + cxlrd->calc_hb = cxl_hb_xor; > > + goto decoder_add; > > + } > > + > > + cxims_ctx = (struct cxl_cxims_context) { > > + .dev = dev, > > + .cxlrd = cxlrd, > > + }; > > + > > + rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CXIMS, > > + cxl_parse_cxims, &cxims_ctx); > > + if (rc < 0) > > + goto err_xormap; > > + > > + if (cxlrd->calc_hb != cxl_hb_xor) { > > + rc = -ENXIO; > > + goto err_xormap; > > + } > > + } > > + > > I'm not crazy about now having different layers setting the ->calc_hb callback. Normally > this is done when cxl_root_decoder_alloc(), and now it gets overwritten and reassigned in > both parser calls (cfmws and cxims). How about just moving it out of the cxlrd allocation > call and doing it all here in cxl_parse_cfmws()? You can also avoid the decoder_add goto. Following on the theme of maintaining the separation of ACPI and REGION driver responsibilities, the decision on the calc_hb callback doesn't belong to the ACPI driver. Polluting the ACPI driver w knowledge of cxl_hb_xor() is not a clean soln either, as you point out. 
How about this as a clean(er) separation: ACPI Driver - adds cxlrd->interleave arithmetic field - adds fields: .interleave_arithmetic and .callback to the new platform data field (i.e. cxims_data) Region Driver - uses the cxlrd->interleave_arithmetic field to assign callback - If that interleave_arithmetic is not 'known' to the region driver, it can look up the callback in the cxlrd platform data I will try the above flow out, and await further discussion on the separation of ACPI & REGION driver responsibilities. Thanks for bringing it up David, Alison > snip >
On Mon, 14 Nov 2022, Alison Schofield wrote: >The reasoning behind removing the check, versus expanding it, is that the >check shouldn't have been there in the first place. The ACPI driver is >responsible for parsing the CFMWS's but it is the responsibility of the >region driver to decide which CFMWS's it can use. > >If you believe that premise, then we let all interleave arithmetics >'get through' to the region driver. The way I see it, ACPI should be setting the callback because it is a _direct_ consequence of parsing the CFMWS, and that's really the same logic as we have now, in that it sets it when allocating the root port decoder. And that doesn't take away from the region driver responsibility of choosing the actual host bridge. Thanks, Davidlohr
On Mon, Nov 14, 2022 at 10:57:29AM -0800, Davidlohr Bueso wrote: > On Mon, 14 Nov 2022, Alison Schofield wrote: > > > The reasoning behind removing the check, versus expanding it, is that the > > check shouldn't have been there in the first place. The ACPI driver is > > responsible for parsing the CFMWS's but it is the responsibility of the > > region driver to decide which CFMWS's it can use. > > > > If you believe that premise, then we let all interleave arithmetics > > 'get through' to the region driver. > > The way I see it, acpi should be setting the callback because it is a _direct_ > consequence of parsing the cfmws, and that's really the same logic as we have > now, in that it sets it when allocating the rp decoder. And that doesn't take > away from the region driver responsibility of choosing the actual host bridge. With a little help from my friends ;) I'm hopeful the next version meets your expectation by setting the calc_hb only once when allocating that root port decoder. > > Thanks, > Davidlohr
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index fb649683dd3a..1211c31c29d2 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -6,9 +6,106 @@ #include <linux/kernel.h> #include <linux/acpi.h> #include <linux/pci.h> +#include <asm/div64.h> #include "cxlpci.h" #include "cxl.h" +struct cxims_data { + int nr_maps; + u64 xormaps[]; +}; + +/* + * Find a targets entry (n) in the host bridge interleave list. + * CXL Specfication 3.0 Table 9-22 + */ +static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos) +{ + struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; + struct cxims_data *cximsd = cxlrd->platform_data; + struct cxl_decoder *cxld = &cxlsd->cxld; + int ig = cxld->interleave_granularity; + int iw = cxld->interleave_ways; + int eiw, i = 0, n = 0; + u64 hpa; + + if (dev_WARN_ONCE(&cxld->dev, + cxld->interleave_ways != cxlsd->nr_targets, + "misconfigured root decoder\n")) + return NULL; + + if (iw == 1) + /* Entry is always 0 for no interleave */ + return cxlrd->cxlsd.target[0]; + + hpa = cxlrd->res->start + pos * ig; + + if (iw == 3) + goto no_map; + + /* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */ + for (i = 0; i < cximsd->nr_maps; i++) + n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i; + +no_map: + /* IW: 3,6,12 add a modulo calculation to 'n' */ + if (!is_power_of_2(iw)) { + eiw = ilog2(iw / 3) + 8; + hpa &= GENMASK_ULL(51, eiw + ig); + n |= do_div(hpa, 3) << i; + } + return cxlrd->cxlsd.target[n]; +} + +struct cxl_cxims_context { + struct device *dev; + struct cxl_root_decoder *cxlrd; +}; + +static int cxl_parse_cxims(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cedt_cxims *cxims = (struct acpi_cedt_cxims *)header; + struct cxl_cxims_context *ctx = arg; + struct cxl_root_decoder *cxlrd = ctx->cxlrd; + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct device *dev = ctx->dev; + struct cxims_data *cximsd; + unsigned int hbig, nr_maps; + int rc; + + rc = 
cxl_to_granularity(cxims->hbig, &hbig); + if (rc) + return rc; + + if (hbig == cxld->interleave_granularity) { + /* IW 1,3 do not use xormaps and skip this parsing entirely */ + + if (is_power_of_2(cxld->interleave_ways)) + /* 2, 4, 8, 16 way */ + nr_maps = ilog2(cxld->interleave_ways); + else + /* 6, 12 way */ + nr_maps = ilog2(cxld->interleave_ways / 3); + + if (cxims->nr_xormaps < nr_maps) { + dev_dbg(dev, "CXIMS nr_xormaps[%d] expected[%d]\n", + cxims->nr_xormaps, nr_maps); + return -ENXIO; + } + + cximsd = devm_kzalloc(dev, + struct_size(cximsd, xormaps, nr_maps), + GFP_KERNEL); + memcpy(cximsd->xormaps, cxims->xormap_list, + nr_maps * sizeof(*cximsd->xormaps)); + cximsd->nr_maps = nr_maps; + cxlrd->platform_data = cximsd; + cxlrd->calc_hb = cxl_hb_xor; + } + return 0; +} + static unsigned long cfmws_to_decoder_flags(int restrictions) { unsigned long flags = CXL_DECODER_F_ENABLE; @@ -33,11 +130,6 @@ static int cxl_acpi_cfmws_verify(struct device *dev, int rc, expected_len; unsigned int ways; - if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) { - dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n"); - return -EINVAL; - } - if (!IS_ALIGNED(cfmws->base_hpa, SZ_256M)) { dev_err(dev, "CFMWS Base HPA not 256MB aligned\n"); return -EINVAL; @@ -84,6 +176,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, struct cxl_cfmws_context *ctx = arg; struct cxl_port *root_port = ctx->root_port; struct resource *cxl_res = ctx->cxl_res; + struct cxl_cxims_context cxims_ctx; struct cxl_root_decoder *cxlrd; struct device *dev = ctx->dev; struct acpi_cedt_cfmws *cfmws; @@ -148,7 +241,33 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, ig = CXL_DECODER_MIN_GRANULARITY; cxld->interleave_granularity = ig; + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { + if (ways == 1 || ways == 3) { + /* Skip CXIMS parsing for IW 1 or 3. 
No xormaps used */ + cxlrd->calc_hb = cxl_hb_xor; + goto decoder_add; + } + + cxims_ctx = (struct cxl_cxims_context) { + .dev = dev, + .cxlrd = cxlrd, + }; + + rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CXIMS, + cxl_parse_cxims, &cxims_ctx); + if (rc < 0) + goto err_xormap; + + if (cxlrd->calc_hb != cxl_hb_xor) { + rc = -ENXIO; + goto err_xormap; + } + } + +decoder_add: rc = cxl_decoder_add(cxld, target_map); + +err_xormap: if (rc) put_device(&cxld->dev); else diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index ac75554b5d76..3f97a5e01c12 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -330,12 +330,14 @@ struct cxl_switch_decoder { * @res: host / parent resource for region allocations * @region_id: region id for next region provisioning event * @calc_hb: which host bridge covers the n'th position by granularity + * @platform_data: platform specific configuration data * @cxlsd: base cxl switch decoder */ struct cxl_root_decoder { struct resource *res; atomic_t region_id; struct cxl_dport *(*calc_hb)(struct cxl_root_decoder *cxlrd, int pos); + void *platform_data; struct cxl_switch_decoder cxlsd; };