Message ID | 0eebc96f0e36ebc74b0dc0f4112b70cced6907a7.1707891715.git.alison.schofield@intel.com |
---|---|
State | New, archived |
Headers | show |
Series | XOR 3-6-12 HB Interleave Calc Repair | expand |
alison.schofield@ wrote: > From: Alison Schofield <alison.schofield@intel.com> > > The XOR host bridge look-up function is broken in its application > of the modulo calculation for interleaves that are multiples of 3. > > The failure appears like this: > [] cxl_core:cxl_region_attach_position:1433: cxl region4: mem0:decoder8.1 invalid target position for decoder3.2 > > Replace the broken modulo calc with the same modulo calc as used > for Modulo Math. This is per CXL spec definition but was overlooked > in the original over-complicated implementation. > > With the simple modulo calculation, the jump to a helper function > becomes needless and the work is now presented in a straight line. > > Display the interleave_arithmetic in the dev_dbg() of successfully > setup root decoders to make debug lookup easier. > > Fixes: f9db85bfec0d ("cxl/acpi: Support CXL XOR Interleave Math (CXIMS)") > Signed-off-by: Alison Schofield <alison.schofield@intel.com> > --- > drivers/cxl/acpi.c | 51 ++++++++++++++++++++-------------------------- > 1 file changed, 22 insertions(+), 29 deletions(-) > > diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c > index dcf2b39e1048..86c735f733ea 100644 > --- a/drivers/cxl/acpi.c > +++ b/drivers/cxl/acpi.c > @@ -22,31 +22,6 @@ static const guid_t acpi_cxl_qtg_id_guid = > GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, > 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); > > -/* > - * Find a targets entry (n) in the host bridge interleave list. > - * CXL Specification 3.0 Table 9-22 > - */ > -static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw, > - int ig) > -{ > - int i = 0, n = 0; > - u8 eiw; > - > - /* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */ > - if (iw != 3) { > - for (i = 0; i < cximsd->nr_maps; i++) > - n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i; > - } > - /* IW: 3,6,12 add a modulo calculation to 'n' */ > - if (!is_power_of_2(iw)) { > - if (ways_to_eiw(iw, &eiw)) > - return -1; > - hpa &= GENMASK_ULL(51, eiw + ig); > - n |= do_div(hpa, 3) << i; > - } > - return n; > -} > - > static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos) > { > struct cxl_cxims_data *cximsd = cxlrd->platform_data; > @@ -62,11 +37,28 @@ static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos) > "misconfigured root decoder\n")) > return NULL; > > + /* > + * Find a targets entry in the host bridge interleave > + * list as defined in CXL Specification 3.0 Table 9-22 > + * > + * iw: 1 is no interleave, so entry is 0 > + * iw: 3 uses a modulo calc only > + * iw: 2,4,6,8,12,16 use xormaps > + * iw: 6,12 apply a modulo calc after xormaps > + */ > + > + if (iw == 1) > + return cxlrd->cxlsd.target[0]; > + > + if (iw == 3) > + return cxlrd->cxlsd.target[pos % iw]; > + > hpa = cxlrd->res->start + pos * ig; > + for (int i = 0; i < cximsd->nr_maps; i++) > + n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i; I am with you up to this point... > > - /* Entry (n) is 0 for no interleave (iw == 1) */ > - if (iw != 1) > - n = cxl_xor_calc_n(hpa, cximsd, iw, ig); > + if (iw == 6 || iw == 12) > + n |= pos % iw; Can you help me map this back to Table-9-22? Shouldn't this be: n |= (pos % iw) << (iw / 3); ?
Dan Williams wrote: > alison.schofield@ wrote: > > From: Alison Schofield <alison.schofield@intel.com> > > > > The XOR host bridge look-up function is broken in its application > > of the modulo calculation for interleaves that are multiples of 3. > > > > The failure appears like this: > > [] cxl_core:cxl_region_attach_position:1433: cxl region4: mem0:decoder8.1 invalid target position for decoder3.2 > > > > Replace the broken modulo calc with the same modulo calc as used > > for Modulo Math. This is per CXL spec definition but was overlooked > > in the original over-complicated implementation. > > > > With the simple modulo calculation, the jump to a helper function > > becomes needless and the work is now presented in a straight line. > > > > Display the interleave_arithmetic in the dev_dbg() of successfully > > setup root decoders to make debug lookup easier. > > > > Fixes: f9db85bfec0d ("cxl/acpi: Support CXL XOR Interleave Math (CXIMS)") > > Signed-off-by: Alison Schofield <alison.schofield@intel.com> [..] > > - /* Entry (n) is 0 for no interleave (iw == 1) */ > > - if (iw != 1) > > - n = cxl_xor_calc_n(hpa, cximsd, iw, ig); > > + if (iw == 6 || iw == 12) > > + n |= pos % iw; > > Can you help me map this back to Table-9-22? > > Shouldn't this be: > > n |= (pos % iw) << (iw / 3); Wait, no: n |= (pos % iw) * (iw / 3); ...to match: 6: + 2* HPA[51:9+HBIG] MOD 3 12: + 4* HPA[51:10+HBIG] MOD 3
On Thu, Feb 29, 2024 at 02:35:12PM -0800, Dan Williams wrote: > Dan Williams wrote: > > alison.schofield@ wrote: > > > From: Alison Schofield <alison.schofield@intel.com> > > > > > > The XOR host bridge look-up function is broken in its application > > > of the modulo calculation for interleaves that are multiples of 3. > > > > > > The failure appears like this: > > > [] cxl_core:cxl_region_attach_position:1433: cxl region4: mem0:decoder8.1 invalid target position for decoder3.2 > > > > > > Replace the broken modulo calc with the same modulo calc as used > > > for Modulo Math. This is per CXL spec definition but was overlooked > > > in the original over-complicated implementation. > > > > > > With the simple modulo calculation, the jump to a helper function > > > becomes needless and the work is now presented in a straight line. > > > > > > Display the interleave_arithmetic in the dev_dbg() of successfully > > > setup root decoders to make debug lookup easier. > > > > > > Fixes: f9db85bfec0d ("cxl/acpi: Support CXL XOR Interleave Math (CXIMS)") > > > Signed-off-by: Alison Schofield <alison.schofield@intel.com> > [..] > > > - /* Entry (n) is 0 for no interleave (iw == 1) */ > > > - if (iw != 1) > > > - n = cxl_xor_calc_n(hpa, cximsd, iw, ig); > > > + if (iw == 6 || iw == 12) > > > + n |= pos % iw; > > > > Can you help me map this back to Table-9-22? > > > > Shouldn't this be: > > > > n |= (pos % iw) << (iw / 3); > > Wait, no: > > n |= (pos % iw) * (iw / 3); > > ...to match: > > 6: + 2* HPA[51:9+HBIG] MOD 3 > > 12: + 4* HPA[51:10+HBIG] MOD 3 Hi Dan, tl;mrp <-- must read please :) Yes - that table is the key and I have a new found understanding, or misunderstaning, of how to use it to share here. But first, I did follow up on your suggestions along with more variation to try and fix the existing code path: Evaluating other calcs with an HBIW of 6 and pos[0..5]: > Shouldn't this be: > > n |= (pos % iw) << (iw / 3); No. For each pos in [0..5] and HBIW = 6 the right-hand side of that assignment results in 0, 4, 8, 12, 16, and 20, so goes beyond the HBIW of 6. > Wait, no: > > n |= (pos % iw) * (iw / 3); No. The right side yields 0,2,4,6,8,10, beyond HBIW of 6. I sampled many flavors of these and kept going back to Table-22 and a 6 way HB interleave setup, trying to make something work. The fact that the target-list is in HB interleave order hit me and here's where it took me: From Table - 22 "A list of all the Interleave Targets. The number of entries in this list shall match the Number of Interleave Ways (NIW). The order of the targets reported in this List indicates the order in the Interleave Set" Using a 6 way Host Bridge Interleave: HB0 HB1 HB2 HB3 HB4 HB5 Using this CFMWS and CXIMS. This is a sample of what a 6 way interleave may look like and how the xormap directs the traffic to HBs. cfmws30 = { .cfmws = { .header = { .type = ACPI_CEDT_TYPE_CFMWS, .length = sizeof(mock_cedt.cfmws30), }, .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 9, .granularity = 0, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = 0, .window_size = SZ_256M * 24UL, }, .target = { HB0, HB3, HB4, HB1, HB2, HB5 }, }, cxims0 = { .cxims = { .header = { .type = ACPI_CEDT_TYPE_CXIMS, .length = sizeof(mock_cedt.cxims0), }, .hbig = 0, .nr_xormaps = 1, }, /* xormap for 6 way interleave */ .xormap_list = { 0x2020900 }, }, Given an HPA, HBIG, HBIW - the Table 9-22 calcs direct the routing of that HPA. In order to route an address to the correct decoder (N), do N = XORALLBITS (HPA AND XORMAP[0]) + 2* HPA[51:9+HBIG] MOD 3 Example data: hpa:0xf010000100 map:0x2020900 HBIG: 6 N = XORALLBITS (0xf01000010 AND 0x2020900) + 2* 1 MOD 3 N = 3 That says to route HPA 0xf01000100 to the 3rd Host Bridge: HB3 (And, of course, part of the routing is that gets reduced to an offset, the position bit gets removed and the offset is applied to the DPA base.) All very interesting but is not applicable for what the driver is trying to do in calc_hb. The driver wants to find the index into the root decoder (cxlsd) target list. It is not HB3. Recall that the CFMWS presents the target list is interleave order. The CXL driver stores that target_list in interleave order. Applying the Table 9-22 calc doesn't apply. It shouldn't and my ahah moment here is that the CXL driver does not need to use that calc. I didn't see this until I had that 6 way HBIW where a CFMWS target list was 'dis-ordered'. Previously, 2 & 4 way XOR, applying the calc was needless, but functioned, because the lists were ordered. Given that the drivers target-list is in interleave order, use (pos % HBIW) to get the index into the decoder target list. We don't need the complex calc to jump to the right location in the interleave since we stored the interleave order. And note that the (pos % HBIW) will fail with 'invalid target position' if the region creation is attempted with a badly ordered target list. The check is as valid as ever. I've verified this with these configs: configs="1 11 111 1111 111111 2 22 222 2222 222222 4 44" where an entry like 111 means 1+1+1 means 3 way HB Interleave I only reached this point while working on that 6 way XOR config. The upstream code works for 1 11 1111 and the like probably because the lists were simply ordered. I'm ready to hear where this goes wrong. It reminds me of the address translation work where I was hung up on the xormaps until figuring out that those were routing instructions and didn't come into play at the driver level of translation. Direction check please. If this makes sense, the fix is simple, but the cleanup of useless code will cause some churn. Alison
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index dcf2b39e1048..86c735f733ea 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -22,31 +22,6 @@ static const guid_t acpi_cxl_qtg_id_guid = GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); -/* - * Find a targets entry (n) in the host bridge interleave list. - * CXL Specification 3.0 Table 9-22 - */ -static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw, - int ig) -{ - int i = 0, n = 0; - u8 eiw; - - /* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */ - if (iw != 3) { - for (i = 0; i < cximsd->nr_maps; i++) - n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i; - } - /* IW: 3,6,12 add a modulo calculation to 'n' */ - if (!is_power_of_2(iw)) { - if (ways_to_eiw(iw, &eiw)) - return -1; - hpa &= GENMASK_ULL(51, eiw + ig); - n |= do_div(hpa, 3) << i; - } - return n; -} - static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos) { struct cxl_cxims_data *cximsd = cxlrd->platform_data; @@ -62,11 +37,28 @@ static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos) "misconfigured root decoder\n")) return NULL; + /* + * Find a targets entry in the host bridge interleave + * list as defined in CXL Specification 3.0 Table 9-22 + * + * iw: 1 is no interleave, so entry is 0 + * iw: 3 uses a modulo calc only + * iw: 2,4,6,8,12,16 use xormaps + * iw: 6,12 apply a modulo calc after xormaps + */ + + if (iw == 1) + return cxlrd->cxlsd.target[0]; + + if (iw == 3) + return cxlrd->cxlsd.target[pos % iw]; + hpa = cxlrd->res->start + pos * ig; + for (int i = 0; i < cximsd->nr_maps; i++) + n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i; - /* Entry (n) is 0 for no interleave (iw == 1) */ - if (iw != 1) - n = cxl_xor_calc_n(hpa, cximsd, iw, ig); + if (iw == 6 || iw == 12) + n |= pos % iw; if (n < 0) return NULL; @@ -424,8 +416,9 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, dev_err(dev, "Failed to add decode range: %pr", res); return rc; } - dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n", + dev_dbg(dev, "add: %s math: %s node: %d range [%#llx - %#llx]\n", dev_name(&cxld->dev), + cfmws->interleave_arithmetic ? "xor" : "modulo", phys_to_target_node(cxld->hpa_range.start), cxld->hpa_range.start, cxld->hpa_range.end);