diff mbox series

[v4,2/3] cxl/acpi: Support CXL XOR Interleave Math (CXIMS)

Message ID 8c93daf7080f45ad4085d6e8556e4deac03f6322.1663687681.git.alison.schofield@intel.com
State Superseded
Headers show
Series CXL XOR Interleave Arithmetic | expand

Commit Message

Alison Schofield Sept. 20, 2022, 3:39 p.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

When the CFMWS is using XOR math, parse the corresponding
CXIMS structure and store the xormaps in the root decoder
structure. Use the xormaps in a new lookup, cxl_hb_xor(),
to find a targets entry in the host bridge interleave
target list.

Defined in CXL Specfication 3.0 Section: 9.17.1

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 drivers/cxl/cxl.h  |   2 +
 drivers/cxl/acpi.c | 133 +++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 130 insertions(+), 5 deletions(-)

Comments

Dan Williams Oct. 22, 2022, 12:01 a.m. UTC | #1
alison.schofield@ wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> When the CFMWS is using XOR math, parse the corresponding
> CXIMS structure and store the xormaps in the root decoder
> structure. Use the xormaps in a new lookup, cxl_hb_xor(),
> to find a targets entry in the host bridge interleave
> target list.
> 
> Defined in CXL Specfication 3.0 Section: 9.17.1
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> ---
>  drivers/cxl/cxl.h  |   2 +
>  drivers/cxl/acpi.c | 133 +++++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 130 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index f680450f0b16..0a17a7007bff 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -330,12 +330,14 @@ struct cxl_switch_decoder {
>   * @res: host / parent resource for region allocations
>   * @region_id: region id for next region provisioning event
>   * @calc_hb: which host bridge covers the n'th position by granularity
> + * @platform_data: platform specific configuration data
>   * @cxlsd: base cxl switch decoder
>   */
>  struct cxl_root_decoder {
>  	struct resource *res;
>  	atomic_t region_id;
>  	struct cxl_dport *(*calc_hb)(struct cxl_root_decoder *cxlrd, int pos);
> +	void *platform_data;
>  	struct cxl_switch_decoder cxlsd;
>  };
>  
> diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> index fb649683dd3a..51cd45d2ed98 100644
> --- a/drivers/cxl/acpi.c
> +++ b/drivers/cxl/acpi.c
> @@ -6,9 +6,110 @@
>  #include <linux/kernel.h>
>  #include <linux/acpi.h>
>  #include <linux/pci.h>
> +#include <asm/div64.h>
>  #include "cxlpci.h"
>  #include "cxl.h"
>  
> +struct cxims_data {
> +	int nr_maps;
> +	u64 xormaps[];
> +};
> +
> +/*
> + * Find a targets entry (n) in the host bridge interleave list.
> + * CXL Specfication 3.0 Table 9-22
> + */
> +static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
> +{
> +	struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
> +	struct cxims_data *cximsd = cxlrd->platform_data;
> +	struct cxl_decoder *cxld = &cxlsd->cxld;
> +	int ig = cxld->interleave_granularity;
> +	int iw = cxld->interleave_ways;
> +	int i, eiw, n = 0;
> +	u64 hpa;
> +
> +	if (dev_WARN_ONCE(&cxld->dev,
> +			  cxld->interleave_ways != cxlsd->nr_targets,
> +			  "misconfigured root decoder\n"))
> +		return NULL;
> +
> +	if (iw == 1)
> +		/* Entry is always 0 for no interleave */
> +		return cxlrd->cxlsd.target[0];
> +
> +	hpa = cxlrd->res->start + pos * ig;
> +
> +	if (iw == 3) {
> +		/* Initialize 'i' for the modulo calc */
> +		i = 0;

From cxl_acpi:

nr_maps = ilog2(cxld->interleave_ways / 3);

...so cximsd->nr_maps is already 0 when iw is 3, so no need for a goto,
unless I missed something.

> +		goto no_map;
> +	}
> +
> +	/* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */
> +	for (i = 0; i < cximsd->nr_maps; i++)
> +		n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i;
> +
> +no_map:
> +	/* IW: 3,6,12 add a modulo calculation to 'n' */
> +	if (!is_power_of_2(iw)) {
> +		eiw = ilog2(iw / 3) + 8;
> +		hpa &= GENMASK_ULL(51, eiw + ig);
> +		n = do_div(hpa, 3) << i;

I would just use "<< cxmisd->nr_maps" and move this before the
power-of-2 loop with its own early "return cxlrd->cxlsd.target[n];" in
the 'if ()' block. No need to force these 2 cases to have a common exit.
Alison Schofield Oct. 26, 2022, 7:12 a.m. UTC | #2
On Fri, Oct 21, 2022 at 05:01:42PM -0700, Dan Williams wrote:
> alison.schofield@ wrote:
> > From: Alison Schofield <alison.schofield@intel.com>
> > 
> > When the CFMWS is using XOR math, parse the corresponding
> > CXIMS structure and store the xormaps in the root decoder
> > structure. Use the xormaps in a new lookup, cxl_hb_xor(),
> > to find a targets entry in the host bridge interleave
> > target list.
> > 
> > Defined in CXL Specfication 3.0 Section: 9.17.1
> > 
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> > ---
> >  drivers/cxl/cxl.h  |   2 +
> >  drivers/cxl/acpi.c | 133 +++++++++++++++++++++++++++++++++++++++++++--
> >  2 files changed, 130 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> > index f680450f0b16..0a17a7007bff 100644
> > --- a/drivers/cxl/cxl.h
> > +++ b/drivers/cxl/cxl.h
> > @@ -330,12 +330,14 @@ struct cxl_switch_decoder {
> >   * @res: host / parent resource for region allocations
> >   * @region_id: region id for next region provisioning event
> >   * @calc_hb: which host bridge covers the n'th position by granularity
> > + * @platform_data: platform specific configuration data
> >   * @cxlsd: base cxl switch decoder
> >   */
> >  struct cxl_root_decoder {
> >  	struct resource *res;
> >  	atomic_t region_id;
> >  	struct cxl_dport *(*calc_hb)(struct cxl_root_decoder *cxlrd, int pos);
> > +	void *platform_data;
> >  	struct cxl_switch_decoder cxlsd;
> >  };
> >  
> > diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> > index fb649683dd3a..51cd45d2ed98 100644
> > --- a/drivers/cxl/acpi.c
> > +++ b/drivers/cxl/acpi.c
> > @@ -6,9 +6,110 @@
> >  #include <linux/kernel.h>
> >  #include <linux/acpi.h>
> >  #include <linux/pci.h>
> > +#include <asm/div64.h>
> >  #include "cxlpci.h"
> >  #include "cxl.h"
> >  
> > +struct cxims_data {
> > +	int nr_maps;
> > +	u64 xormaps[];
> > +};
> > +
> > +/*
> > + * Find a targets entry (n) in the host bridge interleave list.
> > + * CXL Specfication 3.0 Table 9-22
> > + */
> > +static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
> > +{
> > +	struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
> > +	struct cxims_data *cximsd = cxlrd->platform_data;
> > +	struct cxl_decoder *cxld = &cxlsd->cxld;
> > +	int ig = cxld->interleave_granularity;
> > +	int iw = cxld->interleave_ways;
> > +	int i, eiw, n = 0;
> > +	u64 hpa;
> > +
> > +	if (dev_WARN_ONCE(&cxld->dev,
> > +			  cxld->interleave_ways != cxlsd->nr_targets,
> > +			  "misconfigured root decoder\n"))
> > +		return NULL;
> > +
> > +	if (iw == 1)
> > +		/* Entry is always 0 for no interleave */
> > +		return cxlrd->cxlsd.target[0];
> > +
> > +	hpa = cxlrd->res->start + pos * ig;
> > +
> > +	if (iw == 3) {
> > +		/* Initialize 'i' for the modulo calc */
> > +		i = 0;
> 
> From cxl_acpi:
> 
> nr_maps = ilog2(cxld->interleave_ways / 3);
> 
> ...so cximsd->nr_maps is already 0 when iw is 3, so no need for a goto,
> unless I missed something.

That'd be a NULL dereference. For x1 and x3, which don't use
xormaps, no cximsd is saved. 

I kept the goto, but moved the init to the declaration.

> 
> > +		goto no_map;
> > +	}
> > +
> > +	/* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */
> > +	for (i = 0; i < cximsd->nr_maps; i++)
> > +		n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i;
> > +
> > +no_map:
> > +	/* IW: 3,6,12 add a modulo calculation to 'n' */
> > +	if (!is_power_of_2(iw)) {
> > +		eiw = ilog2(iw / 3) + 8;
> > +		hpa &= GENMASK_ULL(51, eiw + ig);
> > +		n = do_div(hpa, 3) << i;
> 
> I would just use "<< cxmisd->nr_maps" and move this before the
> power-of-2 loop with its own early "return cxlrd->cxlsd.target[n];" in
> the 'if ()' block. No need to force these 2 cases to have a common exit.

That makes sense based on what you see above, but the code is wrong.
It should add that final calc to the 'n' that was started with the xormaps.
Although x3 doesn't use any maps, x6 and x12 do.

this:
> +             n = do_div(hpa, 3) << i;
it should be
 +             n |= do_div(hpa, 3) << i;
diff mbox series

Patch

diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index f680450f0b16..0a17a7007bff 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -330,12 +330,14 @@  struct cxl_switch_decoder {
  * @res: host / parent resource for region allocations
  * @region_id: region id for next region provisioning event
  * @calc_hb: which host bridge covers the n'th position by granularity
+ * @platform_data: platform specific configuration data
  * @cxlsd: base cxl switch decoder
  */
 struct cxl_root_decoder {
 	struct resource *res;
 	atomic_t region_id;
 	struct cxl_dport *(*calc_hb)(struct cxl_root_decoder *cxlrd, int pos);
+	void *platform_data;
 	struct cxl_switch_decoder cxlsd;
 };
 
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index fb649683dd3a..51cd45d2ed98 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -6,9 +6,110 @@ 
 #include <linux/kernel.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include <asm/div64.h>
 #include "cxlpci.h"
 #include "cxl.h"
 
+struct cxims_data {
+	int nr_maps;
+	u64 xormaps[];
+};
+
+/*
+ * Find a targets entry (n) in the host bridge interleave list.
+ * CXL Specfication 3.0 Table 9-22
+ */
+static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
+{
+	struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
+	struct cxims_data *cximsd = cxlrd->platform_data;
+	struct cxl_decoder *cxld = &cxlsd->cxld;
+	int ig = cxld->interleave_granularity;
+	int iw = cxld->interleave_ways;
+	int i, eiw, n = 0;
+	u64 hpa;
+
+	if (dev_WARN_ONCE(&cxld->dev,
+			  cxld->interleave_ways != cxlsd->nr_targets,
+			  "misconfigured root decoder\n"))
+		return NULL;
+
+	if (iw == 1)
+		/* Entry is always 0 for no interleave */
+		return cxlrd->cxlsd.target[0];
+
+	hpa = cxlrd->res->start + pos * ig;
+
+	if (iw == 3) {
+		/* Initialize 'i' for the modulo calc */
+		i = 0;
+		goto no_map;
+	}
+
+	/* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */
+	for (i = 0; i < cximsd->nr_maps; i++)
+		n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i;
+
+no_map:
+	/* IW: 3,6,12 add a modulo calculation to 'n' */
+	if (!is_power_of_2(iw)) {
+		eiw = ilog2(iw / 3) + 8;
+		hpa &= GENMASK_ULL(51, eiw + ig);
+		n = do_div(hpa, 3) << i;
+	}
+
+	return cxlrd->cxlsd.target[n];
+}
+
+struct cxl_cxims_context {
+	struct device *dev;
+	struct cxl_root_decoder *cxlrd;
+};
+
+static int cxl_parse_cxims(union acpi_subtable_headers *header, void *arg,
+			   const unsigned long end)
+{
+	struct acpi_cedt_cxims *cxims = (struct acpi_cedt_cxims *)header;
+	struct cxl_cxims_context *ctx = arg;
+	struct cxl_root_decoder *cxlrd = ctx->cxlrd;
+	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
+	struct device *dev = ctx->dev;
+	struct cxims_data *cximsd;
+	unsigned int hbig, nr_maps;
+	int rc;
+
+	rc = cxl_to_granularity(cxims->hbig, &hbig);
+	if (rc)
+		return rc;
+
+	if (hbig == cxld->interleave_granularity) {
+		/* IW 1,3 do not use xormaps and skip this parsing entirely */
+
+		if (is_power_of_2(cxld->interleave_ways))
+			/* 2, 4, 8, 16 way */
+			nr_maps = ilog2(cxld->interleave_ways);
+		else
+			/* 6, 12 way */
+			nr_maps = ilog2(cxld->interleave_ways / 3);
+
+		if (cxims->nr_xormaps < nr_maps) {
+			dev_dbg(dev, "CXIMS nr_xormaps[%d] expected[%d]\n",
+				cxims->nr_xormaps, nr_maps);
+			return -ENXIO;
+		}
+
+		cximsd = devm_kzalloc(dev,
+				      struct_size(cximsd, xormaps, nr_maps),
+				      GFP_KERNEL);
+		memcpy(cximsd->xormaps, cxims->xormap_list,
+		       nr_maps * sizeof(*cximsd->xormaps));
+		cximsd->nr_maps = nr_maps;
+		cxlrd->platform_data = cximsd;
+		cxlrd->calc_hb = cxl_hb_xor;
+	}
+	return 0;
+}
+
 static unsigned long cfmws_to_decoder_flags(int restrictions)
 {
 	unsigned long flags = CXL_DECODER_F_ENABLE;
@@ -33,11 +134,6 @@  static int cxl_acpi_cfmws_verify(struct device *dev,
 	int rc, expected_len;
 	unsigned int ways;
 
-	if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) {
-		dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n");
-		return -EINVAL;
-	}
-
 	if (!IS_ALIGNED(cfmws->base_hpa, SZ_256M)) {
 		dev_err(dev, "CFMWS Base HPA not 256MB aligned\n");
 		return -EINVAL;
@@ -84,6 +180,7 @@  static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
 	struct cxl_cfmws_context *ctx = arg;
 	struct cxl_port *root_port = ctx->root_port;
 	struct resource *cxl_res = ctx->cxl_res;
+	struct cxl_cxims_context cxims_ctx;
 	struct cxl_root_decoder *cxlrd;
 	struct device *dev = ctx->dev;
 	struct acpi_cedt_cfmws *cfmws;
@@ -148,7 +245,33 @@  static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
 		ig = CXL_DECODER_MIN_GRANULARITY;
 	cxld->interleave_granularity = ig;
 
+	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
+		if (ways == 1 || ways == 3)	{
+			/* Skip CXIMS parsing for IW 1 or 3. No xormaps used */
+			cxlrd->calc_hb = cxl_hb_xor;
+			goto decoder_add;
+		}
+
+		cxims_ctx = (struct cxl_cxims_context) {
+			.dev = dev,
+			.cxlrd = cxlrd,
+		};
+
+		rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CXIMS,
+					   cxl_parse_cxims, &cxims_ctx);
+		if (rc < 0)
+			goto err_xormap;
+
+		if (cxlrd->calc_hb != cxl_hb_xor) {
+			rc = -ENXIO;
+			goto err_xormap;
+		}
+	}
+
+decoder_add:
 	rc = cxl_decoder_add(cxld, target_map);
+
+err_xormap:
 	if (rc)
 		put_device(&cxld->dev);
 	else