diff mbox series

[v9,2/3] cxl/acpi: Support CXL XOR Interleave Math (CXIMS)

Message ID 5794813acdf7b67cfba3609c6aaff46932fa38d0.1669847017.git.alison.schofield@intel.com
State Accepted
Commit f9db85bfec0dcc01556a41d23aec47b866ab3569
Headers show
Series CXL XOR Interleave Arithmetic | expand

Commit Message

Alison Schofield Nov. 30, 2022, 10:47 p.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

When the CFMWS is using XOR math, parse the corresponding
CXIMS structure and store the xormaps in the root decoder
structure. Use the xormaps in a new lookup, cxl_hb_xor(),
to find a targets entry in the host bridge interleave
target list.

Defined in CXL Specification 3.0 Section: 9.17.1

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/cxl/acpi.c      | 136 +++++++++++++++++++++++++++++++++++++++-
 drivers/cxl/core/port.c |   9 ++-
 drivers/cxl/cxl.h       |  11 +++-
 3 files changed, 148 insertions(+), 8 deletions(-)

Comments

Dan Williams Dec. 4, 2022, 1:08 a.m. UTC | #1
alison.schofield@ wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> When the CFMWS is using XOR math, parse the corresponding
> CXIMS structure and store the xormaps in the root decoder
> structure. Use the xormaps in a new lookup, cxl_hb_xor(),
> to find a targets entry in the host bridge interleave
> target list.
> 
> Defined in CXL Specfication 3.0 Section: 9.17.1
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
>  drivers/cxl/acpi.c      | 136 +++++++++++++++++++++++++++++++++++++++-
>  drivers/cxl/core/port.c |   9 ++-
>  drivers/cxl/cxl.h       |  11 +++-
>  3 files changed, 148 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> index fb649683dd3a..8eee0aa49a77 100644
> --- a/drivers/cxl/acpi.c
> +++ b/drivers/cxl/acpi.c
> @@ -6,9 +6,117 @@
>  #include <linux/kernel.h>
>  #include <linux/acpi.h>
>  #include <linux/pci.h>
> +#include <asm/div64.h>
>  #include "cxlpci.h"
>  #include "cxl.h"
>  
> +struct cxl_cxims_data {
> +	int nr_maps;
> +	u64 xormaps[];
> +};
> +
> +/*
> + * Find a targets entry (n) in the host bridge interleave list.
> + * CXL Specfication 3.0 Table 9-22
> + */
> +static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw,
> +			  int ig)
> +{
> +	int i = 0, n = 0;
> +	u8 eiw;
> +
> +	/* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */
> +	if (iw != 3) {
> +		for (i = 0; i < cximsd->nr_maps; i++)
> +			n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i;
> +	}
> +	/* IW: 3,6,12 add a modulo calculation to 'n' */
> +	if (!is_power_of_2(iw)) {
> +		if (ways_to_cxl(iw, &eiw))
> +			return -1;
> +		hpa &= GENMASK_ULL(51, eiw + ig);
> +		n |= do_div(hpa, 3) << i;
> +	}
> +	return n;
> +}
> +
> +static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
> +{
> +	struct cxl_cxims_data *cximsd = cxlrd->platform_data;
> +	struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
> +	struct cxl_decoder *cxld = &cxlsd->cxld;
> +	int ig = cxld->interleave_granularity;
> +	int iw = cxld->interleave_ways;
> +	int n = 0;
> +	u64 hpa;
> +
> +	if (dev_WARN_ONCE(&cxld->dev,
> +			  cxld->interleave_ways != cxlsd->nr_targets,
> +			  "misconfigured root decoder\n"))
> +		return NULL;
> +
> +	hpa = cxlrd->res->start + pos * ig;
> +
> +	/* Entry (n) is 0 for no interleave (iw == 1) */
> +	if (iw != 1)
> +		n = cxl_xor_calc_n(hpa, cximsd, iw, ig);
> +
> +	if (n < 0)
> +		return NULL;
> +
> +	return cxlrd->cxlsd.target[n];
> +}
> +
> +struct cxl_cxims_context {
> +	struct device *dev;
> +	struct cxl_root_decoder *cxlrd;
> +};
> +
> +static int cxl_parse_cxims(union acpi_subtable_headers *header, void *arg,
> +			   const unsigned long end)
> +{
> +	struct acpi_cedt_cxims *cxims = (struct acpi_cedt_cxims *)header;
> +	struct cxl_cxims_context *ctx = arg;
> +	struct cxl_root_decoder *cxlrd = ctx->cxlrd;
> +	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
> +	struct device *dev = ctx->dev;
> +	struct cxl_cxims_data *cximsd;
> +	unsigned int hbig, nr_maps;
> +	int rc;
> +
> +	rc = cxl_to_granularity(cxims->hbig, &hbig);
> +	if (rc)
> +		return rc;
> +
> +	if (hbig == cxld->interleave_granularity) {

In cases like this with a big indented block followed by a return I
prefer to just do an early return, and undo the indentation.

I'll fold in the following to relieve some 80 column pressure:

@@ -88,32 +88,33 @@ static int cxl_parse_cxims(union acpi_subtable_headers *header, void *arg,
        if (rc)
                return rc;
 
-       if (hbig == cxld->interleave_granularity) {
-               /* IW 1,3 do not use xormaps and skip this parsing entirely */
-
-               if (is_power_of_2(cxld->interleave_ways))
-                       /* 2, 4, 8, 16 way */
-                       nr_maps = ilog2(cxld->interleave_ways);
-               else
-                       /* 6, 12 way */
-                       nr_maps = ilog2(cxld->interleave_ways / 3);
-
-               if (cxims->nr_xormaps < nr_maps) {
-                       dev_dbg(dev, "CXIMS nr_xormaps[%d] expected[%d]\n",
-                               cxims->nr_xormaps, nr_maps);
-                       return -ENXIO;
-               }
+       /* Does this CXIMS entry apply to the given CXL Window? */
+       if (hbig != cxld->interleave_granularity)
+               return 0;
 
-               cximsd = devm_kzalloc(dev,
-                                     struct_size(cximsd, xormaps, nr_maps),
-                                     GFP_KERNEL);
-               if (!cximsd)
-                       return -ENOMEM;
-               memcpy(cximsd->xormaps, cxims->xormap_list,
-                      nr_maps * sizeof(*cximsd->xormaps));
-               cximsd->nr_maps = nr_maps;
-               cxlrd->platform_data = cximsd;
+       /* IW 1,3 do not use xormaps and skip this parsing entirely */
+       if (is_power_of_2(cxld->interleave_ways))
+               /* 2, 4, 8, 16 way */
+               nr_maps = ilog2(cxld->interleave_ways);
+       else
+               /* 6, 12 way */
+               nr_maps = ilog2(cxld->interleave_ways / 3);
+
+       if (cxims->nr_xormaps < nr_maps) {
+               dev_dbg(dev, "CXIMS nr_xormaps[%d] expected[%d]\n",
+                       cxims->nr_xormaps, nr_maps);
+               return -ENXIO;
        }
+
+       cximsd = devm_kzalloc(dev, struct_size(cximsd, xormaps, nr_maps),
+                             GFP_KERNEL);
+       if (!cximsd)
+               return -ENOMEM;
+       memcpy(cximsd->xormaps, cxims->xormap_list,
+              nr_maps * sizeof(*cximsd->xormaps));
+       cximsd->nr_maps = nr_maps;
+       cxlrd->platform_data = cximsd;
+
        return 0;
 }


Other than that this looks good to me.
Dan Williams Dec. 4, 2022, 1:14 a.m. UTC | #2
alison.schofield@ wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> When the CFMWS is using XOR math, parse the corresponding
> CXIMS structure and store the xormaps in the root decoder
> structure. Use the xormaps in a new lookup, cxl_hb_xor(),
> to find a targets entry in the host bridge interleave
> target list.
> 
> Defined in CXL Specfication 3.0 Section: 9.17.1
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
>  drivers/cxl/acpi.c      | 136 +++++++++++++++++++++++++++++++++++++++-
>  drivers/cxl/core/port.c |   9 ++-
>  drivers/cxl/cxl.h       |  11 +++-
>  3 files changed, 148 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> index fb649683dd3a..8eee0aa49a77 100644
> --- a/drivers/cxl/acpi.c
> +++ b/drivers/cxl/acpi.c
[..]
> @@ -148,7 +265,20 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
>  		ig = CXL_DECODER_MIN_GRANULARITY;
>  	cxld->interleave_granularity = ig;
>  
> +	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
> +		if (ways != 1 && ways != 3) {
> +			cxims_ctx = (struct cxl_cxims_context) {
> +				.dev = dev,
> +				.cxlrd = cxlrd,
> +			};
> +			rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CXIMS,
> +						   cxl_parse_cxims, &cxims_ctx);
> +			if (rc < 0)
> +				goto err_xormap;

Spotted one question, what about the case where the Window is XOR
arithmetic, but a lookup in the CXIMS comes up empty for the given
granularity? That's certainly a BIOS bug, but BIOSen have been known to
do worse. It would be nice to defend against that potential case with a
follow on fixup, unless I missed it?
diff mbox series

Patch

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index fb649683dd3a..8eee0aa49a77 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -6,9 +6,117 @@ 
 #include <linux/kernel.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include <asm/div64.h>
 #include "cxlpci.h"
 #include "cxl.h"
 
+struct cxl_cxims_data {
+	int nr_maps;
+	u64 xormaps[];
+};
+
+/*
+ * Find a targets entry (n) in the host bridge interleave list.
+ * CXL Specfication 3.0 Table 9-22
+ */
+static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw,
+			  int ig)
+{
+	int i = 0, n = 0;
+	u8 eiw;
+
+	/* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */
+	if (iw != 3) {
+		for (i = 0; i < cximsd->nr_maps; i++)
+			n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i;
+	}
+	/* IW: 3,6,12 add a modulo calculation to 'n' */
+	if (!is_power_of_2(iw)) {
+		if (ways_to_cxl(iw, &eiw))
+			return -1;
+		hpa &= GENMASK_ULL(51, eiw + ig);
+		n |= do_div(hpa, 3) << i;
+	}
+	return n;
+}
+
+static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
+{
+	struct cxl_cxims_data *cximsd = cxlrd->platform_data;
+	struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
+	struct cxl_decoder *cxld = &cxlsd->cxld;
+	int ig = cxld->interleave_granularity;
+	int iw = cxld->interleave_ways;
+	int n = 0;
+	u64 hpa;
+
+	if (dev_WARN_ONCE(&cxld->dev,
+			  cxld->interleave_ways != cxlsd->nr_targets,
+			  "misconfigured root decoder\n"))
+		return NULL;
+
+	hpa = cxlrd->res->start + pos * ig;
+
+	/* Entry (n) is 0 for no interleave (iw == 1) */
+	if (iw != 1)
+		n = cxl_xor_calc_n(hpa, cximsd, iw, ig);
+
+	if (n < 0)
+		return NULL;
+
+	return cxlrd->cxlsd.target[n];
+}
+
+struct cxl_cxims_context {
+	struct device *dev;
+	struct cxl_root_decoder *cxlrd;
+};
+
+static int cxl_parse_cxims(union acpi_subtable_headers *header, void *arg,
+			   const unsigned long end)
+{
+	struct acpi_cedt_cxims *cxims = (struct acpi_cedt_cxims *)header;
+	struct cxl_cxims_context *ctx = arg;
+	struct cxl_root_decoder *cxlrd = ctx->cxlrd;
+	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
+	struct device *dev = ctx->dev;
+	struct cxl_cxims_data *cximsd;
+	unsigned int hbig, nr_maps;
+	int rc;
+
+	rc = cxl_to_granularity(cxims->hbig, &hbig);
+	if (rc)
+		return rc;
+
+	if (hbig == cxld->interleave_granularity) {
+		/* IW 1,3 do not use xormaps and skip this parsing entirely */
+
+		if (is_power_of_2(cxld->interleave_ways))
+			/* 2, 4, 8, 16 way */
+			nr_maps = ilog2(cxld->interleave_ways);
+		else
+			/* 6, 12 way */
+			nr_maps = ilog2(cxld->interleave_ways / 3);
+
+		if (cxims->nr_xormaps < nr_maps) {
+			dev_dbg(dev, "CXIMS nr_xormaps[%d] expected[%d]\n",
+				cxims->nr_xormaps, nr_maps);
+			return -ENXIO;
+		}
+
+		cximsd = devm_kzalloc(dev,
+				      struct_size(cximsd, xormaps, nr_maps),
+				      GFP_KERNEL);
+		if (!cximsd)
+			return -ENOMEM;
+		memcpy(cximsd->xormaps, cxims->xormap_list,
+		       nr_maps * sizeof(*cximsd->xormaps));
+		cximsd->nr_maps = nr_maps;
+		cxlrd->platform_data = cximsd;
+	}
+	return 0;
+}
+
 static unsigned long cfmws_to_decoder_flags(int restrictions)
 {
 	unsigned long flags = CXL_DECODER_F_ENABLE;
@@ -33,8 +141,10 @@  static int cxl_acpi_cfmws_verify(struct device *dev,
 	int rc, expected_len;
 	unsigned int ways;
 
-	if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) {
-		dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n");
+	if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO &&
+	    cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
+		dev_err(dev, "CFMWS Unknown Interleave Arithmetic: %d\n",
+			cfmws->interleave_arithmetic);
 		return -EINVAL;
 	}
 
@@ -84,9 +194,11 @@  static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
 	struct cxl_cfmws_context *ctx = arg;
 	struct cxl_port *root_port = ctx->root_port;
 	struct resource *cxl_res = ctx->cxl_res;
+	struct cxl_cxims_context cxims_ctx;
 	struct cxl_root_decoder *cxlrd;
 	struct device *dev = ctx->dev;
 	struct acpi_cedt_cfmws *cfmws;
+	cxl_calc_hb_fn cxl_calc_hb;
 	struct cxl_decoder *cxld;
 	unsigned int ways, i, ig;
 	struct resource *res;
@@ -128,7 +240,12 @@  static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
 	if (rc)
 		goto err_insert;
 
-	cxlrd = cxl_root_decoder_alloc(root_port, ways);
+	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO)
+		cxl_calc_hb = cxl_hb_modulo;
+	else
+		cxl_calc_hb = cxl_hb_xor;
+
+	cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
 	if (IS_ERR(cxlrd))
 		return 0;
 
@@ -148,7 +265,20 @@  static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
 		ig = CXL_DECODER_MIN_GRANULARITY;
 	cxld->interleave_granularity = ig;
 
+	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
+		if (ways != 1 && ways != 3) {
+			cxims_ctx = (struct cxl_cxims_context) {
+				.dev = dev,
+				.cxlrd = cxlrd,
+			};
+			rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CXIMS,
+						   cxl_parse_cxims, &cxims_ctx);
+			if (rc < 0)
+				goto err_xormap;
+		}
+	}
 	rc = cxl_decoder_add(cxld, target_map);
+err_xormap:
 	if (rc)
 		put_device(&cxld->dev);
 	else
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index e7556864ea80..42cdf224a85d 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1428,7 +1428,7 @@  static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd,
 	return rc;
 }
 
-static struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos)
+struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos)
 {
 	struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
 	struct cxl_decoder *cxld = &cxlsd->cxld;
@@ -1441,6 +1441,7 @@  static struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos)
 
 	return cxlrd->cxlsd.target[pos % iw];
 }
+EXPORT_SYMBOL_NS_GPL(cxl_hb_modulo, CXL);
 
 static struct lock_class_key cxl_decoder_key;
 
@@ -1502,6 +1503,7 @@  static int cxl_switch_decoder_init(struct cxl_port *port,
  * cxl_root_decoder_alloc - Allocate a root level decoder
  * @port: owning CXL root of this decoder
  * @nr_targets: static number of downstream targets
+ * @calc_hb: which host bridge covers the n'th position by granularity
  *
  * Return: A new cxl decoder to be registered by cxl_decoder_add(). A
  * 'CXL root' decoder is one that decodes from a top-level / static platform
@@ -1509,7 +1511,8 @@  static int cxl_switch_decoder_init(struct cxl_port *port,
  * topology.
  */
 struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
-						unsigned int nr_targets)
+						unsigned int nr_targets,
+						cxl_calc_hb_fn calc_hb)
 {
 	struct cxl_root_decoder *cxlrd;
 	struct cxl_switch_decoder *cxlsd;
@@ -1531,7 +1534,7 @@  struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
 		return ERR_PTR(rc);
 	}
 
-	cxlrd->calc_hb = cxl_hb_modulo;
+	cxlrd->calc_hb = calc_hb;
 
 	cxld = &cxlsd->cxld;
 	cxld->dev.type = &cxl_decoder_root_type;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index ac75554b5d76..5f611b8a37cf 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -324,18 +324,23 @@  struct cxl_switch_decoder {
 	struct cxl_dport *target[];
 };
 
+struct cxl_root_decoder;
+typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
+					    int pos);
 
 /**
  * struct cxl_root_decoder - Static platform CXL address decoder
  * @res: host / parent resource for region allocations
  * @region_id: region id for next region provisioning event
  * @calc_hb: which host bridge covers the n'th position by granularity
+ * @platform_data: platform specific configuration data
  * @cxlsd: base cxl switch decoder
  */
 struct cxl_root_decoder {
 	struct resource *res;
 	atomic_t region_id;
-	struct cxl_dport *(*calc_hb)(struct cxl_root_decoder *cxlrd, int pos);
+	cxl_calc_hb_fn calc_hb;
+	void *platform_data;
 	struct cxl_switch_decoder cxlsd;
 };
 
@@ -581,7 +586,9 @@  struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev);
 bool is_root_decoder(struct device *dev);
 bool is_endpoint_decoder(struct device *dev);
 struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
-						unsigned int nr_targets);
+						unsigned int nr_targets,
+						cxl_calc_hb_fn calc_hb);
+struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos);
 struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
 						    unsigned int nr_targets);
 int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);