diff mbox series

[09/13] cxl/region: Implement XHB verification

Message ID 20220107003756.806582-10-ben.widawsky@intel.com
State Superseded
Headers show
Series CXL Region driver | expand

Commit Message

Ben Widawsky Jan. 7, 2022, 12:37 a.m. UTC
Cross host bridge verification primarily determines if the requested
interleave ordering can be achieved by the root decoder, which isn't as
programmable as other decoders.

The algorithm implemented here is based on the CXL Type 3 Memory Device
Software Guide, chapter 2.13.14

Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
---
 .clang-format        |  2 +
 drivers/cxl/region.c | 89 +++++++++++++++++++++++++++++++++++++++++++-
 drivers/cxl/trace.h  |  3 ++
 3 files changed, 93 insertions(+), 1 deletion(-)

Comments

Jonathan Cameron Jan. 7, 2022, 10:07 a.m. UTC | #1
On Thu,  6 Jan 2022 16:37:52 -0800
Ben Widawsky <ben.widawsky@intel.com> wrote:

> Cross host bridge verification primarily determines if the requested
> interleave ordering can be achieved by the root decoder, which isn't as
> programmable as other decoders.
> 
> The algorithm implemented here is based on the CXL Type 3 Memory Device
> Software Guide, chapter 2.13.14
> 
> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>

Hi Ben,

A few things I'm carrying 'fixes' for in here.

Jonathan

> ---
>  .clang-format        |  2 +
>  drivers/cxl/region.c | 89 +++++++++++++++++++++++++++++++++++++++++++-
>  drivers/cxl/trace.h  |  3 ++
>  3 files changed, 93 insertions(+), 1 deletion(-)
> 
> diff --git a/.clang-format b/.clang-format
> index 15d4eaabc6b5..55f628f21722 100644
> --- a/.clang-format
> +++ b/.clang-format
> @@ -169,6 +169,8 @@ ForEachMacros:
>    - 'for_each_cpu_and'
>    - 'for_each_cpu_not'
>    - 'for_each_cpu_wrap'
> +  - 'for_each_cxl_decoder_target'
> +  - 'for_each_cxl_endpoint'
>    - 'for_each_dapm_widgets'
>    - 'for_each_dev_addr'
>    - 'for_each_dev_scope'
> diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c
> index c8e3c48dfbb9..ca559a4b5347 100644
> --- a/drivers/cxl/region.c
> +++ b/drivers/cxl/region.c
> @@ -28,6 +28,17 @@
>   */
>  
>  #define region_ways(region) ((region)->config.eniw)
> +#define region_ig(region) (ilog2((region)->config.ig))
> +
> +#define for_each_cxl_endpoint(ep, region, idx)                                 \
> +	for (idx = 0, ep = (region)->config.targets[idx];                      \
> +	     idx < region_ways(region);                                        \
> +	     idx++, ep = (region)->config.targets[idx])
> +
> +#define for_each_cxl_decoder_target(target, decoder, idx)                      \
> +	for (idx = 0, target = (decoder)->target[idx];                         \
> +	     idx < (decoder)->nr_targets;                                      \
> +	     idx++, target++)
>  
>  static struct cxl_decoder *rootd_from_region(struct cxl_region *r)
>  {
> @@ -175,6 +186,30 @@ static bool qtg_match(const struct cxl_decoder *rootd,
>  	return true;
>  }
>  
> +static int get_unique_hostbridges(const struct cxl_region *region,
> +				  struct cxl_port **hbs)
> +{
> +	struct cxl_memdev *ep;
> +	int i, hb_count = 0;
> +
> +	for_each_cxl_endpoint(ep, region, i) {
> +		struct cxl_port *hb = get_hostbridge(ep);
> +		bool found = false;
> +		int j;
> +
> +		BUG_ON(!hb);
> +
> +		for (j = 0; j < hb_count; j++) {
> +			if (hbs[j] == hb)
> +				found = true;
> +		}
> +		if (!found)
> +			hbs[hb_count++] = hb;
> +	}
> +
> +	return hb_count;
> +}
> +
>  /**
>   * region_xhb_config_valid() - determine cross host bridge validity
>   * @rootd: The root decoder to check against
> @@ -188,7 +223,59 @@ static bool qtg_match(const struct cxl_decoder *rootd,
>  static bool region_xhb_config_valid(const struct cxl_region *region,
>  				    const struct cxl_decoder *rootd)
>  {
> -	/* TODO: */
> +	struct cxl_port *hbs[CXL_DECODER_MAX_INTERLEAVE];
> +	int rootd_ig, i;
> +	struct cxl_dport *target;
> +
> +	/* Are all devices in this region on the same CXL host bridge */
> +	if (get_unique_hostbridges(region, hbs) == 1)
> +		return true;
> +
> +	rootd_ig = rootd->interleave_granularity;
> +
> +	/* CFMWS.HBIG >= Device.Label.IG */
> +	if (rootd_ig < region_ig(region)) {
> +		trace_xhb_valid(region,
> +				"granularity does not support the region interleave granularity\n");
> +		return false;
> +	}
> +
> +	/* ((2^(CFMWS.HBIG - Device.RLabel.IG) * (2^CFMWS.ENIW)) > Device.RLabel.NLabel) */
> +	if (1 << (rootd_ig - region_ig(region)) * (1 << rootd->interleave_ways) >

This maths isn't what the comment says it is.
((1 << (rootd_ig - region_ig(region))) * rootd->interleaveways)
so brackets needed to avoid 2^( all the rest) and rootd->interleave_ways seems to the
actual number of ways not the log2 of it.

That feeds through below.


> +	    region_ways(region)) {
> +		trace_xhb_valid(region,
> +				"granularity to device granularity ratio requires a larger number of devices than currently configured");
> +		return false;
> +	}
> +
> +	/* Check that endpoints are hooked up in the correct order */
> +	for_each_cxl_decoder_target(target, rootd, i) {
> +		struct cxl_memdev *endpoint = region->config.targets[i];
> +
> +		if (get_hostbridge(endpoint) != target->port) {

I think this should be
get_hostbridge(endpoint)->uport != target->dport

As it stands you are comparing the host bridge with the root object.

> +			trace_xhb_valid(region, "device ordering bad\n");
> +			return false;
> +		}
> +	}
> +
> +	/*
> +	 * CFMWS.InterleaveTargetList[n] must contain all devices, x where:
> +	 *	(Device[x],RegionLabel.Position >> (CFMWS.HBIG -
> +	 *	Device[x].RegionLabel.InterleaveGranularity)) &
> +	 *	((2^CFMWS.ENIW) - 1) = n
> +	 *
> +	 * Linux notes: All devices are known to have the same interleave
> +	 * granularity at this point.
> +	 */
> +	for_each_cxl_decoder_target(target, rootd, i) {
> +		if (((i >> (rootd_ig - region_ig(region)))) &
> +		    (((1 << rootd->interleave_ways) - 1) != target->port_id)) {
> +			trace_xhb_valid(region,
> +					"One or more devices are not connected to the correct hostbridge.");
> +			return false;
> +		}
> +	}
> +
>  	return true;
>  }
>  
> diff --git a/drivers/cxl/trace.h b/drivers/cxl/trace.h
> index a53f00ba5d0e..4de47d1111ac 100644
> --- a/drivers/cxl/trace.h
> +++ b/drivers/cxl/trace.h
> @@ -38,6 +38,9 @@ DEFINE_EVENT(cxl_region_template, sanitize_failed,
>  DEFINE_EVENT(cxl_region_template, allocation_failed,
>  	     TP_PROTO(const struct cxl_region *region, char *status),
>  	     TP_ARGS(region, status));
> +DEFINE_EVENT(cxl_region_template, xhb_valid,
> +	     TP_PROTO(const struct cxl_region *region, char *status),
> +	     TP_ARGS(region, status));
>  
>  #endif /* if !defined (__CXL_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) */
>
Jonathan Cameron Jan. 7, 2022, 10:30 a.m. UTC | #2
On Thu,  6 Jan 2022 16:37:52 -0800
Ben Widawsky <ben.widawsky@intel.com> wrote:

> Cross host bridge verification primarily determines if the requested
> interleave ordering can be achieved by the root decoder, which isn't as
> programmable as other decoders.
> 
> The algorithm implemented here is based on the CXL Type 3 Memory Device
> Software Guide, chapter 2.13.14
> 
> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>

Trivial thing inline.

> diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c
> index c8e3c48dfbb9..ca559a4b5347 100644
> --- a/drivers/cxl/region.c
> +++ b/drivers/cxl/region.c
> @@ -28,6 +28,17 @@
>   */
>  
>  #define region_ways(region) ((region)->config.eniw)
> +#define region_ig(region) (ilog2((region)->config.ig))
> +
> +#define for_each_cxl_endpoint(ep, region, idx)                                 \
> +	for (idx = 0, ep = (region)->config.targets[idx];                      \
> +	     idx < region_ways(region);                                        \
> +	     idx++, ep = (region)->config.targets[idx])
> +
> +#define for_each_cxl_decoder_target(target, decoder, idx)                      \
> +	for (idx = 0, target = (decoder)->target[idx];                         \

As target is used too often in here, you'll replace it in ->target[idx] as well.
It happens to work today because the parameter always happens to be target

> +	     idx < (decoder)->nr_targets;                                      \
> +	     idx++, target++)
>
Jonathan Cameron Jan. 7, 2022, 10:38 a.m. UTC | #3
On Fri, 7 Jan 2022 10:30:52 +0000
Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:

> On Thu,  6 Jan 2022 16:37:52 -0800
> Ben Widawsky <ben.widawsky@intel.com> wrote:
> 
> > Cross host bridge verification primarily determines if the requested
> > interleave ordering can be achieved by the root decoder, which isn't as
> > programmable as other decoders.
> > 
> > The algorithm implemented here is based on the CXL Type 3 Memory Device
> > Software Guide, chapter 2.13.14
> > 
> > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>  
> 
> Trivial thing inline.
> 
> > diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c
> > index c8e3c48dfbb9..ca559a4b5347 100644
> > --- a/drivers/cxl/region.c
> > +++ b/drivers/cxl/region.c
> > @@ -28,6 +28,17 @@
> >   */
> >  
> >  #define region_ways(region) ((region)->config.eniw)
> > +#define region_ig(region) (ilog2((region)->config.ig))
> > +
> > +#define for_each_cxl_endpoint(ep, region, idx)                                 \
> > +	for (idx = 0, ep = (region)->config.targets[idx];                      \
> > +	     idx < region_ways(region);                                        \
> > +	     idx++, ep = (region)->config.targets[idx])
> > +
> > +#define for_each_cxl_decoder_target(target, decoder, idx)                      \
> > +	for (idx = 0, target = (decoder)->target[idx];                         \  
> 
> As target is used too often in here, you'll replace it in ->target[idx] as well.
> It happens to work today because the parameter always happens to be target
> 
> > +	     idx < (decoder)->nr_targets;                                      \
> > +	     idx++, target++)
I should have read the next few lines :)

target++ doesn't get (decoder)->target[idx] which is what we want - it indexes
off the end of a particular instance rather than through the array.

I'm guessing this was from my unclear comment yesterday. I should have spent
a little more time being explicit there.

Jonathan

> >
Jonathan Cameron Jan. 7, 2022, 11:55 a.m. UTC | #4
On Fri, 7 Jan 2022 10:07:14 +0000
Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote:

> On Thu,  6 Jan 2022 16:37:52 -0800
> Ben Widawsky <ben.widawsky@intel.com> wrote:
> 
> > Cross host bridge verification primarily determines if the requested
> > interleave ordering can be achieved by the root decoder, which isn't as
> > programmable as other decoders.
> > 
> > The algorithm implemented here is based on the CXL Type 3 Memory Device
> > Software Guide, chapter 2.13.14
> > 
> > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>  
> 
> Hi Ben,
> 
> A few things I'm carrying 'fixes' for in here.
> 
> Jonathan
> 
> > ---
> >  .clang-format        |  2 +
> >  drivers/cxl/region.c | 89 +++++++++++++++++++++++++++++++++++++++++++-
> >  drivers/cxl/trace.h  |  3 ++
> >  3 files changed, 93 insertions(+), 1 deletion(-)
> > 
> > diff --git a/.clang-format b/.clang-format
> > index 15d4eaabc6b5..55f628f21722 100644
> > --- a/.clang-format
> > +++ b/.clang-format
> > @@ -169,6 +169,8 @@ ForEachMacros:
> >    - 'for_each_cpu_and'
> >    - 'for_each_cpu_not'
> >    - 'for_each_cpu_wrap'
> > +  - 'for_each_cxl_decoder_target'
> > +  - 'for_each_cxl_endpoint'
> >    - 'for_each_dapm_widgets'
> >    - 'for_each_dev_addr'
> >    - 'for_each_dev_scope'
> > diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c
> > index c8e3c48dfbb9..ca559a4b5347 100644
> > --- a/drivers/cxl/region.c
> > +++ b/drivers/cxl/region.c
> > @@ -28,6 +28,17 @@
> >   */
> >  
> >  #define region_ways(region) ((region)->config.eniw)
> > +#define region_ig(region) (ilog2((region)->config.ig))
> > +
> > +#define for_each_cxl_endpoint(ep, region, idx)                                 \
> > +	for (idx = 0, ep = (region)->config.targets[idx];                      \
> > +	     idx < region_ways(region);                                        \
> > +	     idx++, ep = (region)->config.targets[idx])
> > +
> > +#define for_each_cxl_decoder_target(target, decoder, idx)                      \
> > +	for (idx = 0, target = (decoder)->target[idx];                         \
> > +	     idx < (decoder)->nr_targets;                                      \
> > +	     idx++, target++)
> >  
> >  static struct cxl_decoder *rootd_from_region(struct cxl_region *r)
> >  {
> > @@ -175,6 +186,30 @@ static bool qtg_match(const struct cxl_decoder *rootd,
> >  	return true;
> >  }
> >  
> > +static int get_unique_hostbridges(const struct cxl_region *region,
> > +				  struct cxl_port **hbs)
> > +{
> > +	struct cxl_memdev *ep;
> > +	int i, hb_count = 0;
> > +
> > +	for_each_cxl_endpoint(ep, region, i) {
> > +		struct cxl_port *hb = get_hostbridge(ep);
> > +		bool found = false;
> > +		int j;
> > +
> > +		BUG_ON(!hb);
> > +
> > +		for (j = 0; j < hb_count; j++) {
> > +			if (hbs[j] == hb)
> > +				found = true;
> > +		}
> > +		if (!found)
> > +			hbs[hb_count++] = hb;
> > +	}
> > +
> > +	return hb_count;
> > +}
> > +
> >  /**
> >   * region_xhb_config_valid() - determine cross host bridge validity
> >   * @rootd: The root decoder to check against
> > @@ -188,7 +223,59 @@ static bool qtg_match(const struct cxl_decoder *rootd,
> >  static bool region_xhb_config_valid(const struct cxl_region *region,
> >  				    const struct cxl_decoder *rootd)
> >  {
> > -	/* TODO: */
> > +	struct cxl_port *hbs[CXL_DECODER_MAX_INTERLEAVE];
> > +	int rootd_ig, i;
> > +	struct cxl_dport *target;
> > +
> > +	/* Are all devices in this region on the same CXL host bridge */
> > +	if (get_unique_hostbridges(region, hbs) == 1)
> > +		return true;
> > +
> > +	rootd_ig = rootd->interleave_granularity;
> > +
> > +	/* CFMWS.HBIG >= Device.Label.IG */
> > +	if (rootd_ig < region_ig(region)) {
> > +		trace_xhb_valid(region,
> > +				"granularity does not support the region interleave granularity\n");
> > +		return false;
> > +	}
> > +
> > +	/* ((2^(CFMWS.HBIG - Device.RLabel.IG) * (2^CFMWS.ENIW)) > Device.RLabel.NLabel) */
> > +	if (1 << (rootd_ig - region_ig(region)) * (1 << rootd->interleave_ways) >  
> 
> This maths isn't what the comment says it is.
> ((1 << (rootd_ig - region_ig(region))) * rootd->interleaveways)
> so brackets needed to avoid 2^( all the rest) and rootd->interleave_ways seems to the
> actual number of ways not the log2 of it.
> 
> That feeds through below.
> 
> 
> > +	    region_ways(region)) {
> > +		trace_xhb_valid(region,
> > +				"granularity to device granularity ratio requires a larger number of devices than currently configured");
> > +		return false;
> > +	}
> > +
> > +	/* Check that endpoints are hooked up in the correct order */
> > +	for_each_cxl_decoder_target(target, rootd, i) {
> > +		struct cxl_memdev *endpoint = region->config.targets[i];
> > +
> > +		if (get_hostbridge(endpoint) != target->port) {  
> 
> I think this should be
> get_hostbridge(endpoint)->uport != target->dport
> 
> As it stands you are comparing the host bridge with the root object.

On closer inspection this code doesn't do what it is meant to do at all
if there are multiple EP below a given root bridge.

You'd expect multiple endpoints to match to each target->port.
Something along the lines of this should work:

        {
                struct cxl_memdev **epgroupstart = region->config.targets;
                struct cxl_memdev **endpoint;

                for_each_cxl_decoder_target(target, rootd, i) {
                        /* Find start of next endpoint group */
                        endpoint = epgroupstart;
                        if (*endpoint == NULL) {
                                printk("No endpoints under decoder target\n");
                                return false;
                        }
                        while (*epgroupstart &&
                                get_hostbridge(*endpoint) == get_hostbridge(*epgroupstart))
                                epgroupstart++;
                }
                if (*epgroupstart) {
                        printk("still some entries left. boom\n");
                        return false;
                }
        }

Only lightly tested with correct inputs...

Next up is figuring out why the EP HDM decoder won't commit. :)

Jonathan


> 
> > +			trace_xhb_valid(region, "device ordering bad\n");
> > +			return false;
> > +		}
> > +	}
> > +
> > +	/*
> > +	 * CFMWS.InterleaveTargetList[n] must contain all devices, x where:
> > +	 *	(Device[x],RegionLabel.Position >> (CFMWS.HBIG -
> > +	 *	Device[x].RegionLabel.InterleaveGranularity)) &
> > +	 *	((2^CFMWS.ENIW) - 1) = n
> > +	 *
> > +	 * Linux notes: All devices are known to have the same interleave
> > +	 * granularity at this point.
> > +	 */
> > +	for_each_cxl_decoder_target(target, rootd, i) {
> > +		if (((i >> (rootd_ig - region_ig(region)))) &
> > +		    (((1 << rootd->interleave_ways) - 1) != target->port_id)) {
> > +			trace_xhb_valid(region,
> > +					"One or more devices are not connected to the correct hostbridge.");
> > +			return false;
> > +		}
> > +	}
> > +
> >  	return true;
> >  }
> >  
> > diff --git a/drivers/cxl/trace.h b/drivers/cxl/trace.h
> > index a53f00ba5d0e..4de47d1111ac 100644
> > --- a/drivers/cxl/trace.h
> > +++ b/drivers/cxl/trace.h
> > @@ -38,6 +38,9 @@ DEFINE_EVENT(cxl_region_template, sanitize_failed,
> >  DEFINE_EVENT(cxl_region_template, allocation_failed,
> >  	     TP_PROTO(const struct cxl_region *region, char *status),
> >  	     TP_ARGS(region, status));
> > +DEFINE_EVENT(cxl_region_template, xhb_valid,
> > +	     TP_PROTO(const struct cxl_region *region, char *status),
> > +	     TP_ARGS(region, status));
> >  
> >  #endif /* if !defined (__CXL_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) */
> >    
>
Ben Widawsky Jan. 7, 2022, 4:32 p.m. UTC | #5
On 22-01-07 10:38:27, Jonathan Cameron wrote:
> On Fri, 7 Jan 2022 10:30:52 +0000
> Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
> 
> > On Thu,  6 Jan 2022 16:37:52 -0800
> > Ben Widawsky <ben.widawsky@intel.com> wrote:
> > 
> > > Cross host bridge verification primarily determines if the requested
> > > interleave ordering can be achieved by the root decoder, which isn't as
> > > programmable as other decoders.
> > > 
> > > The algorithm implemented here is based on the CXL Type 3 Memory Device
> > > Software Guide, chapter 2.13.14
> > > 
> > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>  
> > 
> > Trivial thing inline.
> > 
> > > diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c
> > > index c8e3c48dfbb9..ca559a4b5347 100644
> > > --- a/drivers/cxl/region.c
> > > +++ b/drivers/cxl/region.c
> > > @@ -28,6 +28,17 @@
> > >   */
> > >  
> > >  #define region_ways(region) ((region)->config.eniw)
> > > +#define region_ig(region) (ilog2((region)->config.ig))
> > > +
> > > +#define for_each_cxl_endpoint(ep, region, idx)                                 \
> > > +	for (idx = 0, ep = (region)->config.targets[idx];                      \
> > > +	     idx < region_ways(region);                                        \
> > > +	     idx++, ep = (region)->config.targets[idx])
> > > +
> > > +#define for_each_cxl_decoder_target(target, decoder, idx)                      \
> > > +	for (idx = 0, target = (decoder)->target[idx];                         \  
> > 
> > As target is used too often in here, you'll replace it in ->target[idx] as well.
> > It happens to work today because the parameter always happens to be target
> > 
> > > +	     idx < (decoder)->nr_targets;                                      \
> > > +	     idx++, target++)
> I should have read the next few lines :)
> 
> target++ doesn't get (decoder)->target[idx] which is what we want - it indexes
> off the end of a particular instance rather than through the array.
> 
> I'm guessing this was from my unclear comment yesterday. I should have spent
> a little more time being explicit there.
> 
> Jonathan

Yeah. I was working quickly because I ended up losing childcare for this week
and wanted to get this out ASAP. I'll fix it up on the next round.

> 
> > >    
>
Ben Widawsky Jan. 11, 2022, 9:32 p.m. UTC | #6
On 22-01-07 10:38:27, Jonathan Cameron wrote:
> On Fri, 7 Jan 2022 10:30:52 +0000
> Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:
> 
> > On Thu,  6 Jan 2022 16:37:52 -0800
> > Ben Widawsky <ben.widawsky@intel.com> wrote:
> > 
> > > Cross host bridge verification primarily determines if the requested
> > > interleave ordering can be achieved by the root decoder, which isn't as
> > > programmable as other decoders.
> > > 
> > > The algorithm implemented here is based on the CXL Type 3 Memory Device
> > > Software Guide, chapter 2.13.14
> > > 
> > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>  
> > 
> > Trivial thing inline.
> > 
> > > diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c
> > > index c8e3c48dfbb9..ca559a4b5347 100644
> > > --- a/drivers/cxl/region.c
> > > +++ b/drivers/cxl/region.c
> > > @@ -28,6 +28,17 @@
> > >   */
> > >  
> > >  #define region_ways(region) ((region)->config.eniw)
> > > +#define region_ig(region) (ilog2((region)->config.ig))
> > > +
> > > +#define for_each_cxl_endpoint(ep, region, idx)                                 \
> > > +	for (idx = 0, ep = (region)->config.targets[idx];                      \
> > > +	     idx < region_ways(region);                                        \
> > > +	     idx++, ep = (region)->config.targets[idx])
> > > +
> > > +#define for_each_cxl_decoder_target(target, decoder, idx)                      \
> > > +	for (idx = 0, target = (decoder)->target[idx];                         \  
> > 
> > As target is used too often in here, you'll replace it in ->target[idx] as well.
> > It happens to work today because the parameter always happens to be target
> > 
> > > +	     idx < (decoder)->nr_targets;                                      \
> > > +	     idx++, target++)
> I should have read the next few lines :)
> 
> target++ doesn't get (decoder)->target[idx] which is what we want - it indexes
> off the end of a particular instance rather than through the array.
> 
> I'm guessing this was from my unclear comment yesterday. I should have spent
> a little more time being explicit there.
> 
> Jonathan
> 
> > >    
> 

Gotcha. I combined the idx increment as well, what do you think about this (just
typed, not tested):

#define for_each_cxl_decoder_target(dport, decoder, idx)                      \
        for (idx = 0, dport = (decoder)->target[idx];                         \
             idx < (decoder)->nr_targets;                                     \
             dport = (decoder)->target[++idx])
Ben Widawsky Jan. 11, 2022, 10:47 p.m. UTC | #7
On 22-01-07 11:55:24, Jonathan Cameron wrote:
> On Fri, 7 Jan 2022 10:07:14 +0000
> Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote:
> 
> > On Thu,  6 Jan 2022 16:37:52 -0800
> > Ben Widawsky <ben.widawsky@intel.com> wrote:
> > 
> > > Cross host bridge verification primarily determines if the requested
> > > interleave ordering can be achieved by the root decoder, which isn't as
> > > programmable as other decoders.
> > > 
> > > The algorithm implemented here is based on the CXL Type 3 Memory Device
> > > Software Guide, chapter 2.13.14
> > > 
> > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>  
> > 
> > Hi Ben,
> > 
> > A few things I'm carrying 'fixes' for in here.
> > 
> > Jonathan
> > 
> > > ---
> > >  .clang-format        |  2 +
> > >  drivers/cxl/region.c | 89 +++++++++++++++++++++++++++++++++++++++++++-
> > >  drivers/cxl/trace.h  |  3 ++
> > >  3 files changed, 93 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/.clang-format b/.clang-format
> > > index 15d4eaabc6b5..55f628f21722 100644
> > > --- a/.clang-format
> > > +++ b/.clang-format
> > > @@ -169,6 +169,8 @@ ForEachMacros:
> > >    - 'for_each_cpu_and'
> > >    - 'for_each_cpu_not'
> > >    - 'for_each_cpu_wrap'
> > > +  - 'for_each_cxl_decoder_target'
> > > +  - 'for_each_cxl_endpoint'
> > >    - 'for_each_dapm_widgets'
> > >    - 'for_each_dev_addr'
> > >    - 'for_each_dev_scope'
> > > diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c
> > > index c8e3c48dfbb9..ca559a4b5347 100644
> > > --- a/drivers/cxl/region.c
> > > +++ b/drivers/cxl/region.c
> > > @@ -28,6 +28,17 @@
> > >   */
> > >  
> > >  #define region_ways(region) ((region)->config.eniw)
> > > +#define region_ig(region) (ilog2((region)->config.ig))
> > > +
> > > +#define for_each_cxl_endpoint(ep, region, idx)                                 \
> > > +	for (idx = 0, ep = (region)->config.targets[idx];                      \
> > > +	     idx < region_ways(region);                                        \
> > > +	     idx++, ep = (region)->config.targets[idx])
> > > +
> > > +#define for_each_cxl_decoder_target(target, decoder, idx)                      \
> > > +	for (idx = 0, target = (decoder)->target[idx];                         \
> > > +	     idx < (decoder)->nr_targets;                                      \
> > > +	     idx++, target++)
> > >  
> > >  static struct cxl_decoder *rootd_from_region(struct cxl_region *r)
> > >  {
> > > @@ -175,6 +186,30 @@ static bool qtg_match(const struct cxl_decoder *rootd,
> > >  	return true;
> > >  }
> > >  
> > > +static int get_unique_hostbridges(const struct cxl_region *region,
> > > +				  struct cxl_port **hbs)
> > > +{
> > > +	struct cxl_memdev *ep;
> > > +	int i, hb_count = 0;
> > > +
> > > +	for_each_cxl_endpoint(ep, region, i) {
> > > +		struct cxl_port *hb = get_hostbridge(ep);
> > > +		bool found = false;
> > > +		int j;
> > > +
> > > +		BUG_ON(!hb);
> > > +
> > > +		for (j = 0; j < hb_count; j++) {
> > > +			if (hbs[j] == hb)
> > > +				found = true;
> > > +		}
> > > +		if (!found)
> > > +			hbs[hb_count++] = hb;
> > > +	}
> > > +
> > > +	return hb_count;
> > > +}
> > > +
> > >  /**
> > >   * region_xhb_config_valid() - determine cross host bridge validity
> > >   * @rootd: The root decoder to check against
> > > @@ -188,7 +223,59 @@ static bool qtg_match(const struct cxl_decoder *rootd,
> > >  static bool region_xhb_config_valid(const struct cxl_region *region,
> > >  				    const struct cxl_decoder *rootd)
> > >  {
> > > -	/* TODO: */
> > > +	struct cxl_port *hbs[CXL_DECODER_MAX_INTERLEAVE];
> > > +	int rootd_ig, i;
> > > +	struct cxl_dport *target;
> > > +
> > > +	/* Are all devices in this region on the same CXL host bridge */
> > > +	if (get_unique_hostbridges(region, hbs) == 1)
> > > +		return true;
> > > +
> > > +	rootd_ig = rootd->interleave_granularity;
> > > +
> > > +	/* CFMWS.HBIG >= Device.Label.IG */
> > > +	if (rootd_ig < region_ig(region)) {
> > > +		trace_xhb_valid(region,
> > > +				"granularity does not support the region interleave granularity\n");
> > > +		return false;
> > > +	}
> > > +
> > > +	/* ((2^(CFMWS.HBIG - Device.RLabel.IG) * (2^CFMWS.ENIW)) > Device.RLabel.NLabel) */
> > > +	if (1 << (rootd_ig - region_ig(region)) * (1 << rootd->interleave_ways) >  
> > 
> > This maths isn't what the comment says it is.
> > ((1 << (rootd_ig - region_ig(region))) * rootd->interleaveways)
> > so brackets needed to avoid 2^( all the rest) and rootd->interleave_ways seems to the
> > actual number of ways not the log2 of it.
> > 
> > That feeds through below.

You're correct on both counts. Nice catch.

> > 
> > 
> > > +	    region_ways(region)) {
> > > +		trace_xhb_valid(region,
> > > +				"granularity to device granularity ratio requires a larger number of devices than currently configured");
> > > +		return false;
> > > +	}
> > > +
> > > +	/* Check that endpoints are hooked up in the correct order */
> > > +	for_each_cxl_decoder_target(target, rootd, i) {
> > > +		struct cxl_memdev *endpoint = region->config.targets[i];
> > > +
> > > +		if (get_hostbridge(endpoint) != target->port) {  
> > 
> > I think this should be
> > get_hostbridge(endpoint)->uport != target->dport
> > 
> > As it stands you are comparing the host bridge with the root object.
> 
> On closer inspection this code doesn't do what it is meant to do at all
> if there are multiple EP below a given root bridge.
> 
> You'd expect multiple endpoints to match to each target->port.
> Something along the lines of this should work:
> 
>         {
>                 struct cxl_memdev **epgroupstart = region->config.targets;
>                 struct cxl_memdev **endpoint;
> 
>                 for_each_cxl_decoder_target(target, rootd, i) {
>                         /* Find start of next endpoint group */
>                         endpoint = epgroupstart;
>                         if (*endpoint == NULL) {
>                                 printk("No endpoints under decoder target\n");
>                                 return false;
>                         }
>                         while (*epgroupstart &&
>                                 get_hostbridge(*endpoint) == get_hostbridge(*epgroupstart))
>                                 epgroupstart++;
>                 }
>                 if (*epgroupstart) {
>                         printk("still some entries left. boom\n");
>                         return false;
>                 }
>         }
> 
> Only lightly tested with correct inputs...
> 
> Next up is figuring out why the EP HDM decoder won't commit. :)
> 
> Jonathan
> 

You're correct that what I have isn't correct. However, I think this was just
bogus leftover from an aborted attempt to try to implement this. See below...

> 
> > 
> > > +			trace_xhb_valid(region, "device ordering bad\n");
> > > +			return false;
> > > +		}
> > > +	}
> > > +
> > > +	/*
> > > +	 * CFMWS.InterleaveTargetList[n] must contain all devices, x where:
> > > +	 *	(Device[x],RegionLabel.Position >> (CFMWS.HBIG -
> > > +	 *	Device[x].RegionLabel.InterleaveGranularity)) &
> > > +	 *	((2^CFMWS.ENIW) - 1) = n
> > > +	 *
> > > +	 * Linux notes: All devices are known to have the same interleave
> > > +	 * granularity at this point.
> > > +	 */
> > > +	for_each_cxl_decoder_target(target, rootd, i) {
> > > +		if (((i >> (rootd_ig - region_ig(region)))) &
> > > +		    (((1 << rootd->interleave_ways) - 1) != target->port_id)) {
> > > +			trace_xhb_valid(region,
> > > +					"One or more devices are not connected to the correct hostbridge.");
> > > +			return false;
> > > +		}
> > > +	}
> > > +

I think this does the correct XHB calculation. So I think I can just remove the
top hunk and we're good. Do you agree?

> > >  	return true;
> > >  }
> > >  
> > > diff --git a/drivers/cxl/trace.h b/drivers/cxl/trace.h
> > > index a53f00ba5d0e..4de47d1111ac 100644
> > > --- a/drivers/cxl/trace.h
> > > +++ b/drivers/cxl/trace.h
> > > @@ -38,6 +38,9 @@ DEFINE_EVENT(cxl_region_template, sanitize_failed,
> > >  DEFINE_EVENT(cxl_region_template, allocation_failed,
> > >  	     TP_PROTO(const struct cxl_region *region, char *status),
> > >  	     TP_ARGS(region, status));
> > > +DEFINE_EVENT(cxl_region_template, xhb_valid,
> > > +	     TP_PROTO(const struct cxl_region *region, char *status),
> > > +	     TP_ARGS(region, status));
> > >  
> > >  #endif /* if !defined (__CXL_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) */
> > >    
> > 
>
diff mbox series

Patch

diff --git a/.clang-format b/.clang-format
index 15d4eaabc6b5..55f628f21722 100644
--- a/.clang-format
+++ b/.clang-format
@@ -169,6 +169,8 @@  ForEachMacros:
   - 'for_each_cpu_and'
   - 'for_each_cpu_not'
   - 'for_each_cpu_wrap'
+  - 'for_each_cxl_decoder_target'
+  - 'for_each_cxl_endpoint'
   - 'for_each_dapm_widgets'
   - 'for_each_dev_addr'
   - 'for_each_dev_scope'
diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c
index c8e3c48dfbb9..ca559a4b5347 100644
--- a/drivers/cxl/region.c
+++ b/drivers/cxl/region.c
@@ -28,6 +28,17 @@ 
  */
 
 #define region_ways(region) ((region)->config.eniw)
+#define region_ig(region) (ilog2((region)->config.ig))
+
+#define for_each_cxl_endpoint(ep, region, idx)                                 \
+	for (idx = 0, ep = (region)->config.targets[idx];                      \
+	     idx < region_ways(region);                                        \
+	     idx++, ep = (region)->config.targets[idx])
+
+#define for_each_cxl_decoder_target(target, decoder, idx)                      \
+	for (idx = 0, target = (decoder)->target[idx];                         \
+	     idx < (decoder)->nr_targets;                                      \
+	     idx++, target++)
 
 static struct cxl_decoder *rootd_from_region(struct cxl_region *r)
 {
@@ -175,6 +186,30 @@  static bool qtg_match(const struct cxl_decoder *rootd,
 	return true;
 }
 
+static int get_unique_hostbridges(const struct cxl_region *region,
+				  struct cxl_port **hbs)
+{
+	struct cxl_memdev *ep;
+	int i, hb_count = 0;
+
+	for_each_cxl_endpoint(ep, region, i) {
+		struct cxl_port *hb = get_hostbridge(ep);
+		bool found = false;
+		int j;
+
+		BUG_ON(!hb);
+
+		for (j = 0; j < hb_count; j++) {
+			if (hbs[j] == hb)
+				found = true;
+		}
+		if (!found)
+			hbs[hb_count++] = hb;
+	}
+
+	return hb_count;
+}
+
 /**
  * region_xhb_config_valid() - determine cross host bridge validity
  * @rootd: The root decoder to check against
@@ -188,7 +223,59 @@  static bool qtg_match(const struct cxl_decoder *rootd,
 static bool region_xhb_config_valid(const struct cxl_region *region,
 				    const struct cxl_decoder *rootd)
 {
-	/* TODO: */
+	struct cxl_port *hbs[CXL_DECODER_MAX_INTERLEAVE];
+	int rootd_ig, i;
+	struct cxl_dport *target;
+
+	/* Are all devices in this region on the same CXL host bridge */
+	if (get_unique_hostbridges(region, hbs) == 1)
+		return true;
+
+	rootd_ig = rootd->interleave_granularity;
+
+	/* CFMWS.HBIG >= Device.Label.IG */
+	if (rootd_ig < region_ig(region)) {
+		trace_xhb_valid(region,
+				"granularity does not support the region interleave granularity\n");
+		return false;
+	}
+
+	/* ((2^(CFMWS.HBIG - Device.RLabel.IG) * (2^CFMWS.ENIW)) > Device.RLabel.NLabel) */
+	if (1 << (rootd_ig - region_ig(region)) * (1 << rootd->interleave_ways) >
+	    region_ways(region)) {
+		trace_xhb_valid(region,
+				"granularity to device granularity ratio requires a larger number of devices than currently configured");
+		return false;
+	}
+
+	/* Check that endpoints are hooked up in the correct order */
+	for_each_cxl_decoder_target(target, rootd, i) {
+		struct cxl_memdev *endpoint = region->config.targets[i];
+
+		if (get_hostbridge(endpoint) != target->port) {
+			trace_xhb_valid(region, "device ordering bad\n");
+			return false;
+		}
+	}
+
+	/*
+	 * CFMWS.InterleaveTargetList[n] must contain all devices, x where:
+	 *	(Device[x],RegionLabel.Position >> (CFMWS.HBIG -
+	 *	Device[x].RegionLabel.InterleaveGranularity)) &
+	 *	((2^CFMWS.ENIW) - 1) = n
+	 *
+	 * Linux notes: All devices are known to have the same interleave
+	 * granularity at this point.
+	 */
+	for_each_cxl_decoder_target(target, rootd, i) {
+		if (((i >> (rootd_ig - region_ig(region)))) &
+		    (((1 << rootd->interleave_ways) - 1) != target->port_id)) {
+			trace_xhb_valid(region,
+					"One or more devices are not connected to the correct hostbridge.");
+			return false;
+		}
+	}
+
 	return true;
 }
 
diff --git a/drivers/cxl/trace.h b/drivers/cxl/trace.h
index a53f00ba5d0e..4de47d1111ac 100644
--- a/drivers/cxl/trace.h
+++ b/drivers/cxl/trace.h
@@ -38,6 +38,9 @@  DEFINE_EVENT(cxl_region_template, sanitize_failed,
 DEFINE_EVENT(cxl_region_template, allocation_failed,
 	     TP_PROTO(const struct cxl_region *region, char *status),
 	     TP_ARGS(region, status));
+DEFINE_EVENT(cxl_region_template, xhb_valid,
+	     TP_PROTO(const struct cxl_region *region, char *status),
+	     TP_ARGS(region, status));
 
 #endif /* if !defined (__CXL_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) */