
[v4,2/2] cxl/region: check interleave capability

Message ID 20240422091350.4701-3-yaoxt.fnst@fujitsu.com
State: New, archived
Series: cxl/region: add interleave capability check

Commit Message

Xingtao Yao (Fujitsu) April 22, 2024, 9:13 a.m. UTC
Since interleave capability is not verified, a target can successfully
attach to a region even if it lacks support for the specified interleave
ways or granularity.

When attempting to access memory, unexpected behavior occurs due to the
incorrect conversion of HPA to a faulty DPA, leading to a segmentation
fault, as observed when executing the command:
$ numactl -m 2 ls

According to the CXL specification (section 8.2.4.20 CXL HDM Decoder
Capability Structure), bits 11 and 12 within the 'CXL HDM Decoder
Capability Register' indicate the capability to establish interleaving
in 3, 6, 12, and 16 ways. If these bits are not set, the target cannot
be attached to a region utilizing such interleave ways.

Additionally, bits 8 and 9 in the same register represent the capability
of the bits used for interleaving in the address, commonly referred to as
the interleave mask.

Regarding 'Decoder Protection':
If IW is less than 8 (for interleave ways of 1, 2, 4, 8, 16), the
interleave bits start at bit position IG + 8 and end at IG + IW + 8 - 1.

If IW is greater than or equal to 8 (for interleave ways of 3, 6, 12),
the interleave bits start at bit position IG + 8 and end at IG + IW - 1.

If the interleave mask is insufficient to cover the required interleave
bits, the target cannot be attached to the region.
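
For example, a worked sketch assuming the usual encodings (IW = 2 encodes
4 interleave ways, IG = 1 encodes 512-byte granularity):

  interleave bits = HPA[IG + IW + 8 - 1 : IG + 8] = HPA[10:9]

so the interleave mask must cover HPA bits 9 and 10, otherwise the target
cannot be attached.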

The above check does not apply to host bridges with a single port or to
switches with a single dport, because no instance of the CXL HDM Decoder
Capability Structure exists for them.

Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders")
Signed-off-by: Yao Xingtao <yaoxt.fnst@fujitsu.com>
---
 drivers/cxl/core/hdm.c    |  5 +++
 drivers/cxl/core/region.c | 70 +++++++++++++++++++++++++++++++++++++++
 drivers/cxl/cxl.h         |  2 ++
 drivers/cxl/cxlmem.h      |  1 +
 4 files changed, 78 insertions(+)

Comments

Jonathan Cameron April 22, 2024, 11:17 a.m. UTC | #1
On Mon, 22 Apr 2024 05:13:50 -0400
Yao Xingtao <yaoxt.fnst@fujitsu.com> wrote:

> Since interleave capability is not verified, a target can successfully
> attach to a region even if it lacks support for the specified interleave
> ways or granularity.
> 
> When attempting to access memory, unexpected behavior occurs due to the
> incorrect conversion of HPA to a faulty DPA, leading to a segmentation
> fault, as observed when executing the command:
> $ numactl -m 2 ls

Don't mention a segfault as in theory it should have been impossible to
set this decoder up (commit decoder should have failed at the device end).
So just say that it fails and leave the details unmentioned.

> 
> According to the CXL specification (section 8.2.4.20 CXL HDM Decoder
> Capability Structure), bits 11 and 12 within the 'CXL HDM Decoder
> Capability Register' indicate the capability to establish interleaving
> in 3, 6, 12, and 16 ways. If these bits are not set, the target cannot
> be attached to a region utilizing such interleave ways.
> 
> Additionally, bits 8 and 9 in the same register represent the capability
> of the bits used for interleaving in the address, commonly referred to as
> the interleave mask.
> 
> Regarding 'Decoder Protection':
> If IW is less than 8 (for interleave ways of 1, 2, 4, 8, 16), the
> interleave bits start at bit position IG + 8 and end at IG + IW + 8 - 1.
> 
> If the IW is greater than or equal to 8 (for interleave ways of 3, 6, 12),
> the interleave bits start at bit position IG + 8 and end at IG + IW - 1.
> 
> If the interleave mask is insufficient to cover the required interleave
> bits, the target cannot be attached to the region.
> 
> The above check does not apply to the host-bridges with single port and
> switches with single dport, because there does not have a instance of
> CXL HDM Decoder Capability Structure for them.

There needs to be clear separation between such cases that do not
have HDM decoders and the permissible case where there is one but
it can effectively be set to anything because no interleaving is
going on.

> 
> Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders")
> Signed-off-by: Yao Xingtao <yaoxt.fnst@fujitsu.com>

>  
>  static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 5c186e0a39b9..5c09652136c9 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -1210,6 +1210,60 @@ static int check_last_peer(struct cxl_endpoint_decoder *cxled,
>  	return 0;
>  }
>  
> +static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
> +{
> +	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
> +	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
> +	struct cxl_switch_decoder *cxlsd;
> +	unsigned int interleave_mask;
> +	u8 eiw;
> +	u16 eig;
> +	int rc, high_pos, low_pos;
> +
> +	if (is_switch_decoder(&cxld->dev)) {
> +		cxlsd = to_cxl_switch_decoder(&cxld->dev);
> +		if (cxlsd->passthrough)
> +			return 0;
> +	}
> +
> +	rc = ways_to_eiw(iw, &eiw);
> +	if (rc)
> +		return rc;
> +
> +	if (!(cxlhdm->iw_cap_mask & BIT(iw)))
> +		return -EOPNOTSUPP;
> +
> +	rc = granularity_to_eig(ig, &eig);
> +	if (rc)
> +		return rc;
> +
> +	/*
> +	 * Per CXL specification (8.2.3.20.13 Decoder Protection in r3.1)
> +	 * if IW < 8, the interleave bits start at bit position IG + 8, and
> +	 * end at IG + IW + 8 - 1.
> +	 * if IW >= 8, the interleave bits start at bit position IG + 8, and
> +	 * end at IG + IW - 1.
> +	 */
> +	if (eiw >= 8)
> +		high_pos = eiw + eig - 1;
> +	else
> +		high_pos = eiw + eig + 7;
> +	low_pos = eig + 8;
> +	/*
> +	 * when the IW is 0 or 8 (interlave way is 1 or 3), the low_pos is
> +	 * larger than high_pos, since the target is in the target list of a
> +	 * passthrough decoder, the following check is ignored.

There may be a decoder even on 1 to 1 routing cases so is this correct?

> +	 */
> +	if (low_pos > high_pos)
> +		return 0;
Xingtao Yao (Fujitsu) April 23, 2024, 12:02 a.m. UTC | #2
> -----Original Message-----
> From: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
> Sent: Monday, April 22, 2024 7:17 PM
> To: Yao, Xingtao/姚 幸涛 <yaoxt.fnst@fujitsu.com>
> Cc: dave@stgolabs.net; dave.jiang@intel.com; alison.schofield@intel.com;
> vishal.l.verma@intel.com; ira.weiny@intel.com; dan.j.williams@intel.com;
> jim.harris@samsung.com; linux-cxl@vger.kernel.org
> Subject: Re: [PATCH v4 2/2] cxl/region: check interleave capability
> 
> On Mon, 22 Apr 2024 05:13:50 -0400
> Yao Xingtao <yaoxt.fnst@fujitsu.com> wrote:
> 
> > Since interleave capability is not verified, a target can successfully
> > attach to a region even if it lacks support for the specified interleave
> > ways or granularity.
> >
> > When attempting to access memory, unexpected behavior occurs due to the
> > incorrect conversion of HPA to a faulty DPA, leading to a segmentation
> > fault, as observed when executing the command:
> > $ numactl -m 2 ls
> 
> Don't mention a segfault as in theory it should have been impossible to
> set this decoder up (commit decoder should have failed at the device end).
> So just say that it fails and leave the details unmentioned.
OK, got it.

> 
> >
> > According to the CXL specification (section 8.2.4.20 CXL HDM Decoder
> > Capability Structure), bits 11 and 12 within the 'CXL HDM Decoder
> > Capability Register' indicate the capability to establish interleaving
> > in 3, 6, 12, and 16 ways. If these bits are not set, the target cannot
> > be attached to a region utilizing such interleave ways.
> >
> > Additionally, bits 8 and 9 in the same register represent the capability
> > of the bits used for interleaving in the address, commonly referred to as
> > the interleave mask.
> >
> > Regarding 'Decoder Protection':
> > If IW is less than 8 (for interleave ways of 1, 2, 4, 8, 16), the
> > interleave bits start at bit position IG + 8 and end at IG + IW + 8 - 1.
> >
> > If the IW is greater than or equal to 8 (for interleave ways of 3, 6, 12),
> > the interleave bits start at bit position IG + 8 and end at IG + IW - 1.
> >
> > If the interleave mask is insufficient to cover the required interleave
> > bits, the target cannot be attached to the region.
> >
> > The above check does not apply to the host-bridges with single port and
> > switches with single dport, because there does not have a instance of
> > CXL HDM Decoder Capability Structure for them.
> 
> There needs to be clear separation between such cases that do not
> have HDM decoders and the permissible case where there is one but
> it can effectively be set to anything because no interleaving is
> going on.
OK.

> 
> >
> > Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders")
> > Signed-off-by: Yao Xingtao <yaoxt.fnst@fujitsu.com>
> 
> >
> >  static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> > index 5c186e0a39b9..5c09652136c9 100644
> > --- a/drivers/cxl/core/region.c
> > +++ b/drivers/cxl/core/region.c
> > @@ -1210,6 +1210,60 @@ static int check_last_peer(struct
> cxl_endpoint_decoder *cxled,
> >  	return 0;
> >  }
> >
> > +static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
> > +{
> > +	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
> > +	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
> > +	struct cxl_switch_decoder *cxlsd;
> > +	unsigned int interleave_mask;
> > +	u8 eiw;
> > +	u16 eig;
> > +	int rc, high_pos, low_pos;
> > +
> > +	if (is_switch_decoder(&cxld->dev)) {
> > +		cxlsd = to_cxl_switch_decoder(&cxld->dev);
> > +		if (cxlsd->passthrough)
> > +			return 0;
> > +	}
> > +
> > +	rc = ways_to_eiw(iw, &eiw);
> > +	if (rc)
> > +		return rc;
> > +
> > +	if (!(cxlhdm->iw_cap_mask & BIT(iw)))
> > +		return -EOPNOTSUPP;
> > +
> > +	rc = granularity_to_eig(ig, &eig);
> > +	if (rc)
> > +		return rc;
> > +
> > +	/*
> > +	 * Per CXL specification (8.2.3.20.13 Decoder Protection in r3.1)
> > +	 * if IW < 8, the interleave bits start at bit position IG + 8, and
> > +	 * end at IG + IW + 8 - 1.
> > +	 * if IW >= 8, the interleave bits start at bit position IG + 8, and
> > +	 * end at IG + IW - 1.
> > +	 */
> > +	if (eiw >= 8)
> > +		high_pos = eiw + eig - 1;
> > +	else
> > +		high_pos = eiw + eig + 7;
> > +	low_pos = eig + 8;
> > +	/*
> > +	 * when the IW is 0 or 8 (interlave way is 1 or 3), the low_pos is
> > +	 * larger than high_pos, since the target is in the target list of a
> > +	 * passthrough decoder, the following check is ignored.
> 
> There may be a decoder even on 1 to 1 routing cases so is this correct?
Yes, the decoder exists.
In that case the number of interleave bits is zero, meaning no interleaving is
going on, so we can skip this check.
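
(For a concrete sketch with the values above: eiw = 0 with eig = 0 gives
high_pos = 7 and low_pos = 8, and eiw = 8 gives high_pos = eig + 7 versus
low_pos = eig + 8, so in both cases low_pos > high_pos and no HPA bit is
claimed for interleaving.)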
> 
> > +	 */
> > +	if (low_pos > high_pos)
> > +		return 0;
> 
>
Dan Williams April 23, 2024, 12:59 a.m. UTC | #3
Yao Xingtao wrote:
> Since interleave capability is not verified, a target can successfully
> attach to a region even if it lacks support for the specified interleave
> ways or granularity.
> 
> When attempting to access memory, unexpected behavior occurs due to the
> incorrect conversion of HPA to a faulty DPA, leading to a segmentation
> fault, as observed when executing the command:
> $ numactl -m 2 ls

This is a QEMU specific statement, right?

That should be called out, because I suspect the behavior on hardware to
be different.

> According to the CXL specification (section 8.2.4.20 CXL HDM Decoder
> Capability Structure), bits 11 and 12 within the 'CXL HDM Decoder
> Capability Register' indicate the capability to establish interleaving
> in 3, 6, 12, and 16 ways. If these bits are not set, the target cannot
> be attached to a region utilizing such interleave ways.
> 
> Additionally, bits 8 and 9 in the same register represent the capability
> of the bits used for interleaving in the address, commonly referred to as
> the interleave mask.

Perhaps replace "commonly referred to as" with "Linux tracks this in the
cxl_port interleave_mask"
 
> Regarding 'Decoder Protection':
> If IW is less than 8 (for interleave ways of 1, 2, 4, 8, 16), the
> interleave bits start at bit position IG + 8 and end at IG + IW + 8 - 1.
> 
> If the IW is greater than or equal to 8 (for interleave ways of 3, 6, 12),
> the interleave bits start at bit position IG + 8 and end at IG + IW - 1.
> 
> If the interleave mask is insufficient to cover the required interleave
> bits, the target cannot be attached to the region.
> 
> The above check does not apply to the host-bridges with single port and
> switches with single dport, because there does not have a instance of
> CXL HDM Decoder Capability Structure for them.
> 
> Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders")
> Signed-off-by: Yao Xingtao <yaoxt.fnst@fujitsu.com>
> ---
>  drivers/cxl/core/hdm.c    |  5 +++
>  drivers/cxl/core/region.c | 70 +++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/cxl.h         |  2 ++
>  drivers/cxl/cxlmem.h      |  1 +
>  4 files changed, 78 insertions(+)
> 
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 27fb4f9d489e..b201be4b8d76 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -80,6 +80,11 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
>  		cxlhdm->interleave_mask |= GENMASK(11, 8);
>  	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
>  		cxlhdm->interleave_mask |= GENMASK(14, 12);
> +	cxlhdm->iw_cap_mask = BIT(1) | BIT(2) | BIT(4) | BIT(8);
> +	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY, hdm_cap))
> +		cxlhdm->iw_cap_mask |= BIT(3) | BIT(6) | BIT(12);
> +	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_16_WAY, hdm_cap))
> +		cxlhdm->iw_cap_mask |= BIT(16);
>  }
>  
>  static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 5c186e0a39b9..5c09652136c9 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -1210,6 +1210,60 @@ static int check_last_peer(struct cxl_endpoint_decoder *cxled,
>  	return 0;
>  }
>  
> +static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
> +{
> +	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
> +	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
> +	struct cxl_switch_decoder *cxlsd;
> +	unsigned int interleave_mask;
> +	u8 eiw;
> +	u16 eig;
> +	int rc, high_pos, low_pos;
> +
> +	if (is_switch_decoder(&cxld->dev)) {
> +		cxlsd = to_cxl_switch_decoder(&cxld->dev);
> +		if (cxlsd->passthrough)
> +			return 0;
> +	}
> +
> +	rc = ways_to_eiw(iw, &eiw);
> +	if (rc)
> +		return rc;
> +
> +	if (!(cxlhdm->iw_cap_mask & BIT(iw)))

Would be nice to just write this as:

   if (!test_bit(iw, &cxlhdm->iw_cap_mask))

...requires making iw_cap_mask an 'unsigned long', but that's worth it
to me.
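
A minimal sketch of that variant (assuming iw_cap_mask is widened to
unsigned long in struct cxl_hdm; illustrative, not the posted code):

	/* iw_cap_mask as unsigned long, so the generic bitops apply */
	if (!test_bit(iw, &cxlhdm->iw_cap_mask))
		return -EOPNOTSUPP;

The BIT()/|= assignments in parse_hdm_decoder_caps() keep working on the
wider type, since BIT() expands to an unsigned long constant.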

> +		return -EOPNOTSUPP;

This should be -ENXIO like all the other hardware mismatch /
misconfiguration errors in cxl_region_attach().

> +
> +	rc = granularity_to_eig(ig, &eig);
> +	if (rc)
> +		return rc;
> +
> +	/*
> +	 * Per CXL specification (8.2.3.20.13 Decoder Protection in r3.1)
> +	 * if IW < 8, the interleave bits start at bit position IG + 8, and
> +	 * end at IG + IW + 8 - 1.
> +	 * if IW >= 8, the interleave bits start at bit position IG + 8, and
> +	 * end at IG + IW - 1.
> +	 */
> +	if (eiw >= 8)
> +		high_pos = eiw + eig - 1;
> +	else
> +		high_pos = eiw + eig + 7;
> +	low_pos = eig + 8;
> +	/*
> +	 * when the IW is 0 or 8 (interlave way is 1 or 3), the low_pos is
> +	 * larger than high_pos, since the target is in the target list of a
> +	 * passthrough decoder, the following check is ignored.
> +	 */
> +	if (low_pos > high_pos)
> +		return 0;
> +
> +	interleave_mask = GENMASK(high_pos, low_pos);
> +	if (interleave_mask & ~cxlhdm->interleave_mask)
> +		return -EOPNOTSUPP;
> +
> +	return 0;
> +}
> +
>  static int cxl_port_setup_targets(struct cxl_port *port,
>  				  struct cxl_region *cxlr,
>  				  struct cxl_endpoint_decoder *cxled)
> @@ -1360,6 +1414,14 @@ static int cxl_port_setup_targets(struct cxl_port *port,
>  			return -ENXIO;
>  		}
>  	} else {
> +		rc = check_interleave_cap(cxld, iw, ig);

This seems too late to be doing the check. check_interleave_cap() only
needs to operate on all the 'struct cxl_port' instances between the
endpoint and the root. That check can be done by walking ports at
cxl_region_attach() time, no need to wait until switch decoders are
being allocated.
Xingtao Yao (Fujitsu) April 23, 2024, 2:47 a.m. UTC | #4
> -----Original Message-----
> From: Dan Williams <dan.j.williams@intel.com>
> Sent: Tuesday, April 23, 2024 8:59 AM
> To: Yao, Xingtao/姚 幸涛 <yaoxt.fnst@fujitsu.com>; dave@stgolabs.net;
> jonathan.cameron@huawei.com; dave.jiang@intel.com;
> alison.schofield@intel.com; vishal.l.verma@intel.com; ira.weiny@intel.com;
> dan.j.williams@intel.com; jim.harris@samsung.com
> Cc: linux-cxl@vger.kernel.org; Yao, Xingtao/姚 幸涛 <yaoxt.fnst@fujitsu.com>
> Subject: RE: [PATCH v4 2/2] cxl/region: check interleave capability
> 
> Yao Xingtao wrote:
> > Since interleave capability is not verified, a target can successfully
> > attach to a region even if it lacks support for the specified interleave
> > ways or granularity.
> >
> > When attempting to access memory, unexpected behavior occurs due to the
> > incorrect conversion of HPA to a faulty DPA, leading to a segmentation
> > fault, as observed when executing the command:
> > $ numactl -m 2 ls
> 
> This is a QEMU specific statement, right?
> 
> That should be called out, because I suspect the behavior on hardware to
> be different.
OK, got it.

> 
> > According to the CXL specification (section 8.2.4.20 CXL HDM Decoder
> > Capability Structure), bits 11 and 12 within the 'CXL HDM Decoder
> > Capability Register' indicate the capability to establish interleaving
> > in 3, 6, 12, and 16 ways. If these bits are not set, the target cannot
> > be attached to a region utilizing such interleave ways.
> >
> > Additionally, bits 8 and 9 in the same register represent the capability
> > of the bits used for interleaving in the address, commonly referred to as
> > the interleave mask.
> 
> Perhaps replace "commonly referred to as" with "Linux tracks this in the
> cxl_port interleave_mask"
looks good!
> 
> > Regarding 'Decoder Protection':
> > If IW is less than 8 (for interleave ways of 1, 2, 4, 8, 16), the
> > interleave bits start at bit position IG + 8 and end at IG + IW + 8 - 1.
> >
> > If the IW is greater than or equal to 8 (for interleave ways of 3, 6, 12),
> > the interleave bits start at bit position IG + 8 and end at IG + IW - 1.
> >
> > If the interleave mask is insufficient to cover the required interleave
> > bits, the target cannot be attached to the region.
> >
> > The above check does not apply to the host-bridges with single port and
> > switches with single dport, because there does not have a instance of
> > CXL HDM Decoder Capability Structure for them.
> >
> > Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders")
> > Signed-off-by: Yao Xingtao <yaoxt.fnst@fujitsu.com>
> > ---
> >  drivers/cxl/core/hdm.c    |  5 +++
> >  drivers/cxl/core/region.c | 70
> +++++++++++++++++++++++++++++++++++++++
> >  drivers/cxl/cxl.h         |  2 ++
> >  drivers/cxl/cxlmem.h      |  1 +
> >  4 files changed, 78 insertions(+)
> >
> > diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> > index 27fb4f9d489e..b201be4b8d76 100644
> > --- a/drivers/cxl/core/hdm.c
> > +++ b/drivers/cxl/core/hdm.c
> > @@ -80,6 +80,11 @@ static void parse_hdm_decoder_caps(struct cxl_hdm
> *cxlhdm)
> >  		cxlhdm->interleave_mask |= GENMASK(11, 8);
> >  	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
> >  		cxlhdm->interleave_mask |= GENMASK(14, 12);
> > +	cxlhdm->iw_cap_mask = BIT(1) | BIT(2) | BIT(4) | BIT(8);
> > +	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY,
> hdm_cap))
> > +		cxlhdm->iw_cap_mask |= BIT(3) | BIT(6) | BIT(12);
> > +	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_16_WAY, hdm_cap))
> > +		cxlhdm->iw_cap_mask |= BIT(16);
> >  }
> >
> >  static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> > index 5c186e0a39b9..5c09652136c9 100644
> > --- a/drivers/cxl/core/region.c
> > +++ b/drivers/cxl/core/region.c
> > @@ -1210,6 +1210,60 @@ static int check_last_peer(struct
> cxl_endpoint_decoder *cxled,
> >  	return 0;
> >  }
> >
> > +static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
> > +{
> > +	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
> > +	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
> > +	struct cxl_switch_decoder *cxlsd;
> > +	unsigned int interleave_mask;
> > +	u8 eiw;
> > +	u16 eig;
> > +	int rc, high_pos, low_pos;
> > +
> > +	if (is_switch_decoder(&cxld->dev)) {
> > +		cxlsd = to_cxl_switch_decoder(&cxld->dev);
> > +		if (cxlsd->passthrough)
> > +			return 0;
> > +	}
> > +
> > +	rc = ways_to_eiw(iw, &eiw);
> > +	if (rc)
> > +		return rc;
> > +
> > +	if (!(cxlhdm->iw_cap_mask & BIT(iw)))
> 
> Would be nice to just write this as:
> 
>    if (!test_bit(iw, &cxlhdm->iw_cap_mask))
> 
> ...requires making iw_cap_mask an 'unsigned long', but that's worth it
> to me.
> 
> > +		return -EOPNOTSUPP;
> 
> This should be -ENXIO like all the other hardware mismatch /
> misconfiguration errors in cxl_region_attach().
OK, got it.
> 
> > +
> > +	rc = granularity_to_eig(ig, &eig);
> > +	if (rc)
> > +		return rc;
> > +
> > +	/*
> > +	 * Per CXL specification (8.2.3.20.13 Decoder Protection in r3.1)
> > +	 * if IW < 8, the interleave bits start at bit position IG + 8, and
> > +	 * end at IG + IW + 8 - 1.
> > +	 * if IW >= 8, the interleave bits start at bit position IG + 8, and
> > +	 * end at IG + IW - 1.
> > +	 */
> > +	if (eiw >= 8)
> > +		high_pos = eiw + eig - 1;
> > +	else
> > +		high_pos = eiw + eig + 7;
> > +	low_pos = eig + 8;
> > +	/*
> > +	 * when the IW is 0 or 8 (interlave way is 1 or 3), the low_pos is
> > +	 * larger than high_pos, since the target is in the target list of a
> > +	 * passthrough decoder, the following check is ignored.
> > +	 */
> > +	if (low_pos > high_pos)
> > +		return 0;
> > +
> > +	interleave_mask = GENMASK(high_pos, low_pos);
> > +	if (interleave_mask & ~cxlhdm->interleave_mask)
> > +		return -EOPNOTSUPP;
> > +
> > +	return 0;
> > +}
> > +
> >  static int cxl_port_setup_targets(struct cxl_port *port,
> >  				  struct cxl_region *cxlr,
> >  				  struct cxl_endpoint_decoder *cxled)
> > @@ -1360,6 +1414,14 @@ static int cxl_port_setup_targets(struct cxl_port *port,
> >  			return -ENXIO;
> >  		}
> >  	} else {
> > +		rc = check_interleave_cap(cxld, iw, ig);
> 
> This seems too late to be doing the check. check_interleave_cap() only
> needs to operate on all the 'struct cxl_port' instances between the
> endpoint and the root. That check can be done be walking ports at
> cxl_region_attach() time, no need to wait until switch decoders are
> being allocated.

I think we cannot get the iw and ig values for the switch decoders while walking
ports from the endpoint to the root; they are only set in cxl_port_setup_targets().

Is that understanding correct? If so, the interleave capability check cannot be
performed at the point you suggested.
Xingtao Yao (Fujitsu) May 12, 2024, 11:43 p.m. UTC | #5
ping.

> >  static int cxl_port_setup_targets(struct cxl_port *port,
> >  				  struct cxl_region *cxlr,
> >  				  struct cxl_endpoint_decoder *cxled)
> > @@ -1360,6 +1414,14 @@ static int cxl_port_setup_targets(struct cxl_port
*port,
> >  			return -ENXIO;
> >  		}
> >  	} else {
> > +		rc = check_interleave_cap(cxld, iw, ig);
>
> This seems too late to be doing the check. check_interleave_cap() only
> needs to operate on all the 'struct cxl_port' instances between the
> endpoint and the root. That check can be done be walking ports at
> cxl_region_attach() time, no need to wait until switch decoders are
> being allocated.

I think we cannot get the iw and ig values for the switch decoders while walking
ports from the endpoint to the root; they are only set in cxl_port_setup_targets().

Is that understanding correct? If so, the interleave capability check cannot be
performed at the point you suggested.
Xingtao Yao (Fujitsu) May 23, 2024, 9:05 a.m. UTC | #6
ping again.

> -----Original Message-----
> From: Yao, Xingtao/姚 幸涛 <yaoxt.fnst@fujitsu.com>
> Sent: Monday, May 13, 2024 7:44 AM
> To: Yao, Xingtao/姚 幸涛 <yaoxt.fnst@fujitsu.com>; Dan Williams
> <dan.j.williams@intel.com>; dave@stgolabs.net; jonathan.cameron@huawei.com;
> dave.jiang@intel.com; alison.schofield@intel.com; vishal.l.verma@intel.com;
> ira.weiny@intel.com; jim.harris@samsung.com
> Cc: linux-cxl@vger.kernel.org
> Subject: RE: [PATCH v4 2/2] cxl/region: check interleave capability
> 
> ping.
> 
> > >  static int cxl_port_setup_targets(struct cxl_port *port,
> > >  				  struct cxl_region *cxlr,
> > >  				  struct cxl_endpoint_decoder *cxled)
> > > @@ -1360,6 +1414,14 @@ static int cxl_port_setup_targets(struct cxl_port
> *port,
> > >  			return -ENXIO;
> > >  		}
> > >  	} else {
> > > +		rc = check_interleave_cap(cxld, iw, ig);
> >
> > This seems too late to be doing the check. check_interleave_cap() only
> > needs to operate on all the 'struct cxl_port' instances between the
> > endpoint and the root. That check can be done be walking ports at
> > cxl_region_attach() time, no need to wait until switch decoders are
> > being allocated.
> 
> I think we could not get the info of iw and ig for switch decoders while walking ports
> from endpoint to root, they were set only in cxl_port_setup_targets().
> 
> Is the above understanding correct? if so, the interleave capability check cannot be
> performed at the timing you specified.
Dan Williams May 23, 2024, 6:47 p.m. UTC | #7
Xingtao Yao (Fujitsu) wrote:
> ping.

Forgive the delay.

> 
> > >  static int cxl_port_setup_targets(struct cxl_port *port,
> > >  				  struct cxl_region *cxlr,
> > >  				  struct cxl_endpoint_decoder *cxled)
> > > @@ -1360,6 +1414,14 @@ static int cxl_port_setup_targets(struct cxl_port
> *port,
> > >  			return -ENXIO;
> > >  		}
> > >  	} else {
> > > +		rc = check_interleave_cap(cxld, iw, ig);
> >
> > This seems too late to be doing the check. check_interleave_cap() only
> > needs to operate on all the 'struct cxl_port' instances between the
> > endpoint and the root. That check can be done be walking ports at
> > cxl_region_attach() time, no need to wait until switch decoders are
> > being allocated.
> 
> I think we could not get the info of iw and ig for switch decoders
> while walking ports from endpoint to root, they were set only in
> cxl_port_setup_targets().

So the reason I wanted as much validation as possible to move from
cxl_region_setup_targets() to cxl_region_attach_position() is to move
setup failures closer to the device that caused the problem. Otherwise
cxl_region_setup_targets() will fail only when the last device was added
and the overall check is performed.

> Is the above understanding correct? if so, the interleave capability
> check cannot be performed at the timing you specified.

I think @iw can be validated early because cxl_port_attach_region()
knows how many downstream targets have been assigned to that port and
can validate when the number of targets exceeds the decoder capability.

For @ig though you are right, it is more difficult given the dependency
on "parent_ig". So, maybe a compromise is to validate that nr_targets
does not violate cxlhdm->target_count at cxl_region_attach_position()
time, but then do the granularity validation in
cxl_region_setup_targets() as you have it.
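
A minimal sketch of that compromise (hypothetical helper, reusing the
drvdata lookup from check_interleave_cap(); the name and call site are
illustrative only):

	/* early sanity check at cxl_region_attach_position() time */
	static int check_switch_target_count(struct cxl_port *port, int nr_targets)
	{
		struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);

		/* more targets routed through this port than the decoder supports */
		if (nr_targets > cxlhdm->target_count)
			return -ENXIO;
		return 0;
	}

with the interleave-mask / granularity validation staying in
cxl_port_setup_targets() as posted.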
Xingtao Yao (Fujitsu) May 24, 2024, 9:15 a.m. UTC | #8
> -----Original Message-----
> From: Dan Williams <dan.j.williams@intel.com>
> Sent: Friday, May 24, 2024 2:47 AM
> To: Yao, Xingtao/姚 幸涛 <yaoxt.fnst@fujitsu.com>; Dan Williams
> <dan.j.williams@intel.com>; dave@stgolabs.net; jonathan.cameron@huawei.com;
> dave.jiang@intel.com; alison.schofield@intel.com; vishal.l.verma@intel.com;
> ira.weiny@intel.com; jim.harris@samsung.com
> Cc: linux-cxl@vger.kernel.org
> Subject: RE: [PATCH v4 2/2] cxl/region: check interleave capability
> 
> Xingtao Yao (Fujitsu) wrote:
> > ping.
> 
> Forgive the delay.
> 
> >
> > > >  static int cxl_port_setup_targets(struct cxl_port *port,
> > > >  				  struct cxl_region *cxlr,
> > > >  				  struct cxl_endpoint_decoder *cxled)
> > > > @@ -1360,6 +1414,14 @@ static int cxl_port_setup_targets(struct cxl_port
> > *port,
> > > >  			return -ENXIO;
> > > >  		}
> > > >  	} else {
> > > > +		rc = check_interleave_cap(cxld, iw, ig);
> > >
> > > This seems too late to be doing the check. check_interleave_cap() only
> > > needs to operate on all the 'struct cxl_port' instances between the
> > > endpoint and the root. That check can be done be walking ports at
> > > cxl_region_attach() time, no need to wait until switch decoders are
> > > being allocated.
> >
> > I think we could not get the info of iw and ig for switch decoders
> > while walking ports from endpoint to root, they were set only in
> > cxl_port_setup_targets().
> 
> So the reason I wanted as much validation as possible to move from
> cxl_region_setup_targets() to cxl_region_attach_position() is to move
> setup failures closer to the device that caused the problem. Otherwise
> cxl_region_setup_targets() will fail only when the last device was added
> and the overall check is performed.
> 
> > Is the above understanding correct? if so, the interleave capability
> > check cannot be performed at the timing you specified.
> 
> I think @iw can be validated early because cxl_port_attach_region()
> knows how many downstream targets have been assigned to that port and
> can validate when the number of targets exceeds the decoder capability.
> 
> For @ig though you are right, it is more difficult given the dependency
> on "parent_ig". So, maybe a compromise is to validate that nr_targets
> does not violate cxlhdm->target_count at cxl_region_attach_position()
> time, but then do the granularity validation in
> cxl_region_setup_targets() as you have it.
Thanks for your reply, got it.

Patch

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 27fb4f9d489e..b201be4b8d76 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -80,6 +80,11 @@  static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
 		cxlhdm->interleave_mask |= GENMASK(11, 8);
 	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
 		cxlhdm->interleave_mask |= GENMASK(14, 12);
+	cxlhdm->iw_cap_mask = BIT(1) | BIT(2) | BIT(4) | BIT(8);
+	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY, hdm_cap))
+		cxlhdm->iw_cap_mask |= BIT(3) | BIT(6) | BIT(12);
+	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_16_WAY, hdm_cap))
+		cxlhdm->iw_cap_mask |= BIT(16);
 }
 
 static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 5c186e0a39b9..5c09652136c9 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1210,6 +1210,60 @@  static int check_last_peer(struct cxl_endpoint_decoder *cxled,
 	return 0;
 }
 
+static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
+{
+	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
+	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
+	struct cxl_switch_decoder *cxlsd;
+	unsigned int interleave_mask;
+	u8 eiw;
+	u16 eig;
+	int rc, high_pos, low_pos;
+
+	if (is_switch_decoder(&cxld->dev)) {
+		cxlsd = to_cxl_switch_decoder(&cxld->dev);
+		if (cxlsd->passthrough)
+			return 0;
+	}
+
+	rc = ways_to_eiw(iw, &eiw);
+	if (rc)
+		return rc;
+
+	if (!(cxlhdm->iw_cap_mask & BIT(iw)))
+		return -EOPNOTSUPP;
+
+	rc = granularity_to_eig(ig, &eig);
+	if (rc)
+		return rc;
+
+	/*
+	 * Per CXL specification (8.2.3.20.13 Decoder Protection in r3.1)
+	 * if IW < 8, the interleave bits start at bit position IG + 8, and
+	 * end at IG + IW + 8 - 1.
+	 * if IW >= 8, the interleave bits start at bit position IG + 8, and
+	 * end at IG + IW - 1.
+	 */
+	if (eiw >= 8)
+		high_pos = eiw + eig - 1;
+	else
+		high_pos = eiw + eig + 7;
+	low_pos = eig + 8;
+	/*
+	 * when the IW is 0 or 8 (interleave way is 1 or 3), the low_pos is
+	 * larger than high_pos, since the target is in the target list of a
+	 * passthrough decoder, the following check is ignored.
+	 */
+	if (low_pos > high_pos)
+		return 0;
+
+	interleave_mask = GENMASK(high_pos, low_pos);
+	if (interleave_mask & ~cxlhdm->interleave_mask)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
 static int cxl_port_setup_targets(struct cxl_port *port,
 				  struct cxl_region *cxlr,
 				  struct cxl_endpoint_decoder *cxled)
@@ -1360,6 +1414,14 @@  static int cxl_port_setup_targets(struct cxl_port *port,
 			return -ENXIO;
 		}
 	} else {
+		rc = check_interleave_cap(cxld, iw, ig);
+		if (rc) {
+			dev_dbg(&cxlr->dev,
+				"%s:%s iw: %d ig: %d is not supported\n",
+				dev_name(port->uport_dev),
+				dev_name(&port->dev), iw, ig);
+			return rc;
+		}
 		cxld->interleave_ways = iw;
 		cxld->interleave_granularity = ig;
 		cxld->hpa_range = (struct range) {
@@ -1796,6 +1858,14 @@  static int cxl_region_attach(struct cxl_region *cxlr,
 	struct cxl_dport *dport;
 	int rc = -ENXIO;
 
+	rc = check_interleave_cap(&cxled->cxld, p->interleave_ways,
+				  p->interleave_granularity);
+	if (rc) {
+		dev_dbg(&cxlr->dev, "%s iw: %d ig: %d is not supported\n",
+			dev_name(&cxled->cxld.dev), p->interleave_ways,
+			p->interleave_granularity);
+		return rc;
+	}
 	if (cxled->mode != cxlr->mode) {
 		dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
 			dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 6f562baa164f..b7afb936c854 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -45,6 +45,8 @@ 
 #define   CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
 #define   CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
 #define   CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
+#define   CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11)
+#define   CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12)
 #define CXL_HDM_DECODER_CTRL_OFFSET 0x4
 #define   CXL_HDM_DECODER_ENABLE BIT(1)
 #define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 36cee9c30ceb..a044234f5993 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -853,6 +853,7 @@  struct cxl_hdm {
 	unsigned int decoder_count;
 	unsigned int target_count;
 	unsigned int interleave_mask;
+	unsigned int iw_cap_mask;
 	struct cxl_port *port;
 };