diff mbox series

[RFC] cxl/region: Fix region creation for greater than x2 switches

Message ID 20241027075717.3714821-1-huaisheng.ye@intel.com
State New
Headers show
Series [RFC] cxl/region: Fix region creation for greater than x2 switches | expand

Commit Message

Ye, Huaisheng Oct. 27, 2024, 7:57 a.m. UTC
The cxl_port_setup_targets() algorithm fails to identify valid target list
ordering in the presence of 4-way and above switches resulting in
'cxl create-region' failures of the form:

  # cxl create-region -d decoder0.0 -g 1024 -s 2G -t ram -w 8 -m mem4 mem1 mem6 mem3 mem2 mem5 mem7 mem0
  cxl region: create_region: region0: failed to set target7 to mem0
  cxl region: cmd_create_region: created 0 regions

  [kernel debug message]
  check_last_peer:1213: cxl region0: pci0000:0c:port1: cannot host mem6:decoder7.0 at 2
  bus_remove_device:574: bus: 'cxl': remove device region0

QEMU can create this failing topology:

                       ACPI0017:00 [root0]
                           |
                         HB_0 [port1]
                        /             \
                     RP_0             RP_1
                      |                 |
                USP [port2]           USP [port3]
            /    /    \    \        /   /    \    \
          DSP   DSP   DSP   DSP   DSP  DSP   DSP  DSP
           |     |     |     |     |    |     |    |
          mem4  mem6  mem2  mem7  mem1 mem3  mem5  mem0
 Pos:      0     2     4     6     1    3     5    7

 HB: Host Bridge
 RP: Root Port
 USP: Upstream Port
 DSP: Downstream Port

...with the following command steps:

$ qemu-system-x86_64 -machine q35,cxl=on,accel=tcg  \
        -smp cpus=8 \
        -m 8G \
        -hda /home/work/vm-images/centos-stream8-02.qcow2 \
        -object memory-backend-ram,size=4G,id=m0 \
        -object memory-backend-ram,size=4G,id=m1 \
        -object memory-backend-ram,size=2G,id=cxl-mem0 \
        -object memory-backend-ram,size=2G,id=cxl-mem1 \
        -object memory-backend-ram,size=2G,id=cxl-mem2 \
        -object memory-backend-ram,size=2G,id=cxl-mem3 \
        -object memory-backend-ram,size=2G,id=cxl-mem4 \
        -object memory-backend-ram,size=2G,id=cxl-mem5 \
        -object memory-backend-ram,size=2G,id=cxl-mem6 \
        -object memory-backend-ram,size=2G,id=cxl-mem7 \
        -numa node,memdev=m0,cpus=0-3,nodeid=0 \
        -numa node,memdev=m1,cpus=4-7,nodeid=1 \
        -netdev user,id=net0,hostfwd=tcp::2222-:22 \
        -device virtio-net-pci,netdev=net0 \
        -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
        -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
        -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
        -device cxl-upstream,bus=root_port0,id=us0 \
        -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
        -device cxl-type3,bus=swport0,volatile-memdev=cxl-mem0,id=cxl-vmem0 \
        -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
        -device cxl-type3,bus=swport1,volatile-memdev=cxl-mem1,id=cxl-vmem1 \
        -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
        -device cxl-type3,bus=swport2,volatile-memdev=cxl-mem2,id=cxl-vmem2 \
        -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
        -device cxl-type3,bus=swport3,volatile-memdev=cxl-mem3,id=cxl-vmem3 \
        -device cxl-upstream,bus=root_port1,id=us1 \
        -device cxl-downstream,port=4,bus=us1,id=swport4,chassis=0,slot=8 \
        -device cxl-type3,bus=swport4,volatile-memdev=cxl-mem4,id=cxl-vmem4 \
        -device cxl-downstream,port=5,bus=us1,id=swport5,chassis=0,slot=9 \
        -device cxl-type3,bus=swport5,volatile-memdev=cxl-mem5,id=cxl-vmem5 \
        -device cxl-downstream,port=6,bus=us1,id=swport6,chassis=0,slot=10 \
        -device cxl-type3,bus=swport6,volatile-memdev=cxl-mem6,id=cxl-vmem6 \
        -device cxl-downstream,port=7,bus=us1,id=swport7,chassis=0,slot=11 \
        -device cxl-type3,bus=swport7,volatile-memdev=cxl-mem7,id=cxl-vmem7 \
        -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=32G &

In Guest OS:
# cxl create-region -d decoder0.0 -g 1024 -s 2G -t ram -w 8 -m mem4 mem1 mem6 mem3 mem2 mem5 mem7 mem0

Fix the method to calculate @distance by iterativeley multiplying the number of targets per switch port. This also follows the algorithm recommended here [1].

Fixes: 27b3f8d13830 ("cxl/region: Program target lists")
Link: http://lore.kernel.org/6538824b52349_7258329466@dwillia2-xfh.jf.intel.com.notmuch [1]
Signed-off-by: Huaisheng Ye <huaisheng.ye@intel.com>

---
 drivers/cxl/core/region.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

Comments

Zhijian Li (Fujitsu) Oct. 31, 2024, 9:33 a.m. UTC | #1
On 27/10/2024 15:57, Huaisheng Ye wrote:
> The cxl_port_setup_targets() algorithm fails to identify valid target list
> ordering in the presence of 4-way and above switches resulting in
> 'cxl create-region' failures of the form:
> 
>    # cxl create-region -d decoder0.0 -g 1024 -s 2G -t ram -w 8 -m mem4 mem1 mem6 mem3 mem2 mem5 mem7 mem0
>    cxl region: create_region: region0: failed to set target7 to mem0
>    cxl region: cmd_create_region: created 0 regions
> 
>    [kernel debug message]
>    check_last_peer:1213: cxl region0: pci0000:0c:port1: cannot host mem6:decoder7.0 at 2
>    bus_remove_device:574: bus: 'cxl': remove device region0
> 
> QEMU can create this failing topology:
> 
>                         ACPI0017:00 [root0]
>                             |
>                           HB_0 [port1]
>                          /             \
>                       RP_0             RP_1
>                        |                 |
>                  USP [port2]           USP [port3]
>              /    /    \    \        /   /    \    \
>            DSP   DSP   DSP   DSP   DSP  DSP   DSP  DSP
>             |     |     |     |     |    |     |    |
>            mem4  mem6  mem2  mem7  mem1 mem3  mem5  mem0
>   Pos:      0     2     4     6     1    3     5    7

Yeah, I tried this topology long long ago, it didn't work. At that time, I thought it
might be just like that. Until recently that I saw this [1] in section
2.13.15.1 Region Spanning 2 HB Root Ports Example Configuration Checks

I once tried to understand why the code used "distance" to determine the order of the target,
but in the end, I still couldn't figure it out (and I still don't understand it now).
IIRC, neither the CXL spec nor this document mentioned the keyword "distance" at all.

[1] https://cdrdv2-public.intel.com/643805/643805_CXL_Memory_Device_SW_Guide_Rev1_1.pdf

Anyway, many thanks.
I tried this patch, it works for me.

Tested-by: Li Zhijian <lizhijian@fujitsu.com>


> 
>   HB: Host Bridge
>   RP: Root Port
>   USP: Upstream Port
>   DSP: Downstream Port
> 
> ...with the following command steps:
> 
> $ qemu-system-x86_64 -machine q35,cxl=on,accel=tcg  \
>          -smp cpus=8 \
>          -m 8G \
>          -hda /home/work/vm-images/centos-stream8-02.qcow2 \
>          -object memory-backend-ram,size=4G,id=m0 \
>          -object memory-backend-ram,size=4G,id=m1 \
>          -object memory-backend-ram,size=2G,id=cxl-mem0 \
>          -object memory-backend-ram,size=2G,id=cxl-mem1 \
>          -object memory-backend-ram,size=2G,id=cxl-mem2 \
>          -object memory-backend-ram,size=2G,id=cxl-mem3 \
>          -object memory-backend-ram,size=2G,id=cxl-mem4 \
>          -object memory-backend-ram,size=2G,id=cxl-mem5 \
>          -object memory-backend-ram,size=2G,id=cxl-mem6 \
>          -object memory-backend-ram,size=2G,id=cxl-mem7 \
>          -numa node,memdev=m0,cpus=0-3,nodeid=0 \
>          -numa node,memdev=m1,cpus=4-7,nodeid=1 \
>          -netdev user,id=net0,hostfwd=tcp::2222-:22 \
>          -device virtio-net-pci,netdev=net0 \
>          -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
>          -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
>          -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
>          -device cxl-upstream,bus=root_port0,id=us0 \
>          -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
>          -device cxl-type3,bus=swport0,volatile-memdev=cxl-mem0,id=cxl-vmem0 \
>          -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
>          -device cxl-type3,bus=swport1,volatile-memdev=cxl-mem1,id=cxl-vmem1 \
>          -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
>          -device cxl-type3,bus=swport2,volatile-memdev=cxl-mem2,id=cxl-vmem2 \
>          -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
>          -device cxl-type3,bus=swport3,volatile-memdev=cxl-mem3,id=cxl-vmem3 \
>          -device cxl-upstream,bus=root_port1,id=us1 \
>          -device cxl-downstream,port=4,bus=us1,id=swport4,chassis=0,slot=8 \
>          -device cxl-type3,bus=swport4,volatile-memdev=cxl-mem4,id=cxl-vmem4 \
>          -device cxl-downstream,port=5,bus=us1,id=swport5,chassis=0,slot=9 \
>          -device cxl-type3,bus=swport5,volatile-memdev=cxl-mem5,id=cxl-vmem5 \
>          -device cxl-downstream,port=6,bus=us1,id=swport6,chassis=0,slot=10 \
>          -device cxl-type3,bus=swport6,volatile-memdev=cxl-mem6,id=cxl-vmem6 \
>          -device cxl-downstream,port=7,bus=us1,id=swport7,chassis=0,slot=11 \
>          -device cxl-type3,bus=swport7,volatile-memdev=cxl-mem7,id=cxl-vmem7 \
>          -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=32G &
> 
> In Guest OS:
> # cxl create-region -d decoder0.0 -g 1024 -s 2G -t ram -w 8 -m mem4 mem1 mem6 mem3 mem2 mem5 mem7 mem0
> 
> Fix the method to calculate @distance by iterativeley multiplying the number of targets per switch port. This also follows the algorithm recommended here [1].
> 
> Fixes: 27b3f8d13830 ("cxl/region: Program target lists")
> Link: http://lore.kernel.org/6538824b52349_7258329466@dwillia2-xfh.jf.intel.com.notmuch [1]
> Signed-off-by: Huaisheng Ye <huaisheng.ye@intel.com>
> 
> ---
>   drivers/cxl/core/region.c | 19 ++++++++++++-------
>   1 file changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index e701e4b04032..9e226a293f45 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -1288,6 +1288,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
>   	struct cxl_region_params *p = &cxlr->params;
>   	struct cxl_decoder *cxld = cxl_rr->decoder;
>   	struct cxl_switch_decoder *cxlsd;
> +	struct cxl_port *iter = port;
>   	u16 eig, peig;
>   	u8 eiw, peiw;
>   
> @@ -1304,16 +1305,20 @@ static int cxl_port_setup_targets(struct cxl_port *port,
>   
>   	cxlsd = to_cxl_switch_decoder(&cxld->dev);
>   	if (cxl_rr->nr_targets_set) {
> -		int i, distance;
> +		int i, distance = 1;
> +		struct cxl_region_ref *cxl_rr_iter;
>   
>   		/*
> -		 * Passthrough decoders impose no distance requirements between
> -		 * peers
> +		 * Get distance from the number of distinct targets in region's
> +		 * interest and the ancestral nr_targets.
>   		 */
> -		if (cxl_rr->nr_targets == 1)
> -			distance = 0;
> -		else
> -			distance = p->nr_targets / cxl_rr->nr_targets;
> +		do {
> +			cxl_rr_iter = cxl_rr_load(iter, cxlr);
> +			distance *= cxl_rr_iter->nr_targets;
> +			iter = to_cxl_port(iter->dev.parent);
> +		} while (!is_cxl_root(iter));
> +		distance *= cxlrd->cxlsd.cxld.interleave_ways;
> +
>   		for (i = 0; i < cxl_rr->nr_targets_set; i++)
>   			if (ep->dport == cxlsd->target[i]) {
>   				rc = check_last_peer(cxled, ep, cxl_rr,
Ye, Huaisheng Nov. 6, 2024, 9:34 a.m. UTC | #2
From: Zhijian Li (Fujitsu) <lizhijian@fujitsu.com>
Sent: Thursday, October 31, 2024 5:34 PM
> 
> Yeah, I tried this topology long long ago, it didn't work. At that time, I
> thought it might be just like that. Until recently that I saw this [1] in
> section
> 2.13.15.1 Region Spanning 2 HB Root Ports Example Configuration Checks
> 
> I once tried to understand why the code used "distance" to determine the
> order of the target, but in the end, I still couldn't figure it out (and I
> still don't understand it now).
> IIRC, neither the CXL spec nor this document mentioned the keyword
> "distance" at all.

Based on my understanding, distance is designed to ensure finding the correct endpoint peer.
In other words, distance and endpoint peer could ensure that the targets are placed in the optimal positions for best performance.

Feel free to correct me if I was wrong.

> [1] https://cdrdv2-
> public.intel.com/643805/643805_CXL_Memory_Device_SW_Guide_Rev1_1.pdf
> 
> Anyway, many thanks.
> I tried this patch, it works for me.
> 
> Tested-by: Li Zhijian <lizhijian@fujitsu.com>

Thanks for the testing.

Best Regards,
Huaisheng Ye
Dan Williams Nov. 7, 2024, 1:11 a.m. UTC | #3
[ add Dave so you 

Zhijian Li (Fujitsu) wrote:
> 
> 
> On 27/10/2024 15:57, Huaisheng Ye wrote:
> > The cxl_port_setup_targets() algorithm fails to identify valid target list
> > ordering in the presence of 4-way and above switches resulting in
> > 'cxl create-region' failures of the form:
> > 
> >    # cxl create-region -d decoder0.0 -g 1024 -s 2G -t ram -w 8 -m mem4 mem1 mem6 mem3 mem2 mem5 mem7 mem0
> >    cxl region: create_region: region0: failed to set target7 to mem0
> >    cxl region: cmd_create_region: created 0 regions
> > 
> >    [kernel debug message]
> >    check_last_peer:1213: cxl region0: pci0000:0c:port1: cannot host mem6:decoder7.0 at 2
> >    bus_remove_device:574: bus: 'cxl': remove device region0
> > 
> > QEMU can create this failing topology:
> > 
> >                         ACPI0017:00 [root0]
> >                             |
> >                           HB_0 [port1]
> >                          /             \
> >                       RP_0             RP_1
> >                        |                 |
> >                  USP [port2]           USP [port3]
> >              /    /    \    \        /   /    \    \
> >            DSP   DSP   DSP   DSP   DSP  DSP   DSP  DSP
> >             |     |     |     |     |    |     |    |
> >            mem4  mem6  mem2  mem7  mem1 mem3  mem5  mem0
> >   Pos:      0     2     4     6     1    3     5    7
> 
> Yeah, I tried this topology long long ago, it didn't work. At that time, I thought it
> might be just like that. Until recently that I saw this [1] in section
> 2.13.15.1 Region Spanning 2 HB Root Ports Example Configuration Checks
> 
> I once tried to understand why the code used "distance" to determine the order of the target,
> but in the end, I still couldn't figure it out (and I still don't understand it now).
> IIRC, neither the CXL spec nor this document mentioned the keyword "distance" at all.

Oh, that means this needs a comment or a better variable name.

In this patch discussion [1] Jim came up with the term "ancestral_ways"
to describe the same concept of what is the offset ("distance") between
consecutive indices in the target list.

[1]: http://lore.kernel.org/ZUHeTLZb+od8q4EE@ubuntu

Does "ancestral_ways" more clearly convey the math that is being
performed at each level of the hierarchy?

Now, "ancestral_ways" also does not show up in the CXL specification,
but that is because the CXL specification leaves at as an exercise for
software to figure out an algorithm to validate that a proposed ordering
of memory-device-decoders in a region can be supported by the given CXL
topology.

In the meantime I have flagged this patch to Dave for consideration in
the next cxl-fixes pull request, but I suspect it will need to be
updated with a comment and/or rename to resovle the "distance"
confusion.

https://patchwork.kernel.org/bundle/cxllinux/cxl-fixes/
Dave Jiang Nov. 7, 2024, 3:23 p.m. UTC | #4
On 11/6/24 6:11 PM, Dan Williams wrote:
> [ add Dave so you 
> 
> Zhijian Li (Fujitsu) wrote:
>>
>>
>> On 27/10/2024 15:57, Huaisheng Ye wrote:
>>> The cxl_port_setup_targets() algorithm fails to identify valid target list
>>> ordering in the presence of 4-way and above switches resulting in
>>> 'cxl create-region' failures of the form:
>>>
>>>    # cxl create-region -d decoder0.0 -g 1024 -s 2G -t ram -w 8 -m mem4 mem1 mem6 mem3 mem2 mem5 mem7 mem0
>>>    cxl region: create_region: region0: failed to set target7 to mem0
>>>    cxl region: cmd_create_region: created 0 regions
>>>
>>>    [kernel debug message]
>>>    check_last_peer:1213: cxl region0: pci0000:0c:port1: cannot host mem6:decoder7.0 at 2
>>>    bus_remove_device:574: bus: 'cxl': remove device region0
>>>
>>> QEMU can create this failing topology:
>>>
>>>                         ACPI0017:00 [root0]
>>>                             |
>>>                           HB_0 [port1]
>>>                          /             \
>>>                       RP_0             RP_1
>>>                        |                 |
>>>                  USP [port2]           USP [port3]
>>>              /    /    \    \        /   /    \    \
>>>            DSP   DSP   DSP   DSP   DSP  DSP   DSP  DSP
>>>             |     |     |     |     |    |     |    |
>>>            mem4  mem6  mem2  mem7  mem1 mem3  mem5  mem0
>>>   Pos:      0     2     4     6     1    3     5    7
>>
>> Yeah, I tried this topology long long ago, it didn't work. At that time, I thought it
>> might be just like that. Until recently that I saw this [1] in section
>> 2.13.15.1 Region Spanning 2 HB Root Ports Example Configuration Checks
>>
>> I once tried to understand why the code used "distance" to determine the order of the target,
>> but in the end, I still couldn't figure it out (and I still don't understand it now).
>> IIRC, neither the CXL spec nor this document mentioned the keyword "distance" at all.
> 
> Oh, that means this needs a comment or a better variable name.
> 
> In this patch discussion [1] Jim came up with the term "ancestral_ways"
> to describe the same concept of what is the offset ("distance") between
> consecutive indices in the target list.
> 
> [1]: http://lore.kernel.org/ZUHeTLZb+od8q4EE@ubuntu
> 
> Does "ancestral_ways" more clearly convey the math that is being
> performed at each level of the hierarchy?
> 
> Now, "ancestral_ways" also does not show up in the CXL specification,
> but that is because the CXL specification leaves at as an exercise for
> software to figure out an algorithm to validate that a proposed ordering
> of memory-device-decoders in a region can be supported by the given CXL
> topology.
> 
> In the meantime I have flagged this patch to Dave for consideration in
> the next cxl-fixes pull request, but I suspect it will need to be
> updated with a comment and/or rename to resovle the "distance"
> confusion.
> 
> https://patchwork.kernel.org/bundle/cxllinux/cxl-fixes/

If we can get it respin and tagged by next week, there's time to get it into the 6.13 merge window. Otherwise it can wait until 6.13-rc fixes.
>
Ye, Huaisheng Nov. 12, 2024, 1:56 a.m. UTC | #5
> From: Jiang, Dave <dave.jiang@intel.com>
> Sent: Thursday, November 7, 2024 11:23 PM
> >>
> >>
> >> On 27/10/2024 15:57, Huaisheng Ye wrote:
> >>> The cxl_port_setup_targets() algorithm fails to identify valid
> >>> target list ordering in the presence of 4-way and above switches
> >>> resulting in 'cxl create-region' failures of the form:
> >>>
> >>>    # cxl create-region -d decoder0.0 -g 1024 -s 2G -t ram -w 8 -m mem4
> mem1 mem6 mem3 mem2 mem5 mem7 mem0
> >>>    cxl region: create_region: region0: failed to set target7 to mem0
> >>>    cxl region: cmd_create_region: created 0 regions
> >>>
> >>>    [kernel debug message]
> >>>    check_last_peer:1213: cxl region0: pci0000:0c:port1: cannot host
> mem6:decoder7.0 at 2
> >>>    bus_remove_device:574: bus: 'cxl': remove device region0
> >>>
> >>> QEMU can create this failing topology:
> >>>
> >>>                         ACPI0017:00 [root0]
> >>>                             |
> >>>                           HB_0 [port1]
> >>>                          /             \
> >>>                       RP_0             RP_1
> >>>                        |                 |
> >>>                  USP [port2]           USP [port3]
> >>>              /    /    \    \        /   /    \    \
> >>>            DSP   DSP   DSP   DSP   DSP  DSP   DSP  DSP
> >>>             |     |     |     |     |    |     |    |
> >>>            mem4  mem6  mem2  mem7  mem1 mem3  mem5  mem0
> >>>   Pos:      0     2     4     6     1    3     5    7
> >>
> >> Yeah, I tried this topology long long ago, it didn't work. At that
> >> time, I thought it might be just like that. Until recently that I saw
> >> this [1] in section
> >> 2.13.15.1 Region Spanning 2 HB Root Ports Example Configuration
> >> Checks
> >>
> >> I once tried to understand why the code used "distance" to determine
> >> the order of the target, but in the end, I still couldn't figure it out
> (and I still don't understand it now).
> >> IIRC, neither the CXL spec nor this document mentioned the keyword
> "distance" at all.
> >
> > Oh, that means this needs a comment or a better variable name.
> >
> > In this patch discussion [1] Jim came up with the term "ancestral_ways"
> > to describe the same concept of what is the offset ("distance")
> > between consecutive indices in the target list.
> >
> > [1]: http://lore.kernel.org/ZUHeTLZb+od8q4EE@ubuntu
> >
> > Does "ancestral_ways" more clearly convey the math that is being
> > performed at each level of the hierarchy?
> >
> > Now, "ancestral_ways" also does not show up in the CXL specification,
> > but that is because the CXL specification leaves at as an exercise for
> > software to figure out an algorithm to validate that a proposed
> > ordering of memory-device-decoders in a region can be supported by the
> > given CXL topology.

Regarding this patch, the distance (or offset) is obtained by multiplying the number of distinct targets in region's interest and its all ancestral nr_targets.
If we renamed "distance" to "ancestral_ways", I am afraid that confusion will arise in the future. Because this variable is not only determined by ancestral ways.

I think ep_distance or ep_interval, even ep_offset would be better.

> > In the meantime I have flagged this patch to Dave for consideration in
> > the next cxl-fixes pull request, but I suspect it will need to be
> > updated with a comment and/or rename to resovle the "distance"
> > confusion.
> >
> > https://patchwork.kernel.org/bundle/cxllinux/cxl-fixes/
> 
> If we can get it respin and tagged by next week, there's time to get it
> into the 6.13 merge window. Otherwise it can wait until 6.13-rc fixes.
> >
diff mbox series

Patch

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index e701e4b04032..9e226a293f45 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1288,6 +1288,7 @@  static int cxl_port_setup_targets(struct cxl_port *port,
 	struct cxl_region_params *p = &cxlr->params;
 	struct cxl_decoder *cxld = cxl_rr->decoder;
 	struct cxl_switch_decoder *cxlsd;
+	struct cxl_port *iter = port;
 	u16 eig, peig;
 	u8 eiw, peiw;
 
@@ -1304,16 +1305,20 @@  static int cxl_port_setup_targets(struct cxl_port *port,
 
 	cxlsd = to_cxl_switch_decoder(&cxld->dev);
 	if (cxl_rr->nr_targets_set) {
-		int i, distance;
+		int i, distance = 1;
+		struct cxl_region_ref *cxl_rr_iter;
 
 		/*
-		 * Passthrough decoders impose no distance requirements between
-		 * peers
+		 * Get distance from the number of distinct targets in region's
+		 * interest and the ancestral nr_targets.
 		 */
-		if (cxl_rr->nr_targets == 1)
-			distance = 0;
-		else
-			distance = p->nr_targets / cxl_rr->nr_targets;
+		do {
+			cxl_rr_iter = cxl_rr_load(iter, cxlr);
+			distance *= cxl_rr_iter->nr_targets;
+			iter = to_cxl_port(iter->dev.parent);
+		} while (!is_cxl_root(iter));
+		distance *= cxlrd->cxlsd.cxld.interleave_ways;
+
 		for (i = 0; i < cxl_rr->nr_targets_set; i++)
 			if (ep->dport == cxlsd->target[i]) {
 				rc = check_last_peer(cxled, ep, cxl_rr,