diff mbox series

[RFC] cxl: Support Global Persistent Flush (GPF)

Message ID 20241205082101.1193038-1-dave@stgolabs.net
State New
Headers show
Series [RFC] cxl: Support Global Persistent Flush (GPF) | expand

Commit Message

Davidlohr Bueso Dec. 5, 2024, 8:21 a.m. UTC
Add support for GPF flows. The CXL specification around this was found
to be a bit too involved from the driver side. And while this should
really all be handled by the hardware, this patch takes things with a
grain of salt.

- Dirty shutdown is not handled, which puts the responsibility on the
admin to deal with any GPF failure - otherwise the kernel will just
keep using the device upon next boot. Hence no SetShutdownState DIRTY
upon memdev probe (and no need for clearing upon successful flush).

- As such, the driver will only update port timeouts throughout the
decode hierarchy, upon device probing and hot-remove. These timeouts
can be over-specified, particularly T1. Set the max and rely on
devices to minimize GPF response times to avoid the worst case wait
times that those timeouts imply.

- Energy budgeting is not supported.

Testing
-------

QEMU sets the DVSEC timeouts, so the read/write flow of these registers
can be tested. Default values were modified[*] to spice things up:

(i) 2 direct-attached Type3 devices:

[    3.827398] cxl_pci:cxl_gpf_setup:859: cxl_pci 0000:0e:00.0: Device GPF timeout: 4000000 us (power needed: 51mW)
[    3.828308] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:00.0: Port GPF phase 1 timeout: 2 us
[    3.829402] cxl_core:cxl_pci_update_gpf_port:1115: pcieport 0000:0c:00.0: new GPF Port phase 1 timeout: 70000000 us
[    3.830392] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:00.0: Port GPF phase 2 timeout: 3 us
[    3.831038] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:00.0: new GPF Port phase 2 timeout: 4000000 us
[    3.832282] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:01.0: Port GPF phase 1 timeout: 2 us
[    3.833338] cxl_core:cxl_pci_update_gpf_port:1115: pcieport 0000:0c:01.0: new GPF Port phase 1 timeout: 70000000 us
[    3.833948] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:01.0: Port GPF phase 2 timeout: 3 us
[    3.834495] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:01.0: new GPF Port phase 2 timeout: 4000000 us
[    3.861131] cxl_pci:cxl_gpf_setup:859: cxl_pci 0000:0d:00.0: Device GPF timeout: 1000000 us (power needed: 51mW)
[    3.861769] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:00.0: Port GPF phase 1 timeout: 70000000 us
[    3.862353] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:00.0: Port GPF phase 2 timeout: 4000000 us
[    3.862963] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:01.0: Port GPF phase 1 timeout: 70000000 us
[    3.863540] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:01.0: Port GPF phase 2 timeout: 4000000 us

(ii) 2 CXL host bridges. Each host bridge has 2 CXL Root Ports, with the CXL Type3 device directly attached:

 +-[0000:0c]-+-00.0-[0d]----00.0
 |           \-01.0-[0e]----00.0
 \-[0000:de]-+-00.0-[df]----00.0
	     \-01.0-[e0]----00.0

[    6.878513] cxl_pci:cxl_gpf_setup:859: cxl_pci 0000:e0:00.0: Device GPF timeout: 5000000 us (power needed: 51mW)
[    6.878578] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:de:00.0: Port GPF phase 1 timeout: 2 us
[    6.879753] cxl_core:cxl_pci_update_gpf_port:1115: pcieport 0000:de:00.0: new GPF Port phase 1 timeout: 70000000 us
[    6.881505] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:de:00.0: Port GPF phase 2 timeout: 3 us
[    6.882830] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:de:00.0: new GPF Port phase 2 timeout: 5000000 us
[    6.884699] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:de:01.0: Port GPF phase 1 timeout: 2 us
[    6.885903] cxl_core:cxl_pci_update_gpf_port:1115: pcieport 0000:de:01.0: new GPF Port phase 1 timeout: 70000000 us
[    6.888307] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:de:01.0: Port GPF phase 2 timeout: 3 us
[    6.889476] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:de:01.0: new GPF Port phase 2 timeout: 5000000 us
[    6.995010] cxl_pci:cxl_gpf_setup:859: cxl_pci 0000:0d:00.0: Device GPF timeout: 1000000 us (power needed: 51mW)
[    6.995092] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:00.0: Port GPF phase 1 timeout: 2 us
[    6.995106] cxl_core:cxl_pci_update_gpf_port:1115: pcieport 0000:0c:00.0: new GPF Port phase 1 timeout: 70000000 us
[    6.995117] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:00.0: Port GPF phase 2 timeout: 3 us
[    6.995127] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:00.0: new GPF Port phase 2 timeout: 1000000 us
[    6.995200] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:01.0: Port GPF phase 1 timeout: 2 us
[    6.995211] cxl_core:cxl_pci_update_gpf_port:1115: pcieport 0000:0c:01.0: new GPF Port phase 1 timeout: 70000000 us
[    6.995221] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:01.0: Port GPF phase 2 timeout: 3 us
[    6.995232] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:01.0: new GPF Port phase 2 timeout: 1000000 us
[    7.461810] cxl_pci:cxl_gpf_setup:859: cxl_pci 0000:0e:00.0: Device GPF timeout: 3000000 us (power needed: 51mW)
[    7.463006] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:00.0: Port GPF phase 1 timeout: 70000000 us
[    7.464746] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:00.0: Port GPF phase 2 timeout: 1000000 us
[    7.465567] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:00.0: new GPF Port phase 2 timeout: 3000000 us
[    7.466534] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:01.0: Port GPF phase 1 timeout: 70000000 us
[    7.467023] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:01.0: Port GPF phase 2 timeout: 1000000 us
[    7.467478] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:01.0: new GPF Port phase 2 timeout: 3000000 us
[    7.504908] cxl_pci:cxl_gpf_setup:859: cxl_pci 0000:df:00.0: Device GPF timeout: 4000000 us (power needed: 51mW)
[    7.505400] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:de:00.0: Port GPF phase 1 timeout: 70000000 us
[    7.505857] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:de:00.0: Port GPF phase 2 timeout: 5000000 us
[    7.506339] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:de:01.0: Port GPF phase 1 timeout: 70000000 us
[    7.506794] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:de:01.0: Port GPF phase 2 timeout: 5000000 us

(iii) 4 Type3 devices below a CXL Switch:

[    4.667274] cxl_pci:cxl_gpf_setup:859: cxl_pci 0000:0f:00.0: Device GPF timeout: 2000000 us (power needed: 51mW)
[    4.668383] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:00.0: Port GPF phase 1 timeout: 1 us
[    4.671100] cxl_core:cxl_pci_update_gpf_port:1115: pcieport 0000:0e:00.0: new GPF Port phase 1 timeout: 70000000 us
[    4.672242] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:00.0: Port GPF phase 2 timeout: 2 us
[    4.674552] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:00.0: new GPF Port phase 2 timeout: 2000000 us
[    4.675750] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:01.0: Port GPF phase 1 timeout: 1 us
[    4.676895] cxl_core:cxl_pci_update_gpf_port:1115: pcieport 0000:0e:01.0: new GPF Port phase 1 timeout: 70000000 us
[    4.678213] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:01.0: Port GPF phase 2 timeout: 2 us
[    4.679514] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:01.0: new GPF Port phase 2 timeout: 2000000 us
[    4.680876] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:02.0: Port GPF phase 1 timeout: 1 us
[    4.682099] cxl_core:cxl_pci_update_gpf_port:1115: pcieport 0000:0e:02.0: new GPF Port phase 1 timeout: 70000000 us
[    4.683407] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:02.0: Port GPF phase 2 timeout: 2 us
[    4.684748] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:02.0: new GPF Port phase 2 timeout: 2000000 us
[    4.686150] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:03.0: Port GPF phase 1 timeout: 1 us
[    4.687396] cxl_core:cxl_pci_update_gpf_port:1115: pcieport 0000:0e:03.0: new GPF Port phase 1 timeout: 70000000 us
[    4.688601] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:03.0: Port GPF phase 2 timeout: 2 us
[    4.690070] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:03.0: new GPF Port phase 2 timeout: 2000000 us
[    4.692076] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:00.0: Port GPF phase 1 timeout: 2 us
[    4.693138] cxl_core:cxl_pci_update_gpf_port:1115: pcieport 0000:0c:00.0: new GPF Port phase 1 timeout: 70000000 us
[    4.693149] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:00.0: Port GPF phase 2 timeout: 3 us
[    4.694357] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:00.0: new GPF Port phase 2 timeout: 2000000 us
[    4.695776] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:01.0: Port GPF phase 1 timeout: 2 us
[    4.697186] cxl_core:cxl_pci_update_gpf_port:1115: pcieport 0000:0c:01.0: new GPF Port phase 1 timeout: 70000000 us
[    4.698546] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:01.0: Port GPF phase 2 timeout: 3 us
[    4.700998] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:01.0: new GPF Port phase 2 timeout: 2000000 us
[    4.742957] cxl_pci:cxl_gpf_setup:859: cxl_pci 0000:10:00.0: Device GPF timeout: 3000000 us (power needed: 51mW)
[    4.743036] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:00.0: Port GPF phase 1 timeout: 70000000 us
[    4.743050] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:00.0: Port GPF phase 2 timeout: 2000000 us
[    4.743064] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:00.0: new GPF Port phase 2 timeout: 3000000 us
[    4.743124] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:01.0: Port GPF phase 1 timeout: 70000000 us
[    4.743135] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:01.0: Port GPF phase 2 timeout: 2000000 us
[    4.743146] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:01.0: new GPF Port phase 2 timeout: 3000000 us
[    4.743232] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:02.0: Port GPF phase 1 timeout: 70000000 us
[    4.743243] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:02.0: Port GPF phase 2 timeout: 2000000 us
[    4.743254] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:02.0: new GPF Port phase 2 timeout: 3000000 us
[    4.743305] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:03.0: Port GPF phase 1 timeout: 70000000 us
[    4.743319] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:03.0: Port GPF phase 2 timeout: 2000000 us
[    4.743329] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:03.0: new GPF Port phase 2 timeout: 3000000 us
[    4.743443] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:00.0: Port GPF phase 1 timeout: 70000000 us
[    4.743453] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:00.0: Port GPF phase 2 timeout: 2000000 us
[    4.743465] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:00.0: new GPF Port phase 2 timeout: 3000000 us
[    4.743538] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:01.0: Port GPF phase 1 timeout: 70000000 us
[    4.743548] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:01.0: Port GPF phase 2 timeout: 2000000 us
[    4.743559] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:01.0: new GPF Port phase 2 timeout: 3000000 us
[    5.193687] cxl_pci:cxl_gpf_setup:859: cxl_pci 0000:11:00.0: Device GPF timeout: 4000000 us (power needed: 51mW)
[    5.194285] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:00.0: Port GPF phase 1 timeout: 70000000 us
[    5.194802] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:00.0: Port GPF phase 2 timeout: 3000000 us
[    5.195326] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:00.0: new GPF Port phase 2 timeout: 4000000 us
[    5.195907] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:01.0: Port GPF phase 1 timeout: 70000000 us
[    5.196412] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:01.0: Port GPF phase 2 timeout: 3000000 us
[    5.196929] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:01.0: new GPF Port phase 2 timeout: 4000000 us
[    5.197493] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:02.0: Port GPF phase 1 timeout: 70000000 us
[    5.198101] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:02.0: Port GPF phase 2 timeout: 3000000 us
[    5.198562] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:02.0: new GPF Port phase 2 timeout: 4000000 us
[    5.199079] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:03.0: Port GPF phase 1 timeout: 70000000 us
[    5.199539] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:03.0: Port GPF phase 2 timeout: 3000000 us
[    5.200003] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:03.0: new GPF Port phase 2 timeout: 4000000 us
[    5.200547] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:00.0: Port GPF phase 1 timeout: 70000000 us
[    5.200997] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:00.0: Port GPF phase 2 timeout: 3000000 us
[    5.201429] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:00.0: new GPF Port phase 2 timeout: 4000000 us
[    5.201917] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:01.0: Port GPF phase 1 timeout: 70000000 us
[    5.202351] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:01.0: Port GPF phase 2 timeout: 3000000 us
[    5.202781] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:01.0: new GPF Port phase 2 timeout: 4000000 us
[    5.222313] cxl_pci:cxl_gpf_setup:859: cxl_pci 0000:12:00.0: Device GPF timeout: 1000000 us (power needed: 51mW)
[    5.222795] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:00.0: Port GPF phase 1 timeout: 70000000 us
[    5.223233] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:00.0: Port GPF phase 2 timeout: 4000000 us
[    5.223705] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:01.0: Port GPF phase 1 timeout: 70000000 us
[    5.224149] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:01.0: Port GPF phase 2 timeout: 4000000 us
[    5.224607] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:02.0: Port GPF phase 1 timeout: 70000000 us
[    5.225038] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:02.0: Port GPF phase 2 timeout: 4000000 us
[    5.225498] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:03.0: Port GPF phase 1 timeout: 70000000 us
[    5.225933] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:03.0: Port GPF phase 2 timeout: 4000000 us
[    5.226430] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:00.0: Port GPF phase 1 timeout: 70000000 us
[    5.226853] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:00.0: Port GPF phase 2 timeout: 4000000 us
[    5.227310] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:01.0: Port GPF phase 1 timeout: 70000000 us
[    5.227732] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:01.0: Port GPF phase 2 timeout: 4000000 us

(iv) Hot removal - from (iii) above:
root@cxl:~# echo 1 > /sys/bus/pci/devices/0000\:11\:00.0/remove
[   83.208979] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:00.0: Port GPF phase 1 timeout: 70000000 us
[   83.209479] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:00.0: Port GPF phase 2 timeout: 4000000 us
[   83.209988] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:00.0: new GPF Port phase 2 timeout: 3000000 us
[   83.210498] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:01.0: Port GPF phase 1 timeout: 70000000 us
[   83.210965] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:01.0: Port GPF phase 2 timeout: 4000000 us
[   83.211417] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:01.0: new GPF Port phase 2 timeout: 3000000 us
[   83.211932] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:02.0: Port GPF phase 1 timeout: 70000000 us
[   83.212404] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:02.0: Port GPF phase 2 timeout: 4000000 us
[   83.212932] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:02.0: new GPF Port phase 2 timeout: 3000000 us
[   83.213622] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0e:03.0: Port GPF phase 1 timeout: 70000000 us
[   83.214200] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0e:03.0: Port GPF phase 2 timeout: 4000000 us
[   83.214647] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0e:03.0: new GPF Port phase 2 timeout: 3000000 us
[   83.215196] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:00.0: Port GPF phase 1 timeout: 70000000 us
[   83.215650] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:00.0: Port GPF phase 2 timeout: 4000000 us
[   83.216107] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:00.0: new GPF Port phase 2 timeout: 3000000 us
[   83.216601] cxl_core:cxl_pci_update_gpf_port:1100: pcieport 0000:0c:01.0: Port GPF phase 1 timeout: 70000000 us
[   83.217061] cxl_core:cxl_pci_update_gpf_port:1139: pcieport 0000:0c:01.0: Port GPF phase 2 timeout: 4000000 us
[   83.217507] cxl_core:cxl_pci_update_gpf_port:1154: pcieport 0000:0c:01.0: new GPF Port phase 2 timeout: 3000000 us

[*]: qemu diff:

Comments

Dan Williams Dec. 10, 2024, 11:47 p.m. UTC | #1
Davidlohr Bueso wrote:

Hi Davidlohr, thanks for this:

> Add support for GPF flows. It is found that the CXL specification
> around this to be a bit too involved from the driver side. And while
> this should really all handled by the hardware, this patch takes
> things with a grain of salt.
> 
> - Dirty shutdown is not handled, and puts the responsibility on the
> Admin to deal with any GPF failure - otherwise the kernel will just
> keep using the device upon next boot. Hence no SetShutdownState DIRTY
> upon memdev probe (and no need for clearing upon successful flush).
> 
> - As such, the driver will only update port timeouts throughout the
> decode hierarchy, upon device probing and hot-remove. These timeouts
> can be over-specified, particularly T1. Set the max and rely on
> devices to minimize GPF response times to avoid the worst case wait
> times that those timeouts imply.
> 
> - Energy budgeting is not supported.

The missing detail for me is why are the defaults likely to be broken
such that the kernel must get involved in setting them, and how does
the kernel determine they are broken?

It is unfortunate that the specification does not recommend default
values for this, or even an implementation note about what system
software is supposed to do with these registers.

Can you say a bit more about the end user visible effects when the
timeouts are violated? Because the critical enabling from my perspective
is timeout detection (notifying the sysadmin), not timeout setting. 

A policy for setting the timeouts makes sense, but I am not sure that
the kernel should pick "max" as a default policy. Especially when max is
80 seconds which is already longer than lockup detectors would tolerate.
Likely anything over 30 seconds the kernel should be actively complaining
about something being broken. If a configuration needs more time that
likely needs a user policy override mechanism.

This would be consistent with what we did for mailbox initialization
timeouts. Pick a "kernel reasonable" default and provide an override for
outlier cases.
Yuquan Wang Dec. 11, 2024, 10:53 a.m. UTC | #2
On Tue, Dec 10, 2024 at 03:47:21PM -0800, Dan Williams wrote:
> Davidlohr Bueso wrote:
> 
> Hi Davidlohr, thanks for this:
> 
> > Add support for GPF flows. It is found that the CXL specification
> > around this to be a bit too involved from the driver side. And while
> > this should really all handled by the hardware, this patch takes
> > things with a grain of salt.
> > 
> > - Dirty shutdown is not handled, and puts the responsibility on the
> > Admin to deal with any GPF failure - otherwise the kernel will just
> > keep using the device upon next boot. Hence no SetShutdownState DIRTY
> > upon memdev probe (and no need for clearing upon successful flush).
> > 
> > - As such, the driver will only update port timeouts throughout the
> > decode hierarchy, upon device probing and hot-remove. These timeouts
> > can be over-specified, particularly T1. Set the max and rely on
> > devices to minimize GPF response times to avoid the worst case wait
> > times that those timeouts imply.
> > 
> > - Energy budgeting is not supported.
> 
> The missing detail for me is why are the defaults likely to be broken
> such that the kernel must get involved in setting them, and how does
> the kernel determine they are broken?
> 
> It is unfortunate that the specification does not recommend default
> values for this, or even an implementation note about what system
> software is supposed to do with these registers.
> 
> Can you say a bit more about the end user visible effects when the
> timeouts are violated? Because the critical enabling from my perspective
> is timeout detection (notifying the sysadmin), not timeout setting. 
> 
> A policy for setting the timeouts makes sense, but I am not sure that
> the kernel should pick "max" as a default policy. Especially when max is
> 80 seconds which is already longer than lockup detectors would tolerate.
> Likely anything over 30 seconds the kernel should be active complaining
> about something being broken. If a configuration needs more time that
> likely needs a user policy override mechanism.
> 
> This would be consistent with what we did for mailbox initialization
> timeouts. Pick a "kernel reasonable" default and provide an override for
> outlier cases.

Hi, list

Apart from GPF flows, could we use CXL pmem as a special nvdimm device in the
kernel, including the ADR mechanism? After all, the cxl_pmem module bridges the
CXL pmem device from the CXL subsystem into the nvdimm subsystem.

Many Thanks
Yuquan
diff mbox series

Patch

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index ec12c58d9f93..952c5c3d3171 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -439,6 +439,7 @@  static void build_dvsecs(CXLType3Dev *ct3d)
	      range1_base_hi = 0, range1_base_lo = 0,
	      range2_size_hi = 0, range2_size_lo = 0,
	      range2_base_hi = 0, range2_base_lo = 0;
+    GRand *rand = g_rand_new();

     /*
      * Volatile memory is mapped as (0x0)
@@ -500,7 +501,7 @@  static void build_dvsecs(CXLType3Dev *ct3d)
				REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
				REG_LOC_DVSEC_REVID, (uint8_t *)regloc_dvsec);
     dvsec = (uint8_t *)&(CXLDVSECDeviceGPF){
-        .phase2_duration = 0x603, /* 3 seconds */
+        .phase2_duration = g_rand_int_range(rand, 0x601, 0x606), /* 1-6 seconds */
	 .phase2_power = 0x33, /* 0x33 miliwatts */
     };
     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
@@ -517,6 +518,7 @@  static void build_dvsecs(CXLType3Dev *ct3d)
				PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH,
				PCIE_FLEXBUS_PORT_DVSEC,
				PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, dvsec);
+    g_rand_free(rand);
 }

 static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
index 17c32b5736ab..3353b87b72a6 100644
--- a/hw/pci-bridge/cxl_downstream.c
+++ b/hw/pci-bridge/cxl_downstream.c
@@ -140,7 +140,7 @@  static void build_dvsecs(CXLComponentState *cxl)
     dvsec = (uint8_t *)&(CXLDVSECPortGPF){
	 .rsvd        = 0,
	 .phase1_ctrl = 1, /* 1μs timeout */
-        .phase2_ctrl = 1, /* 1μs timeout */
+        .phase2_ctrl = 2, /* 2μs timeout */
     };
     cxl_component_create_dvsec(cxl, CXL2_DOWNSTREAM_PORT,
				GPF_PORT_DVSEC_LENGTH, GPF_PORT_DVSEC,
diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
index d400f2aa1fc7..1226fcabfc50 100644
--- a/hw/pci-bridge/cxl_root_port.c
+++ b/hw/pci-bridge/cxl_root_port.c
@@ -121,8 +121,8 @@  static void build_dvsecs(CXLComponentState *cxl)

     dvsec = (uint8_t *)&(CXLDVSECPortGPF){
	 .rsvd        = 0,
-        .phase1_ctrl = 1, /* 1μs timeout */
-        .phase2_ctrl = 1, /* 1μs timeout */
+        .phase1_ctrl = 2, /* 2μs timeout */
+        .phase2_ctrl = 3, /* 3μs timeout */
     };
     cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT,
				GPF_PORT_DVSEC_LENGTH, GPF_PORT_DVSEC,

Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
---
 Documentation/driver-api/cxl/maturity-map.rst |   2 +-
 drivers/cxl/core/pci.c                        | 104 ++++++++++++++++++
 drivers/cxl/core/port.c                       |  56 ++++++++++
 drivers/cxl/cxl.h                             |   3 +
 drivers/cxl/cxlmem.h                          |   5 +
 drivers/cxl/cxlpci.h                          |  62 +++++++++++
 drivers/cxl/pci.c                             |  81 ++++++++++++++
 7 files changed, 312 insertions(+), 1 deletion(-)

diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst
index df8e2ac2a320..99dd2c841e69 100644
--- a/Documentation/driver-api/cxl/maturity-map.rst
+++ b/Documentation/driver-api/cxl/maturity-map.rst
@@ -130,7 +130,7 @@  Mailbox commands
 * [0] Switch CCI
 * [3] Timestamp
 * [1] PMEM labels
-* [0] PMEM GPF / Dirty Shutdown
+* [1] PMEM GPF / Dirty Shutdown
 * [0] Scan Media
 
 PMU
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 5b46bc46aaa9..7e803c2ab5fe 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -1054,3 +1054,107 @@  int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)
 
 	return 0;
 }
+
+int cxl_pci_update_gpf_port(struct pci_dev *pdev,
+			    struct cxl_memdev *cxlmd, bool remove)
+{ /* Sync @pdev's Port GPF DVSEC timeouts with @cxlmd; 0 on success */
+	u16 ctrl; /* raw phase 1/2 control register value */
+	int port_t1_base, port_t1_scale;
+	int port_t2_base, port_t2_scale;
+	unsigned long device_tmo, port_tmo; /* both in microseconds */
+	int rc, dvsec; /* dvsec: config-space offset of the Port GPF DVSEC */
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+
+	dvsec = pci_find_dvsec_capability(
+		pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PORT_GPF);
+	if (!dvsec) {
+		dev_warn(&pdev->dev,
+			 "GPF Port DVSEC not present\n");
+		return -EINVAL;
+	}
+
+	/* check for t1: decode the port's current phase 1 timeout */
+	rc = pci_read_config_word(
+		pdev,
+		dvsec + CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET,
+		&ctrl);
+	if (rc)
+		return rc;
+
+	port_t1_base = FIELD_GET(CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK,
+				 ctrl);
+	port_t1_scale = FIELD_GET(CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK,
+				  ctrl);
+	if (port_t1_scale > GPF_TIMEOUT_SCALE_MAX) { /* reject scale > 7 */
+		dev_warn(&pdev->dev, "GPF: invalid port phase 1 timeout\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Set max timeout such that vendors will optimize GPF flow to
+	 * avoid the implied worst-case scenario delays.
+	 */
+	device_tmo = gpf_timeout_us(7, GPF_TIMEOUT_SCALE_MAX); /* fixed max */
+	port_tmo = gpf_timeout_us(port_t1_base, port_t1_scale);
+
+	dev_dbg(&pdev->dev, "Port GPF phase 1 timeout: %lu us\n", port_tmo);
+
+	if ((remove && device_tmo != port_tmo) || device_tmo > port_tmo) {
+		/* update the timeout in DVSEC; bits outside base/scale are written as 0 */
+		ctrl = FIELD_PREP(CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK,
+				   7);
+		ctrl |= FIELD_PREP(CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK,
+				   GPF_TIMEOUT_SCALE_MAX);
+		rc = pci_write_config_word(
+			pdev,
+			dvsec + CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET,
+			ctrl);
+		if (rc)
+			return rc;
+
+		dev_dbg(&pdev->dev,
+			"new GPF Port phase 1 timeout: %lu us\n", device_tmo);
+	}
+
+	/* check for t2: same flow, target is the device's cached phase 2 timeout */
+	rc = pci_read_config_word(
+		pdev,
+		dvsec + CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET,
+		&ctrl);
+	if (rc)
+		return rc;
+
+	port_t2_base = FIELD_GET(CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK,
+			    ctrl);
+	port_t2_scale = FIELD_GET(CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK,
+			     ctrl);
+	if (port_t2_scale > GPF_TIMEOUT_SCALE_MAX) { /* reject scale > 7 */
+		dev_warn(&pdev->dev, "GPF: invalid port phase 2 timeout\n");
+		return -EINVAL;
+	}
+
+	device_tmo = gpf_timeout_us(mds->gpf_t2_base, mds->gpf_t2_scale);
+	port_tmo = gpf_timeout_us(port_t2_base, port_t2_scale);
+
+	dev_dbg(&pdev->dev, "Port GPF phase 2 timeout: %lu us\n", port_tmo);
+
+	if ((remove && device_tmo != port_tmo) || device_tmo > port_tmo) {
+		/* update the timeout in DVSEC: raise only, unless @remove allows lowering */
+		ctrl = FIELD_PREP(CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK,
+				   mds->gpf_t2_base);
+		ctrl |= FIELD_PREP(CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK,
+				   mds->gpf_t2_scale);
+		rc = pci_write_config_word(
+			pdev,
+			dvsec + CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET,
+			ctrl);
+		if (rc)
+			return rc;
+
+		dev_dbg(&pdev->dev,
+			"new GPF Port phase 2 timeout: %lu us\n", device_tmo);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_pci_update_gpf_port, CXL);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index af92c67bc954..eacf813d4bb4 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1393,6 +1393,61 @@  static struct device *endpoint_host(struct cxl_port *endpoint)
 	return &port->dev;
 }
 
+static void delete_update_gpf(struct cxl_memdev *cxlmd)
+{ /* re-derive upstream port GPF timeouts without @cxlmd's contribution */
+	struct cxl_port *port = cxlmd->endpoint;
+	struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
+	struct cxl_memdev *max_cxlmd = NULL; /* largest-T2 memdev seen so far */
+	struct cxl_memdev_state *mds;
+	struct cxl_ep *ep;
+	unsigned long index;
+
+	/* first calculate the new max T2 timeout among the other endpoints */
+	xa_for_each(&parent_port->endpoints, index, ep) {
+		struct cxl_memdev *this_cxlmd;
+		struct cxl_memdev_state *max_mds;
+
+		this_cxlmd = to_cxl_memdev(ep->ep);
+		if (cxlmd == this_cxlmd) /* ignore self */
+			continue;
+
+		if (!max_cxlmd) { /* first candidate becomes the initial max */
+			max_cxlmd = this_cxlmd;
+			continue;
+		}
+
+		mds = to_cxl_memdev_state(this_cxlmd->cxlds);
+		max_mds = to_cxl_memdev_state(max_cxlmd->cxlds);
+
+		if (gpf_timeout_us(mds->gpf_t2_base, mds->gpf_t2_scale) >
+		    gpf_timeout_us(max_mds->gpf_t2_base, max_mds->gpf_t2_scale))
+			max_cxlmd = this_cxlmd;
+	}
+
+	if (!max_cxlmd) /* no other devices */
+		return;
+
+	while (1) { /* walk up the ports until the parent is the CXL root */
+		struct cxl_dport *dport;
+
+		parent_port = to_cxl_port(port->dev.parent);
+		mds = to_cxl_memdev_state(max_cxlmd->cxlds); /* NOTE(review): not read afterwards */
+
+		xa_for_each(&parent_port->dports, index, dport) {
+			if (!dev_is_pci(dport->dport_dev)) /* only PCI dports are updated */
+				continue;
+
+			cxl_pci_update_gpf_port(to_pci_dev(dport->dport_dev),
+						max_cxlmd, true); /* NOTE(review): return value ignored */
+		}
+
+		if (is_cxl_root(parent_port))
+			break;
+
+		port = parent_port;
+	}
+}
+
 static void delete_endpoint(void *data)
 {
 	struct cxl_memdev *cxlmd = data;
@@ -1400,6 +1455,7 @@  static void delete_endpoint(void *data)
 	struct device *host = endpoint_host(endpoint);
 
 	scoped_guard(device, host) {
+		delete_update_gpf(cxlmd);
 		if (host->driver && !endpoint->dead) {
 			devm_release_action(host, cxl_unlink_parent_dport, endpoint);
 			devm_release_action(host, cxl_unlink_uport, endpoint);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 5406e3ab3d4a..00f6bb445fa1 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -902,6 +902,9 @@  void cxl_coordinates_combine(struct access_coordinate *out,
 
 bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
 
+int cxl_pci_update_gpf_port(struct pci_dev *pdev, /* port carrying the GPF DVSEC */
+			    struct cxl_memdev *cxlmd, bool remove); /* true on teardown */
+
 /*
  * Unit test builds overrides this to __weak, find the 'strong' version
  * of these symbols in tools/testing/cxl/.
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 2a25d1957ddb..8f19d9615330 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -503,6 +503,11 @@  struct cxl_memdev_state {
 	struct cxl_poison_state poison;
 	struct cxl_security_state security;
 	struct cxl_fw_state fw;
+
+	/* cache Device GPF info */
+	u16 gpf_t2_base;
+	u16 gpf_t2_scale;
+	u32 gpf_power_mwatts;
 };
 
 static inline struct cxl_memdev_state *
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 4da07727ab9c..5eae39641f96 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -40,9 +40,20 @@ 
 
 /* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */
 #define CXL_DVSEC_PORT_GPF					4
+#define   CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET		0xC
+#define     CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK		GENMASK(3, 0)
+#define     CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK		GENMASK(11, 8)
+#define   CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET		0xE
+#define     CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK		GENMASK(3, 0)
+#define     CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK		GENMASK(11, 8)
 
 /* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */
 #define CXL_DVSEC_DEVICE_GPF					5
+#define   CXL_DVSEC_DEVICE_GPF_PHASE_2_DURATION_OFFSET		0xA
+#define     CXL_DVSEC_DEVICE_GPF_PHASE_2_TIME_BASE_MASK		GENMASK(3, 0)
+#define     CXL_DVSEC_DEVICE_GPF_PHASE_2_TIME_SCALE_MASK	GENMASK(11, 8)
+#define   CXL_DVSEC_DEVICE_GPF_PHASE_2_POWER_OFFSET		0xC
+#define     CXL_DVSEC_DEVICE_GPF_PHASE_2_ACTIVE_POWER_MASK	GENMASK(31, 0)
 
 /* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */
 #define CXL_DVSEC_PCIE_FLEXBUS_PORT				7
@@ -129,4 +140,55 @@  void read_cdat_data(struct cxl_port *port);
 void cxl_cor_error_detected(struct pci_dev *pdev);
 pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
 				    pci_channel_state_t state);
+
+#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */
+
+/*
+ * Extra host-side latency to add on top of a decoded GPF timeout. None is
+ * accounted for: propagation delay, host-side processing delays and any
+ * other host/system-specific delays are all treated as zero.
+ */
+static inline unsigned long gpf_other_delays_us(void)
+{
+	return 0;
+}
+
+/*
+ * gpf_timeout_us() - decode a GPF timeout into microseconds
+ * @base: multiplier applied to the time unit
+ * @scale: selects the time unit, a power of ten from 1 us (0) up to
+ *	   10 s (GPF_TIMEOUT_SCALE_MAX)
+ *
+ * A @scale outside 0..GPF_TIMEOUT_SCALE_MAX selects a unit of zero, in
+ * which case only gpf_other_delays_us() contributes to the result.
+ *
+ * Return: @base times the selected unit, plus whatever
+ * gpf_other_delays_us() reports.
+ */
+static inline unsigned long gpf_timeout_us(int base, int scale)
+{
+	/* microseconds per unit of @base, indexed by @scale */
+	static const unsigned long unit_us[] = {
+		[0] = 1,			/* 1 us */
+		[1] = 10UL,			/* 10 us */
+		[2] = 100UL,			/* 100 us */
+		[3] = 1000UL,			/* 1 ms */
+		[4] = 10000UL,			/* 10 ms */
+		[5] = 100000UL,			/* 100 ms */
+		[6] = 1000000UL,		/* 1 s */
+		[GPF_TIMEOUT_SCALE_MAX] = 10000000UL,
+	};
+	unsigned long total_us;
+
+	if (scale < 0 || scale > GPF_TIMEOUT_SCALE_MAX) {
+		/* unknown encoding: no unit, only the fixed delays remain */
+		total_us = 0;
+	} else {
+		/* @base converts to unsigned long for the multiply */
+		total_us = unit_us[scale] * base;
+	}
+
+	return total_us + gpf_other_delays_us();
+}
+
 #endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 188412d45e0d..0f2150c392cb 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -807,6 +807,85 @@  static int cxl_pci_type3_init_mailbox(struct cxl_dev_state *cxlds)
 	return 0;
 }
 
+/*
+ * Cache the device's GPF phase 2 timeout and power, then offer them to the
+ * PCI dports on the path to the CXL root. Returns 0 or a negative errno.
+ */
+static int cxl_gpf_setup(struct pci_dev *pdev)
+{
+	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+	struct cxl_memdev *cxlmd = cxlds->cxlmd;
+	int rc, gpf_dvsec, t2_base, t2_scale;
+	struct cxl_port *port;
+	u16 duration;
+	u32 power;
+
+	/* get the timeouts for phase 2, given by the hardware */
+	gpf_dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+					      CXL_DVSEC_DEVICE_GPF);
+	if (!gpf_dvsec) {
+		dev_warn(&pdev->dev, "GPF Device DVSEC not present\n");
+		return -EINVAL;
+	}
+
+	/* config accessors return positive PCIBIOS_* codes, not errnos */
+	rc = pci_read_config_word(pdev, gpf_dvsec +
+				  CXL_DVSEC_DEVICE_GPF_PHASE_2_DURATION_OFFSET,
+				  &duration);
+	if (rc)
+		return pcibios_err_to_errno(rc);
+
+	t2_base = FIELD_GET(CXL_DVSEC_DEVICE_GPF_PHASE_2_TIME_BASE_MASK, duration);
+	t2_scale = FIELD_GET(CXL_DVSEC_DEVICE_GPF_PHASE_2_TIME_SCALE_MASK, duration);
+	if (t2_scale > GPF_TIMEOUT_SCALE_MAX) {
+		dev_warn(&pdev->dev, "GPF: invalid device timeout\n");
+		return -EINVAL;
+	}
+
+	rc = pci_read_config_dword(pdev, gpf_dvsec +
+				   CXL_DVSEC_DEVICE_GPF_PHASE_2_POWER_OFFSET,
+				   &power);
+	if (rc)
+		return pcibios_err_to_errno(rc);
+
+	/* cache only after both reads succeeded, so @mds is never half-updated */
+	mds->gpf_t2_base = t2_base;
+	mds->gpf_t2_scale = t2_scale;
+	mds->gpf_power_mwatts = power;
+
+	dev_dbg(&pdev->dev, "Device GPF timeout: %lu us (power needed: %umW)\n",
+		gpf_timeout_us(t2_base, t2_scale), mds->gpf_power_mwatts);
+
+	/* NOTE(review): endpoint may not be attached yet at probe - confirm */
+	port = cxlmd->endpoint;
+	if (IS_ERR_OR_NULL(port))
+		return -ENXIO;
+
+	/* iterate up the hierarchy updating max port timeouts where necessary */
+	while (1) {
+		struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
+		struct cxl_dport *dport;
+		unsigned long index;
+
+		device_lock(&parent_port->dev);
+		xa_for_each(&parent_port->dports, index, dport) {
+			if (!dev_is_pci(dport->dport_dev))
+				continue;
+			/* per-port update failures are not propagated */
+			cxl_pci_update_gpf_port(to_pci_dev(dport->dport_dev),
+						cxlmd, false);
+		}
+		device_unlock(&parent_port->dev);
+
+		if (is_cxl_root(parent_port))
+			break;
+		port = parent_port;
+	}
+
+	return 0;
+}
+
 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
@@ -946,6 +1025,8 @@  static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");
 
+	cxl_gpf_setup(pdev);
+
 	pci_save_state(pdev);
 
 	return rc;