@@ -439,6 +439,7 @@ static void build_dvsecs(CXLType3Dev *ct3d)
range1_base_hi = 0, range1_base_lo = 0,
range2_size_hi = 0, range2_size_lo = 0,
range2_base_hi = 0, range2_base_lo = 0;
+ GRand *rand = g_rand_new();
/*
* Volatile memory is mapped as (0x0)
@@ -500,7 +501,7 @@ static void build_dvsecs(CXLType3Dev *ct3d)
REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
REG_LOC_DVSEC_REVID, (uint8_t *)regloc_dvsec);
dvsec = (uint8_t *)&(CXLDVSECDeviceGPF){
- .phase2_duration = 0x603, /* 3 seconds */
+ .phase2_duration = g_rand_int_range(rand, 0x601, 0x606), /* 1-5 seconds */
.phase2_power = 0x33, /* 0x33 milliwatts */
};
cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
@@ -517,6 +518,7 @@ static void build_dvsecs(CXLType3Dev *ct3d)
PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH,
PCIE_FLEXBUS_PORT_DVSEC,
PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, dvsec);
+ g_rand_free(rand);
}
static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
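
For reference, the phase 2 duration field packs a 4-bit time base in bits
[3:0] and a 4-bit time scale in bits [11:8] (scale 6 selects 1-second
units), so the stock 0x603 decodes to 3 seconds. g_rand_int_range()
excludes its upper bound, hence the hack above picks 0x601-0x605, i.e.
1-5 seconds. A minimal decode sketch, for illustration only:

    unsigned int dur = 0x603;
    unsigned int base = dur & 0xf;          /* bits [3:0]  -> 3 */
    unsigned int scale = (dur >> 8) & 0xf;  /* bits [11:8] -> 6 (1 s units) */
    /* duration = 3 * 1 s = 3 seconds */
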
@@ -140,7 +140,7 @@ static void build_dvsecs(CXLComponentState *cxl)
dvsec = (uint8_t *)&(CXLDVSECPortGPF){
.rsvd = 0,
.phase1_ctrl = 1, /* 1μs timeout */
- .phase2_ctrl = 1, /* 1μs timeout */
+ .phase2_ctrl = 2, /* 2μs timeout */
};
cxl_component_create_dvsec(cxl, CXL2_DOWNSTREAM_PORT,
GPF_PORT_DVSEC_LENGTH, GPF_PORT_DVSEC,
@@ -121,8 +121,8 @@ static void build_dvsecs(CXLComponentState *cxl)
dvsec = (uint8_t *)&(CXLDVSECPortGPF){
.rsvd = 0,
- .phase1_ctrl = 1, /* 1μs timeout */
- .phase2_ctrl = 1, /* 1μs timeout */
+ .phase1_ctrl = 2, /* 2μs timeout */
+ .phase2_ctrl = 3, /* 3μs timeout */
};
cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT,
GPF_PORT_DVSEC_LENGTH, GPF_PORT_DVSEC,
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
---
Documentation/driver-api/cxl/maturity-map.rst | 2 +-
drivers/cxl/core/pci.c | 104 ++++++++++++++++++
drivers/cxl/core/port.c | 56 ++++++++++
drivers/cxl/cxl.h | 3 +
drivers/cxl/cxlmem.h | 5 +
drivers/cxl/cxlpci.h | 62 +++++++++++
drivers/cxl/pci.c | 81 ++++++++++++++
7 files changed, 312 insertions(+), 1 deletion(-)
@@ -130,7 +130,7 @@ Mailbox commands
* [0] Switch CCI
* [3] Timestamp
* [1] PMEM labels
-* [0] PMEM GPF / Dirty Shutdown
+* [1] PMEM GPF / Dirty Shutdown
* [0] Scan Media
PMU
@@ -1054,3 +1054,107 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)
return 0;
}
+
+int cxl_pci_update_gpf_port(struct pci_dev *pdev,
+ struct cxl_memdev *cxlmd, bool remove)
+{
+ u16 ctrl;
+ int port_t1_base, port_t1_scale;
+ int port_t2_base, port_t2_scale;
+ unsigned long device_tmo, port_tmo;
+ int rc, dvsec;
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+
+ dvsec = pci_find_dvsec_capability(
+ pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PORT_GPF);
+ if (!dvsec) {
+ dev_warn(&pdev->dev,
+ "GPF Port DVSEC not present\n");
+ return -EINVAL;
+ }
+
+ /* check for t1 */
+ rc = pci_read_config_word(
+ pdev,
+ dvsec + CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET,
+ &ctrl);
+ if (rc)
+ return rc;
+
+ port_t1_base = FIELD_GET(CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK,
+ ctrl);
+ port_t1_scale = FIELD_GET(CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK,
+ ctrl);
+ if (port_t1_scale > GPF_TIMEOUT_SCALE_MAX) {
+ dev_warn(&pdev->dev, "GPF: invalid port phase 1 timeout\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Set the maximum timeout so that vendors will optimize the GPF flow
+ * to avoid the implied worst-case delays.
+ */
+ device_tmo = gpf_timeout_us(7, GPF_TIMEOUT_SCALE_MAX);
+ port_tmo = gpf_timeout_us(port_t1_base, port_t1_scale);
+
+ dev_dbg(&pdev->dev, "Port GPF phase 1 timeout: %lu us\n", port_tmo);
+
+ if ((remove && device_tmo != port_tmo) || device_tmo > port_tmo) {
+ /* update the timeout in DVSEC */
+ ctrl = FIELD_PREP(CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK,
+ 7);
+ ctrl |= FIELD_PREP(CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK,
+ GPF_TIMEOUT_SCALE_MAX);
+ rc = pci_write_config_word(
+ pdev,
+ dvsec + CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET,
+ ctrl);
+ if (rc)
+ return rc;
+
+ dev_dbg(&pdev->dev,
+ "new GPF Port phase 1 timeout: %lu us\n", device_tmo);
+ }
+
+ /* check for t2 */
+ rc = pci_read_config_word(
+ pdev,
+ dvsec + CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET,
+ &ctrl);
+ if (rc)
+ return rc;
+
+ port_t2_base = FIELD_GET(CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK,
+ ctrl);
+ port_t2_scale = FIELD_GET(CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK,
+ ctrl);
+ if (port_t2_scale > GPF_TIMEOUT_SCALE_MAX) {
+ dev_warn(&pdev->dev, "GPF: invalid port phase 2 timeout\n");
+ return -EINVAL;
+ }
+
+ device_tmo = gpf_timeout_us(mds->gpf_t2_base, mds->gpf_t2_scale);
+ port_tmo = gpf_timeout_us(port_t2_base, port_t2_scale);
+
+ dev_dbg(&pdev->dev, "Port GPF phase 2 timeout: %lu us\n", port_tmo);
+
+ if ((remove && device_tmo != port_tmo) || device_tmo > port_tmo) {
+ /* update the timeout in DVSEC */
+ ctrl = FIELD_PREP(CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK,
+ mds->gpf_t2_base);
+ ctrl |= FIELD_PREP(CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK,
+ mds->gpf_t2_scale);
+ rc = pci_write_config_word(
+ pdev,
+ dvsec + CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET,
+ ctrl);
+ if (rc)
+ return rc;
+
+ dev_dbg(&pdev->dev,
+ "new GPF Port phase 2 timeout: %lu us\n", device_tmo);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_pci_update_gpf_port, CXL);
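
For reference, the phase 1 path above programs ports to the architectural
maximum: gpf_timeout_us(7, GPF_TIMEOUT_SCALE_MAX) is 7 * 10,000,000 us =
70 seconds. On hot-add (remove == false) a port timeout is only ever
raised when it falls short of the device's requirement; on removal
(remove == true) any mismatch triggers a rewrite, so the phase 2 timeout
can also shrink back to the largest value still required by the surviving
endpoints.
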
@@ -1393,6 +1393,61 @@ static struct device *endpoint_host(struct cxl_port *endpoint)
return &port->dev;
}
+static void delete_update_gpf(struct cxl_memdev *cxlmd)
+{
+ struct cxl_port *port = cxlmd->endpoint;
+ struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
+ struct cxl_memdev *max_cxlmd = NULL;
+ struct cxl_memdev_state *mds;
+ struct cxl_ep *ep;
+ unsigned long index;
+
+ /* first calculate the new max T2 timeout */
+ xa_for_each(&parent_port->endpoints, index, ep) {
+ struct cxl_memdev *this_cxlmd;
+ struct cxl_memdev_state *max_mds;
+
+ this_cxlmd = to_cxl_memdev(ep->ep);
+ if (cxlmd == this_cxlmd) /* ignore self */
+ continue;
+
+ if (!max_cxlmd) {
+ max_cxlmd = this_cxlmd;
+ continue;
+ }
+
+ mds = to_cxl_memdev_state(this_cxlmd->cxlds);
+ max_mds = to_cxl_memdev_state(max_cxlmd->cxlds);
+
+ if (gpf_timeout_us(mds->gpf_t2_base, mds->gpf_t2_scale) >
+ gpf_timeout_us(max_mds->gpf_t2_base, max_mds->gpf_t2_scale))
+ max_cxlmd = this_cxlmd;
+ }
+
+ if (!max_cxlmd) /* no other devices */
+ return;
+
+ while (1) {
+ struct cxl_dport *dport;
+
+ parent_port = to_cxl_port(port->dev.parent);
+ mds = to_cxl_memdev_state(max_cxlmd->cxlds);
+
+ xa_for_each(&parent_port->dports, index, dport) {
+ if (!dev_is_pci(dport->dport_dev))
+ continue;
+
+ cxl_pci_update_gpf_port(to_pci_dev(dport->dport_dev),
+ max_cxlmd, true);
+ }
+
+ if (is_cxl_root(parent_port))
+ break;
+
+ port = parent_port;
+ }
+}
+
static void delete_endpoint(void *data)
{
struct cxl_memdev *cxlmd = data;
@@ -1400,6 +1455,7 @@ static void delete_endpoint(void *data)
struct device *host = endpoint_host(endpoint);
scoped_guard(device, host) {
+ delete_update_gpf(cxlmd);
if (host->driver && !endpoint->dead) {
devm_release_action(host, cxl_unlink_parent_dport, endpoint);
devm_release_action(host, cxl_unlink_uport, endpoint);
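
As an example (hypothetical devices), consider two memdevs behind the same
switch port: devA advertising a 1 s phase 2 timeout and devB 100 ms. While
both are bound, the ports on the path carry 1 s; when devA's endpoint is
deleted, delete_update_gpf() picks devB as the new maximum and rewrites
every PCI dport up to the root back down to 100 ms via
cxl_pci_update_gpf_port(..., true).
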
@@ -902,6 +902,9 @@ void cxl_coordinates_combine(struct access_coordinate *out,
bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
+int cxl_pci_update_gpf_port(struct pci_dev *pdev,
+ struct cxl_memdev *cxlmd, bool remove);
+
/*
* Unit test builds overrides this to __weak, find the 'strong' version
* of these symbols in tools/testing/cxl/.
@@ -503,6 +503,11 @@ struct cxl_memdev_state {
struct cxl_poison_state poison;
struct cxl_security_state security;
struct cxl_fw_state fw;
+
+ /* cache Device GPF info */
+ u16 gpf_t2_base;
+ u16 gpf_t2_scale;
+ u32 gpf_power_mwatts;
};
static inline struct cxl_memdev_state *
@@ -40,9 +40,20 @@
/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */
#define CXL_DVSEC_PORT_GPF 4
+#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0xC
+#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK GENMASK(3, 0)
+#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK GENMASK(11, 8)
+#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE
+#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK GENMASK(3, 0)
+#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK GENMASK(11, 8)
/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */
#define CXL_DVSEC_DEVICE_GPF 5
+#define CXL_DVSEC_DEVICE_GPF_PHASE_2_DURATION_OFFSET 0xA
+#define CXL_DVSEC_DEVICE_GPF_PHASE_2_TIME_BASE_MASK GENMASK(3, 0)
+#define CXL_DVSEC_DEVICE_GPF_PHASE_2_TIME_SCALE_MASK GENMASK(11, 8)
+#define CXL_DVSEC_DEVICE_GPF_PHASE_2_POWER_OFFSET 0xC
+#define CXL_DVSEC_DEVICE_GPF_PHASE_2_ACTIVE_POWER_MASK GENMASK(31, 0)
/* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */
#define CXL_DVSEC_PCIE_FLEXBUS_PORT 7
@@ -129,4 +140,55 @@ void read_cdat_data(struct cxl_port *port);
void cxl_cor_error_detected(struct pci_dev *pdev);
pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
pci_channel_state_t state);
+
+#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */
+
+/*
+ * The spec is overly involved here. Do not account for any ad-hoc
+ * host-side delays, i.e. propagation delay, host-side processing
+ * delays, or any other host/system-specific delays.
+ */
+static inline unsigned long gpf_other_delays_us(void)
+{
+ return 0;
+}
+
+static inline unsigned long gpf_timeout_us(int base, int scale)
+{
+ unsigned long tmo;
+
+ switch (scale) {
+ case 0: /* 1 us */
+ tmo = 1;
+ break;
+ case 1: /* 10 us */
+ tmo = 10UL;
+ break;
+ case 2: /* 100 us */
+ tmo = 100UL;
+ break;
+ case 3: /* 1 ms */
+ tmo = 1000UL;
+ break;
+ case 4: /* 10 ms */
+ tmo = 10000UL;
+ break;
+ case 5: /* 100 ms */
+ tmo = 100000UL;
+ break;
+ case 6: /* 1 s */
+ tmo = 1000000UL;
+ break;
+ case GPF_TIMEOUT_SCALE_MAX:
+ tmo = 10000000UL;
+ break;
+ default:
+ tmo = 0;
+ break;
+ }
+
+ tmo *= base;
+ return tmo + gpf_other_delays_us();
+}
+
#endif /* __CXL_PCI_H__ */
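
To illustrate the helper above, a few sample encodings (base is bits [3:0]
and scale bits [11:8] of the respective control/duration register; values
shown for illustration only):

    gpf_timeout_us(1, 0) ==        1   /* 1 us */
    gpf_timeout_us(3, 6) ==  3000000   /* 3 s */
    gpf_timeout_us(7, 7) == 70000000   /* 70 s, GPF_TIMEOUT_SCALE_MAX */
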
@@ -807,6 +807,85 @@ static int cxl_pci_type3_init_mailbox(struct cxl_dev_state *cxlds)
return 0;
}
+static int cxl_gpf_setup(struct pci_dev *pdev)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct cxl_port *port;
+ int rc, gpf_dvsec;
+ u16 duration;
+ u32 power;
+ int device_t2_base, device_t2_scale;
+
+ /* get the phase 2 timeout advertised by the hardware */
+ gpf_dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+ CXL_DVSEC_DEVICE_GPF);
+ if (!gpf_dvsec) {
+ dev_warn(&pdev->dev,
+ "GPF Device DVSEC not present\n");
+ return -EINVAL;
+ }
+
+ rc = pci_read_config_word(
+ pdev,
+ gpf_dvsec + CXL_DVSEC_DEVICE_GPF_PHASE_2_DURATION_OFFSET,
+ &duration);
+ if (rc)
+ return rc;
+
+ device_t2_base = FIELD_GET(CXL_DVSEC_DEVICE_GPF_PHASE_2_TIME_BASE_MASK,
+ duration);
+ device_t2_scale = FIELD_GET(CXL_DVSEC_DEVICE_GPF_PHASE_2_TIME_SCALE_MASK,
+ duration);
+ if (device_t2_scale > GPF_TIMEOUT_SCALE_MAX) {
+ dev_warn(&pdev->dev, "GPF: invalid device timeout\n");
+ return -EINVAL;
+ }
+
+ /* cache device GPF timeout and power consumption for phase 2 */
+ mds->gpf_t2_base = device_t2_base;
+ mds->gpf_t2_scale = device_t2_scale;
+
+ rc = pci_read_config_dword(
+ pdev,
+ gpf_dvsec + CXL_DVSEC_DEVICE_GPF_PHASE_2_POWER_OFFSET,
+ &power);
+ if (rc)
+ return rc;
+
+ mds->gpf_power_mwatts = power;
+
+ dev_dbg(&pdev->dev, "Device GPF timeout: %lu us (power needed: %dmW)\n",
+ gpf_timeout_us(device_t2_base, device_t2_scale),
+ mds->gpf_power_mwatts);
+
+ /* iterate up the hierarchy updating max port timeouts where necessary */
+ port = cxlmd->endpoint;
+ while (1) {
+ struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
+ struct cxl_dport *dport;
+ unsigned long index;
+
+ device_lock(&parent_port->dev);
+ xa_for_each(&parent_port->dports, index, dport) {
+ if (!dev_is_pci(dport->dport_dev))
+ continue;
+
+ cxl_pci_update_gpf_port(to_pci_dev(dport->dport_dev),
+ cxlmd, false);
+ }
+ device_unlock(&parent_port->dev);
+
+ if (is_cxl_root(parent_port))
+ break;
+
+ port = parent_port;
+ }
+
+ return rc;
+}
+
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
@@ -946,6 +1025,8 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");
+ cxl_gpf_setup(pdev);
+
pci_save_state(pdev);
return rc;