diff mbox series

[v5,3/4] cxl: Fix incorrect region perf data calculation

Message ID 20240325230234.1847525-4-dave.jiang@intel.com
State Superseded
Headers show
Series cxl: access_coordinate validity fixes for 6.9 | expand

Commit Message

Dave Jiang March 25, 2024, 11 p.m. UTC
Current math in cxl_region_perf_data_calculate divides the latency by 1000
every time the function gets called. This causes the region latency to be
divided by 1000 per memory device and the math is incorrect. This is user
visible as the latency access_coordinate exposed via sysfs will show
incorrect latency data.

Move the latency adjustment to where dpa_perf is set and this should
provide the appropriate latency for the region for each endpoint.

Fixes: 3d9f4a197230 ("cxl/region: Calculate performance data for a region")
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/cxl/core/cdat.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

Comments

Dan Williams April 1, 2024, 7:33 p.m. UTC | #1
Dave Jiang wrote:
> Current math in cxl_region_perf_data_calculate divides the latency by 1000
> every time the function gets called. This causes the region latency to be
> divided by 1000 per memory device and the math is incorrect. This is user
> visible as the latency access_coordinate exposed via sysfs will show
> incorrect latency data.
> 
> Move the latency adjustment to where dpa_perf is set and this should
> provide the appropriate latency for the region for each endpoint.
> 
> Fixes: 3d9f4a197230 ("cxl/region: Calculate performance data for a region")
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  drivers/cxl/core/cdat.c | 23 ++++++++++++-----------
>  1 file changed, 12 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
> index 5b75d2d56099..a1b204d451d3 100644
> --- a/drivers/cxl/core/cdat.c
> +++ b/drivers/cxl/core/cdat.c
> @@ -214,8 +214,19 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
>  static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
>  			      struct cxl_dpa_perf *dpa_perf)
>  {
> -	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
> +	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
>  		dpa_perf->coord[i] = dent->coord[i];
> +		/*
> +		 * Convert latency to nanosec from picosec to be consistent
> +		 * with the resulting latency coordinates computed by the
> +		 * HMAT_REPORTING code.
> +		 */
> +		dpa_perf->coord[i].read_latency =
> +			DIV_ROUND_UP(dpa_perf->coord[i].read_latency, 1000);
> +		dpa_perf->coord[i].write_latency =
> +			DIV_ROUND_UP(dpa_perf->coord[i].write_latency, 1000);

It feels like a latent bug that dpa_perf is temporarily counted in
picoseconds.  I.e. every place that assigns into a 'struct
access_coordinate' should do so in nanoseconds. I see how this change
fixes the "over-division" problem, but dpa_perf->coord should never have
storted picosecond values in the first instance.

Something like the following, to match / reuse hmat_normalize()
(untested!):

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index af5cb818f84d..90a7a90ea811 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -530,17 +530,7 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
        if (kstrtou32(acpi_device_uid(hb), 0, &uid))
                return -EINVAL;
 
-       rc = acpi_get_genport_coordinates(uid, dport->hb_coord);
-       if (rc < 0)
-               return rc;
-
-       /* Adjust back to picoseconds from nanoseconds */
-       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
-               dport->hb_coord[i].read_latency *= 1000;
-               dport->hb_coord[i].write_latency *= 1000;
-       }
-
-       return 0;
+       return acpi_get_genport_coordinates(uid, dport->hb_coord);
 }
 
 static int add_host_bridge_dport(struct device *match, void *arg)
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index eddbbe21450c..d5ba4de97c08 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -124,10 +124,8 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
 
        le_base = (__force __le64)dslbis->entry_base_unit;
        le_val = (__force __le16)dslbis->entry[0];
-       rc = check_mul_overflow(le64_to_cpu(le_base),
-                               le16_to_cpu(le_val), &val);
-       if (rc)
-               pr_warn("DSLBIS value overflowed.\n");
+       val = hmat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+                            dslbis->data_type);
 
        cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val);
diff mbox series

Patch

diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index 5b75d2d56099..a1b204d451d3 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -214,8 +214,19 @@  static int cxl_port_perf_data_calculate(struct cxl_port *port,
 static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
 			      struct cxl_dpa_perf *dpa_perf)
 {
-	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
 		dpa_perf->coord[i] = dent->coord[i];
+		/*
+		 * Convert latency to nanosec from picosec to be consistent
+		 * with the resulting latency coordinates computed by the
+		 * HMAT_REPORTING code.
+		 */
+		dpa_perf->coord[i].read_latency =
+			DIV_ROUND_UP(dpa_perf->coord[i].read_latency, 1000);
+		dpa_perf->coord[i].write_latency =
+			DIV_ROUND_UP(dpa_perf->coord[i].write_latency, 1000);
+	}
+
 	dpa_perf->dpa_range = dent->dpa_range;
 	dpa_perf->qos_class = dent->qos_class;
 	dev_dbg(dev,
@@ -559,16 +570,6 @@  void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
 						     perf->coord[i].write_latency);
 		cxlr->coord[i].read_bandwidth += perf->coord[i].read_bandwidth;
 		cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth;
-
-		/*
-		 * Convert latency to nanosec from picosec to be consistent
-		 * with the resulting latency coordinates computed by the
-		 * HMAT_REPORTING code.
-		 */
-		cxlr->coord[i].read_latency =
-			DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000);
-		cxlr->coord[i].write_latency =
-			DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000);
 	}
 }