From patchwork Thu Dec 7 23:31:49 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Jiang X-Patchwork-Id: 13484484 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="cGsFEcs/" Received: from mgamail.intel.com (mgamail.intel.com [192.198.163.10]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 90C221712 for ; Thu, 7 Dec 2023 15:31:51 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1701991911; x=1733527911; h=subject:from:to:cc:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=aO6P6WHCwy1u4K6n+r79HEbwzAsax4lm61MF2Nz/9js=; b=cGsFEcs/fxx78BOlkLeNzTv00ncm9jTBnrqvsAdZFS1IYdB0mBHaQ1FK QJMji1S4MgGz3MiuR2A4BfORSkq+dvagVyd/hni3MWcrRTwQGO5gBRChC lT3wj43nhill3jmAxJ2Z4u3dqZtH+cTOcDTpy5hxUR8NysDaPzzHojyzj evNTN87hng3Xy7nPFfrzydoTWIz7ZxoC+oaWJY/7KvhMPd1P2938WgTBt vURZalW2WGYJbCrrckEWE/eKupikeAK4RNg+KXxQaJ4NBaIdxgTO6YOCb iE7m6kujbMeSjvOcZ3ckeg6GdTULVKry7xs4RAMpZ8j2M6dUZKGc+7Xes A==; X-IronPort-AV: E=McAfee;i="6600,9927,10917"; a="1431991" X-IronPort-AV: E=Sophos;i="6.04,259,1695711600"; d="scan'208";a="1431991" Received: from orsmga004.jf.intel.com ([10.7.209.38]) by fmvoesa104.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Dec 2023 15:31:51 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10917"; a="895326283" X-IronPort-AV: E=Sophos;i="6.04,259,1695711600"; d="scan'208";a="895326283" Received: from djiang5-mobl3.amr.corp.intel.com (HELO [192.168.1.177]) ([10.213.168.225]) by orsmga004-auth.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Dec 2023 15:31:50 -0800 Subject: [PATCH 1/3] cxl/region: Calculate performance data for a region From: Dave Jiang To: linux-cxl@vger.kernel.org Cc: dan.j.williams@intel.com, ira.weiny@intel.com, vishal.l.verma@intel.com, alison.schofield@intel.com, 
jonathan.cameron@huawei.com, dave@stgolabs.net Date: Thu, 07 Dec 2023 16:31:49 -0700 Message-ID: <170199190986.3543815.7111880145751330916.stgit@djiang5-mobl3> In-Reply-To: <170199184936.3543815.17537965163543815359.stgit@djiang5-mobl3> References: <170199184936.3543815.17537965163543815359.stgit@djiang5-mobl3> User-Agent: StGit/1.5 Precedence: bulk X-Mailing-List: linux-cxl@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Calculate and store the performance data for a CXL region. Find the worst read and write latency for all the included ranges from each of the devices that contribute to the region and designate that as the latency data. Sum the read and write bandwidth data of each device in the region to get the total bandwidth for the region. Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 94 +++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 1 2 files changed, 95 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 56e575c79bb4..d879f5702cf2 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2934,6 +2934,98 @@ static int is_system_ram(struct resource *res, void *arg) return 1; } +static int cxl_region_perf_data_calculate(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_endpoint_decoder *cxled; + unsigned int rd_bw = 0, rd_lat = 0; + unsigned int wr_bw = 0, wr_lat = 0; + struct access_coordinate *coord; + struct list_head *perf_list; + int rc = 0, i; + + lockdep_assert_held(&cxl_region_rwsem); + + /* No need to proceed if hmem attributes are already present */ + if (cxlr->coord) + return 0; + + coord = devm_kzalloc(&cxlr->dev, sizeof(*coord), GFP_KERNEL); + if (!coord) + return -ENOMEM; + + cxled = p->targets[0]; + + for (i = 0; i < p->nr_targets; i++) { + struct range dpa = { + .start = cxled->dpa_res->start, + .end = cxled->dpa_res->end, + }; + struct cxl_memdev_state *mds; + struct perf_prop_entry 
*perf; + struct cxl_dev_state *cxlds; + struct cxl_memdev *cxlmd; + bool found = false; + + cxled = p->targets[i]; + cxlmd = cxled_to_memdev(cxled); + cxlds = cxlmd->cxlds; + mds = to_cxl_memdev_state(cxlds); + + switch (cxlr->mode) { + case CXL_DECODER_RAM: + perf_list = &mds->ram_perf_list; + break; + case CXL_DECODER_PMEM: + perf_list = &mds->pmem_perf_list; + break; + default: + rc = -EINVAL; + goto err; + } + + if (list_empty(perf_list)) { + rc = -ENOENT; + goto err; + } + + list_for_each_entry(perf, perf_list, list) { + if (range_contains(&perf->dpa_range, &dpa)) { + found = true; + break; + } + } + + if (!found) { + rc = -ENOENT; + goto err; + } + + /* Get total bandwidth and the worst latency for the cxl region */ + rd_lat = max_t(unsigned int, rd_lat, + perf->coord.read_latency); + rd_bw += perf->coord.read_bandwidth; + wr_lat = max_t(unsigned int, wr_lat, + perf->coord.write_latency); + wr_bw += perf->coord.write_bandwidth; + } + + *coord = (struct access_coordinate) { + .read_latency = rd_lat, + .read_bandwidth = rd_bw, + .write_latency = wr_lat, + .write_bandwidth = wr_bw, + }; + + cxlr->coord = coord; + + return 0; + +err: + devm_kfree(&cxlr->dev, coord); + return rc; +} + static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); @@ -2959,6 +3051,8 @@ static int cxl_region_probe(struct device *dev) goto out; } + cxl_region_perf_data_calculate(cxlr); + /* * From this point on any path that changes the region's state away from * CXL_CONFIG_COMMIT is also responsible for releasing the driver. 
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 004534cf0361..265da412c5bd 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -529,6 +529,7 @@ struct cxl_region { struct cxl_pmem_region *cxlr_pmem; unsigned long flags; struct cxl_region_params params; + struct access_coordinate *coord; }; struct cxl_nvdimm_bridge { From patchwork Thu Dec 7 23:31:56 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Jiang X-Patchwork-Id: 13484485 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="O0vC5p2z" Received: from mgamail.intel.com (mgamail.intel.com [192.198.163.10]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 026B21712 for ; Thu, 7 Dec 2023 15:31:59 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1701991920; x=1733527920; h=subject:from:to:cc:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=WRd6C+Vxp2KoTZfERbc11RFvKr3AbYsWGXaMkH1baIY=; b=O0vC5p2zeTXyixmZlyZkmOdnptKdmQZNkYhJk/DYnzieqK0svpYDKeWz 9TKD0wIn8eHEth3Ry8lWqd1Y1mbbPlO7TwwHcmnrrkeB6PQVeIA9EPsdc PeRj8mzV9f/EvDty0bHruN+6qGcKnvl0x0vBC+SFQ54ZKeZHu/fxBTJtK fCp399x9H4TQNWMBHzNzCKKYD+EIW44EMSVSygtnkX7k/A27+0NTL0Tr/ soB6iaNUFYBk3LIGPk70JPUeBcQrL9Nem9sOLBpUz3G/NsGylf78XjXF2 uAaFl5AqMsgPAjl4kw1LjH8wxtuC0hLhYzPIjWDoOA5W21L1kaZB5o5Ir w==; X-IronPort-AV: E=McAfee;i="6600,9927,10917"; a="1431995" X-IronPort-AV: E=Sophos;i="6.04,259,1695711600"; d="scan'208";a="1431995" Received: from orsmga004.jf.intel.com ([10.7.209.38]) by fmvoesa104.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Dec 2023 15:31:58 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10917"; a="895326293" X-IronPort-AV: E=Sophos;i="6.04,259,1695711600"; d="scan'208";a="895326293" Received: from djiang5-mobl3.amr.corp.intel.com (HELO [192.168.1.177]) ([10.213.168.225]) by 
orsmga004-auth.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Dec 2023 15:31:56 -0800 Subject: [PATCH 2/3] cxl/region: Add sysfs attribute for locality attributes of CXL regions From: Dave Jiang To: linux-cxl@vger.kernel.org Cc: dan.j.williams@intel.com, ira.weiny@intel.com, vishal.l.verma@intel.com, alison.schofield@intel.com, jonathan.cameron@huawei.com, dave@stgolabs.net Date: Thu, 07 Dec 2023 16:31:56 -0700 Message-ID: <170199191618.3543815.17768111410214136858.stgit@djiang5-mobl3> In-Reply-To: <170199184936.3543815.17537965163543815359.stgit@djiang5-mobl3> References: <170199184936.3543815.17537965163543815359.stgit@djiang5-mobl3> User-Agent: StGit/1.5 Precedence: bulk X-Mailing-List: linux-cxl@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Add read/write latencies and bandwidth sysfs attributes for the enabled CXL region. The bandwidth is the aggregated bandwidth of all devices that contribute to the CXL region. The latency is the worst latency among all the devices that contribute to the CXL region. Signed-off-by: Dave Jiang --- Documentation/ABI/testing/sysfs-bus-cxl | 40 +++++++++++++++++++++++++++++++ drivers/cxl/core/region.c | 24 +++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index fff2581b8033..e96f172eb6a6 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -552,3 +552,43 @@ Description: attribute is only visible for devices supporting the capability. The retrieved errors are logged as kernel events when cxl_poison event tracing is enabled. + + +What: /sys/bus/cxl/devices/regionZ/read_bandwidth +Date: Apr, 2023 +KernelVersion: v6.8 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The aggregated read bandwidth of the region. The number is + the accumulated read bandwidth of all CXL memory devices that + contribute to the region. 
+ + +What: /sys/bus/cxl/devices/regionZ/write_bandwidth +Date: Apr, 2023 +KernelVersion: v6.8 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The aggregated write bandwidth of the region. The number is + the accumulated write bandwidth of all CXL memory devices that + contribute to the region. + + +What: /sys/bus/cxl/devices/regionZ/read_latency +Date: Apr, 2023 +KernelVersion: v6.8 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The read latency of the region. The number is + the worst read latency of all CXL memory devices that + contribute to the region. + + +What: /sys/bus/cxl/devices/regionZ/write_latency +Date: Apr, 2023 +KernelVersion: v6.8 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The write latency of the region. The number is + the worst write latency of all CXL memory devices that + contribute to the region. diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index d879f5702cf2..72c47f624d63 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -645,6 +645,26 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(size); +#define ACCESS_ATTR(attrib) \ +static ssize_t attrib##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct cxl_region *cxlr = to_cxl_region(dev); \ + \ + if (!cxlr->coord) \ + return 0; \ + \ + return sysfs_emit(buf, "%u\n", \ + cxlr->coord->attrib); \ +} \ +static DEVICE_ATTR_RO(attrib) + +ACCESS_ATTR(read_bandwidth); +ACCESS_ATTR(read_latency); +ACCESS_ATTR(write_bandwidth); +ACCESS_ATTR(write_latency); + static struct attribute *cxl_region_attrs[] = { &dev_attr_uuid.attr, &dev_attr_commit.attr, @@ -653,6 +673,10 @@ static struct attribute *cxl_region_attrs[] = { &dev_attr_resource.attr, &dev_attr_size.attr, &dev_attr_mode.attr, + &dev_attr_read_bandwidth.attr, + &dev_attr_write_bandwidth.attr, + &dev_attr_read_latency.attr, + &dev_attr_write_latency.attr, NULL, }; From patchwork 
Thu Dec 7 23:32:02 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Jiang X-Patchwork-Id: 13484486 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="Zp+7csL0" Received: from mgamail.intel.com (mgamail.intel.com [192.198.163.10]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7E12F1712 for ; Thu, 7 Dec 2023 15:32:04 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1701991924; x=1733527924; h=subject:from:to:cc:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=qHCKPZEi8y/1+jm5CaPw2uB0tv+0sUGm80nfXA2gUCw=; b=Zp+7csL0HhiW4SzCnhr2LLuJYWuTKMgYUy1gOnzneEab6ZSIKYLd2u/L oWqSwbIT5vlBtHo1pjee2RMsWfEFud/mWZjUA5YfMwSjb55MpbosEoeiy rwa63xWpEJgkoKRgTGT3w25zY3c0dwvyqE362OEeCLemKFUChHftBMkbz JwGIMVazgEHcqh1KoVKOOC5M3zqdI3zZGM8CrrhYXzgEPWmGStbCfiRg7 tofNn9p1ztLnztODdXyMu+4xBOrcDfwa/cL5Wd8dDUg3r+kSAkeySgrKN zPl2CtoL55rBWQRcT/E61lBcYNH5sIr9lmtb4vBEivTf5/BBfPM76hbxp A==; X-IronPort-AV: E=McAfee;i="6600,9927,10917"; a="1432001" X-IronPort-AV: E=Sophos;i="6.04,259,1695711600"; d="scan'208";a="1432001" Received: from orsmga004.jf.intel.com ([10.7.209.38]) by fmvoesa104.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Dec 2023 15:32:04 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10917"; a="895326361" X-IronPort-AV: E=Sophos;i="6.04,259,1695711600"; d="scan'208";a="895326361" Received: from djiang5-mobl3.amr.corp.intel.com (HELO [192.168.1.177]) ([10.213.168.225]) by orsmga004-auth.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Dec 2023 15:32:03 -0800 Subject: [PATCH 3/3] cxl: Add memory hotplug notifier for cxl region From: Dave Jiang To: linux-cxl@vger.kernel.org Cc: Greg Kroah-Hartman , "Rafael J. 
Wysocki" , dan.j.williams@intel.com, ira.weiny@intel.com, vishal.l.verma@intel.com, alison.schofield@intel.com, jonathan.cameron@huawei.com, dave@stgolabs.net Date: Thu, 07 Dec 2023 16:32:02 -0700 Message-ID: <170199192262.3543815.6979022920061286874.stgit@djiang5-mobl3> In-Reply-To: <170199184936.3543815.17537965163543815359.stgit@djiang5-mobl3> References: <170199184936.3543815.17537965163543815359.stgit@djiang5-mobl3> User-Agent: StGit/1.5 Precedence: bulk X-Mailing-List: linux-cxl@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 When the CXL region is formed, the driver computes the performance data for the region. However, this data is not available in the node data collection that has been populated by the HMAT during kernel initialization. Add a memory hotplug notifier to update the performance data to the node hmem_attrs to expose the newly calculated region performance data. The CXL region is created under a specific CFMWS. The node for the CFMWS is created during SRAT parsing by acpi_parse_cfmws(). The notifier will run once only and turn itself off after the initial run. Additional regions may overwrite the initial data, but since this is for the same proximity domain it's a don't care for now. node_set_perf_attrs() is exported to allow update of perf attribs for a node. Given that only CXL is using this, export only to CXL namespace. Cc: Greg Kroah-Hartman Cc: Rafael J. 
Wysocki Signed-off-by: Dave Jiang Reviewed-by: "Huang, Ying" --- drivers/base/node.c | 1 + drivers/cxl/core/region.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 2 ++ 3 files changed, 47 insertions(+) diff --git a/drivers/base/node.c b/drivers/base/node.c index cb2b6cc7f6e6..f5b5a3f11894 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -215,6 +215,7 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, } } } +EXPORT_SYMBOL_NS_GPL(node_set_perf_attrs, CXL); /** * struct node_cache_info - Internal tracking for memory node caches diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 72c47f624d63..3794e91e12b1 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -2958,6 +2959,37 @@ static int is_system_ram(struct resource *res, void *arg) return 1; } +static int cxl_region_perf_attrs_callback(struct notifier_block *nb, + unsigned long action, void *arg) +{ + struct cxl_region *cxlr = container_of(nb, struct cxl_region, + memory_notifier); + struct cxl_region_params *p = &cxlr->params; + struct cxl_endpoint_decoder *cxled = p->targets[0]; + struct cxl_decoder *cxld = &cxled->cxld; + struct memory_notify *mnb = arg; + int nid = mnb->status_change_nid; + struct access_coordinate coord; + int region_nid; + + if (nid == NUMA_NO_NODE || action != MEM_ONLINE || !cxlr->coord) + return NOTIFY_STOP; + + region_nid = phys_to_target_node(cxld->hpa_range.start); + if (nid != region_nid) + return NOTIFY_STOP; + + /* Adjust latencies from psec to nsec to be consistent with HMAT targets */ + coord = *cxlr->coord; + coord.read_latency = DIV_ROUND_UP(coord.read_latency, 1000); + coord.write_latency = DIV_ROUND_UP(coord.write_latency, 1000); + + node_set_perf_attrs(nid, &coord, 0); + node_set_perf_attrs(nid, &coord, 1); + + return NOTIFY_STOP; +} + static int cxl_region_perf_data_calculate(struct cxl_region 
*cxlr) { struct cxl_region_params *p = &cxlr->params; @@ -3077,6 +3109,10 @@ static int cxl_region_probe(struct device *dev) cxl_region_perf_data_calculate(cxlr); + cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback; + cxlr->memory_notifier.priority = HMAT_CALLBACK_PRI; + register_memory_notifier(&cxlr->memory_notifier); + /* * From this point on any path that changes the region's state away from * CXL_CONFIG_COMMIT is also responsible for releasing the driver. @@ -3108,9 +3144,17 @@ static int cxl_region_probe(struct device *dev) } } +static void cxl_region_remove(struct device *dev) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + + unregister_memory_notifier(&cxlr->memory_notifier); +} + static struct cxl_driver cxl_region_driver = { .name = "cxl_region", .probe = cxl_region_probe, + .remove = cxl_region_remove, .id = CXL_DEVICE_REGION, }; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 265da412c5bd..c326ee8956ec 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -530,6 +531,7 @@ struct cxl_region { unsigned long flags; struct cxl_region_params params; struct access_coordinate *coord; + struct notifier_block memory_notifier; }; struct cxl_nvdimm_bridge {