diff mbox series

[3/8] drivers/perf: hisi: Add new functions for L3C PMU

Message ID 1609395576-32775-4-git-send-email-zhangshaokun@hisilicon.com (mailing list archive)
State New, archived
Headers show
Series Add support for HiSilicon Hip09 uncore PMU driver | expand

Commit Message

Shaokun Zhang Dec. 31, 2020, 6:19 a.m. UTC
On the HiSilicon Hip09 platform, the L3C PMU gains some new functions,
such as the tracetag feature, with which the L3C PMU counts only the
operations specified by the user, and the ability to report statistics
for a chosen core in the cluster.
$# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0xf/ sleep 5

$# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Co-developed-by: Qi Liu <liuqi115@huawei.com>
Signed-off-by: Qi Liu <liuqi115@huawei.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
---
 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 258 +++++++++++++++++++++++++--
 drivers/perf/hisilicon/hisi_uncore_pmu.c     |   8 +-
 drivers/perf/hisilicon/hisi_uncore_pmu.h     |  11 ++
 3 files changed, 257 insertions(+), 20 deletions(-)

Comments

John Garry Jan. 26, 2021, 12:05 p.m. UTC | #1
On 31/12/2020 06:19, Shaokun Zhang wrote:
> On HiSilicon Hip09 platform, some new functions are enhanced on L3C PMU,
> like, tracetag feature that L3C PMU can only count the specified
> operations by the user, or L3C PMU can give the desired core's statistics
> in the cluster.
> $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0xf/ sleep 5
> 
> $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5
> 
> Cc: Mark Rutland<mark.rutland@arm.com>
> Cc: Will Deacon<will@kernel.org>
> Cc: John Garry<john.garry@huawei.com>
> Cc: Jonathan Cameron<Jonathan.Cameron@huawei.com>
> Co-developed-by: Qi Liu<liuqi115@huawei.com>
> Signed-off-by: Qi Liu<liuqi115@huawei.com>
> Signed-off-by: Shaokun Zhang<zhangshaokun@hisilicon.com>

Reviewed-by: John Garry <john.garry@huawei.com>
diff mbox series

Patch

diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index dc06e3ca7505..cf4c95ed075b 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -23,12 +23,17 @@ 
 #define L3C_INT_MASK		0x0800
 #define L3C_INT_STATUS		0x0808
 #define L3C_INT_CLEAR		0x080c
+#define L3C_CORE_CTRL           0x1b04
+#define L3C_TRACETAG_CTRL       0x1b20
+#define L3C_DATSRC_TYPE         0x1b48
+#define L3C_DATSRC_CTRL         0x1bf0
 #define L3C_EVENT_CTRL	        0x1c00
 #define L3C_VERSION		0x1cf0
 #define L3C_EVENT_TYPE0		0x1d00
 /*
- * Each counter is 48-bits and [48:63] are reserved
- * which are Read-As-Zero and Writes-Ignored.
+ * If the HW version only supports a 48-bit counter, then
+ * bits [63:48] are reserved, which are Read-As-Zero and
+ * Writes-Ignored.
  */
 #define L3C_CNTR0_LOWER		0x1e00
 
@@ -36,8 +41,187 @@ 
 #define L3C_NR_COUNTERS		0x8
 
 #define L3C_PERF_CTRL_EN	0x10000
+#define L3C_TRACETAG_EN		BIT(31)
+#define L3C_TRACETAG_REQ_SHIFT	7
+#define L3C_TRACETAG_MARK_EN	BIT(0)
+#define L3C_TRACETAG_REQ_EN	(L3C_TRACETAG_MARK_EN | BIT(2))
+#define L3C_TRACETAG_CORE_EN	(L3C_TRACETAG_MARK_EN | BIT(3))
+#define L3C_CORE_EN		BIT(20)
+#define L3C_COER_NONE		0x0
+#define L3C_DATSRC_MASK		0xFF
+#define L3C_DATSRC_SKT_EN	BIT(23)
+#define L3C_DATSRC_NONE		0x0
 #define L3C_EVTYPE_NONE		0xff
 #define L3C_V1_NR_EVENTS	0x59
+#define L3C_V2_NR_EVENTS	0xFF
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);
+
+static inline void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
+{	/* Program request-type tracetag counting if the event sets tt_req */
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_req = hisi_get_tt_req(event);	/* config1 bits [10:8] */
+
+	if (tt_req) {	/* nothing to program when tt_req is 0 */
+		u32 val;
+
+		/* OR the request type into L3C_TRACETAG_CTRL and set its enable bits */
+		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+		val |= tt_req << L3C_TRACETAG_REQ_SHIFT;
+		val |= L3C_TRACETAG_REQ_EN;
+		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+
+		/* Enable request-tracetag statistics via L3C_PERF_CTRL */
+		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+		val |= L3C_TRACETAG_EN;
+		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+	}
+}
+
+static inline void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event)
+{	/* Undo the request-type tracetag setup for an event that sets tt_req */
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_req = hisi_get_tt_req(event);	/* config1 bits [10:8] */
+
+	if (tt_req) {	/* nothing was programmed when tt_req is 0 */
+		u32 val;
+
+		/* Clear the request-type bits this event set in L3C_TRACETAG_CTRL */
+		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+		val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT);
+		val &= ~L3C_TRACETAG_REQ_EN;	/* also clears L3C_TRACETAG_MARK_EN */
+		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+
+		/* Disable request-tracetag statistics in L3C_PERF_CTRL */
+		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+		val &= ~L3C_TRACETAG_EN;
+		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+	}
+}
+
+static inline void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
+{	/* Write ds_cfg into the datasource field of the counter used by event */
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 reg, reg_idx, shift, val;
+	int idx = hwc->idx;
+
+	/*
+	 * Select the datasource register (L3C_DATSRC_TYPE0/1) for this counter.
+	 * There are two datasource type registers for the 8 hardware counters.
+	 * Each counter owns an 8-bit field: counters 0-3 use L3C_DATSRC_TYPE0
+	 * and counters 4-7 use L3C_DATSRC_TYPE1, which is addressed 4 bytes
+	 * above L3C_DATSRC_TYPE0. shift is the bit offset of the field.
+	 */
+	reg = L3C_DATSRC_TYPE + rounddown(idx, 4);
+	reg_idx = idx % 4;
+	shift = 8 * reg_idx;
+
+	val = readl(l3c_pmu->base + reg);
+	val &= ~(L3C_DATSRC_MASK << shift);	/* drop the old value of this field */
+	val |= ds_cfg << shift;
+	writel(val, l3c_pmu->base + reg);
+}
+
+static inline void hisi_l3c_pmu_config_ds(struct perf_event *event)
+{	/* Apply the datasource filters (datasrc_cfg, datasrc_skt) set on event */
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 ds_cfg = hisi_get_datasrc_cfg(event);	/* config1 bits [15:11] */
+	u32 ds_skt = hisi_get_datasrc_skt(event);	/* config1 bit 16 */
+
+	if (ds_cfg)	/* per-counter datasource field */
+		hisi_l3c_pmu_write_ds(event, ds_cfg);
+
+	if (ds_skt) {	/* set L3C_DATSRC_SKT_EN in L3C_DATSRC_CTRL */
+		u32 val;
+
+		val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+		val |= L3C_DATSRC_SKT_EN;
+		writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+	}
+}
+
+static inline void hisi_l3c_pmu_clear_ds(struct perf_event *event)
+{	/* Undo the datasource filters that were applied for event */
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 ds_cfg = hisi_get_datasrc_cfg(event);	/* config1 bits [15:11] */
+	u32 ds_skt = hisi_get_datasrc_skt(event);	/* config1 bit 16 */
+
+	if (ds_cfg)	/* reset the per-counter field to L3C_DATSRC_NONE (0x0) */
+		hisi_l3c_pmu_write_ds(event, L3C_DATSRC_NONE);
+
+	if (ds_skt) {	/* clear L3C_DATSRC_SKT_EN in L3C_DATSRC_CTRL */
+		u32 val;
+
+		val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+		val &= ~L3C_DATSRC_SKT_EN;
+		writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+	}
+}
+
+static inline void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event)
+{	/* Program per-core tracetag counting if the event sets tt_core */
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 core = hisi_get_tt_core(event);	/* config1 bits [7:0] */
+
+	if (core) {	/* nothing to program when tt_core is 0 */
+		u32 val;
+
+		/* Write the tt_core value to L3C_CORE_CTRL and set L3C_CORE_EN */
+		writel(core, l3c_pmu->base + L3C_CORE_CTRL);
+		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+		val |= L3C_CORE_EN;
+		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+
+		/* Enable core-tracetag statistics in L3C_TRACETAG_CTRL */
+		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+		val |= L3C_TRACETAG_CORE_EN;
+		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+	}
+}
+
+static inline void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event)
+{	/* Undo the per-core tracetag setup for an event that sets tt_core */
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 core = hisi_get_tt_core(event);	/* config1 bits [7:0] */
+
+	if (core) {	/* nothing was programmed when tt_core is 0 */
+		u32 val;
+
+		/* Zero L3C_CORE_CTRL; L3C_COER_NONE (sic, reads like CORE) is 0x0 */
+		writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL);
+		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+		val &= ~L3C_CORE_EN;
+		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+
+		/* Disable core-tracetag statistics in L3C_TRACETAG_CTRL */
+		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+		val &= ~L3C_TRACETAG_CORE_EN;	/* also clears L3C_TRACETAG_MARK_EN */
+		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
+{	/* enable_filter hook: program every filter the event selects in config1 */
+	if (event->attr.config1 != 0x0) {	/* skip when no config1 field is set */
+		hisi_l3c_pmu_config_req_tracetag(event);
+		hisi_l3c_pmu_config_core_tracetag(event);
+		hisi_l3c_pmu_config_ds(event);
+	}
+}
+
+static void hisi_l3c_pmu_disable_filter(struct perf_event *event)
+{	/* disable_filter hook: clear every filter the event selected in config1 */
+	if (event->attr.config1 != 0x0) {	/* reverse order of the enable path */
+		hisi_l3c_pmu_clear_ds(event);
+		hisi_l3c_pmu_clear_core_tracetag(event);
+		hisi_l3c_pmu_clear_req_tracetag(event);
+	}
+}
+
 /*
  * Select the counter register offset using the counter index
  */
@@ -49,14 +233,12 @@  static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
 static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
 				     struct hw_perf_event *hwc)
 {
-	/* Read 64-bits and the upper 16 bits are RAZ */
 	return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
 }
 
 static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
 				       struct hw_perf_event *hwc, u64 val)
 {
-	/* Write 64-bits and the upper 16 bits are WI */
 	writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
 }
 
@@ -165,23 +347,14 @@  static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx)
 
 static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
 	{ "HISI0213", },
-	{},
+	{ "HISI0214", },
+	{}
 };
 MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
 
 static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
 				  struct hisi_pmu *l3c_pmu)
 {
-	unsigned long long id;
-	acpi_status status;
-
-	status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
-				       "_UID", NULL, &id);
-	if (ACPI_FAILURE(status))
-		return -EINVAL;
-
-	l3c_pmu->index_id = id;
-
 	/*
 	 * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
 	 * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
@@ -219,6 +392,20 @@  static const struct attribute_group hisi_l3c_pmu_v1_format_group = {
 	.attrs = hisi_l3c_pmu_v1_format_attr,
 };
 
+static struct attribute *hisi_l3c_pmu_v2_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"),	/* read by hisi_get_tt_core() */
+	HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),	/* read by hisi_get_tt_req() */
+	HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"),	/* hisi_get_datasrc_cfg() */
+	HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"),	/* hisi_get_datasrc_skt() */
+	NULL	/* end of list */
+};
+
+static const struct attribute_group hisi_l3c_pmu_v2_format_group = {
+	.name = "format",
+	.attrs = hisi_l3c_pmu_v2_format_attr,
+};
+
 static struct attribute *hisi_l3c_pmu_v1_events_attr[] = {
 	HISI_PMU_EVENT_ATTR(rd_cpipe,		0x00),
 	HISI_PMU_EVENT_ATTR(wr_cpipe,		0x01),
@@ -241,6 +428,17 @@  static const struct attribute_group hisi_l3c_pmu_v1_events_group = {
 	.attrs = hisi_l3c_pmu_v1_events_attr,
 };
 
+static struct attribute *hisi_l3c_pmu_v2_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(dat_hit,		0x48),
+	HISI_PMU_EVENT_ATTR(dat_access,		0xb8),
+	NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v2_events_group = {
+	.name = "events",
+	.attrs = hisi_l3c_pmu_v2_events_attr,
+};
+
 static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
 
 static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = {
@@ -272,6 +470,14 @@  static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = {
 	NULL,
 };
 
+static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = {
+	&hisi_l3c_pmu_v2_format_group,	/* v2 format fields, incl. config1 filters */
+	&hisi_l3c_pmu_v2_events_group,	/* v2 event list */
+	&hisi_l3c_pmu_cpumask_attr_group,
+	&hisi_l3c_pmu_identifier_group,
+	NULL	/* end of list */
+};
+
 static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
 	.write_evtype		= hisi_l3c_pmu_write_evtype,
 	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
@@ -285,6 +491,8 @@  static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
 	.read_counter		= hisi_l3c_pmu_read_counter,
 	.get_int_status		= hisi_l3c_pmu_get_int_status,
 	.clear_int_status	= hisi_l3c_pmu_clear_int_status,
+	.enable_filter		= hisi_l3c_pmu_enable_filter,
+	.disable_filter		= hisi_l3c_pmu_disable_filter,
 };
 
 static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
@@ -300,12 +508,20 @@  static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
 	if (ret)
 		return ret;
 
+	if (l3c_pmu->identifier >= HISI_PMU_V2) {
+		l3c_pmu->counter_bits = 64;
+		l3c_pmu->check_event = L3C_V2_NR_EVENTS;
+		l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups;
+	} else {
+		l3c_pmu->counter_bits = 48;
+		l3c_pmu->check_event = L3C_V1_NR_EVENTS;
+		l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups;
+	}
+
 	l3c_pmu->num_counters = L3C_NR_COUNTERS;
-	l3c_pmu->counter_bits = 48;
 	l3c_pmu->ops = &hisi_uncore_l3c_ops;
 	l3c_pmu->dev = &pdev->dev;
 	l3c_pmu->on_cpu = -1;
-	l3c_pmu->check_event = L3C_V1_NR_EVENTS;
 
 	return 0;
 }
@@ -333,8 +549,12 @@  static int hisi_l3c_pmu_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	/*
+	 * CCL_ID identifies the L3C within its SCCL; _UID was previously
+	 * used for this by mistake.
+	 */
 	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
-			      l3c_pmu->sccl_id, l3c_pmu->index_id);
+			      l3c_pmu->sccl_id, l3c_pmu->ccl_id);
 	l3c_pmu->pmu = (struct pmu) {
 		.name		= name,
 		.module		= THIS_MODULE,
@@ -347,7 +567,7 @@  static int hisi_l3c_pmu_probe(struct platform_device *pdev)
 		.start		= hisi_uncore_pmu_start,
 		.stop		= hisi_uncore_pmu_stop,
 		.read		= hisi_uncore_pmu_read,
-		.attr_groups	= hisi_l3c_pmu_v1_attr_groups,
+		.attr_groups	= l3c_pmu->pmu_events.attr_groups,
 		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
 	};
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 82a4ff2bc3ae..30fb152e217c 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -21,7 +21,7 @@ 
 #include "hisi_uncore_pmu.h"
 
 #define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
-#define HISI_MAX_PERIOD(nr) (BIT_ULL(nr) - 1)
+#define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0)) /* well defined for nr == 64, unlike BIT_ULL(nr) - 1 */
 
 /*
  * PMU format attributes
@@ -256,6 +256,9 @@  static void hisi_uncore_pmu_enable_event(struct perf_event *event)
 	hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx,
 				    HISI_GET_EVENTID(event));
 
+	if (hisi_pmu->ops->enable_filter)
+		hisi_pmu->ops->enable_filter(event);
+
 	hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc);
 	hisi_pmu->ops->enable_counter(hisi_pmu, hwc);
 }
@@ -268,6 +271,9 @@  static void hisi_uncore_pmu_disable_event(struct perf_event *event)
 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 
+	if (hisi_pmu->ops->disable_filter)
+		hisi_pmu->ops->disable_filter(event);
+
 	hisi_pmu->ops->disable_counter(hisi_pmu, hwc);
 	hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc);
 }
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index aaaf637cc9ea..c9f180001ab0 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -11,6 +11,7 @@ 
 #ifndef __HISI_UNCORE_PMU_H__
 #define __HISI_UNCORE_PMU_H__
 
+#include <linux/bitfield.h>
 #include <linux/cpumask.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
@@ -22,6 +23,7 @@ 
 #undef pr_fmt
 #define pr_fmt(fmt)     "hisi_pmu: " fmt
 
+#define HISI_PMU_V2		0x30
 #define HISI_MAX_COUNTERS 0x10
 #define to_hisi_pmu(p)	(container_of(p, struct hisi_pmu, pmu))
 
@@ -35,6 +37,12 @@ 
 #define HISI_PMU_EVENT_ATTR(_name, _config)		\
 	HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config)
 
+#define HISI_PMU_EVENT_ATTR_EXTRACTOR(name, config, hi, lo)        \
+	static inline u32 hisi_get_##name(struct perf_event *event)            \
+	{                                                                  \
+		return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config);  \
+	} /* config in attr.config is the macro parameter: expands to e.g. attr.config1 */
+
 struct hisi_pmu;
 
 struct hisi_uncore_ops {
@@ -50,11 +58,14 @@  struct hisi_uncore_ops {
 	void (*stop_counters)(struct hisi_pmu *);
 	u32 (*get_int_status)(struct hisi_pmu *hisi_pmu);
 	void (*clear_int_status)(struct hisi_pmu *hisi_pmu, int idx);
+	void (*enable_filter)(struct perf_event *event);
+	void (*disable_filter)(struct perf_event *event);
 };
 
 struct hisi_pmu_hwevents {
 	struct perf_event *hw_events[HISI_MAX_COUNTERS];
 	DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS);
+	const struct attribute_group **attr_groups;
 };
 
 /* Generic pmu struct for different pmu types */