diff mbox series

powerpc/papr_scm: Fix nvdimm event mappings

Message ID 20220610133431.410514-1-kjain@linux.ibm.com (mailing list archive)
State Superseded
Headers show
Series powerpc/papr_scm: Fix nvdimm event mappings | expand

Commit Message

kajoljain June 10, 2022, 1:34 p.m. UTC
Commit 4c08d4bbc089 ("powerpc/papr_scm: Add perf interface support")
adds performance monitoring support for papr-scm nvdimm devices via
perf interface. It also adds one array in papr_scm_priv
structure called "nvdimm_events_map", to dynamically save the stat_id
for events specified in nvdimm driver code "nd_perf.c".

Right now the mapping is done based on the result of 
H_SCM_PERFORMANCE_STATS hcall, when all the stats are
requested. Currently there is an assumption, that a
certain stat will always be found at a specific offset
in the stat buffer. The assumption may not be true or
documented as part of PAPR documentation. Fixing it,
by adding a static mapping for nvdimm events to
corresponding stat-id, and removing the map from
papr_scm_priv structure.

Fixes: 4c08d4bbc089 ("powerpc/papr_scm: Add perf interface support")
Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 arch/powerpc/platforms/pseries/papr_scm.c | 59 ++++++++++-------------
 1 file changed, 25 insertions(+), 34 deletions(-)

Comments

Michael Ellerman June 27, 2022, 6:35 a.m. UTC | #1
Hi Kajol,

A few comments below ...

Kajol Jain <kjain@linux.ibm.com> writes:
> Commit 4c08d4bbc089 ("powerpc/papr_scm: Add perf interface support")
> adds performance monitoring support for papr-scm nvdimm devices via
  ^ 
We're talking about a commit that's already happened so we should use
past tense, so "added".

> perf interface. It also adds one array in papr_scm_priv
                         "added" 
> structure called "nvdimm_events_map", to dynamically save the stat_id
> for events specified in nvdimm driver code "nd_perf.c".
>
> Right now the mapping is done based on the result of 
> H_SCM_PERFORMANCE_STATS hcall, when all the stats are
> requested. Currently there is an assumption, that a
> certain stat will always be found at a specific offset
> in the stat buffer.
                    ^
                    "returned by the hypervisor."

To make it clear where the stat buffer comes from, and that it's out of
our control.

> The assumption may not be true or documented as part of PAPR
> documentation.

That reads as the assumption "may not be documented as part of PAPR". I
think what you mean is the assumption *is not* documented by PAPR, and
although it happens to be true on current systems it may not be true in
future.

> Fixing it, by adding a static mapping for nvdimm events to
  Fix  it
> corresponding stat-id, and removing the map from
> papr_scm_priv structure.
>
> Fixes: 4c08d4bbc089 ("powerpc/papr_scm: Add perf interface support")
> Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
> ---
>  arch/powerpc/platforms/pseries/papr_scm.c | 59 ++++++++++-------------
>  1 file changed, 25 insertions(+), 34 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> index 181b855b3050..5434c654a797 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -350,6 +347,26 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
>  #ifdef CONFIG_PERF_EVENTS
>  #define to_nvdimm_pmu(_pmu)	container_of(_pmu, struct nvdimm_pmu, pmu)
> 
> +static const char * const nvdimm_events_map[] = {
> +	"N/A",
> +	"CtlResCt",
> +	"CtlResTm",
> +	"PonSecs ",
> +	"MemLife ",
> +	"CritRscU",
> +	"HostLCnt",
> +	"HostSCnt",
> +	"HostSDur",
> +	"HostLDur",
> +	"MedRCnt ",
> +	"MedWCnt ",
> +	"MedRDur ",
> +	"MedWDur ",
> +	"CchRHCnt",
> +	"CchWHCnt",
> +	"FastWCnt",
> +};
  
The order of the strings in that array becomes ABI. Because it defines
the mapping from perf_event.attr.config (perf user ABI) to the actual
event we request from the hypervisor.

So I'd like that made more explicit by using designated initialisers, eg:

static const char * const nvdimm_events_map[] = {
	[1] = "CtlResCt",
	[2] = "CtlResTm",
        ...

That way an accidental reordering of the array won't break anything.

You shouldn't need to specify 0 either as it's not used.

> @@ -370,7 +387,7 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev,
>  
>  	stat = &stats->scm_statistic[0];
>  	memcpy(&stat->stat_id,
> -	       &p->nvdimm_events_map[event->attr.config * sizeof(stat->stat_id)],
> +	       nvdimm_events_map[event->attr.config],
>  		sizeof(stat->stat_id));

It's not clear that this won't index off the end of the array.

There is a check in papr_scm_pmu_event_init(), but I'd probably be
happier if we did an explicit check in here as well, eg:

	if (event->attr.config >= ARRAY_SIZE(nvdimm_events_map))
		return -EINVAL;


>  	stat->stat_val = 0;
>  
> @@ -460,10 +477,9 @@ static void papr_scm_pmu_del(struct perf_event *event, int flags)
>  
>  static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu)
>  {
> -	struct papr_scm_perf_stat *stat;
>  	struct papr_scm_perf_stats *stats;
>  	u32 available_events;
> -	int index, rc = 0;
> +	int rc = 0;

You shouldn't need to initialise rc here. It's not used until the call
to drc_pmem_query_stats() below.

>  	available_events = (p->stat_buffer_len  - sizeof(struct papr_scm_perf_stats))
>  			/ sizeof(struct papr_scm_perf_stat);
> @@ -473,34 +489,12 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu
>  	/* Allocate the buffer for phyp where stats are written */
>  	stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
>  	if (!stats) {
> -		rc = -ENOMEM;
> -		return rc;
> +		return -ENOMEM;
>  	}
>  
>  	/* Called to get list of events supported */
>  	rc = drc_pmem_query_stats(p, stats, 0);
> -	if (rc)
> -		goto out;
>  
> -	/*
> -	 * Allocate memory and populate nvdimm_event_map.
> -	 * Allocate an extra element for NULL entry
> -	 */
> -	p->nvdimm_events_map = kcalloc(available_events + 1,
> -				       sizeof(stat->stat_id),
> -				       GFP_KERNEL);
> -	if (!p->nvdimm_events_map) {
> -		rc = -ENOMEM;
> -		goto out;
> -	}
> -
> -	/* Copy all stat_ids to event map */
> -	for (index = 0, stat = stats->scm_statistic;
> -	     index < available_events; index++, ++stat) {
> -		memcpy(&p->nvdimm_events_map[index * sizeof(stat->stat_id)],
> -		       &stat->stat_id, sizeof(stat->stat_id));
> -	}
> -out:
>  	kfree(stats);
>  	return rc;
>  }

cheers
kajoljain June 28, 2022, 6:04 a.m. UTC | #2
On 6/27/22 12:05, Michael Ellerman wrote:
> Hi Kajol,
> 
> A few comments below ...

Hi Michael,
   Thanks for reviewing the patch. I will make the changes suggested by
you and send version 2 of this patch.

Thanks,
Kajol Jain
> 
> Kajol Jain <kjain@linux.ibm.com> writes:
>> Commit 4c08d4bbc089 ("powerpc/papr_scm: Add perf interface support")
>> adds performance monitoring support for papr-scm nvdimm devices via
>   ^ 
> We're talking about a commit that's already happened so we should use
> past tense, so "added".
> 
>> perf interface. It also adds one array in papr_scm_priv
>                          "added" 
>> structure called "nvdimm_events_map", to dynamically save the stat_id
>> for events specified in nvdimm driver code "nd_perf.c".
>>
>> Right now the mapping is done based on the result of 
>> H_SCM_PERFORMANCE_STATS hcall, when all the stats are
>> requested. Currently there is an assumption, that a
>> certain stat will always be found at a specific offset
>> in the stat buffer.
>                     ^
>                     "returned by the hypervisor."
> 
> To make it clear where the stat buffer comes from, and that it's out of
> our control.
> 
>> The assumption may not be true or documented as part of PAPR
>> documentation.
> 
> That reads as the assumption "may not be documented as part of PAPR". I
> think what you mean is the assumption *is not* documented by PAPR, and
> although it happens to be true on current systems it may not be true in
> future.
> 
>> Fixing it, by adding a static mapping for nvdimm events to
>   Fix  it
>> corresponding stat-id, and removing the map from
>> papr_scm_priv structure.
>>
>> Fixes: 4c08d4bbc089 ("powerpc/papr_scm: Add perf interface support")
>> Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
>> ---
>>  arch/powerpc/platforms/pseries/papr_scm.c | 59 ++++++++++-------------
>>  1 file changed, 25 insertions(+), 34 deletions(-)
>>
>> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
>> index 181b855b3050..5434c654a797 100644
>> --- a/arch/powerpc/platforms/pseries/papr_scm.c
>> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
>> @@ -350,6 +347,26 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
>>  #ifdef CONFIG_PERF_EVENTS
>>  #define to_nvdimm_pmu(_pmu)	container_of(_pmu, struct nvdimm_pmu, pmu)
>>
>> +static const char * const nvdimm_events_map[] = {
>> +	"N/A",
>> +	"CtlResCt",
>> +	"CtlResTm",
>> +	"PonSecs ",
>> +	"MemLife ",
>> +	"CritRscU",
>> +	"HostLCnt",
>> +	"HostSCnt",
>> +	"HostSDur",
>> +	"HostLDur",
>> +	"MedRCnt ",
>> +	"MedWCnt ",
>> +	"MedRDur ",
>> +	"MedWDur ",
>> +	"CchRHCnt",
>> +	"CchWHCnt",
>> +	"FastWCnt",
>> +};
>   
> The order of the strings in that array becomes ABI. Because it defines
> the mapping from perf_event.attr.config (perf user ABI) to the actual
> event we request from the hypervisor.
> 
> So I'd like that made more explicit by using designated initialisers, eg:
> 
> static const char * const nvdimm_events_map[] = {
> 	[1] = "CtlResCt",
> 	[2] = "CtlResTm",
>         ...
> 
> That way an accidental reordering of the array won't break anything.

Yes make sense. Will do update it.
> 
> You shouldn't need to specify 0 either as it's not used.
> 
>> @@ -370,7 +387,7 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev,
>>  
>>  	stat = &stats->scm_statistic[0];
>>  	memcpy(&stat->stat_id,
>> -	       &p->nvdimm_events_map[event->attr.config * sizeof(stat->stat_id)],
>> +	       nvdimm_events_map[event->attr.config],
>>  		sizeof(stat->stat_id));
> 
> It's not clear that this won't index off the end of the array.
> 
> There is a check in papr_scm_pmu_event_init(), but I'd probably be
> happier if we did an explicit check in here as well, eg:
> 
> 	if (event->attr.config >= ARRAY_SIZE(nvdimm_events_map))
> 		return -EINVAL;
> 
> 
>>  	stat->stat_val = 0;
>>  
>> @@ -460,10 +477,9 @@ static void papr_scm_pmu_del(struct perf_event *event, int flags)
>>  
>>  static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu)
>>  {
>> -	struct papr_scm_perf_stat *stat;
>>  	struct papr_scm_perf_stats *stats;
>>  	u32 available_events;
>> -	int index, rc = 0;
>> +	int rc = 0;
> 
> You shouldn't need to initialise rc here. It's not used until the call
> to drc_pmem_query_stats() below.

Ok sure.

> 
>>  	available_events = (p->stat_buffer_len  - sizeof(struct papr_scm_perf_stats))
>>  			/ sizeof(struct papr_scm_perf_stat);
>> @@ -473,34 +489,12 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu
>>  	/* Allocate the buffer for phyp where stats are written */
>>  	stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
>>  	if (!stats) {
>> -		rc = -ENOMEM;
>> -		return rc;
>> +		return -ENOMEM;
>>  	}
>>  
>>  	/* Called to get list of events supported */
>>  	rc = drc_pmem_query_stats(p, stats, 0);
>> -	if (rc)
>> -		goto out;
>>  
>> -	/*
>> -	 * Allocate memory and populate nvdimm_event_map.
>> -	 * Allocate an extra element for NULL entry
>> -	 */
>> -	p->nvdimm_events_map = kcalloc(available_events + 1,
>> -				       sizeof(stat->stat_id),
>> -				       GFP_KERNEL);
>> -	if (!p->nvdimm_events_map) {
>> -		rc = -ENOMEM;
>> -		goto out;
>> -	}
>> -
>> -	/* Copy all stat_ids to event map */
>> -	for (index = 0, stat = stats->scm_statistic;
>> -	     index < available_events; index++, ++stat) {
>> -		memcpy(&p->nvdimm_events_map[index * sizeof(stat->stat_id)],
>> -		       &stat->stat_id, sizeof(stat->stat_id));
>> -	}
>> -out:
>>  	kfree(stats);
>>  	return rc;
>>  }
> 
> cheers
diff mbox series

Patch

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 181b855b3050..5434c654a797 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -124,9 +124,6 @@  struct papr_scm_priv {
 
 	/* The bits which needs to be overridden */
 	u64 health_bitmap_inject_mask;
-
-	/* array to have event_code and stat_id mappings */
-	u8 *nvdimm_events_map;
 };
 
 static int papr_scm_pmem_flush(struct nd_region *nd_region,
@@ -350,6 +347,26 @@  static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
 #ifdef CONFIG_PERF_EVENTS
 #define to_nvdimm_pmu(_pmu)	container_of(_pmu, struct nvdimm_pmu, pmu)
 
+static const char * const nvdimm_events_map[] = {
+	"N/A",
+	"CtlResCt",
+	"CtlResTm",
+	"PonSecs ",
+	"MemLife ",
+	"CritRscU",
+	"HostLCnt",
+	"HostSCnt",
+	"HostSDur",
+	"HostLDur",
+	"MedRCnt ",
+	"MedWCnt ",
+	"MedRDur ",
+	"MedWDur ",
+	"CchRHCnt",
+	"CchWHCnt",
+	"FastWCnt",
+};
+
 static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count)
 {
 	struct papr_scm_perf_stat *stat;
@@ -361,7 +378,7 @@  static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev,
 	size = sizeof(struct papr_scm_perf_stats) +
 		sizeof(struct papr_scm_perf_stat);
 
-	if (!p || !p->nvdimm_events_map)
+	if (!p)
 		return -EINVAL;
 
 	stats = kzalloc(size, GFP_KERNEL);
@@ -370,7 +387,7 @@  static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev,
 
 	stat = &stats->scm_statistic[0];
 	memcpy(&stat->stat_id,
-	       &p->nvdimm_events_map[event->attr.config * sizeof(stat->stat_id)],
+	       nvdimm_events_map[event->attr.config],
 		sizeof(stat->stat_id));
 	stat->stat_val = 0;
 
@@ -460,10 +477,9 @@  static void papr_scm_pmu_del(struct perf_event *event, int flags)
 
 static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu)
 {
-	struct papr_scm_perf_stat *stat;
 	struct papr_scm_perf_stats *stats;
 	u32 available_events;
-	int index, rc = 0;
+	int rc = 0;
 
 	available_events = (p->stat_buffer_len  - sizeof(struct papr_scm_perf_stats))
 			/ sizeof(struct papr_scm_perf_stat);
@@ -473,34 +489,12 @@  static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu
 	/* Allocate the buffer for phyp where stats are written */
 	stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
 	if (!stats) {
-		rc = -ENOMEM;
-		return rc;
+		return -ENOMEM;
 	}
 
 	/* Called to get list of events supported */
 	rc = drc_pmem_query_stats(p, stats, 0);
-	if (rc)
-		goto out;
 
-	/*
-	 * Allocate memory and populate nvdimm_event_map.
-	 * Allocate an extra element for NULL entry
-	 */
-	p->nvdimm_events_map = kcalloc(available_events + 1,
-				       sizeof(stat->stat_id),
-				       GFP_KERNEL);
-	if (!p->nvdimm_events_map) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	/* Copy all stat_ids to event map */
-	for (index = 0, stat = stats->scm_statistic;
-	     index < available_events; index++, ++stat) {
-		memcpy(&p->nvdimm_events_map[index * sizeof(stat->stat_id)],
-		       &stat->stat_id, sizeof(stat->stat_id));
-	}
-out:
 	kfree(stats);
 	return rc;
 }
@@ -536,7 +530,7 @@  static void papr_scm_pmu_register(struct papr_scm_priv *p)
 
 	rc = register_nvdimm_pmu(nd_pmu, p->pdev);
 	if (rc)
-		goto pmu_register_err;
+		goto pmu_check_events_err;
 
 	/*
 	 * Set archdata.priv value to nvdimm_pmu structure, to handle the
@@ -545,8 +539,6 @@  static void papr_scm_pmu_register(struct papr_scm_priv *p)
 	p->pdev->archdata.priv = nd_pmu;
 	return;
 
-pmu_register_err:
-	kfree(p->nvdimm_events_map);
 pmu_check_events_err:
 	kfree(nd_pmu);
 pmu_err_print:
@@ -1557,7 +1549,6 @@  static int papr_scm_remove(struct platform_device *pdev)
 		unregister_nvdimm_pmu(pdev->archdata.priv);
 
 	pdev->archdata.priv = NULL;
-	kfree(p->nvdimm_events_map);
 	kfree(p->bus_desc.provider_name);
 	kfree(p);