diff mbox series

[V4,1/2] perf: imx8_ddr_perf: add AXI ID filter support

Message ID 20190731104351.3117-1-qiangqing.zhang@nxp.com (mailing list archive)
State New, archived
Headers show
Series [V4,1/2] perf: imx8_ddr_perf: add AXI ID filter support | expand

Commit Message

Joakim Zhang July 31, 2019, 10:46 a.m. UTC
AXI filtering is used by CSV modes 0x41 and 0x42 to count reads or
writes with an ARID or AXID matching filter setting. Granularity is at
subsystem level. Implementation does not allow filtring between masters
within a subsystem. Filter is defined with 2 configuration registers.

--AXI_ID defines AxID matching value
--AXI_MASKING defines which bits of AxID are meaningful for the matching

When non-masked bits are matching corresponding AXI_ID bits then counter
is incremented. This filter allows counting read or write access from a
subsystem or multiple subsystems.

Perf counter is incremented if AxID && AXI_MASKING == AXI_ID && AXI_MASKING

AXI_ID and AXI_MASKING are mapped on DPCR1 register in performance counter.

Read and write AXI ID filter should write same value to DPCR1 if want to
specify at the same time as this filter is shared between counters.

e.g.
perf stat -a -e imx8_ddr0/axi-id-read,axi_id=0xMMMMDDDD/,imx8_ddr0/
axi-id-write,axi_id=0xMMMMDDDD/cmd
MMMM: AXI_MASKING
DDDD: AXI_ID

ChangeLog:
V1 -> V2:
	* add error log if user specifies read/write AXI ID filter at
	the same time.
	* of_device_get_match_data() instead of of_match_device(), and
	remove the check of return value.
V2 -> V3:
	* move the AXI ID check to event_add().
	* add support for same value of axi_id.
V3 -> V4:
	* move the AXI ID check to event_init().

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
---
 drivers/perf/fsl_imx8_ddr_perf.c | 58 ++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 2 deletions(-)

Comments

Robin Murphy July 31, 2019, 12:36 p.m. UTC | #1
On 31/07/2019 11:46, Joakim Zhang wrote:
> AXI filtering is used by CSV modes 0x41 and 0x42 to count reads or
> writes with an ARID or AXID matching filter setting. Granularity is at
> subsystem level. Implementation does not allow filtring between masters
> within a subsystem. Filter is defined with 2 configuration registers.
> 
> --AXI_ID defines AxID matching value
> --AXI_MASKING defines which bits of AxID are meaningful for the matching
> 
> When non-masked bits are matching corresponding AXI_ID bits then counter
> is incremented. This filter allows counting read or write access from a
> subsystem or multiple subsystems.
> 
> Perf counter is incremented if AxID && AXI_MASKING == AXI_ID && AXI_MASKING
> 
> AXI_ID and AXI_MASKING are mapped on DPCR1 register in performance counter.
> 
> Read and write AXI ID filter should write same value to DPCR1 if want to
> specify at the same time as this filter is shared between counters.
> 
> e.g.
> perf stat -a -e imx8_ddr0/axi-id-read,axi_id=0xMMMMDDDD/,imx8_ddr0/
> axi-id-write,axi_id=0xMMMMDDDD/cmd
> MMMM: AXI_MASKING
> DDDD: AXI_ID
> 
> ChangeLog:
> V1 -> V2:
> 	* add error log if user specifies read/write AXI ID filter at
> 	the same time.
> 	* of_device_get_match_data() instead of of_match_device(), and
> 	remove the check of return value.
> V2 -> V3:
> 	* move the AXI ID check to event_add().
> 	* add support for same value of axi_id.
> V3 -> V4:
> 	* move the AXI ID check to event_init().
> 
> Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> ---
>   drivers/perf/fsl_imx8_ddr_perf.c | 58 ++++++++++++++++++++++++++++++--
>   1 file changed, 56 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
> index 63fe21600072..cb90caad6441 100644
> --- a/drivers/perf/fsl_imx8_ddr_perf.c
> +++ b/drivers/perf/fsl_imx8_ddr_perf.c
> @@ -42,9 +42,22 @@
>   
>   static DEFINE_IDA(ddr_ida);
>   
> +/* DDR Perf hardware feature */
> +#define DDR_CAP_AXI_ID_FILTER		0x1	/* support AXI ID filter */
> +
> +struct fsl_ddr_devtype_data {
> +	unsigned int quirks;	/* quirks needed for different DDR Perf core */
> +};
> +
> +static const struct fsl_ddr_devtype_data imx8_devtype_data;
> +
> +static const struct fsl_ddr_devtype_data imx8m_devtype_data = {
> +	.quirks = DDR_CAP_AXI_ID_FILTER,
> +};
> +
>   static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
> -	{ .compatible = "fsl,imx8-ddr-pmu",},
> -	{ .compatible = "fsl,imx8m-ddr-pmu",},
> +	{ .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data},
> +	{ .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data},
>   	{ /* sentinel */ }
>   };
>   
> @@ -57,6 +70,8 @@ struct ddr_pmu {
>   	struct perf_event *events[NUM_COUNTERS];
>   	int active_events;
>   	enum cpuhp_state cpuhp_state;
> +	const struct fsl_ddr_devtype_data *devtype_data;
> +	unsigned int axi_id_read, axi_id_write;

Given that there's apparently only one filter, tracking two different 
IDs seems a bit weird. It would be more intuitive to track one value, 
plus whether it's currently valid or not (unless that's easy to infer by 
just seeing whether any read or write events are currently scheduled).

>   	int irq;
>   	int id;
>   };
> @@ -128,6 +143,8 @@ static struct attribute *ddr_perf_events_attrs[] = {
>   	IMX8_DDR_PMU_EVENT_ATTR(refresh, 0x37),
>   	IMX8_DDR_PMU_EVENT_ATTR(write, 0x38),
>   	IMX8_DDR_PMU_EVENT_ATTR(raw-hazard, 0x39),
> +	IMX8_DDR_PMU_EVENT_ATTR(axi-id-read, 0x41),
> +	IMX8_DDR_PMU_EVENT_ATTR(axi-id-write, 0x42),
>   	NULL,
>   };
>   
> @@ -137,9 +154,11 @@ static struct attribute_group ddr_perf_events_attr_group = {
>   };
>   
>   PMU_FORMAT_ATTR(event, "config:0-7");
> +PMU_FORMAT_ATTR(axi_id, "config1:0-31");
>   
>   static struct attribute *ddr_perf_format_attrs[] = {
>   	&format_attr_event.attr,
> +	&format_attr_axi_id.attr,
>   	NULL,
>   };
>   
> @@ -195,6 +214,18 @@ static int ddr_perf_event_init(struct perf_event *event)
>   	struct hw_perf_event *hwc = &event->hw;
>   	struct perf_event *sibling;
>   
> +	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
> +		if (event->attr.config == 0x41)
> +			pmu->axi_id_read = event->attr.config1;
> +
> +		if (event->attr.config == 0x42)
> +			pmu->axi_id_write = event->attr.config1;
> +
> +		if (pmu->axi_id_read && pmu->axi_id_write &&
> +		    (pmu->axi_id_read != pmu->axi_id_write))
> +			return -EINVAL;
> +	}

This isn't the correct approach that Mark outlined :(

In event_init, you should validate that any filtering for the given 
event is compatible with any other sibling events in the same group, but 
you should not consider (and should definitely not change) the current 
state of the PMU at that point. This step is about rejecting event 
configurations which could *never* be successfully scheduled (since a 
group represents a set of events which must be scheduled all at the same 
time).

In event_add, you know the given event/group is sufficiently valid to 
*potentially* be scheduled, given that it has passed the event_init 
checks, but you then need to check that the filtering is compatible with 
all other events *currently* counting on the PMU. If this fails, perf 
core will try to reschedule the current events until the new one is able 
to run. That's why you need the additional step of validating groups 
beforehand, because otherwise you could end up with contradictory 
scheduling constraints at this point and never make progress.

> +
>   	if (event->attr.type != event->pmu->type)
>   		return -ENOENT;
>   
> @@ -274,6 +305,15 @@ static void ddr_perf_event_start(struct perf_event *event, int flags)
>   	struct hw_perf_event *hwc = &event->hw;
>   	int counter = hwc->idx;
>   
> +	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
> +		if (event->attr.config == 0x41 ||
> +		    event->attr.config == 0x42) {
> +			int val = event->attr.config1;
> +
> +			writel(val, pmu->base + COUNTER_DPCR1);
> +		}
> +	}
> +
>   	local64_set(&hwc->prev_count, 0);
>   
>   	ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
> @@ -324,6 +364,11 @@ static void ddr_perf_event_del(struct perf_event *event, int flags)
>   	struct hw_perf_event *hwc = &event->hw;
>   	int counter = hwc->idx;
>   
> +	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
> +		pmu->axi_id_read = 0;
> +		pmu->axi_id_write = 0;
> +	}
> +
>   	ddr_perf_event_stop(event, PERF_EF_UPDATE);
>   
>   	ddr_perf_free_counter(pmu, counter);
> @@ -445,6 +490,7 @@ static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
>   
>   static int ddr_perf_probe(struct platform_device *pdev)
>   {
> +	const struct fsl_ddr_devtype_data *data;

You don't do anything meaningful with this variable, so you may as well 
just get rid of it.

>   	struct ddr_pmu *pmu;
>   	struct device_node *np;
>   	void __iomem *base;
> @@ -472,6 +518,14 @@ static int ddr_perf_probe(struct platform_device *pdev)
>   	if (!name)
>   		return -ENOMEM;
>   
> +	data = of_device_get_match_data(&pdev->dev);
> +	pmu->devtype_data = data;
> +
> +	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
> +		pmu->axi_id_read = 0;
> +		pmu->axi_id_write = 0;

You've just kzalloc()ed the structure, so these are already initialised 
to zero.

Robin.

> +	}
> +
>   	pmu->cpu = raw_smp_processor_id();
>   	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
>   				      DDR_CPUHP_CB_NAME,
>
Joakim Zhang Aug. 1, 2019, 5:25 a.m. UTC | #2
> -----Original Message-----
> From: Robin Murphy <robin.murphy@arm.com>
> Sent: 2019年7月31日 20:37
> To: Joakim Zhang <qiangqing.zhang@nxp.com>; will@kernel.org;
> mark.rutland@arm.com; Frank Li <frank.li@nxp.com>
> Cc: linux-arm-kernel@lists.infradead.org; kernel@pengutronix.de; dl-linux-imx
> <linux-imx@nxp.com>
> Subject: Re: [PATCH V4 1/2] perf: imx8_ddr_perf: add AXI ID filter support
> 
> On 31/07/2019 11:46, Joakim Zhang wrote:
> > AXI filtering is used by CSV modes 0x41 and 0x42 to count reads or
> > writes with an ARID or AXID matching filter setting. Granularity is at
> > subsystem level. Implementation does not allow filtring between
> > masters within a subsystem. Filter is defined with 2 configuration registers.
> >
> > --AXI_ID defines AxID matching value
> > --AXI_MASKING defines which bits of AxID are meaningful for the
> > matching
> >
> > When non-masked bits are matching corresponding AXI_ID bits then
> > counter is incremented. This filter allows counting read or write
> > access from a subsystem or multiple subsystems.
> >
> > Perf counter is incremented if AxID && AXI_MASKING == AXI_ID &&
> > AXI_MASKING
> >
> > AXI_ID and AXI_MASKING are mapped on DPCR1 register in performance
> counter.
> >
> > Read and write AXI ID filter should write same value to DPCR1 if want
> > to specify at the same time as this filter is shared between counters.
> >
> > e.g.
> > perf stat -a -e imx8_ddr0/axi-id-read,axi_id=0xMMMMDDDD/,imx8_ddr0/
> > axi-id-write,axi_id=0xMMMMDDDD/cmd
> > MMMM: AXI_MASKING
> > DDDD: AXI_ID
> >
> > ChangeLog:
> > V1 -> V2:
> > 	* add error log if user specifies read/write AXI ID filter at
> > 	the same time.
> > 	* of_device_get_match_data() instead of of_match_device(), and
> > 	remove the check of return value.
> > V2 -> V3:
> > 	* move the AXI ID check to event_add().
> > 	* add support for same value of axi_id.
> > V3 -> V4:
> > 	* move the AXI ID check to event_init().
> >
> > Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> > ---
> >   drivers/perf/fsl_imx8_ddr_perf.c | 58
> ++++++++++++++++++++++++++++++--
> >   1 file changed, 56 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/perf/fsl_imx8_ddr_perf.c
> > b/drivers/perf/fsl_imx8_ddr_perf.c
> > index 63fe21600072..cb90caad6441 100644
> > --- a/drivers/perf/fsl_imx8_ddr_perf.c
> > +++ b/drivers/perf/fsl_imx8_ddr_perf.c
> > @@ -42,9 +42,22 @@
> >
> >   static DEFINE_IDA(ddr_ida);
> >
> > +/* DDR Perf hardware feature */
> > +#define DDR_CAP_AXI_ID_FILTER		0x1	/* support AXI ID filter */
> > +
> > +struct fsl_ddr_devtype_data {
> > +	unsigned int quirks;	/* quirks needed for different DDR Perf core */
> > +};
> > +
> > +static const struct fsl_ddr_devtype_data imx8_devtype_data;
> > +
> > +static const struct fsl_ddr_devtype_data imx8m_devtype_data = {
> > +	.quirks = DDR_CAP_AXI_ID_FILTER,
> > +};
> > +
> >   static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
> > -	{ .compatible = "fsl,imx8-ddr-pmu",},
> > -	{ .compatible = "fsl,imx8m-ddr-pmu",},
> > +	{ .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data},
> > +	{ .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data},
> >   	{ /* sentinel */ }
> >   };
> >
> > @@ -57,6 +70,8 @@ struct ddr_pmu {
> >   	struct perf_event *events[NUM_COUNTERS];
> >   	int active_events;
> >   	enum cpuhp_state cpuhp_state;
> > +	const struct fsl_ddr_devtype_data *devtype_data;
> > +	unsigned int axi_id_read, axi_id_write;
> 
> Given that there's apparently only one filter, tracking two different IDs seems a
> bit weird. It would be more intuitive to track one value, plus whether it's
> currently valid or not (unless that's easy to infer by just seeing whether any
> read or write events are currently scheduled).
> 
> >   	int irq;
> >   	int id;
> >   };
> > @@ -128,6 +143,8 @@ static struct attribute *ddr_perf_events_attrs[] = {
> >   	IMX8_DDR_PMU_EVENT_ATTR(refresh, 0x37),
> >   	IMX8_DDR_PMU_EVENT_ATTR(write, 0x38),
> >   	IMX8_DDR_PMU_EVENT_ATTR(raw-hazard, 0x39),
> > +	IMX8_DDR_PMU_EVENT_ATTR(axi-id-read, 0x41),
> > +	IMX8_DDR_PMU_EVENT_ATTR(axi-id-write, 0x42),
> >   	NULL,
> >   };
> >
> > @@ -137,9 +154,11 @@ static struct attribute_group
> ddr_perf_events_attr_group = {
> >   };
> >
> >   PMU_FORMAT_ATTR(event, "config:0-7");
> > +PMU_FORMAT_ATTR(axi_id, "config1:0-31");
> >
> >   static struct attribute *ddr_perf_format_attrs[] = {
> >   	&format_attr_event.attr,
> > +	&format_attr_axi_id.attr,
> >   	NULL,
> >   };
> >
> > @@ -195,6 +214,18 @@ static int ddr_perf_event_init(struct perf_event
> *event)
> >   	struct hw_perf_event *hwc = &event->hw;
> >   	struct perf_event *sibling;
> >
> > +	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
> > +		if (event->attr.config == 0x41)
> > +			pmu->axi_id_read = event->attr.config1;
> > +
> > +		if (event->attr.config == 0x42)
> > +			pmu->axi_id_write = event->attr.config1;
> > +
> > +		if (pmu->axi_id_read && pmu->axi_id_write &&
> > +		    (pmu->axi_id_read != pmu->axi_id_write))
> > +			return -EINVAL;
> > +	}
> 
> This isn't the correct approach that Mark outlined :(
> 
> In event_init, you should validate that any filtering for the given event is
> compatible with any other sibling events in the same group, but you should not
> consider (and should definitely not change) the current state of the PMU at
> that point. This step is about rejecting event configurations which could
> *never* be successfully scheduled (since a group represents a set of events
> which must be scheduled all at the same time).
> 
> In event_add, you know the given event/group is sufficiently valid to
> *potentially* be scheduled, given that it has passed the event_init checks, but
> you then need to check that the filtering is compatible with all other events
> *currently* counting on the PMU. If this fails, perf core will try to reschedule
> the current events until the new one is able to run. That's why you need the
> additional step of validating groups beforehand, because otherwise you could
> end up with contradictory scheduling constraints at this point and never make
> progress.

Hi Mark and Robin,

Thanks for all your kindly detailed explanation firstly.

My understanding from your comments, I need to validate the filtering (i.e. config1/axi_id) for *all* events in same group during event_init, right?
But it's so strange for that axi_id is only for axi-id-read and axi-id-write event. We don't need to specify axi_id for any other events when mixed with these two events.

If I can just check the axi-id-read and axi-id-write event during event_add and then pass the axi_id value to the filter register. Don't check the case that user
specify both of them at the same time with different filtering value. Instead of checking it in the driver, I add the doc in Documentation/admin-guide/perf/ directory
to note that axi-id-read and axi-id-write event should be specified separately, or specified at the same time with same axi_id value.

Best Regards,
Joakim Zhang
> > +
> >   	if (event->attr.type != event->pmu->type)
> >   		return -ENOENT;
> >
> > @@ -274,6 +305,15 @@ static void ddr_perf_event_start(struct perf_event
> *event, int flags)
> >   	struct hw_perf_event *hwc = &event->hw;
> >   	int counter = hwc->idx;
> >
> > +	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
> > +		if (event->attr.config == 0x41 ||
> > +		    event->attr.config == 0x42) {
> > +			int val = event->attr.config1;
> > +
> > +			writel(val, pmu->base + COUNTER_DPCR1);
> > +		}
> > +	}
> > +
> >   	local64_set(&hwc->prev_count, 0);
> >
> >   	ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
> @@
> > -324,6 +364,11 @@ static void ddr_perf_event_del(struct perf_event *event,
> int flags)
> >   	struct hw_perf_event *hwc = &event->hw;
> >   	int counter = hwc->idx;
> >
> > +	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
> > +		pmu->axi_id_read = 0;
> > +		pmu->axi_id_write = 0;
> > +	}
> > +
> >   	ddr_perf_event_stop(event, PERF_EF_UPDATE);
> >
> >   	ddr_perf_free_counter(pmu, counter); @@ -445,6 +490,7 @@ static
> int
> > ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >
> >   static int ddr_perf_probe(struct platform_device *pdev)
> >   {
> > +	const struct fsl_ddr_devtype_data *data;
> 
> You don't do anything meaningful with this variable, so you may as well just get
> rid of it.
> 
> >   	struct ddr_pmu *pmu;
> >   	struct device_node *np;
> >   	void __iomem *base;
> > @@ -472,6 +518,14 @@ static int ddr_perf_probe(struct platform_device
> *pdev)
> >   	if (!name)
> >   		return -ENOMEM;
> >
> > +	data = of_device_get_match_data(&pdev->dev);
> > +	pmu->devtype_data = data;
> > +
> > +	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
> > +		pmu->axi_id_read = 0;
> > +		pmu->axi_id_write = 0;
> 
> You've just kzalloc()ed the structure, so these are already initialised to zero.
> 
> Robin.
> 
> > +	}
> > +
> >   	pmu->cpu = raw_smp_processor_id();
> >   	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
> >   				      DDR_CPUHP_CB_NAME,
> >
Robin Murphy Aug. 1, 2019, 9:59 a.m. UTC | #3
On 2019-08-01 6:25 am, Joakim Zhang wrote:
[...]
>>> @@ -195,6 +214,18 @@ static int ddr_perf_event_init(struct perf_event
>> *event)
>>>    	struct hw_perf_event *hwc = &event->hw;
>>>    	struct perf_event *sibling;
>>>
>>> +	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
>>> +		if (event->attr.config == 0x41)
>>> +			pmu->axi_id_read = event->attr.config1;
>>> +
>>> +		if (event->attr.config == 0x42)
>>> +			pmu->axi_id_write = event->attr.config1;
>>> +
>>> +		if (pmu->axi_id_read && pmu->axi_id_write &&
>>> +		    (pmu->axi_id_read != pmu->axi_id_write))
>>> +			return -EINVAL;
>>> +	}
>>
>> This isn't the correct approach that Mark outlined :(
>>
>> In event_init, you should validate that any filtering for the given event is
>> compatible with any other sibling events in the same group, but you should not
>> consider (and should definitely not change) the current state of the PMU at
>> that point. This step is about rejecting event configurations which could
>> *never* be successfully scheduled (since a group represents a set of events
>> which must be scheduled all at the same time).
>>
>> In event_add, you know the given event/group is sufficiently valid to
>> *potentially* be scheduled, given that it has passed the event_init checks, but
>> you then need to check that the filtering is compatible with all other events
>> *currently* counting on the PMU. If this fails, perf core will try to reschedule
>> the current events until the new one is able to run. That's why you need the
>> additional step of validating groups beforehand, because otherwise you could
>> end up with contradictory scheduling constraints at this point and never make
>> progress.
> 
> Hi Mark and Robin,
> 
> Thanks for all your kindly detailed explanation firstly.
> 
> My understanding from your comments, I need to validate the filtering (i.e. config1/axi_id) for *all* events in same group during event_init, right?
> But it's so strange for that axi_id is only for axi-id-read and axi-id-write event. We don't need to specify axi_id for any other events when mixed with these two events.

Sorry, I implicitly meant all *relevant* events - obviously there's 
nothing to check for events which don't have filtering anyway. All that 
matters is the case where we're asked to create a read/write event in a 
group which already has at least one other read/write event as a 
sibling. I've sketched out a quick (completely untested) example of one 
way to do that part below. The logic for event_add would be very 
similar, except instead of comparing the sibling against the event, 
there you'd compare the event against the current PMU state.

> If I can just check the axi-id-read and axi-id-write event during event_add and then pass the axi_id value to the filter register. Don't check the case that user
> specify both of them at the same time with different filtering value. Instead of checking it in the driver, I add the doc in Documentation/admin-guide/perf/ directory
> to note that axi-id-read and axi-id-write event should be specified separately, or specified at the same time with same axi_id value.

Sure, we could just rely on the user to get it right, but that means 
there's a fair chance that the user can inadvertently get it wrong, get 
nonsensical results, and waste a week trying to debug a perceived 
problem which doesn't actually exist. It's not difficult for the driver 
to perform the correct validation, so it's better for everyone if it does.

It also seems reasonable that a user might want to intentionally measure 
events on different IDs over the same run (but not in the same group), 
e.g. to compare the relative average bandwidth of two devices, perhaps 
to tune QoS parameters. That requires perf core to know it needs to 
rotate the events during the run, which will only happen if event_add 
does the right thing.

Robin.

----->8-----
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c 
b/drivers/perf/fsl_imx8_ddr_perf.c
index 63fe21600072..f0f643831fda 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -189,11 +189,23 @@ static u32 ddr_perf_read_counter(struct ddr_pmu 
*pmu, int counter)
         return readl_relaxed(pmu->base + COUNTER_READ + counter * 4);
  }

+static bool ddr_perf_is_filtered(struct perf_event *event)
+{
+       return event->attr.config == 0x41 || event->attr.config == 0x42;
+}
+
+static u32 ddr_perf_filter_val(struct perf_event *event)
+{
+       return event->attr.config1;
+}
+
  static int ddr_perf_event_init(struct perf_event *event)
  {
         struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
         struct hw_perf_event *hwc = &event->hw;
         struct perf_event *sibling;
+       bool is_filtered;
+       u32 filter_val;

         if (event->attr.type != event->pmu->type)
                 return -ENOENT;
@@ -215,10 +227,17 @@ static int ddr_perf_event_init(struct perf_event 
*event)
                         !is_software_event(event->group_leader))
                 return -EINVAL;

+       is_filtered = ddr_perf_is_filtered(event);
+       filter_val = ddr_perf_filter_val(event);
+
         for_each_sibling_event(sibling, event->group_leader) {
                 if (sibling->pmu != event->pmu &&
                                 !is_software_event(sibling))
                         return -EINVAL;
+
+               if (is_filtered && ddr_perf_is_filtered(sibling) &&
+                   ddr_perf_filter_val(sibling) != filter_val)
+                       return -EINVAL;
         }

         event->cpu = pmu->cpu;
Joakim Zhang Aug. 2, 2019, 7:19 a.m. UTC | #4
> -----Original Message-----
> From: Robin Murphy <robin.murphy@arm.com>
> Sent: 2019年8月1日 18:00
> To: Joakim Zhang <qiangqing.zhang@nxp.com>; will@kernel.org;
> mark.rutland@arm.com; Frank Li <frank.li@nxp.com>
> Cc: linux-arm-kernel@lists.infradead.org; kernel@pengutronix.de; dl-linux-imx
> <linux-imx@nxp.com>
> Subject: Re: [PATCH V4 1/2] perf: imx8_ddr_perf: add AXI ID filter support
> 
> On 2019-08-01 6:25 am, Joakim Zhang wrote:
> [...]
> >>> @@ -195,6 +214,18 @@ static int ddr_perf_event_init(struct
> >>> perf_event
> >> *event)
> >>>    	struct hw_perf_event *hwc = &event->hw;
> >>>    	struct perf_event *sibling;
> >>>
> >>> +	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
> >>> +		if (event->attr.config == 0x41)
> >>> +			pmu->axi_id_read = event->attr.config1;
> >>> +
> >>> +		if (event->attr.config == 0x42)
> >>> +			pmu->axi_id_write = event->attr.config1;
> >>> +
> >>> +		if (pmu->axi_id_read && pmu->axi_id_write &&
> >>> +		    (pmu->axi_id_read != pmu->axi_id_write))
> >>> +			return -EINVAL;
> >>> +	}
> >>
> >> This isn't the correct approach that Mark outlined :(
> >>
> >> In event_init, you should validate that any filtering for the given
> >> event is compatible with any other sibling events in the same group,
> >> but you should not consider (and should definitely not change) the
> >> current state of the PMU at that point. This step is about rejecting
> >> event configurations which could
> >> *never* be successfully scheduled (since a group represents a set of
> >> events which must be scheduled all at the same time).
> >>
> >> In event_add, you know the given event/group is sufficiently valid to
> >> *potentially* be scheduled, given that it has passed the event_init
> >> checks, but you then need to check that the filtering is compatible
> >> with all other events
> >> *currently* counting on the PMU. If this fails, perf core will try to
> >> reschedule the current events until the new one is able to run.
> >> That's why you need the additional step of validating groups
> >> beforehand, because otherwise you could end up with contradictory
> >> scheduling constraints at this point and never make progress.
> >
> > Hi Mark and Robin,
> >
> > Thanks for all your kindly detailed explanation firstly.
> >
> > My understanding from your comments, I need to validate the filtering (i.e.
> config1/axi_id) for *all* events in same group during event_init, right?
> > But it's so strange for that axi_id is only for axi-id-read and axi-id-write event.
> We don't need to specify axi_id for any other events when mixed with these
> two events.
> 
> Sorry, I implicitly meant all *relevant* events - obviously there's nothing to
> check for events which don't have filtering anyway. All that matters is the case
> where we're asked to create a read/write event in a group which already has at
> least one other read/write event as a sibling. I've sketched out a quick
> (completely untested) example of one way to do that part below. The logic for
> event_add would be very similar, except instead of comparing the sibling
> against the event, there you'd compare the event against the current PMU
> state.
> 
> > If I can just check the axi-id-read and axi-id-write event during
> > event_add and then pass the axi_id value to the filter register. Don't
> > check the case that user specify both of them at the same time with different
> filtering value. Instead of checking it in the driver, I add the doc in
> Documentation/admin-guide/perf/ directory to note that axi-id-read and
> axi-id-write event should be specified separately, or specified at the same time
> with same axi_id value.
> 
> Sure, we could just rely on the user to get it right, but that means there's a fair
> chance that the user can inadvertently get it wrong, get nonsensical results,
> and waste a week trying to debug a perceived problem which doesn't actually
> exist. It's not difficult for the driver to perform the correct validation, so it's
> better for everyone if it does.
> 
> It also seems reasonable that a user might want to intentionally measure
> events on different IDs over the same run (but not in the same group), e.g. to
> compare the relative average bandwidth of two devices, perhaps to tune QoS
> parameters. That requires perf core to know it needs to rotate the events
> during the run, which will only happen if event_add does the right thing.
> 
> Robin.

Hi Robin,

I completely understood what you said now, thank you very much. But I came across another issue when I
test this case. You can see below.

[...]
>          for_each_sibling_event(sibling, event->group_leader) {
>                  if (sibling->pmu != event->pmu &&
>                                  !is_software_event(sibling))
>                          return -EINVAL;
> +
> +               if (is_filtered && ddr_perf_is_filtered(sibling) &&
> +                   ddr_perf_filter_val(sibling) != filter_val)
> +                       return -EINVAL;
>          }
[...]

Need to check the axi id value of sibling events in one same group with for_each_sibling_event ():
#define for_each_sibling_event(sibling, event)                  \
         if ((event)->group_leader == (event))                   \
                 list_for_each_entry((sibling), &(event)->sibling_list, sibling_list)

But I found that all check in this For loop will never be checked, that means the code never runs into this For loop.
               if (sibling->pmu != event->pmu &&
                              !is_software_event(sibling))
                       return -EINVAL;

               if (is_filtered && ddr_perf_is_filtered(sibling) &&
                  ddr_perf_filter_val(sibling) != filter_val)
                       return -EINVAL;

Finally I found that it can't iterate over the list with list_for_each_entry((sibling), &(event)->sibling_list, sibling_list). So driver
can't reject event group with axi id illegal value during event_init() now. Do you know why it can't iterate to sibling events?

Best Regards,
Joakim Zhang
Joakim Zhang Aug. 5, 2019, 10:33 a.m. UTC | #5
> -----Original Message-----
> From: Joakim Zhang
> Sent: 2019年8月2日 15:20
> To: Robin Murphy <robin.murphy@arm.com>; will@kernel.org;
> mark.rutland@arm.com; Frank Li <frank.li@nxp.com>
> Cc: linux-arm-kernel@lists.infradead.org; kernel@pengutronix.de; dl-linux-imx
> <linux-imx@nxp.com>
> Subject: RE: [PATCH V4 1/2] perf: imx8_ddr_perf: add AXI ID filter support
> 
> 
> > -----Original Message-----
> > From: Robin Murphy <robin.murphy@arm.com>
> > Sent: 2019年8月1日 18:00
> > To: Joakim Zhang <qiangqing.zhang@nxp.com>; will@kernel.org;
> > mark.rutland@arm.com; Frank Li <frank.li@nxp.com>
> > Cc: linux-arm-kernel@lists.infradead.org; kernel@pengutronix.de;
> > dl-linux-imx <linux-imx@nxp.com>
> > Subject: Re: [PATCH V4 1/2] perf: imx8_ddr_perf: add AXI ID filter
> > support
> >
> > On 2019-08-01 6:25 am, Joakim Zhang wrote:
> > [...]
> > >>> @@ -195,6 +214,18 @@ static int ddr_perf_event_init(struct
> > >>> perf_event
> > >> *event)
> > >>>    	struct hw_perf_event *hwc = &event->hw;
> > >>>    	struct perf_event *sibling;
> > >>>
> > >>> +	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
> > >>> +		if (event->attr.config == 0x41)
> > >>> +			pmu->axi_id_read = event->attr.config1;
> > >>> +
> > >>> +		if (event->attr.config == 0x42)
> > >>> +			pmu->axi_id_write = event->attr.config1;
> > >>> +
> > >>> +		if (pmu->axi_id_read && pmu->axi_id_write &&
> > >>> +		    (pmu->axi_id_read != pmu->axi_id_write))
> > >>> +			return -EINVAL;
> > >>> +	}
> > >>
> > >> This isn't the correct approach that Mark outlined :(
> > >>
> > >> In event_init, you should validate that any filtering for the given
> > >> event is compatible with any other sibling events in the same
> > >> group, but you should not consider (and should definitely not
> > >> change) the current state of the PMU at that point. This step is
> > >> about rejecting event configurations which could
> > >> *never* be successfully scheduled (since a group represents a set
> > >> of events which must be scheduled all at the same time).
> > >>
> > >> In event_add, you know the given event/group is sufficiently valid
> > >> to
> > >> *potentially* be scheduled, given that it has passed the event_init
> > >> checks, but you then need to check that the filtering is compatible
> > >> with all other events
> > >> *currently* counting on the PMU. If this fails, perf core will try
> > >> to reschedule the current events until the new one is able to run.
> > >> That's why you need the additional step of validating groups
> > >> beforehand, because otherwise you could end up with contradictory
> > >> scheduling constraints at this point and never make progress.
> > >
> > > Hi Mark and Robin,
> > >
> > > Thanks for all your kindly detailed explanation firstly.
> > >
> > > My understanding from your comments, I need to validate the filtering (i.e.
> > config1/axi_id) for *all* events in same group during event_init, right?
> > > But it's so strange for that axi_id is only for axi-id-read and axi-id-write
> event.
> > We don't need to specify axi_id for any other events when mixed with
> > these two events.
> >
> > Sorry, I implicitly meant all *relevant* events - obviously there's
> > nothing to check for events which don't have filtering anyway. All
> > that matters is the case where we're asked to create a read/write
> > event in a group which already has at least one other read/write event
> > as a sibling. I've sketched out a quick (completely untested) example
> > of one way to do that part below. The logic for event_add would be
> > very similar, except instead of comparing the sibling against the
> > event, there you'd compare the event against the current PMU state.
> >
> > > If I can just check the axi-id-read and axi-id-write event during
> > > event_add and then pass the axi_id value to the filter register.
> > > Don't check the case that user specify both of them at the same time
> > > with different
> > filtering value. Instead of checking it in the driver, I add the doc
> > in Documentation/admin-guide/perf/ directory to note that axi-id-read
> > and axi-id-write event should be specified separately, or specified at
> > the same time with same axi_id value.
> >
> > Sure, we could just rely on the user to get it right, but that means
> > there's a fair chance that the user can inadvertently get it wrong,
> > get nonsensical results, and waste a week trying to debug a perceived
> > problem which doesn't actually exist. It's not difficult for the
> > driver to perform the correct validation, so it's better for everyone if it does.
> >
> > It also seems reasonable that a user might want to intentionally
> > measure events on different IDs over the same run (but not in the same
> > group), e.g. to compare the relative average bandwidth of two devices,
> > perhaps to tune QoS parameters. That requires perf core to know it
> > needs to rotate the events during the run, which will only happen if
> event_add does the right thing.
> >
> > Robin.
> 
> Hi Robin,
> 
> I completely understood what you said now, thank you very much. But I came
> across another issue when I test this case. You can see below.
> 
> [...]
> >          for_each_sibling_event(sibling, event->group_leader) {
> >                  if (sibling->pmu != event->pmu &&
> >                                  !is_software_event(sibling))
> >                          return -EINVAL;
> > +
> > +               if (is_filtered && ddr_perf_is_filtered(sibling) &&
> > +                   ddr_perf_filter_val(sibling) != filter_val)
> > +                       return -EINVAL;
> >          }
> [...]
> 
> Need to check the axi id value of sibling events in one same group with
> for_each_sibling_event ():
> #define for_each_sibling_event(sibling, event)                  \
>          if ((event)->group_leader == (event))                   \
>                  list_for_each_entry((sibling), &(event)->sibling_list,
> sibling_list)
> 
> But I found that all check in this For loop will never be checked, that means the
> code never runs into this For loop.
>                if (sibling->pmu != event->pmu &&
>                               !is_software_event(sibling))
>                        return -EINVAL;
> 
>                if (is_filtered && ddr_perf_is_filtered(sibling) &&
>                   ddr_perf_filter_val(sibling) != filter_val)
>                        return -EINVAL;
> 
> Finally I found that it can't iterate over the list with list_for_each_entry((sibling),
> &(event)->sibling_list, sibling_list). So driver can't reject event group with axi id
> illegal value during event_init() now. Do you know why it can't iterate to sibling
> events?

Hi Robin,

I have an another question, with below configuration, if this means cycles, read and write events is an event group?
	perf stat -e imx8_ddr0/cycles/,imx8_ddr0/read/,imx8_ddr0/write/ sleep 1

If yes, I found that perf event core attach event group(perf_group_attach() in kernel/events/core.c) after perf event initialization (perf_try_init_event() call pmu->event_init() callback in kernel/events/core.c).
Is it reasonable as we always check the sibling event form event_init callback? And each perf event pass to perf_group_attach() always point to it's group leader, so for_each_sibling_event() in event_init() can't
iterate to it's sibling events, as it has no sibling events from perf_froup_attach().

Do I misunderstand angthing?

Best Regards,
Joakim Zhang
> Best Regards,
> Joakim Zhang
Joakim Zhang Aug. 8, 2019, 6:11 a.m. UTC | #6
> -----Original Message-----
> From: Joakim Zhang
> Sent: 2019年8月5日 18:33
> To: Robin Murphy <robin.murphy@arm.com>; will@kernel.org;
> mark.rutland@arm.com; Frank Li <frank.li@nxp.com>
> Cc: linux-arm-kernel@lists.infradead.org; kernel@pengutronix.de; dl-linux-imx
> <linux-imx@nxp.com>
> Subject: RE: [PATCH V4 1/2] perf: imx8_ddr_perf: add AXI ID filter support
> 
> 
> > -----Original Message-----
> > From: Joakim Zhang
> > Sent: 2019年8月2日 15:20
> > To: Robin Murphy <robin.murphy@arm.com>; will@kernel.org;
> > mark.rutland@arm.com; Frank Li <frank.li@nxp.com>
> > Cc: linux-arm-kernel@lists.infradead.org; kernel@pengutronix.de;
> > dl-linux-imx <linux-imx@nxp.com>
> > Subject: RE: [PATCH V4 1/2] perf: imx8_ddr_perf: add AXI ID filter
> > support
> >
> >
> > > -----Original Message-----
> > > From: Robin Murphy <robin.murphy@arm.com>
> > > Sent: 2019年8月1日 18:00
> > > To: Joakim Zhang <qiangqing.zhang@nxp.com>; will@kernel.org;
> > > mark.rutland@arm.com; Frank Li <frank.li@nxp.com>
> > > Cc: linux-arm-kernel@lists.infradead.org; kernel@pengutronix.de;
> > > dl-linux-imx <linux-imx@nxp.com>
> > > Subject: Re: [PATCH V4 1/2] perf: imx8_ddr_perf: add AXI ID filter
> > > support
> > >
> > > On 2019-08-01 6:25 am, Joakim Zhang wrote:
> > > [...]
> > > >>> @@ -195,6 +214,18 @@ static int ddr_perf_event_init(struct
> > > >>> perf_event
> > > >> *event)
> > > >>>    	struct hw_perf_event *hwc = &event->hw;
> > > >>>    	struct perf_event *sibling;
> > > >>>
> > > >>> +	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
> > > >>> +		if (event->attr.config == 0x41)
> > > >>> +			pmu->axi_id_read = event->attr.config1;
> > > >>> +
> > > >>> +		if (event->attr.config == 0x42)
> > > >>> +			pmu->axi_id_write = event->attr.config1;
> > > >>> +
> > > >>> +		if (pmu->axi_id_read && pmu->axi_id_write &&
> > > >>> +		    (pmu->axi_id_read != pmu->axi_id_write))
> > > >>> +			return -EINVAL;
> > > >>> +	}
> > > >>
> > > >> This isn't the correct approach that Mark outlined :(
> > > >>
> > > >> In event_init, you should validate that any filtering for the
> > > >> given event is compatible with any other sibling events in the
> > > >> same group, but you should not consider (and should definitely
> > > >> not
> > > >> change) the current state of the PMU at that point. This step is
> > > >> about rejecting event configurations which could
> > > >> *never* be successfully scheduled (since a group represents a set
> > > >> of events which must be scheduled all at the same time).
> > > >>
> > > >> In event_add, you know the given event/group is sufficiently
> > > >> valid to
> > > >> *potentially* be scheduled, given that it has passed the
> > > >> event_init checks, but you then need to check that the filtering
> > > >> is compatible with all other events
> > > >> *currently* counting on the PMU. If this fails, perf core will
> > > >> try to reschedule the current events until the new one is able to run.
> > > >> That's why you need the additional step of validating groups
> > > >> beforehand, because otherwise you could end up with contradictory
> > > >> scheduling constraints at this point and never make progress.
> > > >
> > > > Hi Mark and Robin,
> > > >
> > > > Thanks for all your kindly detailed explanation firstly.
> > > >
> > > > My understanding from your comments, I need to validate the filtering
> (i.e.
> > > config1/axi_id) for *all* events in same group during event_init, right?
> > > > But it's so strange for that axi_id is only for axi-id-read and
> > > > axi-id-write
> > event.
> > > We don't need to specify axi_id for any other events when mixed with
> > > these two events.
> > >
> > > Sorry, I implicitly meant all *relevant* events - obviously there's
> > > nothing to check for events which don't have filtering anyway. All
> > > that matters is the case where we're asked to create a read/write
> > > event in a group which already has at least one other read/write
> > > event as a sibling. I've sketched out a quick (completely untested)
> > > example of one way to do that part below. The logic for event_add
> > > would be very similar, except instead of comparing the sibling
> > > against the event, there you'd compare the event against the current PMU
> state.
> > >
> > > > If I can just check the axi-id-read and axi-id-write event during
> > > > event_add and then pass the axi_id value to the filter register.
> > > > Don't check the case that user specify both of them at the same
> > > > time with different
> > > filtering value. Instead of checking it in the driver, I add the doc
> > > in Documentation/admin-guide/perf/ directory to note that
> > > axi-id-read and axi-id-write event should be specified separately,
> > > or specified at the same time with same axi_id value.
> > >
> > > Sure, we could just rely on the user to get it right, but that means
> > > there's a fair chance that the user can inadvertently get it wrong,
> > > get nonsensical results, and waste a week trying to debug a
> > > perceived problem which doesn't actually exist. It's not difficult
> > > for the driver to perform the correct validation, so it's better for everyone if
> it does.
> > >
> > > It also seems reasonable that a user might want to intentionally
> > > measure events on different IDs over the same run (but not in the
> > > same group), e.g. to compare the relative average bandwidth of two
> > > devices, perhaps to tune QoS parameters. That requires perf core to
> > > know it needs to rotate the events during the run, which will only
> > > happen if
> > event_add does the right thing.
> > >
> > > Robin.
> >
> > Hi Robin,
> >
> > I completely understood what you said now, thank you very much. But I
> > came across another issue when I test this case. You can see below.
> >
> > [...]
> > >          for_each_sibling_event(sibling, event->group_leader) {
> > >                  if (sibling->pmu != event->pmu &&
> > >                                  !is_software_event(sibling))
> > >                          return -EINVAL;
> > > +
> > > +               if (is_filtered && ddr_perf_is_filtered(sibling) &&
> > > +                   ddr_perf_filter_val(sibling) != filter_val)
> > > +                       return -EINVAL;
> > >          }
> > [...]
> >
> > Need to check the axi id value of sibling events in one same group
> > with for_each_sibling_event ():
> > #define for_each_sibling_event(sibling, event)                  \
> >          if ((event)->group_leader == (event))                   \
> >                  list_for_each_entry((sibling),
> > &(event)->sibling_list,
> > sibling_list)
> >
> > But I found that all check in this For loop will never be checked,
> > that means the code never runs into this For loop.
> >                if (sibling->pmu != event->pmu &&
> >                               !is_software_event(sibling))
> >                        return -EINVAL;
> >
> >                if (is_filtered && ddr_perf_is_filtered(sibling) &&
> >                   ddr_perf_filter_val(sibling) != filter_val)
> >                        return -EINVAL;
> >
> > Finally I found that it can't iterate over the list with
> > list_for_each_entry((sibling), &(event)->sibling_list, sibling_list).
> > So driver can't reject event group with axi id illegal value during
> > event_init() now. Do you know why it can't iterate to sibling events?
> 
> Hi Robin,
> 
> I have an another question, with below configuration, if this means cycles, read
> and write events is an event group?
> 	perf stat -e imx8_ddr0/cycles/,imx8_ddr0/read/,imx8_ddr0/write/ sleep 1
> 
> If yes, I found that perf event core attach event group(perf_group_attach() in
> kernel/events/core.c) after perf event initialization (perf_try_init_event() call
> pmu->event_init() callback in kernel/events/core.c).
> Is it reasonable as we always check the sibling event form event_init callback?
> And each perf event pass to perf_group_attach() always point to it's group
> leader, so for_each_sibling_event() in event_init() can't iterate to it's sibling
> events, as it has no sibling events from perf_froup_attach().
> 
> Do I misunderstand angthing?

Hi Robin,

I have known that below is correct method to configure an event group:
	perf stat -e '{imx8_ddr0/cycles/,imx8_ddr0/read/,imx8_ddr0/write/}' sleep 1

I will send out PATCH V5, please help review, thank you.
 
Best Regards,
Joakim Zhang
> Best Regards,
> Joakim Zhang
> > Best Regards,
> > Joakim Zhang
diff mbox series

Patch

diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 63fe21600072..cb90caad6441 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -42,9 +42,22 @@ 
 
 static DEFINE_IDA(ddr_ida);
 
+/* DDR Perf hardware feature */
+#define DDR_CAP_AXI_ID_FILTER		0x1	/* support AXI ID filter */
+
+struct fsl_ddr_devtype_data {
+	unsigned int quirks;	/* quirks needed for different DDR Perf core */
+};
+
+static const struct fsl_ddr_devtype_data imx8_devtype_data;
+
+static const struct fsl_ddr_devtype_data imx8m_devtype_data = {
+	.quirks = DDR_CAP_AXI_ID_FILTER,
+};
+
 static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
-	{ .compatible = "fsl,imx8-ddr-pmu",},
-	{ .compatible = "fsl,imx8m-ddr-pmu",},
+	{ .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data},
+	{ .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data},
 	{ /* sentinel */ }
 };
 
@@ -57,6 +70,8 @@  struct ddr_pmu {
 	struct perf_event *events[NUM_COUNTERS];
 	int active_events;
 	enum cpuhp_state cpuhp_state;
+	const struct fsl_ddr_devtype_data *devtype_data;
+	unsigned int axi_id_read, axi_id_write;
 	int irq;
 	int id;
 };
@@ -128,6 +143,8 @@  static struct attribute *ddr_perf_events_attrs[] = {
 	IMX8_DDR_PMU_EVENT_ATTR(refresh, 0x37),
 	IMX8_DDR_PMU_EVENT_ATTR(write, 0x38),
 	IMX8_DDR_PMU_EVENT_ATTR(raw-hazard, 0x39),
+	IMX8_DDR_PMU_EVENT_ATTR(axi-id-read, 0x41),
+	IMX8_DDR_PMU_EVENT_ATTR(axi-id-write, 0x42),
 	NULL,
 };
 
@@ -137,9 +154,11 @@  static struct attribute_group ddr_perf_events_attr_group = {
 };
 
 PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(axi_id, "config1:0-31");
 
 static struct attribute *ddr_perf_format_attrs[] = {
 	&format_attr_event.attr,
+	&format_attr_axi_id.attr,
 	NULL,
 };
 
@@ -195,6 +214,18 @@  static int ddr_perf_event_init(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	struct perf_event *sibling;
 
+	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
+		if (event->attr.config == 0x41)
+			pmu->axi_id_read = event->attr.config1;
+
+		if (event->attr.config == 0x42)
+			pmu->axi_id_write = event->attr.config1;
+
+		if (pmu->axi_id_read && pmu->axi_id_write &&
+		    (pmu->axi_id_read != pmu->axi_id_write))
+			return -EINVAL;
+	}
+
 	if (event->attr.type != event->pmu->type)
 		return -ENOENT;
 
@@ -274,6 +305,15 @@  static void ddr_perf_event_start(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
 
+	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
+		if (event->attr.config == 0x41 ||
+		    event->attr.config == 0x42) {
+			int val = event->attr.config1;
+
+			writel(val, pmu->base + COUNTER_DPCR1);
+		}
+	}
+
 	local64_set(&hwc->prev_count, 0);
 
 	ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
@@ -324,6 +364,11 @@  static void ddr_perf_event_del(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
 
+	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
+		pmu->axi_id_read = 0;
+		pmu->axi_id_write = 0;
+	}
+
 	ddr_perf_event_stop(event, PERF_EF_UPDATE);
 
 	ddr_perf_free_counter(pmu, counter);
@@ -445,6 +490,7 @@  static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
 
 static int ddr_perf_probe(struct platform_device *pdev)
 {
+	const struct fsl_ddr_devtype_data *data;
 	struct ddr_pmu *pmu;
 	struct device_node *np;
 	void __iomem *base;
@@ -472,6 +518,14 @@  static int ddr_perf_probe(struct platform_device *pdev)
 	if (!name)
 		return -ENOMEM;
 
+	data = of_device_get_match_data(&pdev->dev);
+	pmu->devtype_data = data;
+
+	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
+		pmu->axi_id_read = 0;
+		pmu->axi_id_write = 0;
+	}
+
 	pmu->cpu = raw_smp_processor_id();
 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
 				      DDR_CPUHP_CB_NAME,