diff mbox series

[v3,1/2] perf/marvell: Odyssey DDR Performance monitor support

Message ID 20240122124933.1311925-2-gthiagarajan@marvell.com (mailing list archive)
State New, archived
Headers show
Series Marvell Odyssey uncore performance monitor support | expand

Commit Message

Gowthami Thiagarajan Jan. 22, 2024, 12:49 p.m. UTC
Odyssey DRAM Subsystem supports eight counters for monitoring performance
and software can program those counters to monitor any of the defined
performance events. Supported performance events include those counted
at the interface between the DDR controller and the PHY, interface between
the DDR Controller and the CHI interconnect, or within the DDR Controller.

Additionally DSS also supports two fixed performance event counters, one
for ddr reads and the other for ddr writes.

Signed-off-by: Gowthami Thiagarajan <gthiagarajan@marvell.com>
---
 drivers/perf/marvell_cn10k_ddr_pmu.c | 421 +++++++++++++++++++++++----
 1 file changed, 359 insertions(+), 62 deletions(-)

Comments

kernel test robot Jan. 25, 2024, 2:06 p.m. UTC | #1
Hi Gowthami,

kernel test robot noticed the following build warnings:

[auto build test WARNING on soc/for-next]
[also build test WARNING on linus/master v6.8-rc1 next-20240125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Gowthami-Thiagarajan/perf-marvell-Odyssey-DDR-Performance-monitor-support/20240122-205209
base:   https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git for-next
patch link:    https://lore.kernel.org/r/20240122124933.1311925-2-gthiagarajan%40marvell.com
patch subject: [PATCH v3 1/2] perf/marvell: Odyssey DDR Performance monitor support
config: s390-randconfig-001-20240125 (https://download.01.org/0day-ci/archive/20240125/202401252104.CFnKSeTe-lkp@intel.com/config)
compiler: clang version 18.0.0git (https://github.com/llvm/llvm-project a31a60074717fc40887cfe132b77eec93bedd307)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240125/202401252104.CFnKSeTe-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202401252104.CFnKSeTe-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from drivers/perf/marvell_cn10k_ddr_pmu.c:9:
   In file included from include/linux/io.h:13:
   In file included from arch/s390/include/asm/io.h:78:
   include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     547 |         val = __raw_readb(PCI_IOBASE + addr);
         |                           ~~~~~~~~~~ ^
   include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     560 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
      37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
         |                                                           ^
   include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
     102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
         |                                                      ^
   In file included from drivers/perf/marvell_cn10k_ddr_pmu.c:9:
   In file included from include/linux/io.h:13:
   In file included from arch/s390/include/asm/io.h:78:
   include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     573 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
      35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
         |                                                           ^
   include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
     115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
         |                                                      ^
   In file included from drivers/perf/marvell_cn10k_ddr_pmu.c:9:
   In file included from include/linux/io.h:13:
   In file included from arch/s390/include/asm/io.h:78:
   include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     584 |         __raw_writeb(value, PCI_IOBASE + addr);
         |                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     594 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     604 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:692:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     692 |         readsb(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:700:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     700 |         readsw(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:708:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     708 |         readsl(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:717:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     717 |         writesb(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:726:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     726 |         writesw(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:735:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     735 |         writesl(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
>> drivers/perf/marvell_cn10k_ddr_pmu.c:179:43: warning: unused variable 'cn10k_ddr_pmu_pdata' [-Wunused-const-variable]
     179 | static const struct ddr_pmu_platform_data cn10k_ddr_pmu_pdata = {
         |                                           ^~~~~~~~~~~~~~~~~~~
>> drivers/perf/marvell_cn10k_ddr_pmu.c:195:43: warning: unused variable 'odyssey_ddr_pmu_pdata' [-Wunused-const-variable]
     195 | static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = {
         |                                           ^~~~~~~~~~~~~~~~~~~~~
   14 warnings generated.


vim +/cn10k_ddr_pmu_pdata +179 drivers/perf/marvell_cn10k_ddr_pmu.c

   178	
 > 179	static const struct ddr_pmu_platform_data cn10k_ddr_pmu_pdata = {
   180		.counter_overflow_val =  BIT_ULL(48),
   181		.counter_max_val = GENMASK_ULL(48, 0),
   182		.ddrc_perf_cnt_base = CN10K_DDRC_PERF_CNT_VALUE_BASE,
   183		.ddrc_perf_cfg_base = CN10K_DDRC_PERF_CFG_BASE,
   184		.ddrc_perf_cnt_op_mode_ctrl = CN10K_DDRC_PERF_CNT_OP_MODE_CTRL,
   185		.ddrc_perf_cnt_start_op_ctrl = CN10K_DDRC_PERF_CNT_START_OP_CTRL,
   186		.ddrc_perf_cnt_end_op_ctrl = CN10K_DDRC_PERF_CNT_END_OP_CTRL,
   187		.ddrc_perf_cnt_end_status = CN10K_DDRC_PERF_CNT_END_STATUS,
   188		.ddrc_perf_cnt_freerun_en = CN10K_DDRC_PERF_CNT_FREERUN_EN,
   189		.ddrc_perf_cnt_freerun_ctrl = CN10K_DDRC_PERF_CNT_FREERUN_CTRL,
   190		.ddrc_perf_cnt_freerun_clr = 0,
   191		.ddrc_perf_cnt_value_wr_op = CN10K_DDRC_PERF_CNT_VALUE_WR_OP,
   192		.ddrc_perf_cnt_value_rd_op = CN10K_DDRC_PERF_CNT_VALUE_RD_OP,
   193	};
   194	
 > 195	static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = {
   196		.counter_overflow_val = 0,
   197		.counter_max_val = GENMASK_ULL(63, 0),
   198		.ddrc_perf_cnt_base = ODY_DDRC_PERF_CNT_VALUE_BASE,
   199		.ddrc_perf_cfg_base = ODY_DDRC_PERF_CFG_BASE,
   200		.ddrc_perf_cnt_op_mode_ctrl = ODY_DDRC_PERF_CNT_OP_MODE_CTRL,
   201		.ddrc_perf_cnt_start_op_ctrl = ODY_DDRC_PERF_CNT_START_OP_CTRL,
   202		.ddrc_perf_cnt_end_op_ctrl = ODY_DDRC_PERF_CNT_END_OP_CTRL,
   203		.ddrc_perf_cnt_end_status = ODY_DDRC_PERF_CNT_END_STATUS,
   204		.ddrc_perf_cnt_freerun_en = 0,
   205		.ddrc_perf_cnt_freerun_ctrl = ODY_DDRC_PERF_CNT_FREERUN_CTRL,
   206		.ddrc_perf_cnt_freerun_clr = ODY_DDRC_PERF_CNT_FREERUN_CLR,
   207		.ddrc_perf_cnt_value_wr_op = ODY_DDRC_PERF_CNT_VALUE_WR_OP,
   208		.ddrc_perf_cnt_value_rd_op = ODY_DDRC_PERF_CNT_VALUE_RD_OP,
   209	};
   210
kernel test robot Jan. 26, 2024, 4:13 a.m. UTC | #2
Hi Gowthami,

kernel test robot noticed the following build warnings:

[auto build test WARNING on soc/for-next]
[also build test WARNING on linus/master v6.8-rc1 next-20240125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Gowthami-Thiagarajan/perf-marvell-Odyssey-DDR-Performance-monitor-support/20240122-205209
base:   https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git for-next
patch link:    https://lore.kernel.org/r/20240122124933.1311925-2-gthiagarajan%40marvell.com
patch subject: [PATCH v3 1/2] perf/marvell: Odyssey DDR Performance monitor support
config: s390-allmodconfig (https://download.01.org/0day-ci/archive/20240126/202401261154.gKVsomua-lkp@intel.com/config)
compiler: s390-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240126/202401261154.gKVsomua-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202401261154.gKVsomua-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/perf/marvell_cn10k_ddr_pmu.c:195:43: warning: 'odyssey_ddr_pmu_pdata' defined but not used [-Wunused-const-variable=]
     195 | static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = {
         |                                           ^~~~~~~~~~~~~~~~~~~~~


vim +/odyssey_ddr_pmu_pdata +195 drivers/perf/marvell_cn10k_ddr_pmu.c

   194	
 > 195	static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = {
   196		.counter_overflow_val = 0,
   197		.counter_max_val = GENMASK_ULL(63, 0),
   198		.ddrc_perf_cnt_base = ODY_DDRC_PERF_CNT_VALUE_BASE,
   199		.ddrc_perf_cfg_base = ODY_DDRC_PERF_CFG_BASE,
   200		.ddrc_perf_cnt_op_mode_ctrl = ODY_DDRC_PERF_CNT_OP_MODE_CTRL,
   201		.ddrc_perf_cnt_start_op_ctrl = ODY_DDRC_PERF_CNT_START_OP_CTRL,
   202		.ddrc_perf_cnt_end_op_ctrl = ODY_DDRC_PERF_CNT_END_OP_CTRL,
   203		.ddrc_perf_cnt_end_status = ODY_DDRC_PERF_CNT_END_STATUS,
   204		.ddrc_perf_cnt_freerun_en = 0,
   205		.ddrc_perf_cnt_freerun_ctrl = ODY_DDRC_PERF_CNT_FREERUN_CTRL,
   206		.ddrc_perf_cnt_freerun_clr = ODY_DDRC_PERF_CNT_FREERUN_CLR,
   207		.ddrc_perf_cnt_value_wr_op = ODY_DDRC_PERF_CNT_VALUE_WR_OP,
   208		.ddrc_perf_cnt_value_rd_op = ODY_DDRC_PERF_CNT_VALUE_RD_OP,
   209	};
   210
Jonathan Cameron Jan. 29, 2024, 12:04 p.m. UTC | #3
On Mon, 22 Jan 2024 18:19:32 +0530
Gowthami Thiagarajan <gthiagarajan@marvell.com> wrote:

> Odyssey DRAM Subsystem supports eight counters for monitoring performance
> and software can program those counters to monitor any of the defined
> performance events. Supported performance events include those counted
> at the interface between the DDR controller and the PHY, interface between
> the DDR Controller and the CHI interconnect, or within the DDR Controller.
> 
> Additionally DSS also supports two fixed performance event counters, one
> for ddr reads and the other for ddr writes.
> 
> Signed-off-by: Gowthami Thiagarajan <gthiagarajan@marvell.com>
Hi Gowthami,

A quick drive by review.
Questions like whether this patch should be split are down to the perf maintainers.
I would ask for it, but not my area of the kernel ;)

Jonathan


> +#define VERSION_V1				1
> +#define VERSION_V2				2

Prefix these defines + perhaps make them an enum?

> +
>  struct cn10k_ddr_pmu {
>  	struct pmu pmu;
>  	void __iomem *base;
> +	struct ddr_pmu_platform_data *p_data;

Make this const, both because it should be and to avoid casting away the const.


> +	int version;
>  	unsigned int cpu;
>  	struct	device *dev;
>  	int active_events;
> @@ -134,6 +160,54 @@ struct cn10k_ddr_pmu {
>  
>  #define to_cn10k_ddr_pmu(p)	container_of(p, struct cn10k_ddr_pmu, pmu)
>  
> +struct ddr_pmu_platform_data {
> +	u64 counter_overflow_val;
> +	u64 counter_max_val;
> +	u64 ddrc_perf_cnt_base;
> +	u64 ddrc_perf_cfg_base;
> +	u64 ddrc_perf_cnt_op_mode_ctrl;

Good to name these in a fashion that makes it clear what they are.
Some are values, some are register address offsets I think?

Why is the ddrc_perf prefix useful in here?


> +	u64 ddrc_perf_cnt_start_op_ctrl;
> +	u64 ddrc_perf_cnt_end_op_ctrl;
> +	u64 ddrc_perf_cnt_end_status;
> +	u64 ddrc_perf_cnt_freerun_en;
> +	u64 ddrc_perf_cnt_freerun_ctrl;
> +	u64 ddrc_perf_cnt_freerun_clr;
> +	u64 ddrc_perf_cnt_value_wr_op;
> +	u64 ddrc_perf_cnt_value_rd_op;
> +};
> +
> +static const struct ddr_pmu_platform_data cn10k_ddr_pmu_pdata = {
> +	.counter_overflow_val =  BIT_ULL(48),
> +	.counter_max_val = GENMASK_ULL(48, 0),
> +	.ddrc_perf_cnt_base = CN10K_DDRC_PERF_CNT_VALUE_BASE,
> +	.ddrc_perf_cfg_base = CN10K_DDRC_PERF_CFG_BASE,
> +	.ddrc_perf_cnt_op_mode_ctrl = CN10K_DDRC_PERF_CNT_OP_MODE_CTRL,
> +	.ddrc_perf_cnt_start_op_ctrl = CN10K_DDRC_PERF_CNT_START_OP_CTRL,
> +	.ddrc_perf_cnt_end_op_ctrl = CN10K_DDRC_PERF_CNT_END_OP_CTRL,
> +	.ddrc_perf_cnt_end_status = CN10K_DDRC_PERF_CNT_END_STATUS,
> +	.ddrc_perf_cnt_freerun_en = CN10K_DDRC_PERF_CNT_FREERUN_EN,
> +	.ddrc_perf_cnt_freerun_ctrl = CN10K_DDRC_PERF_CNT_FREERUN_CTRL,
> +	.ddrc_perf_cnt_freerun_clr = 0,
> +	.ddrc_perf_cnt_value_wr_op = CN10K_DDRC_PERF_CNT_VALUE_WR_OP,
> +	.ddrc_perf_cnt_value_rd_op = CN10K_DDRC_PERF_CNT_VALUE_RD_OP,
> +};
> +
> +static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = {
> +	.counter_overflow_val = 0,
> +	.counter_max_val = GENMASK_ULL(63, 0),
> +	.ddrc_perf_cnt_base = ODY_DDRC_PERF_CNT_VALUE_BASE,
> +	.ddrc_perf_cfg_base = ODY_DDRC_PERF_CFG_BASE,
> +	.ddrc_perf_cnt_op_mode_ctrl = ODY_DDRC_PERF_CNT_OP_MODE_CTRL,
> +	.ddrc_perf_cnt_start_op_ctrl = ODY_DDRC_PERF_CNT_START_OP_CTRL,
> +	.ddrc_perf_cnt_end_op_ctrl = ODY_DDRC_PERF_CNT_END_OP_CTRL,
> +	.ddrc_perf_cnt_end_status = ODY_DDRC_PERF_CNT_END_STATUS,
> +	.ddrc_perf_cnt_freerun_en = 0,
> +	.ddrc_perf_cnt_freerun_ctrl = ODY_DDRC_PERF_CNT_FREERUN_CTRL,
> +	.ddrc_perf_cnt_freerun_clr = ODY_DDRC_PERF_CNT_FREERUN_CLR,
> +	.ddrc_perf_cnt_value_wr_op = ODY_DDRC_PERF_CNT_VALUE_WR_OP,
> +	.ddrc_perf_cnt_value_rd_op = ODY_DDRC_PERF_CNT_VALUE_RD_OP,
> +};


...

> -static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
> +static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap,
> +				     struct cn10k_ddr_pmu *ddr_pmu)
>  {
> +	int ret = 0;
> +
>  	switch (eventid) {
>  	case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD:
>  	case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH:
>  		*event_bitmap = (1ULL << (eventid - 1));
>  		break;
> +	case EVENT_DFI_PARITY_POISON ...EVENT_DFI_CMD_IS_RETRY:
> +		if (ddr_pmu->version == VERSION_V2) {
> +			*event_bitmap = (1ULL << (eventid - 1));
> +		} else {
> +			pr_err("%s Invalid eventid %d\n", __func__, eventid);
> +			ret = -EINVAL;
> +		}
> +		break;
>  	case EVENT_OP_IS_ENTER_SELFREF:
>  	case EVENT_OP_IS_ENTER_POWERDOWN:
>  	case EVENT_OP_IS_ENTER_MPSM:
> @@ -280,10 +451,10 @@ static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
>  		break;
>  	default:
>  		pr_err("%s Invalid eventid %d\n", __func__, eventid);
> -		return -EINVAL;
> +		ret = -EINVAL;
>  	}
>  
> -	return 0;
> +	return ret;
Why?  Just return in the various paths above. 

Direct returns make for easier to review code as you can follow a particular path through
more quickly.

>  }

>  static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
>  {
>  	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
> +	struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
>  
>  	writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
> -		       DDRC_PERF_CNT_END_OP_CTRL);
> +		       p_data->ddrc_perf_cnt_end_op_ctrl);
>  }
>  
>  static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
> @@ -549,6 +778,7 @@ static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
>  
>  static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
>  {
> +	struct ddr_pmu_platform_data *p_data = pmu->p_data;
>  	struct perf_event *event;
>  	struct hw_perf_event *hwc;
>  	u64 prev_count, new_count;
> @@ -561,7 +791,8 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
>  		prev_count = local64_read(&hwc->prev_count);
>  		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
>  
> -		/* Overflow condition is when new count less than
> +		/*
> +		 * Overflow condition is when new count less than
>  		 * previous count
>  		 */
>  		if (new_count < prev_count)
> @@ -574,7 +805,8 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
>  		prev_count = local64_read(&hwc->prev_count);
>  		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
>  
> -		/* Overflow condition is when new count less than
> +		/*
> +		 * Overflow condition is when new count less than

Good to fix this, but not in a patch doing anything meaningful. If you want
to make comment syntax changes - separate patch.

>  		 * previous count
>  		 */
>  		if (new_count < prev_count)
> @@ -586,11 +818,23 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
>  			continue;
>  
>  		value = cn10k_ddr_perf_read_counter(pmu, i);
> -		if (value == DDRC_PERF_CNT_MAX_VALUE) {
> +		if (value == p_data->counter_max_val) {
>  			pr_info("Counter-(%d) reached max value\n", i);
> -			cn10k_ddr_perf_event_update_all(pmu);
> -			cn10k_ddr_perf_pmu_disable(&pmu->pmu);
> -			cn10k_ddr_perf_pmu_enable(&pmu->pmu);
> +			/*
> +			 * As separate control register is added for each
> +			 * counter in odyssey, no need to update all
> +			 * the events
> +			 */
> +			if (pmu->version == VERSION_V2) {

This sort of version difference is often better handled via a callback
in your pdata structure.  Makes it easy to add a new one for v3 :)

> +				cn10k_ddr_perf_event_update(pmu->events[i]);
> +				cn10k_ddr_perf_counter_stop(pmu, i);
> +				cn10k_ddr_perf_counter_start(pmu, i);
> +
> +			} else {
> +				cn10k_ddr_perf_event_update_all(pmu);
> +				cn10k_ddr_perf_pmu_disable(&pmu->pmu);
> +				cn10k_ddr_perf_pmu_enable(&pmu->pmu);
> +			}
>  		}
>  	}
>  
> @@ -631,7 +875,10 @@ static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
>  
>  static int cn10k_ddr_perf_probe(struct platform_device *pdev)
>  {
> +	struct ddr_pmu_platform_data *pltfm_data;
> +	struct device *dev = &pdev->dev;
>  	struct cn10k_ddr_pmu *ddr_pmu;
> +	const char  *compatible;
>  	struct resource *res;
>  	void __iomem *base;
>  	char *name;
> @@ -642,6 +889,14 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
>  		return -ENOMEM;
>  
>  	ddr_pmu->dev = &pdev->dev;
> +
> +	pltfm_data = (struct ddr_pmu_platform_data *)
> +		      device_get_match_data(&pdev->dev);

Shouldn't need the cast as it's a const void *
and you should not need to modify it in here (so make your
data types
const struct ddr_pmu_platform_data *).



> +	if (!pltfm_data) {
> +		dev_err(&pdev->dev, "Error: No device match data found\n");
> +		return -ENODEV;
> +	}
> +	ddr_pmu->p_data = pltfm_data;
>  	platform_set_drvdata(pdev, ddr_pmu);
>  
>  	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
> @@ -650,25 +905,59 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
>  
>  	ddr_pmu->base = base;
>  
> -	/* Setup the PMU counter to work in manual mode */
> -	writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base +
> -		       DDRC_PERF_CNT_OP_MODE_CTRL);
> -
> -	ddr_pmu->pmu = (struct pmu) {
> -		.module	      = THIS_MODULE,
> -		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> -		.task_ctx_nr = perf_invalid_context,
> -		.attr_groups = cn10k_attr_groups,
> -		.event_init  = cn10k_ddr_perf_event_init,
> -		.add	     = cn10k_ddr_perf_event_add,
> -		.del	     = cn10k_ddr_perf_event_del,
> -		.start	     = cn10k_ddr_perf_event_start,
> -		.stop	     = cn10k_ddr_perf_event_stop,
> -		.read	     = cn10k_ddr_perf_event_update,
> -		.pmu_enable  = cn10k_ddr_perf_pmu_enable,
> -		.pmu_disable = cn10k_ddr_perf_pmu_disable,
> -	};
> +	ret = device_property_read_string(dev, "compatible", &compatible);
> +	if (ret) {
> +		pr_err("compatible property not found\n");
> +		return ret;
> +	}
>  
> +	if ((strncmp("marvell,cn10k-ddr-pmu", compatible,
> +		     strlen(compatible)) == 0))

Why not just embed this in your pdata structure?
Even better would be to add data to reflect the actual differences
rather than relying on a 'version' number.  It tends to be more
extensible as new implementations surface to encode each difference
as data in such a structure.  Otherwise, in the long run you
end up with big switch statements for the many different versions
which just provide some per version constants.  That's messy.


> +		ddr_pmu->version = VERSION_V1;
> +	else
> +		ddr_pmu->version = VERSION_V2;
> +
> +	if (ddr_pmu->version == VERSION_V1) {
> +		ddr_pmu->pmu = (struct pmu) {
> +			.module	      = THIS_MODULE,
> +			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> +			.task_ctx_nr = perf_invalid_context,
> +			.attr_groups = cn10k_attr_groups,
> +			.event_init  = cn10k_ddr_perf_event_init,
> +			.add	     = cn10k_ddr_perf_event_add,
> +			.del	     = cn10k_ddr_perf_event_del,
> +			.start	     = cn10k_ddr_perf_event_start,
> +			.stop	     = cn10k_ddr_perf_event_stop,
> +			.read	     = cn10k_ddr_perf_event_update,
> +			.pmu_enable  = cn10k_ddr_perf_pmu_enable,
> +			.pmu_disable = cn10k_ddr_perf_pmu_disable,
> +		};
> +
> +	/*
> +	 * As we have separate control registers for each counter in Odyssey,
> +	 * setting up the mode will be done when we enable each counter
> +	 *

Trivial: Odd formatting. I'd drop the blank commented line and add a full stop.

> +	 */
> +
> +	/* Setup the PMU counter to work in manual mode */
> +		writeq(OP_MODE_CTRL_VAL_MANUAL, ddr_pmu->base +
> +		      (ddr_pmu->p_data->ddrc_perf_cnt_op_mode_ctrl));
> +	} else {
> +		ddr_pmu->pmu = (struct pmu) {
> +			.module       = THIS_MODULE,
> +			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> +			.task_ctx_nr = perf_invalid_context,
> +			.attr_groups = odyssey_attr_groups,
> +			.event_init  = cn10k_ddr_perf_event_init,
> +			.add         = cn10k_ddr_perf_event_add,
> +			.del         = cn10k_ddr_perf_event_del,
> +			.start       = cn10k_ddr_perf_event_start,
> +			.stop        = cn10k_ddr_perf_event_stop,
> +			.read        = cn10k_ddr_perf_event_update,
> +			.pmu_enable  = NULL,
> +			.pmu_disable = NULL,

No need to set these to NULL.  Not providing them has the same result and
I don't think there is any particular value wrt 'documentation' in
setting them explicitly.  If there is a reason this needs calling out
I'd expect a comment explaining why.

The ideal patch series structure for changes like the ones this patch makes would be:
1) Refactor to pull out the pdata - no functional change.
2) Patch adding the support for the new device.

Result is easier to review than the combination of the two changes.


> +		};
> +	}
Gowthami Thiagarajan Feb. 27, 2024, 1:46 p.m. UTC | #4
Hi Jonathan,

Please find the response inline.

> -----Original Message-----
> From: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
> Sent: Monday, January 29, 2024 5:35 PM
> To: Gowthami Thiagarajan <gthiagarajan@marvell.com>
> Cc: will@kernel.org; mark.rutland@arm.com; linux-arm-kernel@lists.infradead.org; linux-
> kernel@vger.kernel.org; Sunil Kovvuri Goutham <sgoutham@marvell.com>; Bharat Bhushan
> <bbhushan2@marvell.com>; George Cherian <gcherian@marvell.com>
> Subject: [EXT] Re: [PATCH v3 1/2] perf/marvell: Odyssey DDR Performance monitor support
> 
> External Email
> 
> ----------------------------------------------------------------------
> On Mon, 22 Jan 2024 18:19:32 +0530
> Gowthami Thiagarajan <gthiagarajan@marvell.com> wrote:
> 
> > Odyssey DRAM Subsystem supports eight counters for monitoring performance
> > and software can program those counters to monitor any of the defined
> > performance events. Supported performance events include those counted
> > at the interface between the DDR controller and the PHY, interface between
> > the DDR Controller and the CHI interconnect, or within the DDR Controller.
> >
> > Additionally DSS also supports two fixed performance event counters, one
> > for ddr reads and the other for ddr writes.
> >
> > Signed-off-by: Gowthami Thiagarajan <gthiagarajan@marvell.com>
> Hi Gowthami,
> 
> A quick drive by review.
> Questions like whether this patch should be split are down to the perf maintainers.
> I would ask for it, but not my area of the kernel ;)
> 
> Jonathan
> 
> 
> > +#define VERSION_V1				1
> > +#define VERSION_V2				2
> 
> Prefix these defines + perhaps make them an enum?
[Gowthami] Ack.
> 
> > +
> >  struct cn10k_ddr_pmu {
> >  	struct pmu pmu;
> >  	void __iomem *base;
> > +	struct ddr_pmu_platform_data *p_data;
> 
> const both because it should be and to avoid casting away the const.
[Gowthami] Ack. 
> 
> 
> > +	int version;
> >  	unsigned int cpu;
> >  	struct	device *dev;
> >  	int active_events;
> > @@ -134,6 +160,54 @@ struct cn10k_ddr_pmu {
> >
> >  #define to_cn10k_ddr_pmu(p)	container_of(p, struct cn10k_ddr_pmu, pmu)
> >
> > +struct ddr_pmu_platform_data {
> > +	u64 counter_overflow_val;
> > +	u64 counter_max_val;
> > +	u64 ddrc_perf_cnt_base;
> > +	u64 ddrc_perf_cfg_base;
> > +	u64 ddrc_perf_cnt_op_mode_ctrl;
> 
> Good to name these in a fashion that makes it clear what they are.
> Some are values, some are register address offsets I think?
> 
> Why is the ddrc_perf prefix useful in here?
[Gowthami] Yes. Some are values and most of them are register offsets.
They are named this way just to match the register definitions.
> 
> 
> > +	u64 ddrc_perf_cnt_start_op_ctrl;
> > +	u64 ddrc_perf_cnt_end_op_ctrl;
> > +	u64 ddrc_perf_cnt_end_status;
> > +	u64 ddrc_perf_cnt_freerun_en;
> > +	u64 ddrc_perf_cnt_freerun_ctrl;
> > +	u64 ddrc_perf_cnt_freerun_clr;
> > +	u64 ddrc_perf_cnt_value_wr_op;
> > +	u64 ddrc_perf_cnt_value_rd_op;
> > +};
> > +
> > +static const struct ddr_pmu_platform_data cn10k_ddr_pmu_pdata = {
> > +	.counter_overflow_val =  BIT_ULL(48),
> > +	.counter_max_val = GENMASK_ULL(48, 0),
> > +	.ddrc_perf_cnt_base = CN10K_DDRC_PERF_CNT_VALUE_BASE,
> > +	.ddrc_perf_cfg_base = CN10K_DDRC_PERF_CFG_BASE,
> > +	.ddrc_perf_cnt_op_mode_ctrl = CN10K_DDRC_PERF_CNT_OP_MODE_CTRL,
> > +	.ddrc_perf_cnt_start_op_ctrl = CN10K_DDRC_PERF_CNT_START_OP_CTRL,
> > +	.ddrc_perf_cnt_end_op_ctrl = CN10K_DDRC_PERF_CNT_END_OP_CTRL,
> > +	.ddrc_perf_cnt_end_status = CN10K_DDRC_PERF_CNT_END_STATUS,
> > +	.ddrc_perf_cnt_freerun_en = CN10K_DDRC_PERF_CNT_FREERUN_EN,
> > +	.ddrc_perf_cnt_freerun_ctrl = CN10K_DDRC_PERF_CNT_FREERUN_CTRL,
> > +	.ddrc_perf_cnt_freerun_clr = 0,
> > +	.ddrc_perf_cnt_value_wr_op = CN10K_DDRC_PERF_CNT_VALUE_WR_OP,
> > +	.ddrc_perf_cnt_value_rd_op = CN10K_DDRC_PERF_CNT_VALUE_RD_OP,
> > +};
> > +
> > +static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = {
> > +	.counter_overflow_val = 0,
> > +	.counter_max_val = GENMASK_ULL(63, 0),
> > +	.ddrc_perf_cnt_base = ODY_DDRC_PERF_CNT_VALUE_BASE,
> > +	.ddrc_perf_cfg_base = ODY_DDRC_PERF_CFG_BASE,
> > +	.ddrc_perf_cnt_op_mode_ctrl = ODY_DDRC_PERF_CNT_OP_MODE_CTRL,
> > +	.ddrc_perf_cnt_start_op_ctrl = ODY_DDRC_PERF_CNT_START_OP_CTRL,
> > +	.ddrc_perf_cnt_end_op_ctrl = ODY_DDRC_PERF_CNT_END_OP_CTRL,
> > +	.ddrc_perf_cnt_end_status = ODY_DDRC_PERF_CNT_END_STATUS,
> > +	.ddrc_perf_cnt_freerun_en = 0,
> > +	.ddrc_perf_cnt_freerun_ctrl = ODY_DDRC_PERF_CNT_FREERUN_CTRL,
> > +	.ddrc_perf_cnt_freerun_clr = ODY_DDRC_PERF_CNT_FREERUN_CLR,
> > +	.ddrc_perf_cnt_value_wr_op = ODY_DDRC_PERF_CNT_VALUE_WR_OP,
> > +	.ddrc_perf_cnt_value_rd_op = ODY_DDRC_PERF_CNT_VALUE_RD_OP,
> > +};
> 
> 
> ...
> 
> > -static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
> > +static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap,
> > +				     struct cn10k_ddr_pmu *ddr_pmu)
> >  {
> > +	int ret = 0;
> > +
> >  	switch (eventid) {
> >  	case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD:
> >  	case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH:
> >  		*event_bitmap = (1ULL << (eventid - 1));
> >  		break;
> > +	case EVENT_DFI_PARITY_POISON ...EVENT_DFI_CMD_IS_RETRY:
> > +		if (ddr_pmu->version == VERSION_V2) {
> > +			*event_bitmap = (1ULL << (eventid - 1));
> > +		} else {
> > +			pr_err("%s Invalid eventid %d\n", __func__, eventid);
> > +			ret = -EINVAL;
> > +		}
> > +		break;
> >  	case EVENT_OP_IS_ENTER_SELFREF:
> >  	case EVENT_OP_IS_ENTER_POWERDOWN:
> >  	case EVENT_OP_IS_ENTER_MPSM:
> > @@ -280,10 +451,10 @@ static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
> >  		break;
> >  	default:
> >  		pr_err("%s Invalid eventid %d\n", __func__, eventid);
> > -		return -EINVAL;
> > +		ret = -EINVAL;
> >  	}
> >
> > -	return 0;
> > +	return ret;
> Why?  Just return in the various paths above.
> 
> Direct returns make for easier to review code as you can follow a particular path through
> more quickly.
[Gowthami] Ack. 
> 
> >  }
> 
> >  static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
> >  {
> >  	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
> > +	struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
> >
> >  	writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
> > -		       DDRC_PERF_CNT_END_OP_CTRL);
> > +		       p_data->ddrc_perf_cnt_end_op_ctrl);
> >  }
> >
> >  static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
> > @@ -549,6 +778,7 @@ static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
> >
> >  static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
> >  {
> > +	struct ddr_pmu_platform_data *p_data = pmu->p_data;
> >  	struct perf_event *event;
> >  	struct hw_perf_event *hwc;
> >  	u64 prev_count, new_count;
> > @@ -561,7 +791,8 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
> >  		prev_count = local64_read(&hwc->prev_count);
> >  		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
> >
> > -		/* Overflow condition is when new count less than
> > +		/*
> > +		 * Overflow condition is when new count less than
> >  		 * previous count
> >  		 */
> >  		if (new_count < prev_count)
> > @@ -574,7 +805,8 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
> >  		prev_count = local64_read(&hwc->prev_count);
> >  		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
> >
> > -		/* Overflow condition is when new count less than
> > +		/*
> > +		 * Overflow condition is when new count less than
> 
> Good to fix this, but not in a patch doing anything meaningful. If you want
> to make comment syntax changes - separate patch.
[Gowthami] Ack.
> 
> >  		 * previous count
> >  		 */
> >  		if (new_count < prev_count)
> > @@ -586,11 +818,23 @@ static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
> >  			continue;
> >
> >  		value = cn10k_ddr_perf_read_counter(pmu, i);
> > -		if (value == DDRC_PERF_CNT_MAX_VALUE) {
> > +		if (value == p_data->counter_max_val) {
> >  			pr_info("Counter-(%d) reached max value\n", i);
> > -			cn10k_ddr_perf_event_update_all(pmu);
> > -			cn10k_ddr_perf_pmu_disable(&pmu->pmu);
> > -			cn10k_ddr_perf_pmu_enable(&pmu->pmu);
> > +			/*
> > +			 * As separate control register is added for each
> > +			 * counter in odyssey, no need to update all
> > +			 * the events
> > +			 */
> > +			if (pmu->version == VERSION_V2) {
> 
> This sort of version difference is often better handled via a callback
> in your pdata structure.  Makes it easy to add a new one for v3 :)
> 
[Gowthami] Agree. Will make the change in the next version. 
> > +				cn10k_ddr_perf_event_update(pmu->events[i]);
> > +				cn10k_ddr_perf_counter_stop(pmu, i);
> > +				cn10k_ddr_perf_counter_start(pmu, i);
> > +
> > +			} else {
> > +				cn10k_ddr_perf_event_update_all(pmu);
> > +				cn10k_ddr_perf_pmu_disable(&pmu->pmu);
> > +				cn10k_ddr_perf_pmu_enable(&pmu->pmu);
> > +			}
> >  		}
> >  	}
> >
> > @@ -631,7 +875,10 @@ static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> >
> >  static int cn10k_ddr_perf_probe(struct platform_device *pdev)
> >  {
> > +	struct ddr_pmu_platform_data *pltfm_data;
> > +	struct device *dev = &pdev->dev;
> >  	struct cn10k_ddr_pmu *ddr_pmu;
> > +	const char  *compatible;
> >  	struct resource *res;
> >  	void __iomem *base;
> >  	char *name;
> > @@ -642,6 +889,14 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
> >  		return -ENOMEM;
> >
> >  	ddr_pmu->dev = &pdev->dev;
> > +
> > +	pltfm_data = (struct ddr_pmu_platform_data *)
> > +		      device_get_match_data(&pdev->dev);
> 
> Shouldn't need the cast as it's a const void *
> and you should not need to modify it in here (so make your
> data types
> const struct ddr_pmu_platform_data *)
> 
[Gowthami] Ack 
> 
> 
> > +	if (!pltfm_data) {
> > +		dev_err(&pdev->dev, "Error: No device match data found\n");
> > +		return -ENODEV;
> > +	}
> > +	ddr_pmu->p_data = pltfm_data;
> >  	platform_set_drvdata(pdev, ddr_pmu);
> >
> >  	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
> > @@ -650,25 +905,59 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
> >
> >  	ddr_pmu->base = base;
> >
> > -	/* Setup the PMU counter to work in manual mode */
> > -	writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base +
> > -		       DDRC_PERF_CNT_OP_MODE_CTRL);
> > -
> > -	ddr_pmu->pmu = (struct pmu) {
> > -		.module	      = THIS_MODULE,
> > -		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> > -		.task_ctx_nr = perf_invalid_context,
> > -		.attr_groups = cn10k_attr_groups,
> > -		.event_init  = cn10k_ddr_perf_event_init,
> > -		.add	     = cn10k_ddr_perf_event_add,
> > -		.del	     = cn10k_ddr_perf_event_del,
> > -		.start	     = cn10k_ddr_perf_event_start,
> > -		.stop	     = cn10k_ddr_perf_event_stop,
> > -		.read	     = cn10k_ddr_perf_event_update,
> > -		.pmu_enable  = cn10k_ddr_perf_pmu_enable,
> > -		.pmu_disable = cn10k_ddr_perf_pmu_disable,
> > -	};
> > +	ret = device_property_read_string(dev, "compatible", &compatible);
> > +	if (ret) {
> > +		pr_err("compatible property not found\n");
> > +		return ret;
> > +	}
> >
> > +	if ((strncmp("marvell,cn10k-ddr-pmu", compatible,
> > +		     strlen(compatible)) == 0))
> 
> Why not just embed this in your pdata structure?
> Even better would be add data to reflect the actual differences
> rather than relying on a 'version' number.  It tends to be more
> extensible as new implementations surface to encode each difference
> as data in such a structure.  Otherwise, in the long run you
> end up with big switch statements for the many different versions
> which just provide some per version constants.  That's messy.
[Gowthami] Ack.
> 
> 
> > +		ddr_pmu->version = VERSION_V1;
> > +	else
> > +		ddr_pmu->version = VERSION_V2;
> > +
> > +	if (ddr_pmu->version == VERSION_V1) {
> > +		ddr_pmu->pmu = (struct pmu) {
> > +			.module	      = THIS_MODULE,
> > +			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> > +			.task_ctx_nr = perf_invalid_context,
> > +			.attr_groups = cn10k_attr_groups,
> > +			.event_init  = cn10k_ddr_perf_event_init,
> > +			.add	     = cn10k_ddr_perf_event_add,
> > +			.del	     = cn10k_ddr_perf_event_del,
> > +			.start	     = cn10k_ddr_perf_event_start,
> > +			.stop	     = cn10k_ddr_perf_event_stop,
> > +			.read	     = cn10k_ddr_perf_event_update,
> > +			.pmu_enable  = cn10k_ddr_perf_pmu_enable,
> > +			.pmu_disable = cn10k_ddr_perf_pmu_disable,
> > +		};
> > +
> > +	/*
> > +	 * As we have separate control registers for each counter in Odyssey,
> > +	 * setting up the mode will be done when we enable each counter
> > +	 *
> 
> Trivial: Odd formatting. I'd drop the blank commented line and add a full stop.
[Gowthami] Ack
> 
> > +	 */
> > +
> > +	/* Setup the PMU counter to work in manual mode */
> > +		writeq(OP_MODE_CTRL_VAL_MANUAL, ddr_pmu->base +
> > +		      (ddr_pmu->p_data->ddrc_perf_cnt_op_mode_ctrl));
> > +	} else {
> > +		ddr_pmu->pmu = (struct pmu) {
> > +			.module       = THIS_MODULE,
> > +			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
> > +			.task_ctx_nr = perf_invalid_context,
> > +			.attr_groups = odyssey_attr_groups,
> > +			.event_init  = cn10k_ddr_perf_event_init,
> > +			.add         = cn10k_ddr_perf_event_add,
> > +			.del         = cn10k_ddr_perf_event_del,
> > +			.start       = cn10k_ddr_perf_event_start,
> > +			.stop        = cn10k_ddr_perf_event_stop,
> > +			.read        = cn10k_ddr_perf_event_update,
> > +			.pmu_enable  = NULL,
> > +			.pmu_disable = NULL,
> 
> No need to set these to NULL.  Not providing them has the same result and
> I don't think there is any particular value, with respect to 'documentation', in
> setting them explicitly.  If there is a reason this needs calling out
> I'd expect a comment explaining why.
[Gowthami] There is no specific reason. Will remove it in the next version.
> 
> The ideal patch series structure for the kind of changes this patch makes would be:
> 1) Refactor to pull out the pdata - no functional change.
> 2) Patch adding the support for the new device.
> 
> Result is easier to review than the combination of the two changes.
[Gowthami] Agree. Will split the patch as suggested.

Thanks,
Gowthami
> 
> 
> > +		};
> > +	}
diff mbox series

Patch

diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
index 524ba82bfce2..b9c8be08d720 100644
--- a/drivers/perf/marvell_cn10k_ddr_pmu.c
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -1,7 +1,8 @@ 
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver
+/*
+ * Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver
  *
- * Copyright (C) 2021 Marvell.
+ * Copyright (C) 2024 Marvell.
  */
 
 #include <linux/init.h>
@@ -14,24 +15,29 @@ 
 #include <linux/platform_device.h>
 
 /* Performance Counters Operating Mode Control Registers */
-#define DDRC_PERF_CNT_OP_MODE_CTRL	0x8020
-#define OP_MODE_CTRL_VAL_MANNUAL	0x1
+#define OP_MODE_CTRL_VAL_MANUAL	0x1
+#define CN10K_DDRC_PERF_CNT_OP_MODE_CTRL	0x8020
+#define ODY_DDRC_PERF_CNT_OP_MODE_CTRL		0x20020
 
 /* Performance Counters Start Operation Control Registers */
-#define DDRC_PERF_CNT_START_OP_CTRL	0x8028
+#define CN10K_DDRC_PERF_CNT_START_OP_CTRL	0x8028
 #define START_OP_CTRL_VAL_START		0x1ULL
 #define START_OP_CTRL_VAL_ACTIVE	0x2
+#define ODY_DDRC_PERF_CNT_START_OP_CTRL		0x200A0
 
 /* Performance Counters End Operation Control Registers */
-#define DDRC_PERF_CNT_END_OP_CTRL	0x8030
+#define CN10K_DDRC_PERF_CNT_END_OP_CTRL		0x8030
 #define END_OP_CTRL_VAL_END		0x1ULL
+#define ODY_DDRC_PERF_CNT_END_OP_CTRL		0x200E0
 
 /* Performance Counters End Status Registers */
-#define DDRC_PERF_CNT_END_STATUS		0x8038
+#define CN10K_DDRC_PERF_CNT_END_STATUS		0x8038
+#define ODY_DDRC_PERF_CNT_END_STATUS		0x20120
 #define END_STATUS_VAL_END_TIMER_MODE_END	0x1
 
 /* Performance Counters Configuration Registers */
-#define DDRC_PERF_CFG_BASE		0x8040
+#define CN10K_DDRC_PERF_CFG_BASE		0x8040
+#define ODY_DDRC_PERF_CFG_BASE			0x20160
 
 /* 8 Generic event counter + 2 fixed event counters */
 #define DDRC_PERF_NUM_GEN_COUNTERS	8
@@ -42,18 +48,26 @@ 
 					 DDRC_PERF_NUM_FIX_COUNTERS)
 
 /* Generic event counter registers */
-#define DDRC_PERF_CFG(n)		(DDRC_PERF_CFG_BASE + 8 * (n))
+#define DDRC_PERF_CFG(base, n)		((base) + 8 * (n))
 #define EVENT_ENABLE			BIT_ULL(63)
 
 /* Two dedicated event counters for DDR reads and writes */
 #define EVENT_DDR_READS			101
 #define EVENT_DDR_WRITES		100
 
+#define DDRC_PERF_REG(base, n)		((base) + 8 * (n))
 /*
  * programmable events IDs in programmable event counters.
  * DO NOT change these event-id numbers, they are used to
  * program event bitmap in h/w.
+ *
  */
+#define EVENT_DFI_CMD_IS_RETRY			61
+#define EVENT_RD_UC_ECC_ERROR			60
+#define EVENT_RD_CRC_ERROR			59
+#define EVENT_CAPAR_ERROR			58
+#define EVENT_WR_CRC_ERROR			57
+#define EVENT_DFI_PARITY_POISON			56
 #define EVENT_OP_IS_ZQLATCH			55
 #define EVENT_OP_IS_ZQSTART			54
 #define EVENT_OP_IS_TCR_MRR			53
@@ -63,8 +77,8 @@ 
 #define EVENT_VISIBLE_WIN_LIMIT_REACHED_RD	49
 #define EVENT_BSM_STARVATION			48
 #define EVENT_BSM_ALLOC				47
-#define EVENT_LPR_REQ_WITH_NOCREDIT		46
-#define EVENT_HPR_REQ_WITH_NOCREDIT		45
+#define EVENT_RETRY_FIFO_FULL_OR_LPR_REQ_NOCRED	46
+#define EVENT_DFI_OR_HPR_REQ_NOCRED		45
 #define EVENT_OP_IS_ZQCS			44
 #define EVENT_OP_IS_ZQCL			43
 #define EVENT_OP_IS_LOAD_MODE			42
@@ -102,28 +116,40 @@ 
 #define EVENT_HIF_RD_OR_WR			1
 
 /* Event counter value registers */
-#define DDRC_PERF_CNT_VALUE_BASE		0x8080
-#define DDRC_PERF_CNT_VALUE(n)	(DDRC_PERF_CNT_VALUE_BASE + 8 * (n))
+#define CN10K_DDRC_PERF_CNT_VALUE_BASE	0x8080
+#define ODY_DDRC_PERF_CNT_VALUE_BASE	0x201C0
 
 /* Fixed event counter enable/disable register */
-#define DDRC_PERF_CNT_FREERUN_EN	0x80C0
+#define CN10K_DDRC_PERF_CNT_FREERUN_EN		0x80C0
 #define DDRC_PERF_FREERUN_WRITE_EN	0x1
 #define DDRC_PERF_FREERUN_READ_EN	0x2
 
 /* Fixed event counter control register */
-#define DDRC_PERF_CNT_FREERUN_CTRL	0x80C8
+#define CN10K_DDRC_PERF_CNT_FREERUN_CTRL	0x80C8
 #define DDRC_FREERUN_WRITE_CNT_CLR	0x1
 #define DDRC_FREERUN_READ_CNT_CLR	0x2
+#define ODY_DDRC_PERF_CNT_FREERUN_CTRL		0x20240
+
+/* Fixed event counter clear register, defined only for Odyssey */
+#define ODY_DDRC_PERF_CNT_FREERUN_CLR		0x20248
 
-/* Fixed event counter value register */
-#define DDRC_PERF_CNT_VALUE_WR_OP	0x80D0
-#define DDRC_PERF_CNT_VALUE_RD_OP	0x80D8
 #define DDRC_PERF_CNT_VALUE_OVERFLOW	BIT_ULL(48)
 #define DDRC_PERF_CNT_MAX_VALUE		GENMASK_ULL(48, 0)
 
+/* Fixed event counter value register */
+#define CN10K_DDRC_PERF_CNT_VALUE_WR_OP		0x80D0
+#define CN10K_DDRC_PERF_CNT_VALUE_RD_OP		0x80D8
+#define ODY_DDRC_PERF_CNT_VALUE_WR_OP		0x20250
+#define ODY_DDRC_PERF_CNT_VALUE_RD_OP		0x20258
+
+#define VERSION_V1				1
+#define VERSION_V2				2
+
 struct cn10k_ddr_pmu {
 	struct pmu pmu;
 	void __iomem *base;
+	struct ddr_pmu_platform_data *p_data;
+	int version;
 	unsigned int cpu;
 	struct	device *dev;
 	int active_events;
@@ -134,6 +160,54 @@  struct cn10k_ddr_pmu {
 
 #define to_cn10k_ddr_pmu(p)	container_of(p, struct cn10k_ddr_pmu, pmu)
 
+struct ddr_pmu_platform_data {
+	u64 counter_overflow_val;
+	u64 counter_max_val;
+	u64 ddrc_perf_cnt_base;
+	u64 ddrc_perf_cfg_base;
+	u64 ddrc_perf_cnt_op_mode_ctrl;
+	u64 ddrc_perf_cnt_start_op_ctrl;
+	u64 ddrc_perf_cnt_end_op_ctrl;
+	u64 ddrc_perf_cnt_end_status;
+	u64 ddrc_perf_cnt_freerun_en;
+	u64 ddrc_perf_cnt_freerun_ctrl;
+	u64 ddrc_perf_cnt_freerun_clr;
+	u64 ddrc_perf_cnt_value_wr_op;
+	u64 ddrc_perf_cnt_value_rd_op;
+};
+
+static const struct ddr_pmu_platform_data cn10k_ddr_pmu_pdata = {
+	.counter_overflow_val =  BIT_ULL(48),
+	.counter_max_val = GENMASK_ULL(48, 0),
+	.ddrc_perf_cnt_base = CN10K_DDRC_PERF_CNT_VALUE_BASE,
+	.ddrc_perf_cfg_base = CN10K_DDRC_PERF_CFG_BASE,
+	.ddrc_perf_cnt_op_mode_ctrl = CN10K_DDRC_PERF_CNT_OP_MODE_CTRL,
+	.ddrc_perf_cnt_start_op_ctrl = CN10K_DDRC_PERF_CNT_START_OP_CTRL,
+	.ddrc_perf_cnt_end_op_ctrl = CN10K_DDRC_PERF_CNT_END_OP_CTRL,
+	.ddrc_perf_cnt_end_status = CN10K_DDRC_PERF_CNT_END_STATUS,
+	.ddrc_perf_cnt_freerun_en = CN10K_DDRC_PERF_CNT_FREERUN_EN,
+	.ddrc_perf_cnt_freerun_ctrl = CN10K_DDRC_PERF_CNT_FREERUN_CTRL,
+	.ddrc_perf_cnt_freerun_clr = 0,
+	.ddrc_perf_cnt_value_wr_op = CN10K_DDRC_PERF_CNT_VALUE_WR_OP,
+	.ddrc_perf_cnt_value_rd_op = CN10K_DDRC_PERF_CNT_VALUE_RD_OP,
+};
+
+static const struct ddr_pmu_platform_data odyssey_ddr_pmu_pdata = {
+	.counter_overflow_val = 0,
+	.counter_max_val = GENMASK_ULL(63, 0),
+	.ddrc_perf_cnt_base = ODY_DDRC_PERF_CNT_VALUE_BASE,
+	.ddrc_perf_cfg_base = ODY_DDRC_PERF_CFG_BASE,
+	.ddrc_perf_cnt_op_mode_ctrl = ODY_DDRC_PERF_CNT_OP_MODE_CTRL,
+	.ddrc_perf_cnt_start_op_ctrl = ODY_DDRC_PERF_CNT_START_OP_CTRL,
+	.ddrc_perf_cnt_end_op_ctrl = ODY_DDRC_PERF_CNT_END_OP_CTRL,
+	.ddrc_perf_cnt_end_status = ODY_DDRC_PERF_CNT_END_STATUS,
+	.ddrc_perf_cnt_freerun_en = 0,
+	.ddrc_perf_cnt_freerun_ctrl = ODY_DDRC_PERF_CNT_FREERUN_CTRL,
+	.ddrc_perf_cnt_freerun_clr = ODY_DDRC_PERF_CNT_FREERUN_CLR,
+	.ddrc_perf_cnt_value_wr_op = ODY_DDRC_PERF_CNT_VALUE_WR_OP,
+	.ddrc_perf_cnt_value_rd_op = ODY_DDRC_PERF_CNT_VALUE_RD_OP,
+};
+
 static ssize_t cn10k_ddr_pmu_event_show(struct device *dev,
 					struct device_attribute *attr,
 					char *page)
@@ -189,9 +263,9 @@  static struct attribute *cn10k_ddr_perf_events_attrs[] = {
 	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqcl, EVENT_OP_IS_ZQCL),
 	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_wr_access, EVENT_OP_IS_ZQCS),
 	CN10K_DDR_PMU_EVENT_ATTR(ddr_hpr_req_with_nocredit,
-					EVENT_HPR_REQ_WITH_NOCREDIT),
+				 EVENT_DFI_OR_HPR_REQ_NOCRED),
 	CN10K_DDR_PMU_EVENT_ATTR(ddr_lpr_req_with_nocredit,
-					EVENT_LPR_REQ_WITH_NOCREDIT),
+				 EVENT_RETRY_FIFO_FULL_OR_LPR_REQ_NOCRED),
 	CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_alloc, EVENT_BSM_ALLOC),
 	CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_starvation, EVENT_BSM_STARVATION),
 	CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_rd,
@@ -214,6 +288,85 @@  static struct attribute_group cn10k_ddr_perf_events_attr_group = {
 	.attrs = cn10k_ddr_perf_events_attrs,
 };
 
+static struct attribute *odyssey_ddr_perf_events_attrs[] = {
+	/* Programmable */
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_or_wr_access, EVENT_HIF_RD_OR_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_wr_access, EVENT_HIF_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_access, EVENT_HIF_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rmw_access, EVENT_HIF_RMW),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_pri_rdaccess, EVENT_HIF_HI_PRI_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_bypass_access, EVENT_READ_BYPASS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_act_bypass_access, EVENT_ACT_BYPASS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_wr_data_access,
+				 EVENT_DFI_WR_DATA_CYCLES),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_rd_data_access,
+				 EVENT_DFI_RD_DATA_CYCLES),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hpri_sched_rd_crit_access,
+				 EVENT_HPR_XACT_WHEN_CRITICAL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_lpri_sched_rd_crit_access,
+				 EVENT_LPR_XACT_WHEN_CRITICAL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_trxn_crit_access,
+				 EVENT_WR_XACT_WHEN_CRITICAL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_active_access, EVENT_OP_IS_ACTIVATE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_or_wr_access,
+				 EVENT_OP_IS_RD_OR_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_active_access,
+				 EVENT_OP_IS_RD_ACTIVATE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_read, EVENT_OP_IS_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_write, EVENT_OP_IS_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_mwr, EVENT_OP_IS_MWR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge, EVENT_OP_IS_PRECHARGE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_rdwr,
+				 EVENT_PRECHARGE_FOR_RDWR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_other,
+				 EVENT_PRECHARGE_FOR_OTHER),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_rdwr_transitions, EVENT_RDWR_TRANSITIONS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_write_combine, EVENT_WRITE_COMBINE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_war_hazard, EVENT_WAR_HAZARD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_raw_hazard, EVENT_RAW_HAZARD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_waw_hazard, EVENT_WAW_HAZARD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_selfref, EVENT_OP_IS_ENTER_SELFREF),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_powerdown,
+				 EVENT_OP_IS_ENTER_POWERDOWN),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_mpsm, EVENT_OP_IS_ENTER_MPSM),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_refresh, EVENT_OP_IS_REFRESH),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_crit_ref, EVENT_OP_IS_CRIT_REF),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_spec_ref, EVENT_OP_IS_SPEC_REF),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_load_mode, EVENT_OP_IS_LOAD_MODE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqcl, EVENT_OP_IS_ZQCL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_wr_access, EVENT_OP_IS_ZQCS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_cycles, EVENT_DFI_OR_HPR_REQ_NOCRED),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_retry_fifo_full,
+				 EVENT_RETRY_FIFO_FULL_OR_LPR_REQ_NOCRED),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_alloc, EVENT_BSM_ALLOC),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_starvation, EVENT_BSM_STARVATION),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_rd,
+				 EVENT_VISIBLE_WIN_LIMIT_REACHED_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_wr,
+				 EVENT_VISIBLE_WIN_LIMIT_REACHED_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mpc, EVENT_OP_IS_DQSOSC_MPC),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mrr, EVENT_OP_IS_DQSOSC_MRR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_tcr_mrr, EVENT_OP_IS_TCR_MRR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqstart, EVENT_OP_IS_ZQSTART),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqlatch, EVENT_OP_IS_ZQLATCH),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_parity_poison,
+				 EVENT_DFI_PARITY_POISON),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_crc_error, EVENT_WR_CRC_ERROR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_capar_error, EVENT_CAPAR_ERROR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_crc_error, EVENT_RD_CRC_ERROR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_uc_ecc_error, EVENT_RD_UC_ECC_ERROR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dfi_cmd_is_retry, EVENT_DFI_CMD_IS_RETRY),
+	/* Free run event counters */
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_reads, EVENT_DDR_READS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_writes, EVENT_DDR_WRITES),
+	NULL
+};
+
+static struct attribute_group odyssey_ddr_perf_events_attr_group = {
+	.name = "events",
+	.attrs = odyssey_ddr_perf_events_attrs,
+};
+
 PMU_FORMAT_ATTR(event, "config:0-8");
 
 static struct attribute *cn10k_ddr_perf_format_attrs[] = {
@@ -254,6 +407,13 @@  static const struct attribute_group *cn10k_attr_groups[] = {
 	NULL,
 };
 
+static const struct attribute_group *odyssey_attr_groups[] = {
+	&odyssey_ddr_perf_events_attr_group,
+	&cn10k_ddr_perf_format_attr_group,
+	&cn10k_ddr_perf_cpumask_attr_group,
+	NULL
+};
+
 /* Default poll timeout is 100 sec, which is very sufficient for
  * 48 bit counter incremented max at 5.6 GT/s, which may take many
  * hours to overflow.
@@ -266,13 +426,24 @@  static ktime_t cn10k_ddr_pmu_timer_period(void)
 	return ms_to_ktime((u64)cn10k_ddr_pmu_poll_period_sec * USEC_PER_SEC);
 }
 
-static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
+static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap,
+				     struct cn10k_ddr_pmu *ddr_pmu)
 {
+	int ret = 0;
+
 	switch (eventid) {
 	case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD:
 	case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH:
 		*event_bitmap = (1ULL << (eventid - 1));
 		break;
+	case EVENT_DFI_PARITY_POISON ...EVENT_DFI_CMD_IS_RETRY:
+		if (ddr_pmu->version == VERSION_V2) {
+			*event_bitmap = (1ULL << (eventid - 1));
+		} else {
+			pr_err("%s Invalid eventid %d\n", __func__, eventid);
+			ret = -EINVAL;
+		}
+		break;
 	case EVENT_OP_IS_ENTER_SELFREF:
 	case EVENT_OP_IS_ENTER_POWERDOWN:
 	case EVENT_OP_IS_ENTER_MPSM:
@@ -280,10 +451,10 @@  static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
 		break;
 	default:
 		pr_err("%s Invalid eventid %d\n", __func__, eventid);
-		return -EINVAL;
+		ret = -EINVAL;
 	}
 
-	return 0;
+	return ret;
 }
 
 static int cn10k_ddr_perf_alloc_counter(struct cn10k_ddr_pmu *pmu,
@@ -356,6 +527,7 @@  static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
 {
 	u32 reg;
 	u64 val;
+	struct ddr_pmu_platform_data *p_data = pmu->p_data;
 
 	if (counter > DDRC_PERF_NUM_COUNTERS) {
 		pr_err("Error: unsupported counter %d\n", counter);
@@ -363,7 +535,7 @@  static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
 	}
 
 	if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
-		reg = DDRC_PERF_CFG(counter);
+		reg = DDRC_PERF_CFG(p_data->ddrc_perf_cfg_base, counter);
 		val = readq_relaxed(pmu->base + reg);
 
 		if (enable)
@@ -373,7 +545,13 @@  static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
 
 		writeq_relaxed(val, pmu->base + reg);
 	} else {
-		val = readq_relaxed(pmu->base + DDRC_PERF_CNT_FREERUN_EN);
+		if (p_data->ddrc_perf_cnt_freerun_en)
+			val = readq_relaxed(pmu->base +
+					    p_data->ddrc_perf_cnt_freerun_en);
+		else
+			val = readq_relaxed(pmu->base +
+					    p_data->ddrc_perf_cnt_freerun_ctrl);
+
 		if (enable) {
 			if (counter == DDRC_PERF_READ_COUNTER_IDX)
 				val |= DDRC_PERF_FREERUN_READ_EN;
@@ -385,7 +563,13 @@  static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
 			else
 				val &= ~DDRC_PERF_FREERUN_WRITE_EN;
 		}
-		writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_EN);
+
+		if (p_data->ddrc_perf_cnt_freerun_en)
+			writeq_relaxed(val, pmu->base +
+				       p_data->ddrc_perf_cnt_freerun_en);
+		else
+			writeq_relaxed(val, pmu->base +
+				       p_data->ddrc_perf_cnt_freerun_ctrl);
 	}
 }
 
@@ -393,13 +577,18 @@  static u64 cn10k_ddr_perf_read_counter(struct cn10k_ddr_pmu *pmu, int counter)
 {
 	u64 val;
 
+	struct ddr_pmu_platform_data *p_data = pmu->p_data;
+
 	if (counter == DDRC_PERF_READ_COUNTER_IDX)
-		return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_RD_OP);
+		return readq_relaxed(pmu->base +
+				     p_data->ddrc_perf_cnt_value_rd_op);
 
 	if (counter == DDRC_PERF_WRITE_COUNTER_IDX)
-		return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_WR_OP);
+		return readq_relaxed(pmu->base +
+				     p_data->ddrc_perf_cnt_value_wr_op);
 
-	val = readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE(counter));
+	val = readq_relaxed(pmu->base +
+			    DDRC_PERF_REG(p_data->ddrc_perf_cnt_base, counter));
 	return val;
 }
 
@@ -407,6 +596,7 @@  static void cn10k_ddr_perf_event_update(struct perf_event *event)
 {
 	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
+	struct ddr_pmu_platform_data *p_data = pmu->p_data;
 	u64 prev_count, new_count, mask;
 
 	do {
@@ -414,20 +604,48 @@  static void cn10k_ddr_perf_event_update(struct perf_event *event)
 		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
 	} while (local64_xchg(&hwc->prev_count, new_count) != prev_count);
 
-	mask = DDRC_PERF_CNT_MAX_VALUE;
+	mask = p_data->counter_max_val;
 
 	local64_add((new_count - prev_count) & mask, &event->count);
 }
 
+static void cn10k_ddr_perf_counter_start(struct cn10k_ddr_pmu *ddr_pmu,
+					 int counter)
+{
+	struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
+	u64 ctrl_reg = p_data->ddrc_perf_cnt_start_op_ctrl;
+
+	writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base +
+		       DDRC_PERF_REG(ctrl_reg, counter));
+}
+
+static void cn10k_ddr_perf_counter_stop(struct cn10k_ddr_pmu *ddr_pmu,
+					int counter)
+{
+	struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
+	u64 ctrl_reg = p_data->ddrc_perf_cnt_end_op_ctrl;
+
+	writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
+		       DDRC_PERF_REG(ctrl_reg, counter));
+}
+
 static void cn10k_ddr_perf_event_start(struct perf_event *event, int flags)
 {
 	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
+	u64 ctrl_reg = pmu->p_data->ddrc_perf_cnt_op_mode_ctrl;
 
 	local64_set(&hwc->prev_count, 0);
 
 	cn10k_ddr_perf_counter_enable(pmu, counter, true);
+	if (pmu->version == VERSION_V2) {
+	/* Setup the PMU counter to work in manual mode */
+		writeq_relaxed(OP_MODE_CTRL_VAL_MANUAL, pmu->base +
+			       DDRC_PERF_REG(ctrl_reg, counter));
+
+		cn10k_ddr_perf_counter_start(pmu, counter);
+	}
 
 	hwc->state = 0;
 }
@@ -435,6 +653,7 @@  static void cn10k_ddr_perf_event_start(struct perf_event *event, int flags)
 static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
 {
 	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct ddr_pmu_platform_data *p_data = pmu->p_data;
 	struct hw_perf_event *hwc = &event->hw;
 	u8 config = event->attr.config;
 	int counter, ret;
@@ -454,8 +673,8 @@  static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
 
 	if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
 		/* Generic counters, configure event id */
-		reg_offset = DDRC_PERF_CFG(counter);
-		ret = ddr_perf_get_event_bitmap(config, &val);
+		reg_offset = DDRC_PERF_CFG(p_data->ddrc_perf_cfg_base, counter);
+		ret = ddr_perf_get_event_bitmap(config, &val, pmu);
 		if (ret)
 			return ret;
 
@@ -467,7 +686,12 @@  static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
 		else
 			val = DDRC_FREERUN_WRITE_CNT_CLR;
 
-		writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_CTRL);
+		if (p_data->ddrc_perf_cnt_freerun_clr)
+			writeq_relaxed(val, pmu->base +
+				       p_data->ddrc_perf_cnt_freerun_clr);
+		else
+			writeq_relaxed(val, pmu->base +
+				       p_data->ddrc_perf_cnt_freerun_ctrl);
 	}
 
 	hwc->state |= PERF_HES_STOPPED;
@@ -486,6 +710,9 @@  static void cn10k_ddr_perf_event_stop(struct perf_event *event, int flags)
 
 	cn10k_ddr_perf_counter_enable(pmu, counter, false);
 
+	if (pmu->version == VERSION_V2)
+		cn10k_ddr_perf_counter_stop(pmu, counter);
+
 	if (flags & PERF_EF_UPDATE)
 		cn10k_ddr_perf_event_update(event);
 
@@ -512,17 +739,19 @@  static void cn10k_ddr_perf_event_del(struct perf_event *event, int flags)
 static void cn10k_ddr_perf_pmu_enable(struct pmu *pmu)
 {
 	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+	struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
 
 	writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base +
-		       DDRC_PERF_CNT_START_OP_CTRL);
+		       p_data->ddrc_perf_cnt_start_op_ctrl);
 }
 
 static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
 {
 	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+	struct ddr_pmu_platform_data *p_data = ddr_pmu->p_data;
 
 	writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
-		       DDRC_PERF_CNT_END_OP_CTRL);
+		       p_data->ddrc_perf_cnt_end_op_ctrl);
 }
 
 static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
@@ -549,6 +778,7 @@  static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
 
 static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
 {
+	struct ddr_pmu_platform_data *p_data = pmu->p_data;
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
 	u64 prev_count, new_count;
@@ -561,7 +791,8 @@  static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
 		prev_count = local64_read(&hwc->prev_count);
 		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
 
-		/* Overflow condition is when new count less than
+		/*
+		 * Overflow condition is when new count less than
 		 * previous count
 		 */
 		if (new_count < prev_count)
@@ -574,7 +805,8 @@  static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
 		prev_count = local64_read(&hwc->prev_count);
 		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
 
-		/* Overflow condition is when new count less than
+		/*
+		 * Overflow condition is when new count less than
 		 * previous count
 		 */
 		if (new_count < prev_count)
@@ -586,11 +818,23 @@  static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
 			continue;
 
 		value = cn10k_ddr_perf_read_counter(pmu, i);
-		if (value == DDRC_PERF_CNT_MAX_VALUE) {
+		if (value == p_data->counter_max_val) {
 			pr_info("Counter-(%d) reached max value\n", i);
-			cn10k_ddr_perf_event_update_all(pmu);
-			cn10k_ddr_perf_pmu_disable(&pmu->pmu);
-			cn10k_ddr_perf_pmu_enable(&pmu->pmu);
+			/*
+			 * As separate control register is added for each
+			 * counter in odyssey, no need to update all
+			 * the events
+			 */
+			if (pmu->version == VERSION_V2) {
+				cn10k_ddr_perf_event_update(pmu->events[i]);
+				cn10k_ddr_perf_counter_stop(pmu, i);
+				cn10k_ddr_perf_counter_start(pmu, i);
+
+			} else {
+				cn10k_ddr_perf_event_update_all(pmu);
+				cn10k_ddr_perf_pmu_disable(&pmu->pmu);
+				cn10k_ddr_perf_pmu_enable(&pmu->pmu);
+			}
 		}
 	}
 
@@ -631,7 +875,10 @@  static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 
 static int cn10k_ddr_perf_probe(struct platform_device *pdev)
 {
+	struct ddr_pmu_platform_data *pltfm_data;
+	struct device *dev = &pdev->dev;
 	struct cn10k_ddr_pmu *ddr_pmu;
+	const char  *compatible;
 	struct resource *res;
 	void __iomem *base;
 	char *name;
@@ -642,6 +889,14 @@  static int cn10k_ddr_perf_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	ddr_pmu->dev = &pdev->dev;
+
+	pltfm_data = (struct ddr_pmu_platform_data *)
+		      device_get_match_data(&pdev->dev);
+	if (!pltfm_data) {
+		dev_err(&pdev->dev, "Error: No device match data found\n");
+		return -ENODEV;
+	}
+	ddr_pmu->p_data = pltfm_data;
 	platform_set_drvdata(pdev, ddr_pmu);
 
 	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
@@ -650,25 +905,59 @@  static int cn10k_ddr_perf_probe(struct platform_device *pdev)
 
 	ddr_pmu->base = base;
 
-	/* Setup the PMU counter to work in manual mode */
-	writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base +
-		       DDRC_PERF_CNT_OP_MODE_CTRL);
-
-	ddr_pmu->pmu = (struct pmu) {
-		.module	      = THIS_MODULE,
-		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
-		.task_ctx_nr = perf_invalid_context,
-		.attr_groups = cn10k_attr_groups,
-		.event_init  = cn10k_ddr_perf_event_init,
-		.add	     = cn10k_ddr_perf_event_add,
-		.del	     = cn10k_ddr_perf_event_del,
-		.start	     = cn10k_ddr_perf_event_start,
-		.stop	     = cn10k_ddr_perf_event_stop,
-		.read	     = cn10k_ddr_perf_event_update,
-		.pmu_enable  = cn10k_ddr_perf_pmu_enable,
-		.pmu_disable = cn10k_ddr_perf_pmu_disable,
-	};
+	ret = device_property_read_string(dev, "compatible", &compatible);
+	if (ret) {
+		pr_err("compatible property not found\n");
+		return ret;
+	}
 
+	if ((strncmp("marvell,cn10k-ddr-pmu", compatible,
+		     strlen(compatible)) == 0))
+		ddr_pmu->version = VERSION_V1;
+	else
+		ddr_pmu->version = VERSION_V2;
+
+	if (ddr_pmu->version == VERSION_V1) {
+		ddr_pmu->pmu = (struct pmu) {
+			.module	      = THIS_MODULE,
+			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+			.task_ctx_nr = perf_invalid_context,
+			.attr_groups = cn10k_attr_groups,
+			.event_init  = cn10k_ddr_perf_event_init,
+			.add	     = cn10k_ddr_perf_event_add,
+			.del	     = cn10k_ddr_perf_event_del,
+			.start	     = cn10k_ddr_perf_event_start,
+			.stop	     = cn10k_ddr_perf_event_stop,
+			.read	     = cn10k_ddr_perf_event_update,
+			.pmu_enable  = cn10k_ddr_perf_pmu_enable,
+			.pmu_disable = cn10k_ddr_perf_pmu_disable,
+		};
+
+	/*
+	 * As we have separate control registers for each counter in Odyssey,
+	 * setting up the mode will be done when we enable each counter
+	 *
+	 */
+
+	/* Setup the PMU counter to work in manual mode */
+		writeq(OP_MODE_CTRL_VAL_MANUAL, ddr_pmu->base +
+		      (ddr_pmu->p_data->ddrc_perf_cnt_op_mode_ctrl));
+	} else {
+		ddr_pmu->pmu = (struct pmu) {
+			.module       = THIS_MODULE,
+			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+			.task_ctx_nr = perf_invalid_context,
+			.attr_groups = odyssey_attr_groups,
+			.event_init  = cn10k_ddr_perf_event_init,
+			.add         = cn10k_ddr_perf_event_add,
+			.del         = cn10k_ddr_perf_event_del,
+			.start       = cn10k_ddr_perf_event_start,
+			.stop        = cn10k_ddr_perf_event_stop,
+			.read        = cn10k_ddr_perf_event_update,
+			.pmu_enable  = NULL,
+			.pmu_disable = NULL,
+		};
+	}
 	/* Choose this cpu to collect perf data */
 	ddr_pmu->cpu = raw_smp_processor_id();
 
@@ -688,7 +977,7 @@  static int cn10k_ddr_perf_probe(struct platform_device *pdev)
 	if (ret)
 		goto error;
 
-	pr_info("CN10K DDR PMU Driver for ddrc@%llx\n", res->start);
+	pr_info("DDR PMU Driver for ddrc@%llx\n", res->start);
 	return 0;
 error:
 	cpuhp_state_remove_instance_nocalls(
@@ -711,7 +1000,8 @@  static int cn10k_ddr_perf_remove(struct platform_device *pdev)
 
 #ifdef CONFIG_OF
 static const struct of_device_id cn10k_ddr_pmu_of_match[] = {
-	{ .compatible = "marvell,cn10k-ddr-pmu", },
+	{ .compatible = "marvell,cn10k-ddr-pmu",
+	  .data = &cn10k_ddr_pmu_pdata },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match);
@@ -719,7 +1009,14 @@  MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match);
 
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id cn10k_ddr_pmu_acpi_match[] = {
-	{"MRVL000A", 0},
+	{
+		.id = "MRVL000A",
+		.driver_data = (kernel_ulong_t)&cn10k_ddr_pmu_pdata,
+	},
+	{
+		.id = "MRVL000C",
+		.driver_data = (kernel_ulong_t)&odyssey_ddr_pmu_pdata,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(acpi, cn10k_ddr_pmu_acpi_match);