diff mbox series

[v4,3/8] mailbox: mtk-cmdq: Add driver data to support for MT8196

Message ID 20250218054405.2017918-4-jason-jh.lin@mediatek.com (mailing list archive)
State New
Headers show
Series Add GCE support for MT8196 | expand

Commit Message

Jason-JH Lin (林睿祥) Feb. 18, 2025, 5:41 a.m. UTC
MT8196 has 3 new hardware configurations compared with the previous SoCs,
which correspond to 3 new driver data fields:

1. mminfra_offset: For GCE data plane control
   Since GCE has been moved into mminfra, GCE needs to add the
   mminfra offset to the DRAM address when accessing the DRAM.

2. gce_vm: For GCE hardware virtualization
   Currently, the first version of the mt8196 mailbox controller only
   requires setting the VM-related registers to enable the permissions
   of a host VM.

3. dma_mask_bit: For DMA address bit control
   To work around a hardware limitation of MT8196 when accessing DRAM,
   GCE needs to restrict its DMA addresses to 35 bits.

Signed-off-by: Jason-JH Lin <jason-jh.lin@mediatek.com>
---
 drivers/mailbox/mtk-cmdq-mailbox.c       | 90 +++++++++++++++++++++---
 include/linux/mailbox/mtk-cmdq-mailbox.h |  2 +
 2 files changed, 84 insertions(+), 8 deletions(-)

Comments

CK Hu (胡俊光) Feb. 18, 2025, 9:25 a.m. UTC | #1
On Tue, 2025-02-18 at 13:41 +0800, Jason-JH Lin wrote:
> MT8196 has 3 new hardware configuration compared with the previous SoC,
> which correspond to the 3 new driver data:
> 
> 1. mminfra_offset: For GCE data plane control
>    Since GCE has been moved into mminfra, GCE needs to append the
>    mminfra offset to the DRAM address when accessing the DRAM.

It seems that GCE uses an IOVA and that mminfra maps the IOVA to a physical address.
Maybe make GCE an IOMMU device, or add an iommus property to its device node, and use dma_map_xxx() to get the IOVA for GCE.
The iommus property would point to the mminfra device (maybe under another name), and the mminfra device would handle the mapping between IOVA and physical address.

> 
> 2. gce_vm: For GCE hardware virtualization
>    Currently, the first version of the mt8196 mailbox controller only
>    requires setting the VM-related registers to enable the permissions
>    of a host VM.

What's this? I know this patch does not implement the full VM function,
but please describe in more detail what this is. Why is it necessary to enable the permissions?

Regards,
CK

> 
> 3. dma_mask_bit: For dma address bit control
>    In order to avoid the hardware limitations of MT8196 accessing DRAM,
>    GCE needs to configure the DMA address to be less than 35 bits.
> 
> Signed-off-by: Jason-JH Lin <jason-jh.lin@mediatek.com>
> ---
>  drivers/mailbox/mtk-cmdq-mailbox.c       | 90 +++++++++++++++++++++---
>  include/linux/mailbox/mtk-cmdq-mailbox.h |  2 +
>  2 files changed, 84 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
> index d186865b8dce..0abe10a7fef9 100644
> --- a/drivers/mailbox/mtk-cmdq-mailbox.c
> +++ b/drivers/mailbox/mtk-cmdq-mailbox.c
> @@ -43,6 +43,17 @@
>  #define GCE_CTRL_BY_SW				GENMASK(2, 0)
>  #define GCE_DDR_EN				GENMASK(18, 16)
>  
> +#define GCE_VM_ID_MAP0			0x5018
> +#define GCE_VM_MAP0_ALL_HOST			GENMASK(29, 0)
> +#define GCE_VM_ID_MAP1			0x501c
> +#define GCE_VM_MAP1_ALL_HOST			GENMASK(29, 0)
> +#define GCE_VM_ID_MAP2			0x5020
> +#define GCE_VM_MAP2_ALL_HOST			GENMASK(29, 0)
> +#define GCE_VM_ID_MAP3			0x5024
> +#define GCE_VM_MAP3_ALL_HOST			GENMASK(5, 0)
> +#define GCE_VM_CPR_GSIZE		0x50c4
> +#define GCE_VM_CPR_GSIZE_HSOT			GENMASK(3, 0)
> +
>  #define CMDQ_THR_ACTIVE_SLOT_CYCLES	0x3200
>  #define CMDQ_THR_ENABLED		0x1
>  #define CMDQ_THR_DISABLED		0x0
> @@ -87,11 +98,24 @@ struct cmdq {
>  struct gce_plat {
>  	u32 thread_nr;
>  	u8 shift;
> +	dma_addr_t mminfra_offset;
>  	bool control_by_sw;
>  	bool sw_ddr_en;
> +	bool gce_vm;
> +	u32 dma_mask_bit;
>  	u32 gce_num;
>  };
>  
> +static inline u32 cmdq_reg_shift_addr(dma_addr_t addr, const struct gce_plat *pdata)
> +{
> +	return ((addr + pdata->mminfra_offset) >> pdata->shift);
> +}
> +
> +static inline u32 cmdq_reg_revert_addr(dma_addr_t addr, const struct gce_plat *pdata)
> +{
> +	return ((addr << pdata->shift) - pdata->mminfra_offset);
> +}
> +
>  static void cmdq_sw_ddr_enable(struct cmdq *cmdq, bool enable)
>  {
>  	WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks));
> @@ -112,6 +136,30 @@ u8 cmdq_get_shift_pa(struct mbox_chan *chan)
>  }
>  EXPORT_SYMBOL(cmdq_get_shift_pa);
>  
> +dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan)
> +{
> +	struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, mbox);
> +
> +	return cmdq->pdata->mminfra_offset;
> +}
> +EXPORT_SYMBOL(cmdq_get_offset_pa);
> +
> +bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t addr)
> +{
> +	struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, mbox);
> +
> +	if (cmdq->pdata->mminfra_offset == 0)
> +		return false;
> +
> +	/*
> +	 * mminfra will recognize the addr that greater than the mminfra_offset
> +	 * as a transaction to DRAM.
> +	 * So the caller needs to append mminfra_offset for the true case.
> +	 */
> +	return (addr >= cmdq->pdata->mminfra_offset);
> +}
> +EXPORT_SYMBOL(cmdq_addr_need_offset);
> +
>  static int cmdq_thread_suspend(struct cmdq *cmdq, struct cmdq_thread *thread)
>  {
>  	u32 status;
> @@ -143,6 +191,17 @@ static void cmdq_init(struct cmdq *cmdq)
>  	u32 gctl_regval = 0;
>  
>  	WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks));
> +
> +	if (cmdq->pdata->gce_vm) {
> +		/* config cpr size for host vm */
> +		writel(GCE_VM_CPR_GSIZE_HSOT, cmdq->base + GCE_VM_CPR_GSIZE);
> +		/* config CPR_GSIZE before setting VM_ID_MAP to avoid data leakage */
> +		writel(GCE_VM_MAP0_ALL_HOST, cmdq->base + GCE_VM_ID_MAP0);
> +		writel(GCE_VM_MAP1_ALL_HOST, cmdq->base + GCE_VM_ID_MAP1);
> +		writel(GCE_VM_MAP2_ALL_HOST, cmdq->base + GCE_VM_ID_MAP2);
> +		writel(GCE_VM_MAP3_ALL_HOST, cmdq->base + GCE_VM_ID_MAP3);
> +	}
> +
>  	if (cmdq->pdata->control_by_sw)
>  		gctl_regval = GCE_CTRL_BY_SW;
>  	if (cmdq->pdata->sw_ddr_en)
> @@ -199,7 +258,7 @@ static void cmdq_task_insert_into_thread(struct cmdq_task *task)
>  				prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE);
>  	prev_task_base[CMDQ_NUM_CMD(prev_task->pkt) - 1] =
>  		(u64)CMDQ_JUMP_BY_PA << 32 |
> -		(task->pa_base >> task->cmdq->pdata->shift);
> +		cmdq_reg_shift_addr(task->pa_base, task->cmdq->pdata);
>  	dma_sync_single_for_device(dev, prev_task->pa_base,
>  				   prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE);
>  
> @@ -264,7 +323,7 @@ static void cmdq_thread_irq_handler(struct cmdq *cmdq,
>  	else
>  		return;
>  
> -	curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR) << cmdq->pdata->shift;
> +	curr_pa = cmdq_reg_shift_addr(readl(thread->base + CMDQ_THR_CURR_ADDR), cmdq->pdata);
>  
>  	list_for_each_entry_safe(task, tmp, &thread->task_busy_list,
>  				 list_entry) {
> @@ -416,9 +475,9 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data)
>  		 */
>  		WARN_ON(cmdq_thread_reset(cmdq, thread) < 0);
>  
> -		writel(task->pa_base >> cmdq->pdata->shift,
> +		writel(cmdq_reg_shift_addr(task->pa_base, cmdq->pdata),
>  		       thread->base + CMDQ_THR_CURR_ADDR);
> -		writel((task->pa_base + pkt->cmd_buf_size) >> cmdq->pdata->shift,
> +		writel(cmdq_reg_shift_addr(task->pa_base + pkt->cmd_buf_size, cmdq->pdata),
>  		       thread->base + CMDQ_THR_END_ADDR);
>  
>  		writel(thread->priority, thread->base + CMDQ_THR_PRIORITY);
> @@ -426,10 +485,10 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data)
>  		writel(CMDQ_THR_ENABLED, thread->base + CMDQ_THR_ENABLE_TASK);
>  	} else {
>  		WARN_ON(cmdq_thread_suspend(cmdq, thread) < 0);
> -		curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR) <<
> -			cmdq->pdata->shift;
> -		end_pa = readl(thread->base + CMDQ_THR_END_ADDR) <<
> -			cmdq->pdata->shift;
> +		curr_pa = cmdq_reg_revert_addr(readl(thread->base + CMDQ_THR_CURR_ADDR),
> +					       cmdq->pdata);
> +		end_pa = cmdq_reg_revert_addr(readl(thread->base + CMDQ_THR_END_ADDR),
> +					      cmdq->pdata);
>  		/* check boundary */
>  		if (curr_pa == end_pa - CMDQ_INST_SIZE ||
>  		    curr_pa == end_pa) {
> @@ -663,6 +722,9 @@ static int cmdq_probe(struct platform_device *pdev)
>  	if (err)
>  		return err;
>  
> +	if (cmdq->pdata->dma_mask_bit)
> +		dma_set_coherent_mask(dev, DMA_BIT_MASK(cmdq->pdata->dma_mask_bit));
> +
>  	cmdq->mbox.dev = dev;
>  	cmdq->mbox.chans = devm_kcalloc(dev, cmdq->pdata->thread_nr,
>  					sizeof(*cmdq->mbox.chans), GFP_KERNEL);
> @@ -782,6 +844,17 @@ static const struct gce_plat gce_plat_mt8195 = {
>  	.gce_num = 2
>  };
>  
> +static const struct gce_plat gce_plat_mt8196 = {
> +	.thread_nr = 32,
> +	.shift = 3,
> +	.mminfra_offset = 0x80000000, /* 2GB */
> +	.control_by_sw = true,
> +	.sw_ddr_en = true,
> +	.gce_vm = true,
> +	.dma_mask_bit = 35,
> +	.gce_num = 2
> +};
> +
>  static const struct of_device_id cmdq_of_ids[] = {
>  	{.compatible = "mediatek,mt6779-gce", .data = (void *)&gce_plat_mt6779},
>  	{.compatible = "mediatek,mt8173-gce", .data = (void *)&gce_plat_mt8173},
> @@ -790,6 +863,7 @@ static const struct of_device_id cmdq_of_ids[] = {
>  	{.compatible = "mediatek,mt8188-gce", .data = (void *)&gce_plat_mt8188},
>  	{.compatible = "mediatek,mt8192-gce", .data = (void *)&gce_plat_mt8192},
>  	{.compatible = "mediatek,mt8195-gce", .data = (void *)&gce_plat_mt8195},
> +	{.compatible = "mediatek,mt8196-gce", .data = (void *)&gce_plat_mt8196},
>  	{}
>  };
>  MODULE_DEVICE_TABLE(of, cmdq_of_ids);
> diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h
> index a8f0070c7aa9..79398bf95f8d 100644
> --- a/include/linux/mailbox/mtk-cmdq-mailbox.h
> +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h
> @@ -79,5 +79,7 @@ struct cmdq_pkt {
>  };
>  
>  u8 cmdq_get_shift_pa(struct mbox_chan *chan);
> +dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan);
> +bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t addr);
>  
>  #endif /* __MTK_CMDQ_MAILBOX_H__ */
Jason-JH Lin (林睿祥) Feb. 22, 2025, 10:44 a.m. UTC | #2
Hi CK,

Thanks for the reviews.

On Tue, 2025-02-18 at 09:25 +0000, CK Hu (胡俊光) wrote:
> On Tue, 2025-02-18 at 13:41 +0800, Jason-JH Lin wrote:
> > MT8196 has 3 new hardware configuration compared with the previous
> > SoC,
> > which correspond to the 3 new driver data:
> > 
> > 1. mminfra_offset: For GCE data plane control
> >    Since GCE has been moved into mminfra, GCE needs to append the
> >    mminfra offset to the DRAM address when accessing the DRAM.
> 
> It seems that GCE has iova and mminfra would mapping the iova to
> physical address.
> Maybe let GCE be a iommu device or add iommus property in device
> node, and use dma_map_xxx() to get iova of GCE.
> iommus property point to mminfra device (maybe another name) and
> mminfra device would process the mapping of iova and physical
> address.

The GCE in the 8196 is using IOVA already.

The main reason for adding the mminfra_offset 0x8000_0000 (2G) is to
solve an address conflict problem:
Because of MMIO, if the GCE needs to access a hardware register at
0x1000_0000 while the SMMU is also mapping a DRAM block at 0x1000_0000,
the GCE cannot tell whether it should write to the hardware register
or to the DRAM.
Therefore, a rule was built into the MMINFRA circuit during the HW design:
transactions with addresses of 2G or above are treated as the data path,
while those with addresses below 2G are treated as the config path.
Additionally, on the data path, addresses are remapped by subtracting
2G, allowing the SMMU to still map DRAM addresses less than 2G.
However, the software needs to add 2G to any DRAM address that
the GCE needs to access to ensure that MMINFRA will follow the data
path.

Since the MMINFRA remap subtracting 2G is done in the hardware circuit
and cannot be configured by software, the address +2G adjustment is
implemented in the CMDQ driver.

> 
> > 
> > 2. gce_vm: For GCE hardware virtualization
> >    Currently, the first version of the mt8196 mailbox controller
> > only
> >    requires setting the VM-related registers to enable the
> > permissions
> >    of a host VM.
> 
> What's this? I know this patch would not implement the full VM
> function,
> but describe more about what this is. Why need to enable permission?
> 

OK, I'll add the following text to the commit message in the next version:

For the 8196, it is necessary to configure access permissions for
specific GCE threads for different VMs in order to properly access the
GCE thread registers.
Currently, since only the host VM is being used, it is required to
enable access permissions for all GCE threads for the host VM.

VM_MAP0 is for threads 0-9, VM_MAP1 is for threads 10-19, VM_MAP2 is
for threads 20-29, and VM_MAP3 is for threads 30-31. Each thread has a
3-bit configuration, and setting all bits to 1 means the thread is
configured for the host VM.

Regards,
Jason-JH Lin

> Regards,
> CK
>
diff mbox series

Patch

diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
index d186865b8dce..0abe10a7fef9 100644
--- a/drivers/mailbox/mtk-cmdq-mailbox.c
+++ b/drivers/mailbox/mtk-cmdq-mailbox.c
@@ -43,6 +43,17 @@ 
 #define GCE_CTRL_BY_SW				GENMASK(2, 0)
 #define GCE_DDR_EN				GENMASK(18, 16)
 
+#define GCE_VM_ID_MAP0			0x5018
+#define GCE_VM_MAP0_ALL_HOST			GENMASK(29, 0)
+#define GCE_VM_ID_MAP1			0x501c
+#define GCE_VM_MAP1_ALL_HOST			GENMASK(29, 0)
+#define GCE_VM_ID_MAP2			0x5020
+#define GCE_VM_MAP2_ALL_HOST			GENMASK(29, 0)
+#define GCE_VM_ID_MAP3			0x5024
+#define GCE_VM_MAP3_ALL_HOST			GENMASK(5, 0)
+#define GCE_VM_CPR_GSIZE		0x50c4
+#define GCE_VM_CPR_GSIZE_HSOT			GENMASK(3, 0)
+
 #define CMDQ_THR_ACTIVE_SLOT_CYCLES	0x3200
 #define CMDQ_THR_ENABLED		0x1
 #define CMDQ_THR_DISABLED		0x0
@@ -87,11 +98,24 @@  struct cmdq {
 struct gce_plat {
 	u32 thread_nr;
 	u8 shift;
+	dma_addr_t mminfra_offset;
 	bool control_by_sw;
 	bool sw_ddr_en;
+	bool gce_vm;
+	u32 dma_mask_bit;
 	u32 gce_num;
 };
 
+static inline u32 cmdq_reg_shift_addr(dma_addr_t addr, const struct gce_plat *pdata)
+{
+	return ((addr + pdata->mminfra_offset) >> pdata->shift);
+}
+
+static inline u32 cmdq_reg_revert_addr(dma_addr_t addr, const struct gce_plat *pdata)
+{
+	return ((addr << pdata->shift) - pdata->mminfra_offset);
+}
+
 static void cmdq_sw_ddr_enable(struct cmdq *cmdq, bool enable)
 {
 	WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks));
@@ -112,6 +136,30 @@  u8 cmdq_get_shift_pa(struct mbox_chan *chan)
 }
 EXPORT_SYMBOL(cmdq_get_shift_pa);
 
+dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan)
+{
+	struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, mbox);
+
+	return cmdq->pdata->mminfra_offset;
+}
+EXPORT_SYMBOL(cmdq_get_offset_pa);
+
+bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t addr)
+{
+	struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, mbox);
+
+	if (cmdq->pdata->mminfra_offset == 0)
+		return false;
+
+	/*
+	 * mminfra will recognize the addr that greater than the mminfra_offset
+	 * as a transaction to DRAM.
+	 * So the caller needs to append mminfra_offset for the true case.
+	 */
+	return (addr >= cmdq->pdata->mminfra_offset);
+}
+EXPORT_SYMBOL(cmdq_addr_need_offset);
+
 static int cmdq_thread_suspend(struct cmdq *cmdq, struct cmdq_thread *thread)
 {
 	u32 status;
@@ -143,6 +191,17 @@  static void cmdq_init(struct cmdq *cmdq)
 	u32 gctl_regval = 0;
 
 	WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks));
+
+	if (cmdq->pdata->gce_vm) {
+		/* config cpr size for host vm */
+		writel(GCE_VM_CPR_GSIZE_HSOT, cmdq->base + GCE_VM_CPR_GSIZE);
+		/* config CPR_GSIZE before setting VM_ID_MAP to avoid data leakage */
+		writel(GCE_VM_MAP0_ALL_HOST, cmdq->base + GCE_VM_ID_MAP0);
+		writel(GCE_VM_MAP1_ALL_HOST, cmdq->base + GCE_VM_ID_MAP1);
+		writel(GCE_VM_MAP2_ALL_HOST, cmdq->base + GCE_VM_ID_MAP2);
+		writel(GCE_VM_MAP3_ALL_HOST, cmdq->base + GCE_VM_ID_MAP3);
+	}
+
 	if (cmdq->pdata->control_by_sw)
 		gctl_regval = GCE_CTRL_BY_SW;
 	if (cmdq->pdata->sw_ddr_en)
@@ -199,7 +258,7 @@  static void cmdq_task_insert_into_thread(struct cmdq_task *task)
 				prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE);
 	prev_task_base[CMDQ_NUM_CMD(prev_task->pkt) - 1] =
 		(u64)CMDQ_JUMP_BY_PA << 32 |
-		(task->pa_base >> task->cmdq->pdata->shift);
+		cmdq_reg_shift_addr(task->pa_base, task->cmdq->pdata);
 	dma_sync_single_for_device(dev, prev_task->pa_base,
 				   prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE);
 
@@ -264,7 +323,7 @@  static void cmdq_thread_irq_handler(struct cmdq *cmdq,
 	else
 		return;
 
-	curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR) << cmdq->pdata->shift;
+	curr_pa = cmdq_reg_shift_addr(readl(thread->base + CMDQ_THR_CURR_ADDR), cmdq->pdata);
 
 	list_for_each_entry_safe(task, tmp, &thread->task_busy_list,
 				 list_entry) {
@@ -416,9 +475,9 @@  static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data)
 		 */
 		WARN_ON(cmdq_thread_reset(cmdq, thread) < 0);
 
-		writel(task->pa_base >> cmdq->pdata->shift,
+		writel(cmdq_reg_shift_addr(task->pa_base, cmdq->pdata),
 		       thread->base + CMDQ_THR_CURR_ADDR);
-		writel((task->pa_base + pkt->cmd_buf_size) >> cmdq->pdata->shift,
+		writel(cmdq_reg_shift_addr(task->pa_base + pkt->cmd_buf_size, cmdq->pdata),
 		       thread->base + CMDQ_THR_END_ADDR);
 
 		writel(thread->priority, thread->base + CMDQ_THR_PRIORITY);
@@ -426,10 +485,10 @@  static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data)
 		writel(CMDQ_THR_ENABLED, thread->base + CMDQ_THR_ENABLE_TASK);
 	} else {
 		WARN_ON(cmdq_thread_suspend(cmdq, thread) < 0);
-		curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR) <<
-			cmdq->pdata->shift;
-		end_pa = readl(thread->base + CMDQ_THR_END_ADDR) <<
-			cmdq->pdata->shift;
+		curr_pa = cmdq_reg_revert_addr(readl(thread->base + CMDQ_THR_CURR_ADDR),
+					       cmdq->pdata);
+		end_pa = cmdq_reg_revert_addr(readl(thread->base + CMDQ_THR_END_ADDR),
+					      cmdq->pdata);
 		/* check boundary */
 		if (curr_pa == end_pa - CMDQ_INST_SIZE ||
 		    curr_pa == end_pa) {
@@ -663,6 +722,9 @@  static int cmdq_probe(struct platform_device *pdev)
 	if (err)
 		return err;
 
+	if (cmdq->pdata->dma_mask_bit)
+		dma_set_coherent_mask(dev, DMA_BIT_MASK(cmdq->pdata->dma_mask_bit));
+
 	cmdq->mbox.dev = dev;
 	cmdq->mbox.chans = devm_kcalloc(dev, cmdq->pdata->thread_nr,
 					sizeof(*cmdq->mbox.chans), GFP_KERNEL);
@@ -782,6 +844,17 @@  static const struct gce_plat gce_plat_mt8195 = {
 	.gce_num = 2
 };
 
+static const struct gce_plat gce_plat_mt8196 = {
+	.thread_nr = 32,
+	.shift = 3,
+	.mminfra_offset = 0x80000000, /* 2GB */
+	.control_by_sw = true,
+	.sw_ddr_en = true,
+	.gce_vm = true,
+	.dma_mask_bit = 35,
+	.gce_num = 2
+};
+
 static const struct of_device_id cmdq_of_ids[] = {
 	{.compatible = "mediatek,mt6779-gce", .data = (void *)&gce_plat_mt6779},
 	{.compatible = "mediatek,mt8173-gce", .data = (void *)&gce_plat_mt8173},
@@ -790,6 +863,7 @@  static const struct of_device_id cmdq_of_ids[] = {
 	{.compatible = "mediatek,mt8188-gce", .data = (void *)&gce_plat_mt8188},
 	{.compatible = "mediatek,mt8192-gce", .data = (void *)&gce_plat_mt8192},
 	{.compatible = "mediatek,mt8195-gce", .data = (void *)&gce_plat_mt8195},
+	{.compatible = "mediatek,mt8196-gce", .data = (void *)&gce_plat_mt8196},
 	{}
 };
 MODULE_DEVICE_TABLE(of, cmdq_of_ids);
diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h
index a8f0070c7aa9..79398bf95f8d 100644
--- a/include/linux/mailbox/mtk-cmdq-mailbox.h
+++ b/include/linux/mailbox/mtk-cmdq-mailbox.h
@@ -79,5 +79,7 @@  struct cmdq_pkt {
 };
 
 u8 cmdq_get_shift_pa(struct mbox_chan *chan);
+dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan);
+bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t addr);
 
 #endif /* __MTK_CMDQ_MAILBOX_H__ */