Message ID | 20250218054405.2017918-4-jason-jh.lin@mediatek.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add GCE support for MT8196 | expand |
On Tue, 2025-02-18 at 13:41 +0800, Jason-JH Lin wrote: > MT8196 has 3 new hardware configuration compared with the previous SoC, > which correspond to the 3 new driver data: > > 1. mminfra_offset: For GCE data plane control > Since GCE has been moved into mminfra, GCE needs to append the > mminfra offset to the DRAM address when accessing the DRAM. It seems that GCE has iova and mminfra would mapping the iova to physical address. Maybe let GCE be a iommu device or add iommus property in device node, and use dma_map_xxx() to get iova of GCE. iommus property point to mminfra device (maybe another name) and mminfra device would process the mapping of iova and physical address. > > 2. gce_vm: For GCE hardware virtualization > Currently, the first version of the mt8196 mailbox controller only > requires setting the VM-related registers to enable the permissions > of a host VM. What's this? I know this patch would not implement the full VM function, but describe more about what this is. Why need to enable permission? Regards, CK > > 3. dma_mask_bit: For dma address bit control > In order to avoid the hardware limitations of MT8196 accessing DRAM, > GCE needs to configure the DMA address to be less than 35 bits. > > Signed-off-by: Jason-JH Lin <jason-jh.lin@mediatek.com> > --- > drivers/mailbox/mtk-cmdq-mailbox.c | 90 +++++++++++++++++++++--- > include/linux/mailbox/mtk-cmdq-mailbox.h | 2 + > 2 files changed, 84 insertions(+), 8 deletions(-) > > diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c > index d186865b8dce..0abe10a7fef9 100644 > --- a/drivers/mailbox/mtk-cmdq-mailbox.c > +++ b/drivers/mailbox/mtk-cmdq-mailbox.c > @@ -43,6 +43,17 @@ > #define GCE_CTRL_BY_SW GENMASK(2, 0) > #define GCE_DDR_EN GENMASK(18, 16) > > +#define GCE_VM_ID_MAP0 0x5018 > +#define GCE_VM_MAP0_ALL_HOST GENMASK(29, 0) > +#define GCE_VM_ID_MAP1 0x501c > +#define GCE_VM_MAP1_ALL_HOST GENMASK(29, 0) > +#define GCE_VM_ID_MAP2 0x5020 > +#define GCE_VM_MAP2_ALL_HOST GENMASK(29, 0) > +#define GCE_VM_ID_MAP3 0x5024 > +#define GCE_VM_MAP3_ALL_HOST GENMASK(5, 0) > +#define GCE_VM_CPR_GSIZE 0x50c4 > +#define GCE_VM_CPR_GSIZE_HSOT GENMASK(3, 0) > + > #define CMDQ_THR_ACTIVE_SLOT_CYCLES 0x3200 > #define CMDQ_THR_ENABLED 0x1 > #define CMDQ_THR_DISABLED 0x0 > @@ -87,11 +98,24 @@ struct cmdq { > struct gce_plat { > u32 thread_nr; > u8 shift; > + dma_addr_t mminfra_offset; > bool control_by_sw; > bool sw_ddr_en; > + bool gce_vm; > + u32 dma_mask_bit; > u32 gce_num; > }; > > +static inline u32 cmdq_reg_shift_addr(dma_addr_t addr, const struct gce_plat *pdata) > +{ > + return ((addr + pdata->mminfra_offset) >> pdata->shift); > +} > + > +static inline u32 cmdq_reg_revert_addr(dma_addr_t addr, const struct gce_plat *pdata) > +{ > + return ((addr << pdata->shift) - pdata->mminfra_offset); > +} > + > static void cmdq_sw_ddr_enable(struct cmdq *cmdq, bool enable) > { > WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks)); > @@ -112,6 +136,30 @@ u8 cmdq_get_shift_pa(struct mbox_chan *chan) > } > EXPORT_SYMBOL(cmdq_get_shift_pa); > > +dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan) > +{ > + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, mbox); > + > + return cmdq->pdata->mminfra_offset; > +} > +EXPORT_SYMBOL(cmdq_get_offset_pa); > + > +bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t addr) > +{ > + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, mbox); > + > + if (cmdq->pdata->mminfra_offset == 0) > + return false; > + > + /* > + * mminfra will recognize the addr that greater than the mminfra_offset > + * as a transaction to DRAM. > + * So the caller needs to append mminfra_offset for the true case. > + */ > + return (addr >= cmdq->pdata->mminfra_offset); > +} > +EXPORT_SYMBOL(cmdq_addr_need_offset); > + > static int cmdq_thread_suspend(struct cmdq *cmdq, struct cmdq_thread *thread) > { > u32 status; > @@ -143,6 +191,17 @@ static void cmdq_init(struct cmdq *cmdq) > u32 gctl_regval = 0; > > WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks)); > + > + if (cmdq->pdata->gce_vm) { > + /* config cpr size for host vm */ > + writel(GCE_VM_CPR_GSIZE_HSOT, cmdq->base + GCE_VM_CPR_GSIZE); > + /* config CPR_GSIZE before setting VM_ID_MAP to avoid data leakage */ > + writel(GCE_VM_MAP0_ALL_HOST, cmdq->base + GCE_VM_ID_MAP0); > + writel(GCE_VM_MAP1_ALL_HOST, cmdq->base + GCE_VM_ID_MAP1); > + writel(GCE_VM_MAP2_ALL_HOST, cmdq->base + GCE_VM_ID_MAP2); > + writel(GCE_VM_MAP3_ALL_HOST, cmdq->base + GCE_VM_ID_MAP3); > + } > + > if (cmdq->pdata->control_by_sw) > gctl_regval = GCE_CTRL_BY_SW; > if (cmdq->pdata->sw_ddr_en) > @@ -199,7 +258,7 @@ static void cmdq_task_insert_into_thread(struct cmdq_task *task) > prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE); > prev_task_base[CMDQ_NUM_CMD(prev_task->pkt) - 1] = > (u64)CMDQ_JUMP_BY_PA << 32 | > - (task->pa_base >> task->cmdq->pdata->shift); > + cmdq_reg_shift_addr(task->pa_base, task->cmdq->pdata); > dma_sync_single_for_device(dev, prev_task->pa_base, > prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE); > > @@ -264,7 +323,7 @@ static void cmdq_thread_irq_handler(struct cmdq *cmdq, > else > return; > > - curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR) << cmdq->pdata->shift; > + curr_pa = cmdq_reg_shift_addr(readl(thread->base + CMDQ_THR_CURR_ADDR), cmdq->pdata); > > list_for_each_entry_safe(task, tmp, &thread->task_busy_list, > list_entry) { > @@ -416,9 +475,9 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data) > */ > WARN_ON(cmdq_thread_reset(cmdq, thread) < 0); > > - writel(task->pa_base >> cmdq->pdata->shift, > + writel(cmdq_reg_shift_addr(task->pa_base, cmdq->pdata), > thread->base + CMDQ_THR_CURR_ADDR); > - writel((task->pa_base + pkt->cmd_buf_size) >> cmdq->pdata->shift, > + writel(cmdq_reg_shift_addr(task->pa_base + pkt->cmd_buf_size, cmdq->pdata), > thread->base + CMDQ_THR_END_ADDR); > > writel(thread->priority, thread->base + CMDQ_THR_PRIORITY); > @@ -426,10 +485,10 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data) > writel(CMDQ_THR_ENABLED, thread->base + CMDQ_THR_ENABLE_TASK); > } else { > WARN_ON(cmdq_thread_suspend(cmdq, thread) < 0); > - curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR) << > - cmdq->pdata->shift; > - end_pa = readl(thread->base + CMDQ_THR_END_ADDR) << > - cmdq->pdata->shift; > + curr_pa = cmdq_reg_revert_addr(readl(thread->base + CMDQ_THR_CURR_ADDR), > + cmdq->pdata); > + end_pa = cmdq_reg_revert_addr(readl(thread->base + CMDQ_THR_END_ADDR), > + cmdq->pdata); > /* check boundary */ > if (curr_pa == end_pa - CMDQ_INST_SIZE || > curr_pa == end_pa) { > @@ -663,6 +722,9 @@ static int cmdq_probe(struct platform_device *pdev) > if (err) > return err; > > + if (cmdq->pdata->dma_mask_bit) > + dma_set_coherent_mask(dev, DMA_BIT_MASK(cmdq->pdata->dma_mask_bit)); > + > cmdq->mbox.dev = dev; > cmdq->mbox.chans = devm_kcalloc(dev, cmdq->pdata->thread_nr, > sizeof(*cmdq->mbox.chans), GFP_KERNEL); > @@ -782,6 +844,17 @@ static const struct gce_plat gce_plat_mt8195 = { > .gce_num = 2 > }; > > +static const struct gce_plat gce_plat_mt8196 = { > + .thread_nr = 32, > + .shift = 3, > + .mminfra_offset = 0x80000000, /* 2GB */ > + .control_by_sw = true, > + .sw_ddr_en = true, > + .gce_vm = true, > + .dma_mask_bit = 35, > + .gce_num = 2 > +}; > + > static const struct of_device_id cmdq_of_ids[] = { > {.compatible = "mediatek,mt6779-gce", .data = (void *)&gce_plat_mt6779}, > {.compatible = "mediatek,mt8173-gce", .data = (void *)&gce_plat_mt8173}, > @@ -790,6 +863,7 @@ static const struct of_device_id cmdq_of_ids[] = { > {.compatible = "mediatek,mt8188-gce", .data = (void *)&gce_plat_mt8188}, > {.compatible = "mediatek,mt8192-gce", .data = (void *)&gce_plat_mt8192}, > {.compatible = "mediatek,mt8195-gce", .data = (void *)&gce_plat_mt8195}, > + {.compatible = "mediatek,mt8196-gce", .data = (void *)&gce_plat_mt8196}, > {} > }; > MODULE_DEVICE_TABLE(of, cmdq_of_ids); > diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h > index a8f0070c7aa9..79398bf95f8d 100644 > --- a/include/linux/mailbox/mtk-cmdq-mailbox.h > +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h > @@ -79,5 +79,7 @@ struct cmdq_pkt { > }; > > u8 cmdq_get_shift_pa(struct mbox_chan *chan); > +dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan); > +bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t addr); > > #endif /* __MTK_CMDQ_MAILBOX_H__ */
Hi CK, Thanks for the reviews. On Tue, 2025-02-18 at 09:25 +0000, CK Hu (胡俊光) wrote: > On Tue, 2025-02-18 at 13:41 +0800, Jason-JH Lin wrote: > > MT8196 has 3 new hardware configuration compared with the previous > > SoC, > > which correspond to the 3 new driver data: > > > > 1. mminfra_offset: For GCE data plane control > > Since GCE has been moved into mminfra, GCE needs to append the > > mminfra offset to the DRAM address when accessing the DRAM. > > It seems that GCE has iova and mminfra would mapping the iova to > physical address. > Maybe let GCE be a iommu device or add iommus property in device > node, and use dma_map_xxx() to get iova of GCE. > iommus property point to mminfra device (maybe another name) and > mminfra device would process the mapping of iova and physical > address. The GCE in the 8196 is using IOVA already. The main reason of adding the mminfra_offset 0x8000_0000(2G) is to solve the address conflicting problem: Due to MMIO, if the GCE needs to access a hardware register at 0x1000_0000, but the SMMU is also mapping a DRAM block at 0x1000_0000, the GCE will not know whether it should write to the hardware register or the DRAM. Therefore, a rule was set in the MMINFRA circuit during the HW design: transactions with addresses greater than 2G are considered data paths, while those with addresses less than 2G are considered config paths. Additionally, on the data path, addresses are remapped by subtracting 2G, allowing the SMMU to still map DRAM addresses less than 2G. However, the software needs to add 2G to the DRAM address that the GCE needs to access to ensure that MMINFRA will follow the data path. Since the MMINFRA remap subtracting 2G is done in the hardware circuit and cannot be configured by software, the address +2G adjustment is implemented in the CMDQ driver. > > > > > 2. gce_vm: For GCE hardware virtualization > > Currently, the first version of the mt8196 mailbox controller > > only > > requires setting the VM-related registers to enable the > > permissions > > of a host VM. > > What's this? I know this patch would not implement the full VM > function, > but describe more about what this is. Why need to enable permission? > OK I'll add the commit message below in the next version: For the 8196, it is necessary to configure access permissions for specific GCE threads for different VMs in order to properly access the GCE thread registers. Currently, since only the host VM is being used, it is required to enable access permissions for all GCE threads for the host VM. VM_MAP0 is for threads 0-9, VM_MAP1 is for threads 10-19, VM_MAP2 is for threads 20-29, and VM_MAP3 is for threads 30-31. Each thread has a 3-bit configuration, and setting all bits to 1 means the thread is configured for the host VM. Regards, Jason-JH Lin > Regards, > CK >
Il 18/02/25 06:41, Jason-JH Lin ha scritto: > MT8196 has 3 new hardware configuration compared with the previous SoC, > which correspond to the 3 new driver data: > > 1. mminfra_offset: For GCE data plane control > Since GCE has been moved into mminfra, GCE needs to append the > mminfra offset to the DRAM address when accessing the DRAM. > > 2. gce_vm: For GCE hardware virtualization > Currently, the first version of the mt8196 mailbox controller only > requires setting the VM-related registers to enable the permissions > of a host VM. I think that the GCE VM changes should go to a different commit, as that looks like being something not critical for basic functionality of the MMINFRA GCE. I really like seeing support for that, but please split the basic stuff from the extra functionality :-) > > 3. dma_mask_bit: For dma address bit control > In order to avoid the hardware limitations of MT8196 accessing DRAM, > GCE needs to configure the DMA address to be less than 35 bits. > > Signed-off-by: Jason-JH Lin <jason-jh.lin@mediatek.com> > --- > drivers/mailbox/mtk-cmdq-mailbox.c | 90 +++++++++++++++++++++--- > include/linux/mailbox/mtk-cmdq-mailbox.h | 2 + > 2 files changed, 84 insertions(+), 8 deletions(-) > > diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c > index d186865b8dce..0abe10a7fef9 100644 > --- a/drivers/mailbox/mtk-cmdq-mailbox.c > +++ b/drivers/mailbox/mtk-cmdq-mailbox.c > @@ -43,6 +43,17 @@ > #define GCE_CTRL_BY_SW GENMASK(2, 0) > #define GCE_DDR_EN GENMASK(18, 16) > > +#define GCE_VM_ID_MAP0 0x5018 > +#define GCE_VM_MAP0_ALL_HOST GENMASK(29, 0) > +#define GCE_VM_ID_MAP1 0x501c > +#define GCE_VM_MAP1_ALL_HOST GENMASK(29, 0) > +#define GCE_VM_ID_MAP2 0x5020 > +#define GCE_VM_MAP2_ALL_HOST GENMASK(29, 0) > +#define GCE_VM_ID_MAP3 0x5024 > +#define GCE_VM_MAP3_ALL_HOST GENMASK(5, 0) > +#define GCE_VM_CPR_GSIZE 0x50c4 > +#define GCE_VM_CPR_GSIZE_HSOT GENMASK(3, 0) typo: GSIZE_HOST.... ...but also, if you could add some brief description of what the VMIDs are used for and what the GSIZE is... that'd be very much appreciated from whoever is reading this. The GCE stuff isn't even properly described in datasheets - I do (probably!) understand what those are for, but asking people to get years of experience on MediaTek to understand what's going on would be a bit rude, wouldn't it? :-D > + > #define CMDQ_THR_ACTIVE_SLOT_CYCLES 0x3200 > #define CMDQ_THR_ENABLED 0x1 > #define CMDQ_THR_DISABLED 0x0 > @@ -87,11 +98,24 @@ struct cmdq { > struct gce_plat { > u32 thread_nr; > u8 shift; > + dma_addr_t mminfra_offset; It looks like this is exactly the DRAM's iostart... at least, I can see that in the downstream devicetree that's where it starts. memory: memory@80000000 { device_type = "memory"; reg = <0 0x80000000 0 0x40000000>; }; It doesn't really look like being a coincidence, but, for the sake of asking: is this just a coincidence? :-) > bool control_by_sw; > bool sw_ddr_en; > + bool gce_vm; > + u32 dma_mask_bit; > u32 gce_num; > }; > > +static inline u32 cmdq_reg_shift_addr(dma_addr_t addr, const struct gce_plat *pdata) > +{ > + return ((addr + pdata->mminfra_offset) >> pdata->shift); > +} > + > +static inline u32 cmdq_reg_revert_addr(dma_addr_t addr, const struct gce_plat *pdata) > +{ > + return ((addr << pdata->shift) - pdata->mminfra_offset); > +} I'm not sure that you really need those two functions... probably it's simply cleaner and easier to just write that single line every time... and I'm saying that especially for how you're using those functions, with some readl() passed directly as param, decreasing human readability by "a whole lot" :-) > + > static void cmdq_sw_ddr_enable(struct cmdq *cmdq, bool enable) > { > WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks)); > @@ -112,6 +136,30 @@ u8 cmdq_get_shift_pa(struct mbox_chan *chan) > } > EXPORT_SYMBOL(cmdq_get_shift_pa); > > +dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan) > +{ > + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, mbox); > + > + return cmdq->pdata->mminfra_offset; > +} > +EXPORT_SYMBOL(cmdq_get_offset_pa); I think I remember this get_offset_pa from the old times, then CK removed it (and I was really happy about that disappearing), or am I confusing this with something else? (of course, this wasn't used for mminfra, but for something else!) > + > +bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t addr) > +{ > + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, mbox); > + > + if (cmdq->pdata->mminfra_offset == 0) > + return false; > + > + /* > + * mminfra will recognize the addr that greater than the mminfra_offset > + * as a transaction to DRAM. > + * So the caller needs to append mminfra_offset for the true case. > + */ > + return (addr >= cmdq->pdata->mminfra_offset); /** * cmdq_is_mminfra_gce() - Brief description * @args..... * * The MMINFRA GCE will recognize an address greater than DRAM iostart as a * DRAM transaction instead of ....xyz * * In order for callers to perform (xyz) transactions through the CMDQ, those * need to know if they are using a GCE located in MMINFRA. */ bool cmdq_is_mminfra_gce(...) { return cmdq->pdata->mminfra_offset && (addr >= cmdq->pdata->mminfra_offset) > +} > +EXPORT_SYMBOL(cmdq_addr_need_offset); > + ...but then, is there really no way of just handling the GCE being in MMINFRA transparently from the callers? Do the callers really *need* to know that they're using a new GCE?! Another way of saying: can't we just handle the address translation in here instead of instructing each and every driver about how to communicate with the new GCE?! Cheers, Angelo > static int cmdq_thread_suspend(struct cmdq *cmdq, struct cmdq_thread *thread) > { > u32 status; > @@ -143,6 +191,17 @@ static void cmdq_init(struct cmdq *cmdq) > u32 gctl_regval = 0; > > WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks)); > + > + if (cmdq->pdata->gce_vm) { > + /* config cpr size for host vm */ > + writel(GCE_VM_CPR_GSIZE_HSOT, cmdq->base + GCE_VM_CPR_GSIZE); > + /* config CPR_GSIZE before setting VM_ID_MAP to avoid data leakage */ > + writel(GCE_VM_MAP0_ALL_HOST, cmdq->base + GCE_VM_ID_MAP0); > + writel(GCE_VM_MAP1_ALL_HOST, cmdq->base + GCE_VM_ID_MAP1); > + writel(GCE_VM_MAP2_ALL_HOST, cmdq->base + GCE_VM_ID_MAP2); > + writel(GCE_VM_MAP3_ALL_HOST, cmdq->base + GCE_VM_ID_MAP3); > + } > + > if (cmdq->pdata->control_by_sw) > gctl_regval = GCE_CTRL_BY_SW; > if (cmdq->pdata->sw_ddr_en) > @@ -199,7 +258,7 @@ static void cmdq_task_insert_into_thread(struct cmdq_task *task) > prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE); > prev_task_base[CMDQ_NUM_CMD(prev_task->pkt) - 1] = > (u64)CMDQ_JUMP_BY_PA << 32 | > - (task->pa_base >> task->cmdq->pdata->shift); > + cmdq_reg_shift_addr(task->pa_base, task->cmdq->pdata); > dma_sync_single_for_device(dev, prev_task->pa_base, > prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE); > > @@ -264,7 +323,7 @@ static void cmdq_thread_irq_handler(struct cmdq *cmdq, > else > return; > > - curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR) << cmdq->pdata->shift; > + curr_pa = cmdq_reg_shift_addr(readl(thread->base + CMDQ_THR_CURR_ADDR), cmdq->pdata); > > list_for_each_entry_safe(task, tmp, &thread->task_busy_list, > list_entry) { > @@ -416,9 +475,9 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data) > */ > WARN_ON(cmdq_thread_reset(cmdq, thread) < 0); > > - writel(task->pa_base >> cmdq->pdata->shift, > + writel(cmdq_reg_shift_addr(task->pa_base, cmdq->pdata), > thread->base + CMDQ_THR_CURR_ADDR); > - writel((task->pa_base + pkt->cmd_buf_size) >> cmdq->pdata->shift, > + writel(cmdq_reg_shift_addr(task->pa_base + pkt->cmd_buf_size, cmdq->pdata), > thread->base + CMDQ_THR_END_ADDR); > > writel(thread->priority, thread->base + CMDQ_THR_PRIORITY); > @@ -426,10 +485,10 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data) > writel(CMDQ_THR_ENABLED, thread->base + CMDQ_THR_ENABLE_TASK); > } else { > WARN_ON(cmdq_thread_suspend(cmdq, thread) < 0); > - curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR) << > - cmdq->pdata->shift; > - end_pa = readl(thread->base + CMDQ_THR_END_ADDR) << > - cmdq->pdata->shift; > + curr_pa = cmdq_reg_revert_addr(readl(thread->base + CMDQ_THR_CURR_ADDR), > + cmdq->pdata); > + end_pa = cmdq_reg_revert_addr(readl(thread->base + CMDQ_THR_END_ADDR), > + cmdq->pdata); > /* check boundary */ > if (curr_pa == end_pa - CMDQ_INST_SIZE || > curr_pa == end_pa) { > @@ -663,6 +722,9 @@ static int cmdq_probe(struct platform_device *pdev) > if (err) > return err; > > + if (cmdq->pdata->dma_mask_bit) > + dma_set_coherent_mask(dev, DMA_BIT_MASK(cmdq->pdata->dma_mask_bit)); > + > cmdq->mbox.dev = dev; > cmdq->mbox.chans = devm_kcalloc(dev, cmdq->pdata->thread_nr, > sizeof(*cmdq->mbox.chans), GFP_KERNEL); > @@ -782,6 +844,17 @@ static const struct gce_plat gce_plat_mt8195 = { > .gce_num = 2 > }; > > +static const struct gce_plat gce_plat_mt8196 = { > + .thread_nr = 32, > + .shift = 3, > + .mminfra_offset = 0x80000000, /* 2GB */ > + .control_by_sw = true, > + .sw_ddr_en = true, > + .gce_vm = true, > + .dma_mask_bit = 35, > + .gce_num = 2 > +}; > + > static const struct of_device_id cmdq_of_ids[] = { > {.compatible = "mediatek,mt6779-gce", .data = (void *)&gce_plat_mt6779}, > {.compatible = "mediatek,mt8173-gce", .data = (void *)&gce_plat_mt8173}, > @@ -790,6 +863,7 @@ static const struct of_device_id cmdq_of_ids[] = { > {.compatible = "mediatek,mt8188-gce", .data = (void *)&gce_plat_mt8188}, > {.compatible = "mediatek,mt8192-gce", .data = (void *)&gce_plat_mt8192}, > {.compatible = "mediatek,mt8195-gce", .data = (void *)&gce_plat_mt8195}, > + {.compatible = "mediatek,mt8196-gce", .data = (void *)&gce_plat_mt8196}, > {} > }; > MODULE_DEVICE_TABLE(of, cmdq_of_ids); > diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h > index a8f0070c7aa9..79398bf95f8d 100644 > --- a/include/linux/mailbox/mtk-cmdq-mailbox.h > +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h > @@ -79,5 +79,7 @@ struct cmdq_pkt { > }; > > u8 cmdq_get_shift_pa(struct mbox_chan *chan); > +dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan); > +bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t addr); > > #endif /* __MTK_CMDQ_MAILBOX_H__ */
Hi Angelo, Thanks for the reviews. On Tue, 2025-03-04 at 10:32 +0100, AngeloGioacchino Del Regno wrote: > > External email : Please do not click links or open attachments until > you have verified the sender or the content. > > > Il 18/02/25 06:41, Jason-JH Lin ha scritto: > > MT8196 has 3 new hardware configuration compared with the previous > > SoC, > > which correspond to the 3 new driver data: > > > > 1. mminfra_offset: For GCE data plane control > > Since GCE has been moved into mminfra, GCE needs to append the > > mminfra offset to the DRAM address when accessing the DRAM. > > > > 2. gce_vm: For GCE hardware virtualization > > Currently, the first version of the mt8196 mailbox controller > > only > > requires setting the VM-related registers to enable the > > permissions > > of a host VM. > > I think that the GCE VM changes should go to a different commit, as > that > looks like being something not critical for basic functionality of > the > MMINFRA GCE. > > I really like seeing support for that, but please split the basic > stuff > from the extra functionality :-) > The VM configuration is the basic configuration for MT8196, so I put it together with other configurations in one patch. But I can understand you want the new function as a independent patch. So I will split the VM related part, mminfra_offset part and dma_mask part to 3 single pathes. Then add them as a driver data for MT8196 in this patch. > > > > 3. dma_mask_bit: For dma address bit control > > In order to avoid the hardware limitations of MT8196 accessing > > DRAM, > > GCE needs to configure the DMA address to be less than 35 bits. > > > > Signed-off-by: Jason-JH Lin <jason-jh.lin@mediatek.com> > > --- > > drivers/mailbox/mtk-cmdq-mailbox.c | 90 > > +++++++++++++++++++++--- > > include/linux/mailbox/mtk-cmdq-mailbox.h | 2 + > > 2 files changed, 84 insertions(+), 8 deletions(-) > > > > diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c > > b/drivers/mailbox/mtk-cmdq-mailbox.c > > index d186865b8dce..0abe10a7fef9 100644 > > --- a/drivers/mailbox/mtk-cmdq-mailbox.c > > +++ b/drivers/mailbox/mtk-cmdq-mailbox.c > > @@ -43,6 +43,17 @@ > > #define GCE_CTRL_BY_SW GENMASK(2, 0) > > #define GCE_DDR_EN GENMASK(18, 16) > > > > +#define GCE_VM_ID_MAP0 0x5018 > > +#define GCE_VM_MAP0_ALL_HOST GENMASK(29, 0) > > +#define GCE_VM_ID_MAP1 0x501c > > +#define GCE_VM_MAP1_ALL_HOST GENMASK(29, 0) > > +#define GCE_VM_ID_MAP2 0x5020 > > +#define GCE_VM_MAP2_ALL_HOST GENMASK(29, 0) > > +#define GCE_VM_ID_MAP3 0x5024 > > +#define GCE_VM_MAP3_ALL_HOST GENMASK(5, 0) > > +#define GCE_VM_CPR_GSIZE 0x50c4 > > +#define GCE_VM_CPR_GSIZE_HSOT GENMASK(3, 0) > > typo: GSIZE_HOST.... > Thanks, I'll fix it. > ...but also, if you could add some brief description of what the > VMIDs are used for > and what the GSIZE is... that'd be very much appreciated from whoever > is reading > this. > VMID_MAP configuration is in the previous reply mail for CK. CPR_GSIZE is the setting for allocating the CPR SRAM size to each VM. > The GCE stuff isn't even properly described in datasheets - I do > (probably!) > understand what those are for, but asking people to get years of > experience on > MediaTek to understand what's going on would be a bit rude, wouldn't > it? :-D > I agree with you :-) I'll put them in the VM patch and add some brief description for them. > > + > > #define CMDQ_THR_ACTIVE_SLOT_CYCLES 0x3200 > > #define CMDQ_THR_ENABLED 0x1 > > #define CMDQ_THR_DISABLED 0x0 > > @@ -87,11 +98,24 @@ struct cmdq { > > struct gce_plat { > > u32 thread_nr; > > u8 shift; > > + dma_addr_t mminfra_offset; > > It looks like this is exactly the DRAM's iostart... at least, I can > see that in the > downstream devicetree that's where it starts. > > memory: memory@80000000 { > device_type = "memory"; > reg = <0 0x80000000 0 0x40000000>; > }; > > It doesn't really look like being a coincidence, but, for the sake of > asking: > is this just a coincidence? :-) > As the confirmation with the hardware designer in previous reply mail for CK: https://patchwork.kernel.org/project/linux-mediatek/patch/20250218054405.2017918-4-jason-jh.lin@mediatek.com/#26258463 Since the MMINFRA remap subtracting 2G is done in the hardware circuit and cannot be configured by software, the address +2G adjustment is necessary to implement in the CMDQ driver. So that might not be a coincidence. But even if DRAM start address changes, this mminfra_offset is still subtracting 2G, so I think it is a better choice to define it as the driver data for MT8196. > > bool control_by_sw; > > bool sw_ddr_en; > > + bool gce_vm; > > + u32 dma_mask_bit; > > u32 gce_num; > > }; > > > > +static inline u32 cmdq_reg_shift_addr(dma_addr_t addr, const > > struct gce_plat *pdata) > > +{ > > + return ((addr + pdata->mminfra_offset) >> pdata->shift); > > +} > > + > > +static inline u32 cmdq_reg_revert_addr(dma_addr_t addr, const > > struct gce_plat *pdata) > > +{ > > + return ((addr << pdata->shift) - pdata->mminfra_offset); > > +} > > I'm not sure that you really need those two functions... probably > it's simply > cleaner and easier to just write that single line every time... and > I'm > saying that especially for how you're using those functions, with > some readl() > passed directly as param, decreasing human readability by "a whole > lot" :-) > The reason why I use API wrapper instead of writing it directly in readl() is to avoid missing the shift or mminfra_offset conversion in some places. This problem is not easy to debug, and I have encountered it at least twice... I think the advantage of using function is that it can be uniformly modified to all places that need to handle DRAM address conversion. What do you think? :-) > > + > > static void cmdq_sw_ddr_enable(struct cmdq *cmdq, bool enable) > > { > > WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks)); > > @@ -112,6 +136,30 @@ u8 cmdq_get_shift_pa(struct mbox_chan *chan) > > } > > EXPORT_SYMBOL(cmdq_get_shift_pa); > > > > +dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan) > > +{ > > + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, > > mbox); > > + > > + return cmdq->pdata->mminfra_offset; > > +} > > +EXPORT_SYMBOL(cmdq_get_offset_pa); > > I think I remember this get_offset_pa from the old times, then CK > removed it (and I > was really happy about that disappearing), or am I confusing this > with something > else? > > (of course, this wasn't used for mminfra, but for something else!) > I can't find any remove history in mtk-cmdq-mailbox.c. Maybe you mean the patch in this series? https://lore.kernel.org/all/171213938049.123698.15573779837703602591.b4-ty@collabora.com/ > > + > > +bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t > > addr) > > +{ > > + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, > > mbox); > > + > > + if (cmdq->pdata->mminfra_offset == 0) > > + return false; > > + > > + /* > > + * mminfra will recognize the addr that greater than the > > mminfra_offset > > + * as a transaction to DRAM. > > + * So the caller needs to append mminfra_offset for the true > > case. > > + */ > > + return (addr >= cmdq->pdata->mminfra_offset); > > > /** > * cmdq_is_mminfra_gce() - Brief description > * @args..... > * > * The MMINFRA GCE will recognize an address greater than DRAM > iostart as a > * DRAM transaction instead of ....xyz > * > * In order for callers to perform (xyz) transactions through the > CMDQ, those > * need to know if they are using a GCE located in MMINFRA. > */ > bool cmdq_is_mminfra_gce(...) > { > return cmdq->pdata->mminfra_offset && > (addr >= cmdq->pdata->mminfra_offset) > > > +} > > +EXPORT_SYMBOL(cmdq_addr_need_offset); > > + > OK, I'll modify the API like this. > ...but then, is there really no way of just handling the GCE being in > MMINFRA > transparently from the callers? Do the callers really *need* to know > that they're > using a new GCE?! > Since the address subtracting is done in MMINFRA hardware, I think GCE users really need to handle it in driver. > Another way of saying: can't we just handle the address translation > in here instead > of instructing each and every driver about how to communicate with > the new GCE?! > The DRAM address may not only be the command buffer to GCE, but also the working buffer provided by CMDQ users and being a part of GCE instruction, so we need to handle the address translation in CMDQ helper driver for the instruction generation. E.g. ISP drivers may use GCE to write a hardware settings to a DRAM as backup buffer. The GCE write instruction will be: WRITE the value of ISP register to DRAM address + mminfra_offset. But most of the CMDQ users only need to use GCE to write hardware register, so I only keep the translation in cmdq_pkt_mem_move(), cmdq_pkt_poll_addr() and cmdq_pkt_jump_abs() at the latest series. Regards, Jason-JH lin > > Cheers, > Angelo
Il 05/03/25 09:36, Jason-JH Lin (林睿祥) ha scritto: > Hi Angelo, > > Thanks for the reviews. > > On Tue, 2025-03-04 at 10:32 +0100, AngeloGioacchino Del Regno wrote: >> >> External email : Please do not click links or open attachments until >> you have verified the sender or the content. >> >> >> Il 18/02/25 06:41, Jason-JH Lin ha scritto: >>> MT8196 has 3 new hardware configuration compared with the previous >>> SoC, >>> which correspond to the 3 new driver data: >>> >>> 1. mminfra_offset: For GCE data plane control >>> Since GCE has been moved into mminfra, GCE needs to append the >>> mminfra offset to the DRAM address when accessing the DRAM. >>> >>> 2. gce_vm: For GCE hardware virtualization >>> Currently, the first version of the mt8196 mailbox controller >>> only >>> requires setting the VM-related registers to enable the >>> permissions >>> of a host VM. >> >> I think that the GCE VM changes should go to a different commit, as >> that >> looks like being something not critical for basic functionality of >> the >> MMINFRA GCE. >> >> I really like seeing support for that, but please split the basic >> stuff >> from the extra functionality :-) >> > > The VM configuration is the basic configuration for MT8196, so I put it > together with other configurations in one patch. > But I can understand you want the new function as a independent patch. > So I will split the VM related part, mminfra_offset part and dma_mask > part to 3 single pathes. Then add them as a driver data for MT8196 in > this patch. > Yeah, thanks, the log simply gets more readable like that. >>> >>> 3. dma_mask_bit: For dma address bit control >>> In order to avoid the hardware limitations of MT8196 accessing >>> DRAM, >>> GCE needs to configure the DMA address to be less than 35 bits. >>> >>> Signed-off-by: Jason-JH Lin <jason-jh.lin@mediatek.com> >>> --- >>> drivers/mailbox/mtk-cmdq-mailbox.c | 90 >>> +++++++++++++++++++++--- >>> include/linux/mailbox/mtk-cmdq-mailbox.h | 2 + >>> 2 files changed, 84 insertions(+), 8 deletions(-) >>> >>> diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c >>> b/drivers/mailbox/mtk-cmdq-mailbox.c >>> index d186865b8dce..0abe10a7fef9 100644 >>> --- a/drivers/mailbox/mtk-cmdq-mailbox.c >>> +++ b/drivers/mailbox/mtk-cmdq-mailbox.c >>> @@ -43,6 +43,17 @@ >>> #define GCE_CTRL_BY_SW GENMASK(2, 0) >>> #define GCE_DDR_EN GENMASK(18, 16) >>> >>> +#define GCE_VM_ID_MAP0 0x5018 >>> +#define GCE_VM_MAP0_ALL_HOST GENMASK(29, 0) >>> +#define GCE_VM_ID_MAP1 0x501c >>> +#define GCE_VM_MAP1_ALL_HOST GENMASK(29, 0) >>> +#define GCE_VM_ID_MAP2 0x5020 >>> +#define GCE_VM_MAP2_ALL_HOST GENMASK(29, 0) >>> +#define GCE_VM_ID_MAP3 0x5024 >>> +#define GCE_VM_MAP3_ALL_HOST GENMASK(5, 0) >>> +#define GCE_VM_CPR_GSIZE 0x50c4 >>> +#define GCE_VM_CPR_GSIZE_HSOT GENMASK(3, 0) >> >> typo: GSIZE_HOST.... >> > > Thanks, I'll fix it. > >> ...but also, if you could add some brief description of what the >> VMIDs are used for >> and what the GSIZE is... that'd be very much appreciated from whoever >> is reading >> this. >> > VMID_MAP configuration is in the previous reply mail for CK. Oh, sorry, too many emails - sometimes I lose some :-) > CPR_GSIZE is the setting for allocating the CPR SRAM size to each VM. Would be awesome if you could then clarify the comment that you have later in the code here, from... /* config cpr size for host vm */ to /* Set the amount of CPR SRAM to allocate to each VM */ ...that could be a way of more properly describing what the writel there is doing. > >> The GCE stuff isn't even properly described in datasheets - I do >> (probably!) >> understand what those are for, but asking people to get years of >> experience on >> MediaTek to understand what's going on would be a bit rude, wouldn't >> it? :-D >> > > I agree with you :-) > I'll put them in the VM patch and add some brief description for them. > Thanks, much appreciated! >>> + >>> #define CMDQ_THR_ACTIVE_SLOT_CYCLES 0x3200 >>> #define CMDQ_THR_ENABLED 0x1 >>> #define CMDQ_THR_DISABLED 0x0 >>> @@ -87,11 +98,24 @@ struct cmdq { >>> struct gce_plat { >>> u32 thread_nr; >>> u8 shift; >>> + dma_addr_t mminfra_offset; >> >> It looks like this is exactly the DRAM's iostart... at least, I can >> see that in the >> downstream devicetree that's where it starts. >> >> memory: memory@80000000 { >> device_type = "memory"; >> reg = <0 0x80000000 0 0x40000000>; >> }; >> >> It doesn't really look like being a coincidence, but, for the sake of >> asking: >> is this just a coincidence? :-) >> > > As the confirmation with the hardware designer in previous reply mail > for CK: > https://patchwork.kernel.org/project/linux-mediatek/patch/20250218054405.2017918-4-jason-jh.lin@mediatek.com/#26258463 > That explanation was simply wonderful. > Since the MMINFRA remap subtracting 2G is done in the hardware circuit > and cannot be configured by software, the address +2G adjustment is > necessary to implement in the CMDQ driver. > > So that might not be a coincidence. > But even if DRAM start address changes, this mminfra_offset is still > subtracting 2G, so I think it is a better choice to define it as the > driver data for MT8196. > ....so, this makes me think the following: 1. The DRAM start address cannot *ever* be less than 2G, because otherwise the MMINFRA HW would have a hole in the usable address range; 1a. If the start address changes to less than 2G, then also the IOMMU would get limitations, not only the mminfra..! 2b. This makes it very very very unlikely for the start address to be changed to less than 0x80000000 2. If the DRAM start address changes to be ABOVE 2G (so more than 0x80000000), there would be no point for MMINFRA to start a "config path" write (or read) in the SMMU DRAM block, would it? ;-) I get it - if the DRAM moves up, MMINFRA is still at 2G because that's hard baked into the hardware, but I foresee that it'll be unlikely to see a platform changing the DRAM start address arbitrarily, getting out-of-sync with MMINFRA. I propose to just get the address from the memory node for now, and to add a nice comment in the code that explains that "In at least MT8196, the MMINFRA hardware subtracts xyz etc etc" (and that explanation from the previous email is again wonderful and shall not be lost: either use that in the comment, or add it to the commit description, because it's really that good). Should a new SoC appear in the future requiring an offset from the DRAM start address, we will think about how to make that work in the best possible way: in that case we could either reference something else to get the right address or we can just change this driver to just use the 2G offset statically for all. What I'm trying to do here is to reduce the amount of changes that we'd need for adding new SoCs: since that 2G MMINFRA offset -> 2G DRAM start is not a coincidence I think that, should the DRAM start vary on new SoCs, the MMINFRA offset will follow the trend and vary with it. So what I think is: 1. If I'm right, adding a new SoC (with different MMINFRA + DRAM offset) will be as easy as adding a compatible string in the bindings, no effort in changing this driver with new pdata offsets; 2. If I'm wrong, adding a new SoC means adding compat string and adding pdata and one variable in the cmdq struct. Where N.2 is what we would do anyway if we don't go with my proposed solution... All this is just to give you my considerations about this topic - you're left completely free to disagree with me. If you disagree, I will trust your judgement, no problem here. >>> bool control_by_sw; >>> bool sw_ddr_en; >>> + bool gce_vm; >>> + u32 dma_mask_bit; >>> u32 gce_num; >>> }; >>> >>> +static inline u32 cmdq_reg_shift_addr(dma_addr_t addr, const >>> struct gce_plat *pdata) >>> +{ >>> + return ((addr + pdata->mminfra_offset) >> pdata->shift); >>> +} >>> + >>> +static inline u32 cmdq_reg_revert_addr(dma_addr_t addr, const >>> struct gce_plat *pdata) >>> +{ >>> + return ((addr << pdata->shift) - pdata->mminfra_offset); >>> +} >> >> I'm not sure that you really need those two functions... probably >> it's simply >> cleaner and easier to just write that single line every time... and >> I'm >> saying that especially for how you're using those functions, with >> some readl() >> passed directly as param, decreasing human readability by "a whole >> lot" :-) >> > > The reason why I use API wrapper instead of writing it directly in > readl() is to avoid missing the shift or mminfra_offset conversion in > some places. > This problem is not easy to debug, and I have encountered it at least > twice... > > I think the advantage of using function is that it can be uniformly > modified to all places that need to handle DRAM address conversion. > What do you think? :-) > Eh, if you put it like that... it makes sense, so.. yeah, okay :-) Still, please cleanup those instances of `cmdq_reg_revert_addr(readl(something), pdata)` those might be hard to read, so please just do something like: regval = readl(something); curr_pa = cmdq_revert_addr(regval, pdata); ...reword to your own liking, of course. >>> + >>> static void cmdq_sw_ddr_enable(struct cmdq *cmdq, bool enable) >>> { >>> WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks)); >>> @@ -112,6 +136,30 @@ u8 cmdq_get_shift_pa(struct mbox_chan *chan) >>> } >>> EXPORT_SYMBOL(cmdq_get_shift_pa); >>> >>> +dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan) >>> +{ >>> + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, >>> mbox); >>> + >>> + return cmdq->pdata->mminfra_offset; >>> +} >>> +EXPORT_SYMBOL(cmdq_get_offset_pa); >> >> I think I remember this get_offset_pa from the old times, then CK >> removed it (and I >> was really happy about that disappearing), or am I confusing this >> with something >> else? >> >> (of course, this wasn't used for mminfra, but for something else!) >> > > I can't find any remove history in mtk-cmdq-mailbox.c. > > Maybe you mean the patch in this series? > https://lore.kernel.org/all/171213938049.123698.15573779837703602591.b4-ty@collabora.com/ > Uhm, I think I may have confused something here, but yes I was remembering the patch series that you pointed out, definitely. At the end, that series is doing something else, so nevermind, was just confusion. >>> + >>> +bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t >>> addr) >>> +{ >>> + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, >>> mbox); >>> + >>> + if (cmdq->pdata->mminfra_offset == 0) >>> + return false; >>> + >>> + /* >>> + * mminfra will recognize the addr that greater than the >>> mminfra_offset >>> + * as a transaction to DRAM. >>> + * So the caller needs to append mminfra_offset for the true >>> case. >>> + */ >>> + return (addr >= cmdq->pdata->mminfra_offset); >> >> >> /** >> * cmdq_is_mminfra_gce() - Brief description >> * @args..... >> * >> * The MMINFRA GCE will recognize an address greater than DRAM >> iostart as a >> * DRAM transaction instead of ....xyz >> * >> * In order for callers to perform (xyz) transactions through the >> CMDQ, those >> * need to know if they are using a GCE located in MMINFRA. >> */ >> bool cmdq_is_mminfra_gce(...) >> { >> return cmdq->pdata->mminfra_offset && >> (addr >= cmdq->pdata->mminfra_offset) >> >>> +} >>> +EXPORT_SYMBOL(cmdq_addr_need_offset); >>> + >> > > OK, I'll modify the API like this. > >> ...but then, is there really no way of just handling the GCE being in >> MMINFRA >> transparently from the callers? Do the callers really *need* to know >> that they're >> using a new GCE?! >> > > Since the address subtracting is done in MMINFRA hardware, I think GCE > users really need to handle it in driver. > Since the users of this infrastructure are multimedia related (disp/MDP3), I'd also like to get an opinion from MediaTek engineers familiar with that. CK, Moudy, any opinion on that, please? >> Another way of saying: can't we just handle the address translation >> in here instead >> of instructing each and every driver about how to communicate with >> the new GCE?! >> > > The DRAM address may not only be the command buffer to GCE, but also > the working buffer provided by CMDQ users and being a part of GCE > instruction, so we need to handle the address translation in CMDQ > helper driver for the instruction generation. > E.g. ISP drivers may use GCE to write a hardware settings to a DRAM as > backup buffer. The GCE write instruction will be: > WRITE the value of ISP register to DRAM address + mminfra_offset. > > But most of the CMDQ users only need to use GCE to write hardware > register, so I only keep the translation in cmdq_pkt_mem_move(), > cmdq_pkt_poll_addr() and cmdq_pkt_jump_abs() at the latest series. Yeah you're choosing the best of both worlds in that case, I do agree, but still - if there's a way to avoid drivers to have different handling for mminfra vs no-mminfra, that'd still be preferred. Having the handling for something *centralized* somewhere, instead of it being sparse here and there, would make maintenance way easier... ...and that's why I'm asking for CK and Moudy's opinion, nothing else :-) Cheers! Angelo
[snip] > > > CPR_GSIZE is the setting for allocating the CPR SRAM size to each > > VM. > > Would be awesome if you could then clarify the comment that you have > later in > the code here, from... > > /* config cpr size for host vm */ > > to > > /* Set the amount of CPR SRAM to allocate to each VM */ > > ...that could be a way of more properly describing what the writel > there is doing. > OK, I'll change it. > > > > > The GCE stuff isn't even properly described in datasheets - I do > > > (probably!) > > > understand what those are for, but asking people to get years of > > > experience on > > > MediaTek to understand what's going on would be a bit rude, > > > wouldn't > > > it? :-D > > > > > > > I agree with you :-) > > I'll put them in the VM patch and add some brief description for > > them. > > > > Thanks, much appreciated! > > > > > + > > > > #define CMDQ_THR_ACTIVE_SLOT_CYCLES 0x3200 > > > > #define CMDQ_THR_ENABLED 0x1 > > > > #define CMDQ_THR_DISABLED 0x0 > > > > @@ -87,11 +98,24 @@ struct cmdq { > > > > struct gce_plat { > > > > u32 thread_nr; > > > > u8 shift; > > > > + dma_addr_t mminfra_offset; > > > > > > It looks like this is exactly the DRAM's iostart... at least, I > > > can > > > see that in the > > > downstream devicetree that's where it starts. > > > > > > memory: memory@80000000 { > > > device_type = "memory"; > > > reg = <0 0x80000000 0 0x40000000>; > > > }; > > > > > > It doesn't really look like being a coincidence, but, for the > > > sake of > > > asking: > > > is this just a coincidence? :-) > > > > > > > As the confirmation with the hardware designer in previous reply > > mail > > for CK: > > https://patchwork.kernel.org/project/linux-mediatek/patch/20250218054405.2017918-4-jason-jh.lin@mediatek.com/*26258463 > > > > That explanation was simply wonderful. > > > Since the MMINFRA remap subtracting 2G is done in the hardware > > circuit > > and cannot be configured by software, the address +2G adjustment is > > necessary to implement in the CMDQ driver. > > > > So that might not be a coincidence. > > But even if DRAM start address changes, this mminfra_offset is > > still > > subtracting 2G, so I think it is a better choice to define it as > > the > > driver data for MT8196. > > > > ....so, this makes me think the following: > > 1. The DRAM start address cannot *ever* be less than 2G, because > otherwise the > MMINFRA HW would have a hole in the usable address range; > 1a. If the start address changes to less than 2G, then also the > IOMMU would > get limitations, not only the mminfra..! > 2b. This makes it very very very unlikely for the start address > to be changed > to less than 0x80000000 > > 2. If the DRAM start address changes to be ABOVE 2G (so more than > 0x80000000), > there would be no point for MMINFRA to start a "config path" > write (or read) > in the SMMU DRAM block, would it? ;-) > GCE is using IOMMU in MT8196, so all the address put into the GCE instruction or GCE register for GCE access should be IOVA. The DRAM start address is 2G(PA=0x80000000, IOVA=0x0) currently, so when GCE want to access the IOVA=0x0, it will need to +2G into the instruction, then the MMINFRA will see it as data path(IOVA > 2G) and subtract 2G for that IOVA, so GCE can finally access the IOVA=0x0. I'm not sure if I've misunderstood what you mean by ABOVE 2G. :-) If DRAM start address is changed to 3G(PA=0xc0000000) the IOVA is still 0x0, so GCE still need to + 2G to make MMINFRA go to the data path. But if you mean PA=0x80000000 and IOVA start address is 3G(0xc0000000), then MMINFRA will go to the data path without GCE +2G. However, MMINFRA will -2G when going to the data path and that will cause GCE access the wrong IOVA. So GCE still need to +2G no matter IOVA start address is already can make MMINFRA go to the data path(IOVA > 2G). We have already complained to our hardware designer that MMINFRA -2G con not be changed, which will make software operation very troublesome. So in the next few generations of SoC will change this MMINFRA -2G to software configurable. Then we can just make IOVA start address to 2G without adding the mminfra_offset to the IOVA for GCE. > I get it - if the DRAM moves up, MMINFRA is still at 2G because > that's hard baked > into the hardware, but I foresee that it'll be unlikely to see a > platform changing > the DRAM start address arbitrarily, getting out-of-sync with MMINFRA. > > I propose to just get the address from the memory node for now, and > to add a nice > comment in the code that explains that "In at least MT8196, the > MMINFRA hardware > subtracts xyz etc etc" (and that explanation from the previous email > is again > wonderful and shall not be lost: either use that in the comment, or > add it to > the commit description, because it's really that good). > > Should a new SoC appear in the future requiring an offset from the > DRAM start > address, we will think about how to make that work in the best > possible way: in > that case we could either reference something else to get the right > address or > we can just change this driver to just use the 2G offset statically > for all. > > What I'm trying to do here is to reduce the amount of changes that > we'd need for > adding new SoCs: since that 2G MMINFRA offset -> 2G DRAM start is not > a coincidence > I think that, should the DRAM start vary on new SoCs, the MMINFRA > offset will > follow the trend and vary with it. > > So what I think is: > 1. If I'm right, adding a new SoC (with different MMINFRA + DRAM > offset) will be > as easy as adding a compatible string in the bindings, no effort > in changing > this driver with new pdata offsets; > 2. If I'm wrong, adding a new SoC means adding compat string and > adding pdata and > one variable in the cmdq struct. > > Where N.2 is what we would do anyway if we don't go with my proposed > solution... > > All this is just to give you my considerations about this topic - > you're left > completely free to disagree with me. > If you disagree, I will trust your judgement, no problem here. > Yes, I think your are right. No matter the IOVA start address changing, MMINFRA will still -2G(the start address of DRAM PA). Do you mean we can get the mminfra_offset from the start address of memory in DTS, rather than defining it in pdata? > > > > bool control_by_sw; > > > > bool sw_ddr_en; > > > > + bool gce_vm; > > > > + u32 dma_mask_bit; > > > > u32 gce_num; > > > > }; > > > > > > > > +static inline u32 cmdq_reg_shift_addr(dma_addr_t addr, const > > > > struct gce_plat *pdata) > > > > +{ > > > > + return ((addr + pdata->mminfra_offset) >> pdata->shift); > > > > +} > > > > + > > > > +static inline u32 cmdq_reg_revert_addr(dma_addr_t addr, const > > > > struct gce_plat *pdata) > > > > +{ > > > > + return ((addr << pdata->shift) - pdata->mminfra_offset); > > > > +} > > > > > > I'm not sure that you really need those two functions... probably > > > it's simply > > > cleaner and easier to just write that single line every time... > > > and > > > I'm > > > saying that especially for how you're using those functions, with > > > some readl() > > > passed directly as param, decreasing human readability by "a > > > whole > > > lot" :-) > > > > > > > The reason why I use API wrapper instead of writing it directly in > > readl() is to avoid missing the shift or mminfra_offset conversion > > in > > some places. > > This problem is not easy to debug, and I have encountered it at > > least > > twice... > > > > I think the advantage of using function is that it can be uniformly > > modified to all places that need to handle DRAM address conversion. > > What do you think? :-) > > > > Eh, if you put it like that... it makes sense, so.. yeah, okay :-) > > Still, please cleanup those instances of > > `cmdq_reg_revert_addr(readl(something), pdata)` > > those might be hard to read, so please just do something like: > > regval = readl(something); > curr_pa = cmdq_revert_addr(regval, pdata); > > ...reword to your own liking, of course. > OK, I'll refine that. Thanks. > > > > + > > > > static void cmdq_sw_ddr_enable(struct cmdq *cmdq, bool > > > > enable) > > > > { > > > > WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq- > > > > >clocks)); > > > > @@ -112,6 +136,30 @@ u8 cmdq_get_shift_pa(struct mbox_chan > > > > *chan) > > > > } > > > > EXPORT_SYMBOL(cmdq_get_shift_pa); > > > > > > > > +dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan) > > > > +{ > > > > + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, > > > > mbox); > > > > + > > > > + return cmdq->pdata->mminfra_offset; > > > > +} > > > > +EXPORT_SYMBOL(cmdq_get_offset_pa); > > > > > > I think I remember this get_offset_pa from the old times, then CK > > > removed it (and I > > > was really happy about that disappearing), or am I confusing this > > > with something > > > else? > > > > > > (of course, this wasn't used for mminfra, but for something > > > else!) > > > > > > > I can't find any remove history in mtk-cmdq-mailbox.c. > > > > Maybe you mean the patch in this series? > > https://lore.kernel.org/all/171213938049.123698.15573779837703602591.b4-ty@collabora.com/ > > > > Uhm, I think I may have confused something here, but yes I was > remembering the > patch series that you pointed out, definitely. > > At the end, that series is doing something else, so nevermind, was > just confusion. > OK, no problem. > > > > + > > > > +bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t > > > > addr) > > > > +{ > > > > + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, > > > > mbox); > > > > + > > > > + if (cmdq->pdata->mminfra_offset == 0) > > > > + return false; > > > > + > > > > + /* > > > > + * mminfra will recognize the addr that greater than the > > > > mminfra_offset > > > > + * as a transaction to DRAM. > > > > + * So the caller needs to append mminfra_offset for the > > > > true > > > > case. > > > > + */ > > > > + return (addr >= cmdq->pdata->mminfra_offset); > > > > > > > > > /** > > > * cmdq_is_mminfra_gce() - Brief description > > > * @args..... > > > * > > > * The MMINFRA GCE will recognize an address greater than DRAM > > > iostart as a > > > * DRAM transaction instead of ....xyz > > > * > > > * In order for callers to perform (xyz) transactions through > > > the > > > CMDQ, those > > > * need to know if they are using a GCE located in MMINFRA. > > > */ > > > bool cmdq_is_mminfra_gce(...) > > > { > > > return cmdq->pdata->mminfra_offset && > > > (addr >= cmdq->pdata->mminfra_offset) > > > > > > > +} > > > > +EXPORT_SYMBOL(cmdq_addr_need_offset); > > > > + > > > > > > > OK, I'll modify the API like this. > > > > > ...but then, is there really no way of just handling the GCE > > > being in > > > MMINFRA > > > transparently from the callers? Do the callers really *need* to > > > know > > > that they're > > > using a new GCE?! > > > > > > > Since the address subtracting is done in MMINFRA hardware, I think > > GCE > > users really need to handle it in driver. > > > > Since the users of this infrastructure are multimedia related > (disp/MDP3), > I'd also like to get an opinion from MediaTek engineers familiar with > that. > > CK, Moudy, any opinion on that, please? > > > > Another way of saying: can't we just handle the address > > > translation > > > in here instead > > > of instructing each and every driver about how to communicate > > > with > > > the new GCE?! > > > > > > > The DRAM address may not only be the command buffer to GCE, but > > also > > the working buffer provided by CMDQ users and being a part of GCE > > instruction, so we need to handle the address translation in CMDQ > > helper driver for the instruction generation. > > E.g. ISP drivers may use GCE to write a hardware settings to a DRAM > > as > > backup buffer. The GCE write instruction will be: > > WRITE the value of ISP register to DRAM address + mminfra_offset. > > > > But most of the CMDQ users only need to use GCE to write hardware > > register, so I only keep the translation in cmdq_pkt_mem_move(), > > cmdq_pkt_poll_addr() and cmdq_pkt_jump_abs() at the latest series. > > Yeah you're choosing the best of both worlds in that case, I do > agree, but > still - if there's a way to avoid drivers to have different handling > for > mminfra vs no-mminfra, that'd still be preferred. > > Having the handling for something *centralized* somewhere, instead of > it > being sparse here and there, would make maintenance way easier... > > ...and that's why I'm asking for CK and Moudy's opinion, nothing else > :-) > Yes, I totally agree with you. Thanks for the asking! Regards, Jason-JH.Lin > Cheers! > Angelo >
Il 06/03/25 12:00, Jason-JH Lin (林睿祥) ha scritto: > [snip] >> >>> CPR_GSIZE is the setting for allocating the CPR SRAM size to each >>> VM. >> >> Would be awesome if you could then clarify the comment that you have >> later in >> the code here, from... >> >> /* config cpr size for host vm */ >> >> to >> >> /* Set the amount of CPR SRAM to allocate to each VM */ >> >> ...that could be a way of more properly describing what the writel >> there is doing. >> > > OK, I'll change it. > >>> >>>> The GCE stuff isn't even properly described in datasheets - I do >>>> (probably!) >>>> understand what those are for, but asking people to get years of >>>> experience on >>>> MediaTek to understand what's going on would be a bit rude, >>>> wouldn't >>>> it? :-D >>>> >>> >>> I agree with you :-) >>> I'll put them in the VM patch and add some brief description for >>> them. >>> >> >> Thanks, much appreciated! >> >>>>> + >>>>> #define CMDQ_THR_ACTIVE_SLOT_CYCLES 0x3200 >>>>> #define CMDQ_THR_ENABLED 0x1 >>>>> #define CMDQ_THR_DISABLED 0x0 >>>>> @@ -87,11 +98,24 @@ struct cmdq { >>>>> struct gce_plat { >>>>> u32 thread_nr; >>>>> u8 shift; >>>>> + dma_addr_t mminfra_offset; >>>> >>>> It looks like this is exactly the DRAM's iostart... at least, I >>>> can >>>> see that in the >>>> downstream devicetree that's where it starts. >>>> >>>> memory: memory@80000000 { >>>> device_type = "memory"; >>>> reg = <0 0x80000000 0 0x40000000>; >>>> }; >>>> >>>> It doesn't really look like being a coincidence, but, for the >>>> sake of >>>> asking: >>>> is this just a coincidence? :-) >>>> >>> >>> As the confirmation with the hardware designer in previous reply >>> mail >>> for CK: >>> https://patchwork.kernel.org/project/linux-mediatek/patch/20250218054405.2017918-4-jason-jh.lin@mediatek.com/*26258463 >>> >> >> That explanation was simply wonderful. >> >>> Since the MMINFRA remap subtracting 2G is done in the hardware >>> circuit >>> and cannot be configured by software, the address +2G adjustment is >>> necessary to implement in the CMDQ driver. >>> >>> So that might not be a coincidence. >>> But even if DRAM start address changes, this mminfra_offset is >>> still >>> subtracting 2G, so I think it is a better choice to define it as >>> the >>> driver data for MT8196. >>> >> >> ....so, this makes me think the following: >> >> 1. The DRAM start address cannot *ever* be less than 2G, because >> otherwise the >> MMINFRA HW would have a hole in the usable address range; >> 1a. If the start address changes to less than 2G, then also the >> IOMMU would >> get limitations, not only the mminfra..! >> 2b. This makes it very very very unlikely for the start address >> to be changed >> to less than 0x80000000 >> >> 2. If the DRAM start address changes to be ABOVE 2G (so more than >> 0x80000000), >> there would be no point for MMINFRA to start a "config path" >> write (or read) >> in the SMMU DRAM block, would it? ;-) >> > > GCE is using IOMMU in MT8196, so all the address put into the GCE > instruction or GCE register for GCE access should be IOVA. > > The DRAM start address is 2G(PA=0x80000000, IOVA=0x0) currently, so > when GCE want to access the IOVA=0x0, it will need to +2G into the > instruction, then the MMINFRA will see it as data path(IOVA > 2G) and > subtract 2G for that IOVA, so GCE can finally access the IOVA=0x0. > > I'm not sure if I've misunderstood what you mean by ABOVE 2G. :-) > If DRAM start address is changed to 3G(PA=0xc0000000) the IOVA is still > 0x0, so GCE still need to + 2G to make MMINFRA go to the data path. > > But if you mean PA=0x80000000 and IOVA start address is 3G(0xc0000000), > then MMINFRA will go to the data path without GCE +2G. > However, MMINFRA will -2G when going to the data path and that will > cause GCE access the wrong IOVA. > So GCE still need to +2G no matter IOVA start address is already can > make MMINFRA go to the data path(IOVA > 2G). > > We have already complained to our hardware designer that MMINFRA -2G > con not be changed, which will make software operation very > troublesome. > So in the next few generations of SoC will change this MMINFRA -2G to > software configurable. Then we can just make IOVA start address to 2G > without adding the mminfra_offset to the IOVA for GCE. > Okay now I got it, the reality is way worse than I was thinking... eww... :-( >> I get it - if the DRAM moves up, MMINFRA is still at 2G because >> that's hard baked >> into the hardware, but I foresee that it'll be unlikely to see a >> platform changing >> the DRAM start address arbitrarily, getting out-of-sync with MMINFRA. >> >> I propose to just get the address from the memory node for now, and >> to add a nice >> comment in the code that explains that "In at least MT8196, the >> MMINFRA hardware >> subtracts xyz etc etc" (and that explanation from the previous email >> is again >> wonderful and shall not be lost: either use that in the comment, or >> add it to >> the commit description, because it's really that good). >> >> Should a new SoC appear in the future requiring an offset from the >> DRAM start >> address, we will think about how to make that work in the best >> possible way: in >> that case we could either reference something else to get the right >> address or >> we can just change this driver to just use the 2G offset statically >> for all. >> >> What I'm trying to do here is to reduce the amount of changes that >> we'd need for >> adding new SoCs: since that 2G MMINFRA offset -> 2G DRAM start is not >> a coincidence >> I think that, should the DRAM start vary on new SoCs, the MMINFRA >> offset will >> follow the trend and vary with it. >> >> So what I think is: >> 1. If I'm right, adding a new SoC (with different MMINFRA + DRAM >> offset) will be >> as easy as adding a compatible string in the bindings, no effort >> in changing >> this driver with new pdata offsets; >> 2. If I'm wrong, adding a new SoC means adding compat string and >> adding pdata and >> one variable in the cmdq struct. >> >> Where N.2 is what we would do anyway if we don't go with my proposed >> solution... >> >> All this is just to give you my considerations about this topic - >> you're left >> completely free to disagree with me. >> If you disagree, I will trust your judgement, no problem here. >> > > Yes, I think your are right. No matter the IOVA start address changing, > MMINFRA will still -2G(the start address of DRAM PA). > Do you mean we can get the mminfra_offset from the start address of > memory in DTS, rather than defining it in pdata? > After the last explanation... no, it would be wrong to get the start from memory in DTS, because then this will still need hacks. I was somehow convinced that the DRAM start address and the MMINFRA offset were directly related and that it would've been hard to change the DRAM start address with the MMINFRA offset being -2G, but it's not, so doing it my way will eventually backfire on us. So my way is not good, as it's not showing the reality of things. Just go with your current way, as it's really tied to the hardware and it's not restricted to that dram start coincidence in the end. That's a pity. Just instead of writing 0x80000000, use " SZ_2G " instead... and please add a comment in the code that explains in brief that there's this strange behavior for which we.. need that, and that's a static subtraction, and is tied to the MMINFRA hardware, and cannot be changed :-( btw, being clear.. #include <linux/sizes.h> .mminfra_offset = SZ_2G, ..that way you don't even need a comment saying /* 2GB */ .... Cheers! >>>>> bool control_by_sw; >>>>> bool sw_ddr_en; >>>>> + bool gce_vm; >>>>> + u32 dma_mask_bit; >>>>> u32 gce_num; >>>>> }; >>>>> >>>>> +static inline u32 cmdq_reg_shift_addr(dma_addr_t addr, const >>>>> struct gce_plat *pdata) >>>>> +{ >>>>> + return ((addr + pdata->mminfra_offset) >> pdata->shift); >>>>> +} >>>>> + >>>>> +static inline u32 cmdq_reg_revert_addr(dma_addr_t addr, const >>>>> struct gce_plat *pdata) >>>>> +{ >>>>> + return ((addr << pdata->shift) - pdata->mminfra_offset); >>>>> +} >>>> >>>> I'm not sure that you really need those two functions... probably >>>> it's simply >>>> cleaner and easier to just write that single line every time... >>>> and >>>> I'm >>>> saying that especially for how you're using those functions, with >>>> some readl() >>>> passed directly as param, decreasing human readability by "a >>>> whole >>>> lot" :-) >>>> >>> >>> The reason why I use API wrapper instead of writing it directly in >>> readl() is to avoid missing the shift or mminfra_offset conversion >>> in >>> some places. >>> This problem is not easy to debug, and I have encountered it at >>> least >>> twice... >>> >>> I think the advantage of using function is that it can be uniformly >>> modified to all places that need to handle DRAM address conversion. >>> What do you think? :-) >>> >> >> Eh, if you put it like that... it makes sense, so.. yeah, okay :-) >> >> Still, please cleanup those instances of >> >> `cmdq_reg_revert_addr(readl(something), pdata)` >> >> those might be hard to read, so please just do something like: >> >> regval = readl(something); >> curr_pa = cmdq_revert_addr(regval, pdata); >> >> ...reword to your own liking, of course. >> > > OK, I'll refine that. Thanks. > >>>>> + >>>>> static void cmdq_sw_ddr_enable(struct cmdq *cmdq, bool >>>>> enable) >>>>> { >>>>> WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq- >>>>>> clocks)); >>>>> @@ -112,6 +136,30 @@ u8 cmdq_get_shift_pa(struct mbox_chan >>>>> *chan) >>>>> } >>>>> EXPORT_SYMBOL(cmdq_get_shift_pa); >>>>> >>>>> +dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan) >>>>> +{ >>>>> + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, >>>>> mbox); >>>>> + >>>>> + return cmdq->pdata->mminfra_offset; >>>>> +} >>>>> +EXPORT_SYMBOL(cmdq_get_offset_pa); >>>> >>>> I think I remember this get_offset_pa from the old times, then CK >>>> removed it (and I >>>> was really happy about that disappearing), or am I confusing this >>>> with something >>>> else? >>>> >>>> (of course, this wasn't used for mminfra, but for something >>>> else!) >>>> >>> >>> I can't find any remove history in mtk-cmdq-mailbox.c. >>> >>> Maybe you mean the patch in this series? >>> https://lore.kernel.org/all/171213938049.123698.15573779837703602591.b4-ty@collabora.com/ >>> >> >> Uhm, I think I may have confused something here, but yes I was >> remembering the >> patch series that you pointed out, definitely. >> >> At the end, that series is doing something else, so nevermind, was >> just confusion. >> > > OK, no problem. > >>>>> + >>>>> +bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t >>>>> addr) >>>>> +{ >>>>> + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, >>>>> mbox); >>>>> + >>>>> + if (cmdq->pdata->mminfra_offset == 0) >>>>> + return false; >>>>> + >>>>> + /* >>>>> + * mminfra will recognize the addr that greater than the >>>>> mminfra_offset >>>>> + * as a transaction to DRAM. >>>>> + * So the caller needs to append mminfra_offset for the >>>>> true >>>>> case. >>>>> + */ >>>>> + return (addr >= cmdq->pdata->mminfra_offset); >>>> >>>> >>>> /** >>>> * cmdq_is_mminfra_gce() - Brief description >>>> * @args..... >>>> * >>>> * The MMINFRA GCE will recognize an address greater than DRAM >>>> iostart as a >>>> * DRAM transaction instead of ....xyz >>>> * >>>> * In order for callers to perform (xyz) transactions through >>>> the >>>> CMDQ, those >>>> * need to know if they are using a GCE located in MMINFRA. >>>> */ >>>> bool cmdq_is_mminfra_gce(...) >>>> { >>>> return cmdq->pdata->mminfra_offset && >>>> (addr >= cmdq->pdata->mminfra_offset) >>>> >>>>> +} >>>>> +EXPORT_SYMBOL(cmdq_addr_need_offset); >>>>> + >>>> >>> >>> OK, I'll modify the API like this. >>> >>>> ...but then, is there really no way of just handling the GCE >>>> being in >>>> MMINFRA >>>> transparently from the callers? Do the callers really *need* to >>>> know >>>> that they're >>>> using a new GCE?! >>>> >>> >>> Since the address subtracting is done in MMINFRA hardware, I think >>> GCE >>> users really need to handle it in driver. >>> >> >> Since the users of this infrastructure are multimedia related >> (disp/MDP3), >> I'd also like to get an opinion from MediaTek engineers familiar with >> that. >> >> CK, Moudy, any opinion on that, please? >> >>>> Another way of saying: can't we just handle the address >>>> translation >>>> in here instead >>>> of instructing each and every driver about how to communicate >>>> with >>>> the new GCE?! >>>> >>> >>> The DRAM address may not only be the command buffer to GCE, but >>> also >>> the working buffer provided by CMDQ users and being a part of GCE >>> instruction, so we need to handle the address translation in CMDQ >>> helper driver for the instruction generation. >>> E.g. ISP drivers may use GCE to write a hardware settings to a DRAM >>> as >>> backup buffer. The GCE write instruction will be: >>> WRITE the value of ISP register to DRAM address + mminfra_offset. >>> >>> But most of the CMDQ users only need to use GCE to write hardware >>> register, so I only keep the translation in cmdq_pkt_mem_move(), >>> cmdq_pkt_poll_addr() and cmdq_pkt_jump_abs() at the latest series. >> >> Yeah you're choosing the best of both worlds in that case, I do >> agree, but >> still - if there's a way to avoid drivers to have different handling >> for >> mminfra vs no-mminfra, that'd still be preferred. >> >> Having the handling for something *centralized* somewhere, instead of >> it >> being sparse here and there, would make maintenance way easier... >> >> ...and that's why I'm asking for CK and Moudy's opinion, nothing else >> :-) >> > > Yes, I totally agree with you. Thanks for the asking! > > Regards, > Jason-JH.Lin > >> Cheers! >> Angelo >> >
> > [snip] > > > > > > + > > > > > > #define CMDQ_THR_ACTIVE_SLOT_CYCLES 0x3200 > > > > > > #define CMDQ_THR_ENABLED 0x1 > > > > > > #define CMDQ_THR_DISABLED 0x0 > > > > > > @@ -87,11 +98,24 @@ struct cmdq { > > > > > > struct gce_plat { > > > > > > u32 thread_nr; > > > > > > u8 shift; > > > > > > + dma_addr_t mminfra_offset; > > > > > > > > > > It looks like this is exactly the DRAM's iostart... at least, > > > > > I > > > > > can > > > > > see that in the > > > > > downstream devicetree that's where it starts. > > > > > > > > > > memory: memory@80000000 { > > > > > device_type = "memory"; > > > > > reg = <0 0x80000000 0 0x40000000>; > > > > > }; > > > > > > > > > > It doesn't really look like being a coincidence, but, for the > > > > > sake of > > > > > asking: > > > > > is this just a coincidence? :-) > > > > > > > > > > > > > As the confirmation with the hardware designer in previous > > > > reply > > > > mail > > > > for CK: > > > > https://patchwork.kernel.org/project/linux-mediatek/patch/20250218054405.2017918-4-jason-jh.lin@mediatek.com/*26258463 > > > > > > > > > > That explanation was simply wonderful. > > > > > > > Since the MMINFRA remap subtracting 2G is done in the hardware > > > > circuit > > > > and cannot be configured by software, the address +2G > > > > adjustment is > > > > necessary to implement in the CMDQ driver. > > > > > > > > So that might not be a coincidence. > > > > But even if DRAM start address changes, this mminfra_offset is > > > > still > > > > subtracting 2G, so I think it is a better choice to define it > > > > as > > > > the > > > > driver data for MT8196. > > > > > > > > > > ....so, this makes me think the following: > > > > > > 1. The DRAM start address cannot *ever* be less than 2G, because > > > otherwise the > > > MMINFRA HW would have a hole in the usable address range; > > > 1a. If the start address changes to less than 2G, then also > > > the > > > IOMMU would > > > get limitations, not only the mminfra..! > > > 2b. This makes it very very very unlikely for the start > > > address > > > to be changed > > > to less than 0x80000000 > > > > > > 2. If the DRAM start address changes to be ABOVE 2G (so more than > > > 0x80000000), > > > there would be no point for MMINFRA to start a "config path" > > > write (or read) > > > in the SMMU DRAM block, would it? ;-) > > > > > > > GCE is using IOMMU in MT8196, so all the address put into the GCE > > instruction or GCE register for GCE access should be IOVA. > > > > The DRAM start address is 2G(PA=0x80000000, IOVA=0x0) currently, so > > when GCE want to access the IOVA=0x0, it will need to +2G into the > > instruction, then the MMINFRA will see it as data path(IOVA > 2G) > > and > > subtract 2G for that IOVA, so GCE can finally access the IOVA=0x0. > > > > I'm not sure if I've misunderstood what you mean by ABOVE 2G. :-) > > If DRAM start address is changed to 3G(PA=0xc0000000) the IOVA is > > still > > 0x0, so GCE still need to + 2G to make MMINFRA go to the data path. > > > > But if you mean PA=0x80000000 and IOVA start address is > > 3G(0xc0000000), > > then MMINFRA will go to the data path without GCE +2G. > > However, MMINFRA will -2G when going to the data path and that will > > cause GCE access the wrong IOVA. > > So GCE still need to +2G no matter IOVA start address is already > > can > > make MMINFRA go to the data path(IOVA > 2G). > > > > We have already complained to our hardware designer that MMINFRA - > > 2G > > con not be changed, which will make software operation very > > troublesome. > > So in the next few generations of SoC will change this MMINFRA -2G > > to > > software configurable. Then we can just make IOVA start address to > > 2G > > without adding the mminfra_offset to the IOVA for GCE. > > > > Okay now I got it, the reality is way worse than I was thinking... > eww... :-( > > > > I get it - if the DRAM moves up, MMINFRA is still at 2G because > > > that's hard baked > > > into the hardware, but I foresee that it'll be unlikely to see a > > > platform changing > > > the DRAM start address arbitrarily, getting out-of-sync with > > > MMINFRA. > > > > > > I propose to just get the address from the memory node for now, > > > and > > > to add a nice > > > comment in the code that explains that "In at least MT8196, the > > > MMINFRA hardware > > > subtracts xyz etc etc" (and that explanation from the previous > > > email > > > is again > > > wonderful and shall not be lost: either use that in the comment, > > > or > > > add it to > > > the commit description, because it's really that good). > > > > > > Should a new SoC appear in the future requiring an offset from > > > the > > > DRAM start > > > address, we will think about how to make that work in the best > > > possible way: in > > > that case we could either reference something else to get the > > > right > > > address or > > > we can just change this driver to just use the 2G offset > > > statically > > > for all. > > > > > > What I'm trying to do here is to reduce the amount of changes > > > that > > > we'd need for > > > adding new SoCs: since that 2G MMINFRA offset -> 2G DRAM start is > > > not > > > a coincidence > > > I think that, should the DRAM start vary on new SoCs, the MMINFRA > > > offset will > > > follow the trend and vary with it. > > > > > > So what I think is: > > > 1. If I'm right, adding a new SoC (with different MMINFRA + DRAM > > > offset) will be > > > as easy as adding a compatible string in the bindings, no > > > effort > > > in changing > > > this driver with new pdata offsets; > > > 2. If I'm wrong, adding a new SoC means adding compat string and > > > adding pdata and > > > one variable in the cmdq struct. > > > > > > Where N.2 is what we would do anyway if we don't go with my > > > proposed > > > solution... > > > > > > All this is just to give you my considerations about this topic - > > > you're left > > > completely free to disagree with me. > > > If you disagree, I will trust your judgement, no problem here. > > > > > > > Yes, I think your are right. No matter the IOVA start address > > changing, > > MMINFRA will still -2G(the start address of DRAM PA). > > Do you mean we can get the mminfra_offset from the start address of > > memory in DTS, rather than defining it in pdata? > > > > After the last explanation... no, it would be wrong to get the start > from > memory in DTS, because then this will still need hacks. > I was somehow convinced that the DRAM start address and the MMINFRA > offset > were directly related and that it would've been hard to change the > DRAM > start address with the MMINFRA offset being -2G, but it's not, so > doing it > my way will eventually backfire on us. > > So my way is not good, as it's not showing the reality of things. > > Just go with your current way, as it's really tied to the hardware > and it's > not restricted to that dram start coincidence in the end. That's a > pity. > > Just instead of writing 0x80000000, use " SZ_2G " instead... and > please > add a comment in the code that explains in brief that there's this > strange > behavior for which we.. need that, and that's a static subtraction, > and is > tied to the MMINFRA hardware, and cannot be changed :-( > > btw, being clear.. > > #include <linux/sizes.h> > > .mminfra_offset = SZ_2G, > > ..that way you don't even need a comment saying /* 2GB */ .... > > Cheers! Got it. I'll modify it. Thanks for the confirmation. Regards, Jason-JH Lin
diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index d186865b8dce..0abe10a7fef9 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -43,6 +43,17 @@ #define GCE_CTRL_BY_SW GENMASK(2, 0) #define GCE_DDR_EN GENMASK(18, 16) +#define GCE_VM_ID_MAP0 0x5018 +#define GCE_VM_MAP0_ALL_HOST GENMASK(29, 0) +#define GCE_VM_ID_MAP1 0x501c +#define GCE_VM_MAP1_ALL_HOST GENMASK(29, 0) +#define GCE_VM_ID_MAP2 0x5020 +#define GCE_VM_MAP2_ALL_HOST GENMASK(29, 0) +#define GCE_VM_ID_MAP3 0x5024 +#define GCE_VM_MAP3_ALL_HOST GENMASK(5, 0) +#define GCE_VM_CPR_GSIZE 0x50c4 +#define GCE_VM_CPR_GSIZE_HSOT GENMASK(3, 0) + #define CMDQ_THR_ACTIVE_SLOT_CYCLES 0x3200 #define CMDQ_THR_ENABLED 0x1 #define CMDQ_THR_DISABLED 0x0 @@ -87,11 +98,24 @@ struct cmdq { struct gce_plat { u32 thread_nr; u8 shift; + dma_addr_t mminfra_offset; bool control_by_sw; bool sw_ddr_en; + bool gce_vm; + u32 dma_mask_bit; u32 gce_num; }; +static inline u32 cmdq_reg_shift_addr(dma_addr_t addr, const struct gce_plat *pdata) +{ + return ((addr + pdata->mminfra_offset) >> pdata->shift); +} + +static inline u32 cmdq_reg_revert_addr(dma_addr_t addr, const struct gce_plat *pdata) +{ + return ((addr << pdata->shift) - pdata->mminfra_offset); +} + static void cmdq_sw_ddr_enable(struct cmdq *cmdq, bool enable) { WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks)); @@ -112,6 +136,30 @@ u8 cmdq_get_shift_pa(struct mbox_chan *chan) } EXPORT_SYMBOL(cmdq_get_shift_pa); +dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan) +{ + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, mbox); + + return cmdq->pdata->mminfra_offset; +} +EXPORT_SYMBOL(cmdq_get_offset_pa); + +bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t addr) +{ + struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, mbox); + + if (cmdq->pdata->mminfra_offset == 0) + return false; + + /* + * mminfra will recognize the addr that greater than the mminfra_offset + * as a transaction to DRAM. + * So the caller needs to append mminfra_offset for the true case. + */ + return (addr >= cmdq->pdata->mminfra_offset); +} +EXPORT_SYMBOL(cmdq_addr_need_offset); + static int cmdq_thread_suspend(struct cmdq *cmdq, struct cmdq_thread *thread) { u32 status; @@ -143,6 +191,17 @@ static void cmdq_init(struct cmdq *cmdq) u32 gctl_regval = 0; WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks)); + + if (cmdq->pdata->gce_vm) { + /* config cpr size for host vm */ + writel(GCE_VM_CPR_GSIZE_HSOT, cmdq->base + GCE_VM_CPR_GSIZE); + /* config CPR_GSIZE before setting VM_ID_MAP to avoid data leakage */ + writel(GCE_VM_MAP0_ALL_HOST, cmdq->base + GCE_VM_ID_MAP0); + writel(GCE_VM_MAP1_ALL_HOST, cmdq->base + GCE_VM_ID_MAP1); + writel(GCE_VM_MAP2_ALL_HOST, cmdq->base + GCE_VM_ID_MAP2); + writel(GCE_VM_MAP3_ALL_HOST, cmdq->base + GCE_VM_ID_MAP3); + } + if (cmdq->pdata->control_by_sw) gctl_regval = GCE_CTRL_BY_SW; if (cmdq->pdata->sw_ddr_en) @@ -199,7 +258,7 @@ static void cmdq_task_insert_into_thread(struct cmdq_task *task) prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE); prev_task_base[CMDQ_NUM_CMD(prev_task->pkt) - 1] = (u64)CMDQ_JUMP_BY_PA << 32 | - (task->pa_base >> task->cmdq->pdata->shift); + cmdq_reg_shift_addr(task->pa_base, task->cmdq->pdata); dma_sync_single_for_device(dev, prev_task->pa_base, prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE); @@ -264,7 +323,7 @@ static void cmdq_thread_irq_handler(struct cmdq *cmdq, else return; - curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR) << cmdq->pdata->shift; + curr_pa = cmdq_reg_shift_addr(readl(thread->base + CMDQ_THR_CURR_ADDR), cmdq->pdata); list_for_each_entry_safe(task, tmp, &thread->task_busy_list, list_entry) { @@ -416,9 +475,9 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data) */ WARN_ON(cmdq_thread_reset(cmdq, thread) < 0); - writel(task->pa_base >> cmdq->pdata->shift, + writel(cmdq_reg_shift_addr(task->pa_base, cmdq->pdata), thread->base + CMDQ_THR_CURR_ADDR); - writel((task->pa_base + pkt->cmd_buf_size) >> cmdq->pdata->shift, + writel(cmdq_reg_shift_addr(task->pa_base + pkt->cmd_buf_size, cmdq->pdata), thread->base + CMDQ_THR_END_ADDR); writel(thread->priority, thread->base + CMDQ_THR_PRIORITY); @@ -426,10 +485,10 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data) writel(CMDQ_THR_ENABLED, thread->base + CMDQ_THR_ENABLE_TASK); } else { WARN_ON(cmdq_thread_suspend(cmdq, thread) < 0); - curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR) << - cmdq->pdata->shift; - end_pa = readl(thread->base + CMDQ_THR_END_ADDR) << - cmdq->pdata->shift; + curr_pa = cmdq_reg_revert_addr(readl(thread->base + CMDQ_THR_CURR_ADDR), + cmdq->pdata); + end_pa = cmdq_reg_revert_addr(readl(thread->base + CMDQ_THR_END_ADDR), + cmdq->pdata); /* check boundary */ if (curr_pa == end_pa - CMDQ_INST_SIZE || curr_pa == end_pa) { @@ -663,6 +722,9 @@ static int cmdq_probe(struct platform_device *pdev) if (err) return err; + if (cmdq->pdata->dma_mask_bit) + dma_set_coherent_mask(dev, DMA_BIT_MASK(cmdq->pdata->dma_mask_bit)); + cmdq->mbox.dev = dev; cmdq->mbox.chans = devm_kcalloc(dev, cmdq->pdata->thread_nr, sizeof(*cmdq->mbox.chans), GFP_KERNEL); @@ -782,6 +844,17 @@ static const struct gce_plat gce_plat_mt8195 = { .gce_num = 2 }; +static const struct gce_plat gce_plat_mt8196 = { + .thread_nr = 32, + .shift = 3, + .mminfra_offset = 0x80000000, /* 2GB */ + .control_by_sw = true, + .sw_ddr_en = true, + .gce_vm = true, + .dma_mask_bit = 35, + .gce_num = 2 +}; + static const struct of_device_id cmdq_of_ids[] = { {.compatible = "mediatek,mt6779-gce", .data = (void *)&gce_plat_mt6779}, {.compatible = "mediatek,mt8173-gce", .data = (void *)&gce_plat_mt8173}, @@ -790,6 +863,7 @@ static const struct of_device_id cmdq_of_ids[] = { {.compatible = "mediatek,mt8188-gce", .data = (void *)&gce_plat_mt8188}, {.compatible = "mediatek,mt8192-gce", .data = (void *)&gce_plat_mt8192}, {.compatible = "mediatek,mt8195-gce", .data = (void *)&gce_plat_mt8195}, + {.compatible = "mediatek,mt8196-gce", .data = (void *)&gce_plat_mt8196}, {} }; MODULE_DEVICE_TABLE(of, cmdq_of_ids); diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index a8f0070c7aa9..79398bf95f8d 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -79,5 +79,7 @@ struct cmdq_pkt { }; u8 cmdq_get_shift_pa(struct mbox_chan *chan); +dma_addr_t cmdq_get_offset_pa(struct mbox_chan *chan); +bool cmdq_addr_need_offset(struct mbox_chan *chan, dma_addr_t addr); #endif /* __MTK_CMDQ_MAILBOX_H__ */
MT8196 has 3 new hardware configuration compared with the previous SoC, which correspond to the 3 new driver data: 1. mminfra_offset: For GCE data plane control Since GCE has been moved into mminfra, GCE needs to append the mminfra offset to the DRAM address when accessing the DRAM. 2. gce_vm: For GCE hardware virtualization Currently, the first version of the mt8196 mailbox controller only requires setting the VM-related registers to enable the permissions of a host VM. 3. dma_mask_bit: For dma address bit control In order to avoid the hardware limitations of MT8196 accessing DRAM, GCE needs to configure the DMA address to be less than 35 bits. Signed-off-by: Jason-JH Lin <jason-jh.lin@mediatek.com> --- drivers/mailbox/mtk-cmdq-mailbox.c | 90 +++++++++++++++++++++--- include/linux/mailbox/mtk-cmdq-mailbox.h | 2 + 2 files changed, 84 insertions(+), 8 deletions(-)