Message ID | 20241210083029.92620-1-bigunclemax@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v2] clk: thead: Fix TH1520 emmc and sdhci clock rate | expand |
On Tue, 2024-12-10 at 11:30 +0300, bigunclemax@gmail.com wrote: > From: Maksim Kiselev <bigunclemax@gmail.com> > > In accordance with LicheePi 4A BSP the clock that comes to emmc/sdhci > is 198Mhz which is got through frequency division of source clock > VIDEO PLL by 4 [1]. > > But now the AP_SUBSYS driver sets the CLK EMMC SDIO to the same > frequency as the VIDEO PLL, equal to 792 MHz. This causes emmc/sdhci > to work 4 times slower. > > Let's fix this issue by adding fixed factor clock that divides > VIDEO PLL by 4 for emmc/sdhci. > > Link: https://github.com/revyos/thead-kernel/blob/7563179071a314f41cdcdbfd8cf6e101e73707f3/drivers/clk/thead/clk-light-fm.c#L454 > > Fixes: ae81b69fd2b1 ("clk: thead: Add support for T-Head TH1520 AP_SUBSYS clocks") > Signed-off-by: Maksim Kiselev <bigunclemax@gmail.com> It has raised the "dd if=gcc-14.2.0.tar.xz of=/dev/null" speed report from 22MB/s to 61MB/s (the file is on a SD card). Tested-by: Xi Ruoyao <xry111@xry111.site> > --- > drivers/clk/thead/clk-th1520-ap.c | 13 ++++++++++++- > 1 file changed, 12 insertions(+), 1 deletion(-) > > diff --git a/drivers/clk/thead/clk-th1520-ap.c b/drivers/clk/thead/clk-th1520-ap.c > index 17e32ae08720..1015fab95251 100644 > --- a/drivers/clk/thead/clk-th1520-ap.c > +++ b/drivers/clk/thead/clk-th1520-ap.c > @@ -779,6 +779,13 @@ static struct ccu_div dpu1_clk = { > }, > }; > > +static CLK_FIXED_FACTOR_HW(emmc_sdio_ref_clk, "emmc-sdio-ref", > + &video_pll_clk.common.hw, 4, 1, 0); > + > +static const struct clk_parent_data emmc_sdio_ref_clk_pd[] = { > + { .hw = &emmc_sdio_ref_clk.hw }, > +}; > + > static CCU_GATE(CLK_BROM, brom_clk, "brom", ahb2_cpusys_hclk_pd, 0x100, BIT(4), 0); > static CCU_GATE(CLK_BMU, bmu_clk, "bmu", axi4_cpusys2_aclk_pd, 0x100, BIT(5), 0); > static CCU_GATE(CLK_AON2CPU_A2X, aon2cpu_a2x_clk, "aon2cpu-a2x", axi4_cpusys2_aclk_pd, > @@ -798,7 +805,7 @@ static CCU_GATE(CLK_PERISYS_APB4_HCLK, perisys_apb4_hclk, "perisys-apb4-hclk", p > 0x150, BIT(12), 0); > static 
CCU_GATE(CLK_NPU_AXI, npu_axi_clk, "npu-axi", axi_aclk_pd, 0x1c8, BIT(5), 0); > static CCU_GATE(CLK_CPU2VP, cpu2vp_clk, "cpu2vp", axi_aclk_pd, 0x1e0, BIT(13), 0); > -static CCU_GATE(CLK_EMMC_SDIO, emmc_sdio_clk, "emmc-sdio", video_pll_clk_pd, 0x204, BIT(30), 0); > +static CCU_GATE(CLK_EMMC_SDIO, emmc_sdio_clk, "emmc-sdio", emmc_sdio_ref_clk_pd, 0x204, BIT(30), 0); > static CCU_GATE(CLK_GMAC1, gmac1_clk, "gmac1", gmac_pll_clk_pd, 0x204, BIT(26), 0); > static CCU_GATE(CLK_PADCTRL1, padctrl1_clk, "padctrl1", perisys_apb_pclk_pd, 0x204, BIT(24), 0); > static CCU_GATE(CLK_DSMART, dsmart_clk, "dsmart", perisys_apb_pclk_pd, 0x204, BIT(23), 0); > @@ -1059,6 +1066,10 @@ static int th1520_clk_probe(struct platform_device *pdev) > return ret; > priv->hws[CLK_PLL_GMAC_100M] = &gmac_pll_clk_100m.hw; > > + ret = devm_clk_hw_register(dev, &emmc_sdio_ref_clk.hw); > + if (ret) > + return ret; > + > ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, priv); > if (ret) > return ret;
On Tue, Dec 10, 2024 at 11:30:27AM +0300, bigunclemax@gmail.com wrote: > From: Maksim Kiselev <bigunclemax@gmail.com> > > In accordance with LicheePi 4A BSP the clock that comes to emmc/sdhci > is 198Mhz which is got through frequency division of source clock > VIDEO PLL by 4 [1]. > > But now the AP_SUBSYS driver sets the CLK EMMC SDIO to the same > frequency as the VIDEO PLL, equal to 792 MHz. This causes emmc/sdhci > to work 4 times slower. > > Let's fix this issue by adding fixed factor clock that divides > VIDEO PLL by 4 for emmc/sdhci. > > Link: https://github.com/revyos/thead-kernel/blob/7563179071a314f41cdcdbfd8cf6e101e73707f3/drivers/clk/thead/clk-light-fm.c#L454 > > Fixes: ae81b69fd2b1 ("clk: thead: Add support for T-Head TH1520 AP_SUBSYS clocks") > Signed-off-by: Maksim Kiselev <bigunclemax@gmail.com> > --- > drivers/clk/thead/clk-th1520-ap.c | 13 ++++++++++++- > 1 file changed, 12 insertions(+), 1 deletion(-) > > diff --git a/drivers/clk/thead/clk-th1520-ap.c b/drivers/clk/thead/clk-th1520-ap.c > index 17e32ae08720..1015fab95251 100644 > --- a/drivers/clk/thead/clk-th1520-ap.c > +++ b/drivers/clk/thead/clk-th1520-ap.c > @@ -779,6 +779,13 @@ static struct ccu_div dpu1_clk = { > }, > }; > > +static CLK_FIXED_FACTOR_HW(emmc_sdio_ref_clk, "emmc-sdio-ref", > + &video_pll_clk.common.hw, 4, 1, 0); > + > +static const struct clk_parent_data emmc_sdio_ref_clk_pd[] = { > + { .hw = &emmc_sdio_ref_clk.hw }, > +}; > + > static CCU_GATE(CLK_BROM, brom_clk, "brom", ahb2_cpusys_hclk_pd, 0x100, BIT(4), 0); > static CCU_GATE(CLK_BMU, bmu_clk, "bmu", axi4_cpusys2_aclk_pd, 0x100, BIT(5), 0); > static CCU_GATE(CLK_AON2CPU_A2X, aon2cpu_a2x_clk, "aon2cpu-a2x", axi4_cpusys2_aclk_pd, > @@ -798,7 +805,7 @@ static CCU_GATE(CLK_PERISYS_APB4_HCLK, perisys_apb4_hclk, "perisys-apb4-hclk", p > 0x150, BIT(12), 0); > static CCU_GATE(CLK_NPU_AXI, npu_axi_clk, "npu-axi", axi_aclk_pd, 0x1c8, BIT(5), 0); > static CCU_GATE(CLK_CPU2VP, cpu2vp_clk, "cpu2vp", axi_aclk_pd, 0x1e0, BIT(13), 0); > 
-static CCU_GATE(CLK_EMMC_SDIO, emmc_sdio_clk, "emmc-sdio", video_pll_clk_pd, 0x204, BIT(30), 0); > +static CCU_GATE(CLK_EMMC_SDIO, emmc_sdio_clk, "emmc-sdio", emmc_sdio_ref_clk_pd, 0x204, BIT(30), 0); > static CCU_GATE(CLK_GMAC1, gmac1_clk, "gmac1", gmac_pll_clk_pd, 0x204, BIT(26), 0); > static CCU_GATE(CLK_PADCTRL1, padctrl1_clk, "padctrl1", perisys_apb_pclk_pd, 0x204, BIT(24), 0); > static CCU_GATE(CLK_DSMART, dsmart_clk, "dsmart", perisys_apb_pclk_pd, 0x204, BIT(23), 0); > @@ -1059,6 +1066,10 @@ static int th1520_clk_probe(struct platform_device *pdev) > return ret; > priv->hws[CLK_PLL_GMAC_100M] = &gmac_pll_clk_100m.hw; > > + ret = devm_clk_hw_register(dev, &emmc_sdio_ref_clk.hw); > + if (ret) > + return ret; > + > ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, priv); > if (ret) > return ret; > -- > 2.45.2 > Reviewed-by: Drew Fustini <dfustini@tenstorrent.com> Thanks for fixing this. Reads are over 3 times faster now. 6.13-rc1: debian@lpi4amain:~$ dd bs=1M count=512 if=/dev/zero of=zero_512M.bin oflag=direct 512+0 records in 512+0 records out 536870912 bytes (537 MB, 512 MiB) copied, 11.8457 s, 45.3 MB/s debian@lpi4amain:~$ dd bs=1M if=zero_512M.bin iflag=direct of=/dev/null 512+0 records in 512+0 records out 536870912 bytes (537 MB, 512 MiB) copied, 6.60576 s, 81.3 MB/s 6.13-rc1 with patch: debian@lpi4amain:~$ dd bs=1M count=512 if=/dev/zero of=zero_512M.bin oflag=direct 512+0 records in 512+0 records out 536870912 bytes (537 MB, 512 MiB) copied, 11.5359 s, 46.5 MB/s debian@lpi4amain:~$ dd bs=1M if=zero_512M.bin iflag=direct of=/dev/null 512+0 records in 512+0 records out 536870912 bytes (537 MB, 512 MiB) copied, 2.03638 s, 264 MB/s -Drew
Quoting bigunclemax@gmail.com (2024-12-10 00:30:27) > From: Maksim Kiselev <bigunclemax@gmail.com> > > In accordance with the LicheePi 4A BSP, the clock that comes to emmc/sdhci > is 198 MHz, which is obtained by dividing the source clock > VIDEO PLL by 4 [1]. > > But now the AP_SUBSYS driver sets the CLK EMMC SDIO to the same > frequency as the VIDEO PLL, equal to 792 MHz. This causes emmc/sdhci > to work 4 times slower. > > Let's fix this issue by adding a fixed factor clock that divides > VIDEO PLL by 4 for emmc/sdhci. > > Link: https://github.com/revyos/thead-kernel/blob/7563179071a314f41cdcdbfd8cf6e101e73707f3/drivers/clk/thead/clk-light-fm.c#L454 [1] > > Fixes: ae81b69fd2b1 ("clk: thead: Add support for T-Head TH1520 AP_SUBSYS clocks") > Signed-off-by: Maksim Kiselev <bigunclemax@gmail.com> > --- Applied to clk-fixes
diff --git a/drivers/clk/thead/clk-th1520-ap.c b/drivers/clk/thead/clk-th1520-ap.c index 17e32ae08720..1015fab95251 100644 --- a/drivers/clk/thead/clk-th1520-ap.c +++ b/drivers/clk/thead/clk-th1520-ap.c @@ -779,6 +779,13 @@ static struct ccu_div dpu1_clk = { }, }; +static CLK_FIXED_FACTOR_HW(emmc_sdio_ref_clk, "emmc-sdio-ref", + &video_pll_clk.common.hw, 4, 1, 0); + +static const struct clk_parent_data emmc_sdio_ref_clk_pd[] = { + { .hw = &emmc_sdio_ref_clk.hw }, +}; + static CCU_GATE(CLK_BROM, brom_clk, "brom", ahb2_cpusys_hclk_pd, 0x100, BIT(4), 0); static CCU_GATE(CLK_BMU, bmu_clk, "bmu", axi4_cpusys2_aclk_pd, 0x100, BIT(5), 0); static CCU_GATE(CLK_AON2CPU_A2X, aon2cpu_a2x_clk, "aon2cpu-a2x", axi4_cpusys2_aclk_pd, @@ -798,7 +805,7 @@ static CCU_GATE(CLK_PERISYS_APB4_HCLK, perisys_apb4_hclk, "perisys-apb4-hclk", p 0x150, BIT(12), 0); static CCU_GATE(CLK_NPU_AXI, npu_axi_clk, "npu-axi", axi_aclk_pd, 0x1c8, BIT(5), 0); static CCU_GATE(CLK_CPU2VP, cpu2vp_clk, "cpu2vp", axi_aclk_pd, 0x1e0, BIT(13), 0); -static CCU_GATE(CLK_EMMC_SDIO, emmc_sdio_clk, "emmc-sdio", video_pll_clk_pd, 0x204, BIT(30), 0); +static CCU_GATE(CLK_EMMC_SDIO, emmc_sdio_clk, "emmc-sdio", emmc_sdio_ref_clk_pd, 0x204, BIT(30), 0); static CCU_GATE(CLK_GMAC1, gmac1_clk, "gmac1", gmac_pll_clk_pd, 0x204, BIT(26), 0); static CCU_GATE(CLK_PADCTRL1, padctrl1_clk, "padctrl1", perisys_apb_pclk_pd, 0x204, BIT(24), 0); static CCU_GATE(CLK_DSMART, dsmart_clk, "dsmart", perisys_apb_pclk_pd, 0x204, BIT(23), 0); @@ -1059,6 +1066,10 @@ static int th1520_clk_probe(struct platform_device *pdev) return ret; priv->hws[CLK_PLL_GMAC_100M] = &gmac_pll_clk_100m.hw; + ret = devm_clk_hw_register(dev, &emmc_sdio_ref_clk.hw); + if (ret) + return ret; + ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, priv); if (ret) return ret;