diff mbox series

drm/tegra: falcon: Pipeline firmware copy

Message ID 20250205061027.1205748-1-mperttunen@nvidia.com (mailing list archive)
State New
Headers show
Series drm/tegra: falcon: Pipeline firmware copy | expand

Commit Message

Mikko Perttunen Feb. 5, 2025, 6:10 a.m. UTC
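The Falcon DMA engine allows queueing multiple operations for
improved performance. Use this to optimize firmware loading: before
submitting each chunk, only wait until the DMA queue is not full, and
wait for the engine to become idle once after the chunks have been
queued. A performance improvement of 4x to 6x is observed.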

Co-developed-by: Ivan Raul Guadarrama <iguadarrama@nvidia.com>
Signed-off-by: Ivan Raul Guadarrama <iguadarrama@nvidia.com>
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
---
 drivers/gpu/drm/tegra/falcon.c | 20 +++++++++++++++++++-
 drivers/gpu/drm/tegra/falcon.h |  1 +
 2 files changed, 20 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/tegra/falcon.c b/drivers/gpu/drm/tegra/falcon.c
index c0d85463eb1a..17f616bbcb45 100644
--- a/drivers/gpu/drm/tegra/falcon.c
+++ b/drivers/gpu/drm/tegra/falcon.c
@@ -30,6 +30,14 @@  int falcon_wait_idle(struct falcon *falcon)
 				  (value == 0), 10, 100000);
 }
 
+static int falcon_dma_wait_not_full(struct falcon *falcon)
+{
+	u32 value;
+
+	return readl_poll_timeout(falcon->regs + FALCON_DMATRFCMD, value,
+				  !(value & FALCON_DMATRFCMD_FULL), 10, 100000);
+}
+
 static int falcon_dma_wait_idle(struct falcon *falcon)
 {
 	u32 value;
@@ -44,6 +52,7 @@  static int falcon_copy_chunk(struct falcon *falcon,
 			     enum falcon_memory target)
 {
 	u32 cmd = FALCON_DMATRFCMD_SIZE_256B;
+	int err;
 
 	if (target == FALCON_MEMORY_IMEM)
 		cmd |= FALCON_DMATRFCMD_IMEM;
@@ -56,11 +65,15 @@  static int falcon_copy_chunk(struct falcon *falcon,
 	 */
 	cmd |= FALCON_DMATRFCMD_DMACTX(1);
 
+	err = falcon_dma_wait_not_full(falcon);
+	if (err < 0)
+		return err;
+
 	falcon_writel(falcon, offset, FALCON_DMATRFMOFFS);
 	falcon_writel(falcon, base, FALCON_DMATRFFBOFFS);
 	falcon_writel(falcon, cmd, FALCON_DMATRFCMD);
 
-	return falcon_dma_wait_idle(falcon);
+	return 0;
 }
 
 static void falcon_copy_firmware_image(struct falcon *falcon,
@@ -191,6 +204,11 @@  int falcon_boot(struct falcon *falcon)
 		falcon_copy_chunk(falcon, falcon->firmware.code.offset + offset,
 				  offset, FALCON_MEMORY_IMEM);
 
+	/* wait for DMA to complete */
+	err = falcon_dma_wait_idle(falcon);
+	if (err < 0)
+		return err;
+
 	/* setup falcon interrupts */
 	falcon_writel(falcon, FALCON_IRQMSET_EXT(0xff) |
 			      FALCON_IRQMSET_SWGEN1 |
diff --git a/drivers/gpu/drm/tegra/falcon.h b/drivers/gpu/drm/tegra/falcon.h
index 1955cf11a8a6..902bb7e4fd0f 100644
--- a/drivers/gpu/drm/tegra/falcon.h
+++ b/drivers/gpu/drm/tegra/falcon.h
@@ -47,6 +47,7 @@ 
 #define FALCON_DMATRFMOFFS			0x00001114
 
 #define FALCON_DMATRFCMD			0x00001118
+#define FALCON_DMATRFCMD_FULL			(1 << 0)
 #define FALCON_DMATRFCMD_IDLE			(1 << 1)
 #define FALCON_DMATRFCMD_IMEM			(1 << 4)
 #define FALCON_DMATRFCMD_SIZE_256B		(6 << 8)