diff mbox

[2/2] drm/radeon: Always flush the HDP cache before submitting a CS to the GPU

Message ID 1406799831-2502-3-git-send-email-michel@daenzer.net (mailing list archive)
State New, archived
Headers show

Commit Message

Michel Dänzer July 31, 2014, 9:43 a.m. UTC
From: Michel Dänzer <michel.daenzer@amd.com>

This ensures the GPU sees all previous CPU writes to VRAM, which makes it
safe:

* For userspace to stream data from CPU to GPU via VRAM instead of GTT
* For IBs to be stored in VRAM instead of GTT
* For ring buffers to be stored in VRAM instead of GTT, if the HPD flush
  is performed via MMIO

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
---
 drivers/gpu/drm/radeon/cik.c         |  4 ----
 drivers/gpu/drm/radeon/r100.c        | 20 +++++++++++++++-----
 drivers/gpu/drm/radeon/radeon.h      |  1 +
 drivers/gpu/drm/radeon/radeon_asic.c |  6 ++++--
 drivers/gpu/drm/radeon/radeon_asic.h |  3 ++-
 drivers/gpu/drm/radeon/radeon_drv.c  |  4 +++-
 drivers/gpu/drm/radeon/radeon_ring.c | 10 ++++++++++
 7 files changed, 35 insertions(+), 13 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 67194a5..a5dfe58 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3666,8 +3666,6 @@  void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
 	radeon_ring_write(ring, fence->seq);
 	radeon_ring_write(ring, 0);
-	/* HDP flush */
-	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
 }
 
 /**
@@ -3696,8 +3694,6 @@  void cik_fence_compute_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, upper_32_bits(addr));
 	radeon_ring_write(ring, fence->seq);
 	radeon_ring_write(ring, 0);
-	/* HDP flush */
-	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
 }
 
 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 9241b89..e1bed43 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -837,11 +837,7 @@  void r100_fence_ring_emit(struct radeon_device *rdev,
 	/* Wait until IDLE & CLEAN */
 	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
 	radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
-	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
-				RADEON_HDP_READ_BUFFER_INVALIDATE);
-	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
+	r100_ring_hdp_flush(rdev, ring);
 	/* Emit fence sequence & fire IRQ */
 	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
 	radeon_ring_write(ring, fence->seq);
@@ -1060,6 +1056,20 @@  void r100_gfx_set_wptr(struct radeon_device *rdev,
 	(void)RREG32(RADEON_CP_RB_WPTR);
 }
 
+/**
+ * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
+ * rdev: radeon device structure
+ * ring: ring buffer struct for emitting packets
+ */
+void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
+				RADEON_HDP_READ_BUFFER_INVALIDATE);
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
+}
+
 static void r100_cp_load_microcode(struct radeon_device *rdev)
 {
 	const __be32 *fw_data;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 4a76e13..bc970b6 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1748,6 +1748,7 @@  struct radeon_asic_ring {
 	/* command emmit functions */
 	void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
 	void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
+	void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring);
 	bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
 			       struct radeon_semaphore *semaphore, bool emit_wait);
 	void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index ba8caa7..1cb9330 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -185,6 +185,7 @@  static struct radeon_asic_ring r100_gfx_ring = {
 	.get_rptr = &r100_gfx_get_rptr,
 	.get_wptr = &r100_gfx_get_wptr,
 	.set_wptr = &r100_gfx_set_wptr,
+	.hdp_flush = &r100_ring_hdp_flush,
 };
 
 static struct radeon_asic r100_asic = {
@@ -331,6 +332,7 @@  static struct radeon_asic_ring r300_gfx_ring = {
 	.get_rptr = &r100_gfx_get_rptr,
 	.get_wptr = &r100_gfx_get_wptr,
 	.set_wptr = &r100_gfx_set_wptr,
+	.hdp_flush = &r100_ring_hdp_flush,
 };
 
 static struct radeon_asic r300_asic = {
@@ -1987,7 +1989,7 @@  static struct radeon_asic ci_asic = {
 	.resume = &cik_resume,
 	.asic_reset = &cik_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
-	.mmio_hdp_flush = NULL,
+	.mmio_hdp_flush = &r600_mmio_hdp_flush,
 	.gui_idle = &r600_gui_idle,
 	.mc_wait_for_idle = &evergreen_mc_wait_for_idle,
 	.get_xclk = &cik_get_xclk,
@@ -2091,7 +2093,7 @@  static struct radeon_asic kv_asic = {
 	.resume = &cik_resume,
 	.asic_reset = &cik_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
-	.mmio_hdp_flush = NULL,
+	.mmio_hdp_flush = &r600_mmio_hdp_flush,
 	.gui_idle = &r600_gui_idle,
 	.mc_wait_for_idle = &evergreen_mc_wait_for_idle,
 	.get_xclk = &cik_get_xclk,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index b8826c6..3cf6be6 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -148,7 +148,8 @@  u32 r100_gfx_get_wptr(struct radeon_device *rdev,
 		      struct radeon_ring *ring);
 void r100_gfx_set_wptr(struct radeon_device *rdev,
 		       struct radeon_ring *ring);
-
+void r100_ring_hdp_flush(struct radeon_device *rdev,
+			 struct radeon_ring *ring);
 /*
  * r200,rv250,rs300,rv280
  */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index e9e3610..54aa293 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -82,9 +82,11 @@ 
  *   2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN),
  *            CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
  *   2.39.0 - Add INFO query for number of active CUs
+ *   2.40.0 - Add RADEON_GEM_GTT_WC/UC, flush HDP cache before submitting
+ *            CS to GPU
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	39
+#define KMS_DRIVER_MINOR	40
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 20b0e4f..ec27e05 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -445,11 +445,21 @@  int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig
  */
 void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring)
 {
+	/* If we are emitting the HDP flush via the ring buffer, we need to
+	 * do it before padding.
+	 */
+	if (rdev->asic->ring[ring->idx]->hdp_flush)
+		rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring);
 	/* We pad to match fetch size */
 	while (ring->wptr & ring->align_mask) {
 		radeon_ring_write(ring, ring->nop);
 	}
 	mb();
+	/* If we are emitting the HDP flush via MMIO, we need to do it after
+	 * all CPU writes to VRAM finished.
+	 */
+	if (rdev->asic->mmio_hdp_flush)
+		rdev->asic->mmio_hdp_flush(rdev);
 	radeon_ring_set_wptr(rdev, ring);
 }