diff mbox

[2/4] drm/radeon: allow concurrent BO access by different engines

Message ID 1408032725-6236-3-git-send-email-deathsimple@vodafone.de (mailing list archive)
State New, archived
Headers show

Commit Message

Christian König Aug. 14, 2014, 4:12 p.m. UTC
From: Christian König <christian.koenig@amd.com>

This patch allows different engines to access the same BO concurrently
as long as all of them only read from it. Since TTM can't (yet) handle
multiple fences for one BO, we still sync to the fence after executing the IB.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h     |  2 ++
 drivers/gpu/drm/radeon/radeon_cs.c  | 24 +++++++++++++++++++++++-
 drivers/gpu/drm/radeon/radeon_ttm.c |  8 ++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 4579361..c0f7773 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -478,6 +478,7 @@  struct radeon_bo {
 	u32				tiling_flags;
 	u32				pitch;
 	int				surface_reg;
+	struct radeon_fence		*written;
 	/* list of all virtual address to which this bo
 	 * is associated to
 	 */
@@ -1017,6 +1018,7 @@  struct radeon_cs_reloc {
 	unsigned			allowed_domains;
 	uint32_t			tiling_flags;
 	uint32_t			handle;
+	bool				written;
 };
 
 struct radeon_cs_chunk {
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 2be4fc5..3aa7e48 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -166,6 +166,7 @@  static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 
 		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
 		p->relocs[i].handle = r->handle;
+		p->relocs[i].written = !!r->write_domain;
 
 		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
 				      priority);
@@ -236,7 +237,16 @@  static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
 			continue;
 
 		fence = bo->tbo.sync_obj;
-		radeon_semaphore_sync_to(p->ib.presync, fence);
+
+		if (bo->written && radeon_fence_signaled(bo->written))
+			radeon_fence_unref(&bo->written);
+
+		/* if either this CS or the last one write to
+		   the BO we sync before executing the IB */
+		if (reloc->written || bo->written)
+			radeon_semaphore_sync_to(p->ib.presync, fence);
+		else
+			radeon_semaphore_sync_to(p->ib.postsync, fence);
 	}
 }
 
@@ -406,6 +416,18 @@  static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 		 */
 		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
 
+		/* remember which BOs we write to */
+		for (i = 0; i < parser->nrelocs; i++) {
+			struct radeon_cs_reloc *reloc = &parser->relocs[i];
+			struct radeon_bo *bo = reloc->robj;
+
+			if (!bo || !reloc->written)
+				continue;
+
+			radeon_fence_unref(&bo->written);
+			bo->written = radeon_fence_ref(parser->ib.fence);
+		}
+
 		ttm_eu_fence_buffer_objects(&parser->ticket,
 					    &parser->validated,
 					    parser->ib.fence);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 72afe82..76be612 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -228,10 +228,12 @@  static int radeon_move_blit(struct ttm_buffer_object *bo,
 	struct radeon_device *rdev;
 	uint64_t old_start, new_start;
 	struct radeon_fence *fence;
+	struct radeon_bo *rbo;
 	int r, ridx;
 
 	rdev = radeon_get_rdev(bo->bdev);
 	ridx = radeon_copy_ring_index(rdev);
+	rbo = container_of(bo, struct radeon_bo, tbo);
 	old_start = old_mem->start << PAGE_SHIFT;
 	new_start = new_mem->start << PAGE_SHIFT;
 
@@ -269,6 +271,12 @@  static int radeon_move_blit(struct ttm_buffer_object *bo,
 	r = radeon_copy(rdev, old_start, new_start,
 			new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
 			&fence);
+
+	if (!r) {
+		radeon_fence_unref(&rbo->written);
+		rbo->written = radeon_fence_ref(fence);
+	}
+
 	/* FIXME: handle copy error */
 	r = ttm_bo_move_accel_cleanup(bo, (void *)fence,
 				      evict, no_wait_gpu, new_mem);