Message ID | 1373571341-3694-2-git-send-email-alexdeucher@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
I have just discovered that WAIT_UNTIL=WAIT_3D_IDLE must be set before using CP DMA. Marek On Thu, Jul 11, 2013 at 9:35 PM, <alexdeucher@gmail.com> wrote: > From: Alex Deucher <alexander.deucher@amd.com> > > Lighter weight than using the 3D engine. > > v2: fix ring count > > Signed-off-by: Alex Deucher <alexander.deucher@amd.com> > --- > drivers/gpu/drm/radeon/r600.c | 81 ++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/radeon/r600d.h | 1 + > drivers/gpu/drm/radeon/radeon_asic.h | 3 + > 3 files changed, 85 insertions(+), 0 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c > index 2d3655f..f7d494f 100644 > --- a/drivers/gpu/drm/radeon/r600.c > +++ b/drivers/gpu/drm/radeon/r600.c > @@ -3145,6 +3145,87 @@ int r600_copy_blit(struct radeon_device *rdev, > } > > /** > + * r600_copy_cpdma - copy pages using the CP DMA engine > + * > + * @rdev: radeon_device pointer > + * @src_offset: src GPU address > + * @dst_offset: dst GPU address > + * @num_gpu_pages: number of GPU pages to xfer > + * @fence: radeon fence object > + * > + * Copy GPU paging using the CP DMA engine (r6xx+). > + * Used by the radeon ttm implementation to move pages if > + * registered as the asic copy callback. 
> + */ > +int r600_copy_cpdma(struct radeon_device *rdev, > + uint64_t src_offset, uint64_t dst_offset, > + unsigned num_gpu_pages, > + struct radeon_fence **fence) > +{ > + struct radeon_semaphore *sem = NULL; > + int ring_index = rdev->asic->copy.blit_ring_index; > + struct radeon_ring *ring = &rdev->ring[ring_index]; > + u32 size_in_bytes, cur_size_in_bytes, tmp; > + int i, num_loops; > + int r = 0; > + > + r = radeon_semaphore_create(rdev, &sem); > + if (r) { > + DRM_ERROR("radeon: moving bo (%d).\n", r); > + return r; > + } > + > + size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); > + num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); > + r = radeon_ring_lock(rdev, ring, num_loops * 6 + 21); > + if (r) { > + DRM_ERROR("radeon: moving bo (%d).\n", r); > + radeon_semaphore_free(rdev, &sem, NULL); > + return r; > + } > + > + if (radeon_fence_need_sync(*fence, ring->idx)) { > + radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, > + ring->idx); > + radeon_fence_note_sync(*fence, ring->idx); > + } else { > + radeon_semaphore_free(rdev, &sem, NULL); > + } > + > + for (i = 0; i < num_loops; i++) { > + cur_size_in_bytes = size_in_bytes; > + if (cur_size_in_bytes > 0x1fffff) > + cur_size_in_bytes = 0x1fffff; > + size_in_bytes -= cur_size_in_bytes; > + tmp = upper_32_bits(src_offset) & 0xff; > + if (size_in_bytes == 0) > + tmp |= PACKET3_CP_DMA_CP_SYNC; > + radeon_ring_write(ring, PACKET3(PACKET3_CP_DMA, 4)); > + radeon_ring_write(ring, src_offset & 0xffffffff); > + radeon_ring_write(ring, tmp); > + radeon_ring_write(ring, dst_offset & 0xffffffff); > + radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff); > + radeon_ring_write(ring, cur_size_in_bytes); > + src_offset += cur_size_in_bytes; > + dst_offset += cur_size_in_bytes; > + } > + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); > + radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); > + radeon_ring_write(ring, WAIT_CP_DMA_IDLE_bit); > + > + r = 
radeon_fence_emit(rdev, fence, ring->idx); > + if (r) { > + radeon_ring_unlock_undo(rdev, ring); > + return r; > + } > + > + radeon_ring_unlock_commit(rdev, ring); > + radeon_semaphore_free(rdev, &sem, *fence); > + > + return r; > +} > + > +/** > * r600_copy_dma - copy pages using the DMA engine > * > * @rdev: radeon_device pointer > diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h > index f1b3084..8e3fe81 100644 > --- a/drivers/gpu/drm/radeon/r600d.h > +++ b/drivers/gpu/drm/radeon/r600d.h > @@ -602,6 +602,7 @@ > #define L2_BUSY (1 << 0) > > #define WAIT_UNTIL 0x8040 > +#define WAIT_CP_DMA_IDLE_bit (1 << 8) > #define WAIT_2D_IDLE_bit (1 << 14) > #define WAIT_3D_IDLE_bit (1 << 15) > #define WAIT_2D_IDLECLEAN_bit (1 << 16) > diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h > index 45d0693..b04b578 100644 > --- a/drivers/gpu/drm/radeon/radeon_asic.h > +++ b/drivers/gpu/drm/radeon/radeon_asic.h > @@ -340,6 +340,9 @@ int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); > int r600_copy_blit(struct radeon_device *rdev, > uint64_t src_offset, uint64_t dst_offset, > unsigned num_gpu_pages, struct radeon_fence **fence); > +int r600_copy_cpdma(struct radeon_device *rdev, > + uint64_t src_offset, uint64_t dst_offset, > + unsigned num_gpu_pages, struct radeon_fence **fence); > int r600_copy_dma(struct radeon_device *rdev, > uint64_t src_offset, uint64_t dst_offset, > unsigned num_gpu_pages, struct radeon_fence **fence); > -- > 1.7.7.5 > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/dri-devel
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 2d3655f..f7d494f 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -3145,6 +3145,101 @@ int r600_copy_blit(struct radeon_device *rdev,
 }
 
 /**
+ * r600_copy_cpdma - copy pages using the CP DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the CP DMA engine (r6xx+).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ *
+ * The copy is split into CP_DMA packets of at most 0x1fffff bytes each.
+ * Returns 0 on success or the error code of the call that failed.
+ */
+int r600_copy_cpdma(struct radeon_device *rdev,
+		    uint64_t src_offset, uint64_t dst_offset,
+		    unsigned num_gpu_pages,
+		    struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.blit_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_bytes, cur_size_in_bytes, tmp;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
+	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
+	/* 6 dwords per CP_DMA packet plus fixed overhead; the overhead
+	 * includes the 3-dword WAIT_UNTIL=WAIT_3D_IDLE packet emitted below.
+	 */
+	r = radeon_ring_lock(rdev, ring, num_loops * 6 + 24);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+				    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	/* WAIT_UNTIL=WAIT_3D_IDLE must be set before using CP DMA */
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, WAIT_3D_IDLE_bit);
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_bytes = size_in_bytes;
+		if (cur_size_in_bytes > 0x1fffff)
+			cur_size_in_bytes = 0x1fffff;
+		size_in_bytes -= cur_size_in_bytes;
+		tmp = upper_32_bits(src_offset) & 0xff;
+		/* flag the packet that finishes the copy with CP_SYNC */
+		if (size_in_bytes == 0)
+			tmp |= PACKET3_CP_DMA_CP_SYNC;
+		radeon_ring_write(ring, PACKET3(PACKET3_CP_DMA, 4));
+		radeon_ring_write(ring, src_offset & 0xffffffff);
+		radeon_ring_write(ring, tmp);
+		radeon_ring_write(ring, dst_offset & 0xffffffff);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+		radeon_ring_write(ring, cur_size_in_bytes);
+		src_offset += cur_size_in_bytes;
+		dst_offset += cur_size_in_bytes;
+	}
+	/* WAIT_UNTIL=WAIT_CP_DMA_IDLE after the last CP_DMA packet */
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, WAIT_CP_DMA_IDLE_bit);
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		/* do not leak the semaphore when it was kept for syncing */
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
+
+/**
  * r600_copy_dma - copy pages using the DMA engine
  *
  * @rdev: radeon_device pointer
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index f1b3084..8e3fe81 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -602,6 +602,7 @@
 #define		L2_BUSY					(1 << 0)
 
 #define	WAIT_UNTIL					0x8040
+#define	WAIT_CP_DMA_IDLE_bit				(1 << 8)
 #define	WAIT_2D_IDLE_bit				(1 << 14)
 #define	WAIT_3D_IDLE_bit				(1 << 15)
 #define	WAIT_2D_IDLECLEAN_bit				(1 << 16)
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 45d0693..b04b578 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -340,6 +340,9 @@ int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset, uint64_t dst_offset,
 		   unsigned num_gpu_pages, struct radeon_fence **fence);
+int r600_copy_cpdma(struct radeon_device *rdev,
+		    uint64_t src_offset, uint64_t dst_offset,
+		    unsigned num_gpu_pages, struct radeon_fence **fence);
 int r600_copy_dma(struct radeon_device *rdev,
 		  uint64_t src_offset, uint64_t dst_offset,
 		  unsigned num_gpu_pages, struct radeon_fence **fence);