Message ID | 1364944719-5175-3-git-send-email-deathsimple@vodafone.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Apr 03, 2013 at 01:18:31AM +0200, Christian König wrote: > Just everything needed to decode videos using UVD. > > v6: just all the bugfixes and support for R7xx-SI merged in one patch > v7: UVD_CGC_GATE is a write only register, lockup detection fix > > Signed-off-by: Christian König <deathsimple@vodafone.de> > --- > drivers/gpu/drm/radeon/Makefile | 2 +- > drivers/gpu/drm/radeon/evergreen.c | 40 ++- > drivers/gpu/drm/radeon/evergreend.h | 7 + > drivers/gpu/drm/radeon/ni.c | 49 +++ > drivers/gpu/drm/radeon/nid.h | 9 + > drivers/gpu/drm/radeon/r600.c | 291 ++++++++++++++++++ > drivers/gpu/drm/radeon/r600d.h | 61 ++++ > drivers/gpu/drm/radeon/radeon.h | 47 ++- > drivers/gpu/drm/radeon/radeon_asic.c | 63 ++++ > drivers/gpu/drm/radeon/radeon_asic.h | 19 ++ > drivers/gpu/drm/radeon/radeon_cs.c | 27 +- > drivers/gpu/drm/radeon/radeon_fence.c | 23 +- > drivers/gpu/drm/radeon/radeon_kms.c | 1 + > drivers/gpu/drm/radeon/radeon_object.c | 12 +- > drivers/gpu/drm/radeon/radeon_object.h | 2 +- > drivers/gpu/drm/radeon/radeon_ring.c | 24 +- > drivers/gpu/drm/radeon/radeon_test.c | 72 +++-- > drivers/gpu/drm/radeon/radeon_uvd.c | 521 ++++++++++++++++++++++++++++++++ > drivers/gpu/drm/radeon/rv770.c | 134 ++++++++ > drivers/gpu/drm/radeon/rv770d.h | 14 + > drivers/gpu/drm/radeon/si.c | 32 ++ > drivers/gpu/drm/radeon/sid.h | 6 + > include/uapi/drm/radeon_drm.h | 1 + > 23 files changed, 1400 insertions(+), 57 deletions(-) > create mode 100644 drivers/gpu/drm/radeon/radeon_uvd.c > > diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile > index bf17252..86c5e36 100644 > --- a/drivers/gpu/drm/radeon/Makefile > +++ b/drivers/gpu/drm/radeon/Makefile > @@ -76,7 +76,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ > evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \ > evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \ > atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \ > - 
si_blit_shaders.o radeon_prime.o > + si_blit_shaders.o radeon_prime.o radeon_uvd.o > > radeon-$(CONFIG_COMPAT) += radeon_ioc32.o > radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o > diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c > index 305a657..18b66ff 100644 > --- a/drivers/gpu/drm/radeon/evergreen.c > +++ b/drivers/gpu/drm/radeon/evergreen.c > @@ -3360,6 +3360,9 @@ restart_ih: > DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); > break; > } > + case 124: /* UVD */ > + DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); > + radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); > break; > case 146: > case 147: > @@ -3571,7 +3574,7 @@ int evergreen_copy_dma(struct radeon_device *rdev, > > static int evergreen_startup(struct radeon_device *rdev) > { > - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > + struct radeon_ring *ring; > int r; > > /* enable pcie gen2 link */ > @@ -3638,6 +3641,17 @@ static int evergreen_startup(struct radeon_device *rdev) > return r; > } > > + r = rv770_uvd_resume(rdev); > + if (!r) { > + r = radeon_fence_driver_start_ring(rdev, > + R600_RING_TYPE_UVD_INDEX); > + if (r) > + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); > + } > + > + if (r) > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; > + > /* Enable IRQ */ > r = r600_irq_init(rdev); > if (r) { > @@ -3647,6 +3661,7 @@ static int evergreen_startup(struct radeon_device *rdev) > } > evergreen_irq_set(rdev); > > + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, > R600_CP_RB_RPTR, R600_CP_RB_WPTR, > 0, 0xfffff, RADEON_CP_PACKET2); > @@ -3670,6 +3685,19 @@ static int evergreen_startup(struct radeon_device *rdev) > if (r) > return r; > > + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + if (ring->ring_size) { > + r = radeon_ring_init(rdev, ring, ring->ring_size, > + R600_WB_UVD_RPTR_OFFSET, > + UVD_RBC_RB_RPTR, 
UVD_RBC_RB_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (!r) > + r = r600_uvd_init(rdev); > + > + if (r) > + DRM_ERROR("radeon: error initializing UVD (%d).\n", r); > + } > + > r = radeon_ib_pool_init(rdev); > if (r) { > dev_err(rdev->dev, "IB initialization failed (%d).\n", r); > @@ -3716,8 +3744,10 @@ int evergreen_resume(struct radeon_device *rdev) > int evergreen_suspend(struct radeon_device *rdev) > { > r600_audio_fini(rdev); > + radeon_uvd_suspend(rdev); > r700_cp_stop(rdev); > r600_dma_stop(rdev); > + r600_uvd_rbc_stop(rdev); > evergreen_irq_suspend(rdev); > radeon_wb_disable(rdev); > evergreen_pcie_gart_disable(rdev); > @@ -3797,6 +3827,13 @@ int evergreen_init(struct radeon_device *rdev) > rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; > r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); > > + r = radeon_uvd_init(rdev); > + if (!r) { > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; > + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], > + 4096); > + } > + > rdev->ih.ring_obj = NULL; > r600_ih_ring_init(rdev, 64 * 1024); > > @@ -3843,6 +3880,7 @@ void evergreen_fini(struct radeon_device *rdev) > radeon_ib_pool_fini(rdev); > radeon_irq_kms_fini(rdev); > evergreen_pcie_gart_fini(rdev); > + radeon_uvd_fini(rdev); > r600_vram_scratch_fini(rdev); > radeon_gem_fini(rdev); > radeon_fence_driver_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h > index 982d25a..c5d873e 100644 > --- a/drivers/gpu/drm/radeon/evergreend.h > +++ b/drivers/gpu/drm/radeon/evergreend.h > @@ -992,6 +992,13 @@ > # define TARGET_LINK_SPEED_MASK (0xf << 0) > # define SELECTABLE_DEEMPHASIS (1 << 6) > > + > +/* > + * UVD > + */ > +#define UVD_RBC_RB_RPTR 0xf690 > +#define UVD_RBC_RB_WPTR 0xf694 > + > /* > * PM4 > */ > diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c > index 27769e7..ac944f5 100644 > --- a/drivers/gpu/drm/radeon/ni.c > +++ b/drivers/gpu/drm/radeon/ni.c > @@ 
-931,6 +931,23 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) > radeon_ring_write(ring, 10); /* poll interval */ > } > > +void cayman_uvd_semaphore_emit(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_semaphore *semaphore, > + bool emit_wait) > +{ > + uint64_t addr = semaphore->gpu_addr; > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); > + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); > + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); > + radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); > +} > + > static void cayman_cp_enable(struct radeon_device *rdev, bool enable) > { > if (enable) > @@ -1682,6 +1699,16 @@ static int cayman_startup(struct radeon_device *rdev) > return r; > } > > + r = rv770_uvd_resume(rdev); > + if (!r) { > + r = radeon_fence_driver_start_ring(rdev, > + R600_RING_TYPE_UVD_INDEX); > + if (r) > + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); > + } > + if (r) > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; > + > r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); > if (r) { > dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); > @@ -1748,6 +1775,18 @@ static int cayman_startup(struct radeon_device *rdev) > if (r) > return r; > > + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + if (ring->ring_size) { > + r = radeon_ring_init(rdev, ring, ring->ring_size, > + R600_WB_UVD_RPTR_OFFSET, > + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (!r) > + r = r600_uvd_init(rdev); > + if (r) > + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); > + } > + > r = radeon_ib_pool_init(rdev); > if (r) { > dev_err(rdev->dev, "IB initialization failed (%d).\n", r); > @@ -1794,6 +1833,8 @@ int cayman_suspend(struct radeon_device *rdev) > radeon_vm_manager_fini(rdev); > 
cayman_cp_enable(rdev, false); > cayman_dma_stop(rdev); > + r600_uvd_rbc_stop(rdev); > + radeon_uvd_suspend(rdev); > evergreen_irq_suspend(rdev); > radeon_wb_disable(rdev); > cayman_pcie_gart_disable(rdev); > @@ -1868,6 +1909,13 @@ int cayman_init(struct radeon_device *rdev) > ring->ring_obj = NULL; > r600_ring_init(rdev, ring, 64 * 1024); > > + r = radeon_uvd_init(rdev); > + if (!r) { > + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + ring->ring_obj = NULL; > + r600_ring_init(rdev, ring, 4096); > + } > + > rdev->ih.ring_obj = NULL; > r600_ih_ring_init(rdev, 64 * 1024); > > @@ -1919,6 +1967,7 @@ void cayman_fini(struct radeon_device *rdev) > radeon_vm_manager_fini(rdev); > radeon_ib_pool_fini(rdev); > radeon_irq_kms_fini(rdev); > + radeon_uvd_fini(rdev); > cayman_pcie_gart_fini(rdev); > r600_vram_scratch_fini(rdev); > radeon_gem_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h > index 079dee2..3731f6c 100644 > --- a/drivers/gpu/drm/radeon/nid.h > +++ b/drivers/gpu/drm/radeon/nid.h > @@ -486,6 +486,15 @@ > # define CACHE_FLUSH_AND_INV_EVENT (0x16 << 0) > > /* > + * UVD > + */ > +#define UVD_SEMA_ADDR_LOW 0xEF00 > +#define UVD_SEMA_ADDR_HIGH 0xEF04 > +#define UVD_SEMA_CMD 0xEF08 > +#define UVD_RBC_RB_RPTR 0xF690 > +#define UVD_RBC_RB_WPTR 0xF694 > + > +/* > * PM4 > */ > #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ > diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c > index 0740db3..ca6117d 100644 > --- a/drivers/gpu/drm/radeon/r600.c > +++ b/drivers/gpu/drm/radeon/r600.c > @@ -2552,6 +2552,185 @@ void r600_dma_fini(struct radeon_device *rdev) > } > > /* > + * UVD > + */ > +int r600_uvd_rbc_start(struct radeon_device *rdev) > +{ > + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + uint64_t rptr_addr; > + uint32_t rb_bufsz, tmp; > + int r; > + > + rptr_addr = rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET; > + > + if (upper_32_bits(rptr_addr) != 
upper_32_bits(ring->gpu_addr)) { > + DRM_ERROR("UVD ring and rptr not in the same 4GB segment!\n"); > + return -EINVAL; > + } > + > + /* force RBC into idle state */ > + WREG32(UVD_RBC_RB_CNTL, 0x11010101); > + > + /* Set the write pointer delay */ > + WREG32(UVD_RBC_RB_WPTR_CNTL, 0); > + > + /* set the wb address */ > + WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2); > + > + /* programm the 4GB memory segment for rptr and ring buffer */ > + WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) | > + (0x7 << 16) | (0x1 << 31)); > + > + /* Initialize the ring buffer's read and write pointers */ > + WREG32(UVD_RBC_RB_RPTR, 0x0); > + > + ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR); > + WREG32(UVD_RBC_RB_WPTR, ring->wptr); > + > + /* set the ring address */ > + WREG32(UVD_RBC_RB_BASE, ring->gpu_addr); > + > + /* Set ring buffer size */ > + rb_bufsz = drm_order(ring->ring_size); > + rb_bufsz = (0x1 << 8) | rb_bufsz; > + WREG32(UVD_RBC_RB_CNTL, rb_bufsz); > + > + ring->ready = true; > + r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring); > + if (r) { > + ring->ready = false; > + return r; > + } > + > + r = radeon_ring_lock(rdev, ring, 10); > + if (r) { > + DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r); > + return r; > + } > + > + tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0); > + radeon_ring_write(ring, tmp); > + radeon_ring_write(ring, 0xFFFFF); > + > + tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0); > + radeon_ring_write(ring, tmp); > + radeon_ring_write(ring, 0xFFFFF); > + > + tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0); > + radeon_ring_write(ring, tmp); > + radeon_ring_write(ring, 0xFFFFF); > + > + /* Clear timeout status bits */ > + radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0)); > + radeon_ring_write(ring, 0x8); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0)); > + radeon_ring_write(ring, 1); > + > + radeon_ring_unlock_commit(rdev, ring); > + > + return 0; > +} > + > +void 
r600_uvd_rbc_stop(struct radeon_device *rdev) > +{ > + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + > + /* force RBC into idle state */ > + WREG32(UVD_RBC_RB_CNTL, 0x11010101); > + ring->ready = false; > +} > + > +int r600_uvd_init(struct radeon_device *rdev) > +{ > + int i, j, r; > + > + /* disable clock gating */ > + WREG32(UVD_CGC_GATE, 0); > + > + /* disable interupt */ > + WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1)); > + > + /* put LMI, VCPU, RBC etc... into reset */ > + WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET | > + LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET | > + CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET); > + mdelay(5); > + > + /* take UVD block out of reset */ > + WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD); > + mdelay(5); > + > + /* initialize UVD memory controller */ > + WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) | > + (1 << 21) | (1 << 9) | (1 << 20)); > + > + /* disable byte swapping */ > + WREG32(UVD_LMI_SWAP_CNTL, 0); > + WREG32(UVD_MP_SWAP_CNTL, 0); > + > + WREG32(UVD_MPC_SET_MUXA0, 0x40c2040); > + WREG32(UVD_MPC_SET_MUXA1, 0x0); > + WREG32(UVD_MPC_SET_MUXB0, 0x40c2040); > + WREG32(UVD_MPC_SET_MUXB1, 0x0); > + WREG32(UVD_MPC_SET_ALU, 0); > + WREG32(UVD_MPC_SET_MUX, 0x88); > + > + /* Stall UMC */ > + WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); > + WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); > + > + /* take all subblocks out of reset, except VCPU */ > + WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); > + mdelay(5); > + > + /* enable VCPU clock */ > + WREG32(UVD_VCPU_CNTL, 1 << 9); > + > + /* enable UMC */ > + WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); > + > + /* boot up the VCPU */ > + WREG32(UVD_SOFT_RESET, 0); > + mdelay(10); > + > + WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); > + > + for (i = 0; i < 10; ++i) { > + uint32_t status; > + for (j = 0; j < 100; ++j) { > + status = RREG32(UVD_STATUS); > + if (status & 2) > + break; > + mdelay(10); > + } > + r = 0; > + if (status & 2) > + break; > + > + 
DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); > + WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET); > + mdelay(10); > + WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET); > + mdelay(10); > + r = -1; > + } > + if (r) { > + DRM_ERROR("UVD not responding, giving up!!!\n"); > + return r; > + } > + /* enable interupt */ > + WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1)); > + > + r = r600_uvd_rbc_start(rdev); > + if (r) > + return r; > + > + DRM_INFO("UVD initialized successfully.\n"); > + return 0; > +} > + > +/* > * GPU scratch registers helpers function. > */ > void r600_scratch_init(struct radeon_device *rdev) > @@ -2660,6 +2839,40 @@ int r600_dma_ring_test(struct radeon_device *rdev, > return r; > } > > +int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) > +{ > + uint32_t tmp = 0; > + unsigned i; > + int r; > + > + WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD); > + r = radeon_ring_lock(rdev, ring, 3); > + if (r) { > + DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", > + ring->idx, r); > + return r; > + } > + radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); > + radeon_ring_write(ring, 0xDEADBEEF); > + radeon_ring_unlock_commit(rdev, ring); > + for (i = 0; i < rdev->usec_timeout; i++) { > + tmp = RREG32(UVD_CONTEXT_ID); > + if (tmp == 0xDEADBEEF) > + break; > + DRM_UDELAY(1); > + } > + > + if (i < rdev->usec_timeout) { > + DRM_INFO("ring test on %d succeeded in %d usecs\n", > + ring->idx, i); > + } else { > + DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", > + ring->idx, tmp); > + r = -EINVAL; > + } > + return r; > +} > + > /* > * CP fences/semaphores > */ > @@ -2711,6 +2924,30 @@ void r600_fence_ring_emit(struct radeon_device *rdev, > } > } > > +void r600_uvd_fence_emit(struct radeon_device *rdev, > + struct radeon_fence *fence) > +{ > + struct radeon_ring *ring = &rdev->ring[fence->ring]; > + uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr; > + > + radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); > + 
radeon_ring_write(ring, fence->seq); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); > + radeon_ring_write(ring, addr & 0xffffffff); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); > + radeon_ring_write(ring, upper_32_bits(addr) & 0xff); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); > + radeon_ring_write(ring, 0); > + > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); > + radeon_ring_write(ring, 0); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); > + radeon_ring_write(ring, 0); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); > + radeon_ring_write(ring, 2); > + return; > +} > + > void r600_semaphore_ring_emit(struct radeon_device *rdev, > struct radeon_ring *ring, > struct radeon_semaphore *semaphore, > @@ -2780,6 +3017,23 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, > radeon_ring_write(ring, upper_32_bits(addr) & 0xff); > } > > +void r600_uvd_semaphore_emit(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_semaphore *semaphore, > + bool emit_wait) > +{ > + uint64_t addr = semaphore->gpu_addr; > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); > + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); > + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); > + radeon_ring_write(ring, emit_wait ? 
1 : 0); > +} > + > int r600_copy_blit(struct radeon_device *rdev, > uint64_t src_offset, > uint64_t dst_offset, > @@ -3183,6 +3437,16 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) > radeon_ring_write(ring, ib->length_dw); > } > > +void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) > +{ > + struct radeon_ring *ring = &rdev->ring[ib->ring]; > + > + radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0)); > + radeon_ring_write(ring, ib->gpu_addr); > + radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0)); > + radeon_ring_write(ring, ib->length_dw); > +} > + > int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) > { > struct radeon_ib ib; > @@ -3300,6 +3564,33 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) > return r; > } > > +int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) > +{ > + struct radeon_fence *fence; > + int r; > + > + r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); > + if (r) { > + DRM_ERROR("radeon: failed to get create msg (%d).\n", r); > + return r; > + } > + > + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence); > + if (r) { > + DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); > + return r; > + } > + > + r = radeon_fence_wait(fence, false); > + if (r) { > + DRM_ERROR("radeon: fence wait failed (%d).\n", r); > + return r; > + } > + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); > + radeon_fence_unref(&fence); > + return r; > +} > + > /** > * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine > * > diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h > index a42ba11..441bdb8 100644 > --- a/drivers/gpu/drm/radeon/r600d.h > +++ b/drivers/gpu/drm/radeon/r600d.h > @@ -691,6 +691,7 @@ > #define SRBM_SOFT_RESET 0xe60 > # define SOFT_RESET_DMA (1 << 12) > # define SOFT_RESET_RLC (1 << 13) > +# define SOFT_RESET_UVD (1 << 18) > # define RV770_SOFT_RESET_DMA (1 << 20) > > 
#define CP_INT_CNTL 0xc124 > @@ -1143,6 +1144,66 @@ > # define AFMT_AZ_AUDIO_ENABLE_CHG_ACK (1 << 30) > > /* > + * UVD > + */ > +#define UVD_SEMA_ADDR_LOW 0xef00 > +#define UVD_SEMA_ADDR_HIGH 0xef04 > +#define UVD_SEMA_CMD 0xef08 > + > +#define UVD_GPCOM_VCPU_CMD 0xef0c > +#define UVD_GPCOM_VCPU_DATA0 0xef10 > +#define UVD_GPCOM_VCPU_DATA1 0xef14 > +#define UVD_ENGINE_CNTL 0xef18 > + > +#define UVD_SEMA_CNTL 0xf400 > +#define UVD_RB_ARB_CTRL 0xf480 > + > +#define UVD_LMI_EXT40_ADDR 0xf498 > +#define UVD_CGC_GATE 0xf4a8 > +#define UVD_LMI_CTRL2 0xf4f4 > +#define UVD_MASTINT_EN 0xf500 > +#define UVD_LMI_ADDR_EXT 0xf594 > +#define UVD_LMI_CTRL 0xf598 > +#define UVD_LMI_SWAP_CNTL 0xf5b4 > +#define UVD_MP_SWAP_CNTL 0xf5bC > +#define UVD_MPC_CNTL 0xf5dC > +#define UVD_MPC_SET_MUXA0 0xf5e4 > +#define UVD_MPC_SET_MUXA1 0xf5e8 > +#define UVD_MPC_SET_MUXB0 0xf5eC > +#define UVD_MPC_SET_MUXB1 0xf5f0 > +#define UVD_MPC_SET_MUX 0xf5f4 > +#define UVD_MPC_SET_ALU 0xf5f8 > + > +#define UVD_VCPU_CNTL 0xf660 > +#define UVD_SOFT_RESET 0xf680 > +#define RBC_SOFT_RESET (1<<0) > +#define LBSI_SOFT_RESET (1<<1) > +#define LMI_SOFT_RESET (1<<2) > +#define VCPU_SOFT_RESET (1<<3) > +#define CSM_SOFT_RESET (1<<5) > +#define CXW_SOFT_RESET (1<<6) > +#define TAP_SOFT_RESET (1<<7) > +#define LMI_UMC_SOFT_RESET (1<<13) > +#define UVD_RBC_IB_BASE 0xf684 > +#define UVD_RBC_IB_SIZE 0xf688 > +#define UVD_RBC_RB_BASE 0xf68c > +#define UVD_RBC_RB_RPTR 0xf690 > +#define UVD_RBC_RB_WPTR 0xf694 > +#define UVD_RBC_RB_WPTR_CNTL 0xf698 > + > +#define UVD_STATUS 0xf6bc > + > +#define UVD_SEMA_TIMEOUT_STATUS 0xf6c0 > +#define UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL 0xf6c4 > +#define UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL 0xf6c8 > +#define UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL 0xf6cc > + > +#define UVD_RBC_RB_CNTL 0xf6a4 > +#define UVD_RBC_RB_RPTR_ADDR 0xf6a8 > + > +#define UVD_CONTEXT_ID 0xf6f4 > + > +/* > * PM4 > */ > #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ > diff --git 
a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index 8263af3..3f5572d 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -109,24 +109,27 @@ extern int radeon_lockup_timeout; > #define RADEON_BIOS_NUM_SCRATCH 8 > > /* max number of rings */ > -#define RADEON_NUM_RINGS 5 > +#define RADEON_NUM_RINGS 6 > > /* fence seq are set to this number when signaled */ > #define RADEON_FENCE_SIGNALED_SEQ 0LL > > /* internal ring indices */ > /* r1xx+ has gfx CP ring */ > -#define RADEON_RING_TYPE_GFX_INDEX 0 > +#define RADEON_RING_TYPE_GFX_INDEX 0 > > /* cayman has 2 compute CP rings */ > -#define CAYMAN_RING_TYPE_CP1_INDEX 1 > -#define CAYMAN_RING_TYPE_CP2_INDEX 2 > +#define CAYMAN_RING_TYPE_CP1_INDEX 1 > +#define CAYMAN_RING_TYPE_CP2_INDEX 2 > > /* R600+ has an async dma ring */ > #define R600_RING_TYPE_DMA_INDEX 3 > /* cayman add a second async dma ring */ > #define CAYMAN_RING_TYPE_DMA1_INDEX 4 > > +/* R600+ */ > +#define R600_RING_TYPE_UVD_INDEX 5 > + > /* hardcode those limit for now */ > #define RADEON_VA_IB_OFFSET (1 << 20) > #define RADEON_VA_RESERVED_SIZE (8 << 20) > @@ -357,8 +360,9 @@ struct radeon_bo_list { > struct ttm_validate_buffer tv; > struct radeon_bo *bo; > uint64_t gpu_offset; > - unsigned rdomain; > - unsigned wdomain; > + bool written; > + unsigned domain; > + unsigned alt_domain; > u32 tiling_flags; > }; I think the change to rdomain/wdomain should be in a patch of its own. The change itself looks fine, but we have had issues with changes that touched that part before; a separate patch would make bisecting and understanding the implications of the change easier. > > @@ -826,7 +830,6 @@ struct radeon_cs_reloc { > struct radeon_bo *robj; > struct radeon_bo_list lobj; > uint32_t handle; > - uint32_t flags; > }; Why remove the flags? IIRC it's not really used right now, but I remember a plan to use it. 
> > struct radeon_cs_chunk { > @@ -918,6 +921,7 @@ struct radeon_wb { > #define R600_WB_DMA_RPTR_OFFSET 1792 > #define R600_WB_IH_WPTR_OFFSET 2048 > #define CAYMAN_WB_DMA1_RPTR_OFFSET 2304 > +#define R600_WB_UVD_RPTR_OFFSET 2560 > #define R600_WB_EVENT_OFFSET 3072 > > /** > @@ -1118,6 +1122,33 @@ struct radeon_pm { > int radeon_pm_get_type_index(struct radeon_device *rdev, > enum radeon_pm_state_type ps_type, > int instance); > +/* > + * UVD > + */ > +#define RADEON_MAX_UVD_HANDLES 10 > +#define RADEON_UVD_STACK_SIZE (1024*1024) > +#define RADEON_UVD_HEAP_SIZE (1024*1024) > + > +struct radeon_uvd { > + struct radeon_bo *vcpu_bo; > + void *cpu_addr; > + uint64_t gpu_addr; > + atomic_t handles[RADEON_MAX_UVD_HANDLES]; > + struct drm_file *filp[RADEON_MAX_UVD_HANDLES]; > +}; > + > +int radeon_uvd_init(struct radeon_device *rdev); > +void radeon_uvd_fini(struct radeon_device *rdev); > +int radeon_uvd_suspend(struct radeon_device *rdev); > +int radeon_uvd_resume(struct radeon_device *rdev); > +int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, > + uint32_t handle, struct radeon_fence **fence); > +int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, > + uint32_t handle, struct radeon_fence **fence); > +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo); > +void radeon_uvd_free_handles(struct radeon_device *rdev, > + struct drm_file *filp); > +int radeon_uvd_cs_parse(struct radeon_cs_parser *parser); > > struct r600_audio { > int channels; > @@ -1608,6 +1639,7 @@ struct radeon_device { > struct radeon_asic *asic; > struct radeon_gem gem; > struct radeon_pm pm; > + struct radeon_uvd uvd; > uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; > struct radeon_wb wb; > struct radeon_dummy_page dummy_page; > @@ -1621,6 +1653,7 @@ struct radeon_device { > const struct firmware *rlc_fw; /* r6/700 RLC firmware */ > const struct firmware *mc_fw; /* NI MC firmware */ > const struct firmware *ce_fw; /* SI CE firmware */ > + const struct 
firmware *uvd_fw; /* UVD firmware */ > struct r600_blit r600_blit; > struct r600_vram_scratch vram_scratch; > int msi_enabled; /* msi enabled */ > diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c > index aba0a89..a7a7b2b 100644 > --- a/drivers/gpu/drm/radeon/radeon_asic.c > +++ b/drivers/gpu/drm/radeon/radeon_asic.c > @@ -1130,6 +1130,15 @@ static struct radeon_asic rv770_asic = { > .ring_test = &r600_dma_ring_test, > .ib_test = &r600_dma_ib_test, > .is_lockup = &r600_dma_is_lockup, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &r600_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > @@ -1216,6 +1225,15 @@ static struct radeon_asic evergreen_asic = { > .ring_test = &r600_dma_ring_test, > .ib_test = &r600_dma_ib_test, > .is_lockup = &evergreen_dma_is_lockup, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &r600_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > @@ -1302,6 +1320,15 @@ static struct radeon_asic sumo_asic = { > .ring_test = &r600_dma_ring_test, > .ib_test = &r600_dma_ib_test, > .is_lockup = &evergreen_dma_is_lockup, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &r600_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > @@ -1388,6 +1415,15 @@ static struct radeon_asic btc_asic = { > .ring_test = 
&r600_dma_ring_test, > .ib_test = &r600_dma_ib_test, > .is_lockup = &evergreen_dma_is_lockup, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &r600_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > @@ -1517,6 +1553,15 @@ static struct radeon_asic cayman_asic = { > .ib_test = &r600_dma_ib_test, > .is_lockup = &cayman_dma_is_lockup, > .vm_flush = &cayman_dma_vm_flush, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &cayman_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > @@ -1646,6 +1691,15 @@ static struct radeon_asic trinity_asic = { > .ib_test = &r600_dma_ib_test, > .is_lockup = &cayman_dma_is_lockup, > .vm_flush = &cayman_dma_vm_flush, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &cayman_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > @@ -1775,6 +1829,15 @@ static struct radeon_asic si_asic = { > .ib_test = &r600_dma_ib_test, > .is_lockup = &si_dma_is_lockup, > .vm_flush = &si_dma_vm_flush, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &cayman_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > diff --git 
a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h > index 3535f73..515db96 100644 > --- a/drivers/gpu/drm/radeon/radeon_asic.h > +++ b/drivers/gpu/drm/radeon/radeon_asic.h > @@ -330,6 +330,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); > void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); > int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); > int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); > +int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); > int r600_copy_blit(struct radeon_device *rdev, > uint64_t src_offset, uint64_t dst_offset, > unsigned num_gpu_pages, struct radeon_fence **fence); > @@ -392,6 +393,19 @@ int r600_mc_wait_for_idle(struct radeon_device *rdev); > u32 r600_get_xclk(struct radeon_device *rdev); > uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev); > > +/* uvd */ > +int r600_uvd_init(struct radeon_device *rdev); > +int r600_uvd_rbc_start(struct radeon_device *rdev); > +void r600_uvd_rbc_stop(struct radeon_device *rdev); > +int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); > +void r600_uvd_fence_emit(struct radeon_device *rdev, > + struct radeon_fence *fence); > +void r600_uvd_semaphore_emit(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_semaphore *semaphore, > + bool emit_wait); > +void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); > + > /* > * rv770,rv730,rv710,rv740 > */ > @@ -409,6 +423,7 @@ int rv770_copy_dma(struct radeon_device *rdev, > unsigned num_gpu_pages, > struct radeon_fence **fence); > u32 rv770_get_xclk(struct radeon_device *rdev); > +int rv770_uvd_resume(struct radeon_device *rdev); > > /* > * evergreen > @@ -465,6 +480,10 @@ int evergreen_copy_dma(struct radeon_device *rdev, > */ > void cayman_fence_ring_emit(struct radeon_device *rdev, > struct radeon_fence *fence); > +void 
cayman_uvd_semaphore_emit(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_semaphore *semaphore, > + bool emit_wait); > void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev); > int cayman_init(struct radeon_device *rdev); > void cayman_fini(struct radeon_device *rdev); > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c > index 7d66e01..532ff68 100644 > --- a/drivers/gpu/drm/radeon/radeon_cs.c > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > @@ -75,18 +75,34 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) > p->relocs_ptr[i] = &p->relocs[i]; > p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj); > p->relocs[i].lobj.bo = p->relocs[i].robj; > - p->relocs[i].lobj.wdomain = r->write_domain; > - p->relocs[i].lobj.rdomain = r->read_domains; > + p->relocs[i].lobj.written = !!r->write_domain; > + > + /* the first reloc of an UVD job is the > + msg and that must be in VRAM */ > + if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) { > + > + p->relocs[i].lobj.domain = > + RADEON_GEM_DOMAIN_VRAM; > + > + p->relocs[i].lobj.alt_domain = > + RADEON_GEM_DOMAIN_VRAM; > + } else { > + uint32_t domain = r->write_domain ? 
> + r->write_domain : r->read_domains; > + p->relocs[i].lobj.domain = domain; > + if (domain == RADEON_GEM_DOMAIN_VRAM) > + domain |= RADEON_GEM_DOMAIN_GTT; > + p->relocs[i].lobj.alt_domain = domain; > + } > p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; > p->relocs[i].handle = r->handle; > - p->relocs[i].flags = r->flags; > radeon_bo_list_add_object(&p->relocs[i].lobj, > &p->validated); > > } else > p->relocs[i].handle = 0; > } > - return radeon_bo_list_validate(&p->validated); > + return radeon_bo_list_validate(&p->validated, p->ring); > } > > static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) > @@ -121,6 +137,9 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority > return -EINVAL; > } > break; > + case RADEON_CS_RING_UVD: > + p->ring = R600_RING_TYPE_UVD_INDEX; > + break; > } > return 0; > } > diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c > index 3435625..82fe183 100644 > --- a/drivers/gpu/drm/radeon/radeon_fence.c > +++ b/drivers/gpu/drm/radeon/radeon_fence.c > @@ -31,9 +31,9 @@ > #include <linux/seq_file.h> > #include <linux/atomic.h> > #include <linux/wait.h> > -#include <linux/list.h> > #include <linux/kref.h> > #include <linux/slab.h> > +#include <linux/firmware.h> > #include <drm/drmP.h> > #include "radeon_reg.h" > #include "radeon.h" > @@ -767,8 +767,21 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) > > radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); > if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) { > - rdev->fence_drv[ring].scratch_reg = 0; > - index = R600_WB_EVENT_OFFSET + ring * 4; > + if (ring != R600_RING_TYPE_UVD_INDEX) { > + rdev->fence_drv[ring].scratch_reg = 0; > + index = R600_WB_EVENT_OFFSET + ring * 4; > + rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; > + rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + > + index; > + > + } else { > + /* 
put fence directly behind firmware */ > + rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + > + rdev->uvd_fw->size; > + rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + > + rdev->uvd_fw->size; > + } > + > } else { > r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg); > if (r) { > @@ -778,9 +791,9 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) > index = RADEON_WB_SCRATCH_OFFSET + > rdev->fence_drv[ring].scratch_reg - > rdev->scratch.reg_base; > + rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; > + rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; > } > - rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; > - rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; > radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring); > rdev->fence_drv[ring].initialized = true; > dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", > diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c > index c75cb2c..3019759 100644 > --- a/drivers/gpu/drm/radeon/radeon_kms.c > +++ b/drivers/gpu/drm/radeon/radeon_kms.c > @@ -513,6 +513,7 @@ void radeon_driver_preclose_kms(struct drm_device *dev, > rdev->hyperz_filp = NULL; > if (rdev->cmask_filp == file_priv) > rdev->cmask_filp = NULL; > + radeon_uvd_free_handles(rdev, file_priv); > } > > /* > diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c > index d3aface..0e34446 100644 > --- a/drivers/gpu/drm/radeon/radeon_object.c > +++ b/drivers/gpu/drm/radeon/radeon_object.c > @@ -339,14 +339,14 @@ void radeon_bo_fini(struct radeon_device *rdev) > void radeon_bo_list_add_object(struct radeon_bo_list *lobj, > struct list_head *head) > { > - if (lobj->wdomain) { > + if (lobj->written) { > list_add(&lobj->tv.head, head); > } else { > list_add_tail(&lobj->tv.head, head); > } > } > > -int radeon_bo_list_validate(struct list_head *head) > +int 
radeon_bo_list_validate(struct list_head *head, int ring) > { > struct radeon_bo_list *lobj; > struct radeon_bo *bo; > @@ -360,15 +360,17 @@ int radeon_bo_list_validate(struct list_head *head) > list_for_each_entry(lobj, head, tv.head) { > bo = lobj->bo; > if (!bo->pin_count) { > - domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain; > + domain = lobj->domain; > > retry: > radeon_ttm_placement_from_domain(bo, domain); > + if (ring == R600_RING_TYPE_UVD_INDEX) > + radeon_uvd_force_into_uvd_segment(bo); > r = ttm_bo_validate(&bo->tbo, &bo->placement, > true, false); > if (unlikely(r)) { > - if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) { > - domain |= RADEON_GEM_DOMAIN_GTT; > + if (r != -ERESTARTSYS && domain != lobj->alt_domain) { > + domain = lobj->alt_domain; > goto retry; > } > return r; > diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h > index 5fc86b0..e2cb80a 100644 > --- a/drivers/gpu/drm/radeon/radeon_object.h > +++ b/drivers/gpu/drm/radeon/radeon_object.h > @@ -128,7 +128,7 @@ extern int radeon_bo_init(struct radeon_device *rdev); > extern void radeon_bo_fini(struct radeon_device *rdev); > extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, > struct list_head *head); > -extern int radeon_bo_list_validate(struct list_head *head); > +extern int radeon_bo_list_validate(struct list_head *head, int ring); > extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, > struct vm_area_struct *vma); > extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo, > diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c > index 8d58e26..31e47d8 100644 > --- a/drivers/gpu/drm/radeon/radeon_ring.c > +++ b/drivers/gpu/drm/radeon/radeon_ring.c > @@ -368,7 +368,7 @@ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) > { > u32 rptr; > > - if (rdev->wb.enabled) > + if (rdev->wb.enabled && ring != &rdev->ring[R600_RING_TYPE_UVD_INDEX]) > rptr = 
le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); > else > rptr = RREG32(ring->rptr_reg); > @@ -821,18 +821,20 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) > return 0; > } > > -static int radeon_ring_type_gfx_index = RADEON_RING_TYPE_GFX_INDEX; > -static int cayman_ring_type_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX; > -static int cayman_ring_type_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; > -static int radeon_ring_type_dma1_index = R600_RING_TYPE_DMA_INDEX; > -static int radeon_ring_type_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; > +static int radeon_gfx_index = RADEON_RING_TYPE_GFX_INDEX; > +static int cayman_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX; > +static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; > +static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX; > +static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; > +static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX; > > static struct drm_info_list radeon_debugfs_ring_info_list[] = { > - {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_index}, > - {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_index}, > - {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index}, > - {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma1_index}, > - {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma2_index}, > + {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index}, > + {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_cp1_index}, > + {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_cp2_index}, > + {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index}, > + {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index}, > + {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index}, > }; > > static int radeon_debugfs_sa_info(struct seq_file *m, void *data) > diff --git a/drivers/gpu/drm/radeon/radeon_test.c 
b/drivers/gpu/drm/radeon/radeon_test.c > index fda09c9..bbed4af 100644 > --- a/drivers/gpu/drm/radeon/radeon_test.c > +++ b/drivers/gpu/drm/radeon/radeon_test.c > @@ -252,6 +252,36 @@ void radeon_test_moves(struct radeon_device *rdev) > radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT); > } > > +static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_fence **fence) > +{ > + int r; > + > + if (ring->idx == R600_RING_TYPE_UVD_INDEX) { > + r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); > + if (r) { > + DRM_ERROR("Failed to get dummy create msg\n"); > + return r; > + } > + > + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence); > + if (r) { > + DRM_ERROR("Failed to get dummy destroy msg\n"); > + return r; > + } > + } else { > + r = radeon_ring_lock(rdev, ring, 64); > + if (r) { > + DRM_ERROR("Failed to lock ring A %d\n", ring->idx); > + return r; > + } > + radeon_fence_emit(rdev, fence, ring->idx); > + radeon_ring_unlock_commit(rdev, ring); > + } > + return 0; > +} > + > void radeon_test_ring_sync(struct radeon_device *rdev, > struct radeon_ring *ringA, > struct radeon_ring *ringB) > @@ -272,21 +302,24 @@ void radeon_test_ring_sync(struct radeon_device *rdev, > goto out_cleanup; > } > radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); > - r = radeon_fence_emit(rdev, &fence1, ringA->idx); > - if (r) { > - DRM_ERROR("Failed to emit fence 1\n"); > - radeon_ring_unlock_undo(rdev, ringA); > + radeon_ring_unlock_commit(rdev, ringA); > + > + r = radeon_test_create_and_emit_fence(rdev, ringA, &fence1); > + if (r) > goto out_cleanup; > - } > - radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); > - r = radeon_fence_emit(rdev, &fence2, ringA->idx); > + > + r = radeon_ring_lock(rdev, ringA, 64); > if (r) { > - DRM_ERROR("Failed to emit fence 2\n"); > - radeon_ring_unlock_undo(rdev, ringA); > + DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); > goto out_cleanup; > } > + 
radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); > radeon_ring_unlock_commit(rdev, ringA); > > + r = radeon_test_create_and_emit_fence(rdev, ringA, &fence2); > + if (r) > + goto out_cleanup; > + > mdelay(1000); > > if (radeon_fence_signaled(fence1)) { > @@ -364,27 +397,22 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, > goto out_cleanup; > } > radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); > - r = radeon_fence_emit(rdev, &fenceA, ringA->idx); > - if (r) { > - DRM_ERROR("Failed to emit sync fence 1\n"); > - radeon_ring_unlock_undo(rdev, ringA); > - goto out_cleanup; > - } > radeon_ring_unlock_commit(rdev, ringA); > > + r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA); > + if (r) > + goto out_cleanup; > + > r = radeon_ring_lock(rdev, ringB, 64); > if (r) { > DRM_ERROR("Failed to lock ring B %d\n", ringB->idx); > goto out_cleanup; > } > radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore); > - r = radeon_fence_emit(rdev, &fenceB, ringB->idx); > - if (r) { > - DRM_ERROR("Failed to create sync fence 2\n"); > - radeon_ring_unlock_undo(rdev, ringB); > - goto out_cleanup; > - } > radeon_ring_unlock_commit(rdev, ringB); > + r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB); > + if (r) > + goto out_cleanup; > > mdelay(1000); > > @@ -393,7 +421,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, > goto out_cleanup; > } > if (radeon_fence_signaled(fenceB)) { > - DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); > + DRM_ERROR("Fence B signaled without waiting for semaphore.\n"); > goto out_cleanup; > } > > diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c > new file mode 100644 > index 0000000..8ab7bb9 > --- /dev/null > +++ b/drivers/gpu/drm/radeon/radeon_uvd.c > @@ -0,0 +1,521 @@ > +/* > + * Copyright 2011 Advanced Micro Devices, Inc. > + * All Rights Reserved. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > + * "Software"), to deal in the Software without restriction, including > + * without limitation the rights to use, copy, modify, merge, publish, > + * distribute, sub license, and/or sell copies of the Software, and to > + * permit persons to whom the Software is furnished to do so, subject to > + * the following conditions: > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL > + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, > + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE > + * USE OR OTHER DEALINGS IN THE SOFTWARE. > + * > + * The above copyright notice and this permission notice (including the > + * next paragraph) shall be included in all copies or substantial portions > + * of the Software. 
> + * > + */ > +/* > + * Authors: > + * Christian König <deathsimple@vodafone.de> > + */ > + > +#include <linux/firmware.h> > +#include <linux/module.h> > +#include <drm/drmP.h> > +#include <drm/drm.h> > + > +#include "radeon.h" > +#include "r600d.h" > + > +/* Firmware Names */ > +#define FIRMWARE_RV770 "radeon/RV770_uvd.bin" > +#define FIRMWARE_RV710 "radeon/RV710_uvd.bin" > +#define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" > +#define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" > +#define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" > + > +MODULE_FIRMWARE(FIRMWARE_RV770); > +MODULE_FIRMWARE(FIRMWARE_RV710); > +MODULE_FIRMWARE(FIRMWARE_CYPRESS); > +MODULE_FIRMWARE(FIRMWARE_SUMO); > +MODULE_FIRMWARE(FIRMWARE_TAHITI); > + > +int radeon_uvd_init(struct radeon_device *rdev) > +{ > + struct platform_device *pdev; > + unsigned long bo_size; > + const char *fw_name; > + int i, r; > + > + pdev = platform_device_register_simple("radeon_uvd", 0, NULL, 0); > + r = IS_ERR(pdev); > + if (r) { > + dev_err(rdev->dev, "radeon_uvd: Failed to register firmware\n"); > + return -EINVAL; > + } > + > + switch (rdev->family) { > + case CHIP_RV770: > + fw_name = FIRMWARE_RV770; > + break; > + > + case CHIP_RV710: > + case CHIP_RV730: > + case CHIP_RV740: > + fw_name = FIRMWARE_RV710; > + break; > + > + case CHIP_CYPRESS: > + case CHIP_JUNIPER: > + case CHIP_REDWOOD: > + case CHIP_CEDAR: > + fw_name = FIRMWARE_CYPRESS; > + break; > + > + case CHIP_SUMO: > + case CHIP_SUMO2: > + case CHIP_PALM: > + case CHIP_CAYMAN: > + case CHIP_BARTS: > + case CHIP_TURKS: > + case CHIP_CAICOS: > + fw_name = FIRMWARE_SUMO; > + break; > + > + case CHIP_TAHITI: > + case CHIP_VERDE: > + case CHIP_PITCAIRN: > + case CHIP_ARUBA: > + fw_name = FIRMWARE_TAHITI; > + break; > + > + default: > + return -EINVAL; > + } > + > + r = request_firmware(&rdev->uvd_fw, fw_name, &pdev->dev); > + if (r) { > + dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", > + fw_name); > + platform_device_unregister(pdev); > + return r; > 
+ } > + > + platform_device_unregister(pdev); > + > + bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) + > + RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE; > + r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, > + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo); > + if (r) { > + dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); > + return r; > + } > + > + r = radeon_uvd_resume(rdev); > + if (r) > + return r; > + > + memset(rdev->uvd.cpu_addr, 0, bo_size); > + memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); > + > + r = radeon_uvd_suspend(rdev); > + if (r) > + return r; > + > + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { > + atomic_set(&rdev->uvd.handles[i], 0); > + rdev->uvd.filp[i] = NULL; > + } > + > + return 0; > +} > + > +void radeon_uvd_fini(struct radeon_device *rdev) > +{ > + radeon_uvd_suspend(rdev); > + radeon_bo_unref(&rdev->uvd.vcpu_bo); > +} > + > +int radeon_uvd_suspend(struct radeon_device *rdev) > +{ > + int r; > + > + if (rdev->uvd.vcpu_bo == NULL) > + return 0; > + > + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); > + if (!r) { > + radeon_bo_kunmap(rdev->uvd.vcpu_bo); > + radeon_bo_unpin(rdev->uvd.vcpu_bo); > + radeon_bo_unreserve(rdev->uvd.vcpu_bo); > + } > + return r; > +} > + > +int radeon_uvd_resume(struct radeon_device *rdev) > +{ > + int r; > + > + if (rdev->uvd.vcpu_bo == NULL) > + return -EINVAL; > + > + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); > + if (r) { > + radeon_bo_unref(&rdev->uvd.vcpu_bo); > + dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r); > + return r; > + } > + > + r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, > + &rdev->uvd.gpu_addr); > + if (r) { > + radeon_bo_unreserve(rdev->uvd.vcpu_bo); > + radeon_bo_unref(&rdev->uvd.vcpu_bo); > + dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r); > + return r; > + } > + > + r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr); > + if (r) { > + dev_err(rdev->dev, "(%d) UVD map failed\n", r); > + return r; > + 
} > + > + radeon_bo_unreserve(rdev->uvd.vcpu_bo); > + > + return 0; > +} > + > +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) > +{ > + rbo->placement.fpfn = 0 >> PAGE_SHIFT; > + rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; > +} > + > +void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) > +{ > + int i, r; > + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { > + if (rdev->uvd.filp[i] == filp) { > + uint32_t handle = atomic_read(&rdev->uvd.handles[i]); > + struct radeon_fence *fence; > + > + r = radeon_uvd_get_destroy_msg(rdev, > + R600_RING_TYPE_UVD_INDEX, handle, &fence); > + if (r) { > + DRM_ERROR("Error destroying UVD (%d)!\n", r); > + continue; > + } > + > + radeon_fence_wait(fence, false); > + radeon_fence_unref(&fence); > + > + rdev->uvd.filp[i] = NULL; > + atomic_set(&rdev->uvd.handles[i], 0); > + } > + } > +} > + > +static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *msg) > +{ > + uint32_t *map, msg_type, handle; > + int i, r; > + > + r = radeon_bo_kmap(msg, (void **)&map); > + if (r) > + return r; > + > + msg_type = map[1]; > + handle = map[2]; > + > + radeon_bo_kunmap(msg); > + > + if (handle == 0) { > + DRM_ERROR("Invalid UVD handle!\n"); > + return -EINVAL; > + } > + > + if (msg_type == 2) { > + /* it's a destroy msg, free the handle */ > + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) > + atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0); > + return 0; > + } > + > + /* create or decode, validate the handle */ > + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { > + if (atomic_read(&p->rdev->uvd.handles[i]) == handle) > + return 0; > + } > + /* handle not found try to alloc a new one */ > + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { > + if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) { > + p->rdev->uvd.filp[i] = p->filp; > + return 0; > + } > + } > + DRM_ERROR("No more free UVD handles!\n"); > + return -EINVAL; > +} > + > +static int radeon_uvd_cs_reloc(struct 
radeon_cs_parser *p, int data0, int data1) > +{ > + struct radeon_cs_chunk *relocs_chunk; > + struct radeon_cs_reloc *reloc; > + unsigned idx, cmd; > + uint64_t start, end; > + > + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; > + idx = radeon_get_ib_value(p, data1); > + if (idx >= relocs_chunk->length_dw) { > + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", > + idx, relocs_chunk->length_dw); > + return -EINVAL; > + } > + > + reloc = p->relocs_ptr[(idx / 4)]; > + start = reloc->lobj.gpu_offset; > + end = start + radeon_bo_size(reloc->robj); > + start += radeon_get_ib_value(p, data0); I am assuming there is no way for you to know the size that the uvd engine will write to ? You are not checking anything on uvd possibly overwritting after the bo end. > + > + p->ib.ptr[data0] = start & 0xFFFFFFFF; > + p->ib.ptr[data1] = start >> 32; > + > + > + cmd = radeon_get_ib_value(p, p->idx); > + if (cmd == 0) { > + if (end & 0xFFFFFFFFF0000000) { > + DRM_ERROR("msg buffer %LX-%LX out of 256MB segment!\n", > + start, end); > + return -EINVAL; > + } > + > + return radeon_uvd_cs_msg(p, reloc->robj); > + > + } > + > + if ((start & 0xFFFFFFFFF0000000) != (end & 0xFFFFFFFFF0000000)) { > + DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", > + start, end); > + return -EINVAL; > + } > + return 0; > +} > + > +int radeon_uvd_cs_parse(struct radeon_cs_parser *p) > +{ > + struct radeon_cs_packet pkt; > + int i, r, data0 = 0, data1 = 0; > + > + if (p->chunks[p->chunk_ib_idx].length_dw % 16) { > + DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", > + p->chunks[p->chunk_ib_idx].length_dw); > + return -EINVAL; > + } > + > + if (p->chunk_relocs_idx == -1) { > + DRM_ERROR("No relocation chunk !\n"); > + return -EINVAL; > + } > + > + > + do { > + r = radeon_cs_packet_parse(p, &pkt, p->idx); > + if (r) > + return r; > + switch (pkt.type) { > + case RADEON_PACKET_TYPE0: > + p->idx++; > + for (i = 0; i <= pkt.count; ++i) { > + switch (pkt.reg + i*4) { > + case 
UVD_GPCOM_VCPU_DATA0: > + data0 = p->idx; > + break; > + case UVD_GPCOM_VCPU_DATA1: > + data1 = p->idx; > + break; > + case UVD_GPCOM_VCPU_CMD: > + r = radeon_uvd_cs_reloc(p, data0, > + data1); > + if (r) > + return r; > + break; > + case UVD_ENGINE_CNTL: > + break; > + default: > + DRM_ERROR("Invalid reg 0x%X!\n", > + pkt.reg + i*4); > + return -EINVAL; > + } > + p->idx++; > + } > + break; > + case RADEON_PACKET_TYPE2: > + p->idx += pkt.count + 2; > + break; > + default: > + DRM_ERROR("Unknown packet type %d !\n", pkt.type); > + return -EINVAL; > + } > + } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); > + return 0; > +} > + > +static int radeon_uvd_send_msg(struct radeon_device *rdev, > + int ring, struct radeon_bo *bo, > + struct radeon_fence **fence) > +{ > + struct ttm_validate_buffer tv; > + struct list_head head; > + struct radeon_ib ib; > + uint64_t addr; > + int i, r; > + > + memset(&tv, 0, sizeof(tv)); > + tv.bo = &bo->tbo; > + > + INIT_LIST_HEAD(&head); > + list_add(&tv.head, &head); > + > + r = ttm_eu_reserve_buffers(&head); > + if (r) > + return r; > + > + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); > + radeon_uvd_force_into_uvd_segment(bo); > + > + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); > + if (r) { > + ttm_eu_backoff_reservation(&head); > + return r; > + } > + > + r = radeon_ib_get(rdev, ring, &ib, NULL, 16); > + if (r) { > + ttm_eu_backoff_reservation(&head); > + return r; > + } > + > + addr = radeon_bo_gpu_offset(bo); > + ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); > + ib.ptr[1] = addr; > + ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); > + ib.ptr[3] = addr >> 32; > + ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0); > + ib.ptr[5] = 0; > + for (i = 6; i < 16; ++i) > + ib.ptr[i] = PACKET2(0); > + ib.length_dw = 16; > + > + r = radeon_ib_schedule(rdev, &ib, NULL); > + if (r) { > + ttm_eu_backoff_reservation(&head); > + return r; > + } > + ttm_eu_fence_buffer_objects(&head, ib.fence); > + > + if (fence) > + 
*fence = radeon_fence_ref(ib.fence); > + > + radeon_ib_free(rdev, &ib); > + radeon_bo_unref(&bo); > + return 0; > +} > + > +/* multiple fence commands without any stream commands in between can > + crash the vcpu so just try to emmit a dummy create/destroy msg to > + avoid this */ > +int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, > + uint32_t handle, struct radeon_fence **fence) > +{ > + struct radeon_bo *bo; > + uint32_t *msg; > + int r, i; > + > + r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, > + RADEON_GEM_DOMAIN_VRAM, NULL, &bo); > + if (r) > + return r; > + > + r = radeon_bo_reserve(bo, false); > + if (r) { > + radeon_bo_unref(&bo); > + return r; > + } > + > + r = radeon_bo_kmap(bo, (void **)&msg); > + if (r) { > + radeon_bo_unreserve(bo); > + radeon_bo_unref(&bo); > + return r; > + } > + > + /* stitch together an UVD create msg */ > + msg[0] = 0x00000de4; > + msg[1] = 0x00000000; > + msg[2] = handle; > + msg[3] = 0x00000000; > + msg[4] = 0x00000000; > + msg[5] = 0x00000000; > + msg[6] = 0x00000000; > + msg[7] = 0x00000780; > + msg[8] = 0x00000440; > + msg[9] = 0x00000000; > + msg[10] = 0x01b37000; > + for (i = 11; i < 1024; ++i) > + msg[i] = 0x0; > + > + radeon_bo_kunmap(bo); > + radeon_bo_unreserve(bo); > + > + return radeon_uvd_send_msg(rdev, ring, bo, fence); > +} > + > +int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, > + uint32_t handle, struct radeon_fence **fence) > +{ > + struct radeon_bo *bo; > + uint32_t *msg; > + int r, i; > + > + r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, > + RADEON_GEM_DOMAIN_VRAM, NULL, &bo); > + if (r) > + return r; > + > + r = radeon_bo_reserve(bo, false); > + if (r) { > + radeon_bo_unref(&bo); > + return r; > + } > + > + r = radeon_bo_kmap(bo, (void **)&msg); > + if (r) { > + radeon_bo_unreserve(bo); > + radeon_bo_unref(&bo); > + return r; > + } > + > + /* stitch together an UVD destroy msg */ > + msg[0] = 0x00000de4; > + msg[1] = 0x00000002; > + msg[2] = handle; > + msg[3] 
= 0x00000000; > + for (i = 4; i < 1024; ++i) > + msg[i] = 0x0; > + > + radeon_bo_kunmap(bo); > + radeon_bo_unreserve(bo); > + > + return radeon_uvd_send_msg(rdev, ring, bo, fence); > +} > diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c > index d63fe1d..5a78cce 100644 > --- a/drivers/gpu/drm/radeon/rv770.c > +++ b/drivers/gpu/drm/radeon/rv770.c > @@ -68,6 +68,107 @@ u32 rv770_get_xclk(struct radeon_device *rdev) > return reference_clock; > } > > +int rv770_uvd_resume(struct radeon_device *rdev) > +{ > + uint64_t addr; > + uint32_t chip_id, size; > + int r; > + > + r = radeon_uvd_resume(rdev); > + if (r) > + return r; > + > + /* programm the VCPU memory controller bits 0-27 */ > + addr = rdev->uvd.gpu_addr >> 3; > + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; > + WREG32(UVD_VCPU_CACHE_OFFSET0, addr); > + WREG32(UVD_VCPU_CACHE_SIZE0, size); > + > + addr += size; > + size = RADEON_UVD_STACK_SIZE >> 3; > + WREG32(UVD_VCPU_CACHE_OFFSET1, addr); > + WREG32(UVD_VCPU_CACHE_SIZE1, size); > + > + addr += size; > + size = RADEON_UVD_HEAP_SIZE >> 3; > + WREG32(UVD_VCPU_CACHE_OFFSET2, addr); > + WREG32(UVD_VCPU_CACHE_SIZE2, size); > + > + /* bits 28-31 */ > + addr = (rdev->uvd.gpu_addr >> 28) & 0xF; > + WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); > + > + /* bits 32-39 */ > + addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; > + WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); > + > + /* tell firmware which hardware it is running on */ > + switch (rdev->family) { > + default: > + return -EINVAL; > + case CHIP_RV770: > + chip_id = 0x01000004; > + break; > + case CHIP_RV710: > + chip_id = 0x01000005; > + break; > + case CHIP_RV730: > + chip_id = 0x01000006; > + break; > + case CHIP_RV740: > + chip_id = 0x01000007; > + break; > + case CHIP_CYPRESS: > + chip_id = 0x01000008; > + break; > + case CHIP_JUNIPER: > + chip_id = 0x01000009; > + break; > + case CHIP_REDWOOD: > + chip_id = 0x0100000a; > + break; > + case CHIP_CEDAR: > 
+ chip_id = 0x0100000b; > + break; > + case CHIP_SUMO: > + chip_id = 0x0100000c; > + break; > + case CHIP_SUMO2: > + chip_id = 0x0100000d; > + break; > + case CHIP_PALM: > + chip_id = 0x0100000e; > + break; > + case CHIP_CAYMAN: > + chip_id = 0x0100000f; > + break; > + case CHIP_BARTS: > + chip_id = 0x01000010; > + break; > + case CHIP_TURKS: > + chip_id = 0x01000011; > + break; > + case CHIP_CAICOS: > + chip_id = 0x01000012; > + break; > + case CHIP_TAHITI: > + chip_id = 0x01000014; > + break; > + case CHIP_VERDE: > + chip_id = 0x01000015; > + break; > + case CHIP_PITCAIRN: > + chip_id = 0x01000016; > + break; > + case CHIP_ARUBA: > + chip_id = 0x01000017; > + break; > + } > + WREG32(UVD_VCPU_CHIP_ID, chip_id); > + > + return 0; > +} > + > u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) > { > struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; > @@ -1040,6 +1141,17 @@ static int rv770_startup(struct radeon_device *rdev) > return r; > } > > + r = rv770_uvd_resume(rdev); > + if (!r) { > + r = radeon_fence_driver_start_ring(rdev, > + R600_RING_TYPE_UVD_INDEX); > + if (r) > + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); > + } > + > + if (r) > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; > + > /* Enable IRQ */ > r = r600_irq_init(rdev); > if (r) { > @@ -1074,6 +1186,19 @@ static int rv770_startup(struct radeon_device *rdev) > if (r) > return r; > > + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + if (ring->ring_size) { > + r = radeon_ring_init(rdev, ring, ring->ring_size, > + R600_WB_UVD_RPTR_OFFSET, > + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (!r) > + r = r600_uvd_init(rdev); > + > + if (r) > + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); > + } > + > r = radeon_ib_pool_init(rdev); > if (r) { > dev_err(rdev->dev, "IB initialization failed (%d).\n", r); > @@ -1115,6 +1240,7 @@ int rv770_resume(struct radeon_device *rdev) > int rv770_suspend(struct 
radeon_device *rdev) > { > r600_audio_fini(rdev); > + radeon_uvd_suspend(rdev); > r700_cp_stop(rdev); > r600_dma_stop(rdev); > r600_irq_suspend(rdev); > @@ -1190,6 +1316,13 @@ int rv770_init(struct radeon_device *rdev) > rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; > r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); > > + r = radeon_uvd_init(rdev); > + if (!r) { > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; > + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], > + 4096); > + } > + > rdev->ih.ring_obj = NULL; > r600_ih_ring_init(rdev, 64 * 1024); > > @@ -1224,6 +1357,7 @@ void rv770_fini(struct radeon_device *rdev) > radeon_ib_pool_fini(rdev); > radeon_irq_kms_fini(rdev); > rv770_pcie_gart_fini(rdev); > + radeon_uvd_fini(rdev); > r600_vram_scratch_fini(rdev); > radeon_gem_fini(rdev); > radeon_fence_driver_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h > index c55f950..da158b54 100644 > --- a/drivers/gpu/drm/radeon/rv770d.h > +++ b/drivers/gpu/drm/radeon/rv770d.h > @@ -671,4 +671,18 @@ > # define TARGET_LINK_SPEED_MASK (0xf << 0) > # define SELECTABLE_DEEMPHASIS (1 << 6) > > +/* UVD */ > +#define UVD_LMI_EXT40_ADDR 0xf498 > +#define UVD_VCPU_CHIP_ID 0xf4d4 > +#define UVD_VCPU_CACHE_OFFSET0 0xf4d8 > +#define UVD_VCPU_CACHE_SIZE0 0xf4dc > +#define UVD_VCPU_CACHE_OFFSET1 0xf4e0 > +#define UVD_VCPU_CACHE_SIZE1 0xf4e4 > +#define UVD_VCPU_CACHE_OFFSET2 0xf4e8 > +#define UVD_VCPU_CACHE_SIZE2 0xf4ec > +#define UVD_LMI_ADDR_EXT 0xf594 > + > +#define UVD_RBC_RB_RPTR 0xf690 > +#define UVD_RBC_RB_WPTR 0xf694 > + > #endif > diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c > index bafbe32..cc9fe39 100644 > --- a/drivers/gpu/drm/radeon/si.c > +++ b/drivers/gpu/drm/radeon/si.c > @@ -4372,6 +4372,16 @@ static int si_startup(struct radeon_device *rdev) > return r; > } > > + r = rv770_uvd_resume(rdev); > + if (!r) { > + r = radeon_fence_driver_start_ring(rdev, > + 
R600_RING_TYPE_UVD_INDEX); > + if (r) > + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); > + } > + if (r) > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; > + > /* Enable IRQ */ > r = si_irq_init(rdev); > if (r) { > @@ -4429,6 +4439,18 @@ static int si_startup(struct radeon_device *rdev) > if (r) > return r; > > + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + if (ring->ring_size) { > + r = radeon_ring_init(rdev, ring, ring->ring_size, > + R600_WB_UVD_RPTR_OFFSET, > + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (!r) > + r = r600_uvd_init(rdev); > + if (r) > + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); > + } > + > r = radeon_ib_pool_init(rdev); > if (r) { > dev_err(rdev->dev, "IB initialization failed (%d).\n", r); > @@ -4472,6 +4494,8 @@ int si_suspend(struct radeon_device *rdev) > radeon_vm_manager_fini(rdev); > si_cp_enable(rdev, false); > cayman_dma_stop(rdev); > + r600_uvd_rbc_stop(rdev); > + radeon_uvd_suspend(rdev); > si_irq_suspend(rdev); > radeon_wb_disable(rdev); > si_pcie_gart_disable(rdev); > @@ -4557,6 +4581,13 @@ int si_init(struct radeon_device *rdev) > ring->ring_obj = NULL; > r600_ring_init(rdev, ring, 64 * 1024); > > + r = radeon_uvd_init(rdev); > + if (!r) { > + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + ring->ring_obj = NULL; > + r600_ring_init(rdev, ring, 4096); > + } > + > rdev->ih.ring_obj = NULL; > r600_ih_ring_init(rdev, 64 * 1024); > > @@ -4605,6 +4636,7 @@ void si_fini(struct radeon_device *rdev) > radeon_vm_manager_fini(rdev); > radeon_ib_pool_fini(rdev); > radeon_irq_kms_fini(rdev); > + radeon_uvd_fini(rdev); > si_pcie_gart_fini(rdev); > r600_vram_scratch_fini(rdev); > radeon_gem_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h > index 23fc08f..759f682 100644 > --- a/drivers/gpu/drm/radeon/sid.h > +++ b/drivers/gpu/drm/radeon/sid.h > @@ -798,6 +798,12 @@ > # define THREAD_TRACE_FINISH (55 << 0) > > /* > + * UVD > + */ > +#define 
UVD_RBC_RB_RPTR 0xF690 > +#define UVD_RBC_RB_WPTR 0xF694 > + > +/* > * PM4 > */ > #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ > diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h > index eeda917..cd085d1 100644 > --- a/include/uapi/drm/radeon_drm.h > +++ b/include/uapi/drm/radeon_drm.h > @@ -918,6 +918,7 @@ struct drm_radeon_gem_va { > #define RADEON_CS_RING_GFX 0 > #define RADEON_CS_RING_COMPUTE 1 > #define RADEON_CS_RING_DMA 2 > +#define RADEON_CS_RING_UVD 3 > /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */ > /* 0 = normal, + = higher priority, - = lower priority */ > > -- > 1.7.9.5 > Cheers, Jerome
Am 03.04.2013 16:53, schrieb Jerome Glisse: > On Wed, Apr 03, 2013 at 01:18:31AM +0200, Christian König wrote: >> [SNIP] >> >> /* hardcode those limit for now */ >> #define RADEON_VA_IB_OFFSET (1 << 20) >> #define RADEON_VA_RESERVED_SIZE (8 << 20) >> @@ -357,8 +360,9 @@ struct radeon_bo_list { >> struct ttm_validate_buffer tv; >> struct radeon_bo *bo; >> uint64_t gpu_offset; >> - unsigned rdomain; >> - unsigned wdomain; >> + bool written; >> + unsigned domain; >> + unsigned alt_domain; >> u32 tiling_flags; >> }; > I think that the change to the rdomain/wdomain should be in a patch > of its own. I think the change is fine but we had issue with change > that touched that part previously, would make bisecting and > understanding the change implication easier. Agree, I actually planed to do so, but for the whole IP review stuff we needed to maintain a more or less stable patch base. Long story, but I'm going to change it. >> >> @@ -826,7 +830,6 @@ struct radeon_cs_reloc { >> struct radeon_bo *robj; >> struct radeon_bo_list lobj; >> uint32_t handle; >> - uint32_t flags; >> }; > Why removing the flags ? iirc it's not really use right now but i > remember plan to use it. Ups, just a rebasing artifact. But when it's unused we should remove it, probably just not in this patch. 
>> >> [SNIP] >> >> +static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, int data0, int data1) >> +{ >> + struct radeon_cs_chunk *relocs_chunk; >> + struct radeon_cs_reloc *reloc; >> + unsigned idx, cmd; >> + uint64_t start, end; >> + >> + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; >> + idx = radeon_get_ib_value(p, data1); >> + if (idx >= relocs_chunk->length_dw) { >> + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", >> + idx, relocs_chunk->length_dw); >> + return -EINVAL; >> + } >> + >> + reloc = p->relocs_ptr[(idx / 4)]; >> + start = reloc->lobj.gpu_offset; >> + end = start + radeon_bo_size(reloc->robj); >> + start += radeon_get_ib_value(p, data0); > I am assuming there is no way for you to know the size that the uvd engine will write to ? > You are not checking anything on uvd possibly overwritting after the bo end. Yeah that gave me headache for a quite long time, too. The problem is to figure out how much is actually written you need to keep track of the whole lot of informations including the UVD session, create/decode/destroy messages and allot of fiddling with the codec specific parameters. And if I understand the UVD internals correctly even if we check everything there is no guarantee that a special crafted bitstream could not let UVD to write over the end of the buffer.... Is it ok if we but a big TODO on it for the initial patch? Cheers, Christian.
On Wed, Apr 03, 2013 at 05:53:55PM +0200, Christian König wrote: > Am 03.04.2013 16:53, schrieb Jerome Glisse: > >On Wed, Apr 03, 2013 at 01:18:31AM +0200, Christian König wrote: > >>[SNIP] > >> > >> /* hardcode those limit for now */ > >> #define RADEON_VA_IB_OFFSET (1 << 20) > >> #define RADEON_VA_RESERVED_SIZE (8 << 20) > >>@@ -357,8 +360,9 @@ struct radeon_bo_list { > >> struct ttm_validate_buffer tv; > >> struct radeon_bo *bo; > >> uint64_t gpu_offset; > >>- unsigned rdomain; > >>- unsigned wdomain; > >>+ bool written; > >>+ unsigned domain; > >>+ unsigned alt_domain; > >> u32 tiling_flags; > >> }; > >I think that the change to the rdomain/wdomain should be in a patch > >of its own. I think the change is fine but we had issue with change > >that touched that part previously, would make bisecting and > >understanding the change implication easier. > > Agree, I actually planed to do so, but for the whole IP review stuff > we needed to maintain a more or less stable patch base. Long story, > but I'm going to change it. > > >>@@ -826,7 +830,6 @@ struct radeon_cs_reloc { > >> struct radeon_bo *robj; > >> struct radeon_bo_list lobj; > >> uint32_t handle; > >>- uint32_t flags; > >> }; > >Why removing the flags ? iirc it's not really use right now but i > >remember plan to use it. > > Ups, just a rebasing artifact. But when it's unused we should remove > it, probably just not in this patch. 
> > >>[SNIP] > >> > >>+static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, int data0, int data1) > >>+{ > >>+ struct radeon_cs_chunk *relocs_chunk; > >>+ struct radeon_cs_reloc *reloc; > >>+ unsigned idx, cmd; > >>+ uint64_t start, end; > >>+ > >>+ relocs_chunk = &p->chunks[p->chunk_relocs_idx]; > >>+ idx = radeon_get_ib_value(p, data1); > >>+ if (idx >= relocs_chunk->length_dw) { > >>+ DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", > >>+ idx, relocs_chunk->length_dw); > >>+ return -EINVAL; > >>+ } > >>+ > >>+ reloc = p->relocs_ptr[(idx / 4)]; > >>+ start = reloc->lobj.gpu_offset; > >>+ end = start + radeon_bo_size(reloc->robj); > >>+ start += radeon_get_ib_value(p, data0); > >I am assuming there is no way for you to know the size that the uvd engine will write to ? > >You are not checking anything on uvd possibly overwritting after the bo end. > > Yeah that gave me headache for a quite long time, too. The problem > is to figure out how much is actually written you need to keep track > of the whole lot of informations including the UVD session, > create/decode/destroy messages and allot of fiddling with the codec > specific parameters. > > And if I understand the UVD internals correctly even if we check > everything there is no guarantee that a special crafted bitstream > could not let UVD to write over the end of the buffer.... > > Is it ok if we but a big TODO on it for the initial patch? > > Cheers, > Christian. I think i only need one assurance and i think for uvd this will be the case. If UVD block write past bo end can you be sure that no matter what it will overwritte to address > start ie it could not overwritte to begining of VRAM. I have big doubt on that given the 256M window, i fear that it might go back to writting to begining of memory where the page table is. 
Note that I think that, now that we have CP DMA page table entry updates, we can probably just move the page table to the end of VRAM; on 90% of GPUs with UVD this will be > 256M, which seems like a zone where UVD can never write. If we can have such assurance, I guess we can make UVD an option and add a very explicit comment stating that the UVD engine can be used as an exploit vector. Cheers, Jerome
Am 03.04.2013 19:10, schrieb Jerome Glisse: > On Wed, Apr 03, 2013 at 05:53:55PM +0200, Christian König wrote: >> Am 03.04.2013 16:53, schrieb Jerome Glisse: >>> On Wed, Apr 03, 2013 at 01:18:31AM +0200, Christian König wrote: >>>> [SNIP] >>>> >>>> /* hardcode those limit for now */ >>>> #define RADEON_VA_IB_OFFSET (1 << 20) >>>> #define RADEON_VA_RESERVED_SIZE (8 << 20) >>>> @@ -357,8 +360,9 @@ struct radeon_bo_list { >>>> struct ttm_validate_buffer tv; >>>> struct radeon_bo *bo; >>>> uint64_t gpu_offset; >>>> - unsigned rdomain; >>>> - unsigned wdomain; >>>> + bool written; >>>> + unsigned domain; >>>> + unsigned alt_domain; >>>> u32 tiling_flags; >>>> }; >>> I think that the change to the rdomain/wdomain should be in a patch >>> of its own. I think the change is fine but we had issue with change >>> that touched that part previously, would make bisecting and >>> understanding the change implication easier. >> Agree, I actually planed to do so, but for the whole IP review stuff >> we needed to maintain a more or less stable patch base. Long story, >> but I'm going to change it. >> >>>> @@ -826,7 +830,6 @@ struct radeon_cs_reloc { >>>> struct radeon_bo *robj; >>>> struct radeon_bo_list lobj; >>>> uint32_t handle; >>>> - uint32_t flags; >>>> }; >>> Why removing the flags ? iirc it's not really use right now but i >>> remember plan to use it. >> Ups, just a rebasing artifact. But when it's unused we should remove >> it, probably just not in this patch. 
>> >>>> [SNIP] >>>> >>>> +static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, int data0, int data1) >>>> +{ >>>> + struct radeon_cs_chunk *relocs_chunk; >>>> + struct radeon_cs_reloc *reloc; >>>> + unsigned idx, cmd; >>>> + uint64_t start, end; >>>> + >>>> + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; >>>> + idx = radeon_get_ib_value(p, data1); >>>> + if (idx >= relocs_chunk->length_dw) { >>>> + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", >>>> + idx, relocs_chunk->length_dw); >>>> + return -EINVAL; >>>> + } >>>> + >>>> + reloc = p->relocs_ptr[(idx / 4)]; >>>> + start = reloc->lobj.gpu_offset; >>>> + end = start + radeon_bo_size(reloc->robj); >>>> + start += radeon_get_ib_value(p, data0); >>> I am assuming there is no way for you to know the size that the uvd engine will write to ? >>> You are not checking anything on uvd possibly overwritting after the bo end. >> Yeah that gave me headache for a quite long time, too. The problem >> is to figure out how much is actually written you need to keep track >> of the whole lot of informations including the UVD session, >> create/decode/destroy messages and allot of fiddling with the codec >> specific parameters. >> >> And if I understand the UVD internals correctly even if we check >> everything there is no guarantee that a special crafted bitstream >> could not let UVD to write over the end of the buffer.... >> >> Is it ok if we but a big TODO on it for the initial patch? >> >> Cheers, >> Christian. > I think i only need one assurance and i think for uvd this will be the case. > If UVD block write past bo end can you be sure that no matter what it will > overwritte to address > start ie it could not overwritte to begining of VRAM. > > I have big doubt on that given the 256M window, i fear that it might go back > to writting to begining of memory where the page table is. 
Crafting an attack from it would still be a bit tricky because it is compressed image data that gets written, but nevertheless it is indeed possible. > Note that i think that now that we have cp dma pagetable entry update we can > probably just move the pagetable to end of vram on 90% GPU with UVD this will > be > 256M which seems like a zone where UVD can never write. Well, not exactly: it is planned that the 256M limit goes away with some of the next hw generations. And at least at this point we need to make sure that UVD never writes somewhere it shouldn't. Anyway, moving the page table to non-CPU-accessible VRAM sounds like a pretty good idea. > If we can have such assurance i guess we can make uvd as an option and make > a very explicit comment stating that UVD engine can be use as an exploit > vector path. I think I will just sit down and implement size checking, at least for the destination buffer, because after all that's just a texture. And for the reference buffer I may just use what userspace sends to the hardware as the buffer size, and do a sanity check on that one as well. Ok, I need to think about it a bit more. > Jerome >
On Tue, Apr 2, 2013 at 7:18 PM, Christian König <deathsimple@vodafone.de> wrote: > Just everything needed to decode videos using UVD. > > v6: just all the bugfixes and support for R7xx-SI merged in one patch > v7: UVD_CGC_GATE is a write only register, lockup detection fix > > Signed-off-by: Christian König <deathsimple@vodafone.de> > --- <snip> > diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c > new file mode 100644 > index 0000000..8ab7bb9 > --- /dev/null > +++ b/drivers/gpu/drm/radeon/radeon_uvd.c > @@ -0,0 +1,521 @@ > +/* > + * Copyright 2011 Advanced Micro Devices, Inc. > + * All Rights Reserved. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > + * "Software"), to deal in the Software without restriction, including > + * without limitation the rights to use, copy, modify, merge, publish, > + * distribute, sub license, and/or sell copies of the Software, and to > + * permit persons to whom the Software is furnished to do so, subject to > + * the following conditions: > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL > + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, > + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE > + * USE OR OTHER DEALINGS IN THE SOFTWARE. > + * > + * The above copyright notice and this permission notice (including the > + * next paragraph) shall be included in all copies or substantial portions > + * of the Software. 
> + * > + */ > +/* > + * Authors: > + * Christian König <deathsimple@vodafone.de> > + */ > + > +#include <linux/firmware.h> > +#include <linux/module.h> > +#include <drm/drmP.h> > +#include <drm/drm.h> > + > +#include "radeon.h" > +#include "r600d.h" > + > +/* Firmware Names */ > +#define FIRMWARE_RV770 "radeon/RV770_uvd.bin" > +#define FIRMWARE_RV710 "radeon/RV710_uvd.bin" > +#define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" > +#define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" > +#define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" > + > +MODULE_FIRMWARE(FIRMWARE_RV770); > +MODULE_FIRMWARE(FIRMWARE_RV710); > +MODULE_FIRMWARE(FIRMWARE_CYPRESS); > +MODULE_FIRMWARE(FIRMWARE_SUMO); > +MODULE_FIRMWARE(FIRMWARE_TAHITI); > + > +int radeon_uvd_init(struct radeon_device *rdev) > +{ > + struct platform_device *pdev; > + unsigned long bo_size; > + const char *fw_name; > + int i, r; > + > + pdev = platform_device_register_simple("radeon_uvd", 0, NULL, 0); > + r = IS_ERR(pdev); > + if (r) { > + dev_err(rdev->dev, "radeon_uvd: Failed to register firmware\n"); > + return -EINVAL; > + } > + > + switch (rdev->family) { > + case CHIP_RV770: > + fw_name = FIRMWARE_RV770; > + break; > + > + case CHIP_RV710: > + case CHIP_RV730: > + case CHIP_RV740: > + fw_name = FIRMWARE_RV710; > + break; > + > + case CHIP_CYPRESS: We are missing CHIP_HEMLOCK here. Alex > + case CHIP_JUNIPER: > + case CHIP_REDWOOD: > + case CHIP_CEDAR: > + fw_name = FIRMWARE_CYPRESS; > + break; > + > + case CHIP_SUMO: > + case CHIP_SUMO2: > + case CHIP_PALM: > + case CHIP_CAYMAN: > + case CHIP_BARTS: > + case CHIP_TURKS: > + case CHIP_CAICOS: > + fw_name = FIRMWARE_SUMO; > + break; > + > + case CHIP_TAHITI: > + case CHIP_VERDE: > + case CHIP_PITCAIRN: > + case CHIP_ARUBA: > + fw_name = FIRMWARE_TAHITI; > + break; > + > + default: > + return -EINVAL; > + }
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index bf17252..86c5e36 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -76,7 +76,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \ evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \ atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \ - si_blit_shaders.o radeon_prime.o + si_blit_shaders.o radeon_prime.o radeon_uvd.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 305a657..18b66ff 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3360,6 +3360,9 @@ restart_ih: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); break; } + case 124: /* UVD */ + DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); + radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); break; case 146: case 147: @@ -3571,7 +3574,7 @@ int evergreen_copy_dma(struct radeon_device *rdev, static int evergreen_startup(struct radeon_device *rdev) { - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + struct radeon_ring *ring; int r; /* enable pcie gen2 link */ @@ -3638,6 +3641,17 @@ static int evergreen_startup(struct radeon_device *rdev) return r; } + r = rv770_uvd_resume(rdev); + if (!r) { + r = radeon_fence_driver_start_ring(rdev, + R600_RING_TYPE_UVD_INDEX); + if (r) + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); + } + + if (r) + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + /* Enable IRQ */ r = r600_irq_init(rdev); if (r) { @@ -3647,6 +3661,7 @@ static int evergreen_startup(struct radeon_device *rdev) } evergreen_irq_set(rdev); + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, 
RADEON_WB_CP_RPTR_OFFSET, R600_CP_RB_RPTR, R600_CP_RB_WPTR, 0, 0xfffff, RADEON_CP_PACKET2); @@ -3670,6 +3685,19 @@ static int evergreen_startup(struct radeon_device *rdev) if (r) return r; + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, + R600_WB_UVD_RPTR_OFFSET, + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (!r) + r = r600_uvd_init(rdev); + + if (r) + DRM_ERROR("radeon: error initializing UVD (%d).\n", r); + } + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -3716,8 +3744,10 @@ int evergreen_resume(struct radeon_device *rdev) int evergreen_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); + radeon_uvd_suspend(rdev); r700_cp_stop(rdev); r600_dma_stop(rdev); + r600_uvd_rbc_stop(rdev); evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); evergreen_pcie_gart_disable(rdev); @@ -3797,6 +3827,13 @@ int evergreen_init(struct radeon_device *rdev) rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); + r = radeon_uvd_init(rdev); + if (!r) { + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], + 4096); + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -3843,6 +3880,7 @@ void evergreen_fini(struct radeon_device *rdev) radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); evergreen_pcie_gart_fini(rdev); + radeon_uvd_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); radeon_fence_driver_fini(rdev); diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 982d25a..c5d873e 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -992,6 +992,13 @@ # define TARGET_LINK_SPEED_MASK (0xf << 0) # define SELECTABLE_DEEMPHASIS (1 << 6) + +/* + * UVD + */ +#define 
UVD_RBC_RB_RPTR 0xf690 +#define UVD_RBC_RB_WPTR 0xf694 + /* * PM4 */ diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 27769e7..ac944f5 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -931,6 +931,23 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) radeon_ring_write(ring, 10); /* poll interval */ } +void cayman_uvd_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + uint64_t addr = semaphore->gpu_addr; + + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); + radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); +} + static void cayman_cp_enable(struct radeon_device *rdev, bool enable) { if (enable) @@ -1682,6 +1699,16 @@ static int cayman_startup(struct radeon_device *rdev) return r; } + r = rv770_uvd_resume(rdev); + if (!r) { + r = radeon_fence_driver_start_ring(rdev, + R600_RING_TYPE_UVD_INDEX); + if (r) + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); + } + if (r) + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); if (r) { dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); @@ -1748,6 +1775,18 @@ static int cayman_startup(struct radeon_device *rdev) if (r) return r; + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, + R600_WB_UVD_RPTR_OFFSET, + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (!r) + r = r600_uvd_init(rdev); + if (r) + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); + } + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed 
(%d).\n", r); @@ -1794,6 +1833,8 @@ int cayman_suspend(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); cayman_cp_enable(rdev, false); cayman_dma_stop(rdev); + r600_uvd_rbc_stop(rdev); + radeon_uvd_suspend(rdev); evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); cayman_pcie_gart_disable(rdev); @@ -1868,6 +1909,13 @@ int cayman_init(struct radeon_device *rdev) ring->ring_obj = NULL; r600_ring_init(rdev, ring, 64 * 1024); + r = radeon_uvd_init(rdev); + if (!r) { + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -1919,6 +1967,7 @@ void cayman_fini(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); + radeon_uvd_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 079dee2..3731f6c 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -486,6 +486,15 @@ # define CACHE_FLUSH_AND_INV_EVENT (0x16 << 0) /* + * UVD + */ +#define UVD_SEMA_ADDR_LOW 0xEF00 +#define UVD_SEMA_ADDR_HIGH 0xEF04 +#define UVD_SEMA_CMD 0xEF08 +#define UVD_RBC_RB_RPTR 0xF690 +#define UVD_RBC_RB_WPTR 0xF694 + +/* * PM4 */ #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 0740db3..ca6117d 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2552,6 +2552,185 @@ void r600_dma_fini(struct radeon_device *rdev) } /* + * UVD + */ +int r600_uvd_rbc_start(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + uint64_t rptr_addr; + uint32_t rb_bufsz, tmp; + int r; + + rptr_addr = rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET; + + if (upper_32_bits(rptr_addr) != upper_32_bits(ring->gpu_addr)) { + DRM_ERROR("UVD 
ring and rptr not in the same 4GB segment!\n"); + return -EINVAL; + } + + /* force RBC into idle state */ + WREG32(UVD_RBC_RB_CNTL, 0x11010101); + + /* Set the write pointer delay */ + WREG32(UVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2); + + /* programm the 4GB memory segment for rptr and ring buffer */ + WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) | + (0x7 << 16) | (0x1 << 31)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(UVD_RBC_RB_RPTR, 0x0); + + ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR); + WREG32(UVD_RBC_RB_WPTR, ring->wptr); + + /* set the ring address */ + WREG32(UVD_RBC_RB_BASE, ring->gpu_addr); + + /* Set ring buffer size */ + rb_bufsz = drm_order(ring->ring_size); + rb_bufsz = (0x1 << 8) | rb_bufsz; + WREG32(UVD_RBC_RB_CNTL, rb_bufsz); + + ring->ready = true; + r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring); + if (r) { + ring->ready = false; + return r; + } + + r = radeon_ring_lock(rdev, ring, 10); + if (r) { + DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r); + return r; + } + + tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0); + radeon_ring_write(ring, tmp); + radeon_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0); + radeon_ring_write(ring, tmp); + radeon_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0); + radeon_ring_write(ring, tmp); + radeon_ring_write(ring, 0xFFFFF); + + /* Clear timeout status bits */ + radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0)); + radeon_ring_write(ring, 0x8); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0)); + radeon_ring_write(ring, 1); + + radeon_ring_unlock_commit(rdev, ring); + + return 0; +} + +void r600_uvd_rbc_stop(struct radeon_device *rdev) +{ + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + + /* force RBC into idle state */ + WREG32(UVD_RBC_RB_CNTL, 0x11010101); + 
ring->ready = false; +} + +int r600_uvd_init(struct radeon_device *rdev) +{ + int i, j, r; + + /* disable clock gating */ + WREG32(UVD_CGC_GATE, 0); + + /* disable interupt */ + WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1)); + + /* put LMI, VCPU, RBC etc... into reset */ + WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET | + LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET | + CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET); + mdelay(5); + + /* take UVD block out of reset */ + WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD); + mdelay(5); + + /* initialize UVD memory controller */ + WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) | + (1 << 21) | (1 << 9) | (1 << 20)); + + /* disable byte swapping */ + WREG32(UVD_LMI_SWAP_CNTL, 0); + WREG32(UVD_MP_SWAP_CNTL, 0); + + WREG32(UVD_MPC_SET_MUXA0, 0x40c2040); + WREG32(UVD_MPC_SET_MUXA1, 0x0); + WREG32(UVD_MPC_SET_MUXB0, 0x40c2040); + WREG32(UVD_MPC_SET_MUXB1, 0x0); + WREG32(UVD_MPC_SET_ALU, 0); + WREG32(UVD_MPC_SET_MUX, 0x88); + + /* Stall UMC */ + WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); + + /* take all subblocks out of reset, except VCPU */ + WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); + mdelay(5); + + /* enable VCPU clock */ + WREG32(UVD_VCPU_CNTL, 1 << 9); + + /* enable UMC */ + WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); + + /* boot up the VCPU */ + WREG32(UVD_SOFT_RESET, 0); + mdelay(10); + + WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); + + for (i = 0; i < 10; ++i) { + uint32_t status; + for (j = 0; j < 100; ++j) { + status = RREG32(UVD_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); + WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET); + mdelay(10); + WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET); + mdelay(10); + r = -1; + } + if (r) { + DRM_ERROR("UVD not responding, giving up!!!\n"); + return r; + } + /* enable interupt */ + WREG32_P(UVD_MASTINT_EN, 
3<<1, ~(3 << 1)); + + r = r600_uvd_rbc_start(rdev); + if (r) + return r; + + DRM_INFO("UVD initialized successfully.\n"); + return 0; +} + +/* * GPU scratch registers helpers function. */ void r600_scratch_init(struct radeon_device *rdev) @@ -2660,6 +2839,40 @@ int r600_dma_ring_test(struct radeon_device *rdev, return r; } +int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD); + r = radeon_ring_lock(rdev, ring, 3); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); + radeon_ring_write(ring, 0xDEADBEEF); + radeon_ring_unlock_commit(rdev, ring); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(UVD_CONTEXT_ID); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < rdev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + return r; +} + /* * CP fences/semaphores */ @@ -2711,6 +2924,30 @@ void r600_fence_ring_emit(struct radeon_device *rdev, } } +void r600_uvd_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr; + + radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); + radeon_ring_write(ring, upper_32_bits(addr) & 0xff); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); + 
radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); + radeon_ring_write(ring, 2); + return; +} + void r600_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, @@ -2780,6 +3017,23 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, upper_32_bits(addr) & 0xff); } +void r600_uvd_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + uint64_t addr = semaphore->gpu_addr; + + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); + + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); + radeon_ring_write(ring, emit_wait ? 1 : 0); +} + int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, @@ -3183,6 +3437,16 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) radeon_ring_write(ring, ib->length_dw); } +void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0)); + radeon_ring_write(ring, ib->gpu_addr); + radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0)); + radeon_ring_write(ring, ib->length_dw); +} + int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) { struct radeon_ib ib; @@ -3300,6 +3564,33 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } +int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + struct radeon_fence *fence; + int r; + + r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); + if (r) { + DRM_ERROR("radeon: failed to get create msg (%d).\n", r); + return r; + } + + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, 
&fence); + if (r) { + DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); + return r; + } + + r = radeon_fence_wait(fence, false); + if (r) { + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + return r; + } + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + radeon_fence_unref(&fence); + return r; +} + /** * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine * diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index a42ba11..441bdb8 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -691,6 +691,7 @@ #define SRBM_SOFT_RESET 0xe60 # define SOFT_RESET_DMA (1 << 12) # define SOFT_RESET_RLC (1 << 13) +# define SOFT_RESET_UVD (1 << 18) # define RV770_SOFT_RESET_DMA (1 << 20) #define CP_INT_CNTL 0xc124 @@ -1143,6 +1144,66 @@ # define AFMT_AZ_AUDIO_ENABLE_CHG_ACK (1 << 30) /* + * UVD + */ +#define UVD_SEMA_ADDR_LOW 0xef00 +#define UVD_SEMA_ADDR_HIGH 0xef04 +#define UVD_SEMA_CMD 0xef08 + +#define UVD_GPCOM_VCPU_CMD 0xef0c +#define UVD_GPCOM_VCPU_DATA0 0xef10 +#define UVD_GPCOM_VCPU_DATA1 0xef14 +#define UVD_ENGINE_CNTL 0xef18 + +#define UVD_SEMA_CNTL 0xf400 +#define UVD_RB_ARB_CTRL 0xf480 + +#define UVD_LMI_EXT40_ADDR 0xf498 +#define UVD_CGC_GATE 0xf4a8 +#define UVD_LMI_CTRL2 0xf4f4 +#define UVD_MASTINT_EN 0xf500 +#define UVD_LMI_ADDR_EXT 0xf594 +#define UVD_LMI_CTRL 0xf598 +#define UVD_LMI_SWAP_CNTL 0xf5b4 +#define UVD_MP_SWAP_CNTL 0xf5bC +#define UVD_MPC_CNTL 0xf5dC +#define UVD_MPC_SET_MUXA0 0xf5e4 +#define UVD_MPC_SET_MUXA1 0xf5e8 +#define UVD_MPC_SET_MUXB0 0xf5eC +#define UVD_MPC_SET_MUXB1 0xf5f0 +#define UVD_MPC_SET_MUX 0xf5f4 +#define UVD_MPC_SET_ALU 0xf5f8 + +#define UVD_VCPU_CNTL 0xf660 +#define UVD_SOFT_RESET 0xf680 +#define RBC_SOFT_RESET (1<<0) +#define LBSI_SOFT_RESET (1<<1) +#define LMI_SOFT_RESET (1<<2) +#define VCPU_SOFT_RESET (1<<3) +#define CSM_SOFT_RESET (1<<5) +#define CXW_SOFT_RESET (1<<6) +#define TAP_SOFT_RESET (1<<7) +#define LMI_UMC_SOFT_RESET (1<<13) +#define 
UVD_RBC_IB_BASE 0xf684 +#define UVD_RBC_IB_SIZE 0xf688 +#define UVD_RBC_RB_BASE 0xf68c +#define UVD_RBC_RB_RPTR 0xf690 +#define UVD_RBC_RB_WPTR 0xf694 +#define UVD_RBC_RB_WPTR_CNTL 0xf698 + +#define UVD_STATUS 0xf6bc + +#define UVD_SEMA_TIMEOUT_STATUS 0xf6c0 +#define UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL 0xf6c4 +#define UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL 0xf6c8 +#define UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL 0xf6cc + +#define UVD_RBC_RB_CNTL 0xf6a4 +#define UVD_RBC_RB_RPTR_ADDR 0xf6a8 + +#define UVD_CONTEXT_ID 0xf6f4 + +/* * PM4 */ #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8263af3..3f5572d 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -109,24 +109,27 @@ extern int radeon_lockup_timeout; #define RADEON_BIOS_NUM_SCRATCH 8 /* max number of rings */ -#define RADEON_NUM_RINGS 5 +#define RADEON_NUM_RINGS 6 /* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL /* internal ring indices */ /* r1xx+ has gfx CP ring */ -#define RADEON_RING_TYPE_GFX_INDEX 0 +#define RADEON_RING_TYPE_GFX_INDEX 0 /* cayman has 2 compute CP rings */ -#define CAYMAN_RING_TYPE_CP1_INDEX 1 -#define CAYMAN_RING_TYPE_CP2_INDEX 2 +#define CAYMAN_RING_TYPE_CP1_INDEX 1 +#define CAYMAN_RING_TYPE_CP2_INDEX 2 /* R600+ has an async dma ring */ #define R600_RING_TYPE_DMA_INDEX 3 /* cayman add a second async dma ring */ #define CAYMAN_RING_TYPE_DMA1_INDEX 4 +/* R600+ */ +#define R600_RING_TYPE_UVD_INDEX 5 + /* hardcode those limit for now */ #define RADEON_VA_IB_OFFSET (1 << 20) #define RADEON_VA_RESERVED_SIZE (8 << 20) @@ -357,8 +360,9 @@ struct radeon_bo_list { struct ttm_validate_buffer tv; struct radeon_bo *bo; uint64_t gpu_offset; - unsigned rdomain; - unsigned wdomain; + bool written; + unsigned domain; + unsigned alt_domain; u32 tiling_flags; }; @@ -826,7 +830,6 @@ struct radeon_cs_reloc { struct radeon_bo *robj; struct 
radeon_bo_list lobj; uint32_t handle; - uint32_t flags; }; struct radeon_cs_chunk { @@ -918,6 +921,7 @@ struct radeon_wb { #define R600_WB_DMA_RPTR_OFFSET 1792 #define R600_WB_IH_WPTR_OFFSET 2048 #define CAYMAN_WB_DMA1_RPTR_OFFSET 2304 +#define R600_WB_UVD_RPTR_OFFSET 2560 #define R600_WB_EVENT_OFFSET 3072 /** @@ -1118,6 +1122,33 @@ struct radeon_pm { int radeon_pm_get_type_index(struct radeon_device *rdev, enum radeon_pm_state_type ps_type, int instance); +/* + * UVD + */ +#define RADEON_MAX_UVD_HANDLES 10 +#define RADEON_UVD_STACK_SIZE (1024*1024) +#define RADEON_UVD_HEAP_SIZE (1024*1024) + +struct radeon_uvd { + struct radeon_bo *vcpu_bo; + void *cpu_addr; + uint64_t gpu_addr; + atomic_t handles[RADEON_MAX_UVD_HANDLES]; + struct drm_file *filp[RADEON_MAX_UVD_HANDLES]; +}; + +int radeon_uvd_init(struct radeon_device *rdev); +void radeon_uvd_fini(struct radeon_device *rdev); +int radeon_uvd_suspend(struct radeon_device *rdev); +int radeon_uvd_resume(struct radeon_device *rdev); +int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence); +int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence); +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo); +void radeon_uvd_free_handles(struct radeon_device *rdev, + struct drm_file *filp); +int radeon_uvd_cs_parse(struct radeon_cs_parser *parser); struct r600_audio { int channels; @@ -1608,6 +1639,7 @@ struct radeon_device { struct radeon_asic *asic; struct radeon_gem gem; struct radeon_pm pm; + struct radeon_uvd uvd; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; struct radeon_wb wb; struct radeon_dummy_page dummy_page; @@ -1621,6 +1653,7 @@ struct radeon_device { const struct firmware *rlc_fw; /* r6/700 RLC firmware */ const struct firmware *mc_fw; /* NI MC firmware */ const struct firmware *ce_fw; /* SI CE firmware */ + const struct firmware *uvd_fw; /* UVD firmware */ struct r600_blit 
r600_blit; struct r600_vram_scratch vram_scratch; int msi_enabled; /* msi enabled */ diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index aba0a89..a7a7b2b 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1130,6 +1130,15 @@ static struct radeon_asic rv770_asic = { .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &r600_dma_is_lockup, + }, + [R600_RING_TYPE_UVD_INDEX] = { + .ib_execute = &r600_uvd_ib_execute, + .emit_fence = &r600_uvd_fence_emit, + .emit_semaphore = &r600_uvd_semaphore_emit, + .cs_parse = &radeon_uvd_cs_parse, + .ring_test = &r600_uvd_ring_test, + .ib_test = &r600_uvd_ib_test, + .is_lockup = &radeon_ring_test_lockup, } }, .irq = { @@ -1216,6 +1225,15 @@ static struct radeon_asic evergreen_asic = { .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &evergreen_dma_is_lockup, + }, + [R600_RING_TYPE_UVD_INDEX] = { + .ib_execute = &r600_uvd_ib_execute, + .emit_fence = &r600_uvd_fence_emit, + .emit_semaphore = &r600_uvd_semaphore_emit, + .cs_parse = &radeon_uvd_cs_parse, + .ring_test = &r600_uvd_ring_test, + .ib_test = &r600_uvd_ib_test, + .is_lockup = &radeon_ring_test_lockup, } }, .irq = { @@ -1302,6 +1320,15 @@ static struct radeon_asic sumo_asic = { .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &evergreen_dma_is_lockup, + }, + [R600_RING_TYPE_UVD_INDEX] = { + .ib_execute = &r600_uvd_ib_execute, + .emit_fence = &r600_uvd_fence_emit, + .emit_semaphore = &r600_uvd_semaphore_emit, + .cs_parse = &radeon_uvd_cs_parse, + .ring_test = &r600_uvd_ring_test, + .ib_test = &r600_uvd_ib_test, + .is_lockup = &radeon_ring_test_lockup, } }, .irq = { @@ -1388,6 +1415,15 @@ static struct radeon_asic btc_asic = { .ring_test = &r600_dma_ring_test, .ib_test = &r600_dma_ib_test, .is_lockup = &evergreen_dma_is_lockup, + }, + [R600_RING_TYPE_UVD_INDEX] = { + .ib_execute = &r600_uvd_ib_execute, + 
.emit_fence = &r600_uvd_fence_emit, + .emit_semaphore = &r600_uvd_semaphore_emit, + .cs_parse = &radeon_uvd_cs_parse, + .ring_test = &r600_uvd_ring_test, + .ib_test = &r600_uvd_ib_test, + .is_lockup = &radeon_ring_test_lockup, } }, .irq = { @@ -1517,6 +1553,15 @@ static struct radeon_asic cayman_asic = { .ib_test = &r600_dma_ib_test, .is_lockup = &cayman_dma_is_lockup, .vm_flush = &cayman_dma_vm_flush, + }, + [R600_RING_TYPE_UVD_INDEX] = { + .ib_execute = &r600_uvd_ib_execute, + .emit_fence = &r600_uvd_fence_emit, + .emit_semaphore = &cayman_uvd_semaphore_emit, + .cs_parse = &radeon_uvd_cs_parse, + .ring_test = &r600_uvd_ring_test, + .ib_test = &r600_uvd_ib_test, + .is_lockup = &radeon_ring_test_lockup, } }, .irq = { @@ -1646,6 +1691,15 @@ static struct radeon_asic trinity_asic = { .ib_test = &r600_dma_ib_test, .is_lockup = &cayman_dma_is_lockup, .vm_flush = &cayman_dma_vm_flush, + }, + [R600_RING_TYPE_UVD_INDEX] = { + .ib_execute = &r600_uvd_ib_execute, + .emit_fence = &r600_uvd_fence_emit, + .emit_semaphore = &cayman_uvd_semaphore_emit, + .cs_parse = &radeon_uvd_cs_parse, + .ring_test = &r600_uvd_ring_test, + .ib_test = &r600_uvd_ib_test, + .is_lockup = &radeon_ring_test_lockup, } }, .irq = { @@ -1775,6 +1829,15 @@ static struct radeon_asic si_asic = { .ib_test = &r600_dma_ib_test, .is_lockup = &si_dma_is_lockup, .vm_flush = &si_dma_vm_flush, + }, + [R600_RING_TYPE_UVD_INDEX] = { + .ib_execute = &r600_uvd_ib_execute, + .emit_fence = &r600_uvd_fence_emit, + .emit_semaphore = &cayman_uvd_semaphore_emit, + .cs_parse = &radeon_uvd_cs_parse, + .ring_test = &r600_uvd_ring_test, + .ib_test = &r600_uvd_ib_test, + .is_lockup = &radeon_ring_test_lockup, } }, .irq = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 3535f73..515db96 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -330,6 +330,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); void 
r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); +int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, struct radeon_fence **fence); @@ -392,6 +393,19 @@ int r600_mc_wait_for_idle(struct radeon_device *rdev); u32 r600_get_xclk(struct radeon_device *rdev); uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev); +/* uvd */ +int r600_uvd_init(struct radeon_device *rdev); +int r600_uvd_rbc_start(struct radeon_device *rdev); +void r600_uvd_rbc_stop(struct radeon_device *rdev); +int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); +void r600_uvd_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +void r600_uvd_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); +void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); + /* * rv770,rv730,rv710,rv740 */ @@ -409,6 +423,7 @@ int rv770_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct radeon_fence **fence); u32 rv770_get_xclk(struct radeon_device *rdev); +int rv770_uvd_resume(struct radeon_device *rdev); /* * evergreen @@ -465,6 +480,10 @@ int evergreen_copy_dma(struct radeon_device *rdev, */ void cayman_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); +void cayman_uvd_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev); int cayman_init(struct radeon_device *rdev); void cayman_fini(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 
7d66e01..532ff68 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -75,18 +75,34 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs_ptr[i] = &p->relocs[i]; p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj); p->relocs[i].lobj.bo = p->relocs[i].robj; - p->relocs[i].lobj.wdomain = r->write_domain; - p->relocs[i].lobj.rdomain = r->read_domains; + p->relocs[i].lobj.written = !!r->write_domain; + + /* the first reloc of an UVD job is the + msg and that must be in VRAM */ + if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) { + + p->relocs[i].lobj.domain = + RADEON_GEM_DOMAIN_VRAM; + + p->relocs[i].lobj.alt_domain = + RADEON_GEM_DOMAIN_VRAM; + } else { + uint32_t domain = r->write_domain ? + r->write_domain : r->read_domains; + p->relocs[i].lobj.domain = domain; + if (domain == RADEON_GEM_DOMAIN_VRAM) + domain |= RADEON_GEM_DOMAIN_GTT; + p->relocs[i].lobj.alt_domain = domain; + } p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; p->relocs[i].handle = r->handle; - p->relocs[i].flags = r->flags; radeon_bo_list_add_object(&p->relocs[i].lobj, &p->validated); } else p->relocs[i].handle = 0; } - return radeon_bo_list_validate(&p->validated); + return radeon_bo_list_validate(&p->validated, p->ring); } static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) @@ -121,6 +137,9 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority return -EINVAL; } break; + case RADEON_CS_RING_UVD: + p->ring = R600_RING_TYPE_UVD_INDEX; + break; } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 3435625..82fe183 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -31,9 +31,9 @@ #include <linux/seq_file.h> #include <linux/atomic.h> #include <linux/wait.h> -#include <linux/list.h> #include <linux/kref.h> #include <linux/slab.h> +#include <linux/firmware.h> #include 
<drm/drmP.h> #include "radeon_reg.h" #include "radeon.h" @@ -767,8 +767,21 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) { - rdev->fence_drv[ring].scratch_reg = 0; - index = R600_WB_EVENT_OFFSET + ring * 4; + if (ring != R600_RING_TYPE_UVD_INDEX) { + rdev->fence_drv[ring].scratch_reg = 0; + index = R600_WB_EVENT_OFFSET + ring * 4; + rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; + rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + + index; + + } else { + /* put fence directly behind firmware */ + rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + + rdev->uvd_fw->size; + rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + + rdev->uvd_fw->size; + } + } else { r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg); if (r) { @@ -778,9 +791,9 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) index = RADEON_WB_SCRATCH_OFFSET + rdev->fence_drv[ring].scratch_reg - rdev->scratch.reg_base; + rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; + rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; } - rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; - rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring); rdev->fence_drv[ring].initialized = true; dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index c75cb2c..3019759 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -513,6 +513,7 @@ void radeon_driver_preclose_kms(struct drm_device *dev, rdev->hyperz_filp = NULL; if (rdev->cmask_filp == file_priv) rdev->cmask_filp = NULL; + radeon_uvd_free_handles(rdev, file_priv); } /* diff --git 
a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index d3aface..0e34446 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -339,14 +339,14 @@ void radeon_bo_fini(struct radeon_device *rdev) void radeon_bo_list_add_object(struct radeon_bo_list *lobj, struct list_head *head) { - if (lobj->wdomain) { + if (lobj->written) { list_add(&lobj->tv.head, head); } else { list_add_tail(&lobj->tv.head, head); } } -int radeon_bo_list_validate(struct list_head *head) +int radeon_bo_list_validate(struct list_head *head, int ring) { struct radeon_bo_list *lobj; struct radeon_bo *bo; @@ -360,15 +360,17 @@ int radeon_bo_list_validate(struct list_head *head) list_for_each_entry(lobj, head, tv.head) { bo = lobj->bo; if (!bo->pin_count) { - domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain; + domain = lobj->domain; retry: radeon_ttm_placement_from_domain(bo, domain); + if (ring == R600_RING_TYPE_UVD_INDEX) + radeon_uvd_force_into_uvd_segment(bo); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); if (unlikely(r)) { - if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) { - domain |= RADEON_GEM_DOMAIN_GTT; + if (r != -ERESTARTSYS && domain != lobj->alt_domain) { + domain = lobj->alt_domain; goto retry; } return r; diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 5fc86b0..e2cb80a 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -128,7 +128,7 @@ extern int radeon_bo_init(struct radeon_device *rdev); extern void radeon_bo_fini(struct radeon_device *rdev); extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, struct list_head *head); -extern int radeon_bo_list_validate(struct list_head *head); +extern int radeon_bo_list_validate(struct list_head *head, int ring); extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, struct vm_area_struct *vma); extern int 
radeon_bo_set_tiling_flags(struct radeon_bo *bo, diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 8d58e26..31e47d8 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -368,7 +368,7 @@ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) { u32 rptr; - if (rdev->wb.enabled) + if (rdev->wb.enabled && ring != &rdev->ring[R600_RING_TYPE_UVD_INDEX]) rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); else rptr = RREG32(ring->rptr_reg); @@ -821,18 +821,20 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) return 0; } -static int radeon_ring_type_gfx_index = RADEON_RING_TYPE_GFX_INDEX; -static int cayman_ring_type_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX; -static int cayman_ring_type_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; -static int radeon_ring_type_dma1_index = R600_RING_TYPE_DMA_INDEX; -static int radeon_ring_type_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; +static int radeon_gfx_index = RADEON_RING_TYPE_GFX_INDEX; +static int cayman_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX; +static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; +static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX; +static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; +static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX; static struct drm_info_list radeon_debugfs_ring_info_list[] = { - {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_index}, - {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_index}, - {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index}, - {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma1_index}, - {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma2_index}, + {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index}, + {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_cp1_index}, + {"radeon_ring_cp2", 
radeon_debugfs_ring_info, 0, &cayman_cp2_index}, + {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index}, + {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index}, + {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index}, }; static int radeon_debugfs_sa_info(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index fda09c9..bbed4af 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -252,6 +252,36 @@ void radeon_test_moves(struct radeon_device *rdev) radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT); } +static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_fence **fence) +{ + int r; + + if (ring->idx == R600_RING_TYPE_UVD_INDEX) { + r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); + if (r) { + DRM_ERROR("Failed to get dummy create msg\n"); + return r; + } + + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence); + if (r) { + DRM_ERROR("Failed to get dummy destroy msg\n"); + return r; + } + } else { + r = radeon_ring_lock(rdev, ring, 64); + if (r) { + DRM_ERROR("Failed to lock ring A %d\n", ring->idx); + return r; + } + radeon_fence_emit(rdev, fence, ring->idx); + radeon_ring_unlock_commit(rdev, ring); + } + return 0; +} + void radeon_test_ring_sync(struct radeon_device *rdev, struct radeon_ring *ringA, struct radeon_ring *ringB) @@ -272,21 +302,24 @@ void radeon_test_ring_sync(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); - r = radeon_fence_emit(rdev, &fence1, ringA->idx); - if (r) { - DRM_ERROR("Failed to emit fence 1\n"); - radeon_ring_unlock_undo(rdev, ringA); + radeon_ring_unlock_commit(rdev, ringA); + + r = radeon_test_create_and_emit_fence(rdev, ringA, &fence1); + if (r) goto out_cleanup; - } - radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); - r = 
radeon_fence_emit(rdev, &fence2, ringA->idx); + + r = radeon_ring_lock(rdev, ringA, 64); if (r) { - DRM_ERROR("Failed to emit fence 2\n"); - radeon_ring_unlock_undo(rdev, ringA); + DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); goto out_cleanup; } + radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); radeon_ring_unlock_commit(rdev, ringA); + r = radeon_test_create_and_emit_fence(rdev, ringA, &fence2); + if (r) + goto out_cleanup; + mdelay(1000); if (radeon_fence_signaled(fence1)) { @@ -364,27 +397,22 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); - r = radeon_fence_emit(rdev, &fenceA, ringA->idx); - if (r) { - DRM_ERROR("Failed to emit sync fence 1\n"); - radeon_ring_unlock_undo(rdev, ringA); - goto out_cleanup; - } radeon_ring_unlock_commit(rdev, ringA); + r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA); + if (r) + goto out_cleanup; + r = radeon_ring_lock(rdev, ringB, 64); if (r) { DRM_ERROR("Failed to lock ring B %d\n", ringB->idx); goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore); - r = radeon_fence_emit(rdev, &fenceB, ringB->idx); - if (r) { - DRM_ERROR("Failed to create sync fence 2\n"); - radeon_ring_unlock_undo(rdev, ringB); - goto out_cleanup; - } radeon_ring_unlock_commit(rdev, ringB); + r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB); + if (r) + goto out_cleanup; mdelay(1000); @@ -393,7 +421,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } if (radeon_fence_signaled(fenceB)) { - DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); + DRM_ERROR("Fence B signaled without waiting for semaphore.\n"); goto out_cleanup; } diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c new file mode 100644 index 0000000..8ab7bb9 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -0,0 +1,521 @@ +/* + * Copyright 2011 Advanced 
Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + */ +/* + * Authors: + * Christian König <deathsimple@vodafone.de> + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include <drm/drmP.h> +#include <drm/drm.h> + +#include "radeon.h" +#include "r600d.h" + +/* Firmware Names */ +#define FIRMWARE_RV770 "radeon/RV770_uvd.bin" +#define FIRMWARE_RV710 "radeon/RV710_uvd.bin" +#define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" +#define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" +#define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" + +MODULE_FIRMWARE(FIRMWARE_RV770); +MODULE_FIRMWARE(FIRMWARE_RV710); +MODULE_FIRMWARE(FIRMWARE_CYPRESS); +MODULE_FIRMWARE(FIRMWARE_SUMO); +MODULE_FIRMWARE(FIRMWARE_TAHITI); + +int radeon_uvd_init(struct radeon_device *rdev) +{ + struct platform_device *pdev; + unsigned long bo_size; + const char *fw_name; + int i, r; + + pdev = platform_device_register_simple("radeon_uvd", 0, NULL, 0); + r = IS_ERR(pdev); + if (r) { + dev_err(rdev->dev, "radeon_uvd: Failed to register firmware\n"); + return -EINVAL; + } + + switch (rdev->family) { + case CHIP_RV770: + fw_name = FIRMWARE_RV770; + break; + + case CHIP_RV710: + case CHIP_RV730: + case CHIP_RV740: + fw_name = FIRMWARE_RV710; + break; + + case CHIP_CYPRESS: + case CHIP_JUNIPER: + case CHIP_REDWOOD: + case CHIP_CEDAR: + fw_name = FIRMWARE_CYPRESS; + break; + + case CHIP_SUMO: + case CHIP_SUMO2: + case CHIP_PALM: + case CHIP_CAYMAN: + case CHIP_BARTS: + case CHIP_TURKS: + case CHIP_CAICOS: + fw_name = FIRMWARE_SUMO; + break; + + case CHIP_TAHITI: + case CHIP_VERDE: + case CHIP_PITCAIRN: + case CHIP_ARUBA: + fw_name = FIRMWARE_TAHITI; + break; + + default: + return -EINVAL; + } + + r = request_firmware(&rdev->uvd_fw, fw_name, &pdev->dev); + if (r) { + dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", + fw_name); + platform_device_unregister(pdev); + return r; + } + + platform_device_unregister(pdev); + + bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) + + RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE; + r = 
radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo); + if (r) { + dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); + return r; + } + + r = radeon_uvd_resume(rdev); + if (r) + return r; + + memset(rdev->uvd.cpu_addr, 0, bo_size); + memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); + + r = radeon_uvd_suspend(rdev); + if (r) + return r; + + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { + atomic_set(&rdev->uvd.handles[i], 0); + rdev->uvd.filp[i] = NULL; + } + + return 0; +} + +void radeon_uvd_fini(struct radeon_device *rdev) +{ + radeon_uvd_suspend(rdev); + radeon_bo_unref(&rdev->uvd.vcpu_bo); +} + +int radeon_uvd_suspend(struct radeon_device *rdev) +{ + int r; + + if (rdev->uvd.vcpu_bo == NULL) + return 0; + + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); + if (!r) { + radeon_bo_kunmap(rdev->uvd.vcpu_bo); + radeon_bo_unpin(rdev->uvd.vcpu_bo); + radeon_bo_unreserve(rdev->uvd.vcpu_bo); + } + return r; +} + +int radeon_uvd_resume(struct radeon_device *rdev) +{ + int r; + + if (rdev->uvd.vcpu_bo == NULL) + return -EINVAL; + + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); + if (r) { + radeon_bo_unref(&rdev->uvd.vcpu_bo); + dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r); + return r; + } + + r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, + &rdev->uvd.gpu_addr); + if (r) { + radeon_bo_unreserve(rdev->uvd.vcpu_bo); + radeon_bo_unref(&rdev->uvd.vcpu_bo); + dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r); + return r; + } + + r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr); + if (r) { + dev_err(rdev->dev, "(%d) UVD map failed\n", r); + return r; + } + + radeon_bo_unreserve(rdev->uvd.vcpu_bo); + + return 0; +} + +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) +{ + rbo->placement.fpfn = 0 >> PAGE_SHIFT; + rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; +} + +void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) +{ 
+ int i, r; + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { + if (rdev->uvd.filp[i] == filp) { + uint32_t handle = atomic_read(&rdev->uvd.handles[i]); + struct radeon_fence *fence; + + r = radeon_uvd_get_destroy_msg(rdev, + R600_RING_TYPE_UVD_INDEX, handle, &fence); + if (r) { + DRM_ERROR("Error destroying UVD (%d)!\n", r); + continue; + } + + radeon_fence_wait(fence, false); + radeon_fence_unref(&fence); + + rdev->uvd.filp[i] = NULL; + atomic_set(&rdev->uvd.handles[i], 0); + } + } +} + +static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *msg) +{ + uint32_t *map, msg_type, handle; + int i, r; + + r = radeon_bo_kmap(msg, (void **)&map); + if (r) + return r; + + msg_type = map[1]; + handle = map[2]; + + radeon_bo_kunmap(msg); + + if (handle == 0) { + DRM_ERROR("Invalid UVD handle!\n"); + return -EINVAL; + } + + if (msg_type == 2) { + /* it's a destroy msg, free the handle */ + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) + atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0); + return 0; + } + + /* create or decode, validate the handle */ + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { + if (atomic_read(&p->rdev->uvd.handles[i]) == handle) + return 0; + } + /* handle not found try to alloc a new one */ + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { + if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) { + p->rdev->uvd.filp[i] = p->filp; + return 0; + } + } + DRM_ERROR("No more free UVD handles!\n"); + return -EINVAL; +} + +static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, int data0, int data1) +{ + struct radeon_cs_chunk *relocs_chunk; + struct radeon_cs_reloc *reloc; + unsigned idx, cmd; + uint64_t start, end; + + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; + idx = radeon_get_ib_value(p, data1); + if (idx >= relocs_chunk->length_dw) { + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", + idx, relocs_chunk->length_dw); + return -EINVAL; + } + + reloc = p->relocs_ptr[(idx / 4)]; + start = reloc->lobj.gpu_offset; 
+ end = start + radeon_bo_size(reloc->robj); + start += radeon_get_ib_value(p, data0); + + p->ib.ptr[data0] = start & 0xFFFFFFFF; + p->ib.ptr[data1] = start >> 32; + + + cmd = radeon_get_ib_value(p, p->idx); + if (cmd == 0) { + if (end & 0xFFFFFFFFF0000000) { + DRM_ERROR("msg buffer %LX-%LX out of 256MB segment!\n", + start, end); + return -EINVAL; + } + + return radeon_uvd_cs_msg(p, reloc->robj); + + } + + if ((start & 0xFFFFFFFFF0000000) != (end & 0xFFFFFFFFF0000000)) { + DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", + start, end); + return -EINVAL; + } + return 0; +} + +int radeon_uvd_cs_parse(struct radeon_cs_parser *p) +{ + struct radeon_cs_packet pkt; + int i, r, data0 = 0, data1 = 0; + + if (p->chunks[p->chunk_ib_idx].length_dw % 16) { + DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", + p->chunks[p->chunk_ib_idx].length_dw); + return -EINVAL; + } + + if (p->chunk_relocs_idx == -1) { + DRM_ERROR("No relocation chunk !\n"); + return -EINVAL; + } + + + do { + r = radeon_cs_packet_parse(p, &pkt, p->idx); + if (r) + return r; + switch (pkt.type) { + case RADEON_PACKET_TYPE0: + p->idx++; + for (i = 0; i <= pkt.count; ++i) { + switch (pkt.reg + i*4) { + case UVD_GPCOM_VCPU_DATA0: + data0 = p->idx; + break; + case UVD_GPCOM_VCPU_DATA1: + data1 = p->idx; + break; + case UVD_GPCOM_VCPU_CMD: + r = radeon_uvd_cs_reloc(p, data0, + data1); + if (r) + return r; + break; + case UVD_ENGINE_CNTL: + break; + default: + DRM_ERROR("Invalid reg 0x%X!\n", + pkt.reg + i*4); + return -EINVAL; + } + p->idx++; + } + break; + case RADEON_PACKET_TYPE2: + p->idx += pkt.count + 2; + break; + default: + DRM_ERROR("Unknown packet type %d !\n", pkt.type); + return -EINVAL; + } + } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); + return 0; +} + +static int radeon_uvd_send_msg(struct radeon_device *rdev, + int ring, struct radeon_bo *bo, + struct radeon_fence **fence) +{ + struct ttm_validate_buffer tv; + struct list_head head; + struct radeon_ib ib; + uint64_t addr; + 
int i, r; + + memset(&tv, 0, sizeof(tv)); + tv.bo = &bo->tbo; + + INIT_LIST_HEAD(&head); + list_add(&tv.head, &head); + + r = ttm_eu_reserve_buffers(&head); + if (r) + return r; + + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); + radeon_uvd_force_into_uvd_segment(bo); + + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + if (r) { + ttm_eu_backoff_reservation(&head); + return r; + } + + r = radeon_ib_get(rdev, ring, &ib, NULL, 16); + if (r) { + ttm_eu_backoff_reservation(&head); + return r; + } + + addr = radeon_bo_gpu_offset(bo); + ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); + ib.ptr[1] = addr; + ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); + ib.ptr[3] = addr >> 32; + ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0); + ib.ptr[5] = 0; + for (i = 6; i < 16; ++i) + ib.ptr[i] = PACKET2(0); + ib.length_dw = 16; + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + ttm_eu_backoff_reservation(&head); + return r; + } + ttm_eu_fence_buffer_objects(&head, ib.fence); + + if (fence) + *fence = radeon_fence_ref(ib.fence); + + radeon_ib_free(rdev, &ib); + radeon_bo_unref(&bo); + return 0; +} + +/* multiple fence commands without any stream commands in between can + crash the vcpu so just try to emmit a dummy create/destroy msg to + avoid this */ +int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence) +{ + struct radeon_bo *bo; + uint32_t *msg; + int r, i; + + r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &bo); + if (r) + return r; + + r = radeon_bo_reserve(bo, false); + if (r) { + radeon_bo_unref(&bo); + return r; + } + + r = radeon_bo_kmap(bo, (void **)&msg); + if (r) { + radeon_bo_unreserve(bo); + radeon_bo_unref(&bo); + return r; + } + + /* stitch together an UVD create msg */ + msg[0] = 0x00000de4; + msg[1] = 0x00000000; + msg[2] = handle; + msg[3] = 0x00000000; + msg[4] = 0x00000000; + msg[5] = 0x00000000; + msg[6] = 0x00000000; + msg[7] = 
0x00000780; + msg[8] = 0x00000440; + msg[9] = 0x00000000; + msg[10] = 0x01b37000; + for (i = 11; i < 1024; ++i) + msg[i] = 0x0; + + radeon_bo_kunmap(bo); + radeon_bo_unreserve(bo); + + return radeon_uvd_send_msg(rdev, ring, bo, fence); +} + +int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence) +{ + struct radeon_bo *bo; + uint32_t *msg; + int r, i; + + r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &bo); + if (r) + return r; + + r = radeon_bo_reserve(bo, false); + if (r) { + radeon_bo_unref(&bo); + return r; + } + + r = radeon_bo_kmap(bo, (void **)&msg); + if (r) { + radeon_bo_unreserve(bo); + radeon_bo_unref(&bo); + return r; + } + + /* stitch together an UVD destroy msg */ + msg[0] = 0x00000de4; + msg[1] = 0x00000002; + msg[2] = handle; + msg[3] = 0x00000000; + for (i = 4; i < 1024; ++i) + msg[i] = 0x0; + + radeon_bo_kunmap(bo); + radeon_bo_unreserve(bo); + + return radeon_uvd_send_msg(rdev, ring, bo, fence); +} diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index d63fe1d..5a78cce 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -68,6 +68,107 @@ u32 rv770_get_xclk(struct radeon_device *rdev) return reference_clock; } +int rv770_uvd_resume(struct radeon_device *rdev) +{ + uint64_t addr; + uint32_t chip_id, size; + int r; + + r = radeon_uvd_resume(rdev); + if (r) + return r; + + /* programm the VCPU memory controller bits 0-27 */ + addr = rdev->uvd.gpu_addr >> 3; + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET0, addr); + WREG32(UVD_VCPU_CACHE_SIZE0, size); + + addr += size; + size = RADEON_UVD_STACK_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET1, addr); + WREG32(UVD_VCPU_CACHE_SIZE1, size); + + addr += size; + size = RADEON_UVD_HEAP_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET2, addr); + WREG32(UVD_VCPU_CACHE_SIZE2, size); + + /* bits 28-31 */ + addr = 
(rdev->uvd.gpu_addr >> 28) & 0xF; + WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); + + /* bits 32-39 */ + addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; + WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); + + /* tell firmware which hardware it is running on */ + switch (rdev->family) { + default: + return -EINVAL; + case CHIP_RV770: + chip_id = 0x01000004; + break; + case CHIP_RV710: + chip_id = 0x01000005; + break; + case CHIP_RV730: + chip_id = 0x01000006; + break; + case CHIP_RV740: + chip_id = 0x01000007; + break; + case CHIP_CYPRESS: + chip_id = 0x01000008; + break; + case CHIP_JUNIPER: + chip_id = 0x01000009; + break; + case CHIP_REDWOOD: + chip_id = 0x0100000a; + break; + case CHIP_CEDAR: + chip_id = 0x0100000b; + break; + case CHIP_SUMO: + chip_id = 0x0100000c; + break; + case CHIP_SUMO2: + chip_id = 0x0100000d; + break; + case CHIP_PALM: + chip_id = 0x0100000e; + break; + case CHIP_CAYMAN: + chip_id = 0x0100000f; + break; + case CHIP_BARTS: + chip_id = 0x01000010; + break; + case CHIP_TURKS: + chip_id = 0x01000011; + break; + case CHIP_CAICOS: + chip_id = 0x01000012; + break; + case CHIP_TAHITI: + chip_id = 0x01000014; + break; + case CHIP_VERDE: + chip_id = 0x01000015; + break; + case CHIP_PITCAIRN: + chip_id = 0x01000016; + break; + case CHIP_ARUBA: + chip_id = 0x01000017; + break; + } + WREG32(UVD_VCPU_CHIP_ID, chip_id); + + return 0; +} + u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; @@ -1040,6 +1141,17 @@ static int rv770_startup(struct radeon_device *rdev) return r; } + r = rv770_uvd_resume(rdev); + if (!r) { + r = radeon_fence_driver_start_ring(rdev, + R600_RING_TYPE_UVD_INDEX); + if (r) + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); + } + + if (r) + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + /* Enable IRQ */ r = r600_irq_init(rdev); if (r) { @@ -1074,6 +1186,19 @@ static int rv770_startup(struct radeon_device *rdev) 
if (r) return r; + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, + R600_WB_UVD_RPTR_OFFSET, + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (!r) + r = r600_uvd_init(rdev); + + if (r) + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); + } + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -1115,6 +1240,7 @@ int rv770_resume(struct radeon_device *rdev) int rv770_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); + radeon_uvd_suspend(rdev); r700_cp_stop(rdev); r600_dma_stop(rdev); r600_irq_suspend(rdev); @@ -1190,6 +1316,13 @@ int rv770_init(struct radeon_device *rdev) rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); + r = radeon_uvd_init(rdev); + if (!r) { + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], + 4096); + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -1224,6 +1357,7 @@ void rv770_fini(struct radeon_device *rdev) radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); rv770_pcie_gart_fini(rdev); + radeon_uvd_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); radeon_fence_driver_fini(rdev); diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h index c55f950..da158b54 100644 --- a/drivers/gpu/drm/radeon/rv770d.h +++ b/drivers/gpu/drm/radeon/rv770d.h @@ -671,4 +671,18 @@ # define TARGET_LINK_SPEED_MASK (0xf << 0) # define SELECTABLE_DEEMPHASIS (1 << 6) +/* UVD */ +#define UVD_LMI_EXT40_ADDR 0xf498 +#define UVD_VCPU_CHIP_ID 0xf4d4 +#define UVD_VCPU_CACHE_OFFSET0 0xf4d8 +#define UVD_VCPU_CACHE_SIZE0 0xf4dc +#define UVD_VCPU_CACHE_OFFSET1 0xf4e0 +#define UVD_VCPU_CACHE_SIZE1 0xf4e4 +#define UVD_VCPU_CACHE_OFFSET2 0xf4e8 +#define UVD_VCPU_CACHE_SIZE2 0xf4ec +#define UVD_LMI_ADDR_EXT 0xf594 + 
+#define UVD_RBC_RB_RPTR 0xf690 +#define UVD_RBC_RB_WPTR 0xf694 + #endif diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index bafbe32..cc9fe39 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -4372,6 +4372,16 @@ static int si_startup(struct radeon_device *rdev) return r; } + r = rv770_uvd_resume(rdev); + if (!r) { + r = radeon_fence_driver_start_ring(rdev, + R600_RING_TYPE_UVD_INDEX); + if (r) + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); + } + if (r) + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + /* Enable IRQ */ r = si_irq_init(rdev); if (r) { @@ -4429,6 +4439,18 @@ static int si_startup(struct radeon_device *rdev) if (r) return r; + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, + R600_WB_UVD_RPTR_OFFSET, + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, + 0, 0xfffff, RADEON_CP_PACKET2); + if (!r) + r = r600_uvd_init(rdev); + if (r) + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); + } + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -4472,6 +4494,8 @@ int si_suspend(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); si_cp_enable(rdev, false); cayman_dma_stop(rdev); + r600_uvd_rbc_stop(rdev); + radeon_uvd_suspend(rdev); si_irq_suspend(rdev); radeon_wb_disable(rdev); si_pcie_gart_disable(rdev); @@ -4557,6 +4581,13 @@ int si_init(struct radeon_device *rdev) ring->ring_obj = NULL; r600_ring_init(rdev, ring, 64 * 1024); + r = radeon_uvd_init(rdev); + if (!r) { + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -4605,6 +4636,7 @@ void si_fini(struct radeon_device *rdev) radeon_vm_manager_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); + radeon_uvd_fini(rdev); si_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); 
radeon_gem_fini(rdev); diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 23fc08f..759f682 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -798,6 +798,12 @@ # define THREAD_TRACE_FINISH (55 << 0) /* + * UVD + */ +#define UVD_RBC_RB_RPTR 0xF690 +#define UVD_RBC_RB_WPTR 0xF694 + +/* * PM4 */ #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index eeda917..cd085d1 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -918,6 +918,7 @@ struct drm_radeon_gem_va { #define RADEON_CS_RING_GFX 0 #define RADEON_CS_RING_COMPUTE 1 #define RADEON_CS_RING_DMA 2 +#define RADEON_CS_RING_UVD 3 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */ /* 0 = normal, + = higher priority, - = lower priority */
Just everything needed to decode videos using UVD. v6: just all the bugfixes and support for R7xx-SI merged in one patch. v7: UVD_CGC_GATE is a write-only register; lockup detection fix. Signed-off-by: Christian König <deathsimple@vodafone.de> --- drivers/gpu/drm/radeon/Makefile | 2 +- drivers/gpu/drm/radeon/evergreen.c | 40 ++- drivers/gpu/drm/radeon/evergreend.h | 7 + drivers/gpu/drm/radeon/ni.c | 49 +++ drivers/gpu/drm/radeon/nid.h | 9 + drivers/gpu/drm/radeon/r600.c | 291 ++++++++++++++++++ drivers/gpu/drm/radeon/r600d.h | 61 ++++ drivers/gpu/drm/radeon/radeon.h | 47 ++- drivers/gpu/drm/radeon/radeon_asic.c | 63 ++++ drivers/gpu/drm/radeon/radeon_asic.h | 19 ++ drivers/gpu/drm/radeon/radeon_cs.c | 27 +- drivers/gpu/drm/radeon/radeon_fence.c | 23 +- drivers/gpu/drm/radeon/radeon_kms.c | 1 + drivers/gpu/drm/radeon/radeon_object.c | 12 +- drivers/gpu/drm/radeon/radeon_object.h | 2 +- drivers/gpu/drm/radeon/radeon_ring.c | 24 +- drivers/gpu/drm/radeon/radeon_test.c | 72 +++-- drivers/gpu/drm/radeon/radeon_uvd.c | 521 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/rv770.c | 134 ++++++++ drivers/gpu/drm/radeon/rv770d.h | 14 + drivers/gpu/drm/radeon/si.c | 32 ++ drivers/gpu/drm/radeon/sid.h | 6 + include/uapi/drm/radeon_drm.h | 1 + 23 files changed, 1400 insertions(+), 57 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_uvd.c