| Message ID | 1428936737-19103-4-git-send-email-deathsimple@vodafone.de (mailing list archive) |
|---|---|
| State | New, archived |
On Mon, Apr 13, 2015 at 04:52:17PM +0200, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> WIP patch which adds an user fence IOCTL.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

I've discussed userspace fences a lot with Jerome last XDC, so here are my
comments:

My primary concern with mid-batch fences is that if we create real kernel
fences (which might even escape to other places using android syncpts or
dma-buf) then we end up relying upon correct userspace to not hang the
kernel, which isn't good.

So imo any kind of mid-batch fence must be done completely in userspace and
never show up as a fence object on the kernel side. I thought that just
busy-spinning in userspace would be all that's needed, but adding an ioctl
to wait on such user fences seems like a nice idea too. On i915 we even
have 2 interrupt sources per ring, so we could split the irq processing
between kernel fences and userspace fences.

One thing to keep in mind (I don't know radeon/ttm internals well enough)
is to make sure that while blocked on a userspace fence in the ioctl you're
not starving anyone else. But it doesn't look like you're holding any
reservation objects or something similar which might prevent concurrent cs.

Anyway, if there's nothing more to this then I think this is a very sane
idea.

Cheers, Daniel
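To make the "completely in userspace" option Daniel mentions concrete, here is a minimal sketch of what a pure busy-spin wait could look like. Nothing in it comes from the posted patch: it assumes the client has already CPU-mapped the BO that holds the 64-bit fence value (e.g. via DRM_RADEON_GEM_MMAP plus mmap(2)), and the function name and spin limit are made up for illustration.

```c
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Spin on a CPU-visible fence location until it reaches wait_value or we
 * give up; the caller would then fall back to a kernel wait (see the ioctl
 * sketch further down in the thread). */
static bool user_fence_spin(const _Atomic uint64_t *fence_addr,
			    uint64_t wait_value, unsigned long max_spins)
{
	unsigned long i;

	for (i = 0; i < max_spins; i++) {
		/* acquire ordering so reads of the fenced data cannot be
		 * hoisted above the fence check */
		if (atomic_load_explicit(fence_addr,
					 memory_order_acquire) >= wait_value)
			return true;
		sched_yield();	/* stay polite while spinning */
	}
	return false;
}
```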
On Mon, Apr 13, 2015 at 04:52:17PM +0200, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> WIP patch which adds an user fence IOCTL.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
[...]
> +	r = radeon_bo_reserve(robj, false);
> +	if (r)
> +		goto error_unref;
> +
> +	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM |
> +			  RADEON_GEM_DOMAIN_GTT, NULL);

One thing on top of making sure that you don't hold any locks while
waiting, to ensure you can't abuse this: I think you need to add a check
here that the bo is reasonably sized, otherwise someone will abuse this to
lock in massive amounts of ram. As long as the limit is reasonably small we
can subsume the overhead under the per-thread limits imo.
-Daniel
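One possible shape of the check Daniel is asking for, sketched against the ioctl as posted (radeon_bo_size() is already used there for the offset check). The one-page limit and the error code are assumptions for illustration, not something from the patch or the thread.

```c
	/* Hypothetical size limit, placed next to the existing offset check:
	 * refuse to pin and map arbitrarily large BOs just to watch one
	 * 8-byte fence value. */
	if (radeon_bo_size(robj) > PAGE_SIZE) {
		r = -E2BIG;
		goto error_unref;
	}
```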
On Mon, Apr 13, 2015 at 07:23:34PM +0200, Daniel Vetter wrote:
> On Mon, Apr 13, 2015 at 04:52:17PM +0200, Christian König wrote:
> > From: Christian König <christian.koenig@amd.com>
> >
> > WIP patch which adds an user fence IOCTL.
> >
> > Signed-off-by: Christian König <christian.koenig@amd.com>
>
> I've discussed userspace fences a lot with Jerome last XDC, so here are my
> comments:
>
> My primary concern with mid-batch fences is that if we create real kernel
> fences (which might even escape to other places using android syncpts or
> dma-buf) then we end up relying upon correct userspace to not hang the
> kernel, which isn't good.

Yes, I agree on that; the solution I propose makes sure that this cannot
happen.

> So imo any kind of mid-batch fence must be done completely in userspace and
> never show up as a fence object on the kernel side. I thought that just
> busy-spinning in userspace would be all that's needed, but adding an ioctl
> to wait on such user fences seems like a nice idea too. On i915 we even
> have 2 interrupt sources per ring, so we could split the irq processing
> between kernel fences and userspace fences.

Technically the kernel does not allocate any object here; it is just that
the kernel can enable the GPU interrupt and thus wait "intelligently" until
the GPU fires an interrupt telling us that it might be a good time to look
at the fence value.

So technically this ioctl is nothing more than a wait for an irq plus a
check of a memory value.

> One thing to keep in mind (I don't know radeon/ttm internals well enough)
> is to make sure that while blocked on a userspace fence in the ioctl you're
> not starving anyone else. But it doesn't look like you're holding any
> reservation objects or something similar which might prevent concurrent cs.

Yes, this is the discussion we are having: how to make sure that such an
ioctl would not block any regular processing, so that it could not be
abused in any way (well, at least in any way my devious imagination can
think of right now :)).

Cheers,
Jérôme
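Jerome's "wait for an irq plus a check of a memory value" maps to a small userspace call. Below is a sketch of how the proposed ioctl might be invoked through libdrm, using only the uapi additions from this patch (DRM_RADEON_GEM_WAIT_USERFENCE and struct drm_radeon_gem_wait_userfence); the wrapper name, the chosen ring and the placeholder arguments are illustrative, not part of the patch.

```c
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include "radeon_drm.h"		/* uapi header with this patch applied */

/* Ask the kernel to sleep until the 64-bit value at `offset` inside the BO
 * identified by `bo_handle` reaches `wait_value` (or a GPU reset is
 * detected, in which case the ioctl returns -EDEADLK). */
static int wait_user_fence(int fd, uint32_t bo_handle, uint32_t offset,
			   uint64_t wait_value)
{
	struct drm_radeon_gem_wait_userfence args;

	memset(&args, 0, sizeof(args));
	args.handle = bo_handle;	/* GEM handle of the BO holding the fence */
	args.offset = offset;		/* byte offset of the fence value, 8-byte aligned */
	args.fence = wait_value;	/* wake once *addr >= wait_value */
	args.ring = RADEON_CS_RING_GFX;	/* ring whose sw irq drives the wakeups */
	args.priority = 0;
	args.flags = 0;

	return drmCommandWriteRead(fd, DRM_RADEON_GEM_WAIT_USERFENCE,
				   &args, sizeof(args));
}
```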
On 13.04.2015 19:51, Jerome Glisse wrote:
> On Mon, Apr 13, 2015 at 07:23:34PM +0200, Daniel Vetter wrote:
>> My primary concern with mid-batch fences is that if we create real kernel
>> fences (which might even escape to other places using android syncpts or
>> dma-buf) then we end up relying upon correct userspace to not hang the
>> kernel, which isn't good.
> Yes, I agree on that; the solution I propose makes sure that this cannot
> happen.

What if we want to base a GPU scheduler and Android sync points on that
functionality? E.g. it might be necessary to create "struct fence" objects
which are based on the information from userspace.

Would that be possible or would we run into issues?

Regards,
Christian.
On Mon, Apr 13, 2015 at 08:26:05PM +0200, Christian König wrote:
> On 13.04.2015 19:51, Jerome Glisse wrote:
> > Yes, I agree on that; the solution I propose makes sure that this cannot
> > happen.
>
> What if we want to base a GPU scheduler and Android sync points on that
> functionality? E.g. it might be necessary to create "struct fence" objects
> which are based on the information from userspace.
>
> Would that be possible or would we run into issues?

Well, I would not like that, but then I do not like the Android code much.
You could use the same code as I showed in my other email and just have the
Android fence struct take a reference on the BO where the fence is, then
use the same code to check the status.

Obviously there should be a timeout, as there is no way for the kernel to
know whether such a fence will ever signal.

Cheers,
Jérôme
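To illustrate what Jerome sketches here (and Christian's "struct fence" question), below is a rough, hypothetical wrapper assuming the 4.0-era struct fence API from <linux/fence.h>. It is not part of the posted patch: taking the BO reference, kmapping the fence page and hooking the ring's sw irq into fence_queue wakeups are all elided, and, as Jerome points out, any wait on such a fence has to carry a timeout because the kernel cannot know that the value will ever be written.

```c
#include <linux/fence.h>
#include <linux/kernel.h>

struct radeon_user_fence {
	struct fence base;
	struct radeon_bo *bo;	/* holds a BO reference so the page stays around */
	uint64_t *cpu_addr;	/* kmapped location of the fence value */
	uint64_t wait_value;
};

static const char *user_fence_get_driver_name(struct fence *f)
{
	return "radeon";
}

static const char *user_fence_get_timeline_name(struct fence *f)
{
	return "radeon.userfence";
}

static bool user_fence_signaled(struct fence *f)
{
	struct radeon_user_fence *uf =
		container_of(f, struct radeon_user_fence, base);

	/* Same test the ioctl uses: signalled once the GPU-written value
	 * has reached the value we are waiting for. */
	return READ_ONCE(*uf->cpu_addr) >= uf->wait_value;
}

static bool user_fence_enable_signaling(struct fence *f)
{
	/* A real implementation would grab the ring's sw irq here (like
	 * radeon_irq_kms_sw_irq_get() in the ioctl) so that fence_queue
	 * wakeups re-evaluate user_fence_signaled(). */
	return true;
}

static const struct fence_ops radeon_user_fence_ops = {
	.get_driver_name	= user_fence_get_driver_name,
	.get_timeline_name	= user_fence_get_timeline_name,
	.enable_signaling	= user_fence_enable_signaling,
	.signaled		= user_fence_signaled,
	.wait			= fence_default_wait,	/* callers must pass a timeout */
};
```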
On Mon, Apr 13, 2015 at 03:48:51PM -0400, Jerome Glisse wrote:
> On Mon, Apr 13, 2015 at 08:26:05PM +0200, Christian König wrote:
> > What if we want to base a GPU scheduler and Android sync points on that
> > functionality? E.g. it might be necessary to create "struct fence" objects
> > which are based on the information from userspace.
> >
> > Would that be possible or would we run into issues?
>
> Well, I would not like that, but then I do not like the Android code much.
> You could use the same code as I showed in my other email and just have the
> Android fence struct take a reference on the BO where the fence is, then
> use the same code to check the status.
>
> Obviously there should be a timeout, as there is no way for the kernel to
> know whether such a fence will ever signal.

Yeah, if your umd is using this for all fencing needs, including
cross-process and everything, then I don't think it's such a good idea any
more ;-) But if it's just fine-grained sync for heterogeneous compute
within the same process (ocl, hsa or whatever) then this seems reasonable.

I guess this boils down to what the userspace side will look like, and what
kind of standard you're trying to implement here.
-Daniel
```diff
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 57d63c4..110baae 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2215,6 +2215,8 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *filp);
 int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *filp);
+int radeon_gem_wait_userfence_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *filp);
 int radeon_gem_pin_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 int radeon_gem_unpin_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index ac3c131..094b3d5 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -365,6 +365,77 @@ handle_lockup:
 	return r;
 }
 
+int radeon_gem_wait_userfence_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *filp)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_radeon_gem_wait_userfence *args = data;
+	struct drm_gem_object *gobj;
+	struct radeon_bo *robj;
+	uint64_t *fence_addr;
+	void *cpu_addr;
+	int r, ring;
+
+	down_read(&rdev->exclusive_lock);
+
+	ring = radeon_cs_get_ring(rdev, args->ring, args->priority);
+	if (ring < 0)
+		return ring;
+
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL) {
+		r = -ENOENT;
+		goto error_unref;
+	}
+	robj = gem_to_radeon_bo(gobj);
+
+	if (args->offset & 0x7 || args->offset + 8 > radeon_bo_size(robj)) {
+		r = -EINVAL;
+		goto error_unref;
+	}
+
+	r = radeon_bo_reserve(robj, false);
+	if (r)
+		goto error_unref;
+
+	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM |
+			  RADEON_GEM_DOMAIN_GTT, NULL);
+	if (r)
+		goto error_unreserve;
+
+	r = radeon_bo_kmap(robj, &cpu_addr);
+	if (r)
+		goto error_unpin;
+
+	radeon_bo_unreserve(robj);
+
+	fence_addr = (uint64_t *)(((uint8_t*)cpu_addr) + args->offset);
+
+	radeon_irq_kms_sw_irq_get(rdev, ring);
+	r = wait_event_interruptible(rdev->fence_queue, (
+		*fence_addr >= args->fence || rdev->needs_reset
+	));
+	radeon_irq_kms_sw_irq_put(rdev, ring);
+
+	if (rdev->needs_reset)
+		r = -EDEADLK;
+
+	radeon_bo_reserve(robj, false);
+	radeon_bo_kunmap(robj);
+
+error_unpin:
+	radeon_bo_unpin(robj);
+
+error_unreserve:
+	radeon_bo_unreserve(robj);
+
+error_unref:
+	drm_gem_object_unreference_unlocked(gobj);
+	up_read(&rdev->exclusive_lock);
+	r = radeon_gem_handle_lockup(robj->rdev, r);
+	return r;
+}
+
 int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp)
 {
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 686411e..4757f25 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -893,5 +893,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_USERPTR, radeon_gem_userptr_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_USERFENCE, radeon_gem_wait_userfence_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 50d0fb4..43fe660 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -512,6 +512,7 @@ typedef struct {
 #define DRM_RADEON_GEM_VA		0x2b
 #define DRM_RADEON_GEM_OP		0x2c
 #define DRM_RADEON_GEM_USERPTR		0x2d
+#define DRM_RADEON_GEM_WAIT_USERFENCE	0x2e
 
 #define DRM_IOCTL_RADEON_CP_INIT	DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START	DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -541,21 +542,22 @@ typedef struct {
 #define DRM_IOCTL_RADEON_SURF_ALLOC	DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_ALLOC, drm_radeon_surface_alloc_t)
 #define DRM_IOCTL_RADEON_SURF_FREE	DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_FREE, drm_radeon_surface_free_t)
 /* KMS */
-#define DRM_IOCTL_RADEON_GEM_INFO	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_INFO, struct drm_radeon_gem_info)
-#define DRM_IOCTL_RADEON_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_CREATE, struct drm_radeon_gem_create)
-#define DRM_IOCTL_RADEON_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_MMAP, struct drm_radeon_gem_mmap)
-#define DRM_IOCTL_RADEON_GEM_PREAD	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_PREAD, struct drm_radeon_gem_pread)
-#define DRM_IOCTL_RADEON_GEM_PWRITE	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_PWRITE, struct drm_radeon_gem_pwrite)
-#define DRM_IOCTL_RADEON_GEM_SET_DOMAIN	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_DOMAIN, struct drm_radeon_gem_set_domain)
-#define DRM_IOCTL_RADEON_GEM_WAIT_IDLE	DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle)
-#define DRM_IOCTL_RADEON_CS		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs)
-#define DRM_IOCTL_RADEON_INFO		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info)
-#define DRM_IOCTL_RADEON_GEM_SET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
-#define DRM_IOCTL_RADEON_GEM_GET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
-#define DRM_IOCTL_RADEON_GEM_BUSY	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
-#define DRM_IOCTL_RADEON_GEM_VA		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
-#define DRM_IOCTL_RADEON_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op)
-#define DRM_IOCTL_RADEON_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_USERPTR, struct drm_radeon_gem_userptr)
+#define DRM_IOCTL_RADEON_GEM_INFO		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_INFO, struct drm_radeon_gem_info)
+#define DRM_IOCTL_RADEON_GEM_CREATE		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_CREATE, struct drm_radeon_gem_create)
+#define DRM_IOCTL_RADEON_GEM_MMAP		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_MMAP, struct drm_radeon_gem_mmap)
+#define DRM_IOCTL_RADEON_GEM_PREAD		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_PREAD, struct drm_radeon_gem_pread)
+#define DRM_IOCTL_RADEON_GEM_PWRITE		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_PWRITE, struct drm_radeon_gem_pwrite)
+#define DRM_IOCTL_RADEON_GEM_SET_DOMAIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_DOMAIN, struct drm_radeon_gem_set_domain)
+#define DRM_IOCTL_RADEON_GEM_WAIT_IDLE		DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle)
+#define DRM_IOCTL_RADEON_CS			DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs)
+#define DRM_IOCTL_RADEON_INFO			DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info)
+#define DRM_IOCTL_RADEON_GEM_SET_TILING		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
+#define DRM_IOCTL_RADEON_GEM_GET_TILING		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
+#define DRM_IOCTL_RADEON_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
+#define DRM_IOCTL_RADEON_GEM_VA			DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
+#define DRM_IOCTL_RADEON_GEM_OP			DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op)
+#define DRM_IOCTL_RADEON_GEM_USERPTR		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_USERPTR, struct drm_radeon_gem_userptr)
+#define DRM_IOCTL_RADEON_GEM_WAIT_USERFENCE	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_USERFENCE, struct drm_radeon_gem_wait_userfence)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -831,6 +833,15 @@ struct drm_radeon_gem_userptr {
 	uint32_t		handle;
 };
 
+struct drm_radeon_gem_wait_userfence {
+	uint32_t		handle;
+	uint32_t		offset;
+	uint64_t		fence;
+	uint32_t		ring;
+	int32_t			priority;
+	uint32_t		flags;
+};
+
 #define RADEON_TILING_MACRO	0x1
 #define RADEON_TILING_MICRO	0x2
 #define RADEON_TILING_SWAP_16BIT	0x4
```