| Message ID | 20241212202337.381614-2-mcanal@igalia.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | drm/vc4: Clean-up the BO seqnos |
Am 12.12.24 um 21:20 schrieb Maíra Canal: > VC4 has internal copies of `drm_gem_lock_reservations()` and > `drm_gem_unlock_reservations()` inside the driver and ideally, we should > move those hard-coded functions to the generic functions provided by DRM. > But, instead of using the DRM GEM functions to (un)lock reservations, move > the new DRM Execution Contexts API. > > Signed-off-by: Maíra Canal <mcanal@igalia.com> Oh, that looks really straight forward and clean. But I can only give an Acked-by: Christian König <christian.koenig@amd.com> since I don't know the VC4 code at all. Regards, Christian. > --- > drivers/gpu/drm/vc4/Kconfig | 1 + > drivers/gpu/drm/vc4/vc4_gem.c | 99 ++++++++--------------------------- > 2 files changed, 22 insertions(+), 78 deletions(-) > > diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig > index c5f30b317698..0627e826fda4 100644 > --- a/drivers/gpu/drm/vc4/Kconfig > +++ b/drivers/gpu/drm/vc4/Kconfig > @@ -13,6 +13,7 @@ config DRM_VC4 > select DRM_DISPLAY_HDMI_HELPER > select DRM_DISPLAY_HDMI_STATE_HELPER > select DRM_DISPLAY_HELPER > + select DRM_EXEC > select DRM_KMS_HELPER > select DRM_GEM_DMA_HELPER > select DRM_PANEL_BRIDGE > diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c > index 22bccd69eb62..1021f45cb53c 100644 > --- a/drivers/gpu/drm/vc4/vc4_gem.c > +++ b/drivers/gpu/drm/vc4/vc4_gem.c > @@ -29,6 +29,7 @@ > #include <linux/sched/signal.h> > #include <linux/dma-fence-array.h> > > +#include <drm/drm_exec.h> > #include <drm/drm_syncobj.h> > > #include "uapi/drm/vc4_drm.h" > @@ -578,19 +579,6 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) > } > } > > -static void > -vc4_unlock_bo_reservations(struct drm_device *dev, > - struct vc4_exec_info *exec, > - struct ww_acquire_ctx *acquire_ctx) > -{ > - int i; > - > - for (i = 0; i < exec->bo_count; i++) > - dma_resv_unlock(exec->bo[i]->resv); > - > - ww_acquire_fini(acquire_ctx); > -} > - > /* Takes the reservation lock on all the BOs being referenced, so that > * at queue submit time we can update the reservations. > * > @@ -599,70 +587,23 @@ vc4_unlock_bo_reservations(struct drm_device *dev, > * to vc4, so we don't attach dma-buf fences to them. 
> */ > static int > -vc4_lock_bo_reservations(struct drm_device *dev, > - struct vc4_exec_info *exec, > - struct ww_acquire_ctx *acquire_ctx) > +vc4_lock_bo_reservations(struct vc4_exec_info *exec, > + struct drm_exec *exec_ctx) > { > - int contended_lock = -1; > - int i, ret; > - struct drm_gem_object *bo; > - > - ww_acquire_init(acquire_ctx, &reservation_ww_class); > - > -retry: > - if (contended_lock != -1) { > - bo = exec->bo[contended_lock]; > - ret = dma_resv_lock_slow_interruptible(bo->resv, acquire_ctx); > - if (ret) { > - ww_acquire_done(acquire_ctx); > - return ret; > - } > - } > - > - for (i = 0; i < exec->bo_count; i++) { > - if (i == contended_lock) > - continue; > - > - bo = exec->bo[i]; > - > - ret = dma_resv_lock_interruptible(bo->resv, acquire_ctx); > - if (ret) { > - int j; > - > - for (j = 0; j < i; j++) { > - bo = exec->bo[j]; > - dma_resv_unlock(bo->resv); > - } > - > - if (contended_lock != -1 && contended_lock >= i) { > - bo = exec->bo[contended_lock]; > - > - dma_resv_unlock(bo->resv); > - } > - > - if (ret == -EDEADLK) { > - contended_lock = i; > - goto retry; > - } > - > - ww_acquire_done(acquire_ctx); > - return ret; > - } > - } > - > - ww_acquire_done(acquire_ctx); > + int ret; > > /* Reserve space for our shared (read-only) fence references, > * before we commit the CL to the hardware. > */ > - for (i = 0; i < exec->bo_count; i++) { > - bo = exec->bo[i]; > + drm_exec_init(exec_ctx, DRM_EXEC_INTERRUPTIBLE_WAIT, exec->bo_count); > + drm_exec_until_all_locked(exec_ctx) { > + ret = drm_exec_prepare_array(exec_ctx, exec->bo, > + exec->bo_count, 1); > + } > > - ret = dma_resv_reserve_fences(bo->resv, 1); > - if (ret) { > - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); > - return ret; > - } > + if (ret) { > + drm_exec_fini(exec_ctx); > + return ret; > } > > return 0; > @@ -679,7 +620,7 @@ vc4_lock_bo_reservations(struct drm_device *dev, > */ > static int > vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, > - struct ww_acquire_ctx *acquire_ctx, > + struct drm_exec *exec_ctx, > struct drm_syncobj *out_sync) > { > struct vc4_dev *vc4 = to_vc4_dev(dev); > @@ -708,7 +649,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, > > vc4_update_bo_seqnos(exec, seqno); > > - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); > + drm_exec_fini(exec_ctx); > > list_add_tail(&exec->head, &vc4->bin_job_list); > > @@ -1123,7 +1064,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > struct drm_vc4_submit_cl *args = data; > struct drm_syncobj *out_sync = NULL; > struct vc4_exec_info *exec; > - struct ww_acquire_ctx acquire_ctx; > + struct drm_exec exec_ctx; > struct dma_fence *in_fence; > int ret = 0; > > @@ -1216,7 +1157,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > if (ret) > goto fail; > > - ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); > + ret = vc4_lock_bo_reservations(exec, &exec_ctx); > if (ret) > goto fail; > > @@ -1224,7 +1165,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > out_sync = drm_syncobj_find(file_priv, args->out_sync); > if (!out_sync) { > ret = -EINVAL; > - goto fail; > + goto fail_unreserve; > } > > /* We replace the fence in out_sync in vc4_queue_submit since > @@ -1239,7 +1180,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > */ > exec->args = NULL; > > - ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); > + ret = vc4_queue_submit(dev, exec, &exec_ctx, out_sync); > > /* The syncobj isn't part of the exec data and we need to free our > * 
reference even if job submission failed. > @@ -1248,13 +1189,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > drm_syncobj_put(out_sync); > > if (ret) > - goto fail; > + goto fail_unreserve; > > /* Return the seqno for our job. */ > args->seqno = vc4->emit_seqno; > > return 0; > > +fail_unreserve: > + drm_exec_fini(&exec_ctx); > fail: > vc4_complete_exec(&vc4->base, exec); >
On 12/12, Maíra Canal wrote: > VC4 has internal copies of `drm_gem_lock_reservations()` and > `drm_gem_unlock_reservations()` inside the driver and ideally, we should > move those hard-coded functions to the generic functions provided by DRM. > But, instead of using the DRM GEM functions to (un)lock reservations, move > the new DRM Execution Contexts API. > > Signed-off-by: Maíra Canal <mcanal@igalia.com> > --- > drivers/gpu/drm/vc4/Kconfig | 1 + > drivers/gpu/drm/vc4/vc4_gem.c | 99 ++++++++--------------------------- > 2 files changed, 22 insertions(+), 78 deletions(-) > > diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig > index c5f30b317698..0627e826fda4 100644 > --- a/drivers/gpu/drm/vc4/Kconfig > +++ b/drivers/gpu/drm/vc4/Kconfig > @@ -13,6 +13,7 @@ config DRM_VC4 > select DRM_DISPLAY_HDMI_HELPER > select DRM_DISPLAY_HDMI_STATE_HELPER > select DRM_DISPLAY_HELPER > + select DRM_EXEC > select DRM_KMS_HELPER > select DRM_GEM_DMA_HELPER > select DRM_PANEL_BRIDGE > diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c > index 22bccd69eb62..1021f45cb53c 100644 > --- a/drivers/gpu/drm/vc4/vc4_gem.c > +++ b/drivers/gpu/drm/vc4/vc4_gem.c > @@ -29,6 +29,7 @@ > #include <linux/sched/signal.h> > #include <linux/dma-fence-array.h> > > +#include <drm/drm_exec.h> > #include <drm/drm_syncobj.h> > > #include "uapi/drm/vc4_drm.h" > @@ -578,19 +579,6 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) > } > } > > -static void > -vc4_unlock_bo_reservations(struct drm_device *dev, > - struct vc4_exec_info *exec, > - struct ww_acquire_ctx *acquire_ctx) > -{ > - int i; > - > - for (i = 0; i < exec->bo_count; i++) > - dma_resv_unlock(exec->bo[i]->resv); > - > - ww_acquire_fini(acquire_ctx); > -} > - > /* Takes the reservation lock on all the BOs being referenced, so that > * at queue submit time we can update the reservations. > * > @@ -599,70 +587,23 @@ vc4_unlock_bo_reservations(struct drm_device *dev, > * to vc4, so we don't attach dma-buf fences to them. > */ > static int > -vc4_lock_bo_reservations(struct drm_device *dev, > - struct vc4_exec_info *exec, > - struct ww_acquire_ctx *acquire_ctx) > +vc4_lock_bo_reservations(struct vc4_exec_info *exec, > + struct drm_exec *exec_ctx) > { > - int contended_lock = -1; > - int i, ret; > - struct drm_gem_object *bo; > - > - ww_acquire_init(acquire_ctx, &reservation_ww_class); > - > -retry: > - if (contended_lock != -1) { > - bo = exec->bo[contended_lock]; > - ret = dma_resv_lock_slow_interruptible(bo->resv, acquire_ctx); > - if (ret) { > - ww_acquire_done(acquire_ctx); > - return ret; > - } > - } > - > - for (i = 0; i < exec->bo_count; i++) { > - if (i == contended_lock) > - continue; > - > - bo = exec->bo[i]; > - > - ret = dma_resv_lock_interruptible(bo->resv, acquire_ctx); > - if (ret) { > - int j; > - > - for (j = 0; j < i; j++) { > - bo = exec->bo[j]; > - dma_resv_unlock(bo->resv); > - } > - > - if (contended_lock != -1 && contended_lock >= i) { > - bo = exec->bo[contended_lock]; > - > - dma_resv_unlock(bo->resv); > - } > - > - if (ret == -EDEADLK) { > - contended_lock = i; > - goto retry; > - } > - > - ww_acquire_done(acquire_ctx); > - return ret; > - } > - } > - > - ww_acquire_done(acquire_ctx); > + int ret; > > /* Reserve space for our shared (read-only) fence references, > * before we commit the CL to the hardware. 
> */ > - for (i = 0; i < exec->bo_count; i++) { > - bo = exec->bo[i]; > + drm_exec_init(exec_ctx, DRM_EXEC_INTERRUPTIBLE_WAIT, exec->bo_count); > + drm_exec_until_all_locked(exec_ctx) { > + ret = drm_exec_prepare_array(exec_ctx, exec->bo, > + exec->bo_count, 1); Hi Maíra, So, I'm not familiar too drm_exec, but the original implementation of vc4_lock_bo_reservations() has a retry of locks from the contention and I don't see it inside the drm_exec_prepare_array(), why don't use the loop drm_exec_prepare_obj() plus drm_exec_retry_on_contention() (similar to the typical usage documented for drm_exec)? Also, probably you already considered that: we can extend this update to v3d, right? Melissa > + } > > - ret = dma_resv_reserve_fences(bo->resv, 1); > - if (ret) { > - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); > - return ret; > - } > + if (ret) { > + drm_exec_fini(exec_ctx); > + return ret; > } > > return 0; > @@ -679,7 +620,7 @@ vc4_lock_bo_reservations(struct drm_device *dev, > */ > static int > vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, > - struct ww_acquire_ctx *acquire_ctx, > + struct drm_exec *exec_ctx, > struct drm_syncobj *out_sync) > { > struct vc4_dev *vc4 = to_vc4_dev(dev); > @@ -708,7 +649,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, > > vc4_update_bo_seqnos(exec, seqno); > > - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); > + drm_exec_fini(exec_ctx); > > list_add_tail(&exec->head, &vc4->bin_job_list); > > @@ -1123,7 +1064,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > struct drm_vc4_submit_cl *args = data; > struct drm_syncobj *out_sync = NULL; > struct vc4_exec_info *exec; > - struct ww_acquire_ctx acquire_ctx; > + struct drm_exec exec_ctx; > struct dma_fence *in_fence; > int ret = 0; > > @@ -1216,7 +1157,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > if (ret) > goto fail; > > - ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); > + ret = vc4_lock_bo_reservations(exec, &exec_ctx); > if (ret) > goto fail; > > @@ -1224,7 +1165,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > out_sync = drm_syncobj_find(file_priv, args->out_sync); > if (!out_sync) { > ret = -EINVAL; > - goto fail; > + goto fail_unreserve; > } > > /* We replace the fence in out_sync in vc4_queue_submit since > @@ -1239,7 +1180,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > */ > exec->args = NULL; > > - ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); > + ret = vc4_queue_submit(dev, exec, &exec_ctx, out_sync); > > /* The syncobj isn't part of the exec data and we need to free our > * reference even if job submission failed. > @@ -1248,13 +1189,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > drm_syncobj_put(out_sync); > > if (ret) > - goto fail; > + goto fail_unreserve; > > /* Return the seqno for our job. */ > args->seqno = vc4->emit_seqno; > > return 0; > > +fail_unreserve: > + drm_exec_fini(&exec_ctx); > fail: > vc4_complete_exec(&vc4->base, exec); > > -- > 2.47.1 >
Am 16.12.24 um 20:08 schrieb Melissa Wen: > On 12/12, Maíra Canal wrote: >> VC4 has internal copies of `drm_gem_lock_reservations()` and >> `drm_gem_unlock_reservations()` inside the driver and ideally, we should >> move those hard-coded functions to the generic functions provided by DRM. >> But, instead of using the DRM GEM functions to (un)lock reservations, move >> the new DRM Execution Contexts API. >> >> Signed-off-by: Maíra Canal <mcanal@igalia.com> >> --- >> drivers/gpu/drm/vc4/Kconfig | 1 + >> drivers/gpu/drm/vc4/vc4_gem.c | 99 ++++++++--------------------------- >> 2 files changed, 22 insertions(+), 78 deletions(-) >> >> diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig >> index c5f30b317698..0627e826fda4 100644 >> --- a/drivers/gpu/drm/vc4/Kconfig >> +++ b/drivers/gpu/drm/vc4/Kconfig >> @@ -13,6 +13,7 @@ config DRM_VC4 >> select DRM_DISPLAY_HDMI_HELPER >> select DRM_DISPLAY_HDMI_STATE_HELPER >> select DRM_DISPLAY_HELPER >> + select DRM_EXEC >> select DRM_KMS_HELPER >> select DRM_GEM_DMA_HELPER >> select DRM_PANEL_BRIDGE >> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c >> index 22bccd69eb62..1021f45cb53c 100644 >> --- a/drivers/gpu/drm/vc4/vc4_gem.c >> +++ b/drivers/gpu/drm/vc4/vc4_gem.c >> @@ -29,6 +29,7 @@ >> #include <linux/sched/signal.h> >> #include <linux/dma-fence-array.h> >> >> +#include <drm/drm_exec.h> >> #include <drm/drm_syncobj.h> >> >> #include "uapi/drm/vc4_drm.h" >> @@ -578,19 +579,6 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) >> } >> } >> >> -static void >> -vc4_unlock_bo_reservations(struct drm_device *dev, >> - struct vc4_exec_info *exec, >> - struct ww_acquire_ctx *acquire_ctx) >> -{ >> - int i; >> - >> - for (i = 0; i < exec->bo_count; i++) >> - dma_resv_unlock(exec->bo[i]->resv); >> - >> - ww_acquire_fini(acquire_ctx); >> -} >> - >> /* Takes the reservation lock on all the BOs being referenced, so that >> * at queue submit time we can update the reservations. >> * >> @@ -599,70 +587,23 @@ vc4_unlock_bo_reservations(struct drm_device *dev, >> * to vc4, so we don't attach dma-buf fences to them. >> */ >> static int >> -vc4_lock_bo_reservations(struct drm_device *dev, >> - struct vc4_exec_info *exec, >> - struct ww_acquire_ctx *acquire_ctx) >> +vc4_lock_bo_reservations(struct vc4_exec_info *exec, >> + struct drm_exec *exec_ctx) >> { >> - int contended_lock = -1; >> - int i, ret; >> - struct drm_gem_object *bo; >> - >> - ww_acquire_init(acquire_ctx, &reservation_ww_class); >> - >> -retry: >> - if (contended_lock != -1) { >> - bo = exec->bo[contended_lock]; >> - ret = dma_resv_lock_slow_interruptible(bo->resv, acquire_ctx); >> - if (ret) { >> - ww_acquire_done(acquire_ctx); >> - return ret; >> - } >> - } >> - >> - for (i = 0; i < exec->bo_count; i++) { >> - if (i == contended_lock) >> - continue; >> - >> - bo = exec->bo[i]; >> - >> - ret = dma_resv_lock_interruptible(bo->resv, acquire_ctx); >> - if (ret) { >> - int j; >> - >> - for (j = 0; j < i; j++) { >> - bo = exec->bo[j]; >> - dma_resv_unlock(bo->resv); >> - } >> - >> - if (contended_lock != -1 && contended_lock >= i) { >> - bo = exec->bo[contended_lock]; >> - >> - dma_resv_unlock(bo->resv); >> - } >> - >> - if (ret == -EDEADLK) { >> - contended_lock = i; >> - goto retry; >> - } >> - >> - ww_acquire_done(acquire_ctx); >> - return ret; >> - } >> - } >> - >> - ww_acquire_done(acquire_ctx); >> + int ret; >> >> /* Reserve space for our shared (read-only) fence references, >> * before we commit the CL to the hardware. 
>> */ >> - for (i = 0; i < exec->bo_count; i++) { >> - bo = exec->bo[i]; >> + drm_exec_init(exec_ctx, DRM_EXEC_INTERRUPTIBLE_WAIT, exec->bo_count); >> + drm_exec_until_all_locked(exec_ctx) { >> + ret = drm_exec_prepare_array(exec_ctx, exec->bo, >> + exec->bo_count, 1); > Hi Maíra, > > So, I'm not familiar too drm_exec, but the original implementation of > vc4_lock_bo_reservations() has a retry of locks from the contention and > I don't see it inside the drm_exec_prepare_array(), why don't use the > loop drm_exec_prepare_obj() plus drm_exec_retry_on_contention() (similar > to the typical usage documented for drm_exec)? The way how drm_exec and drm_exec_prepare_array is used seems to be correct here. drm_exec_prepare_array() basically just loops over all the GEM BOs in the array and calls drm_exec_prepare_obj() on them, so no need to open code that. drm_exec_retry_on_contention() is only needed if you have multiple calls to drm_exec_prepare_array() or drm_exec_prepare_obj(), so that the loop is restarted in between the calls. Regards, Christian. > > Also, probably you already considered that: we can extend this update to > v3d, right? > > Melissa > >> + } >> >> - ret = dma_resv_reserve_fences(bo->resv, 1); >> - if (ret) { >> - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); >> - return ret; >> - } >> + if (ret) { >> + drm_exec_fini(exec_ctx); >> + return ret; >> } >> >> return 0; >> @@ -679,7 +620,7 @@ vc4_lock_bo_reservations(struct drm_device *dev, >> */ >> static int >> vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, >> - struct ww_acquire_ctx *acquire_ctx, >> + struct drm_exec *exec_ctx, >> struct drm_syncobj *out_sync) >> { >> struct vc4_dev *vc4 = to_vc4_dev(dev); >> @@ -708,7 +649,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, >> >> vc4_update_bo_seqnos(exec, seqno); >> >> - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); >> + drm_exec_fini(exec_ctx); >> >> list_add_tail(&exec->head, &vc4->bin_job_list); >> >> @@ -1123,7 +1064,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, >> struct drm_vc4_submit_cl *args = data; >> struct drm_syncobj *out_sync = NULL; >> struct vc4_exec_info *exec; >> - struct ww_acquire_ctx acquire_ctx; >> + struct drm_exec exec_ctx; >> struct dma_fence *in_fence; >> int ret = 0; >> >> @@ -1216,7 +1157,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, >> if (ret) >> goto fail; >> >> - ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); >> + ret = vc4_lock_bo_reservations(exec, &exec_ctx); >> if (ret) >> goto fail; >> >> @@ -1224,7 +1165,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, >> out_sync = drm_syncobj_find(file_priv, args->out_sync); >> if (!out_sync) { >> ret = -EINVAL; >> - goto fail; >> + goto fail_unreserve; >> } >> >> /* We replace the fence in out_sync in vc4_queue_submit since >> @@ -1239,7 +1180,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, >> */ >> exec->args = NULL; >> >> - ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); >> + ret = vc4_queue_submit(dev, exec, &exec_ctx, out_sync); >> >> /* The syncobj isn't part of the exec data and we need to free our >> * reference even if job submission failed. >> @@ -1248,13 +1189,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, >> drm_syncobj_put(out_sync); >> >> if (ret) >> - goto fail; >> + goto fail_unreserve; >> >> /* Return the seqno for our job. 
*/ >> args->seqno = vc4->emit_seqno; >> >> return 0; >> >> +fail_unreserve: >> + drm_exec_fini(&exec_ctx); >> fail: >> vc4_complete_exec(&vc4->base, exec); >> >> -- >> 2.47.1 >>
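For reference, drm_exec_prepare_array() is essentially the loop below (paraphrased from drivers/gpu/drm/drm_exec.c; see the kernel source for the authoritative version):

```c
int drm_exec_prepare_array(struct drm_exec *exec,
			   struct drm_gem_object **objects,
			   unsigned int num_objects,
			   unsigned int num_fences)
{
	int ret;

	for (unsigned int i = 0; i < num_objects; ++i) {
		/* Locks the object and reserves num_fences fence slots.
		 * A contention is recorded in the drm_exec context and
		 * -EDEADLK is returned to the caller.
		 */
		ret = drm_exec_prepare_obj(exec, objects[i], num_fences);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}
```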
Hi Christian, On 17/12/24 07:30, Christian König wrote: > Am 16.12.24 um 20:08 schrieb Melissa Wen: >> On 12/12, Maíra Canal wrote: >>> VC4 has internal copies of `drm_gem_lock_reservations()` and >>> `drm_gem_unlock_reservations()` inside the driver and ideally, we should >>> move those hard-coded functions to the generic functions provided by >>> DRM. >>> But, instead of using the DRM GEM functions to (un)lock reservations, >>> move >>> the new DRM Execution Contexts API. >>> >>> Signed-off-by: Maíra Canal <mcanal@igalia.com> >>> --- >>> drivers/gpu/drm/vc4/Kconfig | 1 + >>> drivers/gpu/drm/vc4/vc4_gem.c | 99 ++++++++--------------------------- >>> 2 files changed, 22 insertions(+), 78 deletions(-) >>> [...] >>> - >>> - ww_acquire_done(acquire_ctx); >>> + int ret; >>> /* Reserve space for our shared (read-only) fence references, >>> * before we commit the CL to the hardware. >>> */ >>> - for (i = 0; i < exec->bo_count; i++) { >>> - bo = exec->bo[i]; >>> + drm_exec_init(exec_ctx, DRM_EXEC_INTERRUPTIBLE_WAIT, exec- >>> >bo_count); >>> + drm_exec_until_all_locked(exec_ctx) { >>> + ret = drm_exec_prepare_array(exec_ctx, exec->bo, >>> + exec->bo_count, 1); >> Hi Maíra, >> >> So, I'm not familiar too drm_exec, but the original implementation of >> vc4_lock_bo_reservations() has a retry of locks from the contention and >> I don't see it inside the drm_exec_prepare_array(), why don't use the >> loop drm_exec_prepare_obj() plus drm_exec_retry_on_contention() (similar >> to the typical usage documented for drm_exec)? > > The way how drm_exec and drm_exec_prepare_array is used seems to be > correct here. > > drm_exec_prepare_array() basically just loops over all the GEM BOs in > the array and calls drm_exec_prepare_obj() on them, so no need to open > code that. > > drm_exec_retry_on_contention() is only needed if you have multiple calls > to drm_exec_prepare_array() or drm_exec_prepare_obj(), so that the loop > is restarted in between the calls. But doesn't `drm_exec_prepare_array()` have multiple calls to `drm_exec_prepare_obj()`? The fact that the multiple calls are wrapped in the function makes a difference? Best Regards, - Maíra > > Regards, > Christian. > >> >> Also, probably you already considered that: we can extend this update to >> v3d, right? 
>> >> Melissa >> >>> + } >>> - ret = dma_resv_reserve_fences(bo->resv, 1); >>> - if (ret) { >>> - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); >>> - return ret; >>> - } >>> + if (ret) { >>> + drm_exec_fini(exec_ctx); >>> + return ret; >>> } >>> return 0; >>> @@ -679,7 +620,7 @@ vc4_lock_bo_reservations(struct drm_device *dev, >>> */ >>> static int >>> vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, >>> - struct ww_acquire_ctx *acquire_ctx, >>> + struct drm_exec *exec_ctx, >>> struct drm_syncobj *out_sync) >>> { >>> struct vc4_dev *vc4 = to_vc4_dev(dev); >>> @@ -708,7 +649,7 @@ vc4_queue_submit(struct drm_device *dev, struct >>> vc4_exec_info *exec, >>> vc4_update_bo_seqnos(exec, seqno); >>> - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); >>> + drm_exec_fini(exec_ctx); >>> list_add_tail(&exec->head, &vc4->bin_job_list); >>> @@ -1123,7 +1064,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, >>> void *data, >>> struct drm_vc4_submit_cl *args = data; >>> struct drm_syncobj *out_sync = NULL; >>> struct vc4_exec_info *exec; >>> - struct ww_acquire_ctx acquire_ctx; >>> + struct drm_exec exec_ctx; >>> struct dma_fence *in_fence; >>> int ret = 0; >>> @@ -1216,7 +1157,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, >>> void *data, >>> if (ret) >>> goto fail; >>> - ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); >>> + ret = vc4_lock_bo_reservations(exec, &exec_ctx); >>> if (ret) >>> goto fail; >>> @@ -1224,7 +1165,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, >>> void *data, >>> out_sync = drm_syncobj_find(file_priv, args->out_sync); >>> if (!out_sync) { >>> ret = -EINVAL; >>> - goto fail; >>> + goto fail_unreserve; >>> } >>> /* We replace the fence in out_sync in vc4_queue_submit since >>> @@ -1239,7 +1180,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, >>> void *data, >>> */ >>> exec->args = NULL; >>> - ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); >>> + ret = vc4_queue_submit(dev, exec, &exec_ctx, out_sync); >>> /* The syncobj isn't part of the exec data and we need to free our >>> * reference even if job submission failed. >>> @@ -1248,13 +1189,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, >>> void *data, >>> drm_syncobj_put(out_sync); >>> if (ret) >>> - goto fail; >>> + goto fail_unreserve; >>> /* Return the seqno for our job. */ >>> args->seqno = vc4->emit_seqno; >>> return 0; >>> +fail_unreserve: >>> + drm_exec_fini(&exec_ctx); >>> fail: >>> vc4_complete_exec(&vc4->base, exec); >>> -- >>> 2.47.1 >>> >
On 12/17, Christian König wrote: > Am 16.12.24 um 20:08 schrieb Melissa Wen: > > On 12/12, Maíra Canal wrote: > > > VC4 has internal copies of `drm_gem_lock_reservations()` and > > > `drm_gem_unlock_reservations()` inside the driver and ideally, we should > > > move those hard-coded functions to the generic functions provided by DRM. > > > But, instead of using the DRM GEM functions to (un)lock reservations, move > > > the new DRM Execution Contexts API. > > > > > > Signed-off-by: Maíra Canal <mcanal@igalia.com> > > > --- > > > drivers/gpu/drm/vc4/Kconfig | 1 + > > > drivers/gpu/drm/vc4/vc4_gem.c | 99 ++++++++--------------------------- > > > 2 files changed, 22 insertions(+), 78 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig > > > index c5f30b317698..0627e826fda4 100644 > > > --- a/drivers/gpu/drm/vc4/Kconfig > > > +++ b/drivers/gpu/drm/vc4/Kconfig > > > @@ -13,6 +13,7 @@ config DRM_VC4 > > > select DRM_DISPLAY_HDMI_HELPER > > > select DRM_DISPLAY_HDMI_STATE_HELPER > > > select DRM_DISPLAY_HELPER > > > + select DRM_EXEC > > > select DRM_KMS_HELPER > > > select DRM_GEM_DMA_HELPER > > > select DRM_PANEL_BRIDGE > > > diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c > > > index 22bccd69eb62..1021f45cb53c 100644 > > > --- a/drivers/gpu/drm/vc4/vc4_gem.c > > > +++ b/drivers/gpu/drm/vc4/vc4_gem.c > > > @@ -29,6 +29,7 @@ > > > #include <linux/sched/signal.h> > > > #include <linux/dma-fence-array.h> > > > +#include <drm/drm_exec.h> > > > #include <drm/drm_syncobj.h> > > > #include "uapi/drm/vc4_drm.h" > > > @@ -578,19 +579,6 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) > > > } > > > } > > > -static void > > > -vc4_unlock_bo_reservations(struct drm_device *dev, > > > - struct vc4_exec_info *exec, > > > - struct ww_acquire_ctx *acquire_ctx) > > > -{ > > > - int i; > > > - > > > - for (i = 0; i < exec->bo_count; i++) > > > - dma_resv_unlock(exec->bo[i]->resv); > > > - > > > - ww_acquire_fini(acquire_ctx); > > > -} > > > - > > > /* Takes the reservation lock on all the BOs being referenced, so that > > > * at queue submit time we can update the reservations. > > > * > > > @@ -599,70 +587,23 @@ vc4_unlock_bo_reservations(struct drm_device *dev, > > > * to vc4, so we don't attach dma-buf fences to them. 
> > > */ > > > static int > > > -vc4_lock_bo_reservations(struct drm_device *dev, > > > - struct vc4_exec_info *exec, > > > - struct ww_acquire_ctx *acquire_ctx) > > > +vc4_lock_bo_reservations(struct vc4_exec_info *exec, > > > + struct drm_exec *exec_ctx) > > > { > > > - int contended_lock = -1; > > > - int i, ret; > > > - struct drm_gem_object *bo; > > > - > > > - ww_acquire_init(acquire_ctx, &reservation_ww_class); > > > - > > > -retry: > > > - if (contended_lock != -1) { > > > - bo = exec->bo[contended_lock]; > > > - ret = dma_resv_lock_slow_interruptible(bo->resv, acquire_ctx); > > > - if (ret) { > > > - ww_acquire_done(acquire_ctx); > > > - return ret; > > > - } > > > - } > > > - > > > - for (i = 0; i < exec->bo_count; i++) { > > > - if (i == contended_lock) > > > - continue; > > > - > > > - bo = exec->bo[i]; > > > - > > > - ret = dma_resv_lock_interruptible(bo->resv, acquire_ctx); > > > - if (ret) { > > > - int j; > > > - > > > - for (j = 0; j < i; j++) { > > > - bo = exec->bo[j]; > > > - dma_resv_unlock(bo->resv); > > > - } > > > - > > > - if (contended_lock != -1 && contended_lock >= i) { > > > - bo = exec->bo[contended_lock]; > > > - > > > - dma_resv_unlock(bo->resv); > > > - } > > > - > > > - if (ret == -EDEADLK) { > > > - contended_lock = i; > > > - goto retry; > > > - } > > > - > > > - ww_acquire_done(acquire_ctx); > > > - return ret; > > > - } > > > - } > > > - > > > - ww_acquire_done(acquire_ctx); > > > + int ret; > > > /* Reserve space for our shared (read-only) fence references, > > > * before we commit the CL to the hardware. > > > */ > > > - for (i = 0; i < exec->bo_count; i++) { > > > - bo = exec->bo[i]; > > > + drm_exec_init(exec_ctx, DRM_EXEC_INTERRUPTIBLE_WAIT, exec->bo_count); > > > + drm_exec_until_all_locked(exec_ctx) { > > > + ret = drm_exec_prepare_array(exec_ctx, exec->bo, > > > + exec->bo_count, 1); > > Hi Maíra, > > > > So, I'm not familiar too drm_exec, but the original implementation of > > vc4_lock_bo_reservations() has a retry of locks from the contention and > > I don't see it inside the drm_exec_prepare_array(), why don't use the > > loop drm_exec_prepare_obj() plus drm_exec_retry_on_contention() (similar > > to the typical usage documented for drm_exec)? > > The way how drm_exec and drm_exec_prepare_array is used seems to be correct > here. > > drm_exec_prepare_array() basically just loops over all the GEM BOs in the > array and calls drm_exec_prepare_obj() on them, so no need to open code > that. > > drm_exec_retry_on_contention() is only needed if you have multiple calls to > drm_exec_prepare_array() or drm_exec_prepare_obj(), so that the loop is > restarted in between the calls. Current behavior of vc4 is to retry locking the contended lock and then all others, instead of just return on error, and I don't see this being covered by drm_exec_prepare_array(). vc4 behavior seems closer to what is done by MSM, a loop of drm_exec_prepare_obj() plus drm_exec_retry_on_contention(). Moreover, I don't see any DRM driver using drm_exec_prepare_array(), so maybe updating this function would make it more useful and meet vc4 current behavior too. Something as below: ``` int drm_exec_prepare_array(struct drm_exec *exec, struct drm_gem_object **objects, unsigned int num_objects, unsigned int num_fences) { int ret; for (unsigned int i = 0; i < num_objects; ++i) { ret = drm_exec_prepare_obj(exec, objects[i], num_fences); drm_exec_retry_on_contention(exec); <----- here if (unlikely(ret)) return ret; } return 0; } ``` WDYT? Melissa > > Regards, > Christian. 
> > > > > Also, probably you already considered that: we can extend this update to > > v3d, right? > > > > Melissa > > > > > + } > > > - ret = dma_resv_reserve_fences(bo->resv, 1); > > > - if (ret) { > > > - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); > > > - return ret; > > > - } > > > + if (ret) { > > > + drm_exec_fini(exec_ctx); > > > + return ret; > > > } > > > return 0; > > > @@ -679,7 +620,7 @@ vc4_lock_bo_reservations(struct drm_device *dev, > > > */ > > > static int > > > vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, > > > - struct ww_acquire_ctx *acquire_ctx, > > > + struct drm_exec *exec_ctx, > > > struct drm_syncobj *out_sync) > > > { > > > struct vc4_dev *vc4 = to_vc4_dev(dev); > > > @@ -708,7 +649,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, > > > vc4_update_bo_seqnos(exec, seqno); > > > - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); > > > + drm_exec_fini(exec_ctx); > > > list_add_tail(&exec->head, &vc4->bin_job_list); > > > @@ -1123,7 +1064,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > > > struct drm_vc4_submit_cl *args = data; > > > struct drm_syncobj *out_sync = NULL; > > > struct vc4_exec_info *exec; > > > - struct ww_acquire_ctx acquire_ctx; > > > + struct drm_exec exec_ctx; > > > struct dma_fence *in_fence; > > > int ret = 0; > > > @@ -1216,7 +1157,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > > > if (ret) > > > goto fail; > > > - ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); > > > + ret = vc4_lock_bo_reservations(exec, &exec_ctx); > > > if (ret) > > > goto fail; > > > @@ -1224,7 +1165,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > > > out_sync = drm_syncobj_find(file_priv, args->out_sync); > > > if (!out_sync) { > > > ret = -EINVAL; > > > - goto fail; > > > + goto fail_unreserve; > > > } > > > /* We replace the fence in out_sync in vc4_queue_submit since > > > @@ -1239,7 +1180,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > > > */ > > > exec->args = NULL; > > > - ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); > > > + ret = vc4_queue_submit(dev, exec, &exec_ctx, out_sync); > > > /* The syncobj isn't part of the exec data and we need to free our > > > * reference even if job submission failed. > > > @@ -1248,13 +1189,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > > > drm_syncobj_put(out_sync); > > > if (ret) > > > - goto fail; > > > + goto fail_unreserve; > > > /* Return the seqno for our job. */ > > > args->seqno = vc4->emit_seqno; > > > return 0; > > > +fail_unreserve: > > > + drm_exec_fini(&exec_ctx); > > > fail: > > > vc4_complete_exec(&vc4->base, exec); > > > -- > > > 2.47.1 > > > >
Am 17.12.24 um 12:09 schrieb Maíra Canal: > Hi Christian, > > On 17/12/24 07:30, Christian König wrote: >> Am 16.12.24 um 20:08 schrieb Melissa Wen: >>> On 12/12, Maíra Canal wrote: >>>> VC4 has internal copies of `drm_gem_lock_reservations()` and >>>> `drm_gem_unlock_reservations()` inside the driver and ideally, we >>>> should >>>> move those hard-coded functions to the generic functions provided >>>> by DRM. >>>> But, instead of using the DRM GEM functions to (un)lock >>>> reservations, move >>>> the new DRM Execution Contexts API. >>>> >>>> Signed-off-by: Maíra Canal <mcanal@igalia.com> >>>> --- >>>> drivers/gpu/drm/vc4/Kconfig | 1 + >>>> drivers/gpu/drm/vc4/vc4_gem.c | 99 >>>> ++++++++--------------------------- >>>> 2 files changed, 22 insertions(+), 78 deletions(-) >>>> > > [...] > >>>> - >>>> - ww_acquire_done(acquire_ctx); >>>> + int ret; >>>> /* Reserve space for our shared (read-only) fence references, >>>> * before we commit the CL to the hardware. >>>> */ >>>> - for (i = 0; i < exec->bo_count; i++) { >>>> - bo = exec->bo[i]; >>>> + drm_exec_init(exec_ctx, DRM_EXEC_INTERRUPTIBLE_WAIT, exec- >>>> >bo_count); >>>> + drm_exec_until_all_locked(exec_ctx) { >>>> + ret = drm_exec_prepare_array(exec_ctx, exec->bo, >>>> + exec->bo_count, 1); >>> Hi Maíra, >>> >>> So, I'm not familiar too drm_exec, but the original implementation of >>> vc4_lock_bo_reservations() has a retry of locks from the contention and >>> I don't see it inside the drm_exec_prepare_array(), why don't use the >>> loop drm_exec_prepare_obj() plus drm_exec_retry_on_contention() >>> (similar >>> to the typical usage documented for drm_exec)? >> >> The way how drm_exec and drm_exec_prepare_array is used seems to be >> correct here. >> >> drm_exec_prepare_array() basically just loops over all the GEM BOs in >> the array and calls drm_exec_prepare_obj() on them, so no need to >> open code that. >> >> drm_exec_retry_on_contention() is only needed if you have multiple >> calls to drm_exec_prepare_array() or drm_exec_prepare_obj(), so that >> the loop is restarted in between the calls. > > But doesn't `drm_exec_prepare_array()` have multiple calls to > `drm_exec_prepare_obj()`? The fact that the multiple calls are wrapped > in the function makes a difference? Yeah. I know, it's not so easy to understand :) What drm_exec_until_all_locked() and drm_exec_retry_on_contention() are basically doing is nicely wrapped error handling. In other words drm_exec_retry_on_contention() does a "goto *__drm_exec_retry_ptr" if it detects that we have a contention. But you can't goto from a label in a function back into the caller. So what drm_exec_prepare_array() does is to abort as soon as it sees the first error: ret = drm_exec_prepare_obj(exec, objects[i], num_fences); if (unlikely(ret)) return ret; And in the caller we have: drm_exec_until_all_locked(exec_ctx) { ret = drm_exec_prepare_array(exec_ctx, exec->bo, exec->bo_count, 1); } So the loop restarts after drm_exec_prepare_array() anyway and because of this using drm_exec_retry_on_contention() is not explicitly necessary. Regards, Christian. > > Best Regards, > - Maíra > >> >> Regards, >> Christian. >> >>> >>> Also, probably you already considered that: we can extend this >>> update to >>> v3d, right? 
>>> >>> Melissa >>> >>>> + } >>>> - ret = dma_resv_reserve_fences(bo->resv, 1); >>>> - if (ret) { >>>> - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); >>>> - return ret; >>>> - } >>>> + if (ret) { >>>> + drm_exec_fini(exec_ctx); >>>> + return ret; >>>> } >>>> return 0; >>>> @@ -679,7 +620,7 @@ vc4_lock_bo_reservations(struct drm_device *dev, >>>> */ >>>> static int >>>> vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, >>>> - struct ww_acquire_ctx *acquire_ctx, >>>> + struct drm_exec *exec_ctx, >>>> struct drm_syncobj *out_sync) >>>> { >>>> struct vc4_dev *vc4 = to_vc4_dev(dev); >>>> @@ -708,7 +649,7 @@ vc4_queue_submit(struct drm_device *dev, struct >>>> vc4_exec_info *exec, >>>> vc4_update_bo_seqnos(exec, seqno); >>>> - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); >>>> + drm_exec_fini(exec_ctx); >>>> list_add_tail(&exec->head, &vc4->bin_job_list); >>>> @@ -1123,7 +1064,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, >>>> void *data, >>>> struct drm_vc4_submit_cl *args = data; >>>> struct drm_syncobj *out_sync = NULL; >>>> struct vc4_exec_info *exec; >>>> - struct ww_acquire_ctx acquire_ctx; >>>> + struct drm_exec exec_ctx; >>>> struct dma_fence *in_fence; >>>> int ret = 0; >>>> @@ -1216,7 +1157,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, >>>> void *data, >>>> if (ret) >>>> goto fail; >>>> - ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); >>>> + ret = vc4_lock_bo_reservations(exec, &exec_ctx); >>>> if (ret) >>>> goto fail; >>>> @@ -1224,7 +1165,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, >>>> void *data, >>>> out_sync = drm_syncobj_find(file_priv, args->out_sync); >>>> if (!out_sync) { >>>> ret = -EINVAL; >>>> - goto fail; >>>> + goto fail_unreserve; >>>> } >>>> /* We replace the fence in out_sync in vc4_queue_submit >>>> since >>>> @@ -1239,7 +1180,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, >>>> void *data, >>>> */ >>>> exec->args = NULL; >>>> - ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); >>>> + ret = vc4_queue_submit(dev, exec, &exec_ctx, out_sync); >>>> /* The syncobj isn't part of the exec data and we need to >>>> free our >>>> * reference even if job submission failed. >>>> @@ -1248,13 +1189,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, >>>> void *data, >>>> drm_syncobj_put(out_sync); >>>> if (ret) >>>> - goto fail; >>>> + goto fail_unreserve; >>>> /* Return the seqno for our job. */ >>>> args->seqno = vc4->emit_seqno; >>>> return 0; >>>> +fail_unreserve: >>>> + drm_exec_fini(&exec_ctx); >>>> fail: >>>> vc4_complete_exec(&vc4->base, exec); >>>> -- >>>> 2.47.1 >>>> >> >
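Put as code, the two shapes being contrasted look roughly like the fragments below. The first is the pattern used in the patch; the second is a hypothetical submission that also prepares an extra BO (`some_extra_bo` is made up purely for illustration):

```c
/* Single prepare call: on contention drm_exec_cleanup() drops all locks and
 * the drm_exec_until_all_locked() body simply runs again, slow-locking the
 * contended BO first, so no explicit retry macro is required.
 */
drm_exec_until_all_locked(exec_ctx) {
	ret = drm_exec_prepare_array(exec_ctx, exec->bo, exec->bo_count, 1);
}

/* Multiple prepare calls: each one is followed by
 * drm_exec_retry_on_contention() so that a contention jumps straight back
 * to the top of the loop instead of executing the remaining calls first.
 */
drm_exec_until_all_locked(exec_ctx) {
	ret = drm_exec_prepare_obj(exec_ctx, some_extra_bo, 1);
	drm_exec_retry_on_contention(exec_ctx);
	if (ret)
		break;

	ret = drm_exec_prepare_array(exec_ctx, exec->bo, exec->bo_count, 1);
	drm_exec_retry_on_contention(exec_ctx);
}
```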
On 12/17, Christian König wrote: > Am 17.12.24 um 12:09 schrieb Maíra Canal: > > Hi Christian, > > > > On 17/12/24 07:30, Christian König wrote: > > > Am 16.12.24 um 20:08 schrieb Melissa Wen: > > > > On 12/12, Maíra Canal wrote: > > > > > VC4 has internal copies of `drm_gem_lock_reservations()` and > > > > > `drm_gem_unlock_reservations()` inside the driver and > > > > > ideally, we should > > > > > move those hard-coded functions to the generic functions > > > > > provided by DRM. > > > > > But, instead of using the DRM GEM functions to (un)lock > > > > > reservations, move > > > > > the new DRM Execution Contexts API. > > > > > > > > > > Signed-off-by: Maíra Canal <mcanal@igalia.com> > > > > > --- > > > > > drivers/gpu/drm/vc4/Kconfig | 1 + > > > > > drivers/gpu/drm/vc4/vc4_gem.c | 99 > > > > > ++++++++--------------------------- > > > > > 2 files changed, 22 insertions(+), 78 deletions(-) > > > > > > > > > [...] > > > > > > > - > > > > > - ww_acquire_done(acquire_ctx); > > > > > + int ret; > > > > > /* Reserve space for our shared (read-only) fence references, > > > > > * before we commit the CL to the hardware. > > > > > */ > > > > > - for (i = 0; i < exec->bo_count; i++) { > > > > > - bo = exec->bo[i]; > > > > > + drm_exec_init(exec_ctx, DRM_EXEC_INTERRUPTIBLE_WAIT, > > > > > exec- >bo_count); > > > > > + drm_exec_until_all_locked(exec_ctx) { > > > > > + ret = drm_exec_prepare_array(exec_ctx, exec->bo, > > > > > + exec->bo_count, 1); > > > > Hi Maíra, > > > > > > > > So, I'm not familiar too drm_exec, but the original implementation of > > > > vc4_lock_bo_reservations() has a retry of locks from the contention and > > > > I don't see it inside the drm_exec_prepare_array(), why don't use the > > > > loop drm_exec_prepare_obj() plus drm_exec_retry_on_contention() > > > > (similar > > > > to the typical usage documented for drm_exec)? > > > > > > The way how drm_exec and drm_exec_prepare_array is used seems to be > > > correct here. > > > > > > drm_exec_prepare_array() basically just loops over all the GEM BOs > > > in the array and calls drm_exec_prepare_obj() on them, so no need to > > > open code that. > > > > > > drm_exec_retry_on_contention() is only needed if you have multiple > > > calls to drm_exec_prepare_array() or drm_exec_prepare_obj(), so that > > > the loop is restarted in between the calls. > > > > But doesn't `drm_exec_prepare_array()` have multiple calls to > > `drm_exec_prepare_obj()`? The fact that the multiple calls are wrapped > > in the function makes a difference? > > Yeah. I know, it's not so easy to understand :) > > What drm_exec_until_all_locked() and drm_exec_retry_on_contention() are > basically doing is nicely wrapped error handling. > > In other words drm_exec_retry_on_contention() does a "goto > *__drm_exec_retry_ptr" if it detects that we have a contention. But you > can't goto from a label in a function back into the caller. > > So what drm_exec_prepare_array() does is to abort as soon as it sees the > first error: > > ret = drm_exec_prepare_obj(exec, objects[i], num_fences); > if (unlikely(ret)) > return ret; > > > And in the caller we have: > > drm_exec_until_all_locked(exec_ctx) { > ret = drm_exec_prepare_array(exec_ctx, exec->bo, exec->bo_count, 1); > } > > So the loop restarts after drm_exec_prepare_array() anyway and because of > this using drm_exec_retry_on_contention() is not explicitly necessary. Ah, got it now. Thanks for explaining. Melissa > > Regards, > Christian. > > > > > Best Regards, > > - Maíra > > > > > > > > Regards, > > > Christian. 
> > > > > > > > > > > Also, probably you already considered that: we can extend this > > > > update to > > > > v3d, right? > > > > > > > > Melissa > > > > > > > > > + } > > > > > - ret = dma_resv_reserve_fences(bo->resv, 1); > > > > > - if (ret) { > > > > > - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); > > > > > - return ret; > > > > > - } > > > > > + if (ret) { > > > > > + drm_exec_fini(exec_ctx); > > > > > + return ret; > > > > > } > > > > > return 0; > > > > > @@ -679,7 +620,7 @@ vc4_lock_bo_reservations(struct drm_device *dev, > > > > > */ > > > > > static int > > > > > vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, > > > > > - struct ww_acquire_ctx *acquire_ctx, > > > > > + struct drm_exec *exec_ctx, > > > > > struct drm_syncobj *out_sync) > > > > > { > > > > > struct vc4_dev *vc4 = to_vc4_dev(dev); > > > > > @@ -708,7 +649,7 @@ vc4_queue_submit(struct drm_device *dev, > > > > > struct vc4_exec_info *exec, > > > > > vc4_update_bo_seqnos(exec, seqno); > > > > > - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); > > > > > + drm_exec_fini(exec_ctx); > > > > > list_add_tail(&exec->head, &vc4->bin_job_list); > > > > > @@ -1123,7 +1064,7 @@ vc4_submit_cl_ioctl(struct drm_device > > > > > *dev, void *data, > > > > > struct drm_vc4_submit_cl *args = data; > > > > > struct drm_syncobj *out_sync = NULL; > > > > > struct vc4_exec_info *exec; > > > > > - struct ww_acquire_ctx acquire_ctx; > > > > > + struct drm_exec exec_ctx; > > > > > struct dma_fence *in_fence; > > > > > int ret = 0; > > > > > @@ -1216,7 +1157,7 @@ vc4_submit_cl_ioctl(struct drm_device > > > > > *dev, void *data, > > > > > if (ret) > > > > > goto fail; > > > > > - ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); > > > > > + ret = vc4_lock_bo_reservations(exec, &exec_ctx); > > > > > if (ret) > > > > > goto fail; > > > > > @@ -1224,7 +1165,7 @@ vc4_submit_cl_ioctl(struct drm_device > > > > > *dev, void *data, > > > > > out_sync = drm_syncobj_find(file_priv, args->out_sync); > > > > > if (!out_sync) { > > > > > ret = -EINVAL; > > > > > - goto fail; > > > > > + goto fail_unreserve; > > > > > } > > > > > /* We replace the fence in out_sync in > > > > > vc4_queue_submit since > > > > > @@ -1239,7 +1180,7 @@ vc4_submit_cl_ioctl(struct drm_device > > > > > *dev, void *data, > > > > > */ > > > > > exec->args = NULL; > > > > > - ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); > > > > > + ret = vc4_queue_submit(dev, exec, &exec_ctx, out_sync); > > > > > /* The syncobj isn't part of the exec data and we need > > > > > to free our > > > > > * reference even if job submission failed. > > > > > @@ -1248,13 +1189,15 @@ vc4_submit_cl_ioctl(struct > > > > > drm_device *dev, void *data, > > > > > drm_syncobj_put(out_sync); > > > > > if (ret) > > > > > - goto fail; > > > > > + goto fail_unreserve; > > > > > /* Return the seqno for our job. */ > > > > > args->seqno = vc4->emit_seqno; > > > > > return 0; > > > > > +fail_unreserve: > > > > > + drm_exec_fini(&exec_ctx); > > > > > fail: > > > > > vc4_complete_exec(&vc4->base, exec); > > > > > -- > > > > > 2.47.1 > > > > > > > > > > >
diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index c5f30b317698..0627e826fda4 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -13,6 +13,7 @@ config DRM_VC4 select DRM_DISPLAY_HDMI_HELPER select DRM_DISPLAY_HDMI_STATE_HELPER select DRM_DISPLAY_HELPER + select DRM_EXEC select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 22bccd69eb62..1021f45cb53c 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -29,6 +29,7 @@ #include <linux/sched/signal.h> #include <linux/dma-fence-array.h> +#include <drm/drm_exec.h> #include <drm/drm_syncobj.h> #include "uapi/drm/vc4_drm.h" @@ -578,19 +579,6 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) } } -static void -vc4_unlock_bo_reservations(struct drm_device *dev, - struct vc4_exec_info *exec, - struct ww_acquire_ctx *acquire_ctx) -{ - int i; - - for (i = 0; i < exec->bo_count; i++) - dma_resv_unlock(exec->bo[i]->resv); - - ww_acquire_fini(acquire_ctx); -} - /* Takes the reservation lock on all the BOs being referenced, so that * at queue submit time we can update the reservations. * @@ -599,70 +587,23 @@ vc4_unlock_bo_reservations(struct drm_device *dev, * to vc4, so we don't attach dma-buf fences to them. */ static int -vc4_lock_bo_reservations(struct drm_device *dev, - struct vc4_exec_info *exec, - struct ww_acquire_ctx *acquire_ctx) +vc4_lock_bo_reservations(struct vc4_exec_info *exec, + struct drm_exec *exec_ctx) { - int contended_lock = -1; - int i, ret; - struct drm_gem_object *bo; - - ww_acquire_init(acquire_ctx, &reservation_ww_class); - -retry: - if (contended_lock != -1) { - bo = exec->bo[contended_lock]; - ret = dma_resv_lock_slow_interruptible(bo->resv, acquire_ctx); - if (ret) { - ww_acquire_done(acquire_ctx); - return ret; - } - } - - for (i = 0; i < exec->bo_count; i++) { - if (i == contended_lock) - continue; - - bo = exec->bo[i]; - - ret = dma_resv_lock_interruptible(bo->resv, acquire_ctx); - if (ret) { - int j; - - for (j = 0; j < i; j++) { - bo = exec->bo[j]; - dma_resv_unlock(bo->resv); - } - - if (contended_lock != -1 && contended_lock >= i) { - bo = exec->bo[contended_lock]; - - dma_resv_unlock(bo->resv); - } - - if (ret == -EDEADLK) { - contended_lock = i; - goto retry; - } - - ww_acquire_done(acquire_ctx); - return ret; - } - } - - ww_acquire_done(acquire_ctx); + int ret; /* Reserve space for our shared (read-only) fence references, * before we commit the CL to the hardware. 
*/ - for (i = 0; i < exec->bo_count; i++) { - bo = exec->bo[i]; + drm_exec_init(exec_ctx, DRM_EXEC_INTERRUPTIBLE_WAIT, exec->bo_count); + drm_exec_until_all_locked(exec_ctx) { + ret = drm_exec_prepare_array(exec_ctx, exec->bo, + exec->bo_count, 1); + } - ret = dma_resv_reserve_fences(bo->resv, 1); - if (ret) { - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); - return ret; - } + if (ret) { + drm_exec_fini(exec_ctx); + return ret; } return 0; @@ -679,7 +620,7 @@ vc4_lock_bo_reservations(struct drm_device *dev, */ static int vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, - struct ww_acquire_ctx *acquire_ctx, + struct drm_exec *exec_ctx, struct drm_syncobj *out_sync) { struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -708,7 +649,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, vc4_update_bo_seqnos(exec, seqno); - vc4_unlock_bo_reservations(dev, exec, acquire_ctx); + drm_exec_fini(exec_ctx); list_add_tail(&exec->head, &vc4->bin_job_list); @@ -1123,7 +1064,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct drm_vc4_submit_cl *args = data; struct drm_syncobj *out_sync = NULL; struct vc4_exec_info *exec; - struct ww_acquire_ctx acquire_ctx; + struct drm_exec exec_ctx; struct dma_fence *in_fence; int ret = 0; @@ -1216,7 +1157,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; - ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); + ret = vc4_lock_bo_reservations(exec, &exec_ctx); if (ret) goto fail; @@ -1224,7 +1165,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, out_sync = drm_syncobj_find(file_priv, args->out_sync); if (!out_sync) { ret = -EINVAL; - goto fail; + goto fail_unreserve; } /* We replace the fence in out_sync in vc4_queue_submit since @@ -1239,7 +1180,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, */ exec->args = NULL; - ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); + ret = vc4_queue_submit(dev, exec, &exec_ctx, out_sync); /* The syncobj isn't part of the exec data and we need to free our * reference even if job submission failed. @@ -1248,13 +1189,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, drm_syncobj_put(out_sync); if (ret) - goto fail; + goto fail_unreserve; /* Return the seqno for our job. */ args->seqno = vc4->emit_seqno; return 0; +fail_unreserve: + drm_exec_fini(&exec_ctx); fail: vc4_complete_exec(&vc4->base, exec);
VC4 has internal copies of `drm_gem_lock_reservations()` and `drm_gem_unlock_reservations()` inside the driver, and ideally we should replace those hand-rolled functions with the generic helpers provided by DRM. But instead of switching to the DRM GEM functions to (un)lock the reservations, move to the new DRM Execution Contexts API.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
---
 drivers/gpu/drm/vc4/Kconfig   |  1 +
 drivers/gpu/drm/vc4/vc4_gem.c | 99 ++++++++---------------------------
 2 files changed, 22 insertions(+), 78 deletions(-)