Message ID | 20240628145536.778349-3-boris.brezillon@collabora.com (mailing list archive) |
---|---|
State | New, archived |
Series | drm/panthor: Fix support for sync-only jobs |
On Fri, Jun 28, 2024 at 04:55:36PM +0200, Boris Brezillon wrote: > A sync-only job is meant to provide a synchronization point on a > queue, so we can't return a NULL fence there, we have to add a signal > operation to the command stream which executes after all other > previously submitted jobs are done. > > Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Took me a bit longer to read, lets blame Friday. > --- > drivers/gpu/drm/panthor/panthor_sched.c | 41 ++++++++++++++++++++----- > include/uapi/drm/panthor_drm.h | 5 +++ > 2 files changed, 38 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c > index 79ffcbc41d78..951ff7e63ea8 100644 > --- a/drivers/gpu/drm/panthor/panthor_sched.c > +++ b/drivers/gpu/drm/panthor/panthor_sched.c > @@ -458,6 +458,16 @@ struct panthor_queue { > /** @seqno: Sequence number of the last initialized fence. */ > atomic64_t seqno; > > + /** > + * @last_fence: Fence of the last submitted job. > + * > + * We return this fence when we get an empty command stream. > + * This way, we are guaranteed that all earlier jobs have completed > + * when drm_sched_job::s_fence::finished without having to feed > + * the CS ring buffer with a dummy job that only signals the fence. > + */ > + struct dma_fence *last_fence; > + > /** > * @in_flight_jobs: List containing all in-flight jobs. > * > @@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * > panthor_kernel_bo_destroy(queue->ringbuf); > panthor_kernel_bo_destroy(queue->iface.mem); > > + /* Release the last_fence we were holding, if any. */ > + dma_fence_put(queue->fence_ctx.last_fence); > + > kfree(queue); > } > > @@ -2865,11 +2878,14 @@ queue_run_job(struct drm_sched_job *sched_job) > static_assert(sizeof(call_instrs) % 64 == 0, > "call_instrs is not aligned on a cacheline"); > > - /* Stream size is zero, nothing to do => return a NULL fence and let > - * drm_sched signal the parent. > + /* Stream size is zero, nothing to do except making sure all previously > + * submitted jobs are done before we signal the > + * drm_sched_job::s_fence::finished fence. > */ > - if (!job->call_info.size) > - return NULL; > + if (!job->call_info.size) { > + job->done_fence = dma_fence_get(queue->fence_ctx.last_fence); > + return job->done_fence; What happens if the last job's done_fence was cancelled or timed out? Is the sync job's done_fence going to be signalled with the same error? Now that we're returning a fence here, should the job be also added into the in_flight_jobs? If you're happy with depending on the previous job's done_fence and not track the sync job in in_flight_jobs, then you can have my Reviewed-by: Liviu Dudau <liviu.dudau@arm.com> Best regards, Liviu > + } > > ret = pm_runtime_resume_and_get(ptdev->base.dev); > if (drm_WARN_ON(&ptdev->base, ret)) > @@ -2928,6 +2944,10 @@ queue_run_job(struct drm_sched_job *sched_job) > } > } > > + /* Update the last fence. 
*/ > + dma_fence_put(queue->fence_ctx.last_fence); > + queue->fence_ctx.last_fence = dma_fence_get(job->done_fence); > + > done_fence = dma_fence_get(job->done_fence); > > out_unlock: > @@ -3378,10 +3398,15 @@ panthor_job_create(struct panthor_file *pfile, > goto err_put_job; > } > > - job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); > - if (!job->done_fence) { > - ret = -ENOMEM; > - goto err_put_job; > + /* Empty command streams don't need a fence, they'll pick the one from > + * the previously submitted job. > + */ > + if (job->call_info.size) { > + job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); > + if (!job->done_fence) { > + ret = -ENOMEM; > + goto err_put_job; > + } > } > > ret = drm_sched_job_init(&job->base, > diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h > index aaed8e12ad0b..926b1deb1116 100644 > --- a/include/uapi/drm/panthor_drm.h > +++ b/include/uapi/drm/panthor_drm.h > @@ -802,6 +802,9 @@ struct drm_panthor_queue_submit { > * Must be 64-bit/8-byte aligned (the size of a CS instruction) > * > * Can be zero if stream_addr is zero too. > + * > + * When the stream size is zero, the queue submit serves as a > + * synchronization point. > */ > __u32 stream_size; > > @@ -822,6 +825,8 @@ struct drm_panthor_queue_submit { > * ensure the GPU doesn't get garbage when reading the indirect command > * stream buffers. If you want the cache flush to happen > * unconditionally, pass a zero here. > + * > + * Ignored when stream_size is zero. > */ > __u32 latest_flush; > > -- > 2.45.0 >
On Fri, 28 Jun 2024 22:34:57 +0100 Liviu Dudau <liviu.dudau@arm.com> wrote: > On Fri, Jun 28, 2024 at 04:55:36PM +0200, Boris Brezillon wrote: > > A sync-only job is meant to provide a synchronization point on a > > queue, so we can't return a NULL fence there, we have to add a signal > > operation to the command stream which executes after all other > > previously submitted jobs are done. > > > > Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > > Took me a bit longer to read, lets blame Friday. > > > --- > > drivers/gpu/drm/panthor/panthor_sched.c | 41 ++++++++++++++++++++----- > > include/uapi/drm/panthor_drm.h | 5 +++ > > 2 files changed, 38 insertions(+), 8 deletions(-) > > > > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c > > index 79ffcbc41d78..951ff7e63ea8 100644 > > --- a/drivers/gpu/drm/panthor/panthor_sched.c > > +++ b/drivers/gpu/drm/panthor/panthor_sched.c > > @@ -458,6 +458,16 @@ struct panthor_queue { > > /** @seqno: Sequence number of the last initialized fence. */ > > atomic64_t seqno; > > > > + /** > > + * @last_fence: Fence of the last submitted job. > > + * > > + * We return this fence when we get an empty command stream. > > + * This way, we are guaranteed that all earlier jobs have completed > > + * when drm_sched_job::s_fence::finished without having to feed > > + * the CS ring buffer with a dummy job that only signals the fence. > > + */ > > + struct dma_fence *last_fence; > > + > > /** > > * @in_flight_jobs: List containing all in-flight jobs. > > * > > @@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * > > panthor_kernel_bo_destroy(queue->ringbuf); > > panthor_kernel_bo_destroy(queue->iface.mem); > > > > + /* Release the last_fence we were holding, if any. */ > > + dma_fence_put(queue->fence_ctx.last_fence); > > + > > kfree(queue); > > } > > > > @@ -2865,11 +2878,14 @@ queue_run_job(struct drm_sched_job *sched_job) > > static_assert(sizeof(call_instrs) % 64 == 0, > > "call_instrs is not aligned on a cacheline"); > > > > - /* Stream size is zero, nothing to do => return a NULL fence and let > > - * drm_sched signal the parent. > > + /* Stream size is zero, nothing to do except making sure all previously > > + * submitted jobs are done before we signal the > > + * drm_sched_job::s_fence::finished fence. > > */ > > - if (!job->call_info.size) > > - return NULL; > > + if (!job->call_info.size) { > > + job->done_fence = dma_fence_get(queue->fence_ctx.last_fence); > > + return job->done_fence; > > What happens if the last job's done_fence was cancelled or timed out? Is the > sync job's done_fence going to be signalled with the same error? It's the same object, so yes, the job will also be considered faulty (the error propagated to the job::s_fence::finished fence). I guess synchronization jobs are not supposed to fail/timeout in theory, because they don't do anything, but I don't think that's an issue in practice, because dma_fence errors are never propagated to user-space (only the queue status is). > > Now that we're returning a fence here, should the job be also added into the > in_flight_jobs? Yeah, that's done on purpose, such that we don't end up signalling the same dma_fence object twice (which is forbidden). 
This makes me realize I should probably drop the 'is_cs_empty()' check in group_sync_upd_work(), since we're not supposed to have a job with an empty CS in the in_flight_jobs list. diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 951ff7e63ea8..8bf01b7b1596 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -2797,9 +2797,6 @@ static void group_sync_upd_work(struct work_struct *work) spin_lock(&queue->fence_ctx.lock); list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { - if (!job->call_info.size) - continue; - if (syncobj->seqno < job->done_fence->seqno) break; > > If you're happy with depending on the previous job's done_fence and not > track the sync job in in_flight_jobs, then you can have my > > Reviewed-by: Liviu Dudau <liviu.dudau@arm.com> > > Best regards, > Liviu > > > + } > > > > ret = pm_runtime_resume_and_get(ptdev->base.dev); > > if (drm_WARN_ON(&ptdev->base, ret)) > > @@ -2928,6 +2944,10 @@ queue_run_job(struct drm_sched_job *sched_job) > > } > > } > > > > + /* Update the last fence. */ > > + dma_fence_put(queue->fence_ctx.last_fence); > > + queue->fence_ctx.last_fence = dma_fence_get(job->done_fence); > > + > > done_fence = dma_fence_get(job->done_fence); > > > > out_unlock: > > @@ -3378,10 +3398,15 @@ panthor_job_create(struct panthor_file *pfile, > > goto err_put_job; > > } > > > > - job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); > > - if (!job->done_fence) { > > - ret = -ENOMEM; > > - goto err_put_job; > > + /* Empty command streams don't need a fence, they'll pick the one from > > + * the previously submitted job. > > + */ > > + if (job->call_info.size) { > > + job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); > > + if (!job->done_fence) { > > + ret = -ENOMEM; > > + goto err_put_job; > > + } > > } > > > > ret = drm_sched_job_init(&job->base, > > diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h > > index aaed8e12ad0b..926b1deb1116 100644 > > --- a/include/uapi/drm/panthor_drm.h > > +++ b/include/uapi/drm/panthor_drm.h > > @@ -802,6 +802,9 @@ struct drm_panthor_queue_submit { > > * Must be 64-bit/8-byte aligned (the size of a CS instruction) > > * > > * Can be zero if stream_addr is zero too. > > + * > > + * When the stream size is zero, the queue submit serves as a > > + * synchronization point. > > */ > > __u32 stream_size; > > > > @@ -822,6 +825,8 @@ struct drm_panthor_queue_submit { > > * ensure the GPU doesn't get garbage when reading the indirect command > > * stream buffers. If you want the cache flush to happen > > * unconditionally, pass a zero here. > > + * > > + * Ignored when stream_size is zero. > > */ > > __u32 latest_flush; > > > > -- > > 2.45.0 > > >
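[Editorial note: for reference, a rough sketch of how the group_sync_upd_work() loop reads once that check is dropped, abridged from the hunk above; the handling of completed jobs further down the loop is omitted and the exact surrounding code may differ.]

	spin_lock(&queue->fence_ctx.lock);
	list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
		/* Sync-only jobs are never added to in_flight_jobs, so every
		 * entry here has a non-empty command stream and owns its
		 * done_fence.
		 */
		if (syncobj->seqno < job->done_fence->seqno)
			break;

		/* ... completed jobs are moved off the list and their
		 * done_fence signalled later, outside the lock (omitted) ...
		 */
	}
	spin_unlock(&queue->fence_ctx.lock);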
On Sat, Jun 29, 2024 at 10:52:56AM +0200, Boris Brezillon wrote: > On Fri, 28 Jun 2024 22:34:57 +0100 > Liviu Dudau <liviu.dudau@arm.com> wrote: > > > On Fri, Jun 28, 2024 at 04:55:36PM +0200, Boris Brezillon wrote: > > > A sync-only job is meant to provide a synchronization point on a > > > queue, so we can't return a NULL fence there, we have to add a signal > > > operation to the command stream which executes after all other > > > previously submitted jobs are done. > > > > > > Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") > > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > > > > Took me a bit longer to read, lets blame Friday. > > > > > --- > > > drivers/gpu/drm/panthor/panthor_sched.c | 41 ++++++++++++++++++++----- > > > include/uapi/drm/panthor_drm.h | 5 +++ > > > 2 files changed, 38 insertions(+), 8 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c > > > index 79ffcbc41d78..951ff7e63ea8 100644 > > > --- a/drivers/gpu/drm/panthor/panthor_sched.c > > > +++ b/drivers/gpu/drm/panthor/panthor_sched.c > > > @@ -458,6 +458,16 @@ struct panthor_queue { > > > /** @seqno: Sequence number of the last initialized fence. */ > > > atomic64_t seqno; > > > > > > + /** > > > + * @last_fence: Fence of the last submitted job. > > > + * > > > + * We return this fence when we get an empty command stream. > > > + * This way, we are guaranteed that all earlier jobs have completed > > > + * when drm_sched_job::s_fence::finished without having to feed > > > + * the CS ring buffer with a dummy job that only signals the fence. > > > + */ > > > + struct dma_fence *last_fence; > > > + > > > /** > > > * @in_flight_jobs: List containing all in-flight jobs. > > > * > > > @@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * > > > panthor_kernel_bo_destroy(queue->ringbuf); > > > panthor_kernel_bo_destroy(queue->iface.mem); > > > > > > + /* Release the last_fence we were holding, if any. */ > > > + dma_fence_put(queue->fence_ctx.last_fence); > > > + > > > kfree(queue); > > > } > > > > > > @@ -2865,11 +2878,14 @@ queue_run_job(struct drm_sched_job *sched_job) > > > static_assert(sizeof(call_instrs) % 64 == 0, > > > "call_instrs is not aligned on a cacheline"); > > > > > > - /* Stream size is zero, nothing to do => return a NULL fence and let > > > - * drm_sched signal the parent. > > > + /* Stream size is zero, nothing to do except making sure all previously > > > + * submitted jobs are done before we signal the > > > + * drm_sched_job::s_fence::finished fence. > > > */ > > > - if (!job->call_info.size) > > > - return NULL; > > > + if (!job->call_info.size) { > > > + job->done_fence = dma_fence_get(queue->fence_ctx.last_fence); > > > + return job->done_fence; > > > > What happens if the last job's done_fence was cancelled or timed out? Is the > > sync job's done_fence going to be signalled with the same error? > > It's the same object, so yes, the job will also be considered faulty > (the error propagated to the job::s_fence::finished fence). I guess > synchronization jobs are not supposed to fail/timeout in theory, because > they don't do anything, but I don't think that's an issue in > practice, because dma_fence errors are never propagated to user-space > (only the queue status is). > > > > > Now that we're returning a fence here, should the job be also added into the > > in_flight_jobs? 
> > Yeah, that's done on purpose, such that we don't end up signalling the > same dma_fence object twice (which is forbidden). That's the thing I was trying to figure out on Friday: how do we stop the fence returned as last_fence for the sync job to be signalled after the job's done_fence has also been signalled. I can't say that I found a good answer, if you can point me to what I've missed it will be appreciated. > This makes me realize > I should probably drop the 'is_cs_empty()' check in > group_sync_upd_work(), since we're not supposed to have a job with an > empty CS in the in_flight_jobs list. > > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c > index 951ff7e63ea8..8bf01b7b1596 100644 > --- a/drivers/gpu/drm/panthor/panthor_sched.c > +++ b/drivers/gpu/drm/panthor/panthor_sched.c > @@ -2797,9 +2797,6 @@ static void group_sync_upd_work(struct work_struct *work) > > spin_lock(&queue->fence_ctx.lock); > list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { > - if (!job->call_info.size) > - continue; > - > if (syncobj->seqno < job->done_fence->seqno) > break; Looks good to me. Best regards, Liviu > > > > > > If you're happy with depending on the previous job's done_fence and not > > track the sync job in in_flight_jobs, then you can have my > > > > Reviewed-by: Liviu Dudau <liviu.dudau@arm.com> > > > > Best regards, > > Liviu > > > > > + } > > > > > > ret = pm_runtime_resume_and_get(ptdev->base.dev); > > > if (drm_WARN_ON(&ptdev->base, ret)) > > > @@ -2928,6 +2944,10 @@ queue_run_job(struct drm_sched_job *sched_job) > > > } > > > } > > > > > > + /* Update the last fence. */ > > > + dma_fence_put(queue->fence_ctx.last_fence); > > > + queue->fence_ctx.last_fence = dma_fence_get(job->done_fence); > > > + > > > done_fence = dma_fence_get(job->done_fence); > > > > > > out_unlock: > > > @@ -3378,10 +3398,15 @@ panthor_job_create(struct panthor_file *pfile, > > > goto err_put_job; > > > } > > > > > > - job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); > > > - if (!job->done_fence) { > > > - ret = -ENOMEM; > > > - goto err_put_job; > > > + /* Empty command streams don't need a fence, they'll pick the one from > > > + * the previously submitted job. > > > + */ > > > + if (job->call_info.size) { > > > + job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); > > > + if (!job->done_fence) { > > > + ret = -ENOMEM; > > > + goto err_put_job; > > > + } > > > } > > > > > > ret = drm_sched_job_init(&job->base, > > > diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h > > > index aaed8e12ad0b..926b1deb1116 100644 > > > --- a/include/uapi/drm/panthor_drm.h > > > +++ b/include/uapi/drm/panthor_drm.h > > > @@ -802,6 +802,9 @@ struct drm_panthor_queue_submit { > > > * Must be 64-bit/8-byte aligned (the size of a CS instruction) > > > * > > > * Can be zero if stream_addr is zero too. > > > + * > > > + * When the stream size is zero, the queue submit serves as a > > > + * synchronization point. > > > */ > > > __u32 stream_size; > > > > > > @@ -822,6 +825,8 @@ struct drm_panthor_queue_submit { > > > * ensure the GPU doesn't get garbage when reading the indirect command > > > * stream buffers. If you want the cache flush to happen > > > * unconditionally, pass a zero here. > > > + * > > > + * Ignored when stream_size is zero. > > > */ > > > __u32 latest_flush; > > > > > > -- > > > 2.45.0 > > > > > >
On Sat, 29 Jun 2024 13:10:35 +0100 Liviu Dudau <liviu.dudau@arm.com> wrote: > On Sat, Jun 29, 2024 at 10:52:56AM +0200, Boris Brezillon wrote: > > On Fri, 28 Jun 2024 22:34:57 +0100 > > Liviu Dudau <liviu.dudau@arm.com> wrote: > > > > > On Fri, Jun 28, 2024 at 04:55:36PM +0200, Boris Brezillon wrote: > > > > A sync-only job is meant to provide a synchronization point on a > > > > queue, so we can't return a NULL fence there, we have to add a signal > > > > operation to the command stream which executes after all other > > > > previously submitted jobs are done. > > > > > > > > Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") > > > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > > > > > > Took me a bit longer to read, lets blame Friday. > > > > > > > --- > > > > drivers/gpu/drm/panthor/panthor_sched.c | 41 ++++++++++++++++++++----- > > > > include/uapi/drm/panthor_drm.h | 5 +++ > > > > 2 files changed, 38 insertions(+), 8 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c > > > > index 79ffcbc41d78..951ff7e63ea8 100644 > > > > --- a/drivers/gpu/drm/panthor/panthor_sched.c > > > > +++ b/drivers/gpu/drm/panthor/panthor_sched.c > > > > @@ -458,6 +458,16 @@ struct panthor_queue { > > > > /** @seqno: Sequence number of the last initialized fence. */ > > > > atomic64_t seqno; > > > > > > > > + /** > > > > + * @last_fence: Fence of the last submitted job. > > > > + * > > > > + * We return this fence when we get an empty command stream. > > > > + * This way, we are guaranteed that all earlier jobs have completed > > > > + * when drm_sched_job::s_fence::finished without having to feed > > > > + * the CS ring buffer with a dummy job that only signals the fence. > > > > + */ > > > > + struct dma_fence *last_fence; > > > > + > > > > /** > > > > * @in_flight_jobs: List containing all in-flight jobs. > > > > * > > > > @@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * > > > > panthor_kernel_bo_destroy(queue->ringbuf); > > > > panthor_kernel_bo_destroy(queue->iface.mem); > > > > > > > > + /* Release the last_fence we were holding, if any. */ > > > > + dma_fence_put(queue->fence_ctx.last_fence); > > > > + > > > > kfree(queue); > > > > } > > > > > > > > @@ -2865,11 +2878,14 @@ queue_run_job(struct drm_sched_job *sched_job) > > > > static_assert(sizeof(call_instrs) % 64 == 0, > > > > "call_instrs is not aligned on a cacheline"); > > > > > > > > - /* Stream size is zero, nothing to do => return a NULL fence and let > > > > - * drm_sched signal the parent. > > > > + /* Stream size is zero, nothing to do except making sure all previously > > > > + * submitted jobs are done before we signal the > > > > + * drm_sched_job::s_fence::finished fence. > > > > */ > > > > - if (!job->call_info.size) > > > > - return NULL; > > > > + if (!job->call_info.size) { > > > > + job->done_fence = dma_fence_get(queue->fence_ctx.last_fence); > > > > + return job->done_fence; > > > > > > What happens if the last job's done_fence was cancelled or timed out? Is the > > > sync job's done_fence going to be signalled with the same error? > > > > It's the same object, so yes, the job will also be considered faulty > > (the error propagated to the job::s_fence::finished fence). 
I guess > > synchronization jobs are not supposed to fail/timeout in theory, because > > they don't do anything, but I don't think that's an issue in > > practice, because dma_fence errors are never propagated to user-space > > (only the queue status is). > > > > > > > > Now that we're returning a fence here, should the job be also added into the > > > in_flight_jobs? > > > > Yeah, that's done on purpose, such that we don't end up signalling the > > same dma_fence object twice (which is forbidden). > > That's the thing I was trying to figure out on Friday: how do we stop the fence > returned as last_fence for the sync job to be signalled after the job's done_fence > has also been signalled. I can't say that I found a good answer, if you can point > me to what I've missed it will be appreciated. Well, there's only just one job that's added to the in_flight for a given done_fence object, so there's no risk of signaling such a fence more than once (even in case of a timeout). Now, let's say the previous 'real' job is done when this 'dummy/sync' job queued, the queue::fence_ctx::last_fence object will be signalled too, since this is essentially the same object, and when we return a signalled fence to the drm_sched layer in our ::run_job() callback, it just signals the finished drm_sched_job::s_fence::finished immediately, just like when you return a NULL fence.
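[Editorial note: to make the error-propagation point concrete, because the sync-only job borrows the previous job's fence instead of allocating its own, both jobs observe the same signalled state and the same error, if any. A hypothetical illustration using the generic dma_fence helpers; real_job and sync_job are made-up names, not driver code.]

	/* Both jobs point at the same struct dma_fence. */
	sync_job->done_fence = dma_fence_get(real_job->done_fence);

	/* If the real job faults or times out, the error set before
	 * signalling is visible through either pointer...
	 */
	dma_fence_set_error(real_job->done_fence, -ETIMEDOUT);
	dma_fence_signal(real_job->done_fence);

	/* ...so both report the same (negative) status. */
	WARN_ON(dma_fence_get_status(sync_job->done_fence) !=
		dma_fence_get_status(real_job->done_fence));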
On 28/06/2024 15:55, Boris Brezillon wrote: > A sync-only job is meant to provide a synchronization point on a > queue, so we can't return a NULL fence there, we have to add a signal > operation to the command stream which executes after all other > previously submitted jobs are done. > > Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Looks correct to me. Reviewed-by: Steven Price <steven.price@arm.com> > --- > drivers/gpu/drm/panthor/panthor_sched.c | 41 ++++++++++++++++++++----- > include/uapi/drm/panthor_drm.h | 5 +++ > 2 files changed, 38 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c > index 79ffcbc41d78..951ff7e63ea8 100644 > --- a/drivers/gpu/drm/panthor/panthor_sched.c > +++ b/drivers/gpu/drm/panthor/panthor_sched.c > @@ -458,6 +458,16 @@ struct panthor_queue { > /** @seqno: Sequence number of the last initialized fence. */ > atomic64_t seqno; > > + /** > + * @last_fence: Fence of the last submitted job. > + * > + * We return this fence when we get an empty command stream. > + * This way, we are guaranteed that all earlier jobs have completed > + * when drm_sched_job::s_fence::finished without having to feed > + * the CS ring buffer with a dummy job that only signals the fence. > + */ > + struct dma_fence *last_fence; > + > /** > * @in_flight_jobs: List containing all in-flight jobs. > * > @@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * > panthor_kernel_bo_destroy(queue->ringbuf); > panthor_kernel_bo_destroy(queue->iface.mem); > > + /* Release the last_fence we were holding, if any. */ > + dma_fence_put(queue->fence_ctx.last_fence); > + > kfree(queue); > } > > @@ -2865,11 +2878,14 @@ queue_run_job(struct drm_sched_job *sched_job) > static_assert(sizeof(call_instrs) % 64 == 0, > "call_instrs is not aligned on a cacheline"); > > - /* Stream size is zero, nothing to do => return a NULL fence and let > - * drm_sched signal the parent. > + /* Stream size is zero, nothing to do except making sure all previously > + * submitted jobs are done before we signal the > + * drm_sched_job::s_fence::finished fence. > */ > - if (!job->call_info.size) > - return NULL; > + if (!job->call_info.size) { > + job->done_fence = dma_fence_get(queue->fence_ctx.last_fence); > + return job->done_fence; > + } > > ret = pm_runtime_resume_and_get(ptdev->base.dev); > if (drm_WARN_ON(&ptdev->base, ret)) > @@ -2928,6 +2944,10 @@ queue_run_job(struct drm_sched_job *sched_job) > } > } > > + /* Update the last fence. */ > + dma_fence_put(queue->fence_ctx.last_fence); > + queue->fence_ctx.last_fence = dma_fence_get(job->done_fence); > + > done_fence = dma_fence_get(job->done_fence); > > out_unlock: > @@ -3378,10 +3398,15 @@ panthor_job_create(struct panthor_file *pfile, > goto err_put_job; > } > > - job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); > - if (!job->done_fence) { > - ret = -ENOMEM; > - goto err_put_job; > + /* Empty command streams don't need a fence, they'll pick the one from > + * the previously submitted job. 
> + */ > + if (job->call_info.size) { > + job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); > + if (!job->done_fence) { > + ret = -ENOMEM; > + goto err_put_job; > + } > } > > ret = drm_sched_job_init(&job->base, > diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h > index aaed8e12ad0b..926b1deb1116 100644 > --- a/include/uapi/drm/panthor_drm.h > +++ b/include/uapi/drm/panthor_drm.h > @@ -802,6 +802,9 @@ struct drm_panthor_queue_submit { > * Must be 64-bit/8-byte aligned (the size of a CS instruction) > * > * Can be zero if stream_addr is zero too. > + * > + * When the stream size is zero, the queue submit serves as a > + * synchronization point. > */ > __u32 stream_size; > > @@ -822,6 +825,8 @@ struct drm_panthor_queue_submit { > * ensure the GPU doesn't get garbage when reading the indirect command > * stream buffers. If you want the cache flush to happen > * unconditionally, pass a zero here. > + * > + * Ignored when stream_size is zero. > */ > __u32 latest_flush; >
Hi Boris,
kernel test robot noticed the following build warnings:
[auto build test WARNING on drm/drm-next]
[also build test WARNING on drm-exynos/exynos-drm-next drm-intel/for-linux-next drm-intel/for-linux-next-fixes drm-misc/drm-misc-next drm-tip/drm-tip linus/master v6.10-rc6 next-20240628]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Boris-Brezillon/drm-panthor-Don-t-check-the-array-stride-on-empty-uobj-arrays/20240630-032128
base: git://anongit.freedesktop.org/drm/drm drm-next
patch link: https://lore.kernel.org/r/20240628145536.778349-3-boris.brezillon%40collabora.com
patch subject: [PATCH 2/2] drm/panthor: Fix sync-only jobs
config: arm-randconfig-003-20240701
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project 326ba38a991250a8587a399a260b0f7af2c9166a)
reproduce (this is a W=1 build):
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202407011848.1VqjPD9w-lkp@intel.com/
All warnings (new ones prefixed by >>):
drivers/gpu/drm/panthor/panthor_sched.c:319: warning: Excess struct member 'runnable' description in 'panthor_scheduler'
drivers/gpu/drm/panthor/panthor_sched.c:319: warning: Excess struct member 'idle' description in 'panthor_scheduler'
drivers/gpu/drm/panthor/panthor_sched.c:319: warning: Excess struct member 'waiting' description in 'panthor_scheduler'
drivers/gpu/drm/panthor/panthor_sched.c:319: warning: Excess struct member 'has_ref' description in 'panthor_scheduler'
drivers/gpu/drm/panthor/panthor_sched.c:319: warning: Excess struct member 'in_progress' description in 'panthor_scheduler'
drivers/gpu/drm/panthor/panthor_sched.c:319: warning: Excess struct member 'stopped_groups' description in 'panthor_scheduler'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'mem' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'input' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'output' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'input_fw_va' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'output_fw_va' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'gpu_va' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'ref' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'gt' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'sync64' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'bo' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'offset' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'kmap' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'lock' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'id' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'seqno' description in 'panthor_queue'
>> drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'last_fence' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:479: warning: Excess struct member 'in_flight_jobs' description in 'panthor_queue'
drivers/gpu/drm/panthor/panthor_sched.c:777: warning: Excess struct member 'start' description in 'panthor_job'
drivers/gpu/drm/panthor/panthor_sched.c:777: warning: Excess struct member 'size' description in 'panthor_job'
drivers/gpu/drm/panthor/panthor_sched.c:777: warning: Excess struct member 'latest_flush' description in 'panthor_job'
drivers/gpu/drm/panthor/panthor_sched.c:777: warning: Excess struct member 'start' description in 'panthor_job'
drivers/gpu/drm/panthor/panthor_sched.c:777: warning: Excess struct member 'end' description in 'panthor_job'
drivers/gpu/drm/panthor/panthor_sched.c:1697: warning: Function parameter or struct member 'ptdev' not described in 'panthor_sched_report_fw_events'
drivers/gpu/drm/panthor/panthor_sched.c:1697: warning: Function parameter or struct member 'events' not described in 'panthor_sched_report_fw_events'
drivers/gpu/drm/panthor/panthor_sched.c:2591: warning: Function parameter or struct member 'ptdev' not described in 'panthor_sched_report_mmu_fault'
vim +479 drivers/gpu/drm/panthor/panthor_sched.c
de85488138247d Boris Brezillon 2024-02-29 394
de85488138247d Boris Brezillon 2024-02-29 395 /** @ringbuf: Command stream ring-buffer. */
de85488138247d Boris Brezillon 2024-02-29 396 struct panthor_kernel_bo *ringbuf;
de85488138247d Boris Brezillon 2024-02-29 397
de85488138247d Boris Brezillon 2024-02-29 398 /** @iface: Firmware interface. */
de85488138247d Boris Brezillon 2024-02-29 399 struct {
de85488138247d Boris Brezillon 2024-02-29 400 /** @mem: FW memory allocated for this interface. */
de85488138247d Boris Brezillon 2024-02-29 401 struct panthor_kernel_bo *mem;
de85488138247d Boris Brezillon 2024-02-29 402
de85488138247d Boris Brezillon 2024-02-29 403 /** @input: Input interface. */
de85488138247d Boris Brezillon 2024-02-29 404 struct panthor_fw_ringbuf_input_iface *input;
de85488138247d Boris Brezillon 2024-02-29 405
de85488138247d Boris Brezillon 2024-02-29 406 /** @output: Output interface. */
de85488138247d Boris Brezillon 2024-02-29 407 const struct panthor_fw_ringbuf_output_iface *output;
de85488138247d Boris Brezillon 2024-02-29 408
de85488138247d Boris Brezillon 2024-02-29 409 /** @input_fw_va: FW virtual address of the input interface buffer. */
de85488138247d Boris Brezillon 2024-02-29 410 u32 input_fw_va;
de85488138247d Boris Brezillon 2024-02-29 411
de85488138247d Boris Brezillon 2024-02-29 412 /** @output_fw_va: FW virtual address of the output interface buffer. */
de85488138247d Boris Brezillon 2024-02-29 413 u32 output_fw_va;
de85488138247d Boris Brezillon 2024-02-29 414 } iface;
de85488138247d Boris Brezillon 2024-02-29 415
de85488138247d Boris Brezillon 2024-02-29 416 /**
de85488138247d Boris Brezillon 2024-02-29 417 * @syncwait: Stores information about the synchronization object this
de85488138247d Boris Brezillon 2024-02-29 418 * queue is waiting on.
de85488138247d Boris Brezillon 2024-02-29 419 */
de85488138247d Boris Brezillon 2024-02-29 420 struct {
de85488138247d Boris Brezillon 2024-02-29 421 /** @gpu_va: GPU address of the synchronization object. */
de85488138247d Boris Brezillon 2024-02-29 422 u64 gpu_va;
de85488138247d Boris Brezillon 2024-02-29 423
de85488138247d Boris Brezillon 2024-02-29 424 /** @ref: Reference value to compare against. */
de85488138247d Boris Brezillon 2024-02-29 425 u64 ref;
de85488138247d Boris Brezillon 2024-02-29 426
de85488138247d Boris Brezillon 2024-02-29 427 /** @gt: True if this is a greater-than test. */
de85488138247d Boris Brezillon 2024-02-29 428 bool gt;
de85488138247d Boris Brezillon 2024-02-29 429
de85488138247d Boris Brezillon 2024-02-29 430 /** @sync64: True if this is a 64-bit sync object. */
de85488138247d Boris Brezillon 2024-02-29 431 bool sync64;
de85488138247d Boris Brezillon 2024-02-29 432
de85488138247d Boris Brezillon 2024-02-29 433 /** @bo: Buffer object holding the synchronization object. */
de85488138247d Boris Brezillon 2024-02-29 434 struct drm_gem_object *obj;
de85488138247d Boris Brezillon 2024-02-29 435
de85488138247d Boris Brezillon 2024-02-29 436 /** @offset: Offset of the synchronization object inside @bo. */
de85488138247d Boris Brezillon 2024-02-29 437 u64 offset;
de85488138247d Boris Brezillon 2024-02-29 438
de85488138247d Boris Brezillon 2024-02-29 439 /**
de85488138247d Boris Brezillon 2024-02-29 440 * @kmap: Kernel mapping of the buffer object holding the
de85488138247d Boris Brezillon 2024-02-29 441 * synchronization object.
de85488138247d Boris Brezillon 2024-02-29 442 */
de85488138247d Boris Brezillon 2024-02-29 443 void *kmap;
de85488138247d Boris Brezillon 2024-02-29 444 } syncwait;
de85488138247d Boris Brezillon 2024-02-29 445
de85488138247d Boris Brezillon 2024-02-29 446 /** @fence_ctx: Fence context fields. */
de85488138247d Boris Brezillon 2024-02-29 447 struct {
de85488138247d Boris Brezillon 2024-02-29 448 /** @lock: Used to protect access to all fences allocated by this context. */
de85488138247d Boris Brezillon 2024-02-29 449 spinlock_t lock;
de85488138247d Boris Brezillon 2024-02-29 450
de85488138247d Boris Brezillon 2024-02-29 451 /**
de85488138247d Boris Brezillon 2024-02-29 452 * @id: Fence context ID.
de85488138247d Boris Brezillon 2024-02-29 453 *
de85488138247d Boris Brezillon 2024-02-29 454 * Allocated with dma_fence_context_alloc().
de85488138247d Boris Brezillon 2024-02-29 455 */
de85488138247d Boris Brezillon 2024-02-29 456 u64 id;
de85488138247d Boris Brezillon 2024-02-29 457
de85488138247d Boris Brezillon 2024-02-29 458 /** @seqno: Sequence number of the last initialized fence. */
de85488138247d Boris Brezillon 2024-02-29 459 atomic64_t seqno;
de85488138247d Boris Brezillon 2024-02-29 460
966c853e5c43b4 Boris Brezillon 2024-06-28 461 /**
966c853e5c43b4 Boris Brezillon 2024-06-28 462 * @last_fence: Fence of the last submitted job.
966c853e5c43b4 Boris Brezillon 2024-06-28 463 *
966c853e5c43b4 Boris Brezillon 2024-06-28 464 * We return this fence when we get an empty command stream.
966c853e5c43b4 Boris Brezillon 2024-06-28 465 * This way, we are guaranteed that all earlier jobs have completed
966c853e5c43b4 Boris Brezillon 2024-06-28 466 * when drm_sched_job::s_fence::finished without having to feed
966c853e5c43b4 Boris Brezillon 2024-06-28 467 * the CS ring buffer with a dummy job that only signals the fence.
966c853e5c43b4 Boris Brezillon 2024-06-28 468 */
966c853e5c43b4 Boris Brezillon 2024-06-28 469 struct dma_fence *last_fence;
966c853e5c43b4 Boris Brezillon 2024-06-28 470
de85488138247d Boris Brezillon 2024-02-29 471 /**
de85488138247d Boris Brezillon 2024-02-29 472 * @in_flight_jobs: List containing all in-flight jobs.
de85488138247d Boris Brezillon 2024-02-29 473 *
de85488138247d Boris Brezillon 2024-02-29 474 * Used to keep track and signal panthor_job::done_fence when the
de85488138247d Boris Brezillon 2024-02-29 475 * synchronization object attached to the queue is signaled.
de85488138247d Boris Brezillon 2024-02-29 476 */
de85488138247d Boris Brezillon 2024-02-29 477 struct list_head in_flight_jobs;
de85488138247d Boris Brezillon 2024-02-29 478 } fence_ctx;
de85488138247d Boris Brezillon 2024-02-29 @479 };
de85488138247d Boris Brezillon 2024-02-29 480
On Fri, 28 Jun 2024 16:55:36 +0200 Boris Brezillon <boris.brezillon@collabora.com> wrote: > - /* Stream size is zero, nothing to do => return a NULL fence and let > - * drm_sched signal the parent. > + /* Stream size is zero, nothing to do except making sure all previously > + * submitted jobs are done before we signal the > + * drm_sched_job::s_fence::finished fence. > */ > - if (!job->call_info.size) > - return NULL; > + if (!job->call_info.size) { > + job->done_fence = dma_fence_get(queue->fence_ctx.last_fence); > + return job->done_fence; Should be return dma_fence_get(job->done_fence); to keep the get/put balanced. > + } >
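[Editorial note: putting that correction together with the hunk above, the empty-stream branch would presumably end up looking something like the sketch below (not the committed code): one reference stays with the job, and a second one is handed to drm_sched so its eventual put does not drop the job's own reference.]

	if (!job->call_info.size) {
		/* Reference kept by the job itself, released when the job
		 * is destroyed.
		 */
		job->done_fence = dma_fence_get(queue->fence_ctx.last_fence);

		/* Extra reference handed to drm_sched, which puts it once
		 * drm_sched_job::s_fence::finished has been dealt with.
		 */
		return dma_fence_get(job->done_fence);
	}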
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 79ffcbc41d78..951ff7e63ea8 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -458,6 +458,16 @@ struct panthor_queue {
 		/** @seqno: Sequence number of the last initialized fence. */
 		atomic64_t seqno;
 
+		/**
+		 * @last_fence: Fence of the last submitted job.
+		 *
+		 * We return this fence when we get an empty command stream.
+		 * This way, we are guaranteed that all earlier jobs have completed
+		 * when drm_sched_job::s_fence::finished without having to feed
+		 * the CS ring buffer with a dummy job that only signals the fence.
+		 */
+		struct dma_fence *last_fence;
+
 		/**
 		 * @in_flight_jobs: List containing all in-flight jobs.
 		 *
@@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
 	panthor_kernel_bo_destroy(queue->ringbuf);
 	panthor_kernel_bo_destroy(queue->iface.mem);
 
+	/* Release the last_fence we were holding, if any. */
+	dma_fence_put(queue->fence_ctx.last_fence);
+
 	kfree(queue);
 }
 
@@ -2865,11 +2878,14 @@ queue_run_job(struct drm_sched_job *sched_job)
 	static_assert(sizeof(call_instrs) % 64 == 0,
 		      "call_instrs is not aligned on a cacheline");
 
-	/* Stream size is zero, nothing to do => return a NULL fence and let
-	 * drm_sched signal the parent.
+	/* Stream size is zero, nothing to do except making sure all previously
+	 * submitted jobs are done before we signal the
+	 * drm_sched_job::s_fence::finished fence.
 	 */
-	if (!job->call_info.size)
-		return NULL;
+	if (!job->call_info.size) {
+		job->done_fence = dma_fence_get(queue->fence_ctx.last_fence);
+		return job->done_fence;
+	}
 
 	ret = pm_runtime_resume_and_get(ptdev->base.dev);
 	if (drm_WARN_ON(&ptdev->base, ret))
@@ -2928,6 +2944,10 @@ queue_run_job(struct drm_sched_job *sched_job)
 		}
 	}
 
+	/* Update the last fence. */
+	dma_fence_put(queue->fence_ctx.last_fence);
+	queue->fence_ctx.last_fence = dma_fence_get(job->done_fence);
+
 	done_fence = dma_fence_get(job->done_fence);
 
 out_unlock:
@@ -3378,10 +3398,15 @@ panthor_job_create(struct panthor_file *pfile,
 		goto err_put_job;
 	}
 
-	job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
-	if (!job->done_fence) {
-		ret = -ENOMEM;
-		goto err_put_job;
+	/* Empty command streams don't need a fence, they'll pick the one from
+	 * the previously submitted job.
+	 */
+	if (job->call_info.size) {
+		job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
+		if (!job->done_fence) {
+			ret = -ENOMEM;
+			goto err_put_job;
+		}
 	}
 
 	ret = drm_sched_job_init(&job->base,
diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index aaed8e12ad0b..926b1deb1116 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -802,6 +802,9 @@ struct drm_panthor_queue_submit {
 	 * Must be 64-bit/8-byte aligned (the size of a CS instruction)
 	 *
 	 * Can be zero if stream_addr is zero too.
+	 *
+	 * When the stream size is zero, the queue submit serves as a
+	 * synchronization point.
 	 */
 	__u32 stream_size;
 
@@ -822,6 +825,8 @@ struct drm_panthor_queue_submit {
 	 * ensure the GPU doesn't get garbage when reading the indirect command
 	 * stream buffers. If you want the cache flush to happen
 	 * unconditionally, pass a zero here.
+	 *
+	 * Ignored when stream_size is zero.
 	 */
 	__u32 latest_flush;
 
A sync-only job is meant to provide a synchronization point on a
queue, so we can't return a NULL fence there, we have to add a signal
operation to the command stream which executes after all other
previously submitted jobs are done.

Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block")
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 drivers/gpu/drm/panthor/panthor_sched.c | 41 ++++++++++++++++++++-----
 include/uapi/drm/panthor_drm.h          |  5 +++
 2 files changed, 38 insertions(+), 8 deletions(-)