[v2,5/5] drm/tegra: Implement page-flipping support

Message ID	1358179560-26799-6-git-send-email-thierry.reding@avionic-design.de (mailing list archive)
State	New, archived
Headers	show Return-Path: <dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org> From: Thierry Reding <thierry.reding@avionic-design.de> To: David Airlie <airlied@linux.ie> Subject: [PATCH v2 5/5] drm/tegra: Implement page-flipping support Date: Mon, 14 Jan 2013 17:06:00 +0100 Message-Id: <1358179560-26799-6-git-send-email-thierry.reding@avionic-design.de> In-Reply-To: <1358179560-26799-1-git-send-email-thierry.reding@avionic-design.de> References: <1358179560-26799-1-git-send-email-thierry.reding@avionic-design.de> Cc: linux-tegra@vger.kernel.org, dri-devel@lists.freedesktop.org Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Sender: dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org Errors-To: dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org

Thierry Reding Jan. 14, 2013, 4:06 p.m. UTC

All the necessary support bits like .mode_set_base() and VBLANK are now
available, so page-flipping case easily be implemented on top.

Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
---
 drivers/gpu/drm/tegra/dc.c  | 72 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/drm.c |  9 ++++++
 drivers/gpu/drm/tegra/drm.h |  5 ++++
 3 files changed, 86 insertions(+)

Mark Zhang Jan. 16, 2013, 11:10 a.m. UTC | #1

I'm testing the pageflip & vblank change on cardhu. Seems the HDMI
doesn't work(LVDS only is OK). I'll let you know if I get something.

Mark
On 01/15/2013 12:06 AM, Thierry Reding wrote:
> All the necessary support bits like .mode_set_base() and VBLANK are now
> available, so page-flipping case easily be implemented on top.
> 
> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> ---
>  drivers/gpu/drm/tegra/dc.c  | 72 +++++++++++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/tegra/drm.c |  9 ++++++
>  drivers/gpu/drm/tegra/drm.h |  5 ++++
>  3 files changed, 86 insertions(+)
> 
> diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
> index 3aa7ccc..ce4e14a 100644
> --- a/drivers/gpu/drm/tegra/dc.c
> +++ b/drivers/gpu/drm/tegra/dc.c
> @@ -161,7 +161,78 @@ void tegra_dc_disable_vblank(struct tegra_dc *dc)
>  	spin_unlock_irqrestore(&dc->lock, flags);
>  }
>  
> +static void tegra_dc_finish_page_flip(struct tegra_dc *dc)
> +{
> +	struct drm_pending_vblank_event *event;
> +	struct drm_device *drm = dc->base.dev;
> +	unsigned long flags;
> +	struct timeval now;
> +
> +	spin_lock_irqsave(&drm->event_lock, flags);
> +	event = dc->event;
> +	dc->event = NULL;
> +	spin_unlock_irqrestore(&drm->event_lock, flags);
> +
> +	if (!event)
> +		return;
> +
> +	event->event.sequence = drm_vblank_count_and_time(drm, dc->pipe, &now);
> +	event->event.tv_sec = now.tv_sec;
> +	event->event.tv_usec = now.tv_usec;
> +
> +	spin_lock_irqsave(&drm->event_lock, flags);
> +	list_add_tail(&event->base.link, &event->base.file_priv->event_list);
> +	wake_up_interruptible(&event->base.file_priv->event_wait);
> +	spin_unlock_irqrestore(&drm->event_lock, flags);
> +
> +	drm_vblank_put(drm, dc->pipe);
> +}
> +
> +void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file)
> +{
> +	struct tegra_dc *dc = to_tegra_dc(crtc);
> +	struct drm_device *drm = crtc->dev;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&drm->event_lock, flags);
> +
> +	if (dc->event && dc->event->base.file_priv == file) {
> +		dc->event->base.destroy(&dc->event->base);
> +		drm_vblank_put(drm, dc->pipe);
> +		dc->event = NULL;
> +	}
> +
> +	spin_unlock_irqrestore(&drm->event_lock, flags);
> +}
> +
> +static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
> +			      struct drm_pending_vblank_event *event)
> +{
> +	struct tegra_framebuffer *newfb = to_tegra_fb(fb);
> +	struct tegra_dc *dc = to_tegra_dc(crtc);
> +	struct drm_device *drm = crtc->dev;
> +	unsigned long flags;
> +
> +	if (dc->event)
> +		return -EBUSY;
> +
> +	tegra_dc_set_base(dc, 0, 0, newfb);
> +
> +	if (event) {
> +		event->pipe = dc->pipe;
> +
> +		spin_lock_irqsave(&drm->event_lock, flags);
> +		dc->event = event;
> +		spin_unlock_irqrestore(&drm->event_lock, flags);
> +
> +		drm_vblank_get(drm, dc->pipe);
> +	}
> +
> +	return 0;
> +}
> +
>  static const struct drm_crtc_funcs tegra_crtc_funcs = {
> +	.page_flip = tegra_dc_page_flip,
>  	.set_config = drm_crtc_helper_set_config,
>  	.destroy = drm_crtc_cleanup,
>  };
> @@ -556,6 +627,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
>  		dev_dbg(dc->dev, "%s(): vertical blank\n", __func__);
>  		*/
>  		drm_handle_vblank(dc->base.dev, dc->pipe);
> +		tegra_dc_finish_page_flip(dc);
>  	}
>  
>  	if (status & (WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT)) {
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index 62f8121..7bab784 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -129,11 +129,20 @@ static void tegra_drm_disable_vblank(struct drm_device *drm, int pipe)
>  		tegra_dc_disable_vblank(dc);
>  }
>  
> +static void tegra_drm_preclose(struct drm_device *drm, struct drm_file *file)
> +{
> +	struct drm_crtc *crtc;
> +
> +	list_for_each_entry(crtc, &drm->mode_config.crtc_list, head)
> +		tegra_dc_cancel_page_flip(crtc, file);
> +}
> +
>  struct drm_driver tegra_drm_driver = {
>  	.driver_features = DRIVER_BUS_PLATFORM | DRIVER_MODESET | DRIVER_GEM,
>  	.load = tegra_drm_load,
>  	.unload = tegra_drm_unload,
>  	.open = tegra_drm_open,
> +	.preclose = tegra_drm_preclose,
>  	.lastclose = tegra_drm_lastclose,
>  
>  	.get_vblank_counter = drm_vblank_count,
> diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
> index ca742b2..1f1cb37 100644
> --- a/drivers/gpu/drm/tegra/drm.h
> +++ b/drivers/gpu/drm/tegra/drm.h
> @@ -100,6 +100,9 @@ struct tegra_dc {
>  	struct drm_info_list *debugfs_files;
>  	struct drm_minor *minor;
>  	struct dentry *debugfs;
> +
> +	/* page-flip handling */
> +	struct drm_pending_vblank_event *event;
>  };
>  
>  static inline struct tegra_dc *host1x_client_to_dc(struct host1x_client *client)
> @@ -149,6 +152,8 @@ extern int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
>  				 const struct tegra_dc_window *window);
>  extern void tegra_dc_enable_vblank(struct tegra_dc *dc);
>  extern void tegra_dc_disable_vblank(struct tegra_dc *dc);
> +extern void tegra_dc_cancel_page_flip(struct drm_crtc *crtc,
> +				      struct drm_file *file);
>  
>  struct tegra_output_ops {
>  	int (*enable)(struct tegra_output *output);
>

Lucas Stach Jan. 16, 2013, 11:53 a.m. UTC | #2

Am Mittwoch, den 16.01.2013, 19:10 +0800 schrieb Mark Zhang:
> I'm testing the pageflip & vblank change on cardhu. Seems the HDMI
> doesn't work(LVDS only is OK). I'll let you know if I get something.
> 
Just to provide another data point: I'm running this series and don't
see any failures with DVI output. Though I'm only run a single output,
not some dual-head config.

Even vbltest from libdrm/tests works and shows correct refresh rate.

Regards,
Lucas

Mark Zhang Jan. 17, 2013, 4:49 a.m. UTC | #3

On 01/16/2013 07:53 PM, Lucas Stach wrote:
> Am Mittwoch, den 16.01.2013, 19:10 +0800 schrieb Mark Zhang:
>> I'm testing the pageflip & vblank change on cardhu. Seems the HDMI
>> doesn't work(LVDS only is OK). I'll let you know if I get something.
>>
> Just to provide another data point: I'm running this series and don't
> see any failures with DVI output. Though I'm only run a single output,
> not some dual-head config.
>

Yes. HDMI only is OK. I met some problems when the LVDS & HDMI are
enabled at the same time.

Mark
> Even vbltest from libdrm/tests works and shows correct refresh rate.
> 
> Regards,
> Lucas
>

Mark Zhang Jan. 17, 2013, 6:33 a.m. UTC | #4

On 01/15/2013 12:06 AM, Thierry Reding wrote:
> All the necessary support bits like .mode_set_base() and VBLANK are now
> available, so page-flipping case easily be implemented on top.
> 
> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
[...]
> +
> +static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
> +			      struct drm_pending_vblank_event *event)
> +{
> +	struct tegra_framebuffer *newfb = to_tegra_fb(fb);
> +	struct tegra_dc *dc = to_tegra_dc(crtc);
> +	struct drm_device *drm = crtc->dev;
> +	unsigned long flags;
> +
> +	if (dc->event)
> +		return -EBUSY;
> +
> +	tegra_dc_set_base(dc, 0, 0, newfb);

"tegra_dc_set_base" only updates the frame buffer start address to dc
registers. I think this is not enough because it's possible that this
new FB may trigger a full modeset while not just a fb change. For
example, the "bpp" of the new FB differs from current setting in dc
register.

So I suggest to trigger a full modeset here(although it's slower than fb
change) or maybe you can explain why the FB change is enough.

Mark
> +
> +	if (event) {
> +		event->pipe = dc->pipe;
> +
> +		spin_lock_irqsave(&drm->event_lock, flags);
> +		dc->event = event;
> +		spin_unlock_irqrestore(&drm->event_lock, flags);
> +
> +		drm_vblank_get(drm, dc->pipe);
> +	}
> +
> +	return 0;
> +}
> +
[...]
>  struct tegra_output_ops {
>  	int (*enable)(struct tegra_output *output);
>

Terje Bergstrom Jan. 22, 2013, 8:31 a.m. UTC | #5

On 14.01.2013 18:06, Thierry Reding wrote:
> +static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
> +			      struct drm_pending_vblank_event *event)
> +{
> +	struct tegra_framebuffer *newfb = to_tegra_fb(fb);
> +	struct tegra_dc *dc = to_tegra_dc(crtc);
> +	struct drm_device *drm = crtc->dev;
> +	unsigned long flags;
> +
> +	if (dc->event)
> +		return -EBUSY;
> +
> +	tegra_dc_set_base(dc, 0, 0, newfb);
> +
> +	if (event) {
> +		event->pipe = dc->pipe;
> +
> +		spin_lock_irqsave(&drm->event_lock, flags);
> +		dc->event = event;
> +		spin_unlock_irqrestore(&drm->event_lock, flags);
> +
> +		drm_vblank_get(drm, dc->pipe);
> +	}
> +
> +	return 0;
> +}

The patch seems fine to me. I have a question for a follow-up.

In downstream dc driver we initiate a page flip, and assign a fence
(syncpt id, value) to it. We return the fence to user space. Then when
the actual page flip is done, dc increments the sync point.

User space can take the fence and use it for synchronizing graphics
operations. In downstream, we use that fence to be able to submit
operations to graphics units and synchronize them to the flip by adding
a host wait. It improves performance, because we can prepare and send
the graphics operations to hardware while flip is still happening.

Is this something we could do in tegra-drm, too? DRM's page flip IOCTL
doesn't seem to have a way to pass a fence back from fence, so we'd need
to find a way to pass the fence back to user space.

Of course, this has the prerequisite of having support for syncpts,
which I hoped we would get to 3.9. The review for host1x patches seem to
have stalled, so that's iffy.

Terje

Thierry Reding Jan. 22, 2013, 8:57 a.m. UTC | #6

On Tue, Jan 22, 2013 at 10:31:11AM +0200, Terje Bergström wrote:
> On 14.01.2013 18:06, Thierry Reding wrote:
> > +static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
> > +			      struct drm_pending_vblank_event *event)
> > +{
> > +	struct tegra_framebuffer *newfb = to_tegra_fb(fb);
> > +	struct tegra_dc *dc = to_tegra_dc(crtc);
> > +	struct drm_device *drm = crtc->dev;
> > +	unsigned long flags;
> > +
> > +	if (dc->event)
> > +		return -EBUSY;
> > +
> > +	tegra_dc_set_base(dc, 0, 0, newfb);
> > +
> > +	if (event) {
> > +		event->pipe = dc->pipe;
> > +
> > +		spin_lock_irqsave(&drm->event_lock, flags);
> > +		dc->event = event;
> > +		spin_unlock_irqrestore(&drm->event_lock, flags);
> > +
> > +		drm_vblank_get(drm, dc->pipe);
> > +	}
> > +
> > +	return 0;
> > +}
> 
> The patch seems fine to me. I have a question for a follow-up.
> 
> In downstream dc driver we initiate a page flip, and assign a fence
> (syncpt id, value) to it. We return the fence to user space. Then when
> the actual page flip is done, dc increments the sync point.
> 
> User space can take the fence and use it for synchronizing graphics
> operations. In downstream, we use that fence to be able to submit
> operations to graphics units and synchronize them to the flip by adding
> a host wait. It improves performance, because we can prepare and send
> the graphics operations to hardware while flip is still happening.
> 
> Is this something we could do in tegra-drm, too? DRM's page flip IOCTL
> doesn't seem to have a way to pass a fence back from fence, so we'd need
> to find a way to pass the fence back to user space.
> 
> Of course, this has the prerequisite of having support for syncpts,
> which I hoped we would get to 3.9. The review for host1x patches seem to
> have stalled, so that's iffy.

Yes, I haven't found as much time as I would have liked to look at the
latest patches. Perhaps there will be a time slot at the end of the
week. I thought there was also the remaining issue with the memory
allocator that Lucas (Cc'ed) was working on?

Thierry

Lucas Stach Jan. 22, 2013, 9:15 a.m. UTC | #7

Am Dienstag, den 22.01.2013, 09:57 +0100 schrieb Thierry Reding:
> On Tue, Jan 22, 2013 at 10:31:11AM +0200, Terje Bergström wrote:
> > Of course, this has the prerequisite of having support for syncpts,
> > which I hoped we would get to 3.9. The review for host1x patches seem to
> > have stalled, so that's iffy.
> 
> Yes, I haven't found as much time as I would have liked to look at the
> latest patches. Perhaps there will be a time slot at the end of the
> week. I thought there was also the remaining issue with the memory
> allocator that Lucas (Cc'ed) was working on?
> 
Yes, I haven't finished this work yet. I got side tracked by upstreaming
the platform patches for the Colibri and some real life activities. I'll
get back to this ASAP.

But even if I get this out real soon, I'm not really comfortable with
speeding things to 3.9. I would like to review the userspace side of
thing a lot more thoroughly, before committing to the interface. But
maybe this can also happen in the 3.9 RC timeframe.

Regards,
Lucas

Thierry Reding Jan. 22, 2013, 9:31 a.m. UTC | #8

On Tue, Jan 22, 2013 at 10:15:21AM +0100, Lucas Stach wrote:
> Am Dienstag, den 22.01.2013, 09:57 +0100 schrieb Thierry Reding:
> > On Tue, Jan 22, 2013 at 10:31:11AM +0200, Terje Bergström wrote:
> > > Of course, this has the prerequisite of having support for syncpts,
> > > which I hoped we would get to 3.9. The review for host1x patches seem to
> > > have stalled, so that's iffy.
> > 
> > Yes, I haven't found as much time as I would have liked to look at the
> > latest patches. Perhaps there will be a time slot at the end of the
> > week. I thought there was also the remaining issue with the memory
> > allocator that Lucas (Cc'ed) was working on?
> > 
> Yes, I haven't finished this work yet. I got side tracked by upstreaming
> the platform patches for the Colibri and some real life activities. I'll
> get back to this ASAP.
> 
> But even if I get this out real soon, I'm not really comfortable with
> speeding things to 3.9. I would like to review the userspace side of
> thing a lot more thoroughly, before committing to the interface. But
> maybe this can also happen in the 3.9 RC timeframe.

Maybe it'd make sense to finish up the various userspace parts first, so
that we can test an accelerated DDX on top of the kernel patches before
merging the host1x and gr2d patches.

I'm not quite sure if I remember correctly, but I think David mentioned
something along the lines of requiring a working userspace that can be
used to test the DRM interfaces as a prerequisite to getting this kind
of code merged.

Thierry

Terje Bergstrom Jan. 22, 2013, 9:35 a.m. UTC | #9

On 22.01.2013 11:15, Lucas Stach wrote:
> But even if I get this out real soon, I'm not really comfortable with
> speeding things to 3.9. I would like to review the userspace side of
> thing a lot more thoroughly, before committing to the interface. But
> maybe this can also happen in the 3.9 RC timeframe.

Ok. I uploaded the libdrm patches to gitorious
(git@gitorious.org:linux-host1x/libdrm-host1x.git). We sent out the user
space patches more than a month ago, so I expect them to have fallen out
of everybody's radar long ago.

If there's a need for us to resend the patches, let me know.

Terje

Terje Bergstrom Jan. 22, 2013, 9:44 a.m. UTC | #10

On 22.01.2013 11:31, Thierry Reding wrote:
> I'm not quite sure if I remember correctly, but I think David mentioned
> something along the lines of requiring a working userspace that can be
> used to test the DRM interfaces as a prerequisite to getting this kind
> of code merged.

That's why we have provided user space, test suite that tests all
interfaces we are exporting, and a demo application that runs. If the
prerequisite is a working DDX, that's obviously not enough.

Terje

Lucas Stach Jan. 22, 2013, 9:48 a.m. UTC | #11

Am Dienstag, den 22.01.2013, 11:44 +0200 schrieb Terje Bergström:
> On 22.01.2013 11:31, Thierry Reding wrote:
> > I'm not quite sure if I remember correctly, but I think David mentioned
> > something along the lines of requiring a working userspace that can be
> > used to test the DRM interfaces as a prerequisite to getting this kind
> > of code merged.
> 
> That's why we have provided user space, test suite that tests all
> interfaces we are exporting, and a demo application that runs. If the
> prerequisite is a working DDX, that's obviously not enough.
> 
I think the test suite is enough to fulfil the formal requirement of
having a working userspace. But still it would be nice to have at least
some simple accel functions working in the DDX. Maybe just to see on how
well the current design integrates into the DDX code.

So I wouldn't make a working DDX a hard requirement for merging G2D
code, but I suspect that if the kernel code goes into 3.10 instead of
3.9, we'll just naturally get to the point of working DDX accel by the
same time.

Regards,
Lucas

Terje Bergstrom Jan. 22, 2013, 10:39 a.m. UTC | #12

On 22.01.2013 11:48, Lucas Stach wrote:
> I think the test suite is enough to fulfil the formal requirement of
> having a working userspace. But still it would be nice to have at least
> some simple accel functions working in the DDX. Maybe just to see on how
> well the current design integrates into the DDX code.
> 
> So I wouldn't make a working DDX a hard requirement for merging G2D
> code, but I suspect that if the kernel code goes into 3.10 instead of
> 3.9, we'll just naturally get to the point of working DDX accel by the
> same time.

For me, the most important thing would be to nail down a baseline
structure. That way new feature development would be unblocked (IOMMU,
other host1x clients, context switching, wait bases come to mind) and we
could start to synchronize downstream with upstream.

Biggest benefit of getting merged is that it's a pretty strong
indication of a solid baseline. :-)

Terje

Mario Kleiner Jan. 22, 2013, 5:27 p.m. UTC | #13

On 22.01.13 09:31, Terje Bergström wrote:
> On 14.01.2013 18:06, Thierry Reding wrote:
>> +static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
>> +			      struct drm_pending_vblank_event *event)
>> +{
>> +	struct tegra_framebuffer *newfb = to_tegra_fb(fb);
>> +	struct tegra_dc *dc = to_tegra_dc(crtc);
>> +	struct drm_device *drm = crtc->dev;
>> +	unsigned long flags;
>> +
>> +	if (dc->event)
>> +		return -EBUSY;

Hi

I haven't read the Tegra programming manual or played with this, so 
maybe i'm wrong for some reason, but i think there is a race here 
between actual pageflip completion - latching newfb into the scanout 
base register and the completion routine that gets called from the 
vblank irq handler.

If this code gets called close to vblank or inside vblank, couldn't it 
happen that tegra_dc_set_base() programs a pageflip that gets executed 
immediately - the new fb gets latched into the crtc, but the 
corresponding vblank irq handler for the vblank of flip completion runs 
before the "if (event)" block can set dc->event = event;? Or vblank 
irq's are off because drm_vblank_get() is only called at the end of the 
routine? In both cases the completion routine would miss the correct 
vblank and only timestamp and emit the completion event 1 vblank after 
actual flip completion.

In that case it would be better to place tegra_dc_set_base() after the 
"if (event)" block and have some check inside the flip completion 
routine to make sure the pageflip completion event is only emitted if 
the scanout is really already latched with the newfb.

thanks,
-mario

>> +
>> +	tegra_dc_set_base(dc, 0, 0, newfb);
>> +
>> +	if (event) {
>> +		event->pipe = dc->pipe;
>> +
>> +		spin_lock_irqsave(&drm->event_lock, flags);
>> +		dc->event = event;
>> +		spin_unlock_irqrestore(&drm->event_lock, flags);
>> +
>> +		drm_vblank_get(drm, dc->pipe);
>> +	}
>> +
>> +	return 0;
>> +}
>

Thierry Reding Feb. 11, 2013, 9 a.m. UTC | #14

On Tue, Jan 22, 2013 at 06:27:24PM +0100, Mario Kleiner wrote:
> On 22.01.13 09:31, Terje Bergström wrote:
> >On 14.01.2013 18:06, Thierry Reding wrote:
> >>+static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
> >>+			      struct drm_pending_vblank_event *event)
> >>+{
> >>+	struct tegra_framebuffer *newfb = to_tegra_fb(fb);
> >>+	struct tegra_dc *dc = to_tegra_dc(crtc);
> >>+	struct drm_device *drm = crtc->dev;
> >>+	unsigned long flags;
> >>+
> >>+	if (dc->event)
> >>+		return -EBUSY;
> 
> Hi
> 
> I haven't read the Tegra programming manual or played with this, so
> maybe i'm wrong for some reason, but i think there is a race here
> between actual pageflip completion - latching newfb into the scanout
> base register and the completion routine that gets called from the
> vblank irq handler.
> 
> If this code gets called close to vblank or inside vblank, couldn't
> it happen that tegra_dc_set_base() programs a pageflip that gets
> executed immediately - the new fb gets latched into the crtc, but
> the corresponding vblank irq handler for the vblank of flip
> completion runs before the "if (event)" block can set dc->event =
> event;? Or vblank irq's are off because drm_vblank_get() is only
> called at the end of the routine? In both cases the completion
> routine would miss the correct vblank and only timestamp and emit
> the completion event 1 vblank after actual flip completion.
> 
> In that case it would be better to place tegra_dc_set_base() after
> the "if (event)" block and have some check inside the flip
> completion routine to make sure the pageflip completion event is
> only emitted if the scanout is really already latched with the
> newfb.

An easier solution might be to expand the scope of the lock a bit to
encompass the call to tegra_dc_set_base() and extend until the end of
tegra_dc_page_flip(). That should properly keep the page-flip completion
from interfering, right?

	spin_lock_irqsave(&drm->event_lock, flags);

	tegra_dc_set_base(dc, 0, 0, newfb);

	if (event) {
		event->pipe = dc->pipe;
		dc->event = event;
		drm_vblank_get(drm, dc->pipe);
	}

	spin_unlock_irqrestore(&drm->event_lock, flags);

Thierry

Mario Kleiner Feb. 15, 2013, 10:34 p.m. UTC | #15

On 02/11/2013 10:00 AM, Thierry Reding wrote:
> On Tue, Jan 22, 2013 at 06:27:24PM +0100, Mario Kleiner wrote:
>> On 22.01.13 09:31, Terje Bergström wrote:
>>> On 14.01.2013 18:06, Thierry Reding wrote:
>>>> +static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
>>>> +			      struct drm_pending_vblank_event *event)
>>>> +{
>>>> +	struct tegra_framebuffer *newfb = to_tegra_fb(fb);
>>>> +	struct tegra_dc *dc = to_tegra_dc(crtc);
>>>> +	struct drm_device *drm = crtc->dev;
>>>> +	unsigned long flags;
>>>> +
>>>> +	if (dc->event)
>>>> +		return -EBUSY;
>> Hi
>>
>> I haven't read the Tegra programming manual or played with this, so
>> maybe i'm wrong for some reason, but i think there is a race here
>> between actual pageflip completion - latching newfb into the scanout
>> base register and the completion routine that gets called from the
>> vblank irq handler.
>>
>> If this code gets called close to vblank or inside vblank, couldn't
>> it happen that tegra_dc_set_base() programs a pageflip that gets
>> executed immediately - the new fb gets latched into the crtc, but
>> the corresponding vblank irq handler for the vblank of flip
>> completion runs before the "if (event)" block can set dc->event =
>> event;? Or vblank irq's are off because drm_vblank_get() is only
>> called at the end of the routine? In both cases the completion
>> routine would miss the correct vblank and only timestamp and emit
>> the completion event 1 vblank after actual flip completion.
>>
>> In that case it would be better to place tegra_dc_set_base() after
>> the "if (event)" block and have some check inside the flip
>> completion routine to make sure the pageflip completion event is
>> only emitted if the scanout is really already latched with the
>> newfb.
> An easier solution might be to expand the scope of the lock a bit to
> encompass the call to tegra_dc_set_base() and extend until the end of
> tegra_dc_page_flip(). That should properly keep the page-flip completion
> from interfering, right?
>
> 	spin_lock_irqsave(&drm->event_lock, flags);
>
> 	tegra_dc_set_base(dc, 0, 0, newfb);
>
> 	if (event) {
> 		event->pipe = dc->pipe;
> 		dc->event = event;
> 		drm_vblank_get(drm, dc->pipe);
> 	}
>
> 	spin_unlock_irqrestore(&drm->event_lock, flags);
>
> Thierry
Yes, that would avoid races between page flip ioctl and the irq handler. 
But i think the tegra_dc_set_base() should go below the if (event) {} 
block, before the spin_unlock_irqrestore() so you can be sure that 
drm_vblank_get() has been called before tegra_dc_set_base() is executed. 
Otherwise vblank irq's may be off during the vblank in which the 
tegra_dc_set_base() takes effect and a flip complete would only get 
reported one scanout cycle later at the next vblank -> You'd signal a 
pageflip completion 1 frame too late.

You also still need to check in tegra_dc_finish_page_flip() if the new 
scanout buffer has really been latched before emitting the flip complete 
event. Otherwise it could happen that your execution of 
tegra_dc_page_flip() intersects with the vblank interval and manages to 
queue the event in time, so the finish_page_flip picks it up, but the 
register programming in tegra_dc_set_base() happened a bit too late, so 
it doesn't get latched in the same vblank but one vblank later --> You'd 
signal pageflip completion 1 frame too early.

I've just downloaded the Tegra-3 TRM and had a quick look. Section 
20.5.1 "Display shadow registers". As far as i understand, if dc->event 
is pending in tegra_dc_finish_page_flip(), you could read back the 
ACTIVE copy of DC_WINBUF_START_ADDR by setting READ_MUX properly in 
DC_CMD_STATE_ACCESS_0, and compare its value against the ARM copy in 
DC_WINBUF_A_START_ADDR_NS_0. If both values match, the pageflip has 
happened and the dc->event can be dequeued and emitted.

That assumes that the ARM copy is latched into the ACTIVE copy only at 
leading edge of vblank. Section 20.5.1 says "...latching happens on the 
next frame boundary after (GENERAL/WIN_A/WIN_B/WIN_C)_ACT_REQ is 
programmed..." - not totally clear to me if this is the same as start of 
vblank?

-mario

[v2,5/5] drm/tegra: Implement page-flipping support

Commit Message

Comments

Patch