[4/6] drm/radeon: Provide page_flip_target hook
diff mbox

Message ID 1470281981-18172-5-git-send-email-michel@daenzer.net
State New
Headers show

Commit Message

Michel Dänzer Aug. 4, 2016, 3:39 a.m. UTC
From: Michel Dänzer <michel.daenzer@amd.com>

Now we can program a flip during a vertical blank period, if it's the
one targeted by the flip (or a later one). This allows simplifying
radeon_flip_work_func considerably.

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h         |  1 +
 drivers/gpu/drm/radeon/radeon_display.c | 89 +++++++++------------------------
 2 files changed, 25 insertions(+), 65 deletions(-)

Comments

Mario Kleiner Aug. 16, 2016, 12:35 a.m. UTC | #1
Hi Michel,

sorry for the super-late reply, i was just catching up with all the 
mails and discussions, starting in June, leading to this patch set.

Looks all pretty good.

I'll look at this radeon patch and 2/6 for amdgpu later this week when i 
have a fresh brain and enough "obsessive compulsive time", to make sure 
all the magic wrt. "virtually extended vblank" and the fudging logic is 
fine.

I'll then also run it through my timing tests. I assume the 
ati/amdgpu-ddx patches and libdrm patches in your freedesktop home are 
all i need for testing?

-mario


On 08/04/2016 05:39 AM, Michel Dänzer wrote:
> From: Michel Dänzer <michel.daenzer@amd.com>
>
> Now we can program a flip during a vertical blank period, if it's the
> one targeted by the flip (or a later one). This allows simplifying
> radeon_flip_work_func considerably.
>
> Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
> ---
>  drivers/gpu/drm/radeon/radeon.h         |  1 +
>  drivers/gpu/drm/radeon/radeon_display.c | 89 +++++++++------------------------
>  2 files changed, 25 insertions(+), 65 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 5633ee3..1b0dcad 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -742,6 +742,7 @@ struct radeon_flip_work {
>  	struct work_struct		unpin_work;
>  	struct radeon_device		*rdev;
>  	int				crtc_id;
> +	u32				target_vblank;
>  	uint64_t			base;
>  	struct drm_pending_vblank_event *event;
>  	struct radeon_bo		*old_rbo;
> diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
> index 5f1cd69..bd9d995 100644
> --- a/drivers/gpu/drm/radeon/radeon_display.c
> +++ b/drivers/gpu/drm/radeon/radeon_display.c
> @@ -400,14 +400,13 @@ static void radeon_flip_work_func(struct work_struct *__work)
>  	struct radeon_flip_work *work =
>  		container_of(__work, struct radeon_flip_work, flip_work);
>  	struct radeon_device *rdev = work->rdev;
> +	struct drm_device *dev = rdev->ddev;
>  	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[work->crtc_id];
>
>  	struct drm_crtc *crtc = &radeon_crtc->base;
>  	unsigned long flags;
>  	int r;
> -	int vpos, hpos, stat, min_udelay = 0;
> -	unsigned repcnt = 4;
> -	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
> +	int vpos, hpos;
>
>  	down_read(&rdev->exclusive_lock);
>  	if (work->fence) {
> @@ -438,59 +437,25 @@ static void radeon_flip_work_func(struct work_struct *__work)
>  		work->fence = NULL;
>  	}
>
> +	/* Wait until we're out of the vertical blank period before the one
> +	 * targeted by the flip
> +	 */
> +	while (radeon_crtc->enabled &&
> +	       (radeon_get_crtc_scanoutpos(dev, work->crtc_id, 0,
> +					   &vpos, &hpos, NULL, NULL,
> +					   &crtc->hwmode)
> +		& (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) ==
> +	       (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
> +	       (int)(work->target_vblank -
> +		     dev->driver->get_vblank_counter(dev, work->crtc_id)) > 0)
> +		usleep_range(1000, 2000);
> +
>  	/* We borrow the event spin lock for protecting flip_status */
>  	spin_lock_irqsave(&crtc->dev->event_lock, flags);
>
>  	/* set the proper interrupt */
>  	radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id);
>
> -	/* If this happens to execute within the "virtually extended" vblank
> -	 * interval before the start of the real vblank interval then it needs
> -	 * to delay programming the mmio flip until the real vblank is entered.
> -	 * This prevents completing a flip too early due to the way we fudge
> -	 * our vblank counter and vblank timestamps in order to work around the
> -	 * problem that the hw fires vblank interrupts before actual start of
> -	 * vblank (when line buffer refilling is done for a frame). It
> -	 * complements the fudging logic in radeon_get_crtc_scanoutpos() for
> -	 * timestamping and radeon_get_vblank_counter_kms() for vblank counts.
> -	 *
> -	 * In practice this won't execute very often unless on very fast
> -	 * machines because the time window for this to happen is very small.
> -	 */
> -	while (radeon_crtc->enabled && --repcnt) {
> -		/* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
> -		 * start in hpos, and to the "fudged earlier" vblank start in
> -		 * vpos.
> -		 */
> -		stat = radeon_get_crtc_scanoutpos(rdev->ddev, work->crtc_id,
> -						  GET_DISTANCE_TO_VBLANKSTART,
> -						  &vpos, &hpos, NULL, NULL,
> -						  &crtc->hwmode);
> -
> -		if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
> -		    (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) ||
> -		    !(vpos >= 0 && hpos <= 0))
> -			break;
> -
> -		/* Sleep at least until estimated real start of hw vblank */
> -		min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
> -		if (min_udelay > vblank->framedur_ns / 2000) {
> -			/* Don't wait ridiculously long - something is wrong */
> -			repcnt = 0;
> -			break;
> -		}
> -		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
> -		usleep_range(min_udelay, 2 * min_udelay);
> -		spin_lock_irqsave(&crtc->dev->event_lock, flags);
> -	};
> -
> -	if (!repcnt)
> -		DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
> -				 "framedur %d, linedur %d, stat %d, vpos %d, "
> -				 "hpos %d\n", work->crtc_id, min_udelay,
> -				 vblank->framedur_ns / 1000,
> -				 vblank->linedur_ns / 1000, stat, vpos, hpos);
> -
>  	/* do the flip (mmio) */
>  	radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base, work->async);
>
> @@ -499,10 +464,11 @@ static void radeon_flip_work_func(struct work_struct *__work)
>  	up_read(&rdev->exclusive_lock);
>  }
>
> -static int radeon_crtc_page_flip(struct drm_crtc *crtc,
> -				 struct drm_framebuffer *fb,
> -				 struct drm_pending_vblank_event *event,
> -				 uint32_t page_flip_flags)
> +static int radeon_crtc_page_flip_target(struct drm_crtc *crtc,
> +					struct drm_framebuffer *fb,
> +					struct drm_pending_vblank_event *event,
> +					uint32_t page_flip_flags,
> +					uint32_t target)
>  {
>  	struct drm_device *dev = crtc->dev;
>  	struct radeon_device *rdev = dev->dev_private;
> @@ -599,12 +565,8 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
>  		base &= ~7;
>  	}
>  	work->base = base;
> -
> -	r = drm_crtc_vblank_get(crtc);
> -	if (r) {
> -		DRM_ERROR("failed to get vblank before flip\n");
> -		goto pflip_cleanup;
> -	}
> +	work->target_vblank = target - drm_crtc_vblank_count(crtc) +
> +		dev->driver->get_vblank_counter(dev, work->crtc_id);
>
>  	/* We borrow the event spin lock for protecting flip_work */
>  	spin_lock_irqsave(&crtc->dev->event_lock, flags);
> @@ -613,7 +575,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
>  		DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
>  		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
>  		r = -EBUSY;
> -		goto vblank_cleanup;
> +		goto pflip_cleanup;
>  	}
>  	radeon_crtc->flip_status = RADEON_FLIP_PENDING;
>  	radeon_crtc->flip_work = work;
> @@ -626,9 +588,6 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
>  	queue_work(radeon_crtc->flip_queue, &work->flip_work);
>  	return 0;
>
> -vblank_cleanup:
> -	drm_crtc_vblank_put(crtc);
> -
>  pflip_cleanup:
>  	if (unlikely(radeon_bo_reserve(new_rbo, false) != 0)) {
>  		DRM_ERROR("failed to reserve new rbo in error path\n");
> @@ -697,7 +656,7 @@ static const struct drm_crtc_funcs radeon_crtc_funcs = {
>  	.gamma_set = radeon_crtc_gamma_set,
>  	.set_config = radeon_crtc_set_config,
>  	.destroy = radeon_crtc_destroy,
> -	.page_flip = radeon_crtc_page_flip,
> +	.page_flip_target = radeon_crtc_page_flip_target,
>  };
>
>  static void radeon_crtc_init(struct drm_device *dev, int index)
>
Michel Dänzer Aug. 16, 2016, 1:49 a.m. UTC | #2
On 16/08/16 09:35 AM, Mario Kleiner wrote:
> Hi Michel,
> 
> sorry for the super-late reply, i was just catching up with all the
> mails and discussions, starting in June, leading to this patch set.
> 
> Looks all pretty good.
> 
> I'll look at this radeon patch and 2/6 for amdgpu later this week when i
> have a fresh brain and enough "obsessive compulsive time", to make sure
> all the magic wrt. "virtually extended vblank" and the fudging logic is
> fine.

Excellent!


> I'll then also run it through my timing tests. I assume the
> ati/amdgpu-ddx patches and libdrm patches in your freedesktop home are
> all i need for testing?

Yes, although it would also be nice to test with unmodified userspace
and make sure there are no regressions with that.
Mario Kleiner Sept. 17, 2016, 12:41 p.m. UTC | #3
Hi Michel,

all your patches, both the already merged kernel bits in radeon/amdgpu 
and also all the userspace bits in libdrm/ati-ddx/amdgpu-ddx are all

Reviewed-and-tested-by: Mario Kleiner <mario.kleiner.de@gmail.com>

I successfully tested with old/current userspace and the new userspace 
patches from your own libdrm/ati-ddx/amdgpu-ddx repos, under radeon-kms 
with HD-5770 and amdgpu-kms with R9 380, under DRI2 and DRI3/Present 
with the new userspace and at least DRI3/Present with the old/current 
userspace (can't quite remember if i also tested with DRI2 on 
old/current userspace, but probably). Hardware measured timing tests all 
work fine.

So all is good, except for pre-DCE4 without pflip irqs, but for that see 
the patchset i just sent out, which should make those old asics work 
well as well.

-mario

On 08/16/2016 03:49 AM, Michel Dänzer wrote:
> On 16/08/16 09:35 AM, Mario Kleiner wrote:
>> Hi Michel,
>>
>> sorry for the super-late reply, i was just catching up with all the
>> mails and discussions, starting in June, leading to this patch set.
>>
>> Looks all pretty good.
>>
>> I'll look at this radeon patch and 2/6 for amdgpu later this week when i
>> have a fresh brain and enough "obsessive compulsive time", to make sure
>> all the magic wrt. "virtually extended vblank" and the fudging logic is
>> fine.
>
> Excellent!
>
>
>> I'll then also run it through my timing tests. I assume the
>> ati/amdgpu-ddx patches and libdrm patches in your freedesktop home are
>> all i need for testing?
>
> Yes, although it would also be nice to test with unmodified userspace
> and make sure there are no regressions with that.
>
>
Michel Dänzer Sept. 20, 2016, 2:59 a.m. UTC | #4
On 17/09/16 09:41 PM, Mario Kleiner wrote:
> Hi Michel,
> 
> all your patches, both the already merged kernel bits in radeon/amdgpu
> and also all the userspace bits in libdrm/ati-ddx/amdgpu-ddx are all
> 
> Reviewed-and-tested-by: Mario Kleiner <mario.kleiner.de@gmail.com>
> 
> I successfully tested with old/current userspace and the new userspace
> patches from your own libdrm/ati-ddx/amdgpu-ddx repos, under radeon-kms
> with HD-5770 and amdgpu-kms with R9 380, under DRI2 and DRI3/Present
> with the new userspace and at least DRI3/Present with the old/current
> userspace (can't quite remember if i also tested with DRI2 on
> old/current userspace, but probably). Hardware measured timing tests all
> work fine.
> 
> So all is good, except for pre-DCE4 without pflip irqs, but for that see
> the patchset i just sent out, which should make those old asics work
> well as well.

Thanks for the testing and patches!

Patch
diff mbox

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5633ee3..1b0dcad 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -742,6 +742,7 @@  struct radeon_flip_work {
 	struct work_struct		unpin_work;
 	struct radeon_device		*rdev;
 	int				crtc_id;
+	u32				target_vblank;
 	uint64_t			base;
 	struct drm_pending_vblank_event *event;
 	struct radeon_bo		*old_rbo;
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 5f1cd69..bd9d995 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -400,14 +400,13 @@  static void radeon_flip_work_func(struct work_struct *__work)
 	struct radeon_flip_work *work =
 		container_of(__work, struct radeon_flip_work, flip_work);
 	struct radeon_device *rdev = work->rdev;
+	struct drm_device *dev = rdev->ddev;
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[work->crtc_id];
 
 	struct drm_crtc *crtc = &radeon_crtc->base;
 	unsigned long flags;
 	int r;
-	int vpos, hpos, stat, min_udelay = 0;
-	unsigned repcnt = 4;
-	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
+	int vpos, hpos;
 
 	down_read(&rdev->exclusive_lock);
 	if (work->fence) {
@@ -438,59 +437,25 @@  static void radeon_flip_work_func(struct work_struct *__work)
 		work->fence = NULL;
 	}
 
+	/* Wait until we're out of the vertical blank period before the one
+	 * targeted by the flip
+	 */
+	while (radeon_crtc->enabled &&
+	       (radeon_get_crtc_scanoutpos(dev, work->crtc_id, 0,
+					   &vpos, &hpos, NULL, NULL,
+					   &crtc->hwmode)
+		& (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) ==
+	       (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
+	       (int)(work->target_vblank -
+		     dev->driver->get_vblank_counter(dev, work->crtc_id)) > 0)
+		usleep_range(1000, 2000);
+
 	/* We borrow the event spin lock for protecting flip_status */
 	spin_lock_irqsave(&crtc->dev->event_lock, flags);
 
 	/* set the proper interrupt */
 	radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id);
 
-	/* If this happens to execute within the "virtually extended" vblank
-	 * interval before the start of the real vblank interval then it needs
-	 * to delay programming the mmio flip until the real vblank is entered.
-	 * This prevents completing a flip too early due to the way we fudge
-	 * our vblank counter and vblank timestamps in order to work around the
-	 * problem that the hw fires vblank interrupts before actual start of
-	 * vblank (when line buffer refilling is done for a frame). It
-	 * complements the fudging logic in radeon_get_crtc_scanoutpos() for
-	 * timestamping and radeon_get_vblank_counter_kms() for vblank counts.
-	 *
-	 * In practice this won't execute very often unless on very fast
-	 * machines because the time window for this to happen is very small.
-	 */
-	while (radeon_crtc->enabled && --repcnt) {
-		/* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
-		 * start in hpos, and to the "fudged earlier" vblank start in
-		 * vpos.
-		 */
-		stat = radeon_get_crtc_scanoutpos(rdev->ddev, work->crtc_id,
-						  GET_DISTANCE_TO_VBLANKSTART,
-						  &vpos, &hpos, NULL, NULL,
-						  &crtc->hwmode);
-
-		if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
-		    (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) ||
-		    !(vpos >= 0 && hpos <= 0))
-			break;
-
-		/* Sleep at least until estimated real start of hw vblank */
-		min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
-		if (min_udelay > vblank->framedur_ns / 2000) {
-			/* Don't wait ridiculously long - something is wrong */
-			repcnt = 0;
-			break;
-		}
-		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
-		usleep_range(min_udelay, 2 * min_udelay);
-		spin_lock_irqsave(&crtc->dev->event_lock, flags);
-	};
-
-	if (!repcnt)
-		DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
-				 "framedur %d, linedur %d, stat %d, vpos %d, "
-				 "hpos %d\n", work->crtc_id, min_udelay,
-				 vblank->framedur_ns / 1000,
-				 vblank->linedur_ns / 1000, stat, vpos, hpos);
-
 	/* do the flip (mmio) */
 	radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base, work->async);
 
@@ -499,10 +464,11 @@  static void radeon_flip_work_func(struct work_struct *__work)
 	up_read(&rdev->exclusive_lock);
 }
 
-static int radeon_crtc_page_flip(struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_pending_vblank_event *event,
-				 uint32_t page_flip_flags)
+static int radeon_crtc_page_flip_target(struct drm_crtc *crtc,
+					struct drm_framebuffer *fb,
+					struct drm_pending_vblank_event *event,
+					uint32_t page_flip_flags,
+					uint32_t target)
 {
 	struct drm_device *dev = crtc->dev;
 	struct radeon_device *rdev = dev->dev_private;
@@ -599,12 +565,8 @@  static int radeon_crtc_page_flip(struct drm_crtc *crtc,
 		base &= ~7;
 	}
 	work->base = base;
-
-	r = drm_crtc_vblank_get(crtc);
-	if (r) {
-		DRM_ERROR("failed to get vblank before flip\n");
-		goto pflip_cleanup;
-	}
+	work->target_vblank = target - drm_crtc_vblank_count(crtc) +
+		dev->driver->get_vblank_counter(dev, work->crtc_id);
 
 	/* We borrow the event spin lock for protecting flip_work */
 	spin_lock_irqsave(&crtc->dev->event_lock, flags);
@@ -613,7 +575,7 @@  static int radeon_crtc_page_flip(struct drm_crtc *crtc,
 		DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
 		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		r = -EBUSY;
-		goto vblank_cleanup;
+		goto pflip_cleanup;
 	}
 	radeon_crtc->flip_status = RADEON_FLIP_PENDING;
 	radeon_crtc->flip_work = work;
@@ -626,9 +588,6 @@  static int radeon_crtc_page_flip(struct drm_crtc *crtc,
 	queue_work(radeon_crtc->flip_queue, &work->flip_work);
 	return 0;
 
-vblank_cleanup:
-	drm_crtc_vblank_put(crtc);
-
 pflip_cleanup:
 	if (unlikely(radeon_bo_reserve(new_rbo, false) != 0)) {
 		DRM_ERROR("failed to reserve new rbo in error path\n");
@@ -697,7 +656,7 @@  static const struct drm_crtc_funcs radeon_crtc_funcs = {
 	.gamma_set = radeon_crtc_gamma_set,
 	.set_config = radeon_crtc_set_config,
 	.destroy = radeon_crtc_destroy,
-	.page_flip = radeon_crtc_page_flip,
+	.page_flip_target = radeon_crtc_page_flip_target,
 };
 
 static void radeon_crtc_init(struct drm_device *dev, int index)