Message ID | 1470281981-18172-5-git-send-email-michel@daenzer.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Michel, sorry for the super-late reply, i was just catching up with all the mails and discussions, starting in June, leading to this patch set. Looks all pretty good. I'll look at this radeon patch and 2/6 for amdgpu later this week when i have a fresh brain and enough "obsessive compulsive time", to make sure all the magic wrt. "virtually extended vblank" and the fudging logic is fine. I'll then also run it through my timing tests. I assume the ati/amdgpu-ddx patches and libdrm patches in your freedesktop home are all i need for testing? -mario On 08/04/2016 05:39 AM, Michel Dänzer wrote: > From: Michel Dänzer <michel.daenzer@amd.com> > > Now we can program a flip during a vertical blank period, if it's the > one targeted by the flip (or a later one). This allows simplifying > radeon_flip_work_func considerably. > > Signed-off-by: Michel Dänzer <michel.daenzer@amd.com> > --- > drivers/gpu/drm/radeon/radeon.h | 1 + > drivers/gpu/drm/radeon/radeon_display.c | 89 +++++++++------------------------ > 2 files changed, 25 insertions(+), 65 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index 5633ee3..1b0dcad 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -742,6 +742,7 @@ struct radeon_flip_work { > struct work_struct unpin_work; > struct radeon_device *rdev; > int crtc_id; > + u32 target_vblank; > uint64_t base; > struct drm_pending_vblank_event *event; > struct radeon_bo *old_rbo; > diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c > index 5f1cd69..bd9d995 100644 > --- a/drivers/gpu/drm/radeon/radeon_display.c > +++ b/drivers/gpu/drm/radeon/radeon_display.c > @@ -400,14 +400,13 @@ static void radeon_flip_work_func(struct work_struct *__work) > struct radeon_flip_work *work = > container_of(__work, struct radeon_flip_work, flip_work); > struct radeon_device *rdev = work->rdev; > + struct drm_device *dev = rdev->ddev; > struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[work->crtc_id]; > > struct drm_crtc *crtc = &radeon_crtc->base; > unsigned long flags; > int r; > - int vpos, hpos, stat, min_udelay = 0; > - unsigned repcnt = 4; > - struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id]; > + int vpos, hpos; > > down_read(&rdev->exclusive_lock); > if (work->fence) { > @@ -438,59 +437,25 @@ static void radeon_flip_work_func(struct work_struct *__work) > work->fence = NULL; > } > > + /* Wait until we're out of the vertical blank period before the one > + * targeted by the flip > + */ > + while (radeon_crtc->enabled && > + (radeon_get_crtc_scanoutpos(dev, work->crtc_id, 0, > + &vpos, &hpos, NULL, NULL, > + &crtc->hwmode) > + & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) == > + (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) && > + (int)(work->target_vblank - > + dev->driver->get_vblank_counter(dev, work->crtc_id)) > 0) > + usleep_range(1000, 2000); > + > /* We borrow the event spin lock for protecting flip_status */ > spin_lock_irqsave(&crtc->dev->event_lock, flags); > > /* set the proper interrupt */ > radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id); > > - /* If this happens to execute within the "virtually extended" vblank > - * interval before the start of the real vblank interval then it needs > - * to delay programming the mmio flip until the real vblank is entered. > - * This prevents completing a flip too early due to the way we fudge > - * our vblank counter and vblank timestamps in order to work around the > - * problem that the hw fires vblank interrupts before actual start of > - * vblank (when line buffer refilling is done for a frame). It > - * complements the fudging logic in radeon_get_crtc_scanoutpos() for > - * timestamping and radeon_get_vblank_counter_kms() for vblank counts. > - * > - * In practice this won't execute very often unless on very fast > - * machines because the time window for this to happen is very small. > - */ > - while (radeon_crtc->enabled && --repcnt) { > - /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank > - * start in hpos, and to the "fudged earlier" vblank start in > - * vpos. > - */ > - stat = radeon_get_crtc_scanoutpos(rdev->ddev, work->crtc_id, > - GET_DISTANCE_TO_VBLANKSTART, > - &vpos, &hpos, NULL, NULL, > - &crtc->hwmode); > - > - if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) != > - (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) || > - !(vpos >= 0 && hpos <= 0)) > - break; > - > - /* Sleep at least until estimated real start of hw vblank */ > - min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5); > - if (min_udelay > vblank->framedur_ns / 2000) { > - /* Don't wait ridiculously long - something is wrong */ > - repcnt = 0; > - break; > - } > - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); > - usleep_range(min_udelay, 2 * min_udelay); > - spin_lock_irqsave(&crtc->dev->event_lock, flags); > - }; > - > - if (!repcnt) > - DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, " > - "framedur %d, linedur %d, stat %d, vpos %d, " > - "hpos %d\n", work->crtc_id, min_udelay, > - vblank->framedur_ns / 1000, > - vblank->linedur_ns / 1000, stat, vpos, hpos); > - > /* do the flip (mmio) */ > radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base, work->async); > > @@ -499,10 +464,11 @@ static void radeon_flip_work_func(struct work_struct *__work) > up_read(&rdev->exclusive_lock); > } > > -static int radeon_crtc_page_flip(struct drm_crtc *crtc, > - struct drm_framebuffer *fb, > - struct drm_pending_vblank_event *event, > - uint32_t page_flip_flags) > +static int radeon_crtc_page_flip_target(struct drm_crtc *crtc, > + struct drm_framebuffer *fb, > + struct drm_pending_vblank_event *event, > + uint32_t page_flip_flags, > + uint32_t target) > { > struct drm_device *dev = crtc->dev; > struct radeon_device *rdev = dev->dev_private; > @@ -599,12 +565,8 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, > base &= ~7; > } > work->base = base; > - > - r = drm_crtc_vblank_get(crtc); > - if (r) { > - DRM_ERROR("failed to get vblank before flip\n"); > - goto pflip_cleanup; > - } > + work->target_vblank = target - drm_crtc_vblank_count(crtc) + > + dev->driver->get_vblank_counter(dev, work->crtc_id); > > /* We borrow the event spin lock for protecting flip_work */ > spin_lock_irqsave(&crtc->dev->event_lock, flags); > @@ -613,7 +575,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, > DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); > spin_unlock_irqrestore(&crtc->dev->event_lock, flags); > r = -EBUSY; > - goto vblank_cleanup; > + goto pflip_cleanup; > } > radeon_crtc->flip_status = RADEON_FLIP_PENDING; > radeon_crtc->flip_work = work; > @@ -626,9 +588,6 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, > queue_work(radeon_crtc->flip_queue, &work->flip_work); > return 0; > > -vblank_cleanup: > - drm_crtc_vblank_put(crtc); > - > pflip_cleanup: > if (unlikely(radeon_bo_reserve(new_rbo, false) != 0)) { > DRM_ERROR("failed to reserve new rbo in error path\n"); > @@ -697,7 +656,7 @@ static const struct drm_crtc_funcs radeon_crtc_funcs = { > .gamma_set = radeon_crtc_gamma_set, > .set_config = radeon_crtc_set_config, > .destroy = radeon_crtc_destroy, > - .page_flip = radeon_crtc_page_flip, > + .page_flip_target = radeon_crtc_page_flip_target, > }; > > static void radeon_crtc_init(struct drm_device *dev, int index) >
On 16/08/16 09:35 AM, Mario Kleiner wrote: > Hi Michel, > > sorry for the super-late reply, i was just catching up with all the > mails and discussions, starting in June, leading to this patch set. > > Looks all pretty good. > > I'll look at this radeon patch and 2/6 for amdgpu later this week when i > have a fresh brain and enough "obsessive compulsive time", to make sure > all the magic wrt. "virtually extended vblank" and the fudging logic is > fine. Excellent! > I'll then also run it through my timing tests. I assume the > ati/amdgpu-ddx patches and libdrm patches in your freedesktop home are > all i need for testing? Yes, although it would also be nice to test with unmodified userspace and make sure there are no regressions with that.
Hi Michel, all your patches, both the already merged kernel bits in radeon/amdgpu and also all the userspace bits in libdrm/ati-ddx/amdgpu-ddx are all Reviewed-and-tested-by: Mario Kleiner <mario.kleiner.de@gmail.com> I successfully tested with old/current userspace and the new userspace patches from your own libdrm/ati-ddx/amdgpu-ddx repos, under radeon-kms with HD-5770 and amdgpu-kms with R9 380, under DRI2 and DRI3/Present with the new userspace and at least DRI3/Present with the old/current userspace (can't quite remember if i also tested with DRI2 on old/current userspace, but probably). Hardware measured timing tests all work fine. So all is good, except for pre-DCE4 without pflip irqs, but for that see the patchset i just sent out, which should make those old asics work well as well. -mario On 08/16/2016 03:49 AM, Michel Dänzer wrote: > On 16/08/16 09:35 AM, Mario Kleiner wrote: >> Hi Michel, >> >> sorry for the super-late reply, i was just catching up with all the >> mails and discussions, starting in June, leading to this patch set. >> >> Looks all pretty good. >> >> I'll look at this radeon patch and 2/6 for amdgpu later this week when i >> have a fresh brain and enough "obsessive compulsive time", to make sure >> all the magic wrt. "virtually extended vblank" and the fudging logic is >> fine. > > Excellent! > > >> I'll then also run it through my timing tests. I assume the >> ati/amdgpu-ddx patches and libdrm patches in your freedesktop home are >> all i need for testing? > > Yes, although it would also be nice to test with unmodified userspace > and make sure there are no regressions with that. > >
On 17/09/16 09:41 PM, Mario Kleiner wrote: > Hi Michel, > > all your patches, both the already merged kernel bits in radeon/amdgpu > and also all the userspace bits in libdrm/ati-ddx/amdgpu-ddx are all > > Reviewed-and-tested-by: Mario Kleiner <mario.kleiner.de@gmail.com> > > I successfully tested with old/current userspace and the new userspace > patches from your own libdrm/ati-ddx/amdgpu-ddx repos, under radeon-kms > with HD-5770 and amdgpu-kms with R9 380, under DRI2 and DRI3/Present > with the new userspace and at least DRI3/Present with the old/current > userspace (can't quite remember if i also tested with DRI2 on > old/current userspace, but probably). Hardware measured timing tests all > work fine. > > So all is good, except for pre-DCE4 without pflip irqs, but for that see > the patchset i just sent out, which should make those old asics work > well as well. Thanks for the testing and patches!
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5633ee3..1b0dcad 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -742,6 +742,7 @@ struct radeon_flip_work { struct work_struct unpin_work; struct radeon_device *rdev; int crtc_id; + u32 target_vblank; uint64_t base; struct drm_pending_vblank_event *event; struct radeon_bo *old_rbo; diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 5f1cd69..bd9d995 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -400,14 +400,13 @@ static void radeon_flip_work_func(struct work_struct *__work) struct radeon_flip_work *work = container_of(__work, struct radeon_flip_work, flip_work); struct radeon_device *rdev = work->rdev; + struct drm_device *dev = rdev->ddev; struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[work->crtc_id]; struct drm_crtc *crtc = &radeon_crtc->base; unsigned long flags; int r; - int vpos, hpos, stat, min_udelay = 0; - unsigned repcnt = 4; - struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id]; + int vpos, hpos; down_read(&rdev->exclusive_lock); if (work->fence) { @@ -438,59 +437,25 @@ static void radeon_flip_work_func(struct work_struct *__work) work->fence = NULL; } + /* Wait until we're out of the vertical blank period before the one + * targeted by the flip + */ + while (radeon_crtc->enabled && + (radeon_get_crtc_scanoutpos(dev, work->crtc_id, 0, + &vpos, &hpos, NULL, NULL, + &crtc->hwmode) + & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) == + (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) && + (int)(work->target_vblank - + dev->driver->get_vblank_counter(dev, work->crtc_id)) > 0) + usleep_range(1000, 2000); + /* We borrow the event spin lock for protecting flip_status */ spin_lock_irqsave(&crtc->dev->event_lock, flags); /* set the proper interrupt */ radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id); - /* If this happens to execute within the "virtually extended" vblank - * interval before the start of the real vblank interval then it needs - * to delay programming the mmio flip until the real vblank is entered. - * This prevents completing a flip too early due to the way we fudge - * our vblank counter and vblank timestamps in order to work around the - * problem that the hw fires vblank interrupts before actual start of - * vblank (when line buffer refilling is done for a frame). It - * complements the fudging logic in radeon_get_crtc_scanoutpos() for - * timestamping and radeon_get_vblank_counter_kms() for vblank counts. - * - * In practice this won't execute very often unless on very fast - * machines because the time window for this to happen is very small. - */ - while (radeon_crtc->enabled && --repcnt) { - /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank - * start in hpos, and to the "fudged earlier" vblank start in - * vpos. - */ - stat = radeon_get_crtc_scanoutpos(rdev->ddev, work->crtc_id, - GET_DISTANCE_TO_VBLANKSTART, - &vpos, &hpos, NULL, NULL, - &crtc->hwmode); - - if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) != - (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) || - !(vpos >= 0 && hpos <= 0)) - break; - - /* Sleep at least until estimated real start of hw vblank */ - min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5); - if (min_udelay > vblank->framedur_ns / 2000) { - /* Don't wait ridiculously long - something is wrong */ - repcnt = 0; - break; - } - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - usleep_range(min_udelay, 2 * min_udelay); - spin_lock_irqsave(&crtc->dev->event_lock, flags); - }; - - if (!repcnt) - DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, " - "framedur %d, linedur %d, stat %d, vpos %d, " - "hpos %d\n", work->crtc_id, min_udelay, - vblank->framedur_ns / 1000, - vblank->linedur_ns / 1000, stat, vpos, hpos); - /* do the flip (mmio) */ radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base, work->async); @@ -499,10 +464,11 @@ static void radeon_flip_work_func(struct work_struct *__work) up_read(&rdev->exclusive_lock); } -static int radeon_crtc_page_flip(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags) +static int radeon_crtc_page_flip_target(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t page_flip_flags, + uint32_t target) { struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; @@ -599,12 +565,8 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, base &= ~7; } work->base = base; - - r = drm_crtc_vblank_get(crtc); - if (r) { - DRM_ERROR("failed to get vblank before flip\n"); - goto pflip_cleanup; - } + work->target_vblank = target - drm_crtc_vblank_count(crtc) + + dev->driver->get_vblank_counter(dev, work->crtc_id); /* We borrow the event spin lock for protecting flip_work */ spin_lock_irqsave(&crtc->dev->event_lock, flags); @@ -613,7 +575,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); spin_unlock_irqrestore(&crtc->dev->event_lock, flags); r = -EBUSY; - goto vblank_cleanup; + goto pflip_cleanup; } radeon_crtc->flip_status = RADEON_FLIP_PENDING; radeon_crtc->flip_work = work; @@ -626,9 +588,6 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, queue_work(radeon_crtc->flip_queue, &work->flip_work); return 0; -vblank_cleanup: - drm_crtc_vblank_put(crtc); - pflip_cleanup: if (unlikely(radeon_bo_reserve(new_rbo, false) != 0)) { DRM_ERROR("failed to reserve new rbo in error path\n"); @@ -697,7 +656,7 @@ static const struct drm_crtc_funcs radeon_crtc_funcs = { .gamma_set = radeon_crtc_gamma_set, .set_config = radeon_crtc_set_config, .destroy = radeon_crtc_destroy, - .page_flip = radeon_crtc_page_flip, + .page_flip_target = radeon_crtc_page_flip_target, }; static void radeon_crtc_init(struct drm_device *dev, int index)