Message ID | 1407411210-1939-1-git-send-email-deathsimple@vodafone.de (mailing list archive) |
---|---|
State | New, archived |
On Thu, Aug 7, 2014 at 7:33 AM, Christian König <deathsimple@vodafone.de> wrote:
> From: Marco A Benatto <marco.antonio.780@gmail.com>
>
> Add frames-per-second estimation logic on UVD handles while they
> are in use. The estimation is done on a per-handle basis and will
> help with the DPM profile calculation.
>
> v2 (chk): fix timestamp type, move functions around and
> clean up the code a bit.

Will this really help much?  I thought the problem was mainly due to
sclk and mclk for post processing.

Alex

>
> Signed-off-by: Marco A Benatto <marco.antonio.780@gmail.com>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/radeon/radeon.h     | 10 ++++++
>  drivers/gpu/drm/radeon/radeon_uvd.c | 64 +++++++++++++++++++++++++++++++++----
>  2 files changed, 68 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 9e1732e..e92f6cb 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -1617,6 +1617,15 @@ int radeon_pm_get_type_index(struct radeon_device *rdev,
>  #define RADEON_UVD_STACK_SIZE   (1024*1024)
>  #define RADEON_UVD_HEAP_SIZE    (1024*1024)
>
> +#define RADEON_UVD_FPS_EVENTS_MAX 8
> +#define RADEON_UVD_DEFAULT_FPS 60
> +
> +struct radeon_uvd_fps {
> +        uint64_t        timestamp;
> +        uint8_t         event_index;
> +        uint8_t         events[RADEON_UVD_FPS_EVENTS_MAX];
> +};
> +
>  struct radeon_uvd {
>          struct radeon_bo        *vcpu_bo;
>          void                    *cpu_addr;
> @@ -1626,6 +1635,7 @@ struct radeon_uvd {
>          struct drm_file         *filp[RADEON_MAX_UVD_HANDLES];
>          unsigned                img_size[RADEON_MAX_UVD_HANDLES];
>          struct delayed_work     idle_work;
> +        struct radeon_uvd_fps   fps_info[RADEON_MAX_UVD_HANDLES];
>  };
>
>  int radeon_uvd_init(struct radeon_device *rdev);
> diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
> index 6bf55ec..ef5667a 100644
> --- a/drivers/gpu/drm/radeon/radeon_uvd.c
> +++ b/drivers/gpu/drm/radeon/radeon_uvd.c
> @@ -237,6 +237,51 @@ void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo)
>          rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
>  }
>
> +static void radeon_uvd_fps_clear_events(struct radeon_device *rdev, int idx)
> +{
> +        struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx];
> +        unsigned i;
> +
> +        fps->timestamp = jiffies_64;
> +        fps->event_index = 0;
> +        for (i = 0; i < RADEON_UVD_FPS_EVENTS_MAX; i++)
> +                fps->events[i] = 0;
> +}
> +
> +static void radeon_uvd_fps_note_event(struct radeon_device *rdev, int idx)
> +{
> +        struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx];
> +        uint64_t timestamp = jiffies_64;
> +        unsigned rate = 0;
> +
> +        uint8_t index = fps->event_index++;
> +        fps->event_index %= RADEON_UVD_FPS_EVENTS_MAX;
> +
> +        rate = div64_u64(HZ, max(timestamp - fps->timestamp, 1ULL));
> +
> +        fps->timestamp = timestamp;
> +        fps->events[index] = min(rate, 120u);
> +}
> +
> +static unsigned radeon_uvd_estimate_fps(struct radeon_device *rdev, int idx)
> +{
> +        struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx];
> +        unsigned i, valid = 0, count = 0;
> +
> +        for (i = 0; i < RADEON_UVD_FPS_EVENTS_MAX; i++) {
> +                /* We should ignore zero values */
> +                if (fps->events[i] != 0) {
> +                        count += fps->events[i];
> +                        valid++;
> +                }
> +        }
> +
> +        if (valid > 0)
> +                return count / valid;
> +        else
> +                return RADEON_UVD_DEFAULT_FPS;
> +}
> +
>  void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
>  {
>          int i, r;
> @@ -419,8 +464,10 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
>
>          /* create or decode, validate the handle */
>          for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
> -                if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
> +                if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
> +                        radeon_uvd_fps_note_event(p->rdev, i);
>                          return 0;
> +                }
>          }
>
>          /* handle not found try to alloc a new one */
> @@ -428,6 +475,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
>                  if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
>                          p->rdev->uvd.filp[i] = p->filp;
>                          p->rdev->uvd.img_size[i] = img_size;
> +                        radeon_uvd_fps_clear_events(p->rdev, i);
>                          return 0;
>                  }
>          }
> @@ -763,7 +811,7 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
>  static void radeon_uvd_count_handles(struct radeon_device *rdev,
>                                       unsigned *sd, unsigned *hd)
>  {
> -        unsigned i;
> +        unsigned i, fps_rate = 0;
>
>          *sd = 0;
>          *hd = 0;
> @@ -772,10 +820,13 @@ static void radeon_uvd_count_handles(struct radeon_device *rdev,
>                  if (!atomic_read(&rdev->uvd.handles[i]))
>                          continue;
>
> -                if (rdev->uvd.img_size[i] >= 720*576)
> -                        ++(*hd);
> -                else
> -                        ++(*sd);
> +                fps_rate = radeon_uvd_estimate_fps(rdev, i);
> +
> +                if (rdev->uvd.img_size[i] >= 720*576) {
> +                        (*hd) += fps_rate > 30 ? 1 : 2;
> +                } else {
> +                        (*sd) += fps_rate > 30 ? 1 : 2;
> +                }
>          }
>  }
>
> @@ -805,6 +856,7 @@ void radeon_uvd_note_usage(struct radeon_device *rdev)
>          set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
>                                              msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
>
> +
>          if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
>                  unsigned hd = 0, sd = 0;
>                  radeon_uvd_count_handles(rdev, &sd, &hd);
> --
> 1.9.1
>
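[Editorial note on the math in radeon_uvd_fps_note_event(): the instantaneous rate is HZ divided by the jiffies elapsed since the previous decode submission, so the estimate is quantized by the scheduler tick. The stand-alone userspace sketch below is not part of the patch; it merely mirrors the averaging logic under the assumption HZ=250, to show that a steady 24 fps stream is estimated as roughly 25 and a 60 fps stream as roughly 62, which is accurate enough for the ">30 fps" threshold used in radeon_uvd_count_handles().]

/* Stand-alone approximation of the patch's per-handle fps estimator.
 * Mirrors radeon_uvd_fps_note_event()/radeon_uvd_estimate_fps() in user
 * space with fake jiffies; HZ = 250 is an assumption for illustration.
 */
#include <stdint.h>
#include <stdio.h>

#define HZ             250
#define FPS_EVENTS_MAX 8
#define DEFAULT_FPS    60

struct fps_state {
        uint64_t timestamp;               /* last event, in jiffies */
        uint8_t  event_index;
        uint8_t  events[FPS_EVENTS_MAX];  /* recent instantaneous rates */
};

static void note_event(struct fps_state *fps, uint64_t now)
{
        uint64_t delta = now - fps->timestamp;
        uint8_t index = fps->event_index++;
        unsigned rate;

        fps->event_index %= FPS_EVENTS_MAX;
        if (delta == 0)
                delta = 1;
        rate = HZ / delta;                /* quantized by the tick length */
        fps->timestamp = now;
        fps->events[index] = rate > 120 ? 120 : rate;
}

static unsigned estimate_fps(const struct fps_state *fps)
{
        unsigned i, valid = 0, sum = 0;

        for (i = 0; i < FPS_EVENTS_MAX; i++) {
                if (fps->events[i]) {     /* zero slots mean "no data yet" */
                        sum += fps->events[i];
                        valid++;
                }
        }
        return valid ? sum / valid : DEFAULT_FPS;
}

int main(void)
{
        struct fps_state fps = { .timestamp = 1000 };
        uint64_t now = 1000;
        int i;

        /* 24 fps: one decode submission roughly every HZ/24 = 10 jiffies */
        for (i = 0; i < 16; i++) {
                now += HZ / 24;
                note_event(&fps, now);
        }
        printf("steady 24 fps stream -> estimate %u\n", estimate_fps(&fps));

        /* 60 fps: roughly every HZ/60 = 4 jiffies */
        for (i = 0; i < 16; i++) {
                now += HZ / 60;
                note_event(&fps, now);
        }
        printf("after switching to 60 fps -> estimate %u\n", estimate_fps(&fps));
        return 0;
}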
Am 07.08.2014 um 16:32 schrieb Alex Deucher:
> On Thu, Aug 7, 2014 at 7:33 AM, Christian König <deathsimple@vodafone.de> wrote:
>> From: Marco A Benatto <marco.antonio.780@gmail.com>
>>
>> Add frames-per-second estimation logic on UVD handles while they
>> are in use. The estimation is done on a per-handle basis and will
>> help with the DPM profile calculation.
>>
>> v2 (chk): fix timestamp type, move functions around and
>> clean up the code a bit.
>
> Will this really help much?  I thought the problem was mainly due to
> sclk and mclk for post processing.

It should at least handle the UVD side of upclocking when you get a lot
of streams / fps. And at least on my NI the patch seems to do exactly that.

Switching sclk and mclk for post processing is a different task, and I
actually have no idea what to do with them.

Christian.

> Alex
>
>> [...]
On Thu, Aug 7, 2014 at 11:32 AM, Christian König <deathsimple@vodafone.de> wrote:
> Am 07.08.2014 um 16:32 schrieb Alex Deucher:
>> Will this really help much?  I thought the problem was mainly due to
>> sclk and mclk for post processing.
>
> It should at least handle the UVD side of upclocking when you get a lot
> of streams / fps. And at least on my NI the patch seems to do exactly that.
>
> Switching sclk and mclk for post processing is a different task, and I
> actually have no idea what to do with them.

At this point we always choose the plain UVD state anyway, so this
patch would only take effect if we re-enabled the dynamic UVD state
selection.  For the post processing, we probably need a hint we can
pass to the driver in the CS ioctl to denote what state we need.
Although if we did that, this code would largely be moot.  That said,
newer asics support dynamic UVD clocks, so we really only need
something like that for older asics and I guess VCE.

Alex

> Christian.
>
>> [...]
Am 07.08.2014 um 21:43 schrieb Alex Deucher:
> On Thu, Aug 7, 2014 at 11:32 AM, Christian König <deathsimple@vodafone.de> wrote:
>> It should at least handle the UVD side of upclocking when you get a lot
>> of streams / fps. And at least on my NI the patch seems to do exactly that.
>>
>> Switching sclk and mclk for post processing is a different task, and I
>> actually have no idea what to do with them.
>
> At this point we always choose the plain UVD state anyway, so this
> patch would only take effect if we re-enabled the dynamic UVD state
> selection.

Hui? I thought we had already re-enabled the dynamic UVD state
selection, but double checking this I found it disabled again.

What was the problem with that? Looks like I somehow missed the
discussion around it.

Christian.

> For the post processing, we probably need a hint we can
> pass to the driver in the CS ioctl to denote what state we need.
> Although if we did that, this code would largely be moot.  That said,
> newer asics support dynamic UVD clocks, so we really only need
> something like that for older asics and I guess VCE.
>
> Alex
>
>> [...]
On Mon, Aug 11, 2014 at 5:08 AM, Christian König <deathsimple@vodafone.de> wrote:
> Am 07.08.2014 um 21:43 schrieb Alex Deucher:
>> At this point we always choose the plain UVD state anyway, so this
>> patch would only take effect if we re-enabled the dynamic UVD state
>> selection.
>
> Hui? I thought we had already re-enabled the dynamic UVD state
> selection, but double checking this I found it disabled again.
>
> What was the problem with that? Looks like I somehow missed the
> discussion around it.

We did, but after doing so a number of people complained about a
regression on IRC, because when apps like xbmc enabled post processing,
performance went down.

Alex

> Christian.
>
>> [...]
Am 11.08.2014 um 16:52 schrieb Alex Deucher:
> On Mon, Aug 11, 2014 at 5:08 AM, Christian König <deathsimple@vodafone.de> wrote:
>> What was the problem with that? Looks like I somehow missed the
>> discussion around it.
>
> We did, but after doing so a number of people complained about a
> regression on IRC, because when apps like xbmc enabled post processing,
> performance went down.

That's strange; from my experience the different UVD performance states
only affect UVD's dclk/vclk, not sclk/mclk. I need to get the DPM dumps
to confirm this.

Do you remember offhand who complained on IRC? Finding something in the
IRC logs is like searching for a needle in a haystack.

Thanks,
Christian.

> Alex
>
>> [...]
On Tue, Aug 12, 2014 at 6:00 AM, Christian König <deathsimple@vodafone.de> wrote:
> Am 11.08.2014 um 16:52 schrieb Alex Deucher:
>> We did, but after doing so a number of people complained about a
>> regression on IRC, because when apps like xbmc enabled post processing,
>> performance went down.
>
> That's strange; from my experience the different UVD performance states
> only affect UVD's dclk/vclk, not sclk/mclk. I need to get the DPM dumps
> to confirm this.

The sclks and mclks are usually different as well, especially on APUs.
I can send you some examples.

> Do you remember offhand who complained on IRC? Finding something in the
> IRC logs is like searching for a needle in a haystack.

I don't remember offhand. I think zgreg was involved in some of the
discussions.

Alex

> Thanks,
> Christian.
>
>> [...]
Hi guys,

to make a long story short: every time I watch a movie my laptop starts
to heat up, because we always select the standard UVD power profile
without actually measuring whether that is necessary.

Marco came up with a patch that seems to reliably measure the fps sent
down to the kernel, which together with the frame size of the video
should allow us to select the right UVD power profile.

The problem is that Alex (unnoticed by me) completely disabled selecting
the UVD profiles because of some issues with advanced post processing
discussed on IRC. The problem seems to be that the lower UVD profiles
have too low an SCLK/MCLK to handle the 3D load that comes with scaling,
deinterlacing etc.

I unfortunately don't have time for it, because this only affects the
hardware generations R600-SI and not the newest one, CIK. So could you
guys stick together and come up with a solution? Something like a sysfs
entry that lets us select the minimum UVD power level allowed?

I think Marco is happy to come up with a patch; we just need to know
what's really needed and what the default values should be. I'm happy to
review everything that comes out of it, I just don't have time to do it
myself.

Happy discussion and thanks in advance,
Christian.

Am 12.08.2014 um 15:05 schrieb Alex Deucher:
> On Tue, Aug 12, 2014 at 6:00 AM, Christian König <deathsimple@vodafone.de> wrote:
>> That's strange; from my experience the different UVD performance states
>> only affect UVD's dclk/vclk, not sclk/mclk. I need to get the DPM dumps
>> to confirm this.
>
> The sclks and mclks are usually different as well, especially on APUs.
> I can send you some examples.
>
>> Do you remember offhand who complained on IRC? Finding something in the
>> IRC logs is like searching for a needle in a haystack.
>
> I don't remember offhand. I think zgreg was involved in some of the
> discussions.
>
> Alex
>
>> [...]
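[Editorial note on the sysfs idea above: the fragment below is only a sketch of what such a knob could look like, modeled on the existing attributes radeon creates in radeon_pm.c. The attribute name (uvd_min_power_level), the pm.uvd_min_level field and the 0/1/2 level encoding are all invented for illustration; how the stored value would actually feed into the UVD state selection is exactly the open question of this thread.]

/* Hypothetical sysfs knob for the minimum UVD power level discussed above.
 * Attribute and field names are invented for illustration only; a real
 * patch would also have to wire the value into the UVD state selection.
 */
static ssize_t radeon_get_uvd_min_level(struct device *dev,
                                        struct device_attribute *attr,
                                        char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct radeon_device *rdev = ddev->dev_private;

        return snprintf(buf, PAGE_SIZE, "%u\n", rdev->pm.uvd_min_level);
}

static ssize_t radeon_set_uvd_min_level(struct device *dev,
                                        struct device_attribute *attr,
                                        const char *buf, size_t count)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct radeon_device *rdev = ddev->dev_private;
        u32 level;
        int r;

        r = kstrtou32(buf, 10, &level);
        if (r)
                return r;
        if (level > 2)  /* e.g. 0 = SD, 1 = HD, 2 = HD2; purely illustrative */
                return -EINVAL;

        mutex_lock(&rdev->pm.mutex);
        rdev->pm.uvd_min_level = level;
        mutex_unlock(&rdev->pm.mutex);
        return count;
}

static DEVICE_ATTR(uvd_min_power_level, S_IRUGO | S_IWUSR,
                   radeon_get_uvd_min_level, radeon_set_uvd_min_level);

/* registered next to the other pm attributes, e.g. in radeon_pm_init():
 *        device_create_file(rdev->dev, &dev_attr_uvd_min_power_level);
 */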
Hey all,

I also had a talk with Alex yesterday about post-processing issues when using dynamic UVD profiles and a change to the CS ioctl including a flag to let the user mode driver tell the kernel which performance requirement it wants for post processing. A common point for both discussions is to establish the default values for these profiles, but the ioctl change would probably be more invasive/complex to implement than a sysfs entry.

If a sysfs entry is enough for now I can handle the code to create it and, with your help, the code to set up the UVD profile requested through it.

Is there any suggestion?

Thanks all for your help,

On Fri, Aug 15, 2014 at 5:48 AM, Christian König <christian.koenig@amd.com> wrote:
> Hi guys,
>
> to make a long story short, every time I watch a movie my laptop starts to
> heat up because we always select the standard UVD power profile without
> actually measuring if that is necessary.
>
> Marco came up with a patch that seems to reliably measure the fps sent
> down to the kernel and so, together with knowing the frame size of the
> video, should allow us to select the right UVD power profile.
>
> The problem is that Alex (unnoticed by me) completely disabled selecting
> the UVD profiles because of some issues with advanced post processing
> discussed on IRC. The problem seems to be that the lower UVD profiles have
> too low an SCLK/MCLK to handle the 3D load that comes with scaling,
> deinterlacing etc...
>
> I unfortunately don't have time for it, because this only affects the
> hardware generations R600-SI and not the newest one, CIK. So could you guys
> stick together and come up with a solution? Something like a sysfs entry
> that lets us select the minimum UVD power level allowed?
>
> I think Marco is happy to come up with a patch, we just need to know
> what's really needed and what the default values should be. I'm happy to
> review everything that comes out of it, just don't have time to do it
> myself.
>
> Happy discussion and thanks in advance,
> Christian.
>
> Am 12.08.2014 um 15:05 schrieb Alex Deucher:
> [...]
Hi Marco,

the problem with a CS ioctl flag is that we sometimes don't know how much SCLK/MCLK boost is needed, for example when we do post processing in the player using OpenGL and UVD decoding with VDPAU. In this case VDPAU doesn't have the slightest idea how high SCLK/MCLK must be and so can't give that info to the kernel either.

Regards,
Christian.

Am 15.08.2014 um 15:21 schrieb Marco Benatto:
> Hey all,
>
> I also had a talk with Alex yesterday about post-processing issues when
> using dynamic UVD profiles and a change to the CS ioctl including a flag
> to let the user mode driver tell the kernel which performance requirement
> it wants for post processing. A common point for both discussions is to
> establish the default values for these profiles, but the ioctl change
> would probably be more invasive/complex to implement than a sysfs entry.
>
> If a sysfs entry is enough for now I can handle the code to create it
> and, with your help, the code to set up the UVD profile requested
> through it.
>
> Is there any suggestion?
>
> Thanks all for your help,
>
> On Fri, Aug 15, 2014 at 5:48 AM, Christian König <christian.koenig@amd.com> wrote:
> > [...]
Hi Christian,

got it. I'll start to add a sysfs entry here called uvd_power_level where we'll be able to change the UVD performance profile, ok? I'll need your help to define the power profiles and someone to test it on R600-SI as I don't have anyone around here.

Thank you.

On Fri, Aug 15, 2014 at 11:11 AM, Christian König <christian.koenig@amd.com> wrote:
> Hi Marco,
>
> the problem with a CS ioctl flag is that we sometimes don't know how much
> SCLK/MCLK boost is needed, for example when we do post processing in the
> player using OpenGL and UVD decoding with VDPAU. In this case VDPAU doesn't
> have the slightest idea how high SCLK/MCLK must be and so can't give that
> info to the kernel either.
>
> Regards,
> Christian.
>
> Am 15.08.2014 um 15:21 schrieb Marco Benatto:
> [...]
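For what it's worth, a rough sketch of what the attribute Marco describes above could look like, modeled on the existing power_dpm_state handlers in radeon_pm.c. The uvd_forced_level field, the accepted "auto"/"sd"/"hd" names and how the value would feed into the UVD state selection are assumptions made for this sketch, not the actual implementation.

/*
 * Sketch only: a "uvd_power_level" attribute that lets the user force a
 * minimum UVD performance level. rdev->pm.uvd_forced_level and the
 * auto/sd/hd names are hypothetical; the drvdata/locking pattern follows
 * the existing power_dpm_state handlers in radeon_pm.c.
 */
static ssize_t radeon_get_uvd_power_level(struct device *dev,
					  struct device_attribute *attr,
					  char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct radeon_device *rdev = ddev->dev_private;
	int level = rdev->pm.uvd_forced_level;	/* hypothetical field */

	return snprintf(buf, PAGE_SIZE, "%s\n",
			level == 2 ? "hd" : level == 1 ? "sd" : "auto");
}

static ssize_t radeon_set_uvd_power_level(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t count)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct radeon_device *rdev = ddev->dev_private;
	int level;

	if (strncmp("auto", buf, strlen("auto")) == 0)
		level = 0;
	else if (strncmp("sd", buf, strlen("sd")) == 0)
		level = 1;
	else if (strncmp("hd", buf, strlen("hd")) == 0)
		level = 2;
	else
		return -EINVAL;

	mutex_lock(&rdev->pm.mutex);
	rdev->pm.uvd_forced_level = level;	/* hypothetical field */
	mutex_unlock(&rdev->pm.mutex);

	/* re-evaluate the power state so the new floor takes effect */
	radeon_pm_compute_clocks(rdev);

	return count;
}

static DEVICE_ATTR(uvd_power_level, S_IRUGO | S_IWUSR,
		   radeon_get_uvd_power_level, radeon_set_uvd_power_level);

The attribute would presumably be registered with device_create_file(rdev->dev, &dev_attr_uvd_power_level) next to the other DPM attributes, and the forced level used as a floor wherever the DPM code picks the UVD state.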
On 15.08.2014 16:11, Christian König wrote:
> Hi Marco,
>
> the problem with a CS ioctl flag is that we sometimes don't know how
> much SCLK/MCLK boost is needed, for example when we do post processing
> in the player using OpenGL and UVD decoding with VDPAU. In this case
> VDPAU doesn't have the slightest idea how high SCLK/MCLK must be and so
> can't give that info to the kernel either.
>

Maybe it's an acceptable workaround to simply disable dynamic UVD state selection in case the UVD states only have a single power level. That will avoid the performance issues on affected systems, while still allowing dynamic UVD states on systems that have a saner DPM table setup. I think it is mostly older systems that suffer from this.

Best regards
Grigori

> Regards,
> Christian.
>
> Am 15.08.2014 um 15:21 schrieb Marco Benatto:
> [...]
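For reference, the check Grigori describes could look roughly like the sketch below. radeon_dpm_get_num_levels() is a made-up helper standing in for the asic-specific code that knows the real DPM table layout, so treat this as an illustration of the idea rather than working driver code.

/*
 * Sketch of the workaround described above: only allow the dynamic
 * (SD/HD) UVD states when the UVD power states actually expose more
 * than one performance level.
 */
static bool radeon_uvd_dynamic_states_usable(struct radeon_device *rdev)
{
	int i;

	for (i = 0; i < rdev->pm.dpm.num_ps; i++) {
		struct radeon_ps *ps = &rdev->pm.dpm.ps[i];

		if (!(ps->class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE))
			continue;

		/* a single-level UVD state cannot be boosted for the 3D
		 * load of post processing, so stay with the default state */
		if (radeon_dpm_get_num_levels(rdev, ps) <= 1)
			return false;
	}

	return true;
}

The dynamic SD/HD selection would then only be attempted when this returns true, which is roughly the behaviour Alex confirms in the next message.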
On Fri, Aug 15, 2014 at 11:20 AM, Grigori Goronzy <greg@chown.ath.cx> wrote:
> On 15.08.2014 16:11, Christian König wrote:
>> [...]
>
> Maybe it's an acceptable workaround to simply disable dynamic UVD state
> selection in case the UVD states only have a single power level. That
> will avoid the performance issues on affected systems, while still
> allowing dynamic UVD states on systems that have a saner DPM table
> setup. I think it is mostly older systems that suffer from this.
>

That is exactly what we do now.

Alex

> Best regards
> Grigori
>
>> Regards,
>> Christian.
>>
>> Am 15.08.2014 um 15:21 schrieb Marco Benatto:
>> [...]
On Fri, Aug 15, 2014 at 10:11 AM, Christian König <christian.koenig@amd.com> wrote:
> Hi Marco,
>
> the problem with a CS ioctl flag is that we sometimes don't know how much
> SCLK/MCLK boost is needed, for example when we do post processing in the
> player using OpenGL and UVD decoding with VDPAU. In this case VDPAU doesn't
> have the slightest idea how high SCLK/MCLK must be and so can't give that
> info to the kernel either.

It could be as simple as just setting a flag when any kind of post
processing is enabled so that the kernel selects the highest UVD
performance state; otherwise, let the kernel decide based on streams.

Alex

>
> Regards,
> Christian.
>
> On 15.08.2014 15:21, Marco Benatto wrote:
>
> Hey all,
>
> I also had a talk with Alex yesterday about post-processing issues when
> using dynamic UVD profiles and a change to the CS ioctl, including a flag
> to let the user mode driver tell the kernel which performance requirement
> it wants for post processing. A common point for both discussions is to
> establish the default values for these profiles, but this ioctl change
> would probably be more impacting/complex to implement than a sysfs entry.
>
> If a sysfs entry is enough for now I can handle the code to create it and,
> with your help, the code to set up the UVD profile requested through it.
>
> Is there any suggestion?
>
> Thanks all for your help,
>
>
> On Fri, Aug 15, 2014 at 5:48 AM, Christian König <christian.koenig@amd.com>
> wrote:
>>
>> Hi guys,
>>
>> to make a long story short: every time I watch a movie my laptop starts
>> to heat up, because we always select the standard UVD power profile
>> without actually measuring whether that is necessary.
>>
>> Marco came up with a patch that seems to reliably measure the fps sent
>> down to the kernel, which together with knowing the frame size of the
>> video should allow us to select the right UVD power profile.
>>
>> The problem is that Alex (unnoticed by me) completely disabled selecting
>> the UVD profiles because of some issues with advanced post processing
>> discussed on IRC. The problem seems to be that the lower UVD profiles
>> have too low an SCLK/MCLK to handle the 3D load that comes with scaling,
>> deinterlacing etc...
>>
>> I unfortunately don't have time for it, because this only affects the
>> hardware generations R600-SI and not the newest one, CIK. So could you
>> guys stick together and come up with a solution? Something like a sysfs
>> entry that lets us select the minimum UVD power level allowed?
>>
>> I think Marco is happy to come up with a patch, we just need to know
>> what's really needed and what the default values should be. I'm happy to
>> review everything that comes out of it, I just don't have time to do it
>> myself.
>>
>> Happy discussion and thanks in advance,
>> Christian.
>>
>> On 12.08.2014 15:05, Alex Deucher wrote:
>>
>>> On Tue, Aug 12, 2014 at 6:00 AM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>>
>>>> On 11.08.2014 16:52, Alex Deucher wrote:
>>>>
>>>>> On Mon, Aug 11, 2014 at 5:08 AM, Christian König
>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>
>>>>>> On 07.08.2014 21:43, Alex Deucher wrote:
>>>>>>
>>>>>>> On Thu, Aug 7, 2014 at 11:32 AM, Christian König
>>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>>>
>>>>>>>> On 07.08.2014 16:32, Alex Deucher wrote:
>>>>>>>>
>>>>>>>>> Will this really help much? I thought the problem was mainly due
>>>>>>>>> to sclk and mclk for post processing.
>>>>>>>>
>>>>>>>> It should at least handle the UVD side for upclocking when you get
>>>>>>>> a lot of streams / fps. And at least on my NI the patch seems to do
>>>>>>>> exactly that.
>>>>>>>>
>>>>>>>> Switching sclk and mclk for post processing is a different task,
>>>>>>>> and I actually have no idea what to do with them.
>>>>>>>
>>>>>>> At this point we always choose the plain UVD state anyway, so this
>>>>>>> patch would only take effect if we re-enabled the dynamic UVD state
>>>>>>> selection.
>>>>>>
>>>>>> Huh? I thought we already re-enabled the dynamic UVD state selection,
>>>>>> but double checking this I found it disabled again.
>>>>>>
>>>>>> What was the problem with that? Looks like I somehow missed the
>>>>>> discussion around it.
>>>>>
>>>>> We did, but after doing so a number of people complained about a
>>>>> regression on IRC, because when apps like XBMC enabled post processing,
>>>>> performance went down.
>>>>
>>>> That's strange, from my experience the different UVD performance states
>>>> only affect UVD's dclk/vclk, not sclk/mclk. I need to get the DPM dumps
>>>> to confirm this.
>>>
>>> The sclks and mclks are usually different as well, especially on APUs.
>>> I can send you some examples.
>>>
>>>> Do you remember off hand who complained on IRC? Finding something in
>>>> the IRC logs is like searching for a needle in a haystack.
>>>
>>> I don't remember off hand. I think zgreg was involved in some of the
>>> discussions.
>>>
>>> Alex
>>>
>>>> Thanks,
>>>> Christian.
>>>>
>>>>> Alex
>>>>>
>>>>>> Christian.
>>>>>>
>>>>>>> For the post processing, we probably need a hint we can pass to the
>>>>>>> driver in the CS ioctl to denote what state we need. Although if we
>>>>>>> did that, this would largely be moot. That said, newer asics support
>>>>>>> dynamic UVD clocks so we really only need something like that for
>>>>>>> older asics and I guess VCE.
>>>>>>>
>>>>>>> Alex
>>>>>>>
>>>>>>>> Christian.
>
> --
> Marco Antonio Benatto
> Linux user ID: #506236
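To make the flag idea above concrete, here is a minimal sketch of where such a hint could plug in. The flag name, its value and the helper are invented for this illustration; nothing like this exists in the CS ioctl ABI or in the posted patch:

#include <stdbool.h>

/* Hypothetical sketch only -- a real version would need an agreed-on flag
 * in the CS ioctl and plumbing through the CS parser. */
#define RADEON_CS_UVD_NEEDS_FULL_STATE	(1u << 0)	/* made-up flag value */

/* A userspace post-processing hint always wins; otherwise fall back to the
 * fps/stream based estimation from the patch. */
static bool uvd_want_full_state(unsigned int cs_flags, bool streams_need_boost)
{
	if (cs_flags & RADEON_CS_UVD_NEEDS_FULL_STATE)
		return true;			/* player enabled post processing */

	return streams_need_boost;		/* decide from stream count / fps */
}

Christian's objection above still applies: when the post processing happens in a separate OpenGL client, VDPAU itself has no reliable way to set such a flag.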
On 15.08.2014 17:26, Alex Deucher wrote:
> On Fri, Aug 15, 2014 at 11:20 AM, Grigori Goronzy <greg@chown.ath.cx> wrote:
>> On 15.08.2014 16:11, Christian König wrote:
>>> Hi Marco,
>>>
>>> the problem with a CS ioctl flag is that we sometimes don't know how
>>> much SCLK/MCLK boost is needed, for example when we do post processing
>>> in the player using OpenGL and UVD decoding with VDPAU. In this case
>>> VDPAU doesn't have the slightest idea how high SCLK/MCLK must be and so
>>> can't give that info to the kernel either.
>>>
>> Maybe it's an acceptable workaround to simply disable dynamic UVD state
>> selection in case the UVD states only have a single power level. That
>> will avoid the performance issues on affected systems, while still
>> allowing dynamic UVD states on systems that have a saner DPM table
>> setup. I think it is mostly older systems that suffer from this.
>>
> That is exactly what we do now.
>

Is it? In 3.17-wip, dynamic UVD state selection (according to active
streams) is still completely disabled. It will always use the generic
UVD state. In fact, wasn't it reverted again because of the performance
issues on some systems?

Grigori

> Alex
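For context, "dynamic UVD state selection (according to active streams)" means mapping the sd/hd handle counts produced by radeon_uvd_count_handles() onto the internal UVD power states. The enum names and thresholds below are only an illustrative sketch, not the code that was disabled in the driver:

enum uvd_dpm_state {
	UVD_STATE_SD,
	UVD_STATE_HD,
	UVD_STATE_HD2,
	UVD_STATE_GENERIC,	/* the full "uvd" state, today's default */
};

/* sd/hd are the weighted handle counts from radeon_uvd_count_handles() */
static enum uvd_dpm_state uvd_pick_state(unsigned int sd, unsigned int hd)
{
	if (sd == 1 && hd == 0)
		return UVD_STATE_SD;
	if ((sd == 2 && hd == 0) || (sd == 0 && hd == 1))
		return UVD_STATE_HD;
	if (sd == 0 && hd == 2)
		return UVD_STATE_HD2;
	return UVD_STATE_GENERIC;	/* mixed or heavier loads */
}

The fps estimation from the patch feeds into this kind of mapping by weighting each handle by its estimated frame rate before the counts are used.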
Looking at the radeon_uvd source code, AFAIK it's currently disabled since
commit dca5086a90c9ec64f4e0de801a659508202b0640
<http://cgit.freedesktop.org/%7Eagd5f/linux/commit/drivers/gpu/drm/radeon/radeon_uvd.c?h=drm-next-3.18-wip&id=dca5086a90c9ec64f4e0de801a659508202b0640>
Please correct me if I'm wrong.

Maybe we can check which system we're running on and decide from that what
we use (a sysfs entry or the UVD streams) to determine which UVD state can
be set, like Grigori suggested.

What do you think?


On Fri, Aug 15, 2014 at 12:32 PM, Grigori Goronzy <greg@chown.ath.cx> wrote:

> On 15.08.2014 17:26, Alex Deucher wrote:
> > On Fri, Aug 15, 2014 at 11:20 AM, Grigori Goronzy <greg@chown.ath.cx> wrote:
> >> Maybe it's an acceptable workaround to simply disable dynamic UVD state
> >> selection in case the UVD states only have a single power level. That
> >> will avoid the performance issues on affected systems, while still
> >> allowing dynamic UVD states on systems that have a saner DPM table
> >> setup. I think it is mostly older systems that suffer from this.
> >>
> > That is exactly what we do now.
>
> Is it? In 3.17-wip, dynamic UVD state selection (according to active
> streams) is still completely disabled. It will always use the generic
> UVD state. In fact, wasn't it reverted again because of the performance
> issues on some systems?
>
> Grigori
On 15.08.2014 18:43, Marco Benatto wrote:
> Looking at the radeon_uvd source code, AFAIK it's currently disabled since
> commit dca5086a90c9ec64f4e0de801a659508202b0640
> <http://cgit.freedesktop.org/%7Eagd5f/linux/commit/drivers/gpu/drm/radeon/radeon_uvd.c?h=drm-next-3.18-wip&id=dca5086a90c9ec64f4e0de801a659508202b0640>
> Please correct me if I'm wrong.
>

No, that's correct. It was just a misunderstanding.

> Maybe we can check which system we're running on and decide from that what
> we use (a sysfs entry or the UVD streams) to determine which UVD state can
> be set, like Grigori suggested.
>
> What do you think?
>

Seems like a good idea to me. Systems that have "good" 3D power levels for
all UVD power states can choose the power state automatically according to
active streams. Maybe the sysfs entry can still be used for overriding the
automatic state decision.

Systems which have "bad" (slow) 3D power levels in their UVD power states
should never choose dynamically. They should use the generic UVD power
state by default, and users can override it with the sysfs entry.

Grigori
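A sketch of the override policy being discussed: the sysfs entry acts as a user-selected floor on top of the automatic decision, and systems with "bad" UVD states never select dynamically. The names and the enum are invented for this example, and the sysfs plumbing itself is omitted:

enum uvd_state_level {
	UVD_LEVEL_SD,
	UVD_LEVEL_HD,
	UVD_LEVEL_HD2,
	UVD_LEVEL_GENERIC,	/* full "uvd" state, the current default */
};

/*
 * auto_level:  what the stream/fps based estimation picked
 * floor_level: minimum level allowed, written by the user through sysfs
 * dynamic_ok:  0 on systems whose UVD states have "bad" (slow) 3D levels
 */
static enum uvd_state_level
uvd_apply_override(enum uvd_state_level auto_level,
		   enum uvd_state_level floor_level,
		   int dynamic_ok)
{
	if (!dynamic_ok)
		return UVD_LEVEL_GENERIC;	/* never downclock on such systems */

	/* levels are ordered lowest to highest, so a plain max applies the floor */
	return auto_level > floor_level ? auto_level : floor_level;
}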
On 15.08.2014 17:32, Grigori Goronzy wrote:
> Is it? In 3.17-wip, dynamic UVD state selection (according to active
> streams) is still completely disabled. It will always use the generic
> UVD state. In fact, wasn't it reverted again because of the performance
> issues on some systems?

This is the performance table of my laptop (at least the interesting
parts), which I think is a typical example of the problem:

[    4.106772] == power state 1 ==
[    4.106774]     ui class: performance
[    4.106776]     internal class: none
[    4.106780]     uvd    vclk: 0 dclk: 0
[    4.106782]         power level 0    sclk: 20000 vddc_index: 2
[    4.106784]         power level 1    sclk: 50000 vddc_index: 2
[    4.106805] == power state 3 ==
[    4.106807]     ui class: none
[    4.106808]     internal class: uvd
[    4.106813]     uvd    vclk: 55000 dclk: 40000
[    4.106816]         power level 0    sclk: 50000 vddc_index: 2
[    4.106818]         power level 1    sclk: 50000 vddc_index: 2
[    4.106820]     status:
[    4.106822] == power state 4 ==
[    4.106823]     ui class: battery
[    4.106825]     internal class: uvd_hd
[    4.106831]     uvd    vclk: 40000 dclk: 30000
[    4.106833]         power level 0    sclk: 38000 vddc_index: 1
[    4.106835]         power level 1    sclk: 38000 vddc_index: 1
[    4.106839] == power state 5 ==
[    4.106841]     ui class: battery
[    4.106843]     internal class: uvd_sd
[    4.106848]     uvd    vclk: 40000 dclk: 30000
[    4.106850]         power level 0    sclk: 38000 vddc_index: 2
[    4.106853]         power level 1    sclk: 38000 vddc_index: 2

As you can see, we currently always select the performance level uvd,
which results in selecting the maximum sclk/dclk and vclk.

Unfortunately neither uvd, uvd_sd nor uvd_hd allows the hardware to switch
the sclk once selected (it's a hardware limitation of older UVD blocks).
So for all cases where this is interesting you actually always have only a
single power level to choose from.

Christian.
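To make "only a single power level" concrete against a table like the one above: a state offers no sclk headroom when all of its levels share the same engine clock (50000/50000, 38000/38000). The following is a minimal sketch of such a check, using a hypothetical struct rather than the actual radeon DPM structures:

#include <stdbool.h>

/*
 * Illustrative only: detects whether a UVD power state offers no sclk
 * headroom, i.e. every level uses the same engine clock.  The struct and
 * field names are made up for this sketch and do not match the driver.
 */
struct example_uvd_state {
	unsigned num_levels;
	unsigned sclk[8];		/* engine clock per level */
};

static bool example_uvd_state_is_single_level(const struct example_uvd_state *ps)
{
	unsigned i;

	for (i = 1; i < ps->num_levels; i++)
		if (ps->sclk[i] != ps->sclk[0])
			return false;	/* at least two different sclks */

	return true;			/* nothing to switch between */
}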
Grigori, Alex and Christian,

are you OK if I merge the ioctl flag idea with the sysfs idea?

We let the system decide the state using the hint provided by the CS ioctl
flag, but if performance is not as good as expected or the DPM table is not
sane we still have a fallback way to override this decision.

On Fri, Aug 15, 2014 at 1:54 PM, Christian König <christian.koenig@amd.com> wrote:
> On 15.08.2014 at 17:32, Grigori Goronzy wrote:
> [quoted discussion, power state table and patch snipped]
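As a rough illustration of the merged proposal (the CS ioctl hint drives the normal decision, while a user-controlled override serves as the fallback), the kernel-side choice could look something like the sketch below. The enum and helper are hypothetical and not part of the driver; the "no hint means plain uvd state" default only mirrors today's behaviour described earlier in the thread.

/*
 * Sketch of the "merge both ideas" proposal: the hint from the CS ioctl
 * drives the normal choice, while a user-controlled minimum (the sysfs
 * fallback) can only raise the result.  All names are hypothetical.
 */
enum example_uvd_level {
	EXAMPLE_UVD_AUTO = 0,	/* no hint given, keep today's behaviour */
	EXAMPLE_UVD_SD,
	EXAMPLE_UVD_HD,
	EXAMPLE_UVD_FULL,
};

static enum example_uvd_level
example_pick_uvd_level(enum example_uvd_level cs_hint,
		       enum example_uvd_level user_min)
{
	/* without a hint, fall back to the plain uvd state as today */
	enum example_uvd_level level =
		cs_hint != EXAMPLE_UVD_AUTO ? cs_hint : EXAMPLE_UVD_FULL;

	/* the override acts as a floor, never as a cap */
	if (user_min > level)
		level = user_min;

	return level;
}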
On Fri, Aug 15, 2014 at 1:03 PM, Marco Benatto <marco.antonio.780@gmail.com> wrote:
> Grigori, Alex and Christian,
>
> are you OK if I merge the ioctl flag idea with the sysfs idea?
>
> We let the system decide the state using the hint provided by the CS ioctl
> flag, but if performance is not as good as expected or the DPM table is not
> sane we still have a fallback way to override this decision.

For backwards compatibility, the CS ioctl flag should be an opt-in flag,
e.g., UVD_KERNEL_MANAGE_POWER_STATE. That way there would be no change for
old versions of mesa, but for newer versions of mesa the UVD user mode
driver could set the flag when there is no post-processing (for lower power
usage) and leave it unset when there is post-processing (for better
performance).

I'd rather not have a sysfs entry. I'd prefer a mesa env var or drirc
option to force the CS flag.

Alex

> On Fri, Aug 15, 2014 at 1:54 PM, Christian König <christian.koenig@amd.com> wrote:
> [quoted discussion and patch snipped]
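For illustration, a minimal sketch of what the opt-in flag could look like. The flag name follows Alex's suggestion; the bit value and the helper are placeholders, not part of the actual radeon UAPI, and a real patch would have to fit the flag into the existing CS flags chunk.

#include <stdbool.h>

/*
 * Hypothetical opt-in flag for the CS ioctl flags chunk.  The name comes
 * from Alex's mail; the bit value is a placeholder and would have to be
 * assigned in the real radeon_drm.h without clashing with existing flags.
 */
#define RADEON_CS_UVD_KERNEL_MANAGE_POWER_STATE	(1u << 31)	/* placeholder */

/*
 * Kernel-side check (sketch): only consider the reduced UVD states when
 * user space explicitly opted in, so old user space keeps today's
 * behaviour of always getting the full uvd state.
 */
static inline bool example_uvd_dynamic_state_allowed(unsigned cs_flags)
{
	return (cs_flags & RADEON_CS_UVD_KERNEL_MANAGE_POWER_STATE) != 0;
}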
> I'd rather not have a sysfs entry. I'd prefer a mesa env var or drirc
> option to force the CS flag.

Yeah, that actually sounds like a good idea. I think we could add a new
dword to the flags chunk for this, or use the priority field, which is
unused for UVD on those older chipsets anyway.

If possible I would like to have something like
RADEON_MIN_UVD_POWER_LEVEL=(uvd_sd|uvd_hd|uvd_hd2|uvd|uvd_mvc), which
forces a minimum level but still lets the kernel pick a higher level when
it decides one is needed.

Christian.

On 15.08.2014 at 19:10, Alex Deucher wrote:
> On Fri, Aug 15, 2014 at 1:03 PM, Marco Benatto
> <marco.antonio.780@gmail.com> wrote:
>> Grigori, Alex and Christian,
>>
>> are you OK if I merge the ioctl flag idea with the sysfs idea?
>
> [quoted reply, earlier discussion and patch snipped]
>>> >>> This is the performance table of my laptop (at least the interesting >>> parts), which I think is a typical example of the problem: >>> >>> [ 4.106772] == power state 1 == >>> [ 4.106774] ui class: performance >>> [ 4.106776] internal class: none >>> [ 4.106780] uvd vclk: 0 dclk: 0 >>> [ 4.106782] power level 0 sclk: 20000 vddc_index: 2 >>> [ 4.106784] power level 1 sclk: 50000 vddc_index: 2 >>> [ 4.106805] == power state 3 == >>> [ 4.106807] ui class: none >>> [ 4.106808] internal class: uvd >>> [ 4.106813] uvd vclk: 55000 dclk: 40000 >>> [ 4.106816] power level 0 sclk: 50000 vddc_index: 2 >>> [ 4.106818] power level 1 sclk: 50000 vddc_index: 2 >>> [ 4.106820] status: >>> [ 4.106822] == power state 4 == >>> [ 4.106823] ui class: battery >>> [ 4.106825] internal class: uvd_hd >>> [ 4.106831] uvd vclk: 40000 dclk: 30000 >>> [ 4.106833] power level 0 sclk: 38000 vddc_index: 1 >>> [ 4.106835] power level 1 sclk: 38000 vddc_index: 1 >>> [ 4.106839] == power state 5 == >>> [ 4.106841] ui class: battery >>> [ 4.106843] internal class: uvd_sd >>> [ 4.106848] uvd vclk: 40000 dclk: 30000 >>> [ 4.106850] power level 0 sclk: 38000 vddc_index: 2 >>> [ 4.106853] power level 1 sclk: 38000 vddc_index: 2 >>> >>> As you can see we currently always select the performance level uvd, which >>> results in selecting the maximum sclk/dclk and vclk. Unfortunately neither >>> uvd, uvd_sd nor uvd_hd allows the hardware to switch the sclk once selected >>> (it's a hardware limitation of older uvd blocks). >>> >>> So for all cases where this is interesting you actually always have only a >>> single power level to choose from. >>> >>> Christian. >>> >>> >>>> Grigori >>>> >>>>> Alex >>>>> >>>>> >>>>>> Best regards >>>>>> Grigori >>>>>> >>>>>>> Regards, >>>>>>> Christian. >>>>>>> >>>>>>> Am 15.08.2014 um 15:21 schrieb Marco Benatto: >>>>>>>> Hey all, >>>>>>>> >>>>>>>> I also had a talk with Alex yesterday about post-processing issues >>>>>>>> when using dynamic UVD profiles and a chamge on CS ioctl >>>>>>>> including a flag to let user mode driver tell to the kernel which >>>>>>>> performance requirement it wants for post processing. A commom >>>>>>>> point for both discussion is to stablish the default values for these >>>>>>>> profiles, but probably this ioctl change would be more >>>>>>>> impacting/complex >>>>>>>> to implement than a sysfs entry. >>>>>>>> >>>>>>>> If a sysfs entry is anough for now I can handle the code to create it >>>>>>>> and, with your help, the code to setup the UVD profile requested >>>>>>>> through it. >>>>>>>> >>>>>>>> Is there any suggestion? >>>>>>>> >>>>>>>> Thanks all for your help, >>>>>>>> >>>>>>>> >>>>>>>> On Fri, Aug 15, 2014 at 5:48 AM, Christian König >>>>>>>> <christian.koenig@amd.com <mailto:christian.koenig@amd.com>> wrote: >>>>>>>> >>>>>>>> Hi guys, >>>>>>>> >>>>>>>> to make a long story short every time I watch a movie my laptop >>>>>>>> start to heat up because we always select the standard UVD power >>>>>>>> profile without actually measuring if that is necessary. >>>>>>>> >>>>>>>> Marco came up with a patch that seems to reliable measure the >>>>>>>> fps >>>>>>>> send down to the kernel and so together with knowing the frame >>>>>>>> size of the video should allow us to select the right UVD power >>>>>>>> profile. >>>>>>>> >>>>>>>> The problem is that Alex (unnoticed by me) completely disabled >>>>>>>> selecting the UVD profiles because of some issues with advanced >>>>>>>> post processing discussed on IRC. 
The problem seems to be that >>>>>>>> the >>>>>>>> lower UVD profiles have a to low SCLK/MCLK to handle the 3D load >>>>>>>> that comes with scaling, deinterlacing etc... >>>>>>>> >>>>>>>> I unfortunately don't have time for it, cause this only affects >>>>>>>> the hardware generations R600-SI and not the newest one CIK. So >>>>>>>> could you guys stick together and come up with a solution? >>>>>>>> Something like a sysfs entry that let's us select the minimum >>>>>>>> UVD >>>>>>>> power level allowed? >>>>>>>> >>>>>>>> I think Marco is happy to come up with a patch, we just need to >>>>>>>> know what's really needed and what should be the default values. >>>>>>>> I'm happy to review everything that comes out of it, just don't >>>>>>>> have time to do it myself. >>>>>>>> >>>>>>>> Happy discussion and thanks in advance, >>>>>>>> Christian. >>>>>>>> >>>>>>>> Am 12.08.2014 um 15:05 schrieb Alex Deucher: >>>>>>>> >>>>>>>> On Tue, Aug 12, 2014 at 6:00 AM, Christian König >>>>>>>> <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> >>>>>>>> wrote: >>>>>>>> >>>>>>>> Am 11.08.2014 um 16:52 schrieb Alex Deucher: >>>>>>>> >>>>>>>> On Mon, Aug 11, 2014 at 5:08 AM, Christian König >>>>>>>> <deathsimple@vodafone.de >>>>>>>> <mailto:deathsimple@vodafone.de>> wrote: >>>>>>>> >>>>>>>> Am 07.08.2014 um 21:43 schrieb Alex Deucher: >>>>>>>> >>>>>>>> On Thu, Aug 7, 2014 at 11:32 AM, Christian >>>>>>>> König >>>>>>>> <deathsimple@vodafone.de >>>>>>>> <mailto:deathsimple@vodafone.de>> wrote: >>>>>>>> >>>>>>>> Am 07.08.2014 um 16:32 schrieb Alex >>>>>>>> Deucher: >>>>>>>> >>>>>>>> On Thu, Aug 7, 2014 at 7:33 AM, >>>>>>>> Christian König >>>>>>>> <deathsimple@vodafone.de >>>>>>>> <mailto:deathsimple@vodafone.de>> >>>>>>>> wrote: >>>>>>>> >>>>>>>> From: Marco A Benatto >>>>>>>> <marco.antonio.780@gmail.com >>>>>>>> >>>>>>>> <mailto:marco.antonio.780@gmail.com>> >>>>>>>> >>>>>>>> Adding a Frames Per Second >>>>>>>> estimation logic on UVD handles >>>>>>>> when it has being used. This >>>>>>>> estimation is per handle basis >>>>>>>> and will help on DPM profile >>>>>>>> calculation. >>>>>>>> >>>>>>>> v2 (chk): fix timestamp type, >>>>>>>> move >>>>>>>> functions around and >>>>>>>> cleanup code a >>>>>>>> bit. >>>>>>>> >>>>>>>> Will this really help much? I >>>>>>>> thought >>>>>>>> the problem was mainly due to >>>>>>>> sclk and mclk for post processing. >>>>>>>> >>>>>>>> >>>>>>>> It should at least handle the UVD side >>>>>>>> for >>>>>>>> upclocking when you get a >>>>>>>> lot >>>>>>>> of >>>>>>>> streams / fps. And at on my NI the patch >>>>>>>> seems to do exactly that. >>>>>>>> >>>>>>>> Switching sclk and mclk for post >>>>>>>> processing is a different task, and I >>>>>>>> actually have no idea what to do with >>>>>>>> them. >>>>>>>> >>>>>>>> At this point we always choose the plain UVD >>>>>>>> state anyway so this >>>>>>>> patch would only take effect if we >>>>>>>> re-enabled >>>>>>>> the dynamic UVD state >>>>>>>> selection. >>>>>>>> >>>>>>>> >>>>>>>> Hui? I thought we already re-enabled the dynamic >>>>>>>> UVD state selection, but >>>>>>>> double checking this I found it disabled again. >>>>>>>> >>>>>>>> What was the problem with that? Looks like I >>>>>>>> somehow missed the >>>>>>>> discussion >>>>>>>> around it. >>>>>>>> >>>>>>>> We did, but after doing so a number of people >>>>>>>> complained about a >>>>>>>> regression on IRC because when apps like xmbc >>>>>>>> enabled >>>>>>>> post processing, >>>>>>>> performance went down. 
>>>>>>>> >>>>>>>> >>>>>>>> That's strange, from my experience the different UVD >>>>>>>> performance states only >>>>>>>> affect UVDs dclk/vclk, not sclk/mclk. I need to get the >>>>>>>> DPM dumps to >>>>>>>> confirms this. >>>>>>>> >>>>>>>> The sclks and mclks are usually different as well, >>>>>>>> especially >>>>>>>> on APUs. >>>>>>>> I can send you some examples. >>>>>>>> >>>>>>>> You not off hand remember who complained on IRC? Finding >>>>>>>> something in the >>>>>>>> IRC logs is like searching for a needle in a haystack. >>>>>>>> >>>>>>>> I don't remember off hand. I think zgreg was involved in >>>>>>>> some >>>>>>>> of the >>>>>>>> discussions. >>>>>>>> >>>>>>>> Alex >>>>>>>> >>>>>>>> Thanks, >>>>>>>> Christian. >>>>>>>> >>>>>>>> >>>>>>>> Alex >>>>>>>> >>>>>>>> >>>>>>>> Christian. >>>>>>>> >>>>>>>> >>>>>>>> For the post processing, we probably need a >>>>>>>> hint we can >>>>>>>> pass to the driver in the CS ioctl to denote >>>>>>>> what state we need. >>>>>>>> Although if we did that, this could would >>>>>>>> largely be moot. That said, >>>>>>>> newer asics support dynamic UVD clocks so we >>>>>>>> really only need >>>>>>>> something like that for older asics and I >>>>>>>> guess VCE. >>>>>>>> >>>>>>>> Alex >>>>>>>> >>>>>>>> Christian. >>>>>>>> >>>>>>>> >>>>>>>> Alex >>>>>>>> >>>>>>>> Signed-off-by: Marco A Benatto >>>>>>>> <marco.antonio.780@gmail.com >>>>>>>> >>>>>>>> <mailto:marco.antonio.780@gmail.com>> >>>>>>>> Signed-off-by: Christian König >>>>>>>> <christian.koenig@amd.com >>>>>>>> >>>>>>>> <mailto:christian.koenig@amd.com>> >>>>>>>> --- >>>>>>>> >>>>>>>> drivers/gpu/drm/radeon/radeon.h >>>>>>>> | 10 ++++++ >>>>>>>> >>>>>>>> >>>>>>>> drivers/gpu/drm/radeon/radeon_uvd.c >>>>>>>> | 64 >>>>>>>> >>>>>>>> +++++++++++++++++++++++++++++++++---- >>>>>>>> 2 files changed, 68 >>>>>>>> insertions(+), 6 deletions(-) >>>>>>>> >>>>>>>> diff --git >>>>>>>> >>>>>>>> a/drivers/gpu/drm/radeon/radeon.h >>>>>>>> >>>>>>>> b/drivers/gpu/drm/radeon/radeon.h >>>>>>>> index 9e1732e..e92f6cb 100644 >>>>>>>> --- >>>>>>>> a/drivers/gpu/drm/radeon/radeon.h >>>>>>>> +++ >>>>>>>> b/drivers/gpu/drm/radeon/radeon.h >>>>>>>> @@ -1617,6 +1617,15 @@ int >>>>>>>> radeon_pm_get_type_index(struct >>>>>>>> radeon_device >>>>>>>> *rdev, >>>>>>>> #define >>>>>>>> RADEON_UVD_STACK_SIZE >>>>>>>> (1024*1024) >>>>>>>> #define >>>>>>>> RADEON_UVD_HEAP_SIZE >>>>>>>> (1024*1024) >>>>>>>> >>>>>>>> +#define >>>>>>>> RADEON_UVD_FPS_EVENTS_MAX 8 >>>>>>>> +#define RADEON_UVD_DEFAULT_FPS >>>>>>>> 60 >>>>>>>> + >>>>>>>> +struct radeon_uvd_fps { >>>>>>>> + uint64_t >>>>>>>> timestamp; >>>>>>>> + uint8_t >>>>>>>> event_index; >>>>>>>> + uint8_t >>>>>>>> >>>>>>>> events[RADEON_UVD_FPS_EVENTS_MAX]; >>>>>>>> +}; >>>>>>>> + >>>>>>>> struct radeon_uvd { >>>>>>>> struct radeon_bo >>>>>>>> *vcpu_bo; >>>>>>>> void >>>>>>>> *cpu_addr; >>>>>>>> @@ -1626,6 +1635,7 @@ struct >>>>>>>> radeon_uvd { >>>>>>>> struct drm_file >>>>>>>> *filp[RADEON_MAX_UVD_HANDLES]; >>>>>>>> unsigned >>>>>>>> >>>>>>>> img_size[RADEON_MAX_UVD_HANDLES]; >>>>>>>> struct delayed_work >>>>>>>> idle_work; >>>>>>>> + struct radeon_uvd_fps >>>>>>>> >>>>>>>> fps_info[RADEON_MAX_UVD_HANDLES]; >>>>>>>> }; >>>>>>>> >>>>>>>> int radeon_uvd_init(struct >>>>>>>> radeon_device *rdev); >>>>>>>> diff --git >>>>>>>> >>>>>>>> a/drivers/gpu/drm/radeon/radeon_uvd.c >>>>>>>> >>>>>>>> b/drivers/gpu/drm/radeon/radeon_uvd.c >>>>>>>> index 6bf55ec..ef5667a 100644 >>>>>>>> --- >>>>>>>> >>>>>>>> a/drivers/gpu/drm/radeon/radeon_uvd.c >>>>>>>> +++ >>>>>>>> >>>>>>>> 
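The CS ioctl hint Alex mentions above is not part of the posted patch. Purely as an illustration, a minimal sketch of what an opt-in flag on the kernel side could look like follows; the flag name (borrowed from Alex's later mail in this thread), the bit value, and the assumption that radeon_cs_parser carries the parsed flags chunk in a cs_flags field are all hypothetical, not code anyone posted.

```c
/* Hypothetical opt-in flag; the bit value is chosen arbitrarily for this sketch. */
#define RADEON_CS_UVD_KERNEL_MANAGE_POWER_STATE	(1 << 3)

/*
 * Sketch: decide whether the kernel may pick a lower UVD power state for
 * this submission.  Old userspace never sets the flag, so behaviour stays
 * unchanged (always the full UVD state) for it.
 */
static bool radeon_uvd_may_downclock(struct radeon_cs_parser *p)
{
	/* cs_flags here stands in for the parsed flags chunk of the CS ioctl. */
	return (p->cs_flags & RADEON_CS_UVD_KERNEL_MANAGE_POWER_STATE) != 0;
}
```

On the userspace side, the drirc/env-var switch discussed below would then simply control whether the UVD user mode driver sets this bit when no post processing is active.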
Ok, I'll start the changes tonight (BRT) taking 3.18-wip as the basis. In case of any doubt I'll get back to you guys. Thanks for the help.

On Fri, Aug 15, 2014 at 2:33 PM, Christian König <christian.koenig@amd.com> wrote:
>> I'd rather not have a sysfs entry. I'd prefer a mesa env var or drirc option to force the CS flag.
>
> Yeah, that actually sounds like a good idea. I think we could add a new dword to the flags chunk for this, or use the priority field, which is unused for UVD on those older chipsets anyway.
>
> If possible I would like to have something like RADEON_MIN_UVD_POWER_LEVEL=(uvd_sd|uvd_hd|uvd_hd2|uvd|uvd_mvc) which forces a minimum level, but still lets the kernel decide whether a higher level is needed.
>
> Christian.
>
> On 15.08.2014 at 19:10, Alex Deucher wrote:
>
>> On Fri, Aug 15, 2014 at 1:03 PM, Marco Benatto <marco.antonio.780@gmail.com> wrote:
>>
>>> Grigori, Alex and Christian,
>>>
>>> are you ok if I merge the ioctl flag idea with the sysfs idea?
>>>
>>> We let the system decide the state using the hint provided by the CS ioctl flag, but if performance is not as good as expected or the DPM table is not sane we still have a fallback way to override this decision.
>>>
>> For backwards compatibility the CS ioctl flag should be an opt-in flag, e.g., UVD_KERNEL_MANAGE_POWER_STATE. That way there would be no change for old versions of mesa, but for newer versions of mesa the UVD user mode driver could set the flag when there is no post processing (for lower power usage) and not set it when there is post processing (for better performance).
>>
>> I'd rather not have a sysfs entry. I'd prefer a mesa env var or drirc option to force the CS flag.
>>
>> Alex
>>
>>> On Fri, Aug 15, 2014 at 1:54 PM, Christian König <christian.koenig@amd.com> wrote:
>>>
>>>> On 15.08.2014 at 17:32, Grigori Goronzy wrote:
>>>>
>>>>> On 15.08.2014 17:26, Alex Deucher wrote:
>>>>>
>>>>>> On Fri, Aug 15, 2014 at 11:20 AM, Grigori Goronzy <greg@chown.ath.cx> wrote:
>>>>>>
>>>>>>> On 15.08.2014 16:11, Christian König wrote:
>>>>>>>
>>>>>>>> Hi Marco,
>>>>>>>>
>>>>>>>> the problem with a CS ioctl flag is that we sometimes don't know how much SCLK/MCLK boost is needed, for example when we do post processing in the player using OpenGL and UVD decoding with VDPAU. In this case VDPAU doesn't have the slightest idea how high SCLK/MCLK must be and so can't give that info to the kernel either.
>>>>>>>>
>>>>>>> Maybe it's an acceptable workaround to simply disable dynamic UVD state selection in case the UVD states only have a single power level. That will avoid the performance issues on affected systems, while still allowing dynamic UVD states on systems that have a saner DPM table setup. I think it is mostly older systems that suffer from this.
>>>>>>>
>>>>>> That is exactly what we do now.
>>>>>>
>>>>> Is it? In 3.17-wip, dynamic UVD state selection (according to active streams) is still completely disabled. It will always use the generic UVD state. In fact, wasn't it reverted again because of the performance issues on some systems?
>>>>>
>>>> This is the performance table of my laptop (at least the interesting parts), which I think is a typical example of the problem:
>>>>
>>>> [    4.106772] == power state 1 ==
>>>> [    4.106774]         ui class: performance
>>>> [    4.106776]         internal class: none
>>>> [    4.106780]         uvd    vclk: 0 dclk: 0
>>>> [    4.106782]         power level 0    sclk: 20000 vddc_index: 2
>>>> [    4.106784]         power level 1    sclk: 50000 vddc_index: 2
>>>> [    4.106805] == power state 3 ==
>>>> [    4.106807]         ui class: none
>>>> [    4.106808]         internal class: uvd
>>>> [    4.106813]         uvd    vclk: 55000 dclk: 40000
>>>> [    4.106816]         power level 0    sclk: 50000 vddc_index: 2
>>>> [    4.106818]         power level 1    sclk: 50000 vddc_index: 2
>>>> [    4.106820]         status:
>>>> [    4.106822] == power state 4 ==
>>>> [    4.106823]         ui class: battery
>>>> [    4.106825]         internal class: uvd_hd
>>>> [    4.106831]         uvd    vclk: 40000 dclk: 30000
>>>> [    4.106833]         power level 0    sclk: 38000 vddc_index: 1
>>>> [    4.106835]         power level 1    sclk: 38000 vddc_index: 1
>>>> [    4.106839] == power state 5 ==
>>>> [    4.106841]         ui class: battery
>>>> [    4.106843]         internal class: uvd_sd
>>>> [    4.106848]         uvd    vclk: 40000 dclk: 30000
>>>> [    4.106850]         power level 0    sclk: 38000 vddc_index: 2
>>>> [    4.106853]         power level 1    sclk: 38000 vddc_index: 2
>>>>
>>>> As you can see we currently always select the performance level "uvd", which results in selecting the maximum sclk/dclk and vclk. Unfortunately neither uvd, uvd_sd nor uvd_hd allows the hardware to switch the sclk once selected (it's a hardware limitation of older UVD blocks).
>>>>
>>>> So for all the cases where this is interesting you actually always have only a single power level to choose from.
>>>>
>>>> Christian.
>>>>
>>>>>>> Best regards
>>>>>>> Grigori
>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Christian.
>>>>>>>>
>>>>>>>> On 15.08.2014 at 15:21, Marco Benatto wrote:
>>>>>>>>
>>>>>>>>> Hey all,
>>>>>>>>>
>>>>>>>>> I also had a talk with Alex yesterday about post-processing issues when using dynamic UVD profiles and a change to the CS ioctl, including a flag to let the user mode driver tell the kernel which performance requirement it wants for post processing. A common point for both discussions is to establish the default values for these profiles, but this ioctl change would probably be more impacting/complex to implement than a sysfs entry.
>>>>>>>>>
>>>>>>>>> If a sysfs entry is enough for now I can handle the code to create it and, with your help, the code to set up the UVD profile requested through it.
>>>>>>>>>
>>>>>>>>> Is there any suggestion?
>>>>>>>>>
>>>>>>>>> Thanks all for your help,
>>>>>>>>>
>>>>>>>>> On Fri, Aug 15, 2014 at 5:48 AM, Christian König <christian.koenig@amd.com> wrote:
>>>>>>>>>
>>>>>>>>> Hi guys,
>>>>>>>>>
>>>>>>>>> to make a long story short: every time I watch a movie my laptop starts to heat up, because we always select the standard UVD power profile without actually measuring if that is necessary.
>>>>>>>>>
>>>>>>>>> Marco came up with a patch that seems to reliably measure the fps sent down to the kernel and so, together with knowing the frame size of the video, should allow us to select the right UVD power profile.
>>>>>>>>>
>>>>>>>>> The problem is that Alex (unnoticed by me) completely disabled selecting the UVD profiles because of some issues with advanced post processing discussed on IRC. The problem seems to be that the lower UVD profiles have too low a SCLK/MCLK to handle the 3D load that comes with scaling, deinterlacing etc...
>>>>>>>>>
>>>>>>>>> I unfortunately don't have time for it, because this only affects the hardware generations R600-SI and not the newest one, CIK. So could you guys stick together and come up with a solution? Something like a sysfs entry that lets us select the minimum UVD power level allowed?
>>>>>>>>>
>>>>>>>>> I think Marco is happy to come up with a patch, we just need to know what's really needed and what the default values should be. I'm happy to review everything that comes out of it, just don't have time to do it myself.
>>>>>>>>>
>>>>>>>>> Happy discussion and thanks in advance,
>>>>>>>>> Christian.
>>>>>>>>>
>>>>>>>>> [snip: earlier exchange and patch quoted in full]
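Grigori's workaround above (only use the stream-based UVD states when they actually offer more than one power level, which Christian's DPM dump shows they often don't) might be sketched roughly as below. The structure and helper are illustrative assumptions only; the real per-asic dpm power-state structures look different on each chip family.

```c
#include <stdbool.h>

/*
 * Illustrative only: stand-in for a per-asic UVD power state.  The real
 * driver keeps this information in asic-specific ps_priv structures.
 */
struct uvd_state_levels {
	unsigned num_levels;
	unsigned sclk[8];	/* engine clock of each level, in 10 kHz */
};

/*
 * A UVD state is only useful for dynamic, stream-based selection if the
 * hardware can actually rescale sclk within it, i.e. if it has at least
 * two distinct engine clocks.
 */
static bool uvd_state_allows_dynamic_selection(const struct uvd_state_levels *st)
{
	unsigned i;

	for (i = 1; i < st->num_levels; i++) {
		if (st->sclk[i] != st->sclk[0])
			return true;
	}
	return false;
}
```

With the table Christian posted, power states 3, 4 and 5 each report two levels with identical sclk, so a check like this would fall back to the plain UVD state on that laptop, matching his point that there is effectively only one power level to choose from.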
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 9e1732e..e92f6cb 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1617,6 +1617,15 @@ int radeon_pm_get_type_index(struct radeon_device *rdev,
 #define RADEON_UVD_STACK_SIZE	(1024*1024)
 #define RADEON_UVD_HEAP_SIZE	(1024*1024)
 
+#define RADEON_UVD_FPS_EVENTS_MAX	8
+#define RADEON_UVD_DEFAULT_FPS		60
+
+struct radeon_uvd_fps {
+	uint64_t	timestamp;
+	uint8_t		event_index;
+	uint8_t		events[RADEON_UVD_FPS_EVENTS_MAX];
+};
+
 struct radeon_uvd {
 	struct radeon_bo	*vcpu_bo;
 	void			*cpu_addr;
@@ -1626,6 +1635,7 @@ struct radeon_uvd {
 	struct drm_file		*filp[RADEON_MAX_UVD_HANDLES];
 	unsigned		img_size[RADEON_MAX_UVD_HANDLES];
 	struct delayed_work	idle_work;
+	struct radeon_uvd_fps	fps_info[RADEON_MAX_UVD_HANDLES];
 };
 
 int radeon_uvd_init(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 6bf55ec..ef5667a 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -237,6 +237,51 @@ void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo)
 	rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
 }
 
+static void radeon_uvd_fps_clear_events(struct radeon_device *rdev, int idx)
+{
+	struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx];
+	unsigned i;
+
+	fps->timestamp = jiffies_64;
+	fps->event_index = 0;
+	for (i = 0; i < RADEON_UVD_FPS_EVENTS_MAX; i++)
+		fps->events[i] = 0;
+}
+
+static void radeon_uvd_fps_note_event(struct radeon_device *rdev, int idx)
+{
+	struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx];
+	uint64_t timestamp = jiffies_64;
+	unsigned rate = 0;
+
+	uint8_t index = fps->event_index++;
+	fps->event_index %= RADEON_UVD_FPS_EVENTS_MAX;
+
+	rate = div64_u64(HZ, max(timestamp - fps->timestamp, 1ULL));
+
+	fps->timestamp = timestamp;
+	fps->events[index] = min(rate, 120u);
+}
+
+static unsigned radeon_uvd_estimate_fps(struct radeon_device *rdev, int idx)
+{
+	struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx];
+	unsigned i, valid = 0, count = 0;
+
+	for (i = 0; i < RADEON_UVD_FPS_EVENTS_MAX; i++) {
+		/* We should ignore zero values */
+		if (fps->events[i] != 0) {
+			count += fps->events[i];
+			valid++;
+		}
+	}
+
+	if (valid > 0)
+		return count / valid;
+	else
+		return RADEON_UVD_DEFAULT_FPS;
+}
+
 void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
 {
 	int i, r;
@@ -419,8 +464,10 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
 
 	/* create or decode, validate the handle */
 	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
-		if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
+		if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
+			radeon_uvd_fps_note_event(p->rdev, i);
 			return 0;
+		}
 	}
 
 	/* handle not found try to alloc a new one */
@@ -428,6 +475,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
 		if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
 			p->rdev->uvd.filp[i] = p->filp;
 			p->rdev->uvd.img_size[i] = img_size;
+			radeon_uvd_fps_clear_events(p->rdev, i);
 			return 0;
 		}
 	}
@@ -763,7 +811,7 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
 static void radeon_uvd_count_handles(struct radeon_device *rdev,
 				     unsigned *sd, unsigned *hd)
 {
-	unsigned i;
+	unsigned i, fps_rate = 0;
 
 	*sd = 0;
 	*hd = 0;
@@ -772,10 +820,13 @@ static void radeon_uvd_count_handles(struct radeon_device *rdev,
 		if (!atomic_read(&rdev->uvd.handles[i]))
 			continue;
 
-		if (rdev->uvd.img_size[i] >= 720*576)
-			++(*hd);
-		else
-			++(*sd);
+		fps_rate = radeon_uvd_estimate_fps(rdev, i);
+
+		if (rdev->uvd.img_size[i] >= 720*576) {
+			(*hd) += fps_rate > 30 ? 1 : 2;
+		} else {
+			(*sd) += fps_rate > 30 ? 1 : 2;
+		}
 	}
 }
 
@@ -805,6 +856,7 @@ void radeon_uvd_note_usage(struct radeon_device *rdev)
 	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
 					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
 
+
 	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
 		unsigned hd = 0, sd = 0;
 		radeon_uvd_count_handles(rdev, &sd, &hd);
-- 
1.9.1
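To make the arithmetic in the hunks above concrete, here is a small userspace simulation of the ring-buffer averaging done by radeon_uvd_fps_note_event() and radeon_uvd_estimate_fps(). HZ = 250 and the 8-jiffy frame spacing are example numbers only; the kernel code uses jiffies_64 and whatever HZ the kernel was built with.

```c
/* Userspace simulation of the fps estimation in the patch above. */
#include <stdio.h>

#define HZ		250	/* example tick rate; an assumption for this sketch */
#define FPS_EVENTS_MAX	8
#define DEFAULT_FPS	60

static unsigned char events[FPS_EVENTS_MAX];
static unsigned event_index;
static unsigned long long timestamp;

static void note_event(unsigned long long now)
{
	unsigned long long delta = now - timestamp;
	unsigned rate;

	if (delta < 1)
		delta = 1;
	rate = HZ / delta;			/* instantaneous fps estimate */
	events[event_index] = rate < 120 ? rate : 120;
	event_index = (event_index + 1) % FPS_EVENTS_MAX;
	timestamp = now;
}

static unsigned estimate_fps(void)
{
	unsigned i, valid = 0, count = 0;

	for (i = 0; i < FPS_EVENTS_MAX; i++) {
		if (events[i]) {		/* ignore unused slots */
			count += events[i];
			valid++;
		}
	}
	return valid ? count / valid : DEFAULT_FPS;
}

int main(void)
{
	unsigned long long now = 1000;
	int i;

	timestamp = now;
	/* A decode submission every 8 jiffies at HZ=250 is ~31 fps, so in
	 * radeon_uvd_count_handles() an HD stream at that rate would be
	 * weighted 1 instead of 2 (fps > 30). */
	for (i = 0; i < 10; i++) {
		now += 8;
		note_event(now);
	}
	printf("estimated fps: %u\n", estimate_fps());	/* prints 31 */
	return 0;
}
```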