Am 07.08.2014 um 21:43 schrieb Alex Deucher: > On Thu, Aug 7, 2014 at 11:32 AM, Christian K?nig > <deathsimple at vodafone.de> wrote: >> Am 07.08.2014 um 16:32 schrieb Alex Deucher: >> >>> On Thu, Aug 7, 2014 at 7:33 AM, Christian K?nig <deathsimple at vodafone.de> >>> wrote: >>>> From: Marco A Benatto <marco.antonio.780 at gmail.com> >>>> >>>> Adding a Frames Per Second estimation logic on UVD handles >>>> when it has being used. This estimation is per handle basis >>>> and will help on DPM profile calculation. >>>> >>>> v2 (chk): fix timestamp type, move functions around and >>>> cleanup code a bit. >>> Will this really help much? I thought the problem was mainly due to >>> sclk and mclk for post processing. >> >> It should at least handle the UVD side for upclocking when you get a lot of >> streams / fps. And at on my NI the patch seems to do exactly that. >> >> Switching sclk and mclk for post processing is a different task, and I >> actually have no idea what to do with them. > At this point we always choose the plain UVD state anyway so this > patch would only take effect if we re-enabled the dynamic UVD state > selection.
Hui? I thought we already re-enabled the dynamic UVD state selection, but double checking this I found it disabled again. What was the problem with that? Looks like I somehow missed the discussion around it. Christian. > For the post processing, we probably need a hint we can > pass to the driver in the CS ioctl to denote what state we need. > Although if we did that, this could would largely be moot. That said, > newer asics support dynamic UVD clocks so we really only need > something like that for older asics and I guess VCE. > > Alex > >> Christian. >> >> >>> Alex >>> >>>> Signed-off-by: Marco A Benatto <marco.antonio.780 at gmail.com> >>>> Signed-off-by: Christian K?nig <christian.koenig at amd.com> >>>> --- >>>> drivers/gpu/drm/radeon/radeon.h | 10 ++++++ >>>> drivers/gpu/drm/radeon/radeon_uvd.c | 64 >>>> +++++++++++++++++++++++++++++++++---- >>>> 2 files changed, 68 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/radeon/radeon.h >>>> b/drivers/gpu/drm/radeon/radeon.h >>>> index 9e1732e..e92f6cb 100644 >>>> --- a/drivers/gpu/drm/radeon/radeon.h >>>> +++ b/drivers/gpu/drm/radeon/radeon.h >>>> @@ -1617,6 +1617,15 @@ int radeon_pm_get_type_index(struct radeon_device >>>> *rdev, >>>> #define RADEON_UVD_STACK_SIZE (1024*1024) >>>> #define RADEON_UVD_HEAP_SIZE (1024*1024) >>>> >>>> +#define RADEON_UVD_FPS_EVENTS_MAX 8 >>>> +#define RADEON_UVD_DEFAULT_FPS 60 >>>> + >>>> +struct radeon_uvd_fps { >>>> + uint64_t timestamp; >>>> + uint8_t event_index; >>>> + uint8_t events[RADEON_UVD_FPS_EVENTS_MAX]; >>>> +}; >>>> + >>>> struct radeon_uvd { >>>> struct radeon_bo *vcpu_bo; >>>> void *cpu_addr; >>>> @@ -1626,6 +1635,7 @@ struct radeon_uvd { >>>> struct drm_file *filp[RADEON_MAX_UVD_HANDLES]; >>>> unsigned img_size[RADEON_MAX_UVD_HANDLES]; >>>> struct delayed_work idle_work; >>>> + struct radeon_uvd_fps fps_info[RADEON_MAX_UVD_HANDLES]; >>>> }; >>>> >>>> int radeon_uvd_init(struct radeon_device *rdev); >>>> diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c >>>> b/drivers/gpu/drm/radeon/radeon_uvd.c >>>> index 6bf55ec..ef5667a 100644 >>>> --- a/drivers/gpu/drm/radeon/radeon_uvd.c >>>> +++ b/drivers/gpu/drm/radeon/radeon_uvd.c >>>> @@ -237,6 +237,51 @@ void radeon_uvd_force_into_uvd_segment(struct >>>> radeon_bo *rbo) >>>> rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; >>>> } >>>> >>>> +static void radeon_uvd_fps_clear_events(struct radeon_device *rdev, int >>>> idx) >>>> +{ >>>> + struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx]; >>>> + unsigned i; >>>> + >>>> + fps->timestamp = jiffies_64; >>>> + fps->event_index = 0; >>>> + for (i = 0; i < RADEON_UVD_FPS_EVENTS_MAX; i++) >>>> + fps->events[i] = 0; >>>> +} >>>> + >>>> +static void radeon_uvd_fps_note_event(struct radeon_device *rdev, int >>>> idx) >>>> +{ >>>> + struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx]; >>>> + uint64_t timestamp = jiffies_64; >>>> + unsigned rate = 0; >>>> + >>>> + uint8_t index = fps->event_index++; >>>> + fps->event_index %= RADEON_UVD_FPS_EVENTS_MAX; >>>> + >>>> + rate = div64_u64(HZ, max(timestamp - fps->timestamp, 1ULL)); >>>> + >>>> + fps->timestamp = timestamp; >>>> + fps->events[index] = min(rate, 120u); >>>> +} >>>> + >>>> +static unsigned radeon_uvd_estimate_fps(struct radeon_device *rdev, int >>>> idx) >>>> +{ >>>> + struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx]; >>>> + unsigned i, valid = 0, count = 0; >>>> + >>>> + for (i = 0; i < RADEON_UVD_FPS_EVENTS_MAX; i++) { >>>> + /* We should ignore zero values */ >>>> + if (fps->events[i] != 0) { >>>> + count += fps->events[i]; >>>> + valid++; >>>> + } >>>> + } >>>> + >>>> + if (valid > 0) >>>> + return count / valid; >>>> + else >>>> + return RADEON_UVD_DEFAULT_FPS; >>>> +} >>>> + >>>> void radeon_uvd_free_handles(struct radeon_device *rdev, struct >>>> drm_file *filp) >>>> { >>>> int i, r; >>>> @@ -419,8 +464,10 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser >>>> *p, struct radeon_bo *bo, >>>> >>>> /* create or decode, validate the handle */ >>>> for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { >>>> - if (atomic_read(&p->rdev->uvd.handles[i]) == handle) >>>> + if (atomic_read(&p->rdev->uvd.handles[i]) == handle) { >>>> + radeon_uvd_fps_note_event(p->rdev, i); >>>> return 0; >>>> + } >>>> } >>>> >>>> /* handle not found try to alloc a new one */ >>>> @@ -428,6 +475,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser >>>> *p, struct radeon_bo *bo, >>>> if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, >>>> handle)) { >>>> p->rdev->uvd.filp[i] = p->filp; >>>> p->rdev->uvd.img_size[i] = img_size; >>>> + radeon_uvd_fps_clear_events(p->rdev, i); >>>> return 0; >>>> } >>>> } >>>> @@ -763,7 +811,7 @@ int radeon_uvd_get_destroy_msg(struct radeon_device >>>> *rdev, int ring, >>>> static void radeon_uvd_count_handles(struct radeon_device *rdev, >>>> unsigned *sd, unsigned *hd) >>>> { >>>> - unsigned i; >>>> + unsigned i, fps_rate = 0; >>>> >>>> *sd = 0; >>>> *hd = 0; >>>> @@ -772,10 +820,13 @@ static void radeon_uvd_count_handles(struct >>>> radeon_device *rdev, >>>> if (!atomic_read(&rdev->uvd.handles[i])) >>>> continue; >>>> >>>> - if (rdev->uvd.img_size[i] >= 720*576) >>>> - ++(*hd); >>>> - else >>>> - ++(*sd); >>>> + fps_rate = radeon_uvd_estimate_fps(rdev, i); >>>> + >>>> + if (rdev->uvd.img_size[i] >= 720*576) { >>>> + (*hd) += fps_rate > 30 ? 1 : 2; >>>> + } else { >>>> + (*sd) += fps_rate > 30 ? 1 : 2; >>>> + } >>>> } >>>> } >>>> >>>> @@ -805,6 +856,7 @@ void radeon_uvd_note_usage(struct radeon_device >>>> *rdev) >>>> set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work, >>>> >>>> msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); >>>> >>>> + >>>> if ((rdev->pm.pm_method == PM_METHOD_DPM) && >>>> rdev->pm.dpm_enabled) { >>>> unsigned hd = 0, sd = 0; >>>> radeon_uvd_count_handles(rdev, &sd, &hd); >>>> -- >>>> 1.9.1 >>>>