On Mon, 2010-06-07 at 10:28 +0800, Xiang, Haihao wrote:
> On Sat, 2010-06-05 at 08:16 +0800, Eric Anholt wrote:
> > We had two cases recently where the rounding to powers of two hurt
> > badly: 4:2:0 YUV HD video frames would round up from 2.2MB to 4MB,
> > Urban Terror was hitting aperture size limitations.  Mipmap trees for
> > power of two sizes will land right in the middle between two cache
> > buckets.
> >
> > By giving a few more sizes between powers of two, Urban Terror on my
> > 945 ends up consuming 207MB of GEM objects instead of 272MB.
> > ---
> >  intel/intel_bufmgr_gem.c |   64 +++++++++++++++++++++++++++++++++++-----------
> >  1 files changed, 49 insertions(+), 15 deletions(-)
> >
> > diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> > index b76fd7e..c3e189e 100644
> > --- a/intel/intel_bufmgr_gem.c
> > +++ b/intel/intel_bufmgr_gem.c
> > @@ -66,6 +66,8 @@
> >  	fprintf(stderr, __VA_ARGS__); \
> >  } while (0)
> >
> > +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
> > +
> >  typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
> >
> >  struct drm_intel_gem_bo_bucket {
> > @@ -73,10 +75,6 @@ struct drm_intel_gem_bo_bucket {
> >  	unsigned long size;
> >  };
> >
> > -/* Only cache objects up to 64MB.  Bigger than that, and the rounding of the
> > - * size makes many operations fail that wouldn't otherwise.
> > - */
> > -#define DRM_INTEL_GEM_BO_BUCKETS 14
> >  typedef struct _drm_intel_bufmgr_gem {
> >  	drm_intel_bufmgr bufmgr;
> >
> > @@ -93,7 +91,8 @@ typedef struct _drm_intel_bufmgr_gem {
> >  	int exec_count;
> >
> >  	/** Array of lists of cached gem objects of power-of-two sizes */
> > -	struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];
> > +	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
> > +	int num_buckets;
> >
> >  	uint64_t gtt_size;
> >  	int available_fences;
> > @@ -285,7 +284,7 @@ drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
> >  {
> >  	int i;
> >
> > -	for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
> > +	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
> >  		struct drm_intel_gem_bo_bucket *bucket =
> >  		    &bufmgr_gem->cache_bucket[i];
> >  		if (bucket->size >= size) {
> > @@ -822,7 +821,7 @@ drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
> >  {
> >  	int i;
> >
> > -	for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
> > +	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
> >  		struct drm_intel_gem_bo_bucket *bucket =
> >  		    &bufmgr_gem->cache_bucket[i];
> >
> > @@ -1250,7 +1249,7 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
> >  	pthread_mutex_destroy(&bufmgr_gem->lock);
> >
> >  	/* Free any cached buffer objects we were going to reuse */
> > -	for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
> > +	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
> >  		struct drm_intel_gem_bo_bucket *bucket =
> >  		    &bufmgr_gem->cache_bucket[i];
> >  		drm_intel_bo_gem *bo_gem;
> > @@ -1960,6 +1959,46 @@ drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
> >  	return 0;
> >  }
> >
> > +static void
> > +add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
> > +{
> > +	unsigned int i = bufmgr_gem->num_buckets;
> > +
> > +	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
> > +
> > +	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
> > +	bufmgr_gem->cache_bucket[i].size = size;
> > +	bufmgr_gem->num_buckets++;
> > +}
> > +
> > +static void
> > +init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
> > +{
> > +	unsigned long size, cache_max_size = 64 * 1024 * 1024;
> > +
> > +	/* Initialize the linked lists for BO reuse cache. */
> > +	for (size = 4096; size <= cache_max_size; size *= 2) {
> > +		add_bucket(bufmgr_gem, size);
> > +
> > +		/* OK, so power of two buckets was too wasteful of
> > +		 * memory.  Give 3 other sizes between each power of
> > +		 * two, to hopefully cover things accurately enough.
> > +		 * (The alternative is probably to just go for exact
> > +		 * matching of sizes, and assume that for things like
> > +		 * composited window resize the tiled width/height
> > +		 * alignment and rounding of sizes to pages will get
> > +		 * us useful cache hit rates anyway)
> > +		 */
> > +		if (size == 8192) {
> > +			add_bucket(bufmgr_gem, size + size / 2);
> > +		} else if (size < cache_max_size) {
> > +			add_bucket(bufmgr_gem, size + size * 1 / 4);
> > +			add_bucket(bufmgr_gem, size + size * 2 / 4);
> > +			add_bucket(bufmgr_gem, size + size * 3 / 4);
> > +		}
> > +	}
> > +}
> > +
> >  /**
> >   * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
> >   * and manage map buffer objections.
> > @@ -1972,8 +2011,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
> >  	drm_intel_bufmgr_gem *bufmgr_gem;
> >  	struct drm_i915_gem_get_aperture aperture;
> >  	drm_i915_getparam_t gp;
> > -	int ret, i;
> > -	unsigned long size;
> > +	int ret;
> >  	int exec2 = 0;
> >
> >  	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
> > @@ -2092,11 +2130,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
> >  	    drm_intel_gem_get_pipe_from_crtc_id;
> >  	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
> >
> > -	/* Initialize the linked lists for BO reuse cache. */
> > -	for (i = 0, size = 4096; i < DRM_INTEL_GEM_BO_BUCKETS; i++, size *= 2) {
> > -		DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
> > -		bufmgr_gem->cache_bucket[i].size = size;
> > -	}
> > +	init_cache_buckets(bufmgr_gem);
> >
> >  	return &bufmgr_gem->bufmgr;
> >  }
>
> ACK.
>
> This patch saves about 24M of GEM objects when playing a 1920x800, 4:2:0
> HD video with VAAPI.
To give more detail: it cuts GEM object usage by 24M from a 99M total, and
another test shows a saving of 39M from a 181M total.

Thanks
Haihao

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
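
For readers who want to see where these savings come from, here is a small
standalone sketch. It is not part of the patch or of libdrm: the file name
bucket_demo.c and all the helpers in it are made up for illustration. It
copies the size progression from init_cache_buckets() in the patch above and
compares the bucket a 1920x800 4:2:0 frame would land in under the old
power-of-two-only scheme versus the new one, ignoring the pitch/tiling
alignment that real surfaces also get.

/* bucket_demo.c -- illustrative only, not libdrm code.
 * Build: cc -o bucket_demo bucket_demo.c
 */
#include <stdio.h>

#define CACHE_MAX_SIZE (64UL * 1024 * 1024)
#define MAX_BUCKETS    (14 * 4)	/* mirrors cache_bucket[14 * 4] in the patch */

static unsigned long buckets[MAX_BUCKETS];
static int num_buckets;

static void add_bucket(unsigned long size)
{
	buckets[num_buckets++] = size;
}

/* Same size progression as the patch: each power of two from 4KB to 64MB,
 * plus 12KB after 8KB and three quarter-steps between the larger powers. */
static void init_cache_buckets(void)
{
	unsigned long size;

	for (size = 4096; size <= CACHE_MAX_SIZE; size *= 2) {
		add_bucket(size);

		if (size == 8192) {
			add_bucket(size + size / 2);
		} else if (size < CACHE_MAX_SIZE) {
			add_bucket(size + size * 1 / 4);
			add_bucket(size + size * 2 / 4);
			add_bucket(size + size * 3 / 4);
		}
	}
}

/* Smallest bucket that fits the request, like drm_intel_gem_bo_bucket_for_size(). */
static unsigned long bucket_for_size(unsigned long size)
{
	int i;

	for (i = 0; i < num_buckets; i++)
		if (buckets[i] >= size)
			return buckets[i];
	return 0;	/* too big to cache */
}

/* Plain power-of-two rounding, i.e. what the old 14-bucket scheme handed out. */
static unsigned long pow2_bucket_for_size(unsigned long size)
{
	unsigned long b;

	for (b = 4096; b < size; b *= 2)
		;
	return b;
}

int main(void)
{
	/* 1920x800 4:2:0 frame, ignoring pitch/tiling alignment. */
	unsigned long frame = 1920UL * 800 * 3 / 2;

	init_cache_buckets();

	printf("request:    %lu bytes (~%.1f MB)\n", frame, frame / 1048576.0);
	printf("old bucket: %lu bytes (~%.1f MB)\n",
	       pow2_bucket_for_size(frame), pow2_bucket_for_size(frame) / 1048576.0);
	printf("new bucket: %lu bytes (~%.1f MB)\n",
	       bucket_for_size(frame), bucket_for_size(frame) / 1048576.0);
	printf("bucket count: %d (was 14)\n", num_buckets);
	return 0;
}

Under these assumptions the ~2.2MB frame is rounded up to a 4MB bucket by the
old scheme but only to a 2.5MB bucket by the new one, which is the kind of
per-buffer waste the totals quoted above are measuring.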