On Mon, Jul 21, 2014 at 11:00:53PM -0700, Jordan Justen wrote: > We now skip allocating a hiz miptree for gen7. Instead, we calculate > the required hiz buffer parameters and allocate a bo directly. > > v2: > * Update hz_height calculation as suggested by Topi > > Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com> > --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 97 > ++++++++++++++++++++++++++- > 1 file changed, 95 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index d5a2227..e3bcd8a 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -823,7 +823,10 @@ intel_miptree_release(struct intel_mipmap_tree **mt) > drm_intel_bo_unreference((*mt)->bo); > intel_miptree_release(&(*mt)->stencil_mt); > if ((*mt)->hiz_buf) { > - intel_miptree_release(&(*mt)->hiz_buf->mt); > + if ((*mt)->hiz_buf->mt) > + intel_miptree_release(&(*mt)->hiz_buf->mt); > + else > + drm_intel_bo_unreference((*mt)->hiz_buf->bo); > free((*mt)->hiz_buf); > } > intel_miptree_release(&(*mt)->mcs_mt); > @@ -1374,6 +1377,91 @@ intel_miptree_level_enable_hiz(struct brw_context *brw, > } > > > +/** > + * Helper for intel_miptree_alloc_hiz() that determines the required hiz > + * buffer dimensions and allocates a bo for the hiz buffer. > + */ > +static struct intel_miptree_aux_buffer * > +intel_gen7_hiz_buf_create(struct brw_context *brw, > + struct intel_mipmap_tree *mt) > +{ > + unsigned z_width = mt->logical_width0; > + unsigned z_height = mt->logical_height0; > + const unsigned z_depth = mt->logical_depth0; > + unsigned hz_width, hz_height; > + struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1); > + > + if (!buf) > + return NULL; > + > + /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents > + * adjustments required for Z_Height and Z_Width based on multisampling. > + */ > + switch(mt->num_samples) {
I'm new here, but you need a space, I think. > + case 0: > + case 1: > + break; > + case 2: > + case 4: > + z_width *= 2; > + z_height *= 2; > + break; > + case 8: > + z_width *= 4; > + z_height *= 2; > + break; > + default: > + assert(!"Unsupported sample count!"); > + } > + > + const unsigned vertical_align = 8; /* 'j' in the docs */ j seems to vary based on surface type (if I am reading this correctly). 8 seems safe in all cases. > + const unsigned H0 = z_height; > + const unsigned h0 = ALIGN(H0, vertical_align); > + const unsigned h1 = ALIGN(minify(H0, 1), vertical_align); I'm having trouble verifying this. On gen8+ it seems this shouldn't be aligned, ie. h1 = mt->logical_height0. Can you help me find why you are doing this operation. > + const unsigned Z0 = z_depth; > + > + /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */ > + hz_width = ALIGN(z_width, 16); > + > + if (mt->target == GL_TEXTURE_3D) { > + unsigned H_i = H0; > + unsigned Z_i = Z0; > + hz_height = 0; > + for (int level = mt->first_level; level <= mt->last_level; ++level) { > + unsigned h_i = ALIGN(H_i, vertical_align); > + /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */ You could optimize this I think, if z_depth is 0 we can ignore this whole operation. Not sure if that ever happens or not. > + hz_height += h_i * Z_i; > + H_i = minify(H_i, 1); > + Z_i = minify(Z_i, 1); I want to discuss this with you in person. > + } > + /* HZ_Height = > + * (1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) > + */ > + hz_height = CEILING(hz_height, 2); > + } else { > + const unsigned hz_qpitch = h0 + h1 + (12 * vertical_align); > + if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY || > + mt->target == GL_TEXTURE_CUBE_MAP) { > + /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth * 6/2) /8 ) * 8 > */ > + hz_height = CEILING(hz_qpitch * Z0 * 6, 2 * 8) * 8; > + } else { > + /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */ > + hz_height = CEILING(hz_qpitch * Z0, 2 * 8) * 8; > + } > + } > + > + unsigned long pitch; > + uint32_t tiling = I915_TILING_Y; > + buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz", > + hz_width, hz_height, 1, > + &tiling, &pitch, > + BO_ALLOC_FOR_RENDER); I think you need to check tiling here (didn't check pitch). > + buf->pitch = pitch; > + > + return buf; > +} > + > + > static struct intel_miptree_aux_buffer * > intel_hiz_miptree_buf_create(struct brw_context *brw, > struct intel_mipmap_tree *mt) > @@ -1412,7 +1500,12 @@ intel_miptree_alloc_hiz(struct brw_context *brw, > struct intel_mipmap_tree *mt) > { > assert(mt->hiz_buf == NULL); > - mt->hiz_buf = intel_hiz_miptree_buf_create(brw, mt); > + > + if (brw->gen == 7) { > + mt->hiz_buf = intel_gen7_hiz_buf_create(brw, mt); > + } else { > + mt->hiz_buf = intel_hiz_miptree_buf_create(brw, mt); > + } > > if (!mt->hiz_buf) > return false; I think you need to check mt->hiz_buf->bo as the code currently works. IMO however, it'd be preferable to just do the check in intel_gen7_hiz_buf_create() > -- -- Ben Widawsky, Intel Open Source Technology Center _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev