Yeah, this will help bindless a little bit, because the winsys overhead becomes high with a huge buffer list. Though there aren't a ton of HTILE buffers.

A little comment on patch 4.

Patches 3-6 are:

Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>

On 06/07/2017 09:50 PM, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/r600/evergreen_state.c    |  6 +--
  src/gallium/drivers/r600/r600_blit.c          |  2 +-
  src/gallium/drivers/r600/r600_state.c         |  6 +--
  src/gallium/drivers/radeon/r600_pipe_common.h |  2 +-
  src/gallium/drivers/radeon/r600_texture.c     | 57 ++++++++++++---------------
  src/gallium/drivers/radeonsi/si_blit.c        |  2 +-
  src/gallium/drivers/radeonsi/si_descriptors.c |  9 +----
  src/gallium/drivers/radeonsi/si_state.c       | 16 +++-----
  8 files changed, 41 insertions(+), 59 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index c3b939f..9595351 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1386,22 +1386,22 @@ static void evergreen_init_depth_surface(struct 
r600_context *rctx,
        } else {
                surf->db_stencil_base = offset;
                /* DRM 2.6.18 allows the INVALID format to disable stencil.
                 * Older kernels are out of luck. */
                surf->db_stencil_info = rctx->screen->b.info.drm_minor >= 18 ?
                                        
S_028044_FORMAT(V_028044_STENCIL_INVALID) :
                                        S_028044_FORMAT(V_028044_STENCIL_8);
        }
/* use htile only for first level */
-       if (rtex->htile_buffer && !level) {
-               uint64_t va = rtex->htile_buffer->gpu_address;
+       if (rtex->htile_offset && !level) {
+               uint64_t va = rtex->resource.gpu_address + rtex->htile_offset;
                surf->db_htile_data_base = va >> 8;
                surf->db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
                                         S_028ABC_HTILE_HEIGHT(1) |
                                         S_028ABC_FULL_CACHE(1);
                surf->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
                surf->db_preload_control = 0;
        }
surf->depth_initialized = true;
  }
@@ -1869,21 +1869,21 @@ static void evergreen_emit_db_state(struct r600_context 
*rctx, struct r600_atom
        struct r600_db_state *a = (struct r600_db_state*)atom;
if (a->rsurf && a->rsurf->db_htile_surface) {
                struct r600_texture *rtex = (struct r600_texture 
*)a->rsurf->base.texture;
                unsigned reloc_idx;
radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
                radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 
a->rsurf->db_htile_surface);
                radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 
a->rsurf->db_preload_control);
                radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, 
a->rsurf->db_htile_data_base);
-               reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 
rtex->htile_buffer,
+               reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 
&rtex->resource,
                                                  RADEON_USAGE_READWRITE, 
RADEON_PRIO_HTILE);
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, reloc_idx);
        } else {
                radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
                radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
        }
  }
static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
diff --git a/src/gallium/drivers/r600/r600_blit.c 
b/src/gallium/drivers/r600/r600_blit.c
index 80aa9c0..79505d5 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -437,21 +437,21 @@ static void r600_clear(struct pipe_context *ctx, unsigned 
buffers,
                struct r600_texture *rtex;
                unsigned level = fb->zsbuf->u.tex.level;
rtex = (struct r600_texture*)fb->zsbuf->texture; /* We can't use hyperz fast clear if each slice of a texture
                 * array are clear to different value. To simplify code just
                 * disable fast clear for texture array.
                 */
                /* Only use htile for first level */
-               if (rtex->htile_buffer && !level &&
+               if (rtex->htile_offset && !level &&
                     fb->zsbuf->u.tex.first_layer == 0 &&
                     fb->zsbuf->u.tex.last_layer == 
util_max_layer(&rtex->resource.b.b, level)) {
                        if (rtex->depth_clear_value != depth) {
                                rtex->depth_clear_value = depth;
                                r600_mark_atom_dirty(rctx, 
&rctx->db_state.atom);
                        }
                        rctx->db_misc_state.htile_clear = true;
                        r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
                }
        }
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 2001cfd..dca8fe5 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1054,22 +1054,22 @@ static void r600_init_depth_surface(struct r600_context 
*rctx,
        assert(format != ~0);
surf->db_depth_info = S_028010_ARRAY_MODE(array_mode) | S_028010_FORMAT(format);
        surf->db_depth_base = offset >> 8;
        surf->db_depth_view = 
S_028004_SLICE_START(surf->base.u.tex.first_layer) |
                              S_028004_SLICE_MAX(surf->base.u.tex.last_layer);
        surf->db_depth_size = S_028000_PITCH_TILE_MAX(pitch) | 
S_028000_SLICE_TILE_MAX(slice);
        surf->db_prefetch_limit = (rtex->surface.u.legacy.level[level].nblk_y / 
8) - 1;
/* use htile only for first level */
-       if (rtex->htile_buffer && !level) {
-               surf->db_htile_data_base = 0;
+       if (rtex->htile_offset && !level) {
+               surf->db_htile_data_base = rtex->htile_offset >> 8;
                surf->db_htile_surface = S_028D24_HTILE_WIDTH(1) |
                                         S_028D24_HTILE_HEIGHT(1) |
                                         S_028D24_FULL_CACHE(1);
                /* preload is not working properly on r6xx/r7xx */
                surf->db_depth_info |= S_028010_TILE_SURFACE_ENABLE(1);
        }
surf->depth_initialized = true;
  }
@@ -1536,21 +1536,21 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
        struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
        struct r600_db_state *a = (struct r600_db_state*)atom;
if (a->rsurf && a->rsurf->db_htile_surface) {
                struct r600_texture *rtex = (struct r600_texture 
*)a->rsurf->base.texture;
                unsigned reloc_idx;
radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
                radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 
a->rsurf->db_htile_surface);
                radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, 
a->rsurf->db_htile_data_base);
-               reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 
rtex->htile_buffer,
+               reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, 
&rtex->resource,
                                                  RADEON_USAGE_READWRITE, 
RADEON_PRIO_HTILE);
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, reloc_idx);
        } else {
                radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 0);
        }
  }
static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
  {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index b17b690..84d38fb 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -225,21 +225,21 @@ struct r600_texture {
        /* Colorbuffer compression and fast clear. */
        struct r600_fmask_info          fmask;
        struct r600_cmask_info          cmask;
        struct r600_resource            *cmask_buffer;
        uint64_t                        dcc_offset; /* 0 = disabled */
        unsigned                        cb_color_info; /* fast clear enable bit 
*/
        unsigned                        color_clear_value[2];
        unsigned                        last_msaa_resolve_target_micro_mode;
/* Depth buffer compression and fast clear. */
-       struct r600_resource            *htile_buffer;
+       uint64_t                        htile_offset;
        bool                            tc_compatible_htile;
        bool                            depth_cleared; /* if it was cleared at 
least once */
        float                           depth_clear_value;
        bool                            stencil_cleared; /* if it was cleared 
at least once */
        uint8_t                         stencil_clear_value;
bool non_disp_tiling; /* R600-Cayman only */ /* Whether the texture is a displayable back buffer and needs DCC
         * decompression, which is expensive. Therefore, it's enabled only
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index d00f05b..32275b1 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -502,21 +502,21 @@ static void r600_degrade_tile_mode_to_linear(struct 
r600_common_context *rctx,
        rtex->resource.bo_size = new_tex->resource.bo_size;
        rtex->resource.bo_alignment = new_tex->resource.bo_alignment;
        rtex->resource.domains = new_tex->resource.domains;
        rtex->resource.flags = new_tex->resource.flags;
        rtex->size = new_tex->size;
        rtex->surface = new_tex->surface;
        rtex->non_disp_tiling = new_tex->non_disp_tiling;
        rtex->cb_color_info = new_tex->cb_color_info;
        rtex->cmask = new_tex->cmask; /* needed even without CMASK */
- assert(!rtex->htile_buffer);
+       assert(!rtex->htile_offset);
        assert(!rtex->cmask.size);
        assert(!rtex->fmask.size);
        assert(!rtex->dcc_offset);
        assert(!rtex->is_depth);
r600_texture_reference(&new_tex, NULL); p_atomic_inc(&rctx->screen->dirty_tex_counter);
  }
@@ -605,21 +605,20 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
  }
static void r600_texture_destroy(struct pipe_screen *screen,
                                 struct pipe_resource *ptex)
  {
        struct r600_texture *rtex = (struct r600_texture*)ptex;
        struct r600_resource *resource = &rtex->resource;
r600_texture_reference(&rtex->flushed_depth_texture, NULL); - r600_resource_reference(&rtex->htile_buffer, NULL);
        if (rtex->cmask_buffer != &rtex->resource) {
            r600_resource_reference(&rtex->cmask_buffer, NULL);
        }
        pb_reference(&resource->buf, NULL);
        r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
        r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
        FREE(rtex);
  }
static const struct u_resource_vtbl r600_texture_vtbl;
@@ -922,47 +921,28 @@ static void r600_texture_get_htile_size(struct 
r600_common_screen *rscreen,
rtex->surface.htile_alignment = base_align;
        rtex->surface.htile_size =
                (util_max_layer(&rtex->resource.b.b, 0) + 1) *
                align(slice_bytes, base_align);
  }
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
                                        struct r600_texture *rtex)
  {
-       uint32_t clear_value;
-
-       if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile) {
-               clear_value = 0x0000030F;
-       } else {
+       if (rscreen->chip_class <= VI && !rtex->tc_compatible_htile)
                r600_texture_get_htile_size(rscreen, rtex);
-               clear_value = 0;
-       }
if (!rtex->surface.htile_size)
                return;
- rtex->htile_buffer = (struct r600_resource*)
-               r600_aligned_buffer_create(&rscreen->b,
-                                          R600_RESOURCE_FLAG_UNMAPPABLE,
-                                          PIPE_USAGE_DEFAULT,
-                                          rtex->surface.htile_size,
-                                          rtex->surface.htile_alignment);
-       if (rtex->htile_buffer == NULL) {
-               /* this is not a fatal error as we can still keep rendering
-                * without htile buffer */
-               R600_ERR("Failed to create buffer object for htile buffer.\n");
-       } else {
-               r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
-                                        0, rtex->surface.htile_size,
-                                        clear_value);
-       }
+       rtex->htile_offset = align(rtex->size, rtex->surface.htile_alignment);
+       rtex->size = rtex->htile_offset + rtex->surface.htile_size;
  }
void r600_print_texture_info(struct r600_common_screen *rscreen,
                             struct r600_texture *rtex, FILE *f)
  {
        int i;
/* Common parameters. */
        fprintf(f, "  Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
                "blk_h=%u, array_size=%u, last_level=%u, "
@@ -997,25 +977,26 @@ void r600_print_texture_info(struct r600_common_screen 
*rscreen,
                if (rtex->cmask.size) {
                        fprintf(f, "  CMask: offset=%"PRIu64", size=%"PRIu64", "
                                "alignment=%u, rb_aligned=%u, 
pipe_aligned=%u\n",
                                rtex->cmask.offset,
                                rtex->surface.u.gfx9.cmask_size,
                                rtex->surface.u.gfx9.cmask_alignment,
                                rtex->surface.u.gfx9.cmask.rb_aligned,
                                rtex->surface.u.gfx9.cmask.pipe_aligned);
                }
- if (rtex->htile_buffer) {
-                       fprintf(f, "  HTile: size=%u, alignment=%u, "
+               if (rtex->htile_offset) {
+                       fprintf(f, "  HTile: offset=%"PRIu64", size=%"PRIu64", 
alignment=%u, "
                                "rb_aligned=%u, pipe_aligned=%u\n",
-                               rtex->htile_buffer->b.b.width0,
-                               rtex->htile_buffer->buf->alignment,
+                               rtex->htile_offset,
+                               rtex->surface.htile_size,
+                               rtex->surface.htile_alignment,
                                rtex->surface.u.gfx9.htile.rb_aligned,
                                rtex->surface.u.gfx9.htile.pipe_aligned);
                }
if (rtex->dcc_offset) {
                        fprintf(f, "  DCC: offset=%"PRIu64", size=%"PRIu64", "
                                "alignment=%u, pitch_max=%u, 
num_dcc_levels=%u\n",
                                rtex->dcc_offset, rtex->surface.dcc_size,
                                rtex->surface.dcc_alignment,
                                rtex->surface.u.gfx9.dcc_pitch_max,
@@ -1044,24 +1025,25 @@ void r600_print_texture_info(struct r600_common_screen 
*rscreen,
                        rtex->fmask.offset, rtex->fmask.size, 
rtex->fmask.alignment,
                        rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
                        rtex->fmask.slice_tile_max, 
rtex->fmask.tile_mode_index);
if (rtex->cmask.size)
                fprintf(f, "  CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, 
"
                        "slice_tile_max=%u\n",
                        rtex->cmask.offset, rtex->cmask.size, 
rtex->cmask.alignment,
                        rtex->cmask.slice_tile_max);
- if (rtex->htile_buffer)
-               fprintf(f, "  HTile: size=%u, alignment=%u, TC_compatible = 
%u\n",
-                       rtex->htile_buffer->b.b.width0,
-                       rtex->htile_buffer->buf->alignment,
+       if (rtex->htile_offset)
+               fprintf(f, "  HTile: offset=%"PRIu64", size=%"PRIu64", "
+                       "alignment=%u, TC_compatible = %u\n",
+                       rtex->htile_offset, rtex->surface.htile_size,
+                       rtex->surface.htile_alignment,
                        rtex->tc_compatible_htile);
if (rtex->dcc_offset) {
                fprintf(f, "  DCC: offset=%"PRIu64", size=%"PRIu64", 
alignment=%u\n",
                        rtex->dcc_offset, rtex->surface.dcc_size,
                        rtex->surface.dcc_alignment);
                for (i = 0; i <= rtex->resource.b.b.last_level; i++)
                        fprintf(f, "  DCCLevel[%i]: enabled=%u, offset=%"PRIu64", 
"
                                "fast_clear_size=%"PRIu64"\n",
                                i, i < rtex->surface.num_dcc_levels,
@@ -1235,20 +1217,31 @@ r600_texture_create_object(struct pipe_screen *screen,
                else if (resource->domains & RADEON_DOMAIN_GTT)
                        resource->gart_usage = buf->size;
        }
if (rtex->cmask.size) {
                /* Initialize the cmask to 0xCC (= compressed state). */
                r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
                                         rtex->cmask.offset, rtex->cmask.size,
                                         0xCCCCCCCC);
        }
+       if (rtex->htile_offset) {
+               uint32_t clear_value = 0;
+
+               if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile)
+                       clear_value = 0x0000030F;
+
+               r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
+                                        rtex->htile_offset,
+                                        rtex->surface.htile_size,
+                                        clear_value);
+       }
/* Initialize DCC only if the texture is not being imported. */
        if (!buf && rtex->dcc_offset) {
                r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
                                         rtex->dcc_offset,
                                         rtex->surface.dcc_size,
                                         0xFFFFFFFF);
        }
/* Initialize the CMASK base register value. */
diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index e39ba62..74bc2e9 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -719,21 +719,21 @@ static void si_clear(struct pipe_context *ctx, unsigned 
buffers,
if (!fb->cbufs[i])
                                continue;
tex = (struct r600_texture *)fb->cbufs[i]->texture;
                        if (tex->fmask.size == 0)
                                tex->dirty_level_mask &= ~(1 << 
fb->cbufs[i]->u.tex.level);
                }
        }
- if (zstex && zstex->htile_buffer &&
+       if (zstex && zstex->htile_offset &&
            zsbuf->u.tex.level == 0 &&
            zsbuf->u.tex.first_layer == 0 &&
            zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) 
{
                /* TC-compatible HTILE only supports depth clears to 0 or 1. */
                if (buffers & PIPE_CLEAR_DEPTH &&
                    (!zstex->tc_compatible_htile ||
                     depth == 0 || depth == 1)) {
                        /* Need to disable EXPCLEAR temporarily if clearing
                         * to a new value. */
                        if (!zstex->depth_cleared || zstex->depth_clear_value 
!= depth) {
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 3aa2b9d..0e8606f 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -330,27 +330,20 @@ static void si_sampler_view_add_buffer(struct si_context 
*sctx,
        if (resource->target == PIPE_BUFFER)
                return;
/* Now add separate DCC or HTILE. */
        rtex = (struct r600_texture*)resource;
        if (rtex->dcc_separate_buffer) {
                radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
                                                    rtex->dcc_separate_buffer, 
usage,
                                                    RADEON_PRIO_DCC, check_mem);
        }
-
-       if (rtex->htile_buffer &&
-           rtex->tc_compatible_htile) {
-               radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
-                                                   rtex->htile_buffer, usage,
-                                                   RADEON_PRIO_HTILE, 
check_mem);
-       }
  }
static void si_sampler_views_begin_new_cs(struct si_context *sctx,
                                          struct si_sampler_views *views)
  {
        unsigned mask = views->enabled_mask;
/* Add buffers to the CS. */
        while (mask) {
                int i = u_bit_scan(&mask);
@@ -417,21 +410,21 @@ void si_set_mutable_tex_desc_fields(struct si_screen 
*sscreen,
                state[6] &= C_008F28_COMPRESSION_EN;
                state[7] = 0;
if (vi_dcc_enabled(tex, first_level)) {
                        meta_va = (!tex->dcc_separate_buffer ? 
tex->resource.gpu_address : 0) +
                                  tex->dcc_offset;
if (sscreen->b.chip_class <= VI)
                                meta_va += base_level_info->dcc_offset;
                } else if (tex->tc_compatible_htile) {
-                       meta_va = tex->htile_buffer->gpu_address;
+                       meta_va = tex->resource.gpu_address + tex->htile_offset;
                }
if (meta_va) {
                        state[6] |= S_008F28_COMPRESSION_EN(1);
                        state[7] = meta_va >> 8;
                }
        }
if (sscreen->b.chip_class >= GFX9) {
                state[3] &= C_008F1C_SW_MODE;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index c7bc7b0..53f66ac 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2309,21 +2309,21 @@ static void si_init_depth_surface(struct si_context 
*sctx,
                         S_028038_MAXMIP(rtex->resource.b.b.last_level);
                s_info = S_02803C_FORMAT(stencil_format) |
                         
S_02803C_SW_MODE(rtex->surface.u.gfx9.stencil.swizzle_mode);
                surf->db_z_info2 = 
S_028068_EPITCH(rtex->surface.u.gfx9.surf.epitch);
                surf->db_stencil_info2 = 
S_02806C_EPITCH(rtex->surface.u.gfx9.stencil.epitch);
                surf->db_depth_view |= S_028008_MIPID(level);
                surf->db_depth_size = S_02801C_X_MAX(rtex->resource.b.b.width0 
- 1) |
                                      S_02801C_Y_MAX(rtex->resource.b.b.height0 
- 1);
/* Only use HTILE for the first level. */
-               if (rtex->htile_buffer && !level) {
+               if (rtex->htile_offset && !level) {
                        z_info |= S_028038_TILE_SURFACE_ENABLE(1) |
                                  S_028038_ALLOW_EXPCLEAR(1);
if (rtex->tc_compatible_htile) {
                                unsigned max_zplanes = 4;
if (rtex->db_render_format == PIPE_FORMAT_Z16_UNORM &&
                                    rtex->resource.b.b.nr_samples > 1)
                                        max_zplanes = 2;
@@ -2335,21 +2335,22 @@ static void si_init_depth_surface(struct si_context *sctx,
                        if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
                                /* Stencil buffer workaround ported from the 
SI-CI-VI code.
                                 * See that for explanation.
                                 */
                                s_info |= 
S_02803C_ALLOW_EXPCLEAR(rtex->resource.b.b.nr_samples <= 1);
                        } else {
                                /* Use all HTILE for depth if there's no 
stencil. */
                                s_info |= S_02803C_TILE_STENCIL_DISABLE(1);
                        }
- surf->db_htile_data_base = rtex->htile_buffer->gpu_address >> 8;
+                       surf->db_htile_data_base = (rtex->resource.gpu_address +
+                                                   rtex->htile_offset) >> 8;
                        surf->db_htile_surface = S_028ABC_FULL_CACHE(1) |
                                                 
S_028ABC_PIPE_ALIGNED(rtex->surface.u.gfx9.htile.pipe_aligned) |
                                                 
S_028ABC_RB_ALIGNED(rtex->surface.u.gfx9.htile.rb_aligned);
                }
        } else {
                /* SI-CI-VI */
                struct legacy_surf_level *levelinfo = 
&rtex->surface.u.legacy.level[level];
assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); @@ -2387,21 +2388,21 @@ static void si_init_depth_surface(struct si_context *sctx,
                        tile_mode_index = si_tile_mode_index(rtex, level, true);
                        s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
                }
surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
                                      
S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
                surf->db_depth_slice = 
S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
                                                                
levelinfo->nblk_y) / 64 - 1);
/* Only use HTILE for the first level. */
-               if (rtex->htile_buffer && !level) {
+               if (rtex->htile_offset && !level) {
                        z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
                                  S_028040_ALLOW_EXPCLEAR(1);
if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
                                /* Workaround: For a not yet understood reason, 
the
                                 * combination of MSAA, fast stencil clear and 
stencil
                                 * decompress messes with subsequent stencil 
buffer
                                 * uses. Problem was reproduced on Verde, 
Bonaire,
                                 * Tonga, and Carrizo.
                                 *
@@ -2413,21 +2414,22 @@ static void si_init_depth_surface(struct si_context 
*sctx,
                                if (rtex->resource.b.b.nr_samples <= 1)
                                        s_info |= S_028044_ALLOW_EXPCLEAR(1);
                        } else if (!rtex->tc_compatible_htile) {
                                /* Use all of the htile_buffer for depth if 
there's no stencil.
                                 * This must not be set when TC-compatible 
HTILE is enabled
                                 * due to a hw bug.
                                 */
                                s_info |= S_028044_TILE_STENCIL_DISABLE(1);
                        }
- surf->db_htile_data_base = rtex->htile_buffer->gpu_address >> 8;
+                       surf->db_htile_data_base = (rtex->resource.gpu_address +
+                                                   rtex->htile_offset) >> 8;
                        surf->db_htile_surface = S_028ABC_FULL_CACHE(1);
if (rtex->tc_compatible_htile) {
                                surf->db_htile_surface |= 
S_028ABC_TC_COMPATIBLE(1);
if (rtex->resource.b.b.nr_samples <= 1)
                                        z_info |= 
S_028040_DECOMPRESS_ON_N_ZPLANES(5);
                                else if (rtex->resource.b.b.nr_samples <= 4)
                                        z_info |= 
S_028040_DECOMPRESS_ON_N_ZPLANES(3);
                                else
@@ -2808,26 +2810,20 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom
        if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
                struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
                struct r600_texture *rtex = (struct 
r600_texture*)zb->base.texture;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                                      &rtex->resource, RADEON_USAGE_READWRITE,
                                      zb->base.texture->nr_samples > 1 ?
                                              RADEON_PRIO_DEPTH_BUFFER_MSAA :
                                              RADEON_PRIO_DEPTH_BUFFER);
- if (zb->db_htile_data_base) {
-                       radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-                                             rtex->htile_buffer, 
RADEON_USAGE_READWRITE,
-                                             RADEON_PRIO_HTILE);
-               }
-
                if (sctx->b.chip_class >= GFX9) {
                        radeon_set_context_reg_seq(cs, 
R_028014_DB_HTILE_DATA_BASE, 3);
                        radeon_emit(cs, zb->db_htile_data_base);     /* 
DB_HTILE_DATA_BASE */
                        radeon_emit(cs, zb->db_htile_data_base >> 32); /* 
DB_HTILE_DATA_BASE_HI */
                        radeon_emit(cs, zb->db_depth_size);          /* 
DB_DEPTH_SIZE */
radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 10);
                        radeon_emit(cs, zb->db_z_info |                      /* 
DB_Z_INFO */
                                    
S_028038_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
                        radeon_emit(cs, zb->db_stencil_info);                /* 
DB_STENCIL_INFO */

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to