From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeon/r600_pipe_common.c | 4 +++- src/gallium/drivers/radeon/r600_pipe_common.h | 2 ++ src/gallium/drivers/radeon/r600_texture.c | 13 ++++++------ src/gallium/drivers/radeonsi/si_pipe.c | 13 ++++++++++++ src/gallium/drivers/radeonsi/si_state.c | 27 ++++++++++++------------- src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +- 6 files changed, 39 insertions(+), 22 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 42dc38b..c33b457 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -717,21 +717,21 @@ static const struct debug_named_value common_debug_options[] = { { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" }, { "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" }, { "notiling", DBG_NO_TILING, "Disable tiling" }, { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." }, { "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." }, { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." }, { "nowc", DBG_NO_WC, "Disable GTT write combining" }, { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." }, { "nodcc", DBG_NO_DCC, "Disable DCC." }, { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." }, - { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." }, + { "norbplus", DBG_NO_RB_PLUS, "Disable RB+." }, { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." }, { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" }, { "noce", DBG_NO_CE, "Disable the constant engine"}, { "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" }, { "nodccfb", DBG_NO_DCC_FB, "Disable separate DCC on the main framebuffer" }, DEBUG_NAMED_VALUE_END /* must be last */ }; static const char* r600_get_vendor(struct pipe_screen* pscreen) @@ -1310,20 +1310,22 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported; } r600_init_screen_texture_functions(rscreen); r600_init_screen_query_functions(rscreen); rscreen->ws = ws; rscreen->family = rscreen->info.family; rscreen->chip_class = rscreen->info.chip_class; rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0); + rscreen->has_rbplus = false; + rscreen->rbplus_allowed = false; r600_disk_cache_create(rscreen); slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64); rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); if (rscreen->force_aniso >= 0) { printf("radeon: Forcing anisotropy filter to %ix\n", /* round down to a power of two */ 1 << util_logbase2(rscreen->force_aniso)); diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 3516884..883d5ed 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -363,20 +363,22 @@ union r600_mmio_counters { struct r600_common_screen { struct pipe_screen b; struct radeon_winsys *ws; enum radeon_family family; enum chip_class chip_class; struct radeon_info info; uint64_t debug_flags; bool has_cp_dma; bool has_streamout; + bool has_rbplus; /* if RB+ registers exist */ + bool rbplus_allowed; /* if RB+ is allowed */ struct disk_cache *disk_shader_cache; struct slab_parent_pool pool_transfers; /* Texture filter settings. */ int force_aniso; /* -1 = disabled */ /* Auxiliary context. Mainly used to initialize resources. * It must be locked prior to using and flushed before unlocking. */ diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index ec7a325..2953379 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -2468,26 +2468,27 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, continue; } /* Fast clear is the most appropriate place to enable DCC for * displayable surfaces. */ if (rctx->chip_class >= VI && !(rctx->screen->debug_flags & DBG_NO_DCC_FB)) { vi_separate_dcc_try_enable(rctx, tex); - /* Stoney can't do a CMASK-based clear, so all clears are - * considered to be hypothetically slow clears, which - * is weighed when determining to enable separate DCC. + /* RB+ isn't supported with a CMASK-based clear, so all + * clears are considered to be hypothetically slow + * clears, which is weighed when determining whether to + * enable separate DCC. */ if (tex->dcc_gather_statistics && - rctx->family == CHIP_STONEY) + rctx->screen->rbplus_allowed) tex->num_slow_clears++; } /* Try to clear DCC first, otherwise try CMASK. */ if (tex->dcc_offset && tex->surface.num_dcc_levels) { uint32_t reset_value; bool clear_words_needed; if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR) continue; @@ -2501,22 +2502,22 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, if (clear_words_needed) tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; tex->separate_dcc_dirty = true; } else { /* 128-bit formats are unusupported */ if (tex->surface.bpe > 8) { continue; } - /* Stoney/RB+ doesn't work with CMASK fast clear. */ - if (rctx->family == CHIP_STONEY) + /* RB+ doesn't work with CMASK fast clear. */ + if (rctx->screen->rbplus_allowed) continue; /* ensure CMASK is enabled */ r600_texture_alloc_cmask_separate(rctx->screen, tex); if (tex->cmask.size == 0) { continue; } /* Do the fast clear. */ rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 7f0b445..8904b9d 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -832,20 +832,33 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.info.me_fw_version >= 173) || (sscreen->b.chip_class == SI && sscreen->b.info.pfp_fw_version >= 121 && sscreen->b.info.me_fw_version >= 87); sscreen->has_ds_bpermute = HAVE_LLVM >= 0x0309 && sscreen->b.chip_class >= VI; sscreen->b.has_cp_dma = true; sscreen->b.has_streamout = true; + + /* Some chips have RB+ registers, but don't support RB+. Those must + * always disable it. + */ + if (sscreen->b.family == CHIP_STONEY || + sscreen->b.chip_class >= GFX9) { + sscreen->b.has_rbplus = true; + + sscreen->b.rbplus_allowed = + !(sscreen->b.debug_flags & DBG_NO_RB_PLUS) && + sscreen->b.family == CHIP_STONEY; + } + (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain); sscreen->use_monolithic_shaders = (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0; sscreen->b.barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_INV_VMEM_L1 | SI_CONTEXT_INV_GLOBAL_L2; sscreen->b.barrier_flags.compute_to_L2 = SI_CONTEXT_CS_PARTIAL_FLUSH; if (debug_get_bool_option("RADEON_DUMP_SHADERS", false)) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index fa69b34..5f0eab1 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -110,22 +110,22 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a * * Reproducible with Unigine Heaven 4.0 and drirc missing. */ if (blend && blend->dual_src_blend && sctx->ps_shader.cso && (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3) cb_target_mask = 0; radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask); - /* STONEY-specific register settings. */ - if (sctx->b.family == CHIP_STONEY) { + /* RB+ register settings. */ + if (sctx->screen->b.rbplus_allowed) { unsigned spi_shader_col_format = sctx->ps_shader.cso ? sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0; unsigned sx_ps_downconvert = 0; unsigned sx_blend_opt_epsilon = 0; unsigned sx_blend_opt_control = 0; for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { struct r600_surface *surf = (struct r600_surface*)sctx->framebuffer.state.cbufs[i]; @@ -235,30 +235,29 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a case V_028C70_COLOR_2_10_10_10: if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); } break; } } - if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) { - sx_ps_downconvert = 0; - sx_blend_opt_epsilon = 0; - sx_blend_opt_control = 0; - } - radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */ + } else if (sctx->screen->b.has_rbplus) { + radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); + radeon_emit(cs, 0); /* R_028754_SX_PS_DOWNCONVERT */ + radeon_emit(cs, 0); /* R_028758_SX_BLEND_OPT_EPSILON */ + radeon_emit(cs, 0); /* R_02875C_SX_BLEND_OPT_CONTROL */ } } /* * Blender functions */ static uint32_t si_translate_blend_function(int blend_func) { switch (blend_func) { @@ -476,21 +475,21 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, } /* cb_render_state will disable unused ones */ blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); if (!state->rt[j].colormask || !state->rt[j].blend_enable) { si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); continue; } - /* Blending optimizations for Stoney. + /* Blending optimizations for RB+. * These transformations don't change the behavior. * * First, get rid of DST in the blend factors: * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) */ si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, PIPE_BLENDFACTOR_DST_COLOR, PIPE_BLENDFACTOR_SRC_COLOR); si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_COLOR, @@ -551,21 +550,21 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) blend->need_src_alpha_4bit |= 0xfu << (i * 4); } if (blend->cb_target_mask) { color_control |= S_028808_MODE(mode); } else { color_control |= S_028808_MODE(V_028808_CB_DISABLE); } - if (sctx->b.family == CHIP_STONEY) { + if (sctx->screen->b.has_rbplus) { /* Disable RB+ blend optimizations for dual source blending. * Vulkan does this. */ if (blend->dual_src_blend) { for (int i = 0; i < 8; i++) { sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); } } @@ -1190,22 +1189,22 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s /* Bug workaround for smoothing (overrasterization) on SI. */ if (sctx->b.chip_class == SI && sctx->smoothing_enabled) { db_shader_control &= C_02880C_Z_ORDER; db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); } /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ if (!rs || !rs->multisample_enable) db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; - if (sctx->b.family == CHIP_STONEY && - sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) + if (sctx->screen->b.has_rbplus && + !sctx->screen->b.rbplus_allowed) db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, db_shader_control); } /* * format translation */ static uint32_t si_translate_colorformat(enum pipe_format format) @@ -1961,21 +1960,21 @@ static void si_choose_spi_color_formats(struct r600_surface *surf, unsigned ntype, bool is_depth) { /* Alpha is needed for alpha-to-coverage. * Blending may be with or without alpha. */ unsigned normal = 0; /* most optimal, may not support blending or export alpha */ unsigned alpha = 0; /* exports alpha, but may not support blending */ unsigned blend = 0; /* supports blending, but may not export alpha */ unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */ - /* Choose the SPI color formats. These are required values for Stoney/RB+. + /* Choose the SPI color formats. These are required values for RB+. * Other chips have multiple choices, though they are not necessarily better. */ switch (format) { case V_028C70_COLOR_5_6_5: case V_028C70_COLOR_1_5_5_5: case V_028C70_COLOR_5_5_5_1: case V_028C70_COLOR_4_4_4_4: case V_028C70_COLOR_10_11_11: case V_028C70_COLOR_11_11_10: case V_028C70_COLOR_8: @@ -4204,21 +4203,21 @@ static void si_init_config(struct si_context *sctx) if (sctx->b.family == CHIP_FIJI || sctx->b.family >= CHIP_POLARIS10) vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); } else { si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); } - if (sctx->b.family == CHIP_STONEY) + if (sctx->screen->b.has_rbplus) si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0); si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); if (sctx->b.chip_class >= CIK) si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40); si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, RADEON_PRIO_BORDER_COLORS); if (sctx->b.chip_class >= GFX9) { si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, 0); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index f778cd7..0696582 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2650,21 +2650,21 @@ bool si_update_shaders(struct si_context *sctx) S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS); if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || sctx->sprite_coord_enable != rs->sprite_coord_enable || sctx->flatshade != rs->flatshade) { sctx->sprite_coord_enable = rs->sprite_coord_enable; sctx->flatshade = rs->flatshade; si_mark_atom_dirty(sctx, &sctx->spi_map); } - if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps)) + if (sctx->screen->b.rbplus_allowed && si_pm4_state_changed(sctx, ps)) si_mark_atom_dirty(sctx, &sctx->cb_render_state); if (sctx->ps_db_shader_control != db_shader_control) { sctx->ps_db_shader_control = db_shader_control; si_mark_atom_dirty(sctx, &sctx->db_render_state); } if (sctx->smoothing_enabled != sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing) { sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing; si_mark_atom_dirty(sctx, &sctx->msaa_config); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev