This looks good to me, although I know nothing about the hw. So VI could do (just with some restrictions) even full-speed fp32 denorms, whereas SI/CI can't? Interesting; I suppose that would be intended for compute. Intel x86 can't even do that (actually, I think Skylake can), though certainly other CPUs have been able to do that for ages.
(Albeit there's still nothing in the glsl spec which says this is required for fp16 pack...) Roland Am 06.02.2016 um 13:15 schrieb Marek Olšák: > From: Marek Olšák <marek.ol...@amd.com> > > This fixes FP16 conversion instructions for VI, which has 16-bit floats, > but not SI & CI, which can't disable denorms for those instructions. > --- > src/gallium/drivers/radeonsi/si_shader.c | 14 ++++++++++++++ > src/gallium/drivers/radeonsi/si_state_shaders.c | 18 ++++++++++++------ > src/gallium/drivers/radeonsi/sid.h | 3 +++ > 3 files changed, 29 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index a4680ce..3f1db70 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -4155,6 +4155,20 @@ int si_compile_llvm(struct si_screen *sscreen, > > si_shader_binary_read_config(binary, conf, 0); > > + /* Enable 64-bit and 16-bit denormals, because there is no performance > + * cost. > + * > + * If denormals are enabled, all floating-point output modifiers are > + * ignored. > + * > + * Don't enable denormals for 32-bit floats, because: > + * - Floating-point output modifiers would be ignored by the hw. > + * - Some opcodes don't support denormals, such as v_mad_f32. We would > + * have to stop using those. > + * - SI & CI would be very slow. 
> + */ > + conf->float_mode |= V_00B028_FP_64_DENORMS; > + > FREE(binary->config); > FREE(binary->global_symbol_offsets); > binary->config = NULL; > diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c > b/src/gallium/drivers/radeonsi/si_state_shaders.c > index ce795c0..77a4e47 100644 > --- a/src/gallium/drivers/radeonsi/si_state_shaders.c > +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c > @@ -124,7 +124,8 @@ static void si_shader_ls(struct si_shader *shader) > shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / > 4) | > S_00B528_SGPRS((num_sgprs - 1) / 8) | > S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) | > - S_00B528_DX10_CLAMP(1); > + S_00B528_DX10_CLAMP(1) | > + S_00B528_FLOAT_MODE(shader->config.float_mode); > shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) | > > S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); > } > @@ -157,7 +158,8 @@ static void si_shader_hs(struct si_shader *shader) > si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS, > S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) | > S_00B428_SGPRS((num_sgprs - 1) / 8) | > - S_00B428_DX10_CLAMP(1)); > + S_00B428_DX10_CLAMP(1) | > + S_00B428_FLOAT_MODE(shader->config.float_mode)); > si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, > S_00B42C_USER_SGPR(num_user_sgprs) | > > S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); > @@ -203,7 +205,8 @@ static void si_shader_es(struct si_shader *shader) > S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) | > S_00B328_SGPRS((num_sgprs - 1) / 8) | > S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) | > - S_00B328_DX10_CLAMP(1)); > + S_00B328_DX10_CLAMP(1) | > + S_00B328_FLOAT_MODE(shader->config.float_mode)); > si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES, > S_00B32C_USER_SGPR(num_user_sgprs) | > > S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); > @@ -292,7 +295,8 @@ static void si_shader_gs(struct si_shader *shader) > si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, > 
S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) | > S_00B228_SGPRS((num_sgprs - 1) / 8) | > - S_00B228_DX10_CLAMP(1)); > + S_00B228_DX10_CLAMP(1) | > + S_00B228_FLOAT_MODE(shader->config.float_mode)); > si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, > S_00B22C_USER_SGPR(num_user_sgprs) | > > S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); > @@ -381,7 +385,8 @@ static void si_shader_vs(struct si_shader *shader, struct > si_shader *gs) > S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) | > S_00B128_SGPRS((num_sgprs - 1) / 8) | > S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) | > - S_00B128_DX10_CLAMP(1)); > + S_00B128_DX10_CLAMP(1) | > + S_00B128_FLOAT_MODE(shader->config.float_mode)); > si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, > S_00B12C_USER_SGPR(num_user_sgprs) | > S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) | > @@ -567,7 +572,8 @@ static void si_shader_ps(struct si_shader *shader) > si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS, > S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) | > S_00B028_SGPRS((num_sgprs - 1) / 8) | > - S_00B028_DX10_CLAMP(1)); > + S_00B028_DX10_CLAMP(1) | > + S_00B028_FLOAT_MODE(shader->config.float_mode)); > si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, > S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) | > S_00B02C_USER_SGPR(num_user_sgprs) | > diff --git a/src/gallium/drivers/radeonsi/sid.h > b/src/gallium/drivers/radeonsi/sid.h > index 9e1e158..8920847 100644 > --- a/src/gallium/drivers/radeonsi/sid.h > +++ b/src/gallium/drivers/radeonsi/sid.h > @@ -2845,6 +2845,9 @@ > #define S_00B028_FLOAT_MODE(x) (((x) > & 0xFF) << 12) > #define G_00B028_FLOAT_MODE(x) (((x) > >> 12) & 0xFF) > #define C_00B028_FLOAT_MODE > 0xFFF00FFF > +#define V_00B028_FP_32_DENORMS 0x30 > +#define V_00B028_FP_64_DENORMS 0xc0 > +#define V_00B028_FP_ALL_DENORMS 0xf0 > #define S_00B028_PRIV(x) (((x) > & 0x1) << 20) > #define G_00B028_PRIV(x) (((x) > >> 20) & 0x1) > #define C_00B028_PRIV > 0xFFEFFFFF > 
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev