On Sat, Feb 06, 2016 at 01:15:42PM +0100, Marek Olšák wrote: > From: Marek Olšák <marek.ol...@amd.com> > > This fixes FP16 conversion instructions for VI, which has 16-bit floats, > but not SI & CI, which can't disable denorms for those instructions.
Do you know why this fixes FP16 conversions? What does the OpenGL spec say about denormal handling? > --- > src/gallium/drivers/radeonsi/si_shader.c | 14 ++++++++++++++ > src/gallium/drivers/radeonsi/si_state_shaders.c | 18 ++++++++++++------ > src/gallium/drivers/radeonsi/sid.h | 3 +++ > 3 files changed, 29 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index a4680ce..3f1db70 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -4155,6 +4155,20 @@ int si_compile_llvm(struct si_screen *sscreen, > > si_shader_binary_read_config(binary, conf, 0); > > + /* Enable 64-bit and 16-bit denormals, because there is no performance > + * cost. > + * > + * If denormals are enabled, all floating-point output modifiers are > + * ignored. > + * > + * Don't enable denormals for 32-bit floats, because: > + * - Floating-point output modifiers would be ignored by the hw. > + * - Some opcodes don't support denormals, such as v_mad_f32. We would > + * have to stop using those. > + * - SI & CI would be very slow. > + */ > + conf->float_mode |= V_00B028_FP_64_DENORMS; > + Do SI/CI support fp64 denorms? If so, won't this hurt performance? We should tell the compiler we are enabling fp64 denorms by adding +fp64-denormals to the feature string. It would also be better to read the float_mode value from the config registers emitted by the compiler. 
-Tom > FREE(binary->config); > FREE(binary->global_symbol_offsets); > binary->config = NULL; > diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c > b/src/gallium/drivers/radeonsi/si_state_shaders.c > index ce795c0..77a4e47 100644 > --- a/src/gallium/drivers/radeonsi/si_state_shaders.c > +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c > @@ -124,7 +124,8 @@ static void si_shader_ls(struct si_shader *shader) > shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / > 4) | > S_00B528_SGPRS((num_sgprs - 1) / 8) | > S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) | > - S_00B528_DX10_CLAMP(1); > + S_00B528_DX10_CLAMP(1) | > + S_00B528_FLOAT_MODE(shader->config.float_mode); > shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) | > > S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); > } > @@ -157,7 +158,8 @@ static void si_shader_hs(struct si_shader *shader) > si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS, > S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) | > S_00B428_SGPRS((num_sgprs - 1) / 8) | > - S_00B428_DX10_CLAMP(1)); > + S_00B428_DX10_CLAMP(1) | > + S_00B428_FLOAT_MODE(shader->config.float_mode)); > si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, > S_00B42C_USER_SGPR(num_user_sgprs) | > > S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); > @@ -203,7 +205,8 @@ static void si_shader_es(struct si_shader *shader) > S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) | > S_00B328_SGPRS((num_sgprs - 1) / 8) | > S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) | > - S_00B328_DX10_CLAMP(1)); > + S_00B328_DX10_CLAMP(1) | > + S_00B328_FLOAT_MODE(shader->config.float_mode)); > si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES, > S_00B32C_USER_SGPR(num_user_sgprs) | > > S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); > @@ -292,7 +295,8 @@ static void si_shader_gs(struct si_shader *shader) > si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, > S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) | > 
S_00B228_SGPRS((num_sgprs - 1) / 8) | > - S_00B228_DX10_CLAMP(1)); > + S_00B228_DX10_CLAMP(1) | > + S_00B228_FLOAT_MODE(shader->config.float_mode)); > si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, > S_00B22C_USER_SGPR(num_user_sgprs) | > > S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); > @@ -381,7 +385,8 @@ static void si_shader_vs(struct si_shader *shader, struct > si_shader *gs) > S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) | > S_00B128_SGPRS((num_sgprs - 1) / 8) | > S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) | > - S_00B128_DX10_CLAMP(1)); > + S_00B128_DX10_CLAMP(1) | > + S_00B128_FLOAT_MODE(shader->config.float_mode)); > si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, > S_00B12C_USER_SGPR(num_user_sgprs) | > S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) | > @@ -567,7 +572,8 @@ static void si_shader_ps(struct si_shader *shader) > si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS, > S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) | > S_00B028_SGPRS((num_sgprs - 1) / 8) | > - S_00B028_DX10_CLAMP(1)); > + S_00B028_DX10_CLAMP(1) | > + S_00B028_FLOAT_MODE(shader->config.float_mode)); > si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, > S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) | > S_00B02C_USER_SGPR(num_user_sgprs) | > diff --git a/src/gallium/drivers/radeonsi/sid.h > b/src/gallium/drivers/radeonsi/sid.h > index 9e1e158..8920847 100644 > --- a/src/gallium/drivers/radeonsi/sid.h > +++ b/src/gallium/drivers/radeonsi/sid.h > @@ -2845,6 +2845,9 @@ > #define S_00B028_FLOAT_MODE(x) (((x) > & 0xFF) << 12) > #define G_00B028_FLOAT_MODE(x) (((x) > >> 12) & 0xFF) > #define C_00B028_FLOAT_MODE > 0xFFF00FFF > +#define V_00B028_FP_32_DENORMS 0x30 > +#define V_00B028_FP_64_DENORMS 0xc0 > +#define V_00B028_FP_ALL_DENORMS 0xf0 > #define S_00B028_PRIV(x) (((x) > & 0x1) << 20) > #define G_00B028_PRIV(x) (((x) > >> 20) & 0x1) > #define C_00B028_PRIV > 0xFFEFFFFF > -- > 2.1.4 > > _______________________________________________ 
> mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev