Looks like you may have missed the DFMA update in src/gallium/drivers/r600/r600_shader.c (the hunks there only replace the opcode-19 placeholders with TGSI_OPCODE_FMA), but that's not too important; it can be updated when r600 gains doubles support. As with the previous version,
Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu> On Sun, Mar 15, 2015 at 2:51 PM, Marek Olšák <mar...@gmail.com> wrote: > From: Marek Olšák <marek.ol...@amd.com> > > Needed by ARB_gpu_shader5. > > v2: select DMAD for FMA with double precision > v3: add and select DFMA > --- > src/gallium/auxiliary/gallivm/lp_bld_limits.h | 1 + > src/gallium/auxiliary/tgsi/tgsi_exec.h | 1 + > src/gallium/auxiliary/tgsi/tgsi_info.c | 4 ++-- > src/gallium/auxiliary/tgsi/tgsi_util.c | 1 + > src/gallium/docs/source/screen.rst | 2 ++ > src/gallium/docs/source/tgsi.rst | 26 > ++++++++++++++++++++++++ > src/gallium/drivers/freedreno/freedreno_screen.c | 1 + > src/gallium/drivers/i915/i915_screen.c | 1 + > src/gallium/drivers/nouveau/nv30/nv30_screen.c | 2 ++ > src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + > src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 + > src/gallium/drivers/r300/r300_screen.c | 2 ++ > src/gallium/drivers/r600/r600_pipe.c | 1 + > src/gallium/drivers/r600/r600_shader.c | 6 +++--- > src/gallium/drivers/radeonsi/si_pipe.c | 1 + > src/gallium/drivers/svga/svga_screen.c | 2 ++ > src/gallium/drivers/vc4/vc4_screen.c | 1 + > src/gallium/include/pipe/p_defines.h | 1 + > src/gallium/include/pipe/p_shader_tokens.h | 6 +++--- > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 13 ++++++++---- > 20 files changed, 62 insertions(+), 12 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h > b/src/gallium/auxiliary/gallivm/lp_bld_limits.h > index 2962360..c5c51c1 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h > @@ -129,6 +129,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param) > case PIPE_SHADER_CAP_DOUBLES: > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > } > /* if we get here, we missed a shader cap above (and should have seen > diff --git 
a/src/gallium/auxiliary/tgsi/tgsi_exec.h > b/src/gallium/auxiliary/tgsi/tgsi_exec.h > index 609c81b..0e59b88 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h > +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h > @@ -459,6 +459,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > return 1; > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > } > /* if we get here, we missed a shader cap above (and should have seen > diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c > b/src/gallium/auxiliary/tgsi/tgsi_info.c > index 4d838fd..1194709 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_info.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c > @@ -56,7 +56,7 @@ static const struct tgsi_opcode_info > opcode_info[TGSI_OPCODE_LAST] = > { 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, > { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, > { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, > - { 0, 0, 0, 0, 0, 0, NONE, "", 19 }, /* removed */ > + { 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, > { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, > { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A }, > { 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */ > @@ -155,7 +155,7 @@ static const struct tgsi_opcode_info > opcode_info[TGSI_OPCODE_LAST] = > { 0, 1, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC }, > { 0, 1, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF }, > { 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END }, > - { 0, 0, 0, 0, 0, 0, NONE, "", 118 }, /* removed */ > + { 1, 3, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA }, > { 1, 1, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I }, > { 1, 2, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV }, > { 1, 2, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX }, > diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c > b/src/gallium/auxiliary/tgsi/tgsi_util.c > index d572ff0..e5b8427 100644 > --- 
a/src/gallium/auxiliary/tgsi/tgsi_util.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c > @@ -193,6 +193,7 @@ tgsi_util_get_inst_usage_mask(const struct > tgsi_full_instruction *inst, > case TGSI_OPCODE_MAD: > case TGSI_OPCODE_SUB: > case TGSI_OPCODE_LRP: > + case TGSI_OPCODE_FMA: > case TGSI_OPCODE_FRC: > case TGSI_OPCODE_CEIL: > case TGSI_OPCODE_CLAMP: > diff --git a/src/gallium/docs/source/screen.rst > b/src/gallium/docs/source/screen.rst > index e0fd1a2..26cc9ff 100644 > --- a/src/gallium/docs/source/screen.rst > +++ b/src/gallium/docs/source/screen.rst > @@ -336,6 +336,8 @@ to be 0. > is supported. If it is, DTRUNC/DCEIL/DFLR/DROUND opcodes may be used. > * ``PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED``: Whether DFRACEXP and > DLDEXP are supported. > +* ``PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED``: Whether FMA and DFMA (doubles only) > + are supported. > > > .. _pipe_compute_cap: > diff --git a/src/gallium/docs/source/tgsi.rst > b/src/gallium/docs/source/tgsi.rst > index b0a975a..7771136 100644 > --- a/src/gallium/docs/source/tgsi.rst > +++ b/src/gallium/docs/source/tgsi.rst > @@ -272,6 +272,21 @@ This instruction replicates its result. > dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w > > > +.. opcode:: FMA - Fused Multiply-Add > + > +Perform a * b + c with no intermediate rounding step. > + > +.. math:: > + > + dst.x = src0.x \times src1.x + src2.x > + > + dst.y = src0.y \times src1.y + src2.y > + > + dst.z = src0.z \times src1.z + src2.z > + > + dst.w = src0.w \times src1.w + src2.w > + > + > .. opcode:: DP2A - 2-component Dot Product And Add > > .. math:: > @@ -1962,6 +1977,17 @@ source is an integer. > dst.zw = src0.zw \times src1.zw + src2.zw > > > +.. opcode:: DFMA - Fused Multiply-Add > + > +Perform a * b + c with no intermediate rounding step. > + > +.. math:: > + > + dst.xy = src0.xy \times src1.xy + src2.xy > + > + dst.zw = src0.zw \times src1.zw + src2.zw > + > + > .. opcode:: DRCP - Reciprocal > > .. 
math:: > diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c > b/src/gallium/drivers/freedreno/freedreno_screen.c > index a4699e4..1d73513 100644 > --- a/src/gallium/drivers/freedreno/freedreno_screen.c > +++ b/src/gallium/drivers/freedreno/freedreno_screen.c > @@ -363,6 +363,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, > unsigned shader, > case PIPE_SHADER_CAP_DOUBLES: > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: > return 1; > diff --git a/src/gallium/drivers/i915/i915_screen.c > b/src/gallium/drivers/i915/i915_screen.c > index dc76464..50847e2 100644 > --- a/src/gallium/drivers/i915/i915_screen.c > +++ b/src/gallium/drivers/i915/i915_screen.c > @@ -158,6 +158,7 @@ i915_get_shader_param(struct pipe_screen *screen, > unsigned shader, enum pipe_sha > case PIPE_SHADER_CAP_DOUBLES: > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > default: > debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap); > diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c > b/src/gallium/drivers/nouveau/nv30/nv30_screen.c > index 0fca9e0..eeb7148 100644 > --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c > +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c > @@ -250,6 +250,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, > unsigned shader, > case PIPE_SHADER_CAP_DOUBLES: > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > default: > debug_printf("unknown vertex shader param %d\n", param); > @@ -289,6 +290,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, > unsigned shader, > case PIPE_SHADER_CAP_DOUBLES: > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case 
PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > default: > debug_printf("unknown fragment shader param %d\n", param); > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c > b/src/gallium/drivers/nouveau/nv50/nv50_screen.c > index ed07ba4..829dfbc 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c > @@ -289,6 +289,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, > unsigned shader, > case PIPE_SHADER_CAP_DOUBLES: > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > default: > NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c > index 686d892..04c34f5 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c > @@ -295,6 +295,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, > unsigned shader, > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > return 1; > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: > return 16; /* would be 32 in linked (OpenGL-style) mode */ > diff --git a/src/gallium/drivers/r300/r300_screen.c > b/src/gallium/drivers/r300/r300_screen.c > index fca8001..752d7e5 100644 > --- a/src/gallium/drivers/r300/r300_screen.c > +++ b/src/gallium/drivers/r300/r300_screen.c > @@ -287,6 +287,7 @@ static int r300_get_shader_param(struct pipe_screen > *pscreen, unsigned shader, e > case PIPE_SHADER_CAP_DOUBLES: > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > case PIPE_SHADER_CAP_PREFERRED_IR: > return 
PIPE_SHADER_IR_TGSI; > @@ -341,6 +342,7 @@ static int r300_get_shader_param(struct pipe_screen > *pscreen, unsigned shader, e > case PIPE_SHADER_CAP_DOUBLES: > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > case PIPE_SHADER_CAP_PREFERRED_IR: > return PIPE_SHADER_IR_TGSI; > diff --git a/src/gallium/drivers/r600/r600_pipe.c > b/src/gallium/drivers/r600/r600_pipe.c > index 24d901e..21e5d42 100644 > --- a/src/gallium/drivers/r600/r600_pipe.c > +++ b/src/gallium/drivers/r600/r600_pipe.c > @@ -493,6 +493,7 @@ static int r600_get_shader_param(struct pipe_screen* > pscreen, unsigned shader, e > return 0; > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > } > return 0; > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 2ee59c8..54540c3 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -7295,7 +7295,7 @@ static struct r600_shader_tgsi_instruction > r600_shader_tgsi_instruction[] = { > {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, > {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, > {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, > - {19, 0, ALU_OP0_NOP, tgsi_unsupported}, > + {TGSI_OPCODE_FMA, 0, ALU_OP0_NOP, tgsi_unsupported}, > {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, > tgsi_trans_srcx_replicate}, > {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, > {22, 0, ALU_OP0_NOP, tgsi_unsupported}, > @@ -7494,7 +7494,7 @@ static struct r600_shader_tgsi_instruction > eg_shader_tgsi_instruction[] = { > {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, > {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, > {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, > - {19, 0, ALU_OP0_NOP, tgsi_unsupported}, > + {TGSI_OPCODE_FMA, 0, ALU_OP0_NOP, tgsi_unsupported}, > {TGSI_OPCODE_SQRT, 
0, ALU_OP1_SQRT_IEEE, > tgsi_trans_srcx_replicate}, > {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, > {22, 0, ALU_OP0_NOP, tgsi_unsupported}, > @@ -7693,7 +7693,7 @@ static struct r600_shader_tgsi_instruction > cm_shader_tgsi_instruction[] = { > {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, > {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, > {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, > - {19, 0, ALU_OP0_NOP, tgsi_unsupported}, > + {TGSI_OPCODE_FMA, 0, ALU_OP0_NOP, tgsi_unsupported}, > {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, > cayman_emit_float_instr}, > {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, > {22, 0, ALU_OP0_NOP, tgsi_unsupported}, > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index f1a5388..0aacab1 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -425,6 +425,7 @@ static int si_get_shader_param(struct pipe_screen* > pscreen, unsigned shader, enu > case PIPE_SHADER_CAP_DOUBLES: > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > } > return 0; > diff --git a/src/gallium/drivers/svga/svga_screen.c > b/src/gallium/drivers/svga/svga_screen.c > index bac0dbc..7b01d35 100644 > --- a/src/gallium/drivers/svga/svga_screen.c > +++ b/src/gallium/drivers/svga/svga_screen.c > @@ -375,6 +375,7 @@ static int svga_get_shader_param(struct pipe_screen > *screen, unsigned shader, en > case PIPE_SHADER_CAP_DOUBLES: > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > } > /* If we get here, we failed to handle a cap above */ > @@ -431,6 +432,7 @@ static int svga_get_shader_param(struct pipe_screen > *screen, unsigned shader, en > case PIPE_SHADER_CAP_DOUBLES: > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case 
PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > } > /* If we get here, we failed to handle a cap above */ > diff --git a/src/gallium/drivers/vc4/vc4_screen.c > b/src/gallium/drivers/vc4/vc4_screen.c > index 7c62847..0be8ec2 100644 > --- a/src/gallium/drivers/vc4/vc4_screen.c > +++ b/src/gallium/drivers/vc4/vc4_screen.c > @@ -319,6 +319,7 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, > unsigned shader, > case PIPE_SHADER_CAP_DOUBLES: > case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: > case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: > + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: > return 0; > case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: > case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: > diff --git a/src/gallium/include/pipe/p_defines.h > b/src/gallium/include/pipe/p_defines.h > index a8ffe9c..67f48e4 100644 > --- a/src/gallium/include/pipe/p_defines.h > +++ b/src/gallium/include/pipe/p_defines.h > @@ -644,6 +644,7 @@ enum pipe_shader_cap > PIPE_SHADER_CAP_DOUBLES, > PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */ > PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED, > + PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED, > }; > > /** > diff --git a/src/gallium/include/pipe/p_shader_tokens.h > b/src/gallium/include/pipe/p_shader_tokens.h > index 95ac590..c14bcbc 100644 > --- a/src/gallium/include/pipe/p_shader_tokens.h > +++ b/src/gallium/include/pipe/p_shader_tokens.h > @@ -306,7 +306,7 @@ struct tgsi_property_data { > #define TGSI_OPCODE_MAD 16 > #define TGSI_OPCODE_SUB 17 > #define TGSI_OPCODE_LRP 18 > - /* gap */ > +#define TGSI_OPCODE_FMA 19 > #define TGSI_OPCODE_SQRT 20 > #define TGSI_OPCODE_DP2A 21 > /* gap */ > @@ -404,7 +404,7 @@ struct tgsi_property_data { > #define TGSI_OPCODE_BREAKC 115 > #define TGSI_OPCODE_KILL_IF 116 /* conditional kill */ > #define TGSI_OPCODE_END 117 /* aka HALT */ > - /* gap */ > +#define TGSI_OPCODE_DFMA 118 > #define TGSI_OPCODE_F2I 119 > #define TGSI_OPCODE_IDIV 120 > 
#define TGSI_OPCODE_IMAX 121 > @@ -510,7 +510,7 @@ struct tgsi_property_data { > #define TGSI_OPCODE_DSNE 206 /* SM5 */ > #define TGSI_OPCODE_DRCP 207 /* eg, cayman */ > #define TGSI_OPCODE_DSQRT 208 /* eg, cayman also has DRSQ */ > -#define TGSI_OPCODE_DMAD 209 /* DFMA? */ > +#define TGSI_OPCODE_DMAD 209 > #define TGSI_OPCODE_DFRAC 210 /* eg, cayman */ > #define TGSI_OPCODE_DLDEXP 211 /* eg, cayman */ > #define TGSI_OPCODE_DFRACEXP 212 /* eg, cayman */ > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > index bd191d8..efee4b2 100644 > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > @@ -332,6 +332,7 @@ public: > int glsl_version; > bool native_integers; > bool have_sqrt; > + bool have_fma; > > variable_storage *find_variable_storage(ir_variable *var); > > @@ -836,6 +837,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, > unsigned op, > case3fid(ADD, UADD, DADD); > case3fid(MUL, UMUL, DMUL); > case3fid(MAD, UMAD, DMAD); > + case3fid(FMA, UMAD, DFMA); > case3(DIV, IDIV, UDIV); > case4d(MAX, IMAX, UMAX, DMAX); > case4d(MIN, IMIN, UMIN, DMIN); > @@ -2222,10 +2224,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) > emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]); > break; > case ir_triop_fma: > - /* NOTE: Perhaps there should be a special opcode that enforces fused > - * mul-add. Just use MAD for now. > - */ > - emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); > + /* In theory, MAD is incorrect here. 
*/ > + if (have_fma) > + emit(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]); > + else > + emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); > break; > case ir_unop_interpolate_at_centroid: > emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); > @@ -5564,6 +5567,8 @@ get_mesa_program(struct gl_context *ctx, > > v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget, > > PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED); > + v->have_fma = pscreen->get_shader_param(pscreen, ptarget, > + > PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED); > > _mesa_copy_linked_program_data(shader->Stage, shader_program, prog); > _mesa_generate_parameters_list_for_uniforms(shader_program, shader, > -- > 2.1.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev