The ARB_vertex_program specification defines RSQ as: tmp = fabs(ScalarLoad(op0)); result.x = ApproxRSQRT(tmp); result.y = ApproxRSQRT(tmp); result.z = ApproxRSQRT(tmp); result.w = ApproxRSQRT(tmp);
The mesa_to_tgsi code looks good, but it's very suboptimal. Source operands have an absolute-value modifier, so let's use it. The optimal code is: case OPCODE_RSQ: ureg_RSQ(ureg, dst[0], ureg_abs(src[0])); break; I guess the ABS opcode should be removed to prevent confusion. Marek On Thu, Jul 11, 2013 at 9:44 PM, Zack Rusin <za...@vmware.com> wrote: > GLSL spec says that rsq is undefined for src<=0, but the D3D10 > spec says it needs to be a NaN, so let's stop taking an absolute > value of the source which completely breaks that behavior. For > the gl program we can simply insert an extra abs instruction > which produces the desired behavior there. > > Signed-off-by: Zack Rusin <za...@vmware.com> > --- > src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 2 -- > src/gallium/auxiliary/tgsi/tgsi_exec.c | 16 ++++++++-------- > src/gallium/docs/source/tgsi.rst | 6 +++--- > src/mesa/state_tracker/st_mesa_to_tgsi.c | 10 ++++++++-- > 4 files changed, 19 insertions(+), 15 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > index 1feaa19..8c26918 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > @@ -633,8 +633,6 @@ rsq_emit( > struct lp_build_tgsi_context * bld_base, > struct lp_build_emit_data * emit_data) > { > - emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, > - emit_data->args[0]); > if (bld_base->rsq_action.emit) { > bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data); > } else { > diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c > b/src/gallium/auxiliary/tgsi/tgsi_exec.c > index 4482c6b..9133bcb 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c > @@ -339,20 +339,20 @@ micro_rsq(union tgsi_exec_channel *dst, > assert(src->f[2] != 0.0f); > assert(src->f[3] != 0.0f); > #endif > - dst->f[0] = 1.0f / 
sqrtf(fabsf(src->f[0])); > - dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); > - dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); > - dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); > + dst->f[0] = 1.0f / sqrtf(src->f[0]); > + dst->f[1] = 1.0f / sqrtf(src->f[1]); > + dst->f[2] = 1.0f / sqrtf(src->f[2]); > + dst->f[3] = 1.0f / sqrtf(src->f[3]); > } > > static void > micro_sqrt(union tgsi_exec_channel *dst, > const union tgsi_exec_channel *src) > { > - dst->f[0] = sqrtf(fabsf(src->f[0])); > - dst->f[1] = sqrtf(fabsf(src->f[1])); > - dst->f[2] = sqrtf(fabsf(src->f[2])); > - dst->f[3] = sqrtf(fabsf(src->f[3])); > + dst->f[0] = sqrtf(src->f[0]); > + dst->f[1] = sqrtf(src->f[1]); > + dst->f[2] = sqrtf(src->f[2]); > + dst->f[3] = sqrtf(src->f[3]); > } > > static void > diff --git a/src/gallium/docs/source/tgsi.rst > b/src/gallium/docs/source/tgsi.rst > index 3f48b51..ab395a4 100644 > --- a/src/gallium/docs/source/tgsi.rst > +++ b/src/gallium/docs/source/tgsi.rst > @@ -94,16 +94,16 @@ This instruction replicates its result. > > .. opcode:: RSQ - Reciprocal Square Root > > -This instruction replicates its result. > +This instruction replicates its result. The results are undefined for src <= > 0. > > .. math:: > > - dst = \frac{1}{\sqrt{|src.x|}} > + dst = \frac{1}{\sqrt{src.x}} > > > .. opcode:: SQRT - Square Root > > -This instruction replicates its result. > +This instruction replicates its result. The results are undefined for src < > 0. > > .. 
math:: > > diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c > b/src/mesa/state_tracker/st_mesa_to_tgsi.c > index dd9f4fc..168585a 100644 > --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c > +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c > @@ -614,8 +614,6 @@ translate_opcode( unsigned op ) > return TGSI_OPCODE_RCP; > case OPCODE_RET: > return TGSI_OPCODE_RET; > - case OPCODE_RSQ: > - return TGSI_OPCODE_RSQ; > case OPCODE_SCS: > return TGSI_OPCODE_SCS; > case OPCODE_SEQ: > @@ -755,6 +753,14 @@ compile_instruction( > emit_ddy( t, dst[0], &inst->SrcReg[0] ); > break; > > + case OPCODE_RSQ: { > + struct ureg_dst temp = ureg_DECL_temporary( ureg ); > + ureg_ABS( ureg, temp, src[0] ); > + ureg_RSQ( ureg, dst[0], ureg_src(temp) ); > + ureg_release_temporary( ureg, temp ); > + } > + break; > + > default: > ureg_insn( ureg, > translate_opcode( inst->Opcode ), > -- > 1.7.10.4 > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev