On Sun, Mar 13, 2016 at 7:52 AM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > > On 03/13/2016 04:07 AM, Ilia Mirkin wrote: >> >> First off, st/mesa lowers DSQRT incorrectly (it uses CMP to attempt to >> find out whether the input is less than 0). Secondly the current >> approach (x * rsq(x)) behaves poorly for x = inf - a NaN is produced >> instead of inf. >> > > When I had a look at this precision thing, this seemed a bit weird to me but > I did not investigate much of time on it. > >> Instead we switch to the less accurate rcp(rsq(x)) method - this behaves >> nicely for all valid inputs. We still don't do this for DSQRT since the >> RSQ/RCP ops are *really* inaccurate, and don't even have Newton-Raphson >> steps right now. Eventually we should have a separate library function >> for DSQRT that does it more precisely (and perhaps move this lowering to >> the post-opt phase). >> >> This fixes a number of dEQP precision tests that were expecting better >> behavior for infinite inputs. > > > Yep, this fixes distance, length, refract and sqrt deqp precision tests (37 > tests). But they are still some fails with atan2, idexp and tanh. > Are you going to fix them too?
Not today. (that's "ldexp" btw) I glanced at atan2 - looks like it's returning results out of range... perhaps a min/max on the poly generated by the lowering logic is in order. Didn't investigate too closely. > > Except that you forgot to remove unused variables: > codegen/nv50_ir_lowering_nvc0.cpp: In member function ‘bool > nv50_ir::NVC0LoweringPass::handleSQRT(nv50_ir::Instruction*)’: > codegen/nv50_ir_lowering_nvc0.cpp:1785:20: warning: unused variable ‘mov’ > [-Wunused-variable] > Instruction *mov, *rsq; > ^ > codegen/nv50_ir_lowering_nvc0.cpp:1785:26: warning: unused variable ‘rsq’ > [-Wunused-variable] > Instruction *mov, *rsq; > ^ > CC nvc0/nvc0_surface.lo Yeah, I noticed that about 5s after I sent the patch out... already fixed locally. > > This patch is: > > Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > Tested-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> Thanks! > > >> >> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> >> --- >> .../drivers/nouveau/codegen/nv50_ir_build_util.cpp | 6 +++- >> .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 ++ >> .../nouveau/codegen/nv50_ir_lowering_nv50.cpp | 7 ++--- >> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 32 >> +++++++++++----------- >> src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +- >> src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +- >> 6 files changed, 28 insertions(+), 23 deletions(-) >> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp >> index f58cf97..84ebfdb 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp >> @@ -585,6 +585,7 @@ BuildUtil::split64BitOpPostRA(Function *fn, >> Instruction *i, >> return NULL; >> srcNr = 2; >> break; >> + case OP_SELP: srcNr = 3; break; >> default: >> // TODO when needed >> return NULL; >> @@ -601,7 +602,10 @@ BuildUtil::split64BitOpPostRA(Function *fn, >> Instruction *i, >> >> for (int s = 0; s < srcNr; ++s) { >> if (lo->getSrc(s)->reg.size < 8) { >> - hi->setSrc(s, zero); >> + if (s == 2) >> + hi->setSrc(s, lo->getSrc(s)); >> + else >> + hi->setSrc(s, zero); >> } else { >> if (lo->getSrc(s)->refCount() > 1) >> lo->setSrc(s, cloneShallow(fn, lo->getSrc(s))); >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> index b06d86a..d284446 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> @@ -616,6 +616,7 @@ static nv50_ir::operation translateOpcode(uint opcode) >> >> NV50_IR_OPCODE_CASE(RCP, RCP); >> NV50_IR_OPCODE_CASE(RSQ, RSQ); >> + NV50_IR_OPCODE_CASE(SQRT, SQRT); >> >> NV50_IR_OPCODE_CASE(MUL, MUL); >> NV50_IR_OPCODE_CASE(ADD, ADD); >> @@ -2689,6 +2690,7 @@ Converter::handleInstruction(const struct >> tgsi_full_instruction *insn) >> case TGSI_OPCODE_FLR: >> case TGSI_OPCODE_TRUNC: >> case TGSI_OPCODE_RCP: >> + case TGSI_OPCODE_SQRT: >> case TGSI_OPCODE_IABS: >> case TGSI_OPCODE_INEG: >> case TGSI_OPCODE_NOT: >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp >> index 8752b0c..12c5f69 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp >> @@ -1203,10 +1203,9 @@ NV50LoweringPreSSA::handleDIV(Instruction *i) >> bool >> NV50LoweringPreSSA::handleSQRT(Instruction *i) >> { >> - Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32, >> - bld.getSSA(), i->getSrc(0)); >> - i->op = OP_MUL; >> - i->setSrc(1, rsq->getDef(0)); >> + bld.setPosition(i, true); >> + i->op = OP_RSQ; >> + bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0)); >> >> return true; >> } >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> index d181f15..29b77c9 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> @@ -1778,22 +1778,22 @@ NVC0LoweringPass::handleMOD(Instruction *i) >> bool >> NVC0LoweringPass::handleSQRT(Instruction *i) >> { >> - Value *pred = bld.getSSA(1, FILE_PREDICATE); >> - Value *zero = bld.getSSA(); >> - Instruction *rsq; >> - >> - bld.mkOp1(OP_MOV, TYPE_U32, zero, bld.mkImm(0)); >> - if (i->dType == TYPE_F64) >> - zero = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), zero, zero); >> - bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), >> zero); >> - bld.mkOp1(OP_MOV, i->dType, i->getDef(0), zero)->setPredicate(CC_P, >> pred); >> - rsq = bld.mkOp1(OP_RSQ, i->dType, >> - bld.getSSA(typeSizeof(i->dType)), i->getSrc(0)); >> - rsq->setPredicate(CC_NOT_P, pred); >> - i->op = OP_MUL; >> - i->setSrc(1, rsq->getDef(0)); >> - i->setPredicate(CC_NOT_P, pred); >> - >> + if (i->dType == TYPE_F64) { >> + Value *pred = bld.getSSA(1, FILE_PREDICATE); >> + Value *zero = bld.loadImm(NULL, 0.0d); >> + Value *dst = bld.getSSA(8); >> + Instruction *mov, *rsq; >> + bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0)); >> + bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), >> zero); >> + bld.mkOp3(OP_SELP, TYPE_U64, dst, zero, dst, pred); >> + i->op = OP_MUL; >> + i->setSrc(1, dst); >> + // TODO: Handle this properly with a library function >> + } else { >> + bld.setPosition(i, true); >> + i->op = OP_RSQ; >> + bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0)); >> + } >> >> return true; >> } >> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c >> b/src/gallium/drivers/nouveau/nv50/nv50_screen.c >> index 28e0ed3..5836bb2 100644 >> --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c >> +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c >> @@ -305,7 +305,7 @@ nv50_screen_get_shader_param(struct pipe_screen >> *pscreen, unsigned shader, >> case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: >> return 1; >> case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: >> - return 0; >> + return 1; >> case PIPE_SHADER_CAP_SUBROUTINES: >> return 0; /* please inline, or provide function declarations */ >> case PIPE_SHADER_CAP_INTEGERS: >> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c >> b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c >> index 30afdf2f0..3c5b1da 100644 >> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c >> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c >> @@ -328,7 +328,7 @@ nvc0_screen_get_shader_param(struct pipe_screen >> *pscreen, unsigned shader, >> case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: >> return 1; >> case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: >> - return 0; >> + return 1; >> case PIPE_SHADER_CAP_SUBROUTINES: >> return 1; >> case PIPE_SHADER_CAP_INTEGERS: >> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev