[Mesa-dev] [PATCH 1/2] gallium: Eliminate TGSI_OPCODE_IFC.

2013-04-14 Thread jfonseca
From: José Fonseca 

Never used or implemented.
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c |1 -
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c  |1 -
 src/gallium/auxiliary/tgsi/tgsi_info.c   |2 +-
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h |1 -
 src/gallium/docs/source/tgsi.rst |5 -
 src/gallium/drivers/r300/r300_tgsi_to_rc.c   |1 -
 src/gallium/drivers/r600/r600_shader.c   |9 ++---
 src/gallium/include/pipe/p_shader_tokens.h   |2 +-
 8 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
index a4caf78..3c79abf 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
@@ -389,7 +389,6 @@ analyse_instruction(struct analysis_context *ctx,
 
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_IF:
-   case TGSI_OPCODE_IFC:
case TGSI_OPCODE_ELSE:
case TGSI_OPCODE_ENDIF:
case TGSI_OPCODE_BGNLOOP:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 853de09..239530d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1732,7 +1732,6 @@ near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
  opcode == TGSI_OPCODE_CAL ||
  opcode == TGSI_OPCODE_CALLNZ ||
  opcode == TGSI_OPCODE_IF ||
- opcode == TGSI_OPCODE_IFC ||
  opcode == TGSI_OPCODE_BGNLOOP ||
  opcode == TGSI_OPCODE_SWITCH)
 return FALSE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 1fadfec..716b16b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -151,7 +151,7 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 0, 0, 0, 0, 0, 0, NONE, "", 111 }, /* removed */
{ 1, 1, 0, 0, 0, 0, REPL, "NRM4", TGSI_OPCODE_NRM4 },
{ 0, 1, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
-   { 0, 1, 0, 0, 0, 0, NONE, "IFC", TGSI_OPCODE_IFC },
+   { 0, 1, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
{ 0, 1, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
{ 0, 1, 0, 0, 0, 0, NONE, "KIL", TGSI_OPCODE_KIL },
{ 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h 
b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index fa30352..b8519c6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -154,7 +154,6 @@ OP00(ENDSUB)
 OP00(NOP)
 OP11(NRM4)
 OP01(CALLNZ)
-OP01(IFC)
 OP01(BREAKC)
 OP01(KIL)
 OP00(END)
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 28308cb..0002626 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -1198,11 +1198,6 @@ XXX wait what
   TBD
 
 
-.. opcode:: IFC - If
-
-  TBD
-
-
 .. opcode:: BREAKC - Break Conditional
 
   TBD
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c 
b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index a0587b4..5e60e6c 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -135,7 +135,6 @@ static unsigned translate_opcode(unsigned opcode)
 /* gap */
  /* case TGSI_OPCODE_NRM4: return RC_OPCODE_NRM4; */
  /* case TGSI_OPCODE_CALLNZ: return RC_OPCODE_CALLNZ; */
- /* case TGSI_OPCODE_IFC: return RC_OPCODE_IFC; */
  /* case TGSI_OPCODE_BREAKC: return RC_OPCODE_BREAKC; */
 case TGSI_OPCODE_KIL: return RC_OPCODE_KIL;
 }
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 6dbca50..44b5ce5 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6035,7 +6035,8 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
{111,   0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NRM4,  0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ,0, ALU_OP0_NOP, tgsi_unsupported},
-   {TGSI_OPCODE_IFC,   0, ALU_OP0_NOP, tgsi_unsupported},
+   /* gap */
+   {114,   0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_BREAKC,0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_KIL,   0, ALU_OP2_KILLGT, tgsi_kill},  /* conditional 
kill */
{TGSI_OPCODE_END,   0, ALU_OP0_NOP, tgsi_end},  /* aka HALT */
@@ -6228,7 +6229,8 @@ static struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] = {
{111,   0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NRM4,  0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ,0, ALU_OP0_NOP, tgsi_unsupported},
-   {TGSI_OPCO

[Mesa-dev] [PATCH 2/2] gallium: Disambiguate TGSI_OPCODE_IF.

2013-04-14 Thread jfonseca
From: José Fonseca 

TGSI_OPCODE_IF condition had two possible interpretations:

- src.x != 0.0f

  - Mesa statetracker when PIPE_SHADER_CAP_INTEGERS was false for either
vertex or fragment shaders
  - gallivm/llvmpipe
  - postprocess
  - vl state tracker
  - vega state tracker
  - most old drivers
  - old internal state trackers
  - many graw examples

- src.x != 0U

  - Mesa statetracker when PIPE_SHADER_CAP_INTEGERS was true for both
vertex and fragment shaders
  - tgsi_exec/softpipe
  - r600
  - radeonsi
  - nv50

Drivers that use the draw module were also a mess (because Mesa would
emit float IFs, but the draw module supports native integers, so it would
interpret the IF argument as an integer...)

This sort of works if the source argument is limited to float +0.0f or
+1.0f, integer 0, but would fail if source is float -0.0f, or integer in
the float NaN range.  It could also fail if source is integer 1, and
hardware flushes denormalized numbers to zero.

But with this change there are now two opcodes, IF and UIF, with clear
meaning.

Drivers that do not support native integers do not need to worry about
UIF.  However, for backwards compatibility with old state trackers and
examples, it is advisable that native integer capable drivers also
support the float IF opcode.

I tried to implement this for r600 and radeonsi based on the surrounding
code.  I couldn't do this for nouveau, so I just shunted IF/UIF
together, which matches the current behavior.
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |1 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c|1 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c   |1 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c|   19 -
 src/gallium/auxiliary/tgsi/tgsi_dump.c |2 +
 src/gallium/auxiliary/tgsi/tgsi_exec.c |   22 +++
 src/gallium/auxiliary/tgsi/tgsi_info.c |2 +-
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h   |1 +
 src/gallium/docs/source/tgsi.rst   |   21 --
 .../drivers/nv50/codegen/nv50_ir_from_tgsi.cpp |6 +++
 src/gallium/drivers/r600/r600_shader.c |   21 +++---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c|   41 
 src/gallium/include/pipe/p_shader_tokens.h |2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |8 +++-
 src/mesa/state_tracker/st_mesa_to_tgsi.c   |   12 +-
 15 files changed, 137 insertions(+), 23 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index c71c1f1..e1c362b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -868,6 +868,7 @@ lp_set_default_actions(struct lp_build_tgsi_context * 
bld_base)
bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
+   bld_base->op_actions[TGSI_OPCODE_UIF].fetch_args = scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_KIL].fetch_args = kil_fetch_args;
bld_base->op_actions[TGSI_OPCODE_KILP].fetch_args = kilp_fetch_args;
bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 98bce0e..223184d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -837,6 +837,7 @@ lp_emit_instruction_aos(
   return FALSE;
 
case TGSI_OPCODE_IF:
+   case TGSI_OPCODE_UIF:
   return FALSE;
 
case TGSI_OPCODE_BGNLOOP:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
index 3c79abf..b00aa09 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
@@ -389,6 +389,7 @@ analyse_instruction(struct analysis_context *ctx,
 
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_IF:
+   case TGSI_OPCODE_UIF:
case TGSI_OPCODE_ELSE:
case TGSI_OPCODE_ENDIF:
case TGSI_OPCODE_BGNLOOP:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 239530d..362a1de 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1732,7 +1732,8 @@ near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
  opcode == TGSI_OPCODE_CAL ||
  opcode == TGSI_OPCODE_CALLNZ ||
  opcode == TGSI_OPCODE_IF ||
- opcode == TGSI_OPCODE_BGNLOOP ||
+  opcode == TGSI_OPCODE_UIF ||
+  opcode == TGSI_OPCODE_BGNLOOP ||
  opcode == TGSI_OPCODE_SWITCH)
 return FALSE;
}
@@ -2395,6 +2396,21 @@ if_emi

[Mesa-dev] [PATCH] nv50/ir: handle TGSI_OPCODE_IF(float) properly

2013-04-14 Thread Christoph Bumiller
You can merge this with the original UIF patch if you want.
---
 .../drivers/nv50/codegen/nv50_ir_from_tgsi.cpp |7 ++-
 .../drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp |2 +-
 .../drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp |2 +-
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
index 054c75e..d8abccd 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
@@ -386,6 +386,7 @@ static nv50_ir::TexTarget translateTexture(uint tex)
 nv50_ir::DataType Instruction::inferSrcType() const
 {
switch (getOpcode()) {
+   case TGSI_OPCODE_UIF:
case TGSI_OPCODE_AND:
case TGSI_OPCODE_OR:
case TGSI_OPCODE_XOR:
@@ -2431,10 +2432,6 @@ Converter::handleInstruction(const struct 
tgsi_full_instruction *insn)
   mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
   break;
case TGSI_OPCODE_IF:
-  /* XXX: fall-through into UIF, but this might lead to
-   * incorrect behavior on state trackers and auxiliary
-   * modules that emit float bool IFs regardless of
-   * native integer support */
case TGSI_OPCODE_UIF:
{
   BasicBlock *ifBB = new BasicBlock(func);
@@ -2443,7 +2440,7 @@ Converter::handleInstruction(const struct 
tgsi_full_instruction *insn)
   condBBs.push(bb);
   joinBBs.push(bb);
 
-  mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0));
+  mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy);
 
   setPosition(ifBB, true);
}
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp 
b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
index 20f76f8..03086e3 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
@@ -1011,7 +1011,7 @@ NV50LoweringPreSSA::checkPredicate(Instruction *insn)
   return;
cdst = bld.getSSA(1, FILE_FLAGS);
 
-   bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, cdst, bld.loadImm(NULL, 0), pred);
+   bld.mkCmp(OP_SET, CC_NEU, insn->dType, cdst, bld.loadImm(NULL, 0), pred);
 
insn->setPredicate(insn->cc, cdst);
 }
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
index 4d1d372..7676185 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1490,7 +1490,7 @@ NVC0LoweringPass::checkPredicate(Instruction *insn)
// CAUTION: don't use pdst->getInsn, the definition might not be unique,
//  delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass
 
-   bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, pdst, bld.mkImm(0), pred);
+   bld.mkCmp(OP_SET, CC_NEU, insn->dType, pdst, bld.mkImm(0), pred);
 
insn->setPredicate(insn->cc, pdst);
 }
-- 
1.7.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Jose Fonseca
- Original Message -
> From: Christoph Bumiller 
> 
> This is the only sane solution for nv50 and nvc0 (really, trust me),
> but since on other hardware the border colour is tightly coupled with
> texture state they'd have to undo the swizzle, so I've added a cap.
> 
> The name of the cap could be changed to be more descriptive, like
> PIPE_CAP_TEXTURE_SWIZZLE_AFFECTS_BORDER_COLOR.

Yes, please.
 
> The dependency of update_sampler on the texture updates was
> introduced to avoid doing the apply_depthmode to the swizzle twice.
>
> More detailed explanation of driver situation:
> 
> No, really, don't suggest doing this in the driver. The driver has
> elegantly separated texture view and sampler states (which are each
> a structure in a table in VRAM and should not be updated to avoid
> performance loss), and the tables are bound to the independent (!) 

I wonder if this is modeled after D3D10, where sampler state is independent 
from resource view state. Though as far as I know, D3D10's interpretation of 
texture border color does not depend on the swizzle...

> texture
> and sampler slots in shaders which must be separately indexable
> indirectly).
> So, if I was to do this in the driver, I'd have to add separate sampler
> state object instances for each texture view with appropriately swizzled
> border color, and there's only 16 slots, so I'd be limited to 4 texture
> units.
> Not to mention the sheer insanity, ugliness and emotional pain incurred
> when writing that code when it COULD be so easy and simple in the state
> tracker where you know that textures and samplers are tightly coupled,
> while in gallium I cannot assume that to be the case.

You wouldn't really need to create all state combinations: if you know that 
textures and samplers are tightly coupled, then caching the actually used 
combinations will get you exactly the same behavior, without losing performance 
or generality.  But granted, this would require more effort.

Also please spare a thought for other state trackers -- and I'm not even 
talking about a potential D3D10 state tracker for which your driver would be 
unusable --, even inside Mesa: it seems like src/gallium/state_trackers/vega 
uses both texture border and swizzle, probably vl state tracker too, so your 
driver will be busted on those state trackers. These need to be updated -- 
maybe the burden of considering this state can be lifted onto some helper 
functions -- if not, these state trackers should at least be updated to 
abort/warn when the cap is set. 

But I'm not really objecting -- as texture border seems fundamentally quirky 
state.  But before proceeding with this I'd like us to consider another texture 
border quirk while we are at it.

The other quirk is the integer vs float texture border colors.  Roland can 
probably talk a bit more about it as he was the one who came across it.  In a 
few words, the interpretation of texture border color union depends on the 
format in the sampler view state (whether it's a pure integer format or not).

So, I wonder how integer vs float texture border colors will fit in your 
driver's "elegantly separated texture view and sampler states", or any other 
driver for that matter.  That is, will the world need a 
PIPE_CAP_SAMPLER_VIEW_FORMAT_VIEW_AFFECTS_TEXTURE_BORDER_COLOR too?  If so then 
maybe we want to lump these two things together.


Jose

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Jose Fonseca
- Original Message -

> Not to mention the sheer insanity, ugliness and emotional pain incurred
> when writing that code when it COULD be so easy and simple in the state
> tracker where you know that textures and samplers are tightly coupled,
> while in gallium I cannot assume that to be the case.

Also, will this still be true when Mesa state tracker implements 
GL_ARB_texture_view ?

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: handle TGSI_OPCODE_IF(float) properly

2013-04-14 Thread Jose Fonseca
Thanks Christoph. Will do.

Jose

- Original Message -
> You can merge this with the original UIF patch if you want.
> ---
>  .../drivers/nv50/codegen/nv50_ir_from_tgsi.cpp |7 ++-
>  .../drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp |2 +-
>  .../drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp |2 +-
>  3 files changed, 4 insertions(+), 7 deletions(-)
> 
> diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
> b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
> index 054c75e..d8abccd 100644
> --- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
> @@ -386,6 +386,7 @@ static nv50_ir::TexTarget translateTexture(uint tex)
>  nv50_ir::DataType Instruction::inferSrcType() const
>  {
> switch (getOpcode()) {
> +   case TGSI_OPCODE_UIF:
> case TGSI_OPCODE_AND:
> case TGSI_OPCODE_OR:
> case TGSI_OPCODE_XOR:
> @@ -2431,10 +2432,6 @@ Converter::handleInstruction(const struct
> tgsi_full_instruction *insn)
>mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
>break;
> case TGSI_OPCODE_IF:
> -  /* XXX: fall-through into UIF, but this might lead to
> -   * incorrect behavior on state trackers and auxiliary
> -   * modules that emit float bool IFs regardless of
> -   * native integer support */
> case TGSI_OPCODE_UIF:
> {
>BasicBlock *ifBB = new BasicBlock(func);
> @@ -2443,7 +2440,7 @@ Converter::handleInstruction(const struct
> tgsi_full_instruction *insn)
>condBBs.push(bb);
>joinBBs.push(bb);
>  
> -  mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0));
> +  mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy);
>  
>setPosition(ifBB, true);
> }
> diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
> b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
> index 20f76f8..03086e3 100644
> --- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
> +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
> @@ -1011,7 +1011,7 @@ NV50LoweringPreSSA::checkPredicate(Instruction *insn)
>return;
> cdst = bld.getSSA(1, FILE_FLAGS);
>  
> -   bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, cdst, bld.loadImm(NULL, 0), pred);
> +   bld.mkCmp(OP_SET, CC_NEU, insn->dType, cdst, bld.loadImm(NULL, 0), pred);
>  
> insn->setPredicate(insn->cc, cdst);
>  }
> diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
> b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
> index 4d1d372..7676185 100644
> --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1490,7 +1490,7 @@ NVC0LoweringPass::checkPredicate(Instruction *insn)
> // CAUTION: don't use pdst->getInsn, the definition might not be unique,
> //  delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass
>  
> -   bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, pdst, bld.mkImm(0), pred);
> +   bld.mkCmp(OP_SET, CC_NEU, insn->dType, pdst, bld.mkImm(0), pred);
>  
> insn->setPredicate(insn->cc, pdst);
>  }
> --
> 1.7.3.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Christoph Bumiller
On 14.04.2013 13:44, Jose Fonseca wrote:
> - Original Message -
>> From: Christoph Bumiller 
>>
>> This is the only sane solution for nv50 and nvc0 (really, trust me),
>> but since on other hardware the border colour is tightly coupled with
>> texture state they'd have to undo the swizzle, so I've added a cap.
>>
>> The name of the cap could be changed to be more descriptive, like
>> PIPE_CAP_TEXTURE_SWIZZLE_AFFECTS_BORDER_COLOR.
> Yes, please.
>  
>> The dependency of update_sampler on the texture updates was
>> introduced to avoid doing the apply_depthmode to the swizzle twice.
>>
>> More detailed explanation of driver situation:
>>
>> No, really, don't suggest doing this in the driver. The driver has
>> elegantly separated texture view and sampler states (which are each
>> a structure in a table in VRAM and should not be updated to avoid
>> performance loss), and the tables are bound to the independent (!) 
> I wonder if this is modeled after D3D10, where sampler state is independent 
> from resource view state. Though as far as I know, D3D10's interpretation of 
> texture border color does not depend on the swizzle...
>
>> texture
>> and sampler slots in shaders which must be separately indexable
>> indirectly).
>> So, if I was to do this in the driver, I'd have to add separate sampler
>> state object instances for each texture view with appropriately swizzled
>> border color, and there's only 16 slots, so I'd be limited to 4 texture
>> units.
>> Not to mention the sheer insanity, ugliness and emotional pain incurred
>> when writing that code when it COULD be so easy and simple in the state
>> tracker where you know that textures and samplers are tightly coupled,
>> while in gallium I cannot assume that to be the case.
> You wouldn't really need to create all state combinations: if you know that 
> textures and samplers are tightly coupled, then caching the actually used 
> combinations will get you exactly the same behavior, without losing 
> performance or generality.  But granted, this would require more effort.

The emphasis being on "IF I knew" (that they're tightly coupled). If I
did, I could switch to linked mode where the card automatically uses the
view index as sampler index, ignoring the actual sampler index, and
validate them together.
However, that only applies to 3D, not to COMPUTE (which means that GL
compute shaders will still have the problem), and I'd have to support
both variants for state trackers that do not allow the coupling, and we
need a way for the state tracker to actually tell us what it wants. All
that makes it even quirkier.

> Also please spare a thought for other state trackers -- and I'm not even 
> talking about a potential D3D10 state tracker for which your driver would be 
> unusable --, even inside Mesa: it seems like src/gallium/state_trackers/vega 
> uses both texture border and swizzle, probably vl state tracker too, so your 
> driver will be busted on those state trackers. These need to be

It already is busted. It's also busted on r600 where making border color
+ swizzle work properly isn't even POSSIBLE (according to the radeon guys).

Maybe not for vega, it doesn't use a permutational swizzle, it just sets
components to PIPE_SWIZZLE_ONE, and incidentally the ZERO/ONE swizzles
do affect the border color. As far as I can tell, it looks something
like this (if you're interested; the exact behaviour does not seem
intended to be relied upon):

===
In the format description (including swizzle), each color component of
RGBA (as seen by the shader) gets mapped to a memory component
{C0,C1,C2,C3} or {ZERO,ONE_INT,ONE_FLOAT}.

When a memory (!) component (Cx) is first encountered when going through
RGBA, it is assigned the SAMPLER_BORDER_COLOR component value for that
component, and if the memory component is encountered again (because of
swizzle), that same value will be used.

So, assuming memory format RGBA and the swizzle 1RBG:
R = ONE
G = C0
B = C2
A = C1
the border colour will be SAMPLER_BORDER_COLOR.1GBA.

The resulting border colour with swizzle applied to the sampler would be
(lowercase being user values):
R=1
G=r
B=b
A=g

resulting in 1rbg, which works out.
===

>  updated -- maybe the burden of considering this state can be lifted onto 
> some helper functions -- if not, these state trackers should at least be 
> updated to abort/warn when the cap is set. 
>
> But I'm not really objecting -- as texture border seems fundamentally quirky 
> state.  But before proceeding with this I'd like us to consider another 
> texture border quirk while we are at it.
>
> The other quirk is the integer vs float texture border colors.  Roland can 
> probably talk a bit more about it as he was the one who came across it.  In a 
> few words, the interpretation of texture border color union depends on the 
> format in the sampler view state (whether it's a pure integer format or not).
>
> So, I wonder how integer vs float texture border colors will fit in your 
> driver's "elega

Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Christoph Bumiller
On 14.04.2013 13:50, Jose Fonseca wrote:
> - Original Message -
>
>> Not to mention the sheer insanity, ugliness and emotional pain incurred
>> when writing that code when it COULD be so easy and simple in the state
>> tracker where you know that textures and samplers are tightly coupled,
>> while in gallium I cannot assume that to be the case.
> Also, will this still be true when Mesa state tracker implements 
> GL_ARB_texture_view ?

I dare say yes. GL texture views do NOT decouple textures from samplers,
they just decouple gallium sampler views from OpenGL textures.

There may be an issue if we wanted (and we don't) to use a single
sampler for all the OpenGL texture views of a single texture. However,
that ONLY works if the shaders are changed as well, and since the
texture/sampler combinations are not predictable, this is a very bad
idea as it would mean frequent shader recompilations.

As to whether there will ever be an OpenGL extension that adds
separation of views and samplers to shaders ... I'm hoping for NV to add
some clause to the spec to solve the border colour trouble, like
forbidding texture swizzle in such cases (and I'm sure AMD would be
inclined to agree).

> Jose

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Christoph Bumiller
On 14.04.2013 14:33, Christoph Bumiller wrote:
>  
> ===
> In the format description (including swizzle), each color component of
> RGBA (as seen by the shader) gets mapped to a memory component
> {C0,C1,C2,C3} or {ZERO,ONE_INT,ONE_FLOAT}.
>
> When a memory (!) component (Cx) is first encountered when going through
> RGBA, it is assigned the SAMPLER_BORDER_COLOR component value for that
> component, and if the memory component is encountered again (because of
> swizzle), that same value will be used.
>
> So, assuming memory format RGBA and the swizzle 1RBG:
> R = ONE
> G = C0
> B = C2
> A = C1
> the border colour will be SAMPLER_BORDER_COLOR.1GBA.
>
> The resulting border colour with swizzle applied to the sampler would be
> (lowercase being user values):
> R=1
> G=r
> B=b
> A=g
>
> resulting in 1rbg, which works out.
> ===
>

Sorry, that was a bad example, I feel the need to give a better one:

When a memory component (Cx) is first encountered when going through RGBA, it 
is assigned the SAMPLER_BORDER_COLOR.R/G/B/A component value, and if the memory 
component is encountered again (because of swizzle), that same value will be 
used.

RGBA8 with swizzle G1GB:
R=C1
G=ONE
B=C1
A=C2

gets BORDER_COLOR.R1RA.

Maybe that's the same thing that happens on r600 (I just recall "undo
the swizzle in a weird way") ?


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Jose Fonseca


- Original Message -
> On 14.04.2013 13:44, Jose Fonseca wrote:
> > - Original Message -
> >> From: Christoph Bumiller 
> >>
> >> This is the only sane solution for nv50 and nvc0 (really, trust me),
> >> but since on other hardware the border colour is tightly coupled with
> >> texture state they'd have to undo the swizzle, so I've added a cap.
> >>
> >> The name of the cap could be changed to be more descriptive, like
> >> PIPE_CAP_TEXTURE_SWIZZLE_AFFECTS_BORDER_COLOR.
> > Yes, please.
> >  
> >> The dependency of update_sampler on the texture updates was
> >> introduced to avoid doing the apply_depthmode to the swizzle twice.
> >>
> >> More detailed explanation of driver situation:
> >>
> >> No, really, don't suggest doing this in the driver. The driver has
> >> elegantly separated texture view and sampler states (which are each
> >> a structure in a table in VRAM and should not be updated to avoid
> >> performance loss), and the tables are bound to the independent (!)
> > I wonder if this is modeled after D3D10, where sampler state is independent
> > from resource view state. Though as far as I know, D3D10's interpretation
> > of texture border color does not depend on the swizzle...
> >
> >> texture
> >> and sampler slots in shaders which must be separately indexable
> >> indirectly).
> >> So, if I was to do this in the driver, I'd have to add separate sampler
> >> state object instances for each texture view with appropriately swizzled
> >> border color, and there's only 16 slots, so I'd be limited to 4 texture
> >> units.
> >> Not to mention the sheer insanity, ugliness and emotional pain incurred
> >> when writing that code when it COULD be so easy and simple in the state
> >> tracker where you know that textures and samplers are tightly coupled,
> >> while in gallium I cannot assume that to be the case.
> > You wouldn't really need to create all state combinations: if you know
> > that textures and samplers are tightly coupled, then caching the actually
> > used combinations will get you exactly the same behavior, without losing
> > performance or generality.  But granted, this would require more effort.
> 
> The emphasis being on "IF I knew" (that they're tightly coupled). If I
> did, I could switch to linked mode where the card automatically uses the
> view index as sampler index, ignoring the actual sampler index, and
> validate them together.
> However, that only applies to 3D, not to COMPUTE (which means that GL
> compute shaders will still have the problem), and I'd have to support
> both variants for state trackers that do not allow the coupling, and we
> need a way for the state tracker to actually tell us what it wants. All
> that makes it even quirkier.
> 
> > Also please spare a thought for other state trackers -- and I'm not even
> > talking about a potential D3D10 state tracker for which your driver would
> > be unusable --, even inside Mesa: it seems like
> > src/gallium/state_trackers/vega uses both texture border and swizzle,
> > probably vl state tracker too, so your driver will be busted on those
> > state trackers. These need to be
> 
> It already is busted. It's also busted on r600 where making border color
> + swizzle work properly isn't even POSSIBLE (according to the radeon guys).
> 
> Maybe not for vega, it doesn't use a permutational swizzle, it just sets
> components to PIPE_SWIZZLE_ONE, and incidentally the ZERO/ONE swizzles
> do affect the border color. As far as I can tell, it looks something
> like this (if you're interested; the exact behaviour seems not supposed
> to be made use of):
> 
> ===
> In the format description (including swizzle), each color component of
> RGBA (as seen by the shader) gets mapped to a memory component
> {C0,C1,C2,C3} or {ZERO,ONE_INT,ONE_FLOAT}.
> 
> When a memory (!) component (Cx) is first encountered when going through
> RGBA, it is assigned the SAMPLER_BORDER_COLOR component value for that
> component, and if the memory component is encountered again (because of
> swizzle), that same value will be used.
> 
> So, assuming memory format RGBA and the swizzle 1RBG:
> R = ONE
> G = C0
> B = C2
> A = C1
> the border colour will be SAMPLER_BORDER_COLOR.1GBA.
> 
> The resulting border colour with swizzle applied to the sampler would be
> (lowercase being user values):
> R=1
> G=r
> B=b
> A=g
> 
> resulting in 1rbg, which works out.
> ===
> 
> >  updated -- maybe the burden of considering this state can be lifted onto
> >  some helper functions -- if not, these state trackers should at least be
> >  updated to abort/warn when the cap is set.
> >
> > But I'm not really objecting -- as texture border seems fundamentally
> > quirky state.  But before proceeding with this I'd like us to consider
> > another texture border quirk while we are at it.
> >
> > The other quirk is the integer vs float texture border colors.  Roland can
> > probably talk a bit more about it as he was the one who came across it.
> > In a few words, the interpret

Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Christoph Bumiller
On 14.04.2013 15:34, Jose Fonseca wrote:
>
> - Original Message -
>> On 14.04.2013 13:44, Jose Fonseca wrote:
>>> - Original Message -
 From: Christoph Bumiller 

 This is the only sane solution for nv50 and nvc0 (really, trust me),
 but since on other hardware the border colour is tightly coupled with
 texture state they'd have to undo the swizzle, so I've added a cap.

 The name of the cap could be changed to be more descriptive, like
 PIPE_CAP_TEXTURE_SWIZZLE_AFFECTS_BORDER_COLOR.
>>> Yes, please.
>>>  
 The dependency of update_sampler on the texture updates was
 introduced to avoid doing the apply_depthmode to the swizzle twice.

 More detailed explanation of driver situation:

 No, really, don't suggest doing this in the driver. The driver has
 elegantly separated texture view and sampler states (which are each
 a structure in a table in VRAM and should not be updated to avoid
 performance loss), and table are bound to the independent (!)
>>> I wonder if this is modeled after D3D10, where sampler state is independent
>>> from resource view state. Though as far as I know, D3D10's interpretation
>>> of texture border color does not depend on the swizzle...
>>>
 texture
 and sampler slots in shaders which must be separately indexable
 indirectly).
 So, if I was to do this in the driver, I'd have to add separate sampler
 state object instances for each texture view with appropriately swizzled
 border color, and there's only 16 slots, so I'd be limited to 4 texture
 units.
 Not to mention the sheer insanity, ugliness and emotional pain incurred
 when writing that code when it COULD be so easy and simple in the state
 tracker where you know that textures and samplers are tightly coupled,
 while in gallium I cannot assume that to be the case.
>>> You wouldn't really need to create all state combinations: if you know
>>> that textures and samplers are tightly coupled, then caching the actually
>>> used combinations will get you exactly the same behavior, without losing
>>> performance or generality.  But granted, this would require more effort.
>> The emphasis being on "IF I knew" (that they're tightly coupled). If I
>> did, I could switch to linked mode where the card automatically uses the
>> view index as sampler index, ignoring the actual sampler index, and
>> validate them together.
>> However, that only applies to 3D, not to COMPUTE (which means that GL
>> compute shaders will still have the problem), and I'd have to support
>> both variants for state trackers that do not allow the coupling, and we
>> need a way for the state tracker to actually tell us what it wants. All
>> that makes it even quirkier.
>>
>>> Also please spare a thought for other state trackers -- and I'm not even
>>> talking about a potential D3D10 state tracker for which your driver would
>>> be unusable --, even inside Mesa: it seems like
>>> src/gallium/state_trackers/vega uses both texture border and swizzle,
>>> probably vl state tracker too, so your driver will be busted on those
>>> state trackers. These need to be
>> It already is busted. It's also busted on r600 where making border color
>> + swizzle work properly isn't even POSSIBLE (according to the radeon guys).
>>
>> Maybe not for vega, it doesn't use a permutational swizzle, it just sets
>> components to PIPE_SWIZZLE_ONE, and incidentally the ZERO/ONE swizzles
>> do affect the border color. As far as I can tell, it looks something
>> like this (if you're interested; the exact behaviour seems not supposed
>> to be made use of):
>>
>> ===
>> In the format description (including swizzle), each color component of
>> RGBA (as seen by the shader) gets mapped a memory component
>> {C0,C1,C2,C3} or {ZERO,ONE_INT,ONE_FLOAT}.
>>
>> When a memory (!) component (Cx) is first encountered when going through
>> RGBA, it is assigned the SAMPLER_BORDER_COLOR component value for that
>> component, and if the memory component is encountered again (because of
>> swizzle), that same value will be used.
>>
>> So, assuming memory format RGBA and the swizzle 1RBG:
>> R = ONE
>> G = C0
>> B = C2
>> A = C1
>> the border colour will be SAMPLER_BORDER_COLOR.1GBA.
>>
>> The resulting border colour with swizzle applied to the sampler would be
>> (lowercase being user values):
>> R=1
>> G=r
>> B=b
>> A=g
>>
>> resulting in 1rbg, which works out.
>> ===
>>
>>>  updated -- maybe the burden of considering this state can be lifted onto
>>>  some helper functions -- if not, these state trackers should at least be
>>>  updated to abort/warn when the cap is set.
>>>
>>> But I'm not really objecting -- as texture border seems fundamentally
>>> quirky state.  But before proceeding with this I'd like us to consider
>>> another texture border quirk while we are at it.
>>>
>>> The other quirk is the integer vs float texture border colors.  Roland can
>>> probably talk a bit more abou

Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Marek Olšák
The border color in the sampler state is untyped and that's okay. The type
is irrelevant with nearest filtering - just memcpy the border color to the
destination register (if there is swizzling, just do what you do for
texels). With linear filtering, you can always assume it's float
(regardless of the sampler view).

Marek


On Sun, Apr 14, 2013 at 3:34 PM, Jose Fonseca  wrote:

>
>
> - Original Message -
> > On 14.04.2013 13:44, Jose Fonseca wrote:
> > > - Original Message -
> > >> From: Christoph Bumiller 
> > >>
> > >> This is the only sane solution for nv50 and nvc0 (really, trust me),
> > >> but since on other hardware the border colour is tightly coupled with
> > >> texture state they'd have to undo the swizzle, so I've added a cap.
> > >>
> > >> The name of the cap could be changed to be more descriptive, like
> > >> PIPE_CAP_TEXTURE_SWIZZLE_AFFECTS_BORDER_COLOR.
> > > Yes, please.
> > >
> > >> The dependency of update_sampler on the texture updates was
> > >> introduced to avoid doing the apply_depthmode to the swizzle twice.
> > >>
> > >> More detailed explanation of driver situation:
> > >>
> > >> No, really, don't suggest doing this in the driver. The driver has
> > >> elegantly separated texture view and sampler states (which are each
> > >> a structure in a table in VRAM and should not be updated to avoid
> > >> performance loss), and table are bound to the independent (!)
> > > I wonder if this is modeled after D3D10, where sampler state is
> independent
> > > from resource view state. Though as far as I know, D3D10's
> interpretation
> > > of texture border color does not depend on the swizzle...
> > >
> > >> texture
> > >> and sampler slots in shaders which must be separately indexable
> > >> indirectly).
> > >> So, if I was to do this in the driver, I'd have to add separate
> sampler
> > >> state object instances for each texture view with appropriately
> swizzled
> > >> border color, and there's only 16 slots, so I'd be limited to 4
> texture
> > >> units.
> > >> Not to mention the sheer insanity, ugliness and emotional pain
> incurred
> > >> when writing that code when it COULD be so easy and simple in the
> state
> > >> tracker where you know that textures and samplers are tightly coupled,
> > >> while in gallium I cannot assume that to be the case.
> > > You wouldn't really need to create all state combinations: if you know
> > > that textures and samplers are tightly coupled, then caching the
> actually
> > > used combinations will get you exactly the same behavior, without
> losing
> > > performance or generality.  But granted, this would require more
> effort.
> >
> > The emphasis being on "IF I knew" (that they're tightly coupled). If I
> > did, I could switch to linked mode where the card automatically uses the
> > view index as sampler index, ignoring the actual sampler index, and
> > validate them together.
> > However, that only applies to 3D, not to COMPUTE (which means that GL
> > compute shaders will still have the problem), and I'd have to support
> > both variants for state trackers that do not allow the coupling, and we
> > need a way for the state tracker to actually tell us what it wants. All
> > that makes it even quirkier.
> >
> > > Also please spare a thought for other state trackers -- and I'm not
> even
> > > talking about a potential D3D10 state tracker for which your driver
> would
> > > be unusable --, even inside Mesa: it seems like
> > > src/gallium/state_trackers/vega uses both texture border and swizzle,
> > > probably vl state tracker too, so your driver will be busted on those
> > > state trackers. These need to be
> >
> > It already is busted. It's also busted on r600 where making border color
> > + swizzle work properly isn't even POSSIBLE (according to the radeon
> guys).
> >
> > Maybe not for vega, it doesn't use a permutational swizzle, it just sets
> > components to PIPE_SWIZZLE_ONE, and incidentally the ZERO/ONE swizzles
> > do affect the border color. As far as I can tell, it looks something
> > like this (if you're interested; the exact behaviour seems not supposed
> > to be made use of):
> >
> > ===
> > In the format description (including swizzle), each color component of
> > RGBA (as seen by the shader) gets mapped a memory component
> > {C0,C1,C2,C3} or {ZERO,ONE_INT,ONE_FLOAT}.
> >
> > When a memory (!) component (Cx) is first encountered when going through
> > RGBA, it is assigned the SAMPLER_BORDER_COLOR component value for that
> > component, and if the memory component is encountered again (because of
> > swizzle), that same value will be used.
> >
> > So, assuming memory format RGBA and the swizzle 1RBG:
> > R = ONE
> > G = C0
> > B = C2
> > A = C1
> > the border colour will be SAMPLER_BORDER_COLOR.1GBA.
> >
> > The resulting border colour with swizzle applied to the sampler would be
> > (lowercase being user values):
> > R=1
> > G=r
> > B=b
> > A=g
> >
> > resulting in 1rbg, which works out.
> > ===
> >
> > >  updated 

Re: [Mesa-dev] [PATCH 2/2] gallium: Desambiguate TGSI_OPCODE_IF.

2013-04-14 Thread Roland Scheidegger
Am 14.04.2013 10:12, schrieb jfons...@vmware.com:
> From: José Fonseca 
> 
> TGSI_OPCODE_IF condition had two possible interpretations:
> 
> - src.x != 0.0f
> 
>   - Mesa statetracker when PIPE_SHADER_CAP_INTEGERS was false for either
> vertex or fragment shaders
>   - gallivm/llvmpipe
>   - postprocess
>   - vl state tracker
>   - vega state tracker
>   - most old drivers
>   - old internal state trackers
>   - many graw examples
> 
> - src.x != 0U
> 
>   - Mesa statetracker when PIPE_SHADER_CAP_INTEGERS was true for both
> vertex and fragment shaders
>   - tgsi_exec/softpipe
>   - r600
>   - radeonsi
>   - nv50
> 
> And drivers that use draw module also were a mess (because Mesa would
> emit float IFs, but draw module supports native integers so it would
> interpret IF arg as integers...)
> 
> This sort of works if the source argument is limited to float +0.0f or
> +1.0f, integer 0, but would fail if source is float -0.0f, or integer in
> the float NaN range.  It could also fail if source is integer 1, and
> hardware flushes denormalized numbers to zero.
> 
> But with this change there are now two opcodes, IF and UIF, with clear
> meaning.
> 
> Drivers that do not support native integers do not need to worry about
> UIF.  However, for backwards compatibility with old state trackers and
> examples, it is advisable that native integer capable drivers also
> support the float IF opcode.
> 
> I tried to implement this for r600 and radeonsi based on the surrounding
> code.  I couldn't do this for nouveau, so I just shunted IF/UIF
> together, which matches the current behavior.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |1 +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c|1 +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c   |1 +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c|   19 -
>  src/gallium/auxiliary/tgsi/tgsi_dump.c |2 +
>  src/gallium/auxiliary/tgsi/tgsi_exec.c |   22 +++
>  src/gallium/auxiliary/tgsi/tgsi_info.c |2 +-
>  src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h   |1 +
>  src/gallium/docs/source/tgsi.rst   |   21 --
>  .../drivers/nv50/codegen/nv50_ir_from_tgsi.cpp |6 +++
>  src/gallium/drivers/r600/r600_shader.c |   21 +++---
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c|   41 
> 
>  src/gallium/include/pipe/p_shader_tokens.h |2 +-
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |8 +++-
>  src/mesa/state_tracker/st_mesa_to_tgsi.c   |   12 +-
>  15 files changed, 137 insertions(+), 23 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index c71c1f1..e1c362b 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -868,6 +868,7 @@ lp_set_default_actions(struct lp_build_tgsi_context * 
> bld_base)
> bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = 
> scalar_unary_fetch_args;
> bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = 
> scalar_unary_fetch_args;
> bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
> +   bld_base->op_actions[TGSI_OPCODE_UIF].fetch_args = 
> scalar_unary_fetch_args;
> bld_base->op_actions[TGSI_OPCODE_KIL].fetch_args = kil_fetch_args;
> bld_base->op_actions[TGSI_OPCODE_KILP].fetch_args = kilp_fetch_args;
> bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = 
> scalar_unary_fetch_args;
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> index 98bce0e..223184d 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> @@ -837,6 +837,7 @@ lp_emit_instruction_aos(
>return FALSE;
>  
> case TGSI_OPCODE_IF:
> +   case TGSI_OPCODE_UIF:
>return FALSE;
>  
> case TGSI_OPCODE_BGNLOOP:
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
> index 3c79abf..b00aa09 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
> @@ -389,6 +389,7 @@ analyse_instruction(struct analysis_context *ctx,
>  
> switch (inst->Instruction.Opcode) {
> case TGSI_OPCODE_IF:
> +   case TGSI_OPCODE_UIF:
> case TGSI_OPCODE_ELSE:
> case TGSI_OPCODE_ENDIF:
> case TGSI_OPCODE_BGNLOOP:
Could you also add it to tgsi_opcode_infer_src_type?


> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 239530d..362a1de 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -1732,7 +1732,8 @@ near_end_of_shader(struct lp_build_tgsi_soa_context 
> *bld,
>  

Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Roland Scheidegger
Yeah it is ok for OpenGL. I guess for d3d10 we'd probably need to create
another sampler if the same sampler is used for both int and float
textures. Or just supply both int and float border colors to the sample
code (but making it work both for opengl and d3d would be ugly). FWIW it
looks like some intel hw also seems to require multiple border color
values (and 6!!! ones at that), and the hw just picks the right value
based on format. Though for some reason the only border color format it
does _not_ have is 32bit int, so it looks like this does absolutely nothing
to make both float and 32bit int colors work with the same sampler
simultaneously (and I guess 32bit int is probably the reason opengl
specifies those as ints, since you can't get accurate values with floats).
The swizzling looks like an orthogonal issue to that, however.

Roland


Am 14.04.2013 16:18, schrieb Marek Olšák:
> The border color in the sampler state is untyped and that's okay. The
> type is irrelevant with nearest filtering - just memcpy the border color
> to the destination register (if there is swizzling, just do what you do
> for texels). With linear filtering, you can always assume it's float
> (regardless of the sampler view).
> 
> Marek
> 
> 
> On Sun, Apr 14, 2013 at 3:34 PM, Jose Fonseca  > wrote:
> 
> 
> 
> - Original Message -
> > On 14.04.2013 13:44, Jose Fonseca wrote:
> > > - Original Message -
> > >> From: Christoph Bumiller  >
> > >>
> > >> This is the only sane solution for nv50 and nvc0 (really, trust
> me),
> > >> but since on other hardware the border colour is tightly
> coupled with
> > >> texture state they'd have to undo the swizzle, so I've added a cap.
> > >>
> > >> The name of the cap could be changed to be more descriptive, like
> > >> PIPE_CAP_TEXTURE_SWIZZLE_AFFECTS_BORDER_COLOR.
> > > Yes, please.
> > >
> > >> The dependency of update_sampler on the texture updates was
> > >> introduced to avoid doing the apply_depthmode to the swizzle twice.
> > >>
> > >> More detailed explanation of driver situation:
> > >>
> > >> No, really, don't suggest doing this in the driver. The driver has
> > >> elegantly separated texture view and sampler states (which are each
> > >> a structure in a table in VRAM and should not be updated to avoid
> > >> performance loss), and table are bound to the independent (!)
> > > I wonder if this is modeled after D3D10, where sampler state is
> independent
> > > from resource view state. Though as far as I know, D3D10's
> interpretation
> > > of texture border color does not depend on the swizzle...
> > >
> > >> texture
> > >> and sampler slots in shaders which must be separately indexable
> > >> indirectly).
> > >> So, if I was to do this in the driver, I'd have to add separate
> sampler
> > >> state object instances for each texture view with appropriately
> swizzled
> > >> border color, and there's only 16 slots, so I'd be limited to 4
> texture
> > >> units.
> > >> Not to mention the sheer insanity, ugliness and emotional pain
> incurred
> > >> when writing that code when it COULD be so easy and simple in
> the state
> > >> tracker where you know that textures and samplers are tightly
> coupled,
> > >> while in gallium I cannot assume that to be the case.
> > > You wouldn't really need to create all state combinations: if
> you know
> > > that textures and samplers are tightly coupled, then caching the
> actually
> > > used combinations will get you exactly the same behavior,
> without losing
> > > performance or generality.  But granted, this would require more
> effort.
> >
> > The emphasis being on "IF I knew" (that they're tightly coupled). If I
> > did, I could switch to linked mode where the card automatically
> uses the
> > view index as sampler index, ignoring the actual sampler index, and
> > validate them together.
> > However, that only applies to 3D, not to COMPUTE (which means that GL
> > compute shaders will still have the problem), and I'd have to support
> > both variants for state trackers that do not allow the coupling,
> and we
> > need a way for the state tracker to actually tell us what it
> wants. All
> > that makes it even quirkier.
> >
> > > Also please spare a thought for other state trackers -- and I'm
> not even
> > > talking about a potential D3D10 state tracker for which your
> driver would
> > > be unusable --, even inside Mesa: it seems like
> > > src/gallium/state_trackers/vega uses both texture border and
> swizzle,
> > > probably vl state tracker too, so your driver will be busted on
> those
> > > state trackers. These need to be
> >
> > It already 

Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Roland Scheidegger
Oh and btw how does this work for real hw, if the hardware indeed
interprets the border color value according to format?
Are there some bits to set that the border color value is either
interpreted according to format (useful for opengl) or always as float
(useful for d3d10)? Or how else do you use the same sampler for
different int/float textures?
This discrepancy between OpenGL and d3d10 is quite a mess.

Roland



Am 14.04.2013 17:45, schrieb Roland Scheidegger:
> Yeah it is ok for OpenGL. I guess for d3d10 we'd probably need to create
> another sampler if the same sampler is used for both int and float
> textures. Or just supply both int and float border colors to the sample
> code (but making it work both for opengl and d3d would be ugly). FWIW it
> looks like some intel hw also seems to require multiple border color
> values (and 6!!! ones at that), and the hw just picks the right value
> based on format. Though for some reason the only border color format it
> does _not_ have is 32bit int, so it looks like this does absolutely nothing
> to make both float and 32bit int colors work with the same sampler
> simultaneously (and I guess 32bit int is probably the reason opengl
> specifies those as ints, since you can't get accurate values with floats).
> The swizzling looks like an orthogonal issue to that, however.
> 
> Roland
> 
> 
> Am 14.04.2013 16:18, schrieb Marek Olšák:
>> The border color in the sampler state is untyped and that's okay. The
>> type is irrelevant with nearest filtering - just memcpy the border color
>> to the destination register (if there is swizzling, just do what you do
>> for texels). With linear filtering, you can always assume it's float
>> (regardless of the sampler view).
>>
>> Marek
>>
>>
>> On Sun, Apr 14, 2013 at 3:34 PM, Jose Fonseca > > wrote:
>>
>>
>>
>> - Original Message -
>> > On 14.04.2013 13:44, Jose Fonseca wrote:
>> > > - Original Message -
>> > >> From: Christoph Bumiller > >
>> > >>
>> > >> This is the only sane solution for nv50 and nvc0 (really, trust
>> me),
>> > >> but since on other hardware the border colour is tightly
>> coupled with
>> > >> texture state they'd have to undo the swizzle, so I've added a cap.
>> > >>
>> > >> The name of the cap could be changed to be more descriptive, like
>> > >> PIPE_CAP_TEXTURE_SWIZZLE_AFFECTS_BORDER_COLOR.
>> > > Yes, please.
>> > >
>> > >> The dependency of update_sampler on the texture updates was
>> > >> introduced to avoid doing the apply_depthmode to the swizzle twice.
>> > >>
>> > >> More detailed explanation of driver situation:
>> > >>
>> > >> No, really, don't suggest doing this in the driver. The driver has
>> > >> elegantly separated texture view and sampler states (which are each
>> > >> a structure in a table in VRAM and should not be updated to avoid
>> > >> performance loss), and table are bound to the independent (!)
>> > > I wonder if this is modeled after D3D10, where sampler state is
>> independent
>> > > from resource view state. Though as far as I know, D3D10's
>> interpretation
>> > > of texture border color does not depend on the swizzle...
>> > >
>> > >> texture
>> > >> and sampler slots in shaders which must be separately indexable
>> > >> indirectly).
>> > >> So, if I was to do this in the driver, I'd have to add separate
>> sampler
>> > >> state object instances for each texture view with appropriately
>> swizzled
>> > >> border color, and there's only 16 slots, so I'd be limited to 4
>> texture
>> > >> units.
>> > >> Not to mention the sheer insanity, ugliness and emotional pain
>> incurred
>> > >> when writing that code when it COULD be so easy and simple in
>> the state
>> > >> tracker where you know that textures and samplers are tightly
>> coupled,
>> > >> while in gallium I cannot assume that to be the case.
>> > > You wouldn't really need to create all state combinations: if
>> you know
>> > > that textures and samplers are tightly coupled, then caching the
>> actually
>> > > used combinations will get you exactly the same behavior,
>> without losing
>> > > performance or generality.  But granted, this would require more
>> effort.
>> >
>> > The emphasis being on "IF I knew" (that they're tightly coupled). If I
>> > did, I could switch to linked mode where the card automatically
>> uses the
>> > view index as sampler index, ignoring the actual sampler index, and
>> > validate them together.
>> > However, that only applies to 3D, not to COMPUTE (which means that GL
>> > compute shaders will still have the problem), and I'd have to support
>> > both variants for state trackers that do not allow the coupling,
>> and we
>> > need a way for the state

Re: [Mesa-dev] [PATCH 2/2] gallium: Desambiguate TGSI_OPCODE_IF.

2013-04-14 Thread Marek Olšák
On Sun, Apr 14, 2013 at 5:24 PM, Roland Scheidegger wrote:

> Am 14.04.2013 10:12, schrieb jfons...@vmware.com:> -  TBD
>  > +  Start an IF ... ELSE .. ENDIF block.  Condition evaluates to true if
> > +
> > +src0.x != 0.0
> > +
> > +  where src0.x is interpreted as a floating point register.
> Maybe should say something wrt evaluation of NaNs? I know we haven't
> really established rules for comparisons etc. wrt NaNs but those
> bools-as-float make me cry. I guess it is no different though than other
> float opcodes, if we now really have a definition saying IF takes _any_
> float not just a bool-as-float which was loosely implied before.
>
>
I don't know where the term "bool-as-float" came from, but I'd rather not
use it unless it's properly defined somewhere, and TGSI doesn't have bools
anyway, so why bother? The GLSL compiler or glsl-to-tgsi is responsible for
converting bools to either floats or ints and TGSI shouldn't need to care.
Both r300g and r600g use (src0.x != 0.0) for IF and (src0.x != 0) for UIF
(r600-only), so there is always the "not-equal-to" operator, which is also
well defined for NaNs.

Also if you care about NaNs, we should start by defining how instructions
should handle them, e.g. how relational operators handle NaNs, whether the
multiplication operator follows the rule 0*anything = 0 (MUL, MAD, DP4,
...), etc.

R600 has separate opcodes depending on what behavior you want, for example:
- The MUL opcode follows the rule 0*anything = 0. (DX9)
- The MUL_IEEE opcode follows the IEEE behavior.

The other opcodes with both the DX9 and IEEE behavior are: MAD, DP4, EX2,
LG2, RCP, RSQ. There are also separate MIN and MAX opcodes for DX9 and
DX10. We should choose our opcodes carefully depending on whether we are
implementing a DX9, DX10, OpenGL, or OpenCL state tracker.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: Fix UMAD on Cayman

2013-04-14 Thread Vadim Girlin

On 04/13/2013 09:54 PM, Martin Andersson wrote:

On Sat, Apr 13, 2013 at 4:23 AM, Vadim Girlin  wrote:

On 04/12/2013 11:36 PM, Martin Andersson wrote:


I have made some progress with this issue.

Vadim, I did as you suggested and tried to mimic the output from the
shader analyser
tool. I used your patch as a base and then tried various ways to see
what would work.
After many tries (and lockups) I did manage to get the
ext_transform_feedback/order
test to pass.

It is a very ugly patch but it should illustrate what the problem (and
potential solution) is.

Your test program fails however because explicit break statements do
not work. It
should be possible to use the same code for the explicit breaks as for
the implicit
loop break. The reason it does not is that I detect the implicit break
with a hack and
it does not work for explicit breaks.

The problem is that I need to detect the break statement when creating the
corresponding if statement. So that I can treat it differently than
other "regular" if
statements. Does anyone know how I could do that, or is this the wrong
approach?



It doesn't work with my test app because IF/ENDIF blocks with BRK may
contain other code, so you can't simply throw away IF/ENDIF making that code
execute unconditionally.


Yeah, my hack is not a viable option.


By the way, shader analyzer in some cases also produces the code with
JUMP/POP around PRED_SET-BREAK, though I'm not sure if that code will really
work as expected with catalyst. Possibly we're simply missing something in
the hardware configuration.

Also there is one thing that I didn't take into account in my initial patch
- r600g converts ALU followed by POP to ALU_POP_AFTER and this might explain
why my initial patch doesn't work. Possibly if we prevent that optimization
for ALU containing PRED_SET-BREAK and leave separate POP, it might be enough
to make it work. I'm attaching the additional patch that will force POP to
be a separate instruction in this case, please test it (on top of my
first patch). This would be at least not very intrusive.


No, that patch did not help either.


If this won't help, then I think we should understand what exactly we are
trying to fix before implementing any big changes, possibly there is a
better solution or at least a more clean workaround. In the worst case we
can return to your approach and improve it to handle other cases.


I'm starting to think that there is nothing wrong with the shader
compiler. It seems to me that a push, pop inside a nested loop clears
the break status on a thread.

shift_reg = 1u;
count = 0u;
while (true) {
 if (x == 1u)
 break;
  while (true) {
  if (x != 1u)
   count = 10u;
  if (x == 1u)
   count = 20u;
  break;
  }
  shift_reg = 2u;
  break;
}

input: x == 0
actual output: shift_reg == 2, count == 10
expected output: shift_reg == 2, count == 10

input: x == 1
actual output: shift_reg == 2, count == 20
expected output: shift_reg == 1, count == 0

If I swap the if statements in the inner loop I get different results.

shift_reg = 1u;
count = 0u;
while (true) {
 if (x == 1u)
 break;
  while (true) {
  if (x == 1u)
   count = 20u;
  if (x != 1u)
   count = 10u;
  break;
  }
  shift_reg = 2u;
  break;
}

input: x == 0
actual output: shift_reg == 2, count == 10
expected output: shift_reg == 2, count == 10

input: x == 1
actual output: shift_reg == 2, count == 0
expected output: shift_reg == 1, count == 0

I tested both cases on mesa master and mesa master + Vadims two
patches with the same results.



This turned out to be a known issue with cayman: BREAK/CONTINUE followed 
by LOOP_STARTxxx for nested loop may put the branch stack into the state 
such that ALU_PUSH_BEFORE doesn't work as expected.


It seems the simplest workaround is either to avoid ALU_PUSH_BEFORE in 
nested loops completely or to replace it with separate PUSH and ALU.


We can check if we actually have BREAK/CONTINUE in the outer loop before 
LOOP_START for the inner loop, but I think it will be true in most 
cases, so the simplest fix for r600g is to replace all ALU_PUSH_BEFORE 
with PUSH + ALU in the nested loops on cayman.


Vadim


//Martin


Vadim



//Martin

On Thu, Apr 11, 2013 at 5:31 PM, Vadim Girlin 
wrote:


On 04/11/2013 02:08 AM, Marek Olšák wrote:



Here's the output:

creating vs ...
shader compilation status: OK
creating fs ...
shader compilation status: OK
thread #0 (0;0) : ref = 16608
thread #1 (1;0) : ref = 27873
thread #2 (0;1) : ref = 16608
thread #3 (1;1) : ref = 27877
results:
thread 0 (0, 0): expected = 16608, observed = 27876, FAIL
thread 1 (1, 0): expected = 27873, observed = 27873, OK
thread 2 (0, 1): expected = 16608, observed = 27876, FAIL
thread 3 (1, 1): expected = 27877, observed = 27877, OK



Thanks. According to these results, it looks like LOOP_START_DX10 for
inner
loop somehow react

Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Marek Olšák
I think the hardware doesn't care what the border color type is. I think
the border color is "fetched" from the sampler state, which should be a
memcpy. If no texels are fetched from the texture, the border color is
copied to the destination register. If I set the texture hardware format to
"invalid", the texture fetch instructions always return the border color,
which suggests the hardware really does not care about the type.

OpenGL also doesn't care what the border color type is after it is set,
because the state is a union type.

Marek



On Sun, Apr 14, 2013 at 6:20 PM, Roland Scheidegger wrote:

> Oh and btw how does this work for real hw, if the hardware indeed
> interprets the border color value according to format?
> Are there some bits to set that the border color value is either
> interpreted according to format (useful for opengl) or always as float
> (useful for d3d10)? Or how else do you use the same sampler for
> different int/float textures?
> This discrepancy between OpenGL and d3d10 is quite a mess.
>
> Roland
>
>
>
> Am 14.04.2013 17:45, schrieb Roland Scheidegger:
> > Yeah it is ok for OpenGL. I guess for d3d10 we'd probably need to create
> > another sampler if the same sampler is used for both int and float
> > textures. Or just supply both int and float border colors to the sample
> > code (but making it work both for opengl and d3d would be ugly). FWIW it
> > looks like some intel hw also seems to require multiple border color
> > values (and 6!!! ones at that), and the hw just picks the right value
> > based on format. Though for some reason the only border color format it
> > does _not_ have is 32bit int, so it looks this does absolutely nothing
> > to make both float and 32bit int colors work with the same sampler
> > simultaneously (and I guess 32bit int is probably the reason opengl
> > specifies those as ints, since you can't get accurate values with
> floats).
> > The swizzling looks like an orthogonal issue to that, however.
> >
> > Roland
> >
> >
> > Am 14.04.2013 16:18, schrieb Marek Olšák:
> >> The border color in the sampler state is untyped and that's okay. The
> >> type is irrelevant with nearest filtering - just memcpy the border color
> >> to the destination register (if there is swizzling, just do what you do
> >> for texels). With linear filtering, you can always assume it's float
> >> (regardless of the sampler view).
> >>
> >> Marek
> >>
> >>
> >> On Sun, Apr 14, 2013 at 3:34 PM, Jose Fonseca  >> > wrote:
> >>
> >>
> >>
> >> - Original Message -
> >> > On 14.04.2013 13:44, Jose Fonseca wrote:
> >> > > - Original Message -
> >> > >> From: Christoph Bumiller  >> >
> >> > >>
> >> > >> This is the only sane solution for nv50 and nvc0 (really, trust
> >> me),
> >> > >> but since on other hardware the border colour is tightly
> >> coupled with
> >> > >> texture state they'd have to undo the swizzle, so I've added a
> cap.
> >> > >>
> >> > >> The name of the cap could be changed to be more descriptive,
> like
> >> > >> PIPE_CAP_TEXTURE_SWIZZLE_AFFECTS_BORDER_COLOR.
> >> > > Yes, please.
> >> > >
> >> > >> The dependency of update_sampler on the texture updates was
> >> > >> introduced to avoid doing the apply_depthmode to the swizzle
> twice.
> >> > >>
> >> > >> More detailed explanation of driver situation:
> >> > >>
> >> > >> No, really, don't suggest doing this in the driver. The driver
> has
> >> > >> elegantly separated texture view and sampler states (which are
> each
> >> > >> a structure in a table in VRAM and should not be updated to
> avoid
> >> > >> performance loss), and table are bound to the independent (!)
> >> > > I wonder if this is modeled after D3D10, where sampler state is
> >> independent
> >> > > from resource view state. Though as far as I known, D3D10's
> >> interpretation
> >> > > of texture border color does not depend on the swizzle...
> >> > >
> >> > >> texture
> >> > >> and sampler slots in shaders which must be separately indexable
> >> > >> indirectly).
> >> > >> So, if I was to do this in the driver, I'd have to add separate
> >> sampler
> >> > >> state object instances for each texture view with appropriately
> >> swizzled
> >> > >> border color, and there's only 16 slots, so I'd be limited to 4
> >> texture
> >> > >> units.
> >> > >> Not to mention the sheer insanity, ugliness and emotional pain
> >> incurred
> >> > >> when writing that code when it COULD be so easy and simple in
> >> the state
> >> > >> tracker where you know that textures and samplers are tightly
> >> coupled,
> >> > >> while in gallium I cannot assume that to be the case.
> >> > > You wouldn't really need to create all state combinations: if
> >> you known
> >> > > that textures and sample

Re: [Mesa-dev] [PATCH 2/2] gallium: Desambiguate TGSI_OPCODE_IF.

2013-04-14 Thread Marek Olšák
The R600 code looks good.

Marek


On Sun, Apr 14, 2013 at 10:12 AM,  wrote:

> From: José Fonseca 
>
> TGSI_OPCODE_IF condition had two possible interpretations:
>
> - src.x != 0.0f
>
>   - Mesa statetracker when PIPE_SHADER_CAP_INTEGERS was false for either
> vertex or fragment shaders
>   - gallivm/llvmpipe
>   - postprocess
>   - vl state tracker
>   - vega state tracker
>   - most old drivers
>   - old internal state trackers
>   - many graw examples
>
> - src.x != 0U
>
>   - Mesa statetracker when PIPE_SHADER_CAP_INTEGERS was true for both
> vertex and fragment shaders
>   - tgsi_exec/softpipe
>   - r600
>   - radeonsi
>   - nv50
>
> And drivers that use draw module also were a mess (because Mesa would
> emit float IFs, but draw module supports native integers so it would
> interpret IF arg as integers...)
>
> This sort of works if the source argument is limited to float +0.0f or
> +1.0f, integer 0, but would fail if source is float -0.0f, or integer in
> the float NaN range.  It could also fail if source is integer 1, and
> hardware flushes denormalized numbers to zero.
>
> But with this change there are now two opcodes, IF and UIF, with clear
> meaning.
>
> Drivers that do not support native integers do not need to worry about
> UIF.  However, for backwards compatibility with old state trackers and
> examples, it is advisable that native integer capable drivers also
> support the float IF opcode.
>
> I tried to implement this for r600 and radeonsi based on the surrounding
> code.  I couldn't do this for nouveau, so I just shunted IF/UIF
> together, which matches the current behavior.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |1 +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c|1 +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c   |1 +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c|   19 -
>  src/gallium/auxiliary/tgsi/tgsi_dump.c |2 +
>  src/gallium/auxiliary/tgsi/tgsi_exec.c |   22 +++
>  src/gallium/auxiliary/tgsi/tgsi_info.c |2 +-
>  src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h   |1 +
>  src/gallium/docs/source/tgsi.rst   |   21 --
>  .../drivers/nv50/codegen/nv50_ir_from_tgsi.cpp |6 +++
>  src/gallium/drivers/r600/r600_shader.c |   21 +++---
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c|   41
> 
>  src/gallium/include/pipe/p_shader_tokens.h |2 +-
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |8 +++-
>  src/mesa/state_tracker/st_mesa_to_tgsi.c   |   12 +-
>  15 files changed, 137 insertions(+), 23 deletions(-)
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Roland Scheidegger
Am 14.04.2013 18:55, schrieb Marek Olšák:
> I think the hardware doesn't care what the border color type is. I think
> the border color is "fetched" from the sampler state, which should be a
> memcpy. If no texels are fetched from the texture, the border color is
> copied to the destination register. If I set the texture hardware format
> to "invalid", the texture fetch instructions always return the border
> color, which suggests the hardware really does not care about the type.
But d3d always sets border color as float. So if your border color is
1.0, I doubt setting the fetched texel value of an int texture to
0x3f800000 when you hit the border is the right thing to do (but it
would obviously be correct for a float texture). You certainly can
convert those float values to int type in your driver, but it will not
work if the same sampler is used both for int and float textures.
Though actually the spec also says
(http://msdn.microsoft.com/de-ch/library/windows/desktop/bb172415%28v=vs.85%29.aspx)
border color must be between 0.0 and 1.0. Doesn't make a whole lot of
sense for integer textures (as the only possible values when converting
to int would be 0 and 1). So something's clearly missing here...


> 
> OpenGL also doesn't care what the border color type is after it is set,
> because the state is a union type.
Yes of course.

Roland


> 
> Marek
> 
> 
> 
> On Sun, Apr 14, 2013 at 6:20 PM, Roland Scheidegger  > wrote:
> 
> Oh and btw how does this work for real hw, if the hardware indeed
> interprets the border color value according to format?
> Are there some bits to set that the border color value is either
> interpreted according to format (useful for opengl) or always as float
> (useful for d3d10)? Or how else do you use the same sampler for
> different int/float textures?
> This discrepancy between OpenGL and d3d10 is quite a mess.
> 
> Roland
> 
> 
> 
> Am 14.04.2013 17:45, schrieb Roland Scheidegger:
> > Yeah it is ok for OpenGL. I guess for d3d10 we'd probably need to
> create
> > another sampler if the same sampler is used for both int and float
> > textures. Or just supply both int and float border colors to the
> sample
> > code (but making it work both for opengl and d3d would be ugly).
> FWIW it
> > looks like some intel hw also seems to require multiple border color
> > values (and 6!!! ones at that), and the hw just picks the right value
> > based on format. Though for some reason the only border color
> format it
> > does _not_ have is 32bit int, so it looks this does absolutely nothing
> > to make both float and 32bit int colors work with the same sampler
> > simultaneously (and I guess 32bit int is probably the reason opengl
> > specifies those as ints, since you can't get accurate values with
> floats).
> > The swizzling looks like an orthogonal issue to that, however.
> >
> > Roland
> >
> >
> > Am 14.04.2013 16:18, schrieb Marek Olšák:
> >> The border color in the sampler state is untyped and that's okay. The
> >> type is irrelevant with nearest filtering - just memcpy the
> border color
> >> to the destination register (if there is swizzling, just do what
> you do
> >> for texels). With linear filtering, you can always assume it's float
> >> (regardless of the sampler view).
> >>
> >> Marek
> >>
> >>
> >> On Sun, Apr 14, 2013 at 3:34 PM, Jose Fonseca
> mailto:jfons...@vmware.com>
> >> >> wrote:
> >>
> >>
> >>
> >> - Original Message -
> >> > On 14.04.2013 13:44, Jose Fonseca wrote:
> >> > > - Original Message -
> >> > >> From: Christoph Bumiller  
> >>  >>
> >> > >>
> >> > >> This is the only sane solution for nv50 and nvc0
> (really, trust
> >> me),
> >> > >> but since on other hardware the border colour is tightly
> >> coupled with
> >> > >> texture state they'd have to undo the swizzle, so I've
> added a cap.
> >> > >>
> >> > >> The name of the cap could be changed to be more
> descriptive, like
> >> > >> PIPE_CAP_TEXTURE_SWIZZLE_AFFECTS_BORDER_COLOR.
> >> > > Yes, please.
> >> > >
> >> > >> The dependency of update_sampler on the texture updates was
> >> > >> introduced to avoid doing the apply_depthmode to the
> swizzle twice.
> >> > >>
> >> > >> More detailed explanation of driver situation:
> >> > >>
> >> > >> No, really, don't suggest doing this in the driver. The
> driver has
> >> > >> elegantly separated texture view and sampler states
> (which are each
> >> > >>

[Mesa-dev] [PATCH 0/6] New DirectFB sw for Gallium's EGL state tracker

2013-04-14 Thread Ilyes Gouta
These are a few patches that add a new DirectFB native and winsys backend
for the EGL state tracker. This is just the underlying infrastructure
that enables DirectFB surfaces to act as EGL render targets for gallium's
softpipe and llvmpipe sw renderers.

A DirectFB application can now do:

desc.flags = DSDESC_CAPS|DSDESC_WIDTH|DSDESC_HEIGHT|DSDESC_PIXELFORMAT;
desc.caps = DSCAPS_DOUBLE; /* |DSCAPS_PRIMARY */
desc.width = 640;
desc.height = 480;
desc.pixelformat = DSPF_ARGB;
pDfb->CreateSurface(pDfb, &desc, &pSurface);

context = eglCreateContext(display, config[0], EGL_NO_CONTEXT, NULL);
surface = eglCreateWindowSurface(display, config[0], pSurface, NULL);

eglMakeCurrent(display, surface, surface, context);

in order to embed a mesa/EGL rendering into a target surface which could
subsequently serve as a source for further processing.

Ilyes Gouta (6):
  configure.ac: configure for DirectFB
  gallium: new DirectFB backend for the EGL state tracker
  gallium: new winsys for sw DirectFB
  egl: directfb: new DirectFB native EGL types
  egl: directfb: instantiate a new native DirectFB platform
  egl: directfb: automake updates for EGL_PLATFORM_DIRECTFB

 configure.ac   |  11 +-
 include/EGL/eglplatform.h  |   6 +
 src/egl/main/Makefile.am   |   6 +
 src/egl/main/egldisplay.c  |   7 +-
 src/egl/main/egldisplay.h  |   1 +
 src/gallium/state_trackers/egl/Makefile.am |   9 +
 src/gallium/state_trackers/egl/common/egl_g3d.c|   6 +
 src/gallium/state_trackers/egl/common/native.h |   3 +
 .../state_trackers/egl/directfb/native_directfb.c  | 585 +
 src/gallium/targets/egl-static/Makefile.am |   7 +
 src/gallium/winsys/sw/Makefile.am  |   4 +
 src/gallium/winsys/sw/directfb/Makefile.am |  31 ++
 .../winsys/sw/directfb/directfb_sw_winsys.c| 280 ++
 .../winsys/sw/directfb/directfb_sw_winsys.h|  35 ++
 14 files changed, 989 insertions(+), 2 deletions(-)
 create mode 100644 src/gallium/state_trackers/egl/directfb/native_directfb.c
 create mode 100644 src/gallium/winsys/sw/directfb/Makefile.am
 create mode 100644 src/gallium/winsys/sw/directfb/directfb_sw_winsys.c
 create mode 100644 src/gallium/winsys/sw/directfb/directfb_sw_winsys.h

-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] configure.ac: configure for DirectFB

2013-04-14 Thread Ilyes Gouta

Signed-off-by: Ilyes Gouta 
---
 configure.ac | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 1c9d606..d74ec94 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1496,7 +1496,7 @@ dnl
 AC_ARG_WITH([egl-platforms],
 [AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@],
 [comma delimited native platforms libEGL supports, e.g.
-"x11,drm" @<:@default=auto@:>@])],
+"x11,drm,directfb" @<:@default=auto@:>@])],
 [with_egl_platforms="$withval"],
 [if test "x$enable_egl" = xyes; then
with_egl_platforms="x11"
@@ -1543,6 +1543,10 @@ for plat in $egl_platforms; do
android|gdi)
;;
 
+   directfb)
+   PKG_CHECK_MODULES([DIRECTFB], [directfb])
+   ;;
+
*)
AC_MSG_ERROR([EGL platform '$plat' does not exist])
;;
@@ -1569,6 +1573,7 @@ AM_CONDITIONAL(HAVE_EGL_PLATFORM_X11, echo 
"$egl_platforms" | grep 'x11' >/dev/n
 AM_CONDITIONAL(HAVE_EGL_PLATFORM_WAYLAND, echo "$egl_platforms" | grep 
'wayland' >/dev/null 2>&1)
 AM_CONDITIONAL(HAVE_EGL_PLATFORM_DRM, echo "$egl_platforms" | grep 'drm' 
>/dev/null 2>&1)
 AM_CONDITIONAL(HAVE_EGL_PLATFORM_FBDEV, echo "$egl_platforms" | grep 'fbdev' 
>/dev/null 2>&1)
+AM_CONDITIONAL(HAVE_EGL_PLATFORM_DIRECTFB, echo "$egl_platforms" | grep 
'directfb' >/dev/null 2>&1)
 AM_CONDITIONAL(HAVE_EGL_PLATFORM_NULL, echo "$egl_platforms" | grep 'null' 
>/dev/null 2>&1)
 
 AM_CONDITIONAL(HAVE_EGL_DRIVER_DRI2, test "x$HAVE_EGL_DRIVER_DRI2" != "x")
@@ -1578,6 +1583,9 @@ AC_SUBST([EGL_NATIVE_PLATFORM])
 AC_SUBST([EGL_PLATFORMS])
 AC_SUBST([EGL_CFLAGS])
 
+AC_SUBST([DIRECTFB_CFLAGS])
+AC_SUBST([DIRECTFB_LIBS])
+
 AC_ARG_WITH([egl-driver-dir],
 [AS_HELP_STRING([--with-egl-driver-dir=DIR],
 [directory for EGL drivers [[default=${libdir}/egl]]])],
@@ -2127,6 +2135,7 @@ AC_CONFIG_FILES([Makefile
src/gallium/winsys/sw/wayland/Makefile
src/gallium/winsys/sw/wrapper/Makefile
src/gallium/winsys/sw/xlib/Makefile
+   src/gallium/winsys/sw/directfb/Makefile
src/gbm/Makefile
src/gbm/main/gbm.pc
src/glsl/Makefile
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] gallium: new DirectFB backend for the EGL state tracker

2013-04-14 Thread Ilyes Gouta

Signed-off-by: Ilyes Gouta 
---
 src/gallium/state_trackers/egl/Makefile.am |   9 +
 src/gallium/state_trackers/egl/common/native.h |   3 +
 .../state_trackers/egl/directfb/native_directfb.c  | 585 +
 3 files changed, 597 insertions(+)
 create mode 100644 src/gallium/state_trackers/egl/directfb/native_directfb.c

diff --git a/src/gallium/state_trackers/egl/Makefile.am 
b/src/gallium/state_trackers/egl/Makefile.am
index f78b36e..27925b9 100644
--- a/src/gallium/state_trackers/egl/Makefile.am
+++ b/src/gallium/state_trackers/egl/Makefile.am
@@ -102,3 +102,12 @@ AM_CPPFLAGS += \
-I$(top_srcdir)/src/gallium/winsys/sw \
-DHAVE_NULL_BACKEND
 endif
+
+if HAVE_EGL_PLATFORM_DIRECTFB
+libegl_la_SOURCES += directfb/native_directfb.c
+AM_CPPFLAGS += \
+   -I$(top_srcdir)/src/gallium/winsys/sw \
+   -DHAVE_DIRECTFB_BACKEND
+AM_CFLAGS += \
+$(DIRECTFB_CFLAGS)
+endif
diff --git a/src/gallium/state_trackers/egl/common/native.h 
b/src/gallium/state_trackers/egl/common/native.h
index 312b079..553d8f9 100644
--- a/src/gallium/state_trackers/egl/common/native.h
+++ b/src/gallium/state_trackers/egl/common/native.h
@@ -336,6 +336,9 @@ native_get_null_platform(const struct native_event_handler 
*event_handler);
 const struct native_platform *
 native_get_android_platform(const struct native_event_handler *event_handler);
 
+const struct native_platform *
+native_get_directfb_platform(const struct native_event_handler *event_handler);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/state_trackers/egl/directfb/native_directfb.c 
b/src/gallium/state_trackers/egl/directfb/native_directfb.c
new file mode 100644
index 000..6c3be7d
--- /dev/null
+++ b/src/gallium/state_trackers/egl/directfb/native_directfb.c
@@ -0,0 +1,585 @@
+/*
+ * Copyright (C) 2013 Ilyes Gouta, ilyes.go...@gmail.com.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * For DirectFB window system,
+ *
+ *  - the only valid native display is EGL_DEFAULT_DISPLAY
+ */
+
+#include 
+#include 
+
+#include 
+
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_debug.h"
+#include "util/u_inlines.h"
+#include "directfb/directfb_sw_winsys.h"
+
+#include "common/native_helper.h"
+#include "common/native.h"
+
+struct directfb_display {
+   struct native_display  base;
+   const struct native_event_handler *event_handler;
+   IDirectFBSurface  *pSurface;
+   struct native_config  *configs;
+   intnum_configs;
+};
+
+struct directfb_surface {
+   struct native_surfacebase;
+   enum pipe_format format;
+   struct directfb_display *display;
+   IDirectFBSurface*pSurface;
+   unsigned int server_stamp;
+   unsigned int client_stamp;
+   struct resource_surface *rsurf;
+};
+
+static INLINE struct directfb_display*
+directfb_display( const struct native_display *ndpy )
+{
+   return (struct directfb_display*)ndpy;
+}
+
+static INLINE struct directfb_surface*
+directfb_surface( const struct native_surface *nsurf )
+{
+   return (struct directfb_surface*)nsurf;
+}
+
+static const struct native_config **
+directfb_display_get_configs( struct native_display *ndpy, int *num_configs )
+{
+   struct directfb_display *display =
+ directfb_display( ndpy );
+   const struct native_config **configs;
+   int i;
+
+   configs = MALLOC( sizeof(*configs) * display->num_configs );
+   if (configs) {
+  for (i = 0; i < display->num_configs; i++)
+ configs[i] = &display->configs[i];
+  if (num_configs)
+ *num_configs = display->num_configs;
+   }
+
+   return configs;
+}
+
+static int
+directfb_display_get_param( struct native_display *ndpy,
+enum native_param_type param )
+{
+   int val = 0;
+
+   switch (param) {
+   case NATIVE_PARAM_PRESERVE_BUFFER:
+   case NATIVE_PARAM_USE_NATIVE_BUFFER:

[Mesa-dev] [PATCH 3/6] gallium: new winsys for sw DirectFB

2013-04-14 Thread Ilyes Gouta

Signed-off-by: Ilyes Gouta 
---
 src/gallium/winsys/sw/Makefile.am  |   4 +
 src/gallium/winsys/sw/directfb/Makefile.am |  31 +++
 .../winsys/sw/directfb/directfb_sw_winsys.c| 280 +
 .../winsys/sw/directfb/directfb_sw_winsys.h|  35 +++
 4 files changed, 350 insertions(+)
 create mode 100644 src/gallium/winsys/sw/directfb/Makefile.am
 create mode 100644 src/gallium/winsys/sw/directfb/directfb_sw_winsys.c
 create mode 100644 src/gallium/winsys/sw/directfb/directfb_sw_winsys.h

diff --git a/src/gallium/winsys/sw/Makefile.am 
b/src/gallium/winsys/sw/Makefile.am
index ae8984c..fc834e3 100644
--- a/src/gallium/winsys/sw/Makefile.am
+++ b/src/gallium/winsys/sw/Makefile.am
@@ -35,3 +35,7 @@ endif
 if HAVE_EGL_PLATFORM_WAYLAND
 SUBDIRS += wayland
 endif
+
+if HAVE_EGL_PLATFORM_DIRECTFB
+SUBDIRS += directfb
+endif
diff --git a/src/gallium/winsys/sw/directfb/Makefile.am 
b/src/gallium/winsys/sw/directfb/Makefile.am
new file mode 100644
index 000..71112a2
--- /dev/null
+++ b/src/gallium/winsys/sw/directfb/Makefile.am
@@ -0,0 +1,31 @@
+# Copyright (C) 2013 Ilyes Gouta, ilyes.go...@gmail.com.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CPPFLAGS = \
+   $(GALLIUM_CFLAGS)
+
+noinst_LTLIBRARIES = libws_directfb.la
+
+libws_directfb_la_SOURCES = directfb_sw_winsys.c
+
+libws_directfb_la_CFLAGS = \
+$(DIRECTFB_CFLAGS)
diff --git a/src/gallium/winsys/sw/directfb/directfb_sw_winsys.c 
b/src/gallium/winsys/sw/directfb/directfb_sw_winsys.c
new file mode 100644
index 000..056b29b
--- /dev/null
+++ b/src/gallium/winsys/sw/directfb/directfb_sw_winsys.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright (C) 2013 Ilyes Gouta, ilyes.go...@gmail.com.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ * DirectFB software rasterizer winsys.
+ */
+
+#include 
+#include 
+
+#include "pipe/p_format.h"
+#include "util/u_memory.h"
+#include "util/u_debug.h"
+#include "state_tracker/sw_winsys.h"
+
+#include "directfb_sw_winsys.h"
+
+struct directfb_sw_winsys
+{
+   struct sw_winsys base;
+   IDirectFB   *pDfb;
+};
+
+struct directfb_sw_displaytarget
+{
+   enum pipe_format  fmt;
+   IDirectFBSurface *pSurface;
+   IDirectFBSurface *pSubSurface; /* for map() and unmap() */
+   DFBSurfacePixelFormat format;
+   unsigned int  width;
+   unsigned int  height;
+   unsigned int  stride;
+   void *data;
+};
+
+static INLINE struct directfb_sw_winsys*
+directfb_sw_winsys( struct sw_winsys *ws )
+{
+   return (struct directfb_sw_winsys*)ws;
+}
+
+static INLINE struct directfb_sw_displaytarget*
+directfb_sw_displaytarget( struct sw_displaytarget *dt )
+{
+   return (struct directfb_sw_displaytarget*)dt;
+}
+
+static boolean
+directfb_sw_is_displaytarget_format_supported( struct sw_winsys *ws,
+   

[Mesa-dev] [PATCH 4/6] egl: directfb: new DirectFB native EGL types

2013-04-14 Thread Ilyes Gouta
Defining EGLNativeWindowType as a pointer to an IDirectFBSurface
allows EGL to be configured to render into a double-buffered
surface, which may be offscreen or primary (visible).

Signed-off-by: Ilyes Gouta 
---
 include/EGL/eglplatform.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/EGL/eglplatform.h b/include/EGL/eglplatform.h
index 17fdc61..a5469ad 100644
--- a/include/EGL/eglplatform.h
+++ b/include/EGL/eglplatform.h
@@ -104,6 +104,12 @@ typedef struct ANativeWindow*EGLNativeWindowType;
 typedef struct egl_native_pixmap_t  *EGLNativePixmapType;
 typedef void*EGLNativeDisplayType;
 
+#elif defined(DIRECTFB_API)
+
+typedef void   *EGLNativeDisplayType;
+typedef IDirectFBSurface   *EGLNativePixmapType;
+typedef IDirectFBSurface   *EGLNativeWindowType;
+
 #elif defined(__unix__)
 
 #ifdef MESA_EGL_NO_X11_HEADERS
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] egl: directfb: instantiate a new native DirectFB platform

2013-04-14 Thread Ilyes Gouta

Signed-off-by: Ilyes Gouta 
---
 src/egl/main/egldisplay.c   | 7 ++-
 src/egl/main/egldisplay.h   | 1 +
 src/gallium/state_trackers/egl/common/egl_g3d.c | 6 ++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c
index 985e781..6231d85 100644
--- a/src/egl/main/egldisplay.c
+++ b/src/egl/main/egldisplay.c
@@ -74,7 +74,8 @@ static const struct {
{ _EGL_PLATFORM_DRM, "drm" },
{ _EGL_PLATFORM_FBDEV, "fbdev" },
{ _EGL_PLATFORM_NULL, "null" },
-   { _EGL_PLATFORM_ANDROID, "android" }
+   { _EGL_PLATFORM_ANDROID, "android" },
+   { _EGL_PLATFORM_DIRECTFB, "directfb" }
 };
 
 
@@ -154,6 +155,10 @@ _eglNativePlatformDetectNativeDisplay(EGLNativeDisplayType 
nativeDisplay)
   return _EGL_PLATFORM_FBDEV;
 #endif
 
+#ifdef HAVE_DIRECTFB_PLATFORM
+   return _EGL_PLATFORM_DIRECTFB;
+#endif
+
if (_eglPointerIsDereferencable(nativeDisplay)) {
   void *first_pointer = *(void **) nativeDisplay;
 
diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h
index 4b33470..8efb3a1 100644
--- a/src/egl/main/egldisplay.h
+++ b/src/egl/main/egldisplay.h
@@ -46,6 +46,7 @@ enum _egl_platform_type {
_EGL_PLATFORM_FBDEV,
_EGL_PLATFORM_NULL,
_EGL_PLATFORM_ANDROID,
+   _EGL_PLATFORM_DIRECTFB,
 
_EGL_NUM_PLATFORMS,
_EGL_INVALID_PLATFORM = -1
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c 
b/src/gallium/state_trackers/egl/common/egl_g3d.c
index 86abaeb..cb1a670 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.c
@@ -132,6 +132,12 @@ egl_g3d_get_platform(_EGLDriver *drv, _EGLPlatformType 
plat)
  nplat = native_get_fbdev_platform(&egl_g3d_native_event_handler);
 #endif
  break;
+  case _EGL_PLATFORM_DIRECTFB:
+ plat_name = "directfb";
+#ifdef HAVE_DIRECTFB_BACKEND
+ nplat = native_get_directfb_platform(&egl_g3d_native_event_handler);
+#endif
+ break;
   case _EGL_PLATFORM_NULL:
  plat_name = "NULL";
 #ifdef HAVE_NULL_BACKEND
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] egl: directfb: automake updates for EGL_PLATFORM_DIRECTFB

2013-04-14 Thread Ilyes Gouta

Signed-off-by: Ilyes Gouta 
---
 src/egl/main/Makefile.am   | 6 ++
 src/gallium/targets/egl-static/Makefile.am | 7 +++
 2 files changed, 13 insertions(+)

diff --git a/src/egl/main/Makefile.am b/src/egl/main/Makefile.am
index ca5257a..e71b57f 100644
--- a/src/egl/main/Makefile.am
+++ b/src/egl/main/Makefile.am
@@ -99,6 +99,12 @@ if HAVE_EGL_PLATFORM_FBDEV
 AM_CFLAGS += -DHAVE_FBDEV_PLATFORM
 endif
 
+if HAVE_EGL_PLATFORM_DIRECTFB
+AM_CFLAGS += -DHAVE_DIRECTFB_PLATFORM
+AM_CFLAGS += $(DIRECTFB_CFLAGS)
+libEGL_la_LIBADD += $(DIRECTFB_LIBS)
+endif
+
 if HAVE_EGL_PLATFORM_NULL
 AM_CFLAGS += -DHAVE_NULL_PLATFORM
 endif
diff --git a/src/gallium/targets/egl-static/Makefile.am 
b/src/gallium/targets/egl-static/Makefile.am
index 5c40ae8..2886e7a 100644
--- a/src/gallium/targets/egl-static/Makefile.am
+++ b/src/gallium/targets/egl-static/Makefile.am
@@ -90,6 +90,13 @@ egl_gallium_la_LIBADD += \
$(LIBDRM_LIBS)
 endif
 
+if HAVE_EGL_PLATFORM_DIRECTFB
+AM_CPPFLAGS += $(DIRECTFB_CFLAGS)
+egl_gallium_la_LIBADD += \
+$(top_builddir)/src/gallium/winsys/sw/directfb/libws_directfb.la \
+$(DIRECTFB_LIBS)
+endif
+
 if HAVE_EGL_PLATFORM_FBDEV
 egl_gallium_la_LIBADD += 
$(top_builddir)/src/gallium/winsys/sw/fbdev/libfbdev.la
 endif
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Marek Olšák
If the border color is 1.0f and the format is integer, I'm not sure if the
behavior is defined in GL, but I think r600 will return 0x3f800000, which
is fine.

No, I cannot convert the border color to any other type, because the
original type is UNKNOWN after glTexParameter returns. Also, the border
color is not clamped in GL.

Marek


On Sun, Apr 14, 2013 at 7:32 PM, Roland Scheidegger wrote:

> Am 14.04.2013 18:55, schrieb Marek Olšák:
> > I think the hardware doesn't care what the border color type is. I think
> > the border color is "fetched" from the sampler state, which should be a
> > memcpy. If no texels are fetched from the texture, the border color is
> > copied to the destination register. If I set the texture hardware format
> > to "invalid", the texture fetch instructions always return the border
> > color, which suggests the hardware really does not care about the type.
> But d3d always sets border color as float. So if your border color is
> 1.0, I doubt setting the fetched texel value of a int texture to
> 0x3f800000 when you hit the border is the right thing to do (but it
> would obviously be correct for a float texture). You certainly can
> convert those float values to int type in your driver, but it will not
> work if the same sampler is used both for int and float textures.
> Though actually the spec also says
> (
> http://msdn.microsoft.com/de-ch/library/windows/desktop/bb172415%28v=vs.85%29.aspx
> )
> border color must be between 0.0 and 1.0. Doesn't make a whole lot of
> sense for integer textures (as the only possible values when converting
> to int would be 0 and 1). So something's clearly missing here...
>
>
> >
> > OpenGL also doesn't care what the border color type is after it is set,
> > because the state is a union type.
> Yes of course.
>
> Roland
>
>
> >
> > Marek
> >
> >
> >
> > On Sun, Apr 14, 2013 at 6:20 PM, Roland Scheidegger  > > wrote:
> >
> > Oh and btw how does this work for real hw, if the hardware indeed
> > interprets the border color value according to format?
> > Are there some bits to set that the border color value is either
> > interpreted according to format (useful for opengl) or always as
> float
> > (useful for d3d10)? Or how else do you use the same sampler for
> > different int/float textures?
> > This discrepancy between OpenGL and d3d10 is quite a mess.
> >
> > Roland
> >
> >
> >
> > Am 14.04.2013 17:45, schrieb Roland Scheidegger:
> > > Yeah it is ok for OpenGL. I guess for d3d10 we'd probably need to
> > create
> > > another sampler if the same sampler is used for both int and float
> > > textures. Or just supply both int and float border colors to the
> > sample
> > > code (but making it work both for opengl and d3d would be ugly).
> > FWIW it
> > > looks like some intel hw also seems to require multiple border
> color
> > > values (and 6!!! ones at that), and the hw just picks the right
> value
> > > based on format. Though for some reason the only border color
> > format it
> > > does _not_ have is 32bit int, so it looks this does absolutely
> nothing
> > > to make both float and 32bit int colors work with the same sampler
> > > simultaneously (and I guess 32bit int is probably the reason opengl
> > > specifies those as ints, since you can't get accurate values with
> > floats).
> > > The swizzling looks like an orthogonal issue to that, however.
> > >
> > > Roland
> > >
> > >
> > > Am 14.04.2013 16:18, schrieb Marek Olšák:
> > >> The border color in the sampler state is untyped and that's okay.
> The
> > >> type is irrelevant with nearest filtering - just memcpy the
> > border color
> > >> to the destination register (if there is swizzling, just do what
> > you do
> > >> for texels). With linear filtering, you can always assume it's
> float
> > >> (regardless of the sampler view).
> > >>
> > >> Marek
> > >>
> > >>
> > >> On Sun, Apr 14, 2013 at 3:34 PM, Jose Fonseca
> > mailto:jfons...@vmware.com>
> > >> >> wrote:
> > >>
> > >>
> > >>
> > >> - Original Message -
> > >> > On 14.04.2013 13:44, Jose Fonseca wrote:
> > >> > > - Original Message -
> > >> > >> From: Christoph Bumiller  > 
> > >>  > >>
> > >> > >>
> > >> > >> This is the only sane solution for nv50 and nvc0
> > (really, trust
> > >> me),
> > >> > >> but since on other hardware the border colour is tightly
> > >> coupled with
> > >> > >> texture state they'd have to undo the swizzle, so I've
> > added a cap.
> > >> > >>
> > >> > >> The name of the cap could be changed to be mo

Re: [Mesa-dev] [PATCH 2/2] gallium: Desambiguate TGSI_OPCODE_IF.

2013-04-14 Thread Roland Scheidegger
Am 14.04.2013 18:39, schrieb Marek Olšák:
> On Sun, Apr 14, 2013 at 5:24 PM, Roland Scheidegger  > wrote:
> 
> Am 14.04.2013 10:12, schrieb jfons...@vmware.com
> :> -  TBD
> > +  Start an IF ... ELSE .. ENDIF block.  Condition evaluates to
> true if
> > +
> > +src0.x != 0.0
> > +
> > +  where src0.x is interpreted as a floating point register.
> Maybe should say something wrt evaluation of NaNs? I know we haven't
> really established rules for comparisons etc. wrt NaNs but those
> bools-as-float make me cry. I guess it is no different though than other
> float opcodes, if we now really have a definition saying IF takes _any_
> float not just a bool-as-float which was loosely implied before.
> 
> 
> I don't know where the term "bool-as-float" came from, but I'd rather
> not use it unless it's properly defined somewhere, and TGSI doesn't have
> bools anyway, so why bother? The GLSL compiler or glsl-to-tgsi is
> responsible for converting bools to either floats or ints and TGSI
> shouldn't need to care. Both r300g and r600g use (src0.x != 0.0) for IF
> and (src0.x != 0) for UIF (r600-only), so there is always the
> "not-equal-to" operator, which is also well defined for NaNs.
That depends on your definition of "well defined". llvm for instance has
both "ordered not equal" and "unordered not equal" operators for
precisely this reason. But yes I guess ieee-754 has some defined
behavior there.
That "bool-as-float" essentially comes from state trackers, because the
language they are translating from require bools as "if" inputs - hence
the input value always should have been the result of some comparison
(or similar) operation (which in turn return these fake bools).
But I agree this was never really documented, so just clearly stating
you can pass in any float is just fine (it means that state trackers now
are explicitly allowed to omit the comparison for simple cases like this
one, "if(a != 0)...", well if they can detect it, it was not really
obvious without documentation before if that would be ok). So in that
sense nothing more needs to be said about NaNs, since they just adhere
to the same rules as in other places (meaning pretty much undefined for
most things, currently).

> 
> Also if you care about NaNs, we should start by defining how
> instructions should handle them, e.g. how relational operators handle
> NaNs, whether the multiplication operator follows the rule 0*anything =
> 0 (MUL, MAD, DP4, ...), etc.
> 
> R600 have separate opcodes depending on what behavior you want, for example:
> - The MUL opcode follows the rule 0*anything = 0. (DX9)
> - The MUL_IEEE opcode follows the IEEE behavior.
> 
> The other opcodes with both the DX9 and IEEE behavior are: MAD, DP4,
> EX2, LG2, RCP, RSQ. There are also separate MIN and MAX opcodes for DX9
> and DX10. We should choose our opcodes carefully depending on whether we
> are implementing a DX9, DX10, OpenGL, or OpenCL state tracker.

Yes indeed. d3d10 has quite strict rules which are mostly ieee754 (or
ieee754r) but with some deviations. Other specs tend to be more lenient,
and requiring strict rules could add quite some overhead, so we might
want to introduce additional opcodes. How does MIN/MAX work for dx9 btw?
DX10 will require you to give back the non-NaN value if only one
argument is NaN (which seems to be ieee754r behavior), which for
instance unfortunately doesn't translate well to sse2 code (as sse2 will
just give you the second source if there's a NaN in either src which
means you had to use cmp/select instead and be careful about what
comparison you use there since the cpu doesn't support the full set of
"ordered" and "unordered" comparisons unless you've got avx though
presumably llvm would take care of that if you use the right comparison
ops there).

Roland
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Roland Scheidegger
No, it's not defined in GL. But it doesn't matter there anyway cause you
can't have the same sampler (and hence border color) used for different
textures.

Roland


Am 14.04.2013 19:53, schrieb Marek Olšák:
> If the border color is 1.0f and the format is integer, I'm not sure if
> the behavior is defined in GL, but I think r600 will return 0x3f800000,
> which is fine.
> 
> No, I cannot convert the border color to any other type, because the
> original type is UNKNOWN after glTexParameter returns. Also, the border
> color is not clamped in GL.
> 
> Marek
> 
> 
> On Sun, Apr 14, 2013 at 7:32 PM, Roland Scheidegger  > wrote:
> 
> Am 14.04.2013 18:55, schrieb Marek Olšák:
> > I think the hardware doesn't care what the border color type is. I
> think
> > the border color is "fetched" from the sampler state, which should
> be a
> > memcpy. If no texels are fetched from the texture, the border color is
> > copied to the destination register. If I set the texture hardware
> format
> > to "invalid", the texture fetch instructions always return the border
> > color, which suggests the hardware really does not care about the
> type.
> But d3d always sets border color as float. So if your border color is
> 1.0, I doubt setting the fetched texel value of a int texture to
> 0x3f800000 when you hit the border is the right thing to do (but it
> would obviously be correct for a float texture). You certainly can
> convert those float values to int type in your driver, but it will not
> work if the same sampler is used both for int and float textures.
> Though actually the spec also says
> 
> (http://msdn.microsoft.com/de-ch/library/windows/desktop/bb172415%28v=vs.85%29.aspx)
> border color must be between 0.0 and 1.0. Doesn't make a whole lot of
> sense for integer textures (as the only possible values when converting
> to int would be 0 and 1). So something's clearly missing here...
> 
> 
> >
> > OpenGL also doesn't care what the border color type is after it is
> set,
> > because the state is a union type.
> Yes of course.
> 
> Roland
> 
> 
> >
> > Marek
> >
> >
> >
> > On Sun, Apr 14, 2013 at 6:20 PM, Roland Scheidegger
> mailto:srol...@vmware.com>
> > >> wrote:
> >
> > Oh and btw how does this work for real hw, if the hardware indeed
> > interprets the border color value according to format?
> > Are there some bits to set that the border color value is either
> > interpreted according to format (useful for opengl) or always
> as float
> > (useful for d3d10)? Or how else do you use the same sampler for
> > different int/float textures?
> > This discrepancy between OpenGL and d3d10 is quite a mess.
> >
> > Roland
> >
> >
> >
> > Am 14.04.2013 17:45, schrieb Roland Scheidegger:
> > > Yeah it is ok for OpenGL. I guess for d3d10 we'd probably
> need to
> > create
> > > another sampler if the same sampler is used for both int and
> float
> > > textures. Or just supply both int and float border colors to the
> > sample
> > > code (but making it work both for opengl and d3d would be ugly).
> > FWIW it
> > > looks like some intel hw also seems to require multiple
> border color
> > > values (and 6!!! ones at that), and the hw just picks the
> right value
> > > based on format. Though for some reason the only border color
> > format it
> > > does _not_ have is 32bit int, so it looks this does
> absolutely nothing
> > > to make both float and 32bit int colors work with the same
> sampler
> > > simultaneously (and I guess 32bit int is probably the reason
> opengl
> > > specifies those as ints, since you can't get accurate values
> with
> > floats).
> > > The swizzling looks like an orthogonal issue to that, however.
> > >
> > > Roland
> > >
> > >
> > > Am 14.04.2013 16:18, schrieb Marek Olšák:
> > >> The border color in the sampler state is untyped and that's
> okay. The
> > >> type is irrelevant with nearest filtering - just memcpy the
> > border color
> > >> to the destination register (if there is swizzling, just do
> what
> > you do
> > >> for texels). With linear filtering, you can always assume
> it's float
> > >> (regardless of the sampler view).
> > >>
> > >> Marek
> > >>
> > >>
> > >> On Sun, Apr 14, 2013 at 3:34 PM, Jose Fonseca
> > mailto:jfons...@vmware.com>
> >
> > >> 

Re: [Mesa-dev] [PATCH 2/2] gallium: Desambiguate TGSI_OPCODE_IF.

2013-04-14 Thread Marek Olšák
The R600 ISA documentation only says that the DX10 variants of MIN and MAX
use DX10 handling of NaNs. It does not say anything about the non-DX10
variants.

Marek


On Sun, Apr 14, 2013 at 8:16 PM, Roland Scheidegger wrote:

> Am 14.04.2013 18:39, schrieb Marek Olšák:
> > On Sun, Apr 14, 2013 at 5:24 PM, Roland Scheidegger  > > wrote:
> >
> > Am 14.04.2013 10:12, schrieb jfons...@vmware.com
> > :> -  TBD
> > > +  Start an IF ... ELSE .. ENDIF block.  Condition evaluates to
> > true if
> > > +
> > > +src0.x != 0.0
> > > +
> > > +  where src0.x is interpreted as a floating point register.
> > Maybe should say something wrt evaluation of NaNs? I know we haven't
> > really established rules for comparisons etc. wrt NaNs but those
> > bools-as-float make me cry. I guess it is no different though than
> other
> > float opcodes, if we now really have a definition saying IF takes
> _any_
> > float not just a bool-as-float which was loosely implied before.
> >
> >
> > I don't know where the term "bool-as-float" came from, but I'd rather
> > not use it unless it's properly defined somewhere, and TGSI doesn't have
> > bools anyway, so why bother? The GLSL compiler or glsl-to-tgsi is
> > responsible for converting bools to either floats or ints and TGSI
> > shouldn't need to care. Both r300g and r600g use (src0.x != 0.0) for IF
> > and (src0.x != 0) for UIF (r600-only), so there is always the
> > "not-equal-to" operator, which is also well defined for NaNs.
> That depends on your definition of "well defined". llvm for instance has
> both "ordered not equal" and "unordered not equal" operators for
> precisely this reason. But yes I guess ieee-754 has some defined
> behavior there.
> That "bool-as-float" essentially comes from state trackers, because the
> language they are translating from require bools as "if" inputs - hence
> the input value always should have been the result of some comparison
> (or similar) operation (which in turn return these fake bools).
> But I agree this was never really documented, so just clearly stating
> you can pass in any float is just fine (it means that state trackers now
> are explicitly allowed to omit the comparison for simple cases like this
> one, "if(a != 0)...", well if they can detect it, it was not really
> obvious without documentation before if that would be ok). So in that
> sense nothing more needs to be said about NaNs, since they just adhere
> to the same rules as in other places (meaning pretty much undefined for
> most things, currently).
>
> >
> > Also if you care about NaNs, we should start by defining how
> > instructions should handle them, e.g. how relational operators handle
> > NaNs, whether the multiplication operator follows the rule 0*anything =
> > 0 (MUL, MAD, DP4, ...), etc.
> >
> > R600 have separate opcodes depending on what behavior you want, for
> example:
> > - The MUL opcode follows the rule 0*anything = 0. (DX9)
> > - The MUL_IEEE opcode follows the IEEE behavior.
> >
> > The other opcodes with both the DX9 and IEEE behavior are: MAD, DP4,
> > EX2, LG2, RCP, RSQ. There are also separate MIN and MAX opcodes for DX9
> > and DX10. We should choose our opcodes carefully depending on whether we
> > are implementing a DX9, DX10, OpenGL, or OpenCL state tracker.
>
> Yes indeed. d3d10 has quite strict rules which are mostly ieee754 (or
> ieee754r) but with some deviations. Other specs tend to be more lenient,
> and requiring strict rules could add quite some overhead, so we might
> want to introduce additional opcodes. How does MIN/MAX work for dx9 btw?
> DX10 will require you to give back the non-NaN value if only one
> argument is NaN (which seems to be ieee754r behavior), which for
> instance unfortunately doesn't translate well to sse2 code (as sse2 will
> just give you the second source if there's a NaN in either src which
> means you had to use cmp/select instead and be careful about what
> comparison you use there since the cpu doesn't support the full set of
> "ordered" and "unordered" comparisons unless you've got avx though
> presumably llvm would take care of that if you use the right comparison
> ops there).
>
> Roland
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Roland Scheidegger
Ahh forget about all this.
Thanks to Christoph Bumiller for pointing out to me that in fact this is a
complete non-issue, since you cannot sample from integer textures _at
all_ with d3d10. Only ld, hence no sampler and no border color...
Looks like OpenGL is a lot more permissive there (which allows you to do
pretty much anything with such textures just as ordinary textures,
except you can't use non-nearest filter).

Am 14.04.2013 20:19, schrieb Roland Scheidegger:
> No, it's not defined in GL. But it doesn't matter there anyway cause you
> can't have the same sampler (and hence border color) used for different
> textures.
> 
> Roland
> 
> 
> Am 14.04.2013 19:53, schrieb Marek Olšák:
>> If the border color is 1.0f and the format is integer, I'm not sure if
>> the behavior is defined in GL, but I think r600 will return 0x3f800000,
>> which is fine.
>>
>> No, I cannot convert the border color to any other type, because the
>> original type is UNKNOWN after glTexParameter returns. Also, the border
>> color is not clamped in GL.
>>
>> Marek
>>
>>
>> On Sun, Apr 14, 2013 at 7:32 PM, Roland Scheidegger > > wrote:
>>
>> Am 14.04.2013 18:55, schrieb Marek Olšák:
>> > I think the hardware doesn't care what the border color type is. I
>> think
>> > the border color is "fetched" from the sampler state, which should
>> be a
>> > memcpy. If no texels are fetched from the texture, the border color is
>> > copied to the destination register. If I set the texture hardware
>> format
>> > to "invalid", the texture fetch instructions always return the border
>> > color, which suggests the hardware really does not care about the
>> type.
>> But d3d always sets border color as float. So if your border color is
>> 1.0, I doubt setting the fetched texel value of a int texture to
>> 0x3f800000 when you hit the border is the right thing to do (but it
>> would obviously be correct for a float texture). You certainly can
>> convert those float values to int type in your driver, but it will not
>> work if the same sampler is used both for int and float textures.
>> Though actually the spec also says
>> 
>> (http://msdn.microsoft.com/de-ch/library/windows/desktop/bb172415%28v=vs.85%29.aspx)
>> border color must be between 0.0 and 1.0. Doesn't make a whole lot of
>> sense for integer textures (as the only possible values when converting
>> to int would be 0 and 1). So something's clearly missing here...
>>
>>
>> >
>> > OpenGL also doesn't care what the border color type is after it is
>> set,
>> > because the state is a union type.
>> Yes of course.
>>
>> Roland
>>
>>
>> >
>> > Marek
>> >
>> >
>> >
>> > On Sun, Apr 14, 2013 at 6:20 PM, Roland Scheidegger
>> mailto:srol...@vmware.com>
>> > >> wrote:
>> >
>> > Oh and btw how does this work for real hw, if the hardware indeed
>> > interprets the border color value according to format?
>> > Are there some bits to set that the border color value is either
>> > interpreted according to format (useful for opengl) or always
>> as float
>> > (useful for d3d10)? Or how else do you use the same sampler for
>> > different int/float textures?
>> > This discrepancy between OpenGL and d3d10 is quite a mess.
>> >
>> > Roland
>> >
>> >
>> >
>> > Am 14.04.2013 17:45, schrieb Roland Scheidegger:
>> > > Yeah it is ok for OpenGL. I guess for d3d10 we'd probably
>> need to
>> > create
>> > > another sampler if the same sampler is used for both int and
>> float
>> > > textures. Or just supply both int and float border colors to the
>> > sample
>> > > code (but making it work both for opengl and d3d would be ugly).
>> > FWIW it
>> > > looks like some intel hw also seems to require multiple
>> border color
>> > > values (and 6!!! ones at that), and the hw just picks the
>> right value
>> > > based on format. Though for some reason the only border color
>> > format it
>> > > does _not_ have is 32bit int, so it looks this does
>> absolutely nothing
>> > > to make both float and 32bit int colors work with the same
>> sampler
>> > > simultaneously (and I guess 32bit int is probably the reason
>> opengl
>> > > specifies those as ints, since you can't get accurate values
>> with
>> > floats).
>> > > The swizzling looks like an orthogonal issue to that, however.
>> > >
>> > > Roland
>> > >
>> > >
>> > > Am 14.04.2013 16:18, schrieb Marek Olšák:
>> > >> The border color in the sampler state is untyped and that's
>> okay. The
>> > >> type is irrelevant with nearest filtering - just memcpy

Re: [Mesa-dev] Mismatch between Mesas dispatch table and the one used by the X server

2013-04-14 Thread Stefan Brüns
Ping!

Am Samstag, 6. April 2013, 20:36:50 schrieb Stefan Brüns:
> Am Freitag, 22. März 2013, 11:46:52 schrieben Sie:
> > To call glFoo, the xserver (or libGL) does
> > 
> >  (dispatch_table[offset_of_glFoo])(...);
> > 
> > To set the pointer for the glFoo function, the driver does
> > 
> >  dispatch_table[remap_table[remap_offset_of_glFoo]] = driver_glFoo;
> 
> Thanks for the clarification, after knowing what should happen finding the
> root cause was straightforward:
> 
> The population of dispatch_table is done using _glapi_add_dispatch(...).
> This function is defined by the XServer in glx/glapi.c, and it is defined
> in Mesas mapi/mapi/mapi_glapi.c
> 
> Now, why is libGL and thus libglapi linked to the X server - the Intel
> driver optionally uses glamor, which uses libGL!
> 
> Recompiling the intel driver without glamor resolves this problem.
> 
> Regards,
> 
> Stefan
-- 
Stefan Brüns  /  Bergstraße 21  /  52062 Aachen
phone: +49 241 53809034 mobile: +49 151 50412019
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: Workaround for a nested loop bug on Cayman

2013-04-14 Thread Martin Andersson
There is a bug where a BREAK/CONTINUE followed by LOOP_STARTxxx for nested
loops may put the branch stack into a state such that ALU_PUSH_BEFORE
doesn't work as expected. Work around this by replacing the ALU_PUSH_BEFORE
with a PUSH + ALU for nested loops.

Fixes piglit tests:
spec/!OpenGL 1.1/read-front
spec/EXT_transform_feedback/order*
spec/glsl-1.40/uniform_buffer/fs-struct-pad

No piglit regressions.
---
 src/gallium/drivers/r600/r600_shader.c | 33 ++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 6dbca50..aee011e 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -252,6 +252,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx);
 static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
 static int tgsi_endloop(struct r600_shader_ctx *ctx);
 static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
+static bool need_cayman_loop_bug_workaround(struct r600_shader_ctx *ctx);
 
 /*
  * bytestream -> r600 shader
@@ -5490,7 +5491,7 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
return 0;
 }
 
-static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
+static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int 
alu_type)
 {
struct r600_bytecode_alu alu;
int r;
@@ -5510,7 +5511,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, 
int opcode)
 
alu.last = 1;
 
-   r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);
+   r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
if (r)
return r;
return 0;
@@ -5730,7 +5731,20 @@ static void break_loop_on_flag(struct r600_shader_ctx 
*ctx, unsigned fc_sp)
 
 static int tgsi_if(struct r600_shader_ctx *ctx)
 {
-   emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT);
+   int alu_type = CF_OP_ALU_PUSH_BEFORE;
+
+   /* 
+  There is a bug where a BREAK/CONTINUE followed by LOOP_STARTxxx for 
nested
+  loops may put the branch stack into a state such that 
ALU_PUSH_BEFORE 
+  doesn't work as expected. Workaround this by replacing the 
ALU_PUSH_BEFORE
+  with a PUSH + ALU for nested loops.
+*/
+   if (ctx->bc->chip_class == CAYMAN && 
need_cayman_loop_bug_workaround(ctx)) {
+   r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
+   alu_type = CF_OP_ALU;
+   }
+
+   emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT, alu_type);
 
r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
 
@@ -5834,6 +5848,19 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx 
*ctx)
return 0;
 }
 
+static bool need_cayman_loop_bug_workaround(struct r600_shader_ctx *ctx)
+{
+   unsigned int fscp;
+   int num_loops = 0;
+   for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
+   {
+   if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+   ++num_loops;
+   }
+
+   return num_loops >= 2;
+}
+
 static int tgsi_umad(struct r600_shader_ctx *ctx)
 {
struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
-- 
1.8.2.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallium: Desambiguate TGSI_OPCODE_IF.

2013-04-14 Thread Alex Deucher
On Sun, Apr 14, 2013 at 2:36 PM, Marek Olšák  wrote:
> The R600 ISA documentation only says that the DX10 variants of MIN and MAX
> use DX10 handling of NaNs. It does not say anything about the non-DX10
> variants.

The difference is the NaN behavior.  The dx10 versions of MIN/MAX are
NaN safe.  There are also DX10 and non-DX10 versions of the SET*
opcodes.  The difference there is in the result:

SETE A == B ? 1.0 : 0.0
SETE_DX10   A == B ?   -1 : 0
etc.

Alex

>
> Marek
>
>
> On Sun, Apr 14, 2013 at 8:16 PM, Roland Scheidegger 
> wrote:
>>
>> Am 14.04.2013 18:39, schrieb Marek Olšák:
>> > On Sun, Apr 14, 2013 at 5:24 PM, Roland Scheidegger > > > wrote:
>> >
>> > Am 14.04.2013 10:12, schrieb jfons...@vmware.com
>> > :> -  TBD
>> > > +  Start an IF ... ELSE .. ENDIF block.  Condition evaluates to
>> > true if
>> > > +
>> > > +src0.x != 0.0
>> > > +
>> > > +  where src0.x is interpreted as a floating point register.
>> > Maybe should say something wrt evaluation of NaNs? I know we haven't
>> > really established rules for comparisons etc. wrt NaNs but those
>> > bools-as-float make me cry. I guess it is no different though than
>> > other
>> > float opcodes, if we now really have a definition saying IF takes
>> > _any_
>> > float not just a bool-as-float which was loosely implied before.
>> >
>> >
>> > I don't know where the term "bool-as-float" came from, but I'd rather
>> > not use it unless it's properly defined somewhere, and TGSI doesn't have
>> > bools anyway, so why bother? The GLSL compiler or glsl-to-tgsi is
>> > responsible for converting bools to either floats or ints and TGSI
>> > shouldn't need to care. Both r300g and r600g use (src0.x != 0.0) for IF
>> > and (src0.x != 0) for UIF (r600-only), so there is always the
>> > "not-equal-to" operator, which is also well defined for NaNs.
>> That depends on your definition of "well defined". llvm for instance has
>> both "ordered not equal" and "unordered not equal" operators for
>> precisely this reason. But yes I guess ieee-754 has some defined
>> behavior there.
>> That "bool-as-float" essentially comes from state trackers, because the
>> language they are translating from require bools as "if" inputs - hence
>> the input value always should have been the result of some comparison
>> (or similar) operation (which in turn return these fake bools).
>> But I agree this was never really documented, so just clearly stating
>> you can pass in any float is just fine (it means that state trackers now
>> are explicitly allowed to omit the comparison for simple cases like this
>> one, "if(a != 0)...", well if they can detect it, it was not really
>> obvious without documentation before if that would be ok). So in that
>> sense nothing more needs to be said about NaNs, since they just adhere
>> to the same rules as in other places (meaning pretty much undefined for
>> most things, currently).
>>
>> >
>> > Also if you care about NaNs, we should start by defining how
>> > instructions should handle them, e.g. how relational operators handle
>> > NaNs, whether the multiplication operator follows the rule 0*anything =
>> > 0 (MUL, MAD, DP4, ...), etc.
>> >
>> > R600 have separate opcodes depending on what behavior you want, for
>> > example:
>> > - The MUL opcode follows the rule 0*anything = 0. (DX9)
>> > - The MUL_IEEE opcode follows the IEEE behavior.
>> >
>> > The other opcodes with both the DX9 and IEEE behavior are: MAD, DP4,
>> > EX2, LG2, RCP, RSQ. There are also separate MIN and MAX opcodes for DX9
>> > and DX10. We should choose our opcodes carefully depending on whether we
>> > are implementing a DX9, DX10, OpenGL, or OpenCL state tracker.
>>
>> Yes indeed. d3d10 has quite strict rules which are mostly ieee754 (or
>> ieee754r) but with some deviations. Other specs tend to be more lenient,
>> and requiring strict rules could add quite some overhead, so we might
>> want to introduce additional opcodes. How does MIN/MAX work for dx9 btw?
>> DX10 will require you to give back the non-NaN value if only one
>> argument is NaN (which seems to be ieee754r behavior), which for
>> instance unfortunately doesn't translate well to sse2 code (as sse2 will
>> just give you the second source if there's a NaN in either src which
>> means you had to use cmp/select instead and be careful about what
>> comparison you use there since the cpu doesn't support the full set of
>> "ordered" and "unordered" comparisons unless you've got avx though
>> presumably llvm would take care of that if you use the right comparison
>> ops there).
>>
>> Roland
>
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/mesa: optionally apply texture swizzle to border color

2013-04-14 Thread Alex Deucher
On Sun, Apr 14, 2013 at 12:55 PM, Marek Olšák  wrote:
> I think the hardware doesn't care what the border color type is. I think the
> border color is "fetched" from the sampler state, which should be a memcpy.
> If no texels are fetched from the texture, the border color is copied to the
> destination register. If I set the texture hardware format to "invalid", the
> texture fetch instructions always return the border color, which suggests
> the hardware really does not care about the type.

The border color is definitely stored as part of the sampler state.
Overrides (BORDER_COLOR_TYPE field) and format conversion are applied
after the state is fetched from the sampler.

Alex

>
> OpenGL also doesn't care what the border color type is after it is set,
> because the state is a union type.
>
> Marek
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallium: Desambiguate TGSI_OPCODE_IF.

2013-04-14 Thread Roland Scheidegger
Am 14.04.2013 23:44, schrieb Alex Deucher:
> On Sun, Apr 14, 2013 at 2:36 PM, Marek Olšák  wrote:
>> The R600 ISA documentation only says that the DX10 variants of MIN and MAX
>> use DX10 handling of NaNs. It does not say anything about the non-DX10
>> variants.
> 
> The difference is the NaN behavior.  The dx10 versions of MIN/MAX are
> NaN safe.
Yes, but what does that mean for the non-dx10 versions? What do they return
in case one argument is a NaN? Obviously it can't just be random,
otherwise you could always use the dx10 version...

Roland



  There are also DX10 and non-DX10 versions of the SET*
> opcodes.  The difference there is in the result:
> 
> SETE A == B ? 1.0 : 0.0
> SETE_DX10   A == B ?   -1 : 0
> etc.
> 
> Alex
> 
>>
>> Marek
>>
>>
>> On Sun, Apr 14, 2013 at 8:16 PM, Roland Scheidegger 
>> wrote:
>>>
>>> Am 14.04.2013 18:39, schrieb Marek Olšák:
 On Sun, Apr 14, 2013 at 5:24 PM, Roland Scheidegger >>> > wrote:

 Am 14.04.2013 10:12, schrieb jfons...@vmware.com
 :> -  TBD
 > +  Start an IF ... ELSE .. ENDIF block.  Condition evaluates to
 true if
 > +
 > +src0.x != 0.0
 > +
 > +  where src0.x is interpreted as a floating point register.
 Maybe should say something wrt evaluation of NaNs? I know we haven't
 really established rules for comparisons etc. wrt NaNs but those
 bools-as-float make me cry. I guess it is no different though than
 other
 float opcodes, if we now really have a definition saying IF takes
 _any_
 float not just a bool-as-float which was loosely implied before.


 I don't know where the term "bool-as-float" came from, but I'd rather
 not use it unless it's properly defined somewhere, and TGSI doesn't have
 bools anyway, so why bother? The GLSL compiler or glsl-to-tgsi is
 responsible for converting bools to either floats or ints and TGSI
 shouldn't need to care. Both r300g and r600g use (src0.x != 0.0) for IF
 and (src0.x != 0) for UIF (r600-only), so there is always the
 "not-equal-to" operator, which is also well defined for NaNs.
>>> That depends on your definition of "well defined". llvm for instance has
>>> both "ordered not equal" and "unordered not equal" operators for
>>> precisely this reason. But yes I guess ieee-754 has some defined
>>> behavior there.
>>> That "bool-as-float" essentially comes from state trackers, because the
>>> language they are translating from require bools as "if" inputs - hence
>>> the input value always should have been the result of some comparison
>>> (or similar) operation (which in turn return these fake bools).
>>> But I agree this was never really documented, so just clearly stating
>>> you can pass in any float is just fine (it means that state trackers now
>>> are explicitly allowed to omit the comparison for simple cases like this
>>> one, "if(a != 0)...", well if they can detect it, it was not really
>>> obvious without documentation before if that would be ok). So in that
>>> sense nothing more needs to be said about NaNs, since they just adhere
>>> to the same rules as in other places (meaning pretty much undefined for
>>> most things, currently).
>>>

 Also if you care about NaNs, we should start by defining how
 instructions should handle them, e.g. how relational operators handle
 NaNs, whether the multiplication operator follows the rule 0*anything =
 0 (MUL, MAD, DP4, ...), etc.

 R600 have separate opcodes depending on what behavior you want, for
 example:
 - The MUL opcode follows the rule 0*anything = 0. (DX9)
 - The MUL_IEEE opcode follows the IEEE behavior.

 The other opcodes with both the DX9 and IEEE behavior are: MAD, DP4,
 EX2, LG2, RCP, RSQ. There are also separate MIN and MAX opcodes for DX9
 and DX10. We should choose our opcodes carefully depending on whether we
 are implementing a DX9, DX10, OpenGL, or OpenCL state tracker.
>>>
>>> Yes indeed. d3d10 has quite strict rules which are mostly ieee754 (or
>>> ieee754r) but with some deviations. Other specs tend to be more lenient,
>>> and requiring strict rules could add quite some overhead, so we might
>>> want to introduce additional opcodes. How does MIN/MAX work for dx9 btw?
>>> DX10 will require you to give back the non-NaN value if only one
>>> argument is NaN (which seems to be ieee754r behavior), which for
>>> instance unfortunately doesn't translate well to sse2 code (as sse2 will
>>> just give you the second source if there's a NaN in either src which
>>> means you had to use cmp/select instead and be careful about what
>>> comparison you use there since the cpu doesn't support the full set of
>>> "ordered" and "unordered" comparisons unless you've got avx though
>>> presumably llvm would take care of that if you use the right comparison
>>> ops there).
>>>
>>> Roland
>>

Re: [Mesa-dev] [PATCH 2/2] gallium: Desambiguate TGSI_OPCODE_IF.

2013-04-14 Thread Alex Deucher
On Sun, Apr 14, 2013 at 6:04 PM, Roland Scheidegger  wrote:
> Am 14.04.2013 23:44, schrieb Alex Deucher:
>> On Sun, Apr 14, 2013 at 2:36 PM, Marek Olšák  wrote:
>>> The R600 ISA documentation only says that the DX10 variants of MIN and MAX
>>> use DX10 handling of NaNs. It does not say anything about the non-DX10
>>> variants.
>>
>> The difference is the NaN behavior.  The dx10 versions of MIN/MAX are
>> NaN safe.
> Yes but what does it mean for the non-dx10 versions what do they return
> in case one argument is a NaN? Obviously it can't just be random
> otherwise you could always use the dx10 version...

I can't seem to find any finer details at the moment, but apparently
DX9 and DX10 have different rules for NaN propagation for min and max
and clamping (there's also DX9 and DX10 clamping behavior) and the
opcodes implement those differences.  It looks like DX10 (like IEEE)
requires NaN always be propagated while DX9 does not.  I suppose the
non-DX10 version does whatever is expected for NaN on DX9.

Alex


>
> Roland
>
>
>
>   There are also DX10 and non-DX10 versions of the SET*
>> opcodes.  The difference there is in the result:
>>
>> SETE A == B ? 1.0 : 0.0
>> SETE_DX10   A == B ?   -1 : 0
>> etc.
>>
>> Alex
>>
>>>
>>> Marek
>>>
>>>
>>> On Sun, Apr 14, 2013 at 8:16 PM, Roland Scheidegger 
>>> wrote:

 Am 14.04.2013 18:39, schrieb Marek Olšák:
> On Sun, Apr 14, 2013 at 5:24 PM, Roland Scheidegger  > wrote:
>
> Am 14.04.2013 10:12, schrieb jfons...@vmware.com
> :> -  TBD
> > +  Start an IF ... ELSE .. ENDIF block.  Condition evaluates to
> true if
> > +
> > +src0.x != 0.0
> > +
> > +  where src0.x is interpreted as a floating point register.
> Maybe should say something wrt evaluation of NaNs? I know we haven't
> really established rules for comparisons etc. wrt NaNs but those
> bools-as-float make me cry. I guess it is no different though than
> other
> float opcodes, if we now really have a definition saying IF takes
> _any_
> float not just a bool-as-float which was loosely implied before.
>
>
> I don't know where the term "bool-as-float" came from, but I'd rather
> not use it unless it's properly defined somewhere, and TGSI doesn't have
> bools anyway, so why bother? The GLSL compiler or glsl-to-tgsi is
> responsible for converting bools to either floats or ints and TGSI
> shouldn't need to care. Both r300g and r600g use (src0.x != 0.0) for IF
> and (src0.x != 0) for UIF (r600-only), so there is always the
> "not-equal-to" operator, which is also well defined for NaNs.
 That depends on your definition of "well defined". llvm for instance has
 both "ordered not equal" and "unordered not equal" operators for
 precisely this reason. But yes I guess ieee-754 has some defined
 behavior there.
 That "bool-as-float" essentially comes from state trackers, because the
 language they are translating from require bools as "if" inputs - hence
 the input value always should have been the result of some comparison
 (or similar) operation (which in turn return these fake bools).
 But I agree this was never really documented, so just clearly stating
 you can pass in any float is just fine (it means that state trackers now
 are explicitly allowed to omit the comparison for simple cases like this
 one, "if(a != 0)...", well if they can detect it, it was not really
 obvious without documentation before if that would be ok). So in that
 sense nothing more needs to be said about NaNs, since they just adhere
 to the same rules as in other places (meaning pretty much undefined for
 most things, currently).

>
> Also if you care about NaNs, we should start by defining how
> instructions should handle them, e.g. how relational operators handle
> NaNs, whether the multiplication operator follows the rule 0*anything =
> 0 (MUL, MAD, DP4, ...), etc.
>
> R600 have separate opcodes depending on what behavior you want, for
> example:
> - The MUL opcode follows the rule 0*anything = 0. (DX9)
> - The MUL_IEEE opcode follows the IEEE behavior.
>
> The other opcodes with both the DX9 and IEEE behavior are: MAD, DP4,
> EX2, LG2, RCP, RSQ. There are also separate MIN and MAX opcodes for DX9
> and DX10. We should choose our opcodes carefully depending on whether we
> are implementing a DX9, DX10, OpenGL, or OpenCL state tracker.

 Yes indeed. d3d10 has quite strict rules which are mostly ieee754 (or
 ieee754r) but with some deviations. Other specs tend to be more lenient,
 and requiring strict rules could add quite some overhead, so we might
 want to introduce additional opcodes. How does MIN/MAX work for dx9 btw?
 DX10 will require you to give back the

Re: [Mesa-dev] [PATCH 2/2] gallium: Desambiguate TGSI_OPCODE_IF.

2013-04-14 Thread Jose Fonseca
- Original Message -
> Am 14.04.2013 10:12, schrieb jfons...@vmware.com:
> > From: José Fonseca 
> > 
> > TGSI_OPCODE_IF condition had two possible interpretations:
> > 
> > - src.x != 0.0f
> > 
> >   - Mesa statetracker when PIPE_SHADER_CAP_INTEGERS was false either for
> > vertex and fragment shaders
> >   - gallivm/llvmpipe
> >   - postprocess
> >   - vl state tracker
> >   - vega state tracker
> >   - most old drivers
> >   - old internal state trackers
> >   - many graw examples
> > 
> > - src.x != 0U
> > 
> >   - Mesa statetracker when PIPE_SHADER_CAP_INTEGERS was true for both
> > vertex and fragment shaders
> >   - tgsi_exec/softpipe
> >   - r600
> >   - radeonsi
> >   - nv50
> > 
> > And drivers that use draw module also were a mess (because Mesa would
> > emit float IFs, but draw module supports native integers so it would
> > interpret IF arg as integers...)
> > 
> > This sort of works if the source argument is limited to float +0.0f or
> > +1.0f, integer 0, but would fail if source is float -0.0f, or integer in
> > the float NaN range.  It could also fail if source is integer 1, and
> > hardware flushes denormalized numbers to zero.
> > 
> > But with this change there are now two opcodes, IF and UIF, with clear
> > meaning.
> > 
> > Drivers that do not support native integers do not need to worry about
> > UIF.  However, for backwards compatibility with old state trackers and
> > examples, it is advisable that native integer capable drivers also
> > support the float IF opcode.
> > 
> > I tried to implement this for r600 and radeonsi based on the surrounding
> > code.  I couldn't do this for nouveau, so I just shunted IF/UIF
> > together, which matches the current behavior.
> > ---
> >  src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |1 +
> >  src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c|1 +
> >  src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c   |1 +
> >  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c|   19 -
> >  src/gallium/auxiliary/tgsi/tgsi_dump.c |2 +
> >  src/gallium/auxiliary/tgsi/tgsi_exec.c |   22 +++
> >  src/gallium/auxiliary/tgsi/tgsi_info.c |2 +-
> >  src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h   |1 +
> >  src/gallium/docs/source/tgsi.rst   |   21 --
> >  .../drivers/nv50/codegen/nv50_ir_from_tgsi.cpp |6 +++
> >  src/gallium/drivers/r600/r600_shader.c |   21 +++---
> >  .../drivers/radeon/radeon_setup_tgsi_llvm.c|   41
> >  
> >  src/gallium/include/pipe/p_shader_tokens.h |2 +-
> >  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |8 +++-
> >  src/mesa/state_tracker/st_mesa_to_tgsi.c   |   12 +-
> >  15 files changed, 137 insertions(+), 23 deletions(-)
> > 
> > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> > index c71c1f1..e1c362b 100644
> > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> > @@ -868,6 +868,7 @@ lp_set_default_actions(struct lp_build_tgsi_context *
> > bld_base)
> > bld_base->op_actions[TGSI_OPCODE_COS].fetch_args =
> > scalar_unary_fetch_args;
> > bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args =
> > scalar_unary_fetch_args;
> > bld_base->op_actions[TGSI_OPCODE_IF].fetch_args =
> > scalar_unary_fetch_args;
> > +   bld_base->op_actions[TGSI_OPCODE_UIF].fetch_args =
> > scalar_unary_fetch_args;
> > bld_base->op_actions[TGSI_OPCODE_KIL].fetch_args = kil_fetch_args;
> > bld_base->op_actions[TGSI_OPCODE_KILP].fetch_args = kilp_fetch_args;
> > bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args =
> > scalar_unary_fetch_args;
> > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> > index 98bce0e..223184d 100644
> > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> > @@ -837,6 +837,7 @@ lp_emit_instruction_aos(
> >return FALSE;
> >  
> > case TGSI_OPCODE_IF:
> > +   case TGSI_OPCODE_UIF:
> >return FALSE;
> >  
> > case TGSI_OPCODE_BGNLOOP:
> > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
> > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
> > index 3c79abf..b00aa09 100644
> > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
> > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
> > @@ -389,6 +389,7 @@ analyse_instruction(struct analysis_context *ctx,
> >  
> > switch (inst->Instruction.Opcode) {
> > case TGSI_OPCODE_IF:
> > +   case TGSI_OPCODE_UIF:
> > case TGSI_OPCODE_ELSE:
> > case TGSI_OPCODE_ENDIF:
> > case TGSI_OPCODE_BGNLOOP:
> Could you also add it to tgsi_opcode_infer_src_type?

Good point. Will do. tgsi_opcode_infer_src_type has a default catch all for 
float, so I

Re: [Mesa-dev] [PATCH] r600g: Workaround for a nested loop bug on Cayman

2013-04-14 Thread Vadim Girlin

On 04/15/2013 01:05 AM, Martin Andersson wrote:

There is a bug where a BREAK/CONTINUE followed by LOOP_STARTxxx for nested
loops may put the branch stack into a state such that ALU_PUSH_BEFORE
doesn't work as expected. Workaround this by replacing the ALU_PUSH_BEFORE
with a PUSH + ALU for nested loops.

Fixes piglit tests:
spec/!OpenGL 1.1/read-front
spec/EXT_transform_feedback/order*
spec/glsl-1.40/uniform_buffer/fs-struct-pad

No piglit regressions.
---
  src/gallium/drivers/r600/r600_shader.c | 33 ++---
  1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 6dbca50..aee011e 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -252,6 +252,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx);
  static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
  static int tgsi_endloop(struct r600_shader_ctx *ctx);
  static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
+static bool need_cayman_loop_bug_workaround(struct r600_shader_ctx *ctx);

  /*
   * bytestream -> r600 shader
@@ -5490,7 +5491,7 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
return 0;
  }

-static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
+static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int 
alu_type)
  {
struct r600_bytecode_alu alu;
int r;
@@ -5510,7 +5511,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, 
int opcode)

alu.last = 1;

-   r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);
+   r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
if (r)
return r;
return 0;
@@ -5730,7 +5731,20 @@ static void break_loop_on_flag(struct r600_shader_ctx 
*ctx, unsigned fc_sp)

  static int tgsi_if(struct r600_shader_ctx *ctx)
  {
-   emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT);
+   int alu_type = CF_OP_ALU_PUSH_BEFORE;
+
+   /*
+  There is a bug where a BREAK/CONTINUE followed by LOOP_STARTxxx for 
nested
+  loops may put the branch stack into a state such that ALU_PUSH_BEFORE
+  doesn't work as expected. Workaround this by replacing the 
ALU_PUSH_BEFORE
+  with a PUSH + ALU for nested loops.
+*/
+   if (ctx->bc->chip_class == CAYMAN && 
need_cayman_loop_bug_workaround(ctx)) {


We already track the current loop level for the stack size computation, see 
r600_bytecode::stack, so I think the need_cayman_loop_bug_workaround call 
may be replaced with "ctx->bc->stack.loop > 1", if I'm not missing 
something.


Vadim


+   r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
+   alu_type = CF_OP_ALU;
+   }
+
+   emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT, alu_type);

r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);

@@ -5834,6 +5848,19 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx 
*ctx)
return 0;
  }

+static bool need_cayman_loop_bug_workaround(struct r600_shader_ctx *ctx)
+{
+   unsigned int fscp;
+   int num_loops = 0;
+   for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
+   {
+   if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+   ++num_loops;
+   }
+
+   return num_loops >= 2;
+}
+
  static int tgsi_umad(struct r600_shader_ctx *ctx)
  {
struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallivm: fix small but severe bug in handling multiple lod level strides

2013-04-14 Thread sroland
From: Roland Scheidegger 

The value for the second quad was inserted in the wrong place for the
following shuffle. This meant the row or image stride was undefined, which is
quite catastrophic: it can lead to bogus texels being fetched or just a segfault.
This code is only hit for the SoA path currently; still, it is surprising that it
didn't crash more or cause more visible issues (I think llvm used a
broadcast shuffle for the undefined parts of the vector, hence the undefined
value for the second quad was just the same as that from the first quad,
so as long as both quads hit the same mip level everything was fine, and since
lower mips always have the same large stride it made it less likely to
hit out-of-bound memory in case of differing lods).

Note: this is a candidate for release branches.
---
 src/gallium/auxiliary/gallivm/lp_bld_sample.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c 
b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 7f44c4e..f885363 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -934,7 +934,7 @@ lp_build_get_level_stride_vec(struct 
lp_build_sample_context *bld,
   stride = bld->int_coord_bld.undef;
   for (i = 0; i < bld->num_lods; i++) {
  LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
- LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, i);
+ LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
  indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
  stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
  stride1 = LLVMBuildLoad(builder, stride1, "");
-- 
1.7.9.5
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: Workaround for a nested loop bug on Cayman

2013-04-14 Thread Martin Andersson
On Mon, Apr 15, 2013 at 1:09 AM, Vadim Girlin  wrote:
> On 04/15/2013 01:05 AM, Martin Andersson wrote:
>>
>> There is a bug where a BREAK/CONTINUE followed by LOOP_STARTxxx for nested
>> loops may put the branch stack into a state such that ALU_PUSH_BEFORE
>> doesn't work as expected. Workaround this by replacing the ALU_PUSH_BEFORE
>> with a PUSH + ALU for nested loops.
>>
>> Fixes piglit tests:
>> spec/!OpenGL 1.1/read-front
>> spec/EXT_transform_feedback/order*
>> spec/glsl-1.40/uniform_buffer/fs-struct-pad
>>
>> No piglit regressions.
>> ---
>>   src/gallium/drivers/r600/r600_shader.c | 33
>> ++---
>>   1 file changed, 30 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/r600_shader.c
>> b/src/gallium/drivers/r600/r600_shader.c
>> index 6dbca50..aee011e 100644
>> --- a/src/gallium/drivers/r600/r600_shader.c
>> +++ b/src/gallium/drivers/r600/r600_shader.c
>> @@ -252,6 +252,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx);
>>   static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
>>   static int tgsi_endloop(struct r600_shader_ctx *ctx);
>>   static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
>> +static bool need_cayman_loop_bug_workaround(struct r600_shader_ctx *ctx);
>>
>>   /*
>>* bytestream -> r600 shader
>> @@ -5490,7 +5491,7 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
>> return 0;
>>   }
>>
>> -static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
>> +static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int
>> alu_type)
>>   {
>> struct r600_bytecode_alu alu;
>> int r;
>> @@ -5510,7 +5511,7 @@ static int emit_logic_pred(struct r600_shader_ctx
>> *ctx, int opcode)
>>
>> alu.last = 1;
>>
>> -   r = r600_bytecode_add_alu_type(ctx->bc, &alu,
>> CF_OP_ALU_PUSH_BEFORE);
>> +   r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
>> if (r)
>> return r;
>> return 0;
>> @@ -5730,7 +5731,20 @@ static void break_loop_on_flag(struct
>> r600_shader_ctx *ctx, unsigned fc_sp)
>>
>>   static int tgsi_if(struct r600_shader_ctx *ctx)
>>   {
>> -   emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT);
>> +   int alu_type = CF_OP_ALU_PUSH_BEFORE;
>> +
>> +   /*
>> +  There is a bug where a BREAK/CONTINUE followed by LOOP_STARTxxx
>> for nested
>> +  loops may put the branch stack into a state such that
>> ALU_PUSH_BEFORE
>> +  doesn't work as expected. Workaround this by replacing the
>> ALU_PUSH_BEFORE
>> +  with a PUSH + ALU for nested loops.
>> +*/
>> +   if (ctx->bc->chip_class == CAYMAN &&
>> need_cayman_loop_bug_workaround(ctx)) {
>
>
> We already have current loop level for the stack size computation, see
> r600_bytecode::stack, so I think need_cayman_loop_bug_workaround call may be
> replaced with "ctx->bc->stack.loop > 1", if I'm not missing something.

Ok, will try that tonight. Should I add a comment that it is a hardware bug?

> Vadim
>
>
>> +   r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
>> +   alu_type = CF_OP_ALU;
>> +   }
>> +
>> +   emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT, alu_type);
>>
>> r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
>>
>> @@ -5834,6 +5848,19 @@ static int tgsi_loop_brk_cont(struct
>> r600_shader_ctx *ctx)
>> return 0;
>>   }
>>
>> +static bool need_cayman_loop_bug_workaround(struct r600_shader_ctx *ctx)
>> +{
>> +   unsigned int fscp;
>> +   int num_loops = 0;
>> +   for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
>> +   {
>> +   if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
>> +   ++num_loops;
>> +   }
>> +
>> +   return num_loops >= 2;
>> +}
>> +
>>   static int tgsi_umad(struct r600_shader_ctx *ctx)
>>   {
>> struct tgsi_full_instruction *inst =
>> &ctx->parse.FullToken.FullInstruction;
>>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev