[Mesa-dev] [PATCH] drisw: fix up context and apis for software context

2012-12-09 Thread Dave Airlie
This ports over from the dri2 code to the drisw bits. It means 3.1
core contexts now work for softpipe.

Signed-off-by: Dave Airlie 
---
 src/mesa/drivers/dri/common/drisw_util.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/src/mesa/drivers/dri/common/drisw_util.c 
b/src/mesa/drivers/dri/common/drisw_util.c
index 8fdb05e..cd5a39a 100644
--- a/src/mesa/drivers/dri/common/drisw_util.c
+++ b/src/mesa/drivers/dri/common/drisw_util.c
@@ -126,7 +126,10 @@ driCreateContextAttribs(__DRIscreen *screen, int api,
 mesa_api = API_OPENGLES2;
 break;
 case __DRI_API_OPENGL_CORE:
+mesa_api = API_OPENGL_CORE;
+break;
 default:
+*error = __DRI_CTX_ERROR_BAD_API;
 return NULL;
 }
 
@@ -149,6 +152,19 @@ driCreateContextAttribs(__DRIscreen *screen, int api,
}
 }
 
+/* Mesa does not support the GL_ARB_compatibility extension or the
+ * compatibility profile.  This means that we treat a API_OPENGL_COMPAT 
3.1 as
+ * API_OPENGL_CORE and reject API_OPENGL_COMPAT 3.2+.
+ */
+if (mesa_api == API_OPENGL_COMPAT && major_version == 3 && minor_version 
== 1)
+   mesa_api = API_OPENGL_CORE;
+
+if (mesa_api == API_OPENGL_COMPAT
+&& ((major_version > 3)
+|| (major_version == 3 && minor_version >= 2))) {
+   *error = __DRI_CTX_ERROR_BAD_API;
+   return NULL;
+}
 /* There are no forward-compatible contexts before OpenGL 3.0.  The
  * GLX_ARB_create_context spec says:
  *
-- 
1.8.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] drisw: fix up context and apis for software context

2012-12-09 Thread Dave Airlie
On 9 Dec 2012 20:32, "Dave Airlie"  wrote:
>
> This ports over from the dri2 code to the drisw bits. It means 3.1
> core contexts now work for softpipe.
>

Well, if softpipe had MSAA they would, but it's good enough to hack on so I can
test UBO/TBO better.

Dave.
> Signed-off-by: Dave Airlie 
> ---
>  src/mesa/drivers/dri/common/drisw_util.c | 16 
>  1 file changed, 16 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/common/drisw_util.c
b/src/mesa/drivers/dri/common/drisw_util.c
> index 8fdb05e..cd5a39a 100644
> --- a/src/mesa/drivers/dri/common/drisw_util.c
> +++ b/src/mesa/drivers/dri/common/drisw_util.c
> @@ -126,7 +126,10 @@ driCreateContextAttribs(__DRIscreen *screen, int api,
>  mesa_api = API_OPENGLES2;
>  break;
>  case __DRI_API_OPENGL_CORE:
> +mesa_api = API_OPENGL_CORE;
> +break;
>  default:
> +*error = __DRI_CTX_ERROR_BAD_API;
>  return NULL;
>  }
>
> @@ -149,6 +152,19 @@ driCreateContextAttribs(__DRIscreen *screen, int api,
> }
>  }
>
> +/* Mesa does not support the GL_ARB_compatibility extension or the
> + * compatibility profile.  This means that we treat a
API_OPENGL_COMPAT 3.1 as
> + * API_OPENGL_CORE and reject API_OPENGL_COMPAT 3.2+.
> + */
> +if (mesa_api == API_OPENGL_COMPAT && major_version == 3 &&
minor_version == 1)
> +   mesa_api = API_OPENGL_CORE;
> +
> +if (mesa_api == API_OPENGL_COMPAT
> +&& ((major_version > 3)
> +|| (major_version == 3 && minor_version >= 2))) {
> +   *error = __DRI_CTX_ERROR_BAD_API;
> +   return NULL;
> +}
>  /* There are no forward-compatible contexts before OpenGL 3.0.  The
>   * GLX_ARB_create_context spec says:
>   *
> --
> 1.8.0.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] AMDGPU: remove nonsense setPrefLoopAlignment

2012-12-09 Thread Christian König
The Align parameter is the log2 of the alignment (a power-of-two exponent),
so 16 results in 64K alignment. In addition, even 16-byte alignment doesn't
make any sense here, so just remove it.

Signed-off-by: Christian König 
---
 lib/Target/AMDGPU/AMDILISelLowering.cpp |1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/Target/AMDGPU/AMDILISelLowering.cpp 
b/lib/Target/AMDGPU/AMDILISelLowering.cpp
index 6a5d841..8bfd30c 100644
--- a/lib/Target/AMDGPU/AMDILISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDILISelLowering.cpp
@@ -217,7 +217,6 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
 
   setSchedulingPreference(Sched::RegPressure);
   setPow2DivIsCheap(false);
-  setPrefLoopAlignment(16);
   setSelectIsExpensive(true);
   setJumpIsExpensive(true);
 
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] AMDGPU: BB operand support for SI

2012-12-09 Thread Christian König
Signed-off-by: Christian König 
---
 lib/Target/AMDGPU/AMDGPUMCInstLower.cpp|   10 --
 lib/Target/AMDGPU/AMDGPUMCInstLower.h  |5 -
 .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp   |   10 +-
 lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp |6 ++
 4 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp 
b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index de4053e..32275a2b 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -21,11 +21,14 @@
 #include "llvm/Constants.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
 
-AMDGPUMCInstLower::AMDGPUMCInstLower() { }
+AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx):
+  Ctx(ctx)
+{ }
 
 void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
   OutMI.setOpcode(MI->getOpcode());
@@ -50,13 +53,16 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, 
MCInst &OutMI) const {
 case MachineOperand::MO_Register:
   MCOp = MCOperand::CreateReg(MO.getReg());
   break;
+case MachineOperand::MO_MachineBasicBlock:
+  MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+   MO.getMBB()->getSymbol(), Ctx));
 }
 OutMI.addOperand(MCOp);
   }
 }
 
 void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
-  AMDGPUMCInstLower MCInstLowering;
+  AMDGPUMCInstLower MCInstLowering(OutContext);
 
   if (MI->isBundle()) {
 const MachineBasicBlock *MBB = MI->getParent();
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.h 
b/lib/Target/AMDGPU/AMDGPUMCInstLower.h
index d7bf827..d7d538e 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.h
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.h
@@ -14,12 +14,15 @@
 namespace llvm {
 
 class MCInst;
+class MCContext;
 class MachineInstr;
 
 class AMDGPUMCInstLower {
 
+  MCContext &Ctx;
+
 public:
-  AMDGPUMCInstLower();
+  AMDGPUMCInstLower(MCContext &ctx);
 
   /// \brief Lower a MachineInstr to an MCInst
   void lower(const MachineInstr *MI, MCInst &OutMI) const;
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp 
b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 3417fbc..8f41ebb 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -47,7 +47,7 @@ public:
   virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
   virtual unsigned getNumFixupKinds() const { return 0; };
   virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
-  uint64_t Value) const { assert(!"Not implemented"); }
+  uint64_t Value) const;
   virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
 const MCInstFragment *DF,
 const MCAsmLayout &Layout) const {
@@ -80,3 +80,11 @@ AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
 raw_ostream &OS) const 
{
   return new AMDGPUMCObjectWriter(OS);
 }
+
+void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+  unsigned DataSize, uint64_t Value) const {
+
+  uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
+  assert(Fixup.getKind() == FK_PCRel_4);
+  *Dst = (Value - 4) / 4;
+}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp 
b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 7f271d1..c47dc99 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -21,6 +21,7 @@
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCFixup.h"
 #include "llvm/Support/raw_ostream.h"
 
 #define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
@@ -149,6 +150,11 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst 
&MI,
 } Imm;
 Imm.F = MO.getFPImm();
 return Imm.I;
+  } else if (MO.isExpr()) {
+const MCExpr *Expr = MO.getExpr();
+MCFixupKind Kind = MCFixupKind(FK_PCRel_4);
+Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+return 0;
   } else{
 llvm_unreachable("Encoding of this operand type is not supported yet.");
   }
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] AMDGPU: enable S_*N2_* instructions

2012-12-09 Thread Christian König
They seem to work fine.

Signed-off-by: Christian König 
---
 lib/Target/AMDGPU/SIInstructions.td |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/Target/AMDGPU/SIInstructions.td 
b/lib/Target/AMDGPU/SIInstructions.td
index e9bbe23..42fa8e6 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -971,10 +971,10 @@ def S_OR_B32 : SOP2_32 <0x0010, "S_OR_B32", []>;
 def S_OR_B64 : SOP2_64 <0x0011, "S_OR_B64", []>;
 def S_XOR_B32 : SOP2_32 <0x0012, "S_XOR_B32", []>;
 def S_XOR_B64 : SOP2_64 <0x0013, "S_XOR_B64", []>;
-def S_ANDN2_B32 : SOP2_ANDN2 <0x0014, "S_ANDN2_B32", []>;
-def S_ANDN2_B64 : SOP2_ANDN2 <0x0015, "S_ANDN2_B64", []>;
-def S_ORN2_B32 : SOP2_ORN2 <0x0016, "S_ORN2_B32", []>;
-def S_ORN2_B64 : SOP2_ORN2 <0x0017, "S_ORN2_B64", []>;
+def S_ANDN2_B32 : SOP2_32 <0x0014, "S_ANDN2_B32", []>;
+def S_ANDN2_B64 : SOP2_64 <0x0015, "S_ANDN2_B64", []>;
+def S_ORN2_B32 : SOP2_32 <0x0016, "S_ORN2_B32", []>;
+def S_ORN2_B64 : SOP2_64 <0x0017, "S_ORN2_B64", []>;
 def S_NAND_B32 : SOP2_32 <0x0018, "S_NAND_B32", []>;
 def S_NAND_B64 : SOP2_64 <0x0019, "S_NAND_B64", []>;
 def S_NOR_B32 : SOP2_32 <0x001a, "S_NOR_B32", []>;
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa syncobj: don't store a pointer to the set_entry

2012-12-09 Thread Stefan Dösinger
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

Am 2012-12-08 22:24, schrieb Jordan Justen:
> This likely will fix 
> https://bugs.freedesktop.org/show_bug.cgi?id=58012 (Regression
> since 56e95d3c)
The crashes are gone on r300g and r600g.

This is most likely unrelated, but on r600g I get broken rendering in
Half Life 2 (just random garbage on the screen). I'm sure it doesn't
crash though because I don't see the debugger kicking in, and hl2
terminates properly after the timedemo. There are complaints about
commands rejected by the kernel:


[  760.187261] [drm:radeon_cs_ib_chunk] *ERROR* Invalid command stream !
[  760.192898] radeon :01:00.0:
evergreen_cs_track_validate_stencil:602 stencil read bo base
4148500480 not aligned with 16384
[  760.192901] radeon :01:00.0: evergreen_packet3_check:2098
invalid cmd stream 2440

Note that I don't know when this started - I haven't updated Mesa on
that box for weeks.
-BEGIN PGP SIGNATURE-
Version: GnuPG v2.0.19 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQIcBAEBAgAGBQJQxM6bAAoJEN0/YqbEcdMw58oP/RVTIfaNufKv+//AuUcoUSLt
2huxKsU3DoQojrGgMdMIP6MZBRwmbQ0i3IVMGNPrDNBpy0c5FgJRP6XIKUl8mF5t
CC4u8NWV1q6zf3F6eMlNCp4EfRVLPViUVA8rZjSSbHshhenio2ftoZgQDxTPyPnX
FOkVNerBUMhE6yqa5QLu+qGuciJprzq/AUH8IiHlFIOgHfs/mAaTcrWlY6c4ZNaH
op5/PxsXrXlmu32x6tjOMQYz+i8FEb6enClMdz81v8ek1bv2IzSLaT5i6La2flKI
TApuonDYfIYmC7u8q+uumPVK5PcwrpVEYGhTjCSM8bvREgRXDIEn3ekmCyiHxAFt
zGY1sO4Wmf4Po5VD/GMNOYPDMNcdgCk5oZwcJzzRXPn71eiJimQkGNao0Dnmd0dq
bxLbz+KJxAQJaX6rNjO1/uCey0xfQ8IVf9IIoCCxINW1vZRV5p76L3cD1HqQt8wU
YrADeNf9wB1qSAtH0Ybhlf+VPA/7kJbatwSpSuNKEohZVLfZal/1YEQj1An5POj+
FSg07RDuyJVBE8HQksnqQie2K58wGvrTKvwCr9/P98z2sD2xjEen8o1fGWYN7+p+
1327o97Yc7umnz1BbbG6fd/YSVQ7E5wpcrUpgTLHTAcr+B2cPLB5TTGMGcuL52M2
hKknDGGOMWZZLxvN7e92
=+BNj
-END PGP SIGNATURE-
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Restore NULL context check in _mesa_reference_renderbuffer_().

2012-12-09 Thread Ian Romanick
That's not a false alarm... That's supposed to work! libGL and driver versions 
are supposed to be independent. Any time they're not, that's a bug.

On Dec 8, 2012, at 8:43 PM, Kenneth Graunke  wrote:

> On 12/08/2012 05:40 AM, Brian Paul wrote:
>> On 12/08/2012 01:10 AM, Kenneth Graunke wrote:
>>> Starting KDE on i965 makes the X server die in a fire with the following
>>> assertion:
>>> 
>>> X: intel_fbo.c:94: intel_delete_renderbuffer: Assertion `irb' failed.
>>> 
>>> Obviously, this is rather unpleasant.  Bisecting revealed that:
>>> 
>>> 006918c0db77e945ac56b15bc64eba502b86d56c is the first bad commit
>>> commit 006918c0db77e945ac56b15bc64eba502b86d56c
>>> Author: Brian Paul
>>> Date:   Sat Dec 1 10:52:42 2012 -0700
>>> 
>>> mesa: remove warning message in _mesa_reference_renderbuffer_()
>>> 
>>> We were warning when there was no current context and we're about
>>> to delete a renderbuffer, but that happens fairly often and isn't
>>> really a problem.
>>> 
>>> Fixes http://bugs.freedesktop.org/show_bug.cgi?id=57754
>>> 
>>> Note: This is a candidate for the stable branches.
>>> 
>>> Tested-by: Ian Romanick
>>> 
>>> This commit removed not only the "else emit warning" block, but the
>>> whole NULL check as well.  Apparently it's necessary, so put it back.
>> 
>> 
>> Hi Kenneth,
>> 
>> The assertion says the 'irb' is null, but you're checking if the context
>> is null.  Offhand, I think an irb==null check is needed in
>> intel_delete_renderbuffer().  Maybe seeing a stack trace would shed more
>> light on where the null irb/ctx is coming from.
>> 
>> In any case, if you need to check for ctx==null, please do that in
>> intel_delete_renderbuffer().
>> 
>> The deal is that some (most?) drivers don't need a context handle in
>> order to free a renderbuffer.  In the gallium state tracker we use the
>> context to free a piece of context state that's associated with a
>> renderbuffer, but the renderbuffer itself can be freed without a context.
>> 
>> Sorry for the headaches this one is causing.
>> 
>> -Brian
> 
> Sorry for the false alarm...this was my fault.
> 
> Apparently on my system X is loading an older i965_dri.so, but likely a new 
> libGL.  The incompatibility between the one-argument 
> intel_delete_renderbuffer and two-argument gl_renderbuffer::Delete caused 
> something stupid to happen...maybe I got the renderbuffer passed as the 
> context, and NULL for the renderbuffer.
> 
> I put both halves back in sync and everything's fine now.  Again, my 
> apologies for the trouble.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] R: Re: [PATCH] silence unused code warnings

2012-12-09 Thread Fabio Pedretti
>Comments below.

Thanks, updated patches follow.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50: remove unused OpClassStr array

2012-12-09 Thread Fabio Pedretti
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
index ded4f61..ea81e53 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
@@ -67,26 +67,6 @@ static void init_colours()
   colour = _colour;
 }
 
-static const char *OpClassStr[OPCLASS_OTHER + 1] =
-{
-   "MOVE",
-   "LOAD",
-   "STORE",
-   "ARITH",
-   "SHIFT",
-   "SFU",
-   "LOGIC",
-   "COMPARE",
-   "CONVERT",
-   "ATOMIC",
-   "TEXTURE",
-   "SURFACE",
-   "FLOW",
-   "(INVALID)",
-   "PSEUDO",
-   "OTHER"
-};
-
 const char *operationStr[OP_LAST + 1] =
 {
"nop",

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvc0: comment unused nvc0_validate_zcull function

2012-12-09 Thread Fabio Pedretti
diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
index 0f92614..80a8c01 100644
--- a/src/gallium/drivers/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c
@@ -3,6 +3,7 @@
 
 #include "nvc0_context.h"
 
+#if 0
 static void
 nvc0_validate_zcull(struct nvc0_context *nvc0)
 {
@@ -51,6 +52,7 @@ nvc0_validate_zcull(struct nvc0_context *nvc0)
 BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1);
 PUSH_DATA (push, 0);
 }
+#endif
 
 static void
 nvc0_validate_fb(struct nvc0_context *nvc0)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] vega: remove unused variables

2012-12-09 Thread Fabio Pedretti
diff --git a/src/gallium/state_trackers/vega/path.c b/src/gallium/state_trackers/vega/path.c
index 43755f4..31ec719 100644
--- a/src/gallium/state_trackers/vega/path.c
+++ b/src/gallium/state_trackers/vega/path.c
@@ -1085,10 +1085,8 @@ static INLINE VGubyte normalize_coords(struct path_iter_data *pd,
}
   break;
case VG_SCUBIC_TO: {
-  VGfloat x0, y0, x1, y1, x2, y2, x3, y3;
+  VGfloat x1, y1, x2, y2, x3, y3;
   data_at(&pd->coords, pd->path, 0, 4, data);
-  x0 = pd->ox;
-  y0 = pd->oy;
   x1 = 2*pd->ox-pd->px;
   y1 = 2*pd->oy-pd->py;
   x2 = data[0];

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] R: Re: Gallium versioning, currently at 0.4

2012-12-09 Thread Fabio Pedretti
So, is there a plan to do it?

>Da: mar...@gmail.com
>
>The Gallium version has no meaning to me. The renderer string could
>just be "ATI RV530".
>
>Marek
>
>On Wed, Oct 31, 2012 at 6:05 PM, Fabio Pedretti  wrote:
>> Is there a reason to keep the '0.4' in the Gallium renderer string ('Gallium 0.4
>> on ATI RV530')? It never gets updated, even though the Gallium interface changes,
>> similar to the old DRIVER_DATE that was then removed.
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/12] swrast: make _mesa_get_texel_fetch_func() static

2012-12-09 Thread Brian Paul
From: Brian Paul 

Not called from any other file.
---
 src/mesa/swrast/s_texfetch.c |8 
 src/mesa/swrast/s_texfetch.h |3 ---
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/mesa/swrast/s_texfetch.c b/src/mesa/swrast/s_texfetch.c
index 86b01a0..9117885 100644
--- a/src/mesa/swrast/s_texfetch.c
+++ b/src/mesa/swrast/s_texfetch.c
@@ -1175,8 +1175,8 @@ texfetch_funcs[] =
 };
 
 
-FetchTexelFunc
-_mesa_get_texel_fetch_func(gl_format format, GLuint dims)
+static FetchTexelFunc
+get_texel_fetch_func(gl_format format, GLuint dims)
 {
 #ifdef DEBUG
/* check that the table entries are sorted by format name */
@@ -1198,7 +1198,7 @@ _mesa_get_texel_fetch_func(gl_format format, GLuint dims)
case 3:
   return texfetch_funcs[format].Fetch3D;
default:
-  assert(0 && "bad dims in _mesa_get_texel_fetch_func");
+  assert(0 && "bad dims in get_texel_fetch_func");
   return NULL;
}
 }
@@ -1220,7 +1220,7 @@ set_fetch_functions(struct gl_sampler_object *samp,
   format = _mesa_get_srgb_format_linear(format);
}
 
-   texImage->FetchTexel = _mesa_get_texel_fetch_func(format, dims);
+   texImage->FetchTexel = get_texel_fetch_func(format, dims);
ASSERT(texImage->FetchTexel);
 }
 
diff --git a/src/mesa/swrast/s_texfetch.h b/src/mesa/swrast/s_texfetch.h
index 33950ce..4e4397e 100644
--- a/src/mesa/swrast/s_texfetch.h
+++ b/src/mesa/swrast/s_texfetch.h
@@ -29,9 +29,6 @@
 
 #include "swrast/s_context.h"
 
-extern FetchTexelFunc
-_mesa_get_texel_fetch_func(gl_format format, GLuint dims);
-
 void
 _mesa_update_fetch_functions(struct gl_context *ctx, GLuint unit);
 
-- 
1.7.4.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/12] swrast: merge get_texel_fetch_func() and set_fetch_functions()

2012-12-09 Thread Brian Paul
From: Brian Paul 

No real need for separate functions anymore.
---
 src/mesa/swrast/s_texfetch.c |   46 ++---
 1 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/src/mesa/swrast/s_texfetch.c b/src/mesa/swrast/s_texfetch.c
index 9117885..c133eac 100644
--- a/src/mesa/swrast/s_texfetch.c
+++ b/src/mesa/swrast/s_texfetch.c
@@ -1175,9 +1175,15 @@ texfetch_funcs[] =
 };
 
 
-static FetchTexelFunc
-get_texel_fetch_func(gl_format format, GLuint dims)
+/**
+ * Initialize the texture image's FetchTexel methods.
+ */
+static void
+set_fetch_functions(const struct gl_sampler_object *samp,
+struct swrast_texture_image *texImage, GLuint dims)
 {
+   gl_format format = texImage->Base.TexFormat;
+
 #ifdef DEBUG
/* check that the table entries are sorted by format name */
gl_format fmt;
@@ -1188,39 +1194,27 @@ get_texel_fetch_func(gl_format format, GLuint dims)
 
STATIC_ASSERT(Elements(texfetch_funcs) == MESA_FORMAT_COUNT);
 
+   if (samp->sRGBDecode == GL_SKIP_DECODE_EXT &&
+   _mesa_get_format_color_encoding(format) == GL_SRGB) {
+  format = _mesa_get_srgb_format_linear(format);
+   }
+
assert(format < MESA_FORMAT_COUNT);
 
switch (dims) {
case 1:
-  return texfetch_funcs[format].Fetch1D;
+  texImage->FetchTexel = texfetch_funcs[format].Fetch1D;
+  break;
case 2:
-  return texfetch_funcs[format].Fetch2D;
+  texImage->FetchTexel = texfetch_funcs[format].Fetch2D;
+  break;
case 3:
-  return texfetch_funcs[format].Fetch3D;
+  texImage->FetchTexel = texfetch_funcs[format].Fetch3D;
+  break;
default:
-  assert(0 && "bad dims in get_texel_fetch_func");
-  return NULL;
-   }
-}
-
-
-/**
- * Initialize the texture image's FetchTexel methods.
- */
-static void
-set_fetch_functions(struct gl_sampler_object *samp,
-struct swrast_texture_image *texImage, GLuint dims)
-{
-   gl_format format = texImage->Base.TexFormat;
-
-   ASSERT(dims == 1 || dims == 2 || dims == 3);
-
-   if (samp->sRGBDecode == GL_SKIP_DECODE_EXT &&
-   _mesa_get_format_color_encoding(format) == GL_SRGB) {
-  format = _mesa_get_srgb_format_linear(format);
+  assert(!"Bad dims in set_fetch_functions()");
}
 
-   texImage->FetchTexel = get_texel_fetch_func(format, dims);
ASSERT(texImage->FetchTexel);
 }
 
-- 
1.7.4.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/12] mesa: add compressed_fetch_func typedef

2012-12-09 Thread Brian Paul
From: Brian Paul 

This is a first step in removing the swrast-related code in core
Mesa's texture compression files.
---
 src/mesa/main/texcompress.h |9 +
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h
index 359b916..7e3de0e 100644
--- a/src/mesa/main/texcompress.h
+++ b/src/mesa/main/texcompress.h
@@ -48,6 +48,15 @@ _mesa_compressed_image_address(GLint col, GLint row, GLint 
img,
gl_format mesaFormat,
GLsizei width, const GLubyte *image);
 
+
+/** A function to fetch one texel from a compressed texture */
+typedef void (*compressed_fetch_func)(const GLubyte *map,
+  const GLuint imageOffsets[],
+  GLint rowStride,
+  GLint i, GLint j, GLint k,
+  GLfloat *texel);
+
+
 extern void
 _mesa_decompress_image(gl_format format, GLuint width, GLuint height,
const GLubyte *src, GLint srcRowStride,
-- 
1.7.4.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/12] mesa: add new texel fetch code for dxt formats

2012-12-09 Thread Brian Paul
From: Brian Paul 

---
 src/mesa/main/texcompress_s3tc.c |  106 +-
 src/mesa/main/texcompress_s3tc.h |5 ++
 2 files changed, 110 insertions(+), 1 deletions(-)

diff --git a/src/mesa/main/texcompress_s3tc.c b/src/mesa/main/texcompress_s3tc.c
index 476b998..9595c84 100644
--- a/src/mesa/main/texcompress_s3tc.c
+++ b/src/mesa/main/texcompress_s3tc.c
@@ -58,7 +58,7 @@
 #define DXTN_LIBNAME "libtxc_dxtn.so"
 #endif
 
-typedef void (*dxtFetchTexelFuncExt)( GLint srcRowstride, GLubyte *pixdata, 
GLint col, GLint row, GLvoid *texelOut );
+typedef void (*dxtFetchTexelFuncExt)( GLint srcRowstride, const GLubyte 
*pixdata, GLint col, GLint row, GLvoid *texelOut );
 
 static dxtFetchTexelFuncExt fetch_ext_rgb_dxt1 = NULL;
 static dxtFetchTexelFuncExt fetch_ext_rgba_dxt1 = NULL;
@@ -494,3 +494,107 @@ _mesa_fetch_texel_srgba_dxt5(const struct 
swrast_texture_image *texImage,
texel[BCOMP] = _mesa_nonlinear_to_linear(rgba[BCOMP]);
texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
 }
+
+
+/** Report problem with dxt texture decompression, once */
+static void
+problem(const char *func)
+{
+   static GLboolean warned = GL_FALSE;
+   if (!warned) {
+  _mesa_debug(NULL, "attempted to decode DXT texture without "
+  "library available: %s\n", func);
+  warned = GL_TRUE;
+   }
+}
+
+
+static void
+fetch_rgb_dxt1(const GLubyte *map, const GLuint imageOffsets[],
+   GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   if (fetch_ext_rgb_dxt1) {
+  GLuint sliceOffset = k ? imageOffsets[k] / 2 : 0;
+  GLubyte tex[4];
+  fetch_ext_rgb_dxt1(rowStride, map + sliceOffset, i, j, tex);
+  texel[RCOMP] = UBYTE_TO_FLOAT(tex[RCOMP]);
+  texel[GCOMP] = UBYTE_TO_FLOAT(tex[GCOMP]);
+  texel[BCOMP] = UBYTE_TO_FLOAT(tex[BCOMP]);
+  texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]);
+   }
+   else {
+  problem("rgb_dxt1");
+   }
+}
+
+static void
+fetch_rgba_dxt1(const GLubyte *map, const GLuint imageOffsets[],
+GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   if (fetch_ext_rgba_dxt1) {
+  GLuint sliceOffset = k ? imageOffsets[k] / 2 : 0;
+  GLubyte tex[4];
+  fetch_ext_rgba_dxt1(rowStride, map + sliceOffset, i, j, tex);
+  texel[RCOMP] = UBYTE_TO_FLOAT(tex[RCOMP]);
+  texel[GCOMP] = UBYTE_TO_FLOAT(tex[GCOMP]);
+  texel[BCOMP] = UBYTE_TO_FLOAT(tex[BCOMP]);
+  texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]);
+   }
+   else {
+  problem("rgba_dxt1");
+   }
+}
+
+static void
+fetch_rgba_dxt3(const GLubyte *map, const GLuint imageOffsets[],
+GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   if (fetch_ext_rgba_dxt3) {
+  GLuint sliceOffset = k ? imageOffsets[k] : 0;
+  GLubyte tex[4];
+  fetch_ext_rgba_dxt3(rowStride, map + sliceOffset, i, j, tex);
+  texel[RCOMP] = UBYTE_TO_FLOAT(tex[RCOMP]);
+  texel[GCOMP] = UBYTE_TO_FLOAT(tex[GCOMP]);
+  texel[BCOMP] = UBYTE_TO_FLOAT(tex[BCOMP]);
+  texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]);
+   }
+   else {
+  problem("rgba_dxt3");
+   }
+}
+
+static void
+fetch_rgba_dxt5(const GLubyte *map, const GLuint imageOffsets[],
+GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   if (fetch_ext_rgba_dxt5) {
+  GLuint sliceOffset = k ? imageOffsets[k] : 0;
+  GLubyte tex[4];
+  fetch_ext_rgba_dxt5(rowStride, map + sliceOffset, i, j, tex);
+  texel[RCOMP] = UBYTE_TO_FLOAT(tex[RCOMP]);
+  texel[GCOMP] = UBYTE_TO_FLOAT(tex[GCOMP]);
+  texel[BCOMP] = UBYTE_TO_FLOAT(tex[BCOMP]);
+  texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]);
+   }
+   else {
+  problem("rgba_dxt5");
+   }
+}
+
+
+compressed_fetch_func
+_mesa_get_dxt_fetch_func(gl_format format)
+{
+   switch (format) {
+   case MESA_FORMAT_RGB_DXT1:
+  return fetch_rgb_dxt1;
+   case MESA_FORMAT_RGBA_DXT1:
+  return fetch_rgba_dxt1;
+   case MESA_FORMAT_RGBA_DXT3:
+  return fetch_rgba_dxt3;
+   case MESA_FORMAT_RGBA_DXT5:
+  return fetch_rgba_dxt5;
+   default:
+  return NULL;
+   }
+}
diff --git a/src/mesa/main/texcompress_s3tc.h b/src/mesa/main/texcompress_s3tc.h
index 524ac0c..5758e83 100644
--- a/src/mesa/main/texcompress_s3tc.h
+++ b/src/mesa/main/texcompress_s3tc.h
@@ -29,6 +29,7 @@
 #include "glheader.h"
 #include "mfeatures.h"
 #include "texstore.h"
+#include "texcompress.h"
 
 struct gl_context;
 struct swrast_texture_image;
@@ -80,4 +81,8 @@ _mesa_fetch_texel_srgba_dxt5(const struct 
swrast_texture_image *texImage,
 extern void
 _mesa_init_texture_s3tc(struct gl_context *ctx);
 
+extern compressed_fetch_func
+_mesa_get_dxt_fetch_func(gl_format format);
+
+
 #endif /* TEXCOMPRESS_S3TC_H */
-- 
1.7.4.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/12] mesa: add new texel fetch code for fxt formats

2012-12-09 Thread Brian Paul
From: Brian Paul 

---
 src/mesa/main/texcompress_fxt1.c |   42 ++
 src/mesa/main/texcompress_fxt1.h |3 ++
 2 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/src/mesa/main/texcompress_fxt1.c b/src/mesa/main/texcompress_fxt1.c
index eeed788..6a0f856 100644
--- a/src/mesa/main/texcompress_fxt1.c
+++ b/src/mesa/main/texcompress_fxt1.c
@@ -1643,3 +1643,45 @@ fxt1_decode_1 (const void *texture, GLint stride, /* in 
pixels */
 
decode_1[mode](code, t, rgba);
 }
+
+
+
+
+static void
+fetch_rgb_fxt1(const GLubyte *map, const GLuint imageOffsets[],
+   GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   GLubyte rgba[4];
+   fxt1_decode_1(map, rowStride, i, j, rgba);
+   texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
+   texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
+   texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
+   texel[ACOMP] = 1.0F;
+}
+
+
+static void
+fetch_rgba_fxt1(const GLubyte *map, const GLuint imageOffsets[],
+GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   GLubyte rgba[4];
+   fxt1_decode_1(map, rowStride, i, j, rgba);
+   texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
+   texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
+   texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
+   texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
+}
+
+
+compressed_fetch_func
+_mesa_get_fxt_fetch_func(gl_format format)
+{
+   switch (format) {
+   case MESA_FORMAT_RGB_FXT1:
+  return fetch_rgb_fxt1;
+   case MESA_FORMAT_RGBA_FXT1:
+  return fetch_rgba_fxt1;
+   default:
+  return NULL;
+   }
+}
diff --git a/src/mesa/main/texcompress_fxt1.h b/src/mesa/main/texcompress_fxt1.h
index 2a8b8d6..f60f196 100644
--- a/src/mesa/main/texcompress_fxt1.h
+++ b/src/mesa/main/texcompress_fxt1.h
@@ -45,4 +45,7 @@ extern void
 _mesa_fetch_texel_2d_f_rgb_fxt1(const struct swrast_texture_image *texImage,
 GLint i, GLint j, GLint k, GLfloat *texel);
 
+compressed_fetch_func
+_mesa_get_fxt_fetch_func(gl_format format);
+
 #endif /* TEXCOMPRESS_FXT1_H */
-- 
1.7.4.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/12] mesa: add new texel fetch code for rgtc formats

2012-12-09 Thread Brian Paul
From: Brian Paul 

---
 src/mesa/main/texcompress_rgtc.c |  161 ++
 src/mesa/main/texcompress_rgtc.h |5 +
 2 files changed, 166 insertions(+), 0 deletions(-)

diff --git a/src/mesa/main/texcompress_rgtc.c b/src/mesa/main/texcompress_rgtc.c
index 5773459..de71f5d 100644
--- a/src/mesa/main/texcompress_rgtc.c
+++ b/src/mesa/main/texcompress_rgtc.c
@@ -447,3 +447,164 @@ _mesa_fetch_texel_signed_la_latc2(const struct swrast_texture_image *texImage,
 #undef TYPE
 #undef T_MIN
 #undef T_MAX
+
+
+
+static void
+fetch_red_rgtc1(const GLubyte *map, const GLuint imageOffsets[],
+GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   GLubyte red;
+   GLuint sliceOffset = k ? imageOffsets[k] / 2 : 0;
+   unsigned_fetch_texel_rgtc(rowStride, map + sliceOffset, i, j, &red, 1);
+   texel[RCOMP] = UBYTE_TO_FLOAT(red);
+   texel[GCOMP] = 0.0;
+   texel[BCOMP] = 0.0;
+   texel[ACOMP] = 1.0;
+}
+
+static void
+fetch_l_latc1(const GLubyte *map, const GLuint imageOffsets[],
+  GLint rowStride, GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   GLubyte red;
+   GLuint sliceOffset = k ? imageOffsets[k] / 2 : 0;
+   unsigned_fetch_texel_rgtc(rowStride, map + sliceOffset, i, j, &red, 1);
+   texel[RCOMP] =
+   texel[GCOMP] =
+   texel[BCOMP] = UBYTE_TO_FLOAT(red);
+   texel[ACOMP] = 1.0;
+}
+
+static void
+fetch_signed_red_rgtc1(const GLubyte *map, const GLuint imageOffsets[],
+   GLint rowStride, GLint i, GLint j, GLint k,
+   GLfloat *texel)
+{
+   GLbyte red;
+   GLuint sliceOffset = k ? imageOffsets[k] / 2 : 0;
+   signed_fetch_texel_rgtc(rowStride, (const GLbyte *) map + sliceOffset,
+   i, j, &red, 1);
+   texel[RCOMP] = BYTE_TO_FLOAT_TEX(red);
+   texel[GCOMP] = 0.0;
+   texel[BCOMP] = 0.0;
+   texel[ACOMP] = 1.0;
+}
+
+static void
+fetch_signed_l_latc1(const GLubyte *map, const GLuint imageOffsets[],
+ GLint rowStride, GLint i, GLint j, GLint k,
+ GLfloat *texel)
+{
+   GLubyte red;
+   GLuint sliceOffset = k ? imageOffsets[k] / 2 : 0;
+   unsigned_fetch_texel_rgtc(rowStride,  map + sliceOffset, i, j, &red, 1);
+   texel[RCOMP] =
+   texel[GCOMP] =
+   texel[BCOMP] = BYTE_TO_FLOAT(red);
+   texel[ACOMP] = 1.0;
+}
+
+static void
+fetch_rg_rgtc2(const GLubyte *map, const GLuint imageOffsets[],
+   GLint rowStride, GLint i, GLint j, GLint k,
+   GLfloat *texel)
+{
+   GLubyte red, green;
+   GLuint sliceOffset = k ? imageOffsets[k] : 0;
+   unsigned_fetch_texel_rgtc(rowStride,
+ map + sliceOffset,
+ i, j, &red, 2);
+   unsigned_fetch_texel_rgtc(rowStride,
+ map + sliceOffset + 8,
+ i, j, &green, 2);
+   texel[RCOMP] = UBYTE_TO_FLOAT(red);
+   texel[GCOMP] = UBYTE_TO_FLOAT(green);
+   texel[BCOMP] = 0.0;
+   texel[ACOMP] = 1.0;
+}
+
+static void
+fetch_la_latc2(const GLubyte *map, const GLuint imageOffsets[],
+   GLint rowStride, GLint i, GLint j, GLint k,
+   GLfloat *texel)
+{
+   GLubyte red, green;
+   GLuint sliceOffset = k ? imageOffsets[k] : 0;
+   unsigned_fetch_texel_rgtc(rowStride,
+ map + sliceOffset,
+ i, j, &red, 2);
+   unsigned_fetch_texel_rgtc(rowStride,
+ map + sliceOffset + 8,
+ i, j, &green, 2);
+   texel[RCOMP] =
+   texel[GCOMP] =
+   texel[BCOMP] = UBYTE_TO_FLOAT(red);
+   texel[ACOMP] = UBYTE_TO_FLOAT(green);
+}
+
+
+static void
+fetch_signed_rg_rgtc2(const GLubyte *map, const GLuint imageOffsets[],
+  GLint rowStride, GLint i, GLint j, GLint k,
+  GLfloat *texel)
+{
+   GLbyte red, green;
+   GLuint sliceOffset = k ? imageOffsets[k] : 0;
+   signed_fetch_texel_rgtc(rowStride,
+   (GLbyte *) map + sliceOffset,
+   i, j, &red, 2);
+   signed_fetch_texel_rgtc(rowStride,
+   (GLbyte *) map + sliceOffset + 8,
+   i, j, &green, 2);
+   texel[RCOMP] = BYTE_TO_FLOAT_TEX(red);
+   texel[GCOMP] = BYTE_TO_FLOAT_TEX(green);
+   texel[BCOMP] = 0.0;
+   texel[ACOMP] = 1.0;
+}
+
+
+static void
+fetch_signed_la_latc2(const GLubyte *map, const GLuint imageOffsets[],
+  GLint rowStride, GLint i, GLint j, GLint k,
+  GLfloat *texel)
+{
+   GLbyte red, green;
+   GLuint sliceOffset = k ? imageOffsets[k] : 0;
+   signed_fetch_texel_rgtc(rowStride,
+   (GLbyte *) map + sliceOffset,
+   i, j, &red, 2);
+   signed_fetch_texel_rgtc(rowStride,
+   (GLbyte *) map + sliceOffset + 8,
+   i, j, &green, 2);
+   texel[RCOMP] =
+   texel[GCOMP] =
+   texel[BCOMP] = BYTE_TO_FLOAT_TEX(red);
+   texel[ACOMP] = BYTE_TO_FLOAT_

[Mesa-dev] [PATCH 07/12] mesa: add new texel fetch code for etc formats

2012-12-09 Thread Brian Paul
From: Brian Paul 

---
 src/mesa/main/texcompress_etc.c |  274 +++
 src/mesa/main/texcompress_etc.h |6 +
 2 files changed, 280 insertions(+), 0 deletions(-)

diff --git a/src/mesa/main/texcompress_etc.c b/src/mesa/main/texcompress_etc.c
index 73d2fa4..c8bf6ea 100644
--- a/src/mesa/main/texcompress_etc.c
+++ b/src/mesa/main/texcompress_etc.c
@@ -1461,3 +1461,277 @@ _mesa_unpack_etc2_format(uint8_t *dst_row,
 src_row, src_stride,
 src_width, src_height);
 }
+
+
+
+static void
+fetch_etc1_rgb8(const GLubyte *map, const GLuint imageOffsets[],
+GLint rowStride, GLint i, GLint j, GLint k,
+GLfloat *texel)
+{
+   struct etc1_block block;
+   GLubyte dst[3];
+   const GLubyte *src;
+
+   src = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 8;
+
+   etc1_parse_block(&block, src);
+   etc1_fetch_texel(&block, i % 4, j % 4, dst);
+
+   texel[RCOMP] = UBYTE_TO_FLOAT(dst[0]);
+   texel[GCOMP] = UBYTE_TO_FLOAT(dst[1]);
+   texel[BCOMP] = UBYTE_TO_FLOAT(dst[2]);
+   texel[ACOMP] = 1.0f;
+}
+
+
+static void
+fetch_etc2_rgb8(const GLubyte *map, const GLuint imageOffsets[],
+GLint rowStride, GLint i, GLint j, GLint k,
+GLfloat *texel)
+{
+   struct etc2_block block;
+   uint8_t dst[3];
+   const uint8_t *src;
+
+   src = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 8;
+
+   etc2_rgb8_parse_block(&block, src,
+ false /* punchthrough_alpha */);
+   etc2_rgb8_fetch_texel(&block, i % 4, j % 4, dst,
+ false /* punchthrough_alpha */);
+
+   texel[RCOMP] = UBYTE_TO_FLOAT(dst[0]);
+   texel[GCOMP] = UBYTE_TO_FLOAT(dst[1]);
+   texel[BCOMP] = UBYTE_TO_FLOAT(dst[2]);
+   texel[ACOMP] = 1.0f;
+}
+
+static void
+fetch_etc2_srgb8(const GLubyte *map, const GLuint imageOffsets[],
+ GLint rowStride, GLint i, GLint j, GLint k,
+ GLfloat *texel)
+{
+   struct etc2_block block;
+   uint8_t dst[3];
+   const uint8_t *src;
+
+   src = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 8;
+
+   etc2_rgb8_parse_block(&block, src,
+ false /* punchthrough_alpha */);
+   etc2_rgb8_fetch_texel(&block, i % 4, j % 4, dst,
+ false /* punchthrough_alpha */);
+
+   texel[RCOMP] = _mesa_nonlinear_to_linear(dst[0]);
+   texel[GCOMP] = _mesa_nonlinear_to_linear(dst[1]);
+   texel[BCOMP] = _mesa_nonlinear_to_linear(dst[2]);
+   texel[ACOMP] = 1.0f;
+}
+
+static void
+fetch_etc2_rgba8_eac(const GLubyte *map, const GLuint imageOffsets[],
+ GLint rowStride, GLint i, GLint j, GLint k,
+ GLfloat *texel)
+{
+   struct etc2_block block;
+   uint8_t dst[4];
+   const uint8_t *src;
+
+   src = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
+
+   etc2_rgba8_parse_block(&block, src);
+   etc2_rgba8_fetch_texel(&block, i % 4, j % 4, dst);
+
+   texel[RCOMP] = UBYTE_TO_FLOAT(dst[0]);
+   texel[GCOMP] = UBYTE_TO_FLOAT(dst[1]);
+   texel[BCOMP] = UBYTE_TO_FLOAT(dst[2]);
+   texel[ACOMP] = UBYTE_TO_FLOAT(dst[3]);
+}
+
+static void
+fetch_etc2_srgb8_alpha8_eac(const GLubyte *map, const GLuint imageOffsets[],
+GLint rowStride, GLint i, GLint j, GLint k,
+GLfloat *texel)
+{
+   struct etc2_block block;
+   uint8_t dst[4];
+   const uint8_t *src;
+
+   src = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
+
+   etc2_rgba8_parse_block(&block, src);
+   etc2_rgba8_fetch_texel(&block, i % 4, j % 4, dst);
+
+   texel[RCOMP] = _mesa_nonlinear_to_linear(dst[0]);
+   texel[GCOMP] = _mesa_nonlinear_to_linear(dst[1]);
+   texel[BCOMP] = _mesa_nonlinear_to_linear(dst[2]);
+   texel[ACOMP] = UBYTE_TO_FLOAT(dst[3]);
+}
+
+static void
+fetch_etc2_r11_eac(const GLubyte *map, const GLuint imageOffsets[],
+   GLint rowStride, GLint i, GLint j, GLint k,
+   GLfloat *texel)
+{
+   struct etc2_block block;
+   GLushort dst;
+   const uint8_t *src;
+
+   src = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 8;
+
+   etc2_r11_parse_block(&block, src);
+   etc2_r11_fetch_texel(&block, i % 4, j % 4, (uint8_t *)&dst);
+
+   texel[RCOMP] = USHORT_TO_FLOAT(dst);
+   texel[GCOMP] = 0.0f;
+   texel[BCOMP] = 0.0f;
+   texel[ACOMP] = 1.0f;
+}
+
+static void
+fetch_etc2_rg11_eac(const GLubyte *map, const GLuint imageOffsets[],
+GLint rowStride, GLint i, GLint j, GLint k,
+GLfloat *texel)
+{
+   struct etc2_block block;
+   GLushort dst[2];
+   const uint8_t *src;
+
+   src = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
+
+   /* red component */
+   etc2_r11_parse_block(&block, src);
+   etc2_r11_fetch_texel(&block, i % 4, j % 4, (uint8_t *)dst);
+
+   /* green component */
+   etc2_r11_parse_block(&block, src + 8);
+   etc2_r11_fetch_texel(&block, i % 4, j % 4, (uint8_t *)(dst + 1));
+

[Mesa-dev] [PATCH 08/12] mesa: added _mesa_get_compressed_fetch_func()

2012-12-09 Thread Brian Paul
From: Brian Paul 

---
 src/mesa/main/texcompress.c |   33 +
 src/mesa/main/texcompress.h |3 +++
 2 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 372a483..9ad6a8b 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -523,6 +523,39 @@ _mesa_compressed_image_address(GLint col, GLint row, GLint img,
 
 
 /**
+ * Return a texel-fetch function for the given format, or NULL if
+ * invalid format.
+ */
+compressed_fetch_func
+_mesa_get_compressed_fetch_func(gl_format format)
+{
+   switch (format) {
+   case MESA_FORMAT_RGB_DXT1:
+   case MESA_FORMAT_RGBA_DXT1:
+   case MESA_FORMAT_RGBA_DXT3:
+   case MESA_FORMAT_RGBA_DXT5:
+  return _mesa_get_dxt_fetch_func(format);
+   case MESA_FORMAT_RGB_FXT1:
+   case MESA_FORMAT_RGBA_FXT1:
+  return _mesa_get_fxt_fetch_func(format);
+   case MESA_FORMAT_RED_RGTC1:
+   case MESA_FORMAT_L_LATC1:
+   case MESA_FORMAT_SIGNED_RED_RGTC1:
+   case MESA_FORMAT_SIGNED_L_LATC1:
+   case MESA_FORMAT_RG_RGTC2:
+   case MESA_FORMAT_LA_LATC2:
+   case MESA_FORMAT_SIGNED_RG_RGTC2:
+   case MESA_FORMAT_SIGNED_LA_LATC2:
+  return _mesa_get_compressed_rgtc_func(format);
+   case MESA_FORMAT_ETC1_RGB8:
+  return _mesa_get_etc_fetch_func(format);
+   default:
+  return NULL;
+   }
+}
+
+
+/**
  * Decompress a compressed texture image, returning a GL_RGBA/GL_FLOAT image.
  * \param srcRowStride  stride in bytes between rows of blocks in the
  *  compressed source image.
diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h
index 7e3de0e..b45e7cf 100644
--- a/src/mesa/main/texcompress.h
+++ b/src/mesa/main/texcompress.h
@@ -56,6 +56,9 @@ typedef void (*compressed_fetch_func)(const GLubyte *map,
   GLint i, GLint j, GLint k,
   GLfloat *texel);
 
+extern compressed_fetch_func
+_mesa_get_compressed_fetch_func(gl_format format);
+
 
 extern void
 _mesa_decompress_image(gl_format format, GLuint width, GLuint height,
-- 
1.7.4.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/12] mesa: reimplement _mesa_decompress_image() using new tex fetch code

2012-12-09 Thread Brian Paul
From: Brian Paul 

---
 src/mesa/main/texcompress.c |  110 +++
 1 files changed, 7 insertions(+), 103 deletions(-)

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 9ad6a8b..33c580a 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -42,7 +42,6 @@
 #include "texcompress_rgtc.h"
 #include "texcompress_s3tc.h"
 #include "texcompress_etc.h"
-#include "swrast/s_context.h"
 
 
 /**
@@ -565,120 +564,25 @@ _mesa_decompress_image(gl_format format, GLuint width, GLuint height,
const GLubyte *src, GLint srcRowStride,
GLfloat *dest)
 {
-   void (*fetch)(const struct swrast_texture_image *texImage,
- GLint i, GLint j, GLint k, GLfloat *texel);
-   struct swrast_texture_image texImage;  /* dummy teximage */
+   compressed_fetch_func fetch;
GLuint i, j;
GLuint bytes, bw, bh;
+   GLint stride;
 
bytes = _mesa_get_format_bytes(format);
_mesa_get_format_block_size(format, &bw, &bh);
 
-   /* setup dummy texture image info */
-   memset(&texImage, 0, sizeof(texImage));
-   texImage.Map = (void *) src;
-
-   /* XXX This line is a bit of a hack to adapt to the row stride
-* convention used by the texture decompression functions.
-*/
-   texImage.RowStride = srcRowStride * bh / bytes;
-
-   switch (format) {
-   /* DXT formats */
-   case MESA_FORMAT_RGB_DXT1:
-  fetch = _mesa_fetch_texel_rgb_dxt1;
-  break;
-   case MESA_FORMAT_RGBA_DXT1:
-  fetch = _mesa_fetch_texel_rgba_dxt1;
-  break;
-   case MESA_FORMAT_RGBA_DXT3:
-  fetch = _mesa_fetch_texel_rgba_dxt3;
-  break;
-   case MESA_FORMAT_RGBA_DXT5:
-  fetch = _mesa_fetch_texel_rgba_dxt5;
-  break;
-
-   /* FXT1 formats */
-   case MESA_FORMAT_RGB_FXT1:
-  fetch = _mesa_fetch_texel_2d_f_rgb_fxt1;
-  break;
-   case MESA_FORMAT_RGBA_FXT1:
-  fetch = _mesa_fetch_texel_2d_f_rgba_fxt1;
-  break;
-
-   /* Red/RG formats */
-   case MESA_FORMAT_RED_RGTC1:
-  fetch = _mesa_fetch_texel_red_rgtc1;
-  break;
-   case MESA_FORMAT_SIGNED_RED_RGTC1:
-  fetch = _mesa_fetch_texel_signed_red_rgtc1;
-  break;
-   case MESA_FORMAT_RG_RGTC2:
-  fetch = _mesa_fetch_texel_rg_rgtc2;
-  break;
-   case MESA_FORMAT_SIGNED_RG_RGTC2:
-  fetch = _mesa_fetch_texel_signed_rg_rgtc2;
-  break;
-
-   /* L/LA formats */
-   case MESA_FORMAT_L_LATC1:
-  fetch = _mesa_fetch_texel_l_latc1;
-  break;
-   case MESA_FORMAT_SIGNED_L_LATC1:
-  fetch = _mesa_fetch_texel_signed_l_latc1;
-  break;
-   case MESA_FORMAT_LA_LATC2:
-  fetch = _mesa_fetch_texel_la_latc2;
-  break;
-   case MESA_FORMAT_SIGNED_LA_LATC2:
-  fetch = _mesa_fetch_texel_signed_la_latc2;
-  break;
-
-   /* ETC1 formats */
-   case MESA_FORMAT_ETC1_RGB8:
-  fetch = _mesa_fetch_texel_2d_f_etc1_rgb8;
-  break;
-
-   /* ETC2 formats */
-   case MESA_FORMAT_ETC2_RGB8:
-  fetch = _mesa_fetch_texel_2d_f_etc2_rgb8;
-  break;
-   case MESA_FORMAT_ETC2_SRGB8:
-  fetch = _mesa_fetch_texel_2d_f_etc2_srgb8;
-  break;
-   case MESA_FORMAT_ETC2_RGBA8_EAC:
-  fetch = _mesa_fetch_texel_2d_f_etc2_rgba8_eac;
-  break;
-   case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
-  fetch = _mesa_fetch_texel_2d_f_etc2_srgb8_alpha8_eac;
-  break;
-   case MESA_FORMAT_ETC2_R11_EAC:
-  fetch = _mesa_fetch_texel_2d_f_etc2_r11_eac;
-  break;
-   case MESA_FORMAT_ETC2_RG11_EAC:
-  fetch = _mesa_fetch_texel_2d_f_etc2_rg11_eac;
-  break;
-   case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
-  fetch = _mesa_fetch_texel_2d_f_etc2_signed_r11_eac;
-  break;
-   case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
-  fetch = _mesa_fetch_texel_2d_f_etc2_signed_rg11_eac;
-  break;
-   case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
-  fetch = _mesa_fetch_texel_2d_f_etc2_rgb8_punchthrough_alpha1;
-  break;
-   case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
-  fetch = _mesa_fetch_texel_2d_f_etc2_srgb8_punchthrough_alpha1;
-  break;
-
-   default:
+   fetch = _mesa_get_compressed_fetch_func(format);
+   if (!fetch) {
   _mesa_problem(NULL, "Unexpected format in _mesa_decompress_image()");
   return;
}
+ 
+   stride = srcRowStride * bh / bytes;
 
for (j = 0; j < height; j++) {
   for (i = 0; i < width; i++) {
- fetch(&texImage, i, j, 0, dest);
+ fetch(src, NULL, stride, i, j, 0, dest);
  dest += 4;
   }
}
-- 
1.7.4.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/12] swrast: use new core Mesa compressed texel fetch functions

2012-12-09 Thread Brian Paul
From: Brian Paul 

---
 src/mesa/swrast/s_context.h  |4 +
 src/mesa/swrast/s_texfetch.c |  193 +++---
 2 files changed, 110 insertions(+), 87 deletions(-)

diff --git a/src/mesa/swrast/s_context.h b/src/mesa/swrast/s_context.h
index 18353c4..f3f188e 100644
--- a/src/mesa/swrast/s_context.h
+++ b/src/mesa/swrast/s_context.h
@@ -45,6 +45,7 @@
 
 #include "main/compiler.h"
 #include "main/mtypes.h"
+#include "main/texcompress.h"
 #include "program/prog_execute.h"
 #include "swrast.h"
 #include "s_fragprog.h"
@@ -146,6 +147,9 @@ struct swrast_texture_image
GLubyte *Buffer;
 
FetchTexelFunc FetchTexel;
+
+   /** For fetching texels from compressed textures */
+   compressed_fetch_func FetchCompressedTexel;
 };
 
 
diff --git a/src/mesa/swrast/s_texfetch.c b/src/mesa/swrast/s_texfetch.c
index c133eac..1f19641 100644
--- a/src/mesa/swrast/s_texfetch.c
+++ b/src/mesa/swrast/s_texfetch.c
@@ -88,6 +88,23 @@ nonlinear_to_linear(GLubyte cs8)
 #define DIM 3
 #include "s_texfetch_tmp.h"
 
+
+/**
+ * All compressed texture texel fetching is done through this function.
+ * Basically just call a core-Mesa texel fetch function.
+ */
+static void
+fetch_compressed(const struct swrast_texture_image *swImage,
+ GLint i, GLint j, GLint k, GLfloat *texel)
+{
+   swImage->FetchCompressedTexel(swImage->Map,
+ swImage->ImageOffsets,
+ swImage->RowStride,
+ i, j, k, texel);
+}
+
+
+
 /**
  * Null texel fetch function.
  *
@@ -426,64 +443,64 @@ texfetch_funcs[] =
},
{
   MESA_FORMAT_SRGB_DXT1,
-  _mesa_fetch_texel_srgb_dxt1,
-  _mesa_fetch_texel_srgb_dxt1,
-  _mesa_fetch_texel_srgb_dxt1
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_SRGBA_DXT1,
-  _mesa_fetch_texel_srgba_dxt1,
-  _mesa_fetch_texel_srgba_dxt1,
-  _mesa_fetch_texel_srgba_dxt1
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_SRGBA_DXT3,
-  _mesa_fetch_texel_srgba_dxt3,
-  _mesa_fetch_texel_srgba_dxt3,
-  _mesa_fetch_texel_srgba_dxt3
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_SRGBA_DXT5,
-  _mesa_fetch_texel_srgba_dxt5,
-  _mesa_fetch_texel_srgba_dxt5,
-  _mesa_fetch_texel_srgba_dxt5
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
 
{
   MESA_FORMAT_RGB_FXT1,
-  NULL,
-  _mesa_fetch_texel_2d_f_rgb_fxt1,
-  NULL
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_RGBA_FXT1,
-  NULL,
-  _mesa_fetch_texel_2d_f_rgba_fxt1,
-  NULL
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_RGB_DXT1,
-  _mesa_fetch_texel_rgb_dxt1,
-  _mesa_fetch_texel_rgb_dxt1,
-  _mesa_fetch_texel_rgb_dxt1
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_RGBA_DXT1,
-  _mesa_fetch_texel_rgba_dxt1,
-  _mesa_fetch_texel_rgba_dxt1,
-  _mesa_fetch_texel_rgba_dxt1
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_RGBA_DXT3,
-  _mesa_fetch_texel_rgba_dxt3,
-  _mesa_fetch_texel_rgba_dxt3,
-  _mesa_fetch_texel_rgba_dxt3
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_RGBA_DXT5,
-  _mesa_fetch_texel_rgba_dxt5,
-  _mesa_fetch_texel_rgba_dxt5,
-  _mesa_fetch_texel_rgba_dxt5
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_RGBA_FLOAT32,
@@ -976,117 +993,117 @@ texfetch_funcs[] =
},
{
   MESA_FORMAT_RED_RGTC1,
-  _mesa_fetch_texel_red_rgtc1,
-  _mesa_fetch_texel_red_rgtc1,
-  _mesa_fetch_texel_red_rgtc1
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_SIGNED_RED_RGTC1,
-  _mesa_fetch_texel_signed_red_rgtc1,
-  _mesa_fetch_texel_signed_red_rgtc1,
-  _mesa_fetch_texel_signed_red_rgtc1
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_RG_RGTC2,
-  _mesa_fetch_texel_rg_rgtc2,
-  _mesa_fetch_texel_rg_rgtc2,
-  _mesa_fetch_texel_rg_rgtc2
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_SIGNED_RG_RGTC2,
-  _mesa_fetch_texel_signed_rg_rgtc2,
-  _mesa_fetch_texel_signed_rg_rgtc2,
-  _mesa_fetch_texel_signed_rg_rgtc2
+  fetch_compressed,
+  fetch_compressed,
+  fetch_compressed
},
{
   MESA_FORMAT_L_LATC1,
-  _mesa_fetch_texel_l_latc1,
-  _mesa_fetch_texel_l_latc1,
-  _mesa_fetch_texel_l_latc1
+  fetch_compressed,
+  fetch_c

[Mesa-dev] [PATCH 11/12] mesa: remove old swrast-based compressed texel fetch code

2012-12-09 Thread Brian Paul
From: Brian Paul 

---
 src/mesa/main/texcompress_etc.c  |  244 +-
 src/mesa/main/texcompress_etc.h  |   46 ---
 src/mesa/main/texcompress_fxt1.c |   32 -
 src/mesa/main/texcompress_fxt1.h |8 --
 src/mesa/main/texcompress_rgtc.c |  132 
 src/mesa/main/texcompress_rgtc.h |   34 --
 src/mesa/main/texcompress_s3tc.c |  165 -
 src/mesa/main/texcompress_s3tc.h |   32 -
 8 files changed, 1 insertions(+), 692 deletions(-)

diff --git a/src/mesa/main/texcompress_etc.c b/src/mesa/main/texcompress_etc.c
index c8bf6ea..57c42c4 100644
--- a/src/mesa/main/texcompress_etc.c
+++ b/src/mesa/main/texcompress_etc.c
@@ -43,9 +43,9 @@
 #include "texcompress_etc.h"
 #include "texstore.h"
 #include "macros.h"
-#include "swrast/s_context.h"
 #include "format_unpack.h"
 
+
 struct etc2_block {
int distance;
uint64_t pixel_indices[2];
@@ -113,25 +113,6 @@ _mesa_texstore_etc1_rgb8(TEXSTORE_PARAMS)
return GL_FALSE;
 }
 
-void
-_mesa_fetch_texel_2d_f_etc1_rgb8(const struct swrast_texture_image *texImage,
- GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   struct etc1_block block;
-   GLubyte dst[3];
-   const GLubyte *src;
-
-   src = (const GLubyte *) texImage->Map +
-  (((texImage->RowStride + 3) / 4) * (j / 4) + (i / 4)) * 8;
-
-   etc1_parse_block(&block, src);
-   etc1_fetch_texel(&block, i % 4, j % 4, dst);
-
-   texel[RCOMP] = UBYTE_TO_FLOAT(dst[0]);
-   texel[GCOMP] = UBYTE_TO_FLOAT(dst[1]);
-   texel[BCOMP] = UBYTE_TO_FLOAT(dst[2]);
-   texel[ACOMP] = 1.0f;
-}
 
 /**
  * Decode texture data in format `MESA_FORMAT_ETC1_RGB8` to
@@ -1166,229 +1147,6 @@ _mesa_texstore_etc2_srgb8_punchthrough_alpha1(TEXSTORE_PARAMS)
return GL_FALSE;
 }
 
-void
-_mesa_fetch_texel_2d_f_etc2_rgb8(const struct swrast_texture_image *texImage,
- GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   struct etc2_block block;
-   uint8_t dst[3];
-   const uint8_t *src;
-
-   src = texImage->Map +
-  (((texImage->RowStride + 3) / 4) * (j / 4) + (i / 4)) * 8;
-
-   etc2_rgb8_parse_block(&block, src,
- false /* punchthrough_alpha */);
-   etc2_rgb8_fetch_texel(&block, i % 4, j % 4, dst,
- false /* punchthrough_alpha */);
-
-   texel[RCOMP] = UBYTE_TO_FLOAT(dst[0]);
-   texel[GCOMP] = UBYTE_TO_FLOAT(dst[1]);
-   texel[BCOMP] = UBYTE_TO_FLOAT(dst[2]);
-   texel[ACOMP] = 1.0f;
-}
-
-void
-_mesa_fetch_texel_2d_f_etc2_srgb8(const struct swrast_texture_image *texImage,
-  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   struct etc2_block block;
-   uint8_t dst[3];
-   const uint8_t *src;
-
-   src = texImage->Map +
-  (((texImage->RowStride + 3) / 4) * (j / 4) + (i / 4)) * 8;
-
-   etc2_rgb8_parse_block(&block, src,
- false /* punchthrough_alpha */);
-   etc2_rgb8_fetch_texel(&block, i % 4, j % 4, dst,
- false /* punchthrough_alpha */);
-
-   texel[RCOMP] = _mesa_nonlinear_to_linear(dst[0]);
-   texel[GCOMP] = _mesa_nonlinear_to_linear(dst[1]);
-   texel[BCOMP] = _mesa_nonlinear_to_linear(dst[2]);
-   texel[ACOMP] = 1.0f;
-}
-
-void
-_mesa_fetch_texel_2d_f_etc2_rgba8_eac(const struct swrast_texture_image *texImage,
-  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   struct etc2_block block;
-   uint8_t dst[4];
-   const uint8_t *src;
-
-   src = texImage->Map +
-  (((texImage->RowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
-
-   etc2_rgba8_parse_block(&block, src);
-   etc2_rgba8_fetch_texel(&block, i % 4, j % 4, dst);
-
-   texel[RCOMP] = UBYTE_TO_FLOAT(dst[0]);
-   texel[GCOMP] = UBYTE_TO_FLOAT(dst[1]);
-   texel[BCOMP] = UBYTE_TO_FLOAT(dst[2]);
-   texel[ACOMP] = UBYTE_TO_FLOAT(dst[3]);
-}
-
-void
-_mesa_fetch_texel_2d_f_etc2_srgb8_alpha8_eac(const struct
- swrast_texture_image *texImage,
- GLint i, GLint j,
- GLint k, GLfloat *texel)
-{
-   struct etc2_block block;
-   uint8_t dst[4];
-   const uint8_t *src;
-
-   src = texImage->Map +
-  (((texImage->RowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
-
-   etc2_rgba8_parse_block(&block, src);
-   etc2_rgba8_fetch_texel(&block, i % 4, j % 4, dst);
-
-   texel[RCOMP] = _mesa_nonlinear_to_linear(dst[0]);
-   texel[GCOMP] = _mesa_nonlinear_to_linear(dst[1]);
-   texel[BCOMP] = _mesa_nonlinear_to_linear(dst[2]);
-   texel[ACOMP] = UBYTE_TO_FLOAT(dst[3]);
-}
-
-void
-_mesa_fetch_texel_2d_f_etc2_r11_eac(const struct swrast_texture_image *texImage,
-GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   struct etc2_block block;
-   GLushort dst;
-   const uint8_t *src;
-
-   src = texImage->Map +
-  (((texImage->RowStride + 3) / 4) * (j / 4) + (i / 4)) * 8;
-
-   etc2_r11_parse_block(&block, src);

[Mesa-dev] [PATCH 12/12] mesa: minor indentation fixes in texcompress_etc.c

2012-12-09 Thread Brian Paul
From: Brian Paul 

---
 src/mesa/main/texcompress_etc.c |   34 +-
 1 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/mesa/main/texcompress_etc.c b/src/mesa/main/texcompress_etc.c
index 57c42c4..7ad4dde 100644
--- a/src/mesa/main/texcompress_etc.c
+++ b/src/mesa/main/texcompress_etc.c
@@ -212,7 +212,7 @@ etc2_base_color1_h_mode(const uint8_t *in, GLuint index)
   break;
}
return ((x << 4) | (x & 0xf));
- }
+}
 
 static uint8_t
 etc2_base_color2_h_mode(const uint8_t *in, GLuint index)
@@ -234,7 +234,7 @@ etc2_base_color2_h_mode(const uint8_t *in, GLuint index)
   break;
}
return ((x << 4) | (x & 0xf));
- }
+}
 
 static uint8_t
 etc2_base_color_o_planar(const uint8_t *in, GLuint index)
@@ -449,7 +449,7 @@ etc2_rgb8_parse_block(struct etc2_block *block,
   */
  block->base_colors[0][i] = etc1_base_color_diff_hi(src[i]);
  block->base_colors[1][i] = etc1_base_color_diff_lo(src[i]);
- }
+  }
}
 
if (block->is_ind_mode || block->is_diff_mode) {
@@ -737,10 +737,10 @@ etc2_unpack_srgb8(uint8_t *dst_row,
 }
  }
  src += bs;
-   }
+  }
 
   src_row += src_stride;
-}
+   }
 }
 
 static void
@@ -773,10 +773,10 @@ etc2_unpack_rgba8(uint8_t *dst_row,
 }
  }
  src += bs;
-   }
+  }
 
   src_row += src_stride;
-}
+   }
 }
 
 static void
@@ -817,10 +817,10 @@ etc2_unpack_srgb8_alpha8(uint8_t *dst_row,
 }
  }
  src += bs;
-   }
+  }
 
   src_row += src_stride;
-}
+   }
 }
 
 static void
@@ -852,10 +852,10 @@ etc2_unpack_r11(uint8_t *dst_row,
 }
  }
  src += bs;
-   }
+  }
 
   src_row += src_stride;
-}
+   }
 }
 
 static void
@@ -900,10 +900,10 @@ etc2_unpack_rg11(uint8_t *dst_row,
 }
  }
  src += bs;
-   }
+  }
 
   src_row += src_stride;
-}
+   }
 }
 
 static void
@@ -936,10 +936,10 @@ etc2_unpack_signed_r11(uint8_t *dst_row,
 }
  }
  src += bs;
-   }
+  }
 
   src_row += src_stride;
-}
+   }
 }
 
 static void
@@ -984,10 +984,10 @@ etc2_unpack_signed_rg11(uint8_t *dst_row,
 }
  }
  src += bs;
-   }
+  }
 
   src_row += src_stride;
-}
+   }
 }
 
 static void
-- 
1.7.4.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/7] i965/fs: Before reg alloc, schedule instructions to reduce live ranges.

2012-12-09 Thread Kenneth Graunke

On 12/07/2012 02:58 PM, Eric Anholt wrote:

This came from an idea by Ben Segovia.  16-wide pixel shaders are very
important for latency hiding on i965, so we want to try really hard to
get them.  If scheduling an instruction makes some set of instructions
available, those are probably the ones that make the instruction's
result dead.  By choosing those first, we'll have a tendency to reduce
the amount of live data as opposed to creating more.

Previously, we were sometimes getting this behavior out of the
scheduler, which was what produced the scheduler's original performance
wins on lightsmark.  Unfortunately, that was mostly an accident of the
lame instruction latency information that I had, which made it
impossible to fix the actual scheduling for performance.  Now that we've
fixed the scheduling for setup for register allocation, we can safely
update the latency parameters for the final schedule.

In shader-db, we lose 37 16-wide shaders, but gain 90 new ones.  4
shaders that were spilling change how many registers spill, for a
reduction of 70/3899 instructions.
---
  .../dri/i965/brw_fs_schedule_instructions.cpp  |   49 +---
  1 file changed, 43 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index d48ad1e..3941eac 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -496,13 +496,50 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
schedule_node *chosen = NULL;
int chosen_time = 0;

-  foreach_list(node, &instructions) {
-schedule_node *n = (schedule_node *)node;
+  if (post_reg_alloc) {
+ /* Of the instructions closest ready to execute or the closest to
+  * being ready, choose the oldest one.
+  */
+ foreach_list(node, &instructions) {
+schedule_node *n = (schedule_node *)node;
+
+if (!chosen || n->unblocked_time < chosen_time) {
+   chosen = n;
+   chosen_time = n->unblocked_time;
+}
+ }
+  } else {
+ /* Before register allocation, we don't care about the latencies of
+  * instructions.  All we care about is reducing live intervals of
+  * variables so that we can avoid register spilling, or get 16-wide
+  * shaders which naturally do a better job of hiding instruction
+  * latency.
+  *
+  * To do so, schedule our instructions in a roughly LIFO/depth-first
+  * order: when new instructions become available as a result of
+  * scheduling something, choose those first so that our result
+  * hopefully is consumed quickly.
+  *
+  * The exception is messages that generate more than one result
+  * register (AKA texturing).  In those cases, the LIFO search would
+  * normally tend to choose them quickly (because scheduling the
+  * previous message not only unblocked the children using its result,
+  * but also the MRF setup for the next sampler message, which in turn
+  * unblocks the next sampler message).
+  */
+ for (schedule_node *node = (schedule_node *)instructions.get_tail();
+  node != instructions.get_head()->prev;
+  node = (schedule_node *)node->prev) {
+schedule_node *n = (schedule_node *)node;
+
+if (!chosen || chosen->inst->regs_written() > 1) {
+   chosen = n;
+   if (chosen->inst->regs_written() <= 1)
+  break;
+}


I don't think the if condition is necessary here.  Just doing

for (...) {
chosen = (schedule_node *) node;
if (chosen->inst->regs_written() <= 1)
break;
}

should accomplish the same thing.


+ }

-if (!chosen || n->unblocked_time < chosen_time) {
-   chosen = n;
-   chosen_time = n->unblocked_time;
-}
+ chosen_time = chosen->unblocked_time;


It seems plausible that there could be no nodes to schedule...which 
means chosen would be NULL here.  Perhaps just move chosen_time = 
chosen->unblocked_time into the if...break above.



}

/* Schedule this instruction. */

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] i965/fs: Add empirically-determined instruction latencies for gen7.

2012-12-09 Thread Kenneth Graunke

On 12/07/2012 02:58 PM, Eric Anholt wrote:

The limited performance testing I've done on this hasn't shown any
statistically significant differences yet.
---
  .../dri/i965/brw_fs_schedule_instructions.cpp  |  150 +++-
  1 file changed, 147 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index 3623c13..f3f0079 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -57,7 +57,7 @@ static bool debug = false;
  class schedule_node : public exec_node
  {
  public:
-   schedule_node(fs_inst *inst)
+   schedule_node(fs_inst *inst, int gen)
 {
this->inst = inst;
this->child_array_size = 0;
@@ -67,10 +67,14 @@ public:
this->parent_count = 0;
this->unblocked_time = 0;

-  set_latency_gen4();
+  if (gen >= 7)
+ set_latency_gen7();
+  else
+ set_latency_gen4();
 }

 void set_latency_gen4();
+   void set_latency_gen7();

 fs_inst *inst;
 schedule_node **children;
@@ -120,6 +124,146 @@ schedule_node::set_latency_gen4()
 }
  }

+void
+schedule_node::set_latency_gen7()
+{
+   switch (inst->opcode) {
+   case BRW_OPCODE_MAD:
+  /* 3 cycles (this is said to be 4 cycles sometimes depending on the
+   * register numbers in the sources):
+   * mad(8) g4<1>F g2.2<4,1,1>F.x  g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 
WE_normal 1Q };
+   *
+   * 20 cycles:
+   * mad(8) g4<1>F g2.2<4,1,1>F.x  g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 
WE_normal 1Q };
+   * mov(8) null   g4<4,4,1>F  { align16 WE_normal 1Q 
};
+   */
+  latency = 17;
+  break;
+
+   case SHADER_OPCODE_RCP:
+  /* 2 cycles:
+   * math inv(8) g4<1>F g2<0,1,0>F  null{ align1 WE_normal 
1Q };
+   *
+   * 18 cycles:
+   * math inv(8) g4<1>F g2<0,1,0>F  null{ align1 WE_normal 
1Q };
+   * mov(8)  null   g4<8,8,1>F  { align1 WE_normal 
1Q };
+   *
+   * Same for exp2, log2, rsq, sqrt, sin, cos.
+   */
+  latency = 16;
+  break;
+
+   case SHADER_OPCODE_POW:
+  /* 2 cycles:
+   * math pow(8) g4<1>F g2<0,1,0>F  g2.1<0,1,0>F{ align1 WE_normal 
1Q };
+   *
+   * 26 cycles:
+   * math pow(8) g4<1>F g2<0,1,0>F  g2.1<0,1,0>F{ align1 WE_normal 
1Q };
+   * mov(8)  null   g4<8,8,1>F  { align1 WE_normal 
1Q };
+   */
+  latency = 24;
+  break;
+
+   case SHADER_OPCODE_TEX:
+   case SHADER_OPCODE_TXD:
+   case SHADER_OPCODE_TXF:
+   case SHADER_OPCODE_TXL:
+   case SHADER_OPCODE_TXS:
+  /* 18 cycles:
+   * mov(8)  g115<1>F   0F  { align1 WE_normal 
1Q };
+   * mov(8)  g114<1>F   0F  { align1 WE_normal 
1Q };
+   * send(8) g4<1>UWg114<8,8,1>F
+   *   sampler (10, 0, 0, 1) mlen 2 rlen 4  { align1 WE_normal 
1Q };
+   *
+   * 697 +/-49 cycles (min 610, n=26):
+   * mov(8)  g115<1>F   0F  { align1 WE_normal 
1Q };
+   * mov(8)  g114<1>F   0F  { align1 WE_normal 
1Q };
+   * send(8) g4<1>UWg114<8,8,1>F
+   *   sampler (10, 0, 0, 1) mlen 2 rlen 4  { align1 WE_normal 
1Q };
+   * mov(8)  null   g4<8,8,1>F  { align1 WE_normal 
1Q };
+   *
+   * So the latency on our first texture load of the batchbuffer takes
+   * ~700 cycles, since the caches are cold at that point.
+   *
+   * 840 +/- 92 cycles (min 720, n=25):
+   * mov(8)  g115<1>F   0F  { align1 WE_normal 
1Q };
+   * mov(8)  g114<1>F   0F  { align1 WE_normal 
1Q };
+   * send(8) g4<1>UWg114<8,8,1>F
+   *   sampler (10, 0, 0, 1) mlen 2 rlen 4  { align1 WE_normal 
1Q };
+   * mov(8)  null   g4<8,8,1>F  { align1 WE_normal 
1Q };
+   * send(8) g4<1>UWg114<8,8,1>F
+   *   sampler (10, 0, 0, 1) mlen 2 rlen 4  { align1 WE_normal 
1Q };
+   * mov(8)  null   g4<8,8,1>F  { align1 WE_normal 
1Q };
+   *
+   * On the second load, it takes just an extra ~140 cycles, and after
+   * accounting for the 14 cycles of the MOV's latency, that makes ~130.
+   *
+   * 683 +/- 49 cycles (min = 602, n=47):
+   * mov(8)  g115<1>F   0F  { align1 WE_normal 
1Q };
+   * mov(8)  g114<1>F   0F  { align1 WE_normal 
1Q };
+   * send(8) g4<1>UWg114<8,8,1>F
+   *   sampler (10, 0, 0, 1) mlen 2 rlen 4  { align1 WE_normal 
1Q };
+   * send(8) g50<1>UW   g114<8,8,1>F
+   *   sampler (10, 0, 0, 1) mlen 2 rlen 4  { align1 WE_n

Re: [Mesa-dev] [PATCH 7/8] i965/fs: Rewrite discards to use a flag subreg to track discarded pixels.

2012-12-09 Thread Kenneth Graunke

On 12/07/2012 02:08 PM, Eric Anholt wrote:

This makes much more sense on gen6+, and will also prove useful for
early exit of shaders on discard.
---
  src/mesa/drivers/dri/i965/brw_defines.h|1 -
  src/mesa/drivers/dri/i965/brw_fs.cpp   |8 +++
  src/mesa/drivers/dri/i965/brw_fs.h |1 -
  src/mesa/drivers/dri/i965/brw_fs_emit.cpp  |   76 +---
  src/mesa/drivers/dri/i965/brw_fs_fp.cpp|   17 ++---
  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |3 -
  .../dri/i965/brw_fs_schedule_instructions.cpp  |1 -
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   |   12 +++-
  8 files changed, 46 insertions(+), 73 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 5e00b40..2b77ae6 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -673,7 +673,6 @@ enum opcode {
 FS_OPCODE_PIXEL_Y,
 FS_OPCODE_CINTERP,
 FS_OPCODE_LINTERP,
-   FS_OPCODE_DISCARD,
 FS_OPCODE_SPILL,
 FS_OPCODE_UNSPILL,
 FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index dbf48f8..b4e8d68 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2434,6 +2434,14 @@ fs_visitor::run()
else
 emit_interpolation_setup_gen6();

+  /* On gen6+, we handle discards by keeping track of the still-live
+   * pixels in f0.1.  If so, initialize it with the dispatched pixels.


The wording "If so" doesn't make sense to me.  Perhaps just "Initialize 
it with the dispatched pixels."  The fact that we only do so for 
programs that use discards is obvious from the code and makes a lot of 
sense.



+   */
+  if (fp->UsesKill) {
+ fs_inst *discard_init = emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+ discard_init->flag_subreg = 1;
+  }


Also: does this new approach work in SIMD16 mode?  (Is the f0.1 wide 
enough?)  It looks like you've removed the code to fail 16-wide compiles 
that use discard.  I wasn't sure if that was intentional.


If you do want to disallow it, this seems like a good place to do it 
(the earlier the better).


Otherwise, I like this.


/* Generate FS IR for main().  (the visitor only descends into
 * functions called "main").
 */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index b60a37e..b00755f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -501,7 +501,6 @@ private:
 void generate_math_gen4(fs_inst *inst,
   struct brw_reg dst,
   struct brw_reg src);
-   void generate_discard(fs_inst *inst);
 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
   bool negate_value);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index b3d7f81..f185eb5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -74,6 +74,17 @@ fs_generator::generate_fb_write(fs_inst *inst)
 brw_set_mask_control(p, BRW_MASK_DISABLE);
 brw_set_compression_control(p, BRW_COMPRESSION_NONE);

+   if (fp->UsesKill) {
+  struct brw_reg pixel_mask;
+
+  if (intel->gen >= 6)
+ pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+  else
+ pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+  brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
+   }
+
 if (inst->header_present) {
if (intel->gen >= 6) {
 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
@@ -514,58 +525,6 @@ fs_generator::generate_ddy(fs_inst *inst, struct brw_reg 
dst, struct brw_reg src
  }

  void
-fs_generator::generate_discard(fs_inst *inst)
-{
-   struct brw_reg f0 = brw_flag_reg(0, 0);
-
-   if (intel->gen >= 6) {
-  struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
-  struct brw_reg some_register;
-
-  /* As of gen6, we no longer have the mask register to look at,
-   * so life gets a bit more complicated.
-   */
-
-  /* Load the flag register with all ones. */
-  brw_push_insn_state(p);
-  brw_set_mask_control(p, BRW_MASK_DISABLE);
-  brw_MOV(p, f0, brw_imm_uw(0xffff));
-  brw_pop_insn_state(p);
-
-  /* Do a comparison that should always fail, to produce 0s in the flag
-   * reg where we have active channels.
-   */
-  some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
-  brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
- BRW_CONDITIONAL_NZ, some_register, some_register);
-
-  /* Undo CMP's whacking of predication*/
-  brw_set_predicate

Re: [Mesa-dev] [PATCH 8/8] i965/fs: Improve performance of shaders that start out with a discard.

2012-12-09 Thread Kenneth Graunke

On 12/07/2012 02:08 PM, Eric Anholt wrote:

I had tried this in the past, but ran into trouble with applications that
sample from undiscarded pixels in the same subspan.  To fix that issue, only
jump to the end for an entire subspan at a time.

Improves GLbenchmark 2.7 (1024x768) performance by 7.9 +/- 1.5% (n=8).
---
  src/mesa/drivers/dri/i965/brw_defines.h  |1 +
  src/mesa/drivers/dri/i965/brw_eu.h   |1 +
  src/mesa/drivers/dri/i965/brw_eu_emit.c  |   53 ++---
  src/mesa/drivers/dri/i965/brw_fs.h   |   24 ++
  src/mesa/drivers/dri/i965/brw_fs_emit.cpp|   65 ++
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |   14 ++
  6 files changed, 151 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 2b77ae6..40571a4 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -679,6 +679,7 @@ enum opcode {
 FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
 FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
 FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
+   FS_OPCODE_DISCARD_JUMP,

 VS_OPCODE_URB_WRITE,
 VS_OPCODE_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index adefcfd..b2fa448 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -1032,6 +1032,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p);
  struct brw_instruction *brw_BREAK(struct brw_compile *p);
  struct brw_instruction *brw_CONT(struct brw_compile *p);
  struct brw_instruction *gen6_CONT(struct brw_compile *p);
+struct brw_instruction *gen6_HALT(struct brw_compile *p);
  /* Forward jumps:
   */
  void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index fb1255f..dd91a30 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1461,6 +1461,24 @@ struct brw_instruction *brw_CONT(struct brw_compile *p)
 return insn;
  }

+struct brw_instruction *gen6_HALT(struct brw_compile *p)
+{
+   struct brw_instruction *insn;
+
+   insn = next_insn(p, BRW_OPCODE_HALT);
+   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
+
+   if (p->compressed) {
+  insn->header.execution_size = BRW_EXECUTE_16;
+   } else {
+  insn->header.compression_control = BRW_COMPRESSION_NONE;
+  insn->header.execution_size = BRW_EXECUTE_8;
+   }
+   return insn;
+}
+
  /* DO/WHILE loop:
   *
   * The DO/WHILE is just an unterminated loop -- break or continue are
@@ -2302,8 +2320,8 @@ brw_find_next_block_end(struct brw_compile *p, int start)
 return ip;
}
 }
-   assert(!"not reached");
-   return start + 1;
+
+   return 0;
  }

  /* There is no DO instruction on gen6, so to find the end of the loop
@@ -2336,7 +2354,7 @@ brw_find_loop_end(struct brw_compile *p, int start)
  }

  /* After program generation, go back and update the UIP and JIP of
- * BREAK and CONT instructions to their correct locations.
+ * BREAK, CONT, and HALT instructions to their correct locations.
   */
  void
  brw_set_uip_jip(struct brw_compile *p)
@@ -2360,24 +2378,45 @@ brw_set_uip_jip(struct brw_compile *p)
 continue;
}

+  int block_end_ip = brw_find_next_block_end(p, ip);
switch (insn->header.opcode) {
case BRW_OPCODE_BREAK:
-insn->bits3.break_cont.jip =
-(brw_find_next_block_end(p, ip) - ip) / scale;
+ assert(block_end_ip != 0);
+insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
 /* Gen7 UIP points to WHILE; Gen6 points just after it */
 insn->bits3.break_cont.uip =
(brw_find_loop_end(p, ip) - ip +
   (intel->gen == 6 ? 16 : 0)) / scale;
 break;
case BRW_OPCODE_CONTINUE:
-insn->bits3.break_cont.jip =
-(brw_find_next_block_end(p, ip) - ip) / scale;
+ assert(block_end_ip != 0);
+insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
 insn->bits3.break_cont.uip =
  (brw_find_loop_end(p, ip) - ip) / scale;

 assert(insn->bits3.break_cont.uip != 0);
 assert(insn->bits3.break_cont.jip != 0);
 break;
+  case BRW_OPCODE_HALT:
+/* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
+ *
+ *"In case of the halt instruction not inside any conditional
+ * code block, the value of <JIP> and <UIP> should be the
+ * same. In case of the halt instruction inside conditional code
+ * block, the <UIP> should be the end of the program, and the
+ * <JIP> should be end of the most inner conditional code 

Re: [Mesa-dev] [PATCH] mesa syncobj: don't store a pointer to the set_entry

2012-12-09 Thread Eric Anholt
Jordan Justen  writes:

> The set_entry pointer can become invalid if the set table
> is re-hashed.
>
> This likely will fix
> https://bugs.freedesktop.org/show_bug.cgi?id=58012
> (Regression since 56e95d3c)

Reviewed-by: Eric Anholt 


pgpRhlMJQnE0s.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] set.c vs. hash_table.c

2012-12-09 Thread Eric Anholt
Brian Paul  writes:

> Hi Jordan,
>
> set.c and hash_table.c look a LOT alike.  Could the functions in set.c 
> be implemented in terms of the hash_table.c functions?  It seems 
> a little silly to have so much duplicated code.

The idea of the set code is to save most of the memory in the case that
all you're doing is storing a set of pointers.  We've got some debug
code that also wants to use this in ir_validate.

Whether the set code is important enough to have just for these two
cases, I'm not sure.  I wouldn't have imagined that people wrote code
using many of these object types until I encountered minecraft, though
:)


pgpGchOsZnVHn.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl_to_tgsi: emit multi-level structs and arrays properly.

2012-12-09 Thread Dave Airlie
This follows the code from the i965 driver, and emits the structs
and arrays recursively.

This fixes an assert in the two UBO tests
fs-struct-copy-complicated and
vs-struct-copy-complicated

These tests now pass on softpipe, with no regressions.

Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 51 --
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index a4df4e5..1d96e90 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -442,6 +442,9 @@ public:
void merge_registers(void);
void renumber_registers(void);
 
+   void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
+   st_dst_reg *l, st_src_reg *r);
+
void *mem_ctx;
 };
 
@@ -2244,6 +2247,44 @@ glsl_to_tgsi_visitor::process_move_condition(ir_rvalue 
*ir)
 }
 
 void
+glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type 
*type,
+ st_dst_reg *l, st_src_reg *r)
+{
+   if (type->base_type == GLSL_TYPE_STRUCT) {
+  for (unsigned int i = 0; i < type->length; i++) {
+ emit_block_mov(ir, type->fields.structure[i].type, l, r);
+  }
+  return;
+   }
+
+   if (type->is_array()) {
+  for (unsigned int i = 0; i < type->length; i++) {
+ emit_block_mov(ir, type->fields.array, l, r);
+  }
+  return;
+   }
+
+   if (type->is_matrix()) {
+  const struct glsl_type *vec_type;
+
+  vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+type->vector_elements, 1);
+
+  for (int i = 0; i < type->matrix_columns; i++) {
+ emit_block_mov(ir, vec_type, l, r);
+  }
+  return;
+   }
+
+   assert(type->is_scalar() || type->is_vector());
+
+   r->type = type->base_type;
+   emit(ir, TGSI_OPCODE_MOV, *l, *r);
+   l->index++;
+   r->index++;
+}
+
+void
 glsl_to_tgsi_visitor::visit(ir_assignment *ir)
 {
st_dst_reg l;
@@ -2347,15 +2388,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
   new_inst->saturate = inst->saturate;
   inst->dead_mask = inst->dst.writemask;
} else {
-  for (i = 0; i < type_size(ir->lhs->type); i++) {
- if (ir->rhs->type->is_array())
-   r.type = ir->rhs->type->element_type()->base_type;
- else if (ir->rhs->type->is_record())
-   r.type = ir->rhs->type->fields.structure[i].type->base_type;
- emit(ir, TGSI_OPCODE_MOV, l, r);
- l.index++;
- r.index++;
-  }
+  emit_block_mov(ir, ir->rhs->type, &l, &r);
}
 }
 
-- 
1.8.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev