[Mesa-dev] [PATCH V4 4/7] meta: Add functionality to do _mesa_meta_BlitFrameBuffer() using glsl

2013-01-10 Thread Anuj Phogat
This patch rewrites _mesa_meta_BlitFrameBuffer() function to add support
for blitting with GLSL/GLSL ES shaders. These changes were required to
support glBlitFramebuffer() in gles3. This patch, along with other patches
in this series, makes 16 failing framebuffer_blit test cases in gles3
conformance pass.

V2: Properly handle flipped blits for source and destination
renderbuffer / textures. Add support for GL_TEXTURE_RECTANGLE
in _mesa_meta_BlitFrameBuffer. Create a temp depth texture to
support depth buffer blitting.
V3: Remove unsupported / redundant shader code. Add an assertion to
make sure that we don't use rectangle texture in ES. Put API
guard on glTexEnvi().
V4: For gles3: Don't use ReadPixels or CopyTexImage2D to blit depth buffer.
gles3 spec says for CopyTexImage2D that "color buffer components can be
dropped during the conversion to internalformat, but new components
cannot be added." So, use the internal format of read renderbuffer to
create texture for color buffer blitting.

Signed-off-by: Anuj Phogat 
Reviewed-by: Ian Romanick 
---
I've made a few changes, listed above in V4, to fix a failure in gles3 conformance
framebuffer_blit_coverage_mismatched_buffer_formats.test. I noticed this failure
after Jordan's recent commit 166c4d. No regressions observed after changes.

 src/mesa/drivers/common/meta.c |  436 +++-
 1 files changed, 343 insertions(+), 93 deletions(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index d211fda..55134b8 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -222,6 +222,9 @@ struct blit_state
GLuint ArrayObj;
GLuint VBO;
GLuint DepthFP;
+   GLuint ShaderProg;
+   GLuint RectShaderProg;
+   struct temp_texture depthTex;
 };
 
 
@@ -1144,7 +1147,7 @@ static void
 init_temp_texture(struct gl_context *ctx, struct temp_texture *tex)
 {
/* prefer texture rectangle */
-   if (ctx->Extensions.NV_texture_rectangle) {
+   if (_mesa_is_desktop_gl(ctx) && ctx->Extensions.NV_texture_rectangle) {
   tex->Target = GL_TEXTURE_RECTANGLE;
   tex->MaxSize = ctx->Const.MaxTextureRectSize;
   tex->NPOT = GL_TRUE;
@@ -1205,6 +1208,21 @@ get_bitmap_temp_texture(struct gl_context *ctx)
return tex;
 }
 
+/**
+ * Return pointer to depth temp_texture.
+ * This does some one-time init if needed.
+ */
+static struct temp_texture *
+get_temp_depth_texture(struct gl_context *ctx)
+{
+   struct temp_texture *tex = &ctx->Meta->Blit.depthTex;
+
+   if (!tex->TexObj) {
+  init_temp_texture(ctx, tex);
+   }
+
+   return tex;
+}
 
 /**
  * Compute the width/height of texture needed to draw an image of the
@@ -1269,7 +1287,8 @@ alloc_texture(struct temp_texture *tex,
  * Setup/load texture for glCopyPixels or glBlitFramebuffer.
  */
 static void
-setup_copypix_texture(struct temp_texture *tex,
+setup_copypix_texture(struct gl_context *ctx,
+  struct temp_texture *tex,
   GLboolean newTex,
   GLint srcX, GLint srcY,
   GLsizei width, GLsizei height, GLenum intFormat,
@@ -1278,7 +1297,8 @@ setup_copypix_texture(struct temp_texture *tex,
_mesa_BindTexture(tex->Target, tex->TexObj);
_mesa_TexParameteri(tex->Target, GL_TEXTURE_MIN_FILTER, filter);
_mesa_TexParameteri(tex->Target, GL_TEXTURE_MAG_FILTER, filter);
-   _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+   if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES)
+  _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
 
/* copy framebuffer image to texture */
if (newTex) {
@@ -1321,7 +1341,8 @@ setup_drawpix_texture(struct gl_context *ctx,
_mesa_BindTexture(tex->Target, tex->TexObj);
_mesa_TexParameteri(tex->Target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
_mesa_TexParameteri(tex->Target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-   _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+   if (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES)
+  _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
 
/* copy pixel data to texture */
if (newTex) {
@@ -1388,6 +1409,158 @@ init_blit_depth_pixels(struct gl_context *ctx)
   strlen(program2), (const GLubyte *) program2);
 }
 
+static void
+setup_ff_blit_framebuffer(struct gl_context *ctx,
+  struct blit_state *blit)
+{
+   struct vertex {
+  GLfloat x, y, s, t;
+   };
+   struct vertex verts[4];
+
+   if (blit->ArrayObj == 0) {
+  /* one-time setup */
+
+  /* create vertex array object */
+  _mesa_GenVertexArrays(1, &blit->ArrayObj);
+  _mesa_BindVertexArray(blit->ArrayObj);
+
+  /* create vertex array buffer */
+  _mesa_GenBuffers(1, &blit->VBO);
+  _mesa_BindBuffer(GL_ARRAY_BUFFER_ARB, blit->VBO);
+  _mesa_BufferData(GL_ARRAY_BUFFER_ARB, sizeof(verts),
+  NULL,

[Mesa-dev] [PATCH 02/10] glsl: Add IR lisp for GLSL ES 3.00 pack/unpack functions

2013-01-10 Thread Chad Versace
For each of the following functions, add a declaration to
builtins/profiles/300es.glsl and a file to builtins/ir.
  packSnorm2x16  unpackSnorm2x16
  packUnorm2x16  unpackUnorm2x16
  packHalf2x16   unpackHalf2x16

Signed-off-by: Chad Versace 
---
 src/glsl/builtins/ir/packHalf2x16.ir|  6 ++
 src/glsl/builtins/ir/packSnorm2x16.ir   |  6 ++
 src/glsl/builtins/ir/packUnorm2x16.ir   |  6 ++
 src/glsl/builtins/ir/unpackHalf2x16.ir  |  6 ++
 src/glsl/builtins/ir/unpackSnorm2x16.ir |  6 ++
 src/glsl/builtins/ir/unpackUnorm2x16.ir |  6 ++
 src/glsl/builtins/profiles/300es.glsl   | 14 +++---
 7 files changed, 43 insertions(+), 7 deletions(-)
 create mode 100644 src/glsl/builtins/ir/packHalf2x16.ir
 create mode 100644 src/glsl/builtins/ir/packSnorm2x16.ir
 create mode 100644 src/glsl/builtins/ir/packUnorm2x16.ir
 create mode 100644 src/glsl/builtins/ir/unpackHalf2x16.ir
 create mode 100644 src/glsl/builtins/ir/unpackSnorm2x16.ir
 create mode 100644 src/glsl/builtins/ir/unpackUnorm2x16.ir

diff --git a/src/glsl/builtins/ir/packHalf2x16.ir 
b/src/glsl/builtins/ir/packHalf2x16.ir
new file mode 100644
index 000..2f85407
--- /dev/null
+++ b/src/glsl/builtins/ir/packHalf2x16.ir
@@ -0,0 +1,6 @@
+((function packHalf2x16
+   (signature uint
+ (parameters
+   (declare (in) vec2 arg0))
+ ((return (expression uint packHalf2x16 (var_ref arg0)))))
+))
diff --git a/src/glsl/builtins/ir/packSnorm2x16.ir 
b/src/glsl/builtins/ir/packSnorm2x16.ir
new file mode 100644
index 000..b4575d2
--- /dev/null
+++ b/src/glsl/builtins/ir/packSnorm2x16.ir
@@ -0,0 +1,6 @@
+((function packSnorm2x16
+   (signature uint
+ (parameters
+   (declare (in) vec2 arg0))
+ ((return (expression uint packSnorm2x16 (var_ref arg0)))))
+))
diff --git a/src/glsl/builtins/ir/packUnorm2x16.ir 
b/src/glsl/builtins/ir/packUnorm2x16.ir
new file mode 100644
index 000..c809f2d
--- /dev/null
+++ b/src/glsl/builtins/ir/packUnorm2x16.ir
@@ -0,0 +1,6 @@
+((function packUnorm2x16
+   (signature uint
+ (parameters
+   (declare (in) vec2 arg0))
+ ((return (expression uint packUnorm2x16 (var_ref arg0)))))
+))
diff --git a/src/glsl/builtins/ir/unpackHalf2x16.ir 
b/src/glsl/builtins/ir/unpackHalf2x16.ir
new file mode 100644
index 000..f5b6fc2
--- /dev/null
+++ b/src/glsl/builtins/ir/unpackHalf2x16.ir
@@ -0,0 +1,6 @@
+((function unpackHalf2x16
+   (signature vec2
+ (parameters
+   (declare (in) uint arg0))
+ ((return (expression vec2 unpackHalf2x16 (var_ref arg0)))))
+))
diff --git a/src/glsl/builtins/ir/unpackSnorm2x16.ir 
b/src/glsl/builtins/ir/unpackSnorm2x16.ir
new file mode 100644
index 000..9092a06
--- /dev/null
+++ b/src/glsl/builtins/ir/unpackSnorm2x16.ir
@@ -0,0 +1,6 @@
+((function unpackSnorm2x16
+   (signature vec2
+ (parameters
+   (declare (in) uint arg0))
+ ((return (expression vec2 unpackSnorm2x16 (var_ref arg0)))))
+))
diff --git a/src/glsl/builtins/ir/unpackUnorm2x16.ir 
b/src/glsl/builtins/ir/unpackUnorm2x16.ir
new file mode 100644
index 000..935dc85
--- /dev/null
+++ b/src/glsl/builtins/ir/unpackUnorm2x16.ir
@@ -0,0 +1,6 @@
+((function unpackUnorm2x16
+   (signature vec2
+ (parameters
+   (declare (in) uint arg0))
+ ((return (expression vec2 unpackUnorm2x16 (var_ref arg0)))))
+))
diff --git a/src/glsl/builtins/profiles/300es.glsl 
b/src/glsl/builtins/profiles/300es.glsl
index 45212b3..4b7160d 100644
--- a/src/glsl/builtins/profiles/300es.glsl
+++ b/src/glsl/builtins/profiles/300es.glsl
@@ -325,14 +325,14 @@ vec4  uintBitsToFloat(uvec4 value);
 
 /*
  * 8.4 - Floating-Point Pack and Unpack Functions
- * FIXME: implement these
  */
-// uint packSnorm2x16(vec2 v);
-// vec2 unpackSnorm2x16(uint p);
-// uint packUnorm2x16(vec2 v);
-// vec2 unpackUnorm2x16(uint p);
-// uint packHalf2x16(vec2 v);
-// vec2 unpackHalf2x16(uint p);
+highp   uint packSnorm2x16(vec2 v);
+highp   uint packUnorm2x16(vec2 v);
+highp   uint packHalf2x16 (mediump vec2 v);
+
+highp   vec2 unpackSnorm2x16(highp uint p);
+highp   vec2 unpackUnorm2x16(highp uint p);
+mediump vec2 unpackHalf2x16 (highp uint p);
 
 /*
  * 8.5 - Geometric Functions
-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/10] glsl: Evaluate constant GLSL ES 3.00 pack/unpack operations

2013-01-10 Thread Chad Versace
That is, evaluate constant expressions of the following functions:
  packSnorm2x16  unpackSnorm2x16
  packUnorm2x16  unpackUnorm2x16
  packHalf2x16   unpackHalf2x16

Signed-off-by: Chad Versace 
---
 src/glsl/ir_constant_expression.cpp | 362 
 1 file changed, 362 insertions(+)

diff --git a/src/glsl/ir_constant_expression.cpp 
b/src/glsl/ir_constant_expression.cpp
index 17b54b9..2038498 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -94,6 +94,332 @@ bitcast_f2u(float f)
return u;
 }
 
+/**
+ * Evaluate one component of a floating-point 2x16 packing function.
+ */
+typedef uint16_t
+(*pack_1x16_func_t)(float);
+
+/**
+ * Evaluate one component of a floating-point 2x16 unpacking function.
+ */
+typedef void
+(*unpack_1x16_func_t)(uint16_t, float*);
+
+/**
+ * Evaluate a 2x16 floating-point packing function.
+ */
+static uint32_t
+pack_2x16(pack_1x16_func_t pack_1x16,
+  float x, float y)
+{
+   /* From section 8.4 of the GLSL ES 3.00 spec:
+*
+*packSnorm2x16
+*-
+*The first component of the vector will be written to the least
+*significant bits of the output; the last component will be written to
+*the most significant bits.
+*
+* The specifications for the other packing functions contain similar
+* language.
+*/
+   uint32_t u = 0;
+   u |= ((uint32_t) pack_1x16(x) << 0);
+   u |= ((uint32_t) pack_1x16(y) << 16);
+   return u;
+}
+
+/**
+ * Evaluate a 2x16 floating-point unpacking function.
+ */
+static void
+unpack_2x16(unpack_1x16_func_t unpack_1x16,
+uint32_t u,
+float *x, float *y)
+{
+/* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ *unpackSnorm2x16
+ *---
+ *The first component of the returned vector will be extracted from
+ *the least significant bits of the input; the last component will be
+ *extracted from the most significant bits.
+ *
+ * The specifications for the other unpacking functions contain similar
+ * language.
+ */
+   unpack_1x16((uint16_t) (u & 0xffff), x);
+   unpack_1x16((uint16_t) (u >> 16), y);
+}
+
+/**
+ * Evaluate one component of packSnorm2x16.
+ */
+static uint16_t
+pack_snorm_1x16(float x)
+{
+/* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ *packSnorm2x16
+ *---
+ *The conversion for component c of v to fixed point is done as
+ *follows:
+ *
+ *  packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
+ */
+   return (uint16_t) round_to_even(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
+}
+
+/**
+ * Evaluate one component of unpackSnorm2x16.
+ */
+static void
+unpack_snorm_1x16(uint16_t u, float *f)
+{
+/* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ *unpackSnorm2x16
+ *---
+ *The conversion for unpacked fixed-point value f to floating point is
+ *done as follows:
+ *
+ *   unpackSnorm2x16: clamp(f / 32767.0, -1,+1)
+ */
+   *f = CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component of packUnorm2x16.
+ */
+static uint16_t
+pack_unorm_1x16(float x)
+{
+/* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ *packUnorm2x16
+ *---
+ *The conversion for component c of v to fixed point is done as
+ *follows:
+ *
+ *   packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
+ */
+   return (uint16_t) round_to_even(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
+}
+
+
+/**
+ * Evaluate one component of unpackUnorm2x16.
+ */
+static void
+unpack_unorm_1x16(uint16_t u, float *f)
+{
+/* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ *unpackUnorm2x16
+ *---
+ *The conversion for unpacked fixed-point value f to floating point is
+ *done as follows:
+ *
+ *   unpackUnorm2x16: f / 65535.0
+ */
+   *f = (float) u / 65535.0f;
+}
+
+
+/**
+ * Evaluate one component of packHalf2x16.
+ */
+static uint16_t
+pack_half_1x16(float x)
+{
+   /* The bit layout of a float16 is:
+*   sign: 15
+*   exponent: 10:14
+*   mantissa: 0:9
+*
+* The sign, exponent, and mantissa of a float16 determine its value thus:
+*
+*  if e = 0 and m = 0, then zero:   (-1)^s * 0
+*  if e = 0 and m != 0, then subnormal: (-1)^s * 2^(e - 14) * (m / 2^10)
+*  if 0 < e < 31, then normal:  (-1)^s * 2^(e - 15) * (1 + m / 2^10)
+*  if e = 31 and m = 0, then inf:   (-1)^s * inf
+*  if e = 31 and m != 0, then NaN
+*
+*  where 0 <= m < 2^10 .
+*/
+
+   /* Calculate the resultant float16's sign, exponent, and mantissa
+* bits.
+*/
+   const int s = (copysign(1.0f, x) < 0) ? 1 : 0;
+   int e;
+   int m;
+
+   switch (fpclassify(x)) {
+   case FP_NAN:
+   /* Any representation with e = 31 and m != 0 suffices. */
+  

[Mesa-dev] [PATCH 07/10] i965: Add opcodes for F32TO16 and F16TO32

2013-01-10 Thread Chad Versace
The GLSL ES 3.00 operations packHalf2x16 and unpackHalf2x16 will emit
these opcodes.

- Define the opcodes BRW_OPCODE_{F32TO16,F16TO32}.
- Add the opcodes to the brw_disasm table.
- Define convenience functions brw_{F32TO16,F16TO32}.

Signed-off-by: Chad Versace 
---
 src/mesa/drivers/dri/i965/brw_defines.h | 2 ++
 src/mesa/drivers/dri/i965/brw_disasm.c  | 2 ++
 src/mesa/drivers/dri/i965/brw_eu.h  | 2 ++
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 ++
 4 files changed, 8 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 1d0cf02..22d3e98 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -631,6 +631,8 @@ enum opcode {
BRW_OPCODE_ASR =12,
BRW_OPCODE_CMP =16,
BRW_OPCODE_CMPN =   17,
+   BRW_OPCODE_F32TO16 = 19,
+   BRW_OPCODE_F16TO32 = 20,
BRW_OPCODE_JMPI =   32,
BRW_OPCODE_IF = 34,
BRW_OPCODE_IFF =35,
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c 
b/src/mesa/drivers/dri/i965/brw_disasm.c
index 6dd1736..0568738 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -41,6 +41,8 @@ const struct opcode_desc opcode_descs[128] = {
 [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
 [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
 [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+[BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
+[BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
 
 [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
 [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index 7b874c7..06daaba 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -157,6 +157,8 @@ ALU2(SHL)
 ALU2(RSR)
 ALU2(RSL)
 ALU2(ASR)
+ALU1(F32TO16)
+ALU1(F16TO32)
 ALU2(JMPI)
 ALU2(ADD)
 ALU2(AVG)
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index bd9c182..fecbff1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -907,6 +907,8 @@ ALU2(SHL)
 ALU2(RSR)
 ALU2(RSL)
 ALU2(ASR)
+ALU1(F32TO16)
+ALU1(F16TO32)
 ALU1(FRC)
 ALU1(RNDD)
 ALU2(MAC)
-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/10] i965: Quote the PRM to document a HorzStride subtlety

2013-01-10 Thread Chad Versace
Signed-off-by: Chad Versace 
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index fecbff1..b34754a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -126,7 +126,10 @@ brw_set_dest(struct brw_compile *p, struct brw_instruction 
*insn,
   else {
 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
-/* even ignored in da16, still need to set as '01' */
+/* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
+ *Although Dst.HorzStride is a don't care for Align16, HW needs
+ *this to be programmed as "01".
+ */
 insn->bits1.da16.dest_horiz_stride = 1;
   }
}
-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/10] (gles3) glsl, i965: Implement GLSL ES 3.00 pack/unpack functions

2013-01-10 Thread Chad Versace
This series lives on my gles3-glsl-packing branch.

Tested against my piglit gles3-glsl-packing branch on gen6 and gen7. All tests
pass. However, the tests and implementation share much common code and
concepts, so thorough review is welcome.

This is my first attempt at emitting native i965 code, so please review it
carefully.  Just because it passes my tests does not imply that it's correct.

In patch "i965/vs/gen7", there is a comment explaining a potential
optimization that I did not know how to achieve. I'm happy committing this
series as-is without the optimization, but I'd like feedback on how to achieve
it so I can do it in a follow-on series.

Chad Versace (10):
  glsl: Fix typo in comment
  glsl: Add IR lisp for GLSL ES 3.00 pack/unpack functions
  glsl: Extend ir_expression_operation for GLSL 3.00 pack/unpack
functions
  glsl: Evaluate constant GLSL ES 3.00 pack/unpack operations
  glsl: Add lowering pass for GLSL ES 3.00 pack/unpack operations
  i965: Lower the GLSL ES 3.00 pack/unpack operations
  i965:  Add opcodes for F32TO16 and F16TO32
  i965: Quote the PRM to document a HorzStride subtlety
  i965/vs/gen7: Emit code for GLSL ES 3.00 pack/unpack operations
  i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations

 src/glsl/Makefile.sources  |1 +
 src/glsl/builtins/ir/packHalf2x16.ir   |6 +
 src/glsl/builtins/ir/packSnorm2x16.ir  |6 +
 src/glsl/builtins/ir/packUnorm2x16.ir  |6 +
 src/glsl/builtins/ir/unpackHalf2x16.ir |6 +
 src/glsl/builtins/ir/unpackSnorm2x16.ir|6 +
 src/glsl/builtins/ir/unpackUnorm2x16.ir|6 +
 src/glsl/builtins/profiles/300es.glsl  |   14 +-
 src/glsl/ir.cpp|   27 +
 src/glsl/ir.h  |   33 +-
 src/glsl/ir_constant_expression.cpp|  362 +
 src/glsl/ir_optimization.h |   18 +
 src/glsl/ir_validate.cpp   |   26 +
 src/glsl/lower_packing_builtins.cpp| 1567 
 src/mesa/drivers/dri/i965/brw_defines.h|3 +
 src/mesa/drivers/dri/i965/brw_disasm.c |2 +
 src/mesa/drivers/dri/i965/brw_eu.h |2 +
 src/mesa/drivers/dri/i965/brw_eu_emit.c|7 +-
 src/mesa/drivers/dri/i965/brw_fs.h |7 +
 .../dri/i965/brw_fs_channel_expressions.cpp|   29 +-
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp  |   39 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   |   78 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp   |   32 +
 src/mesa/drivers/dri/i965/brw_vec4.h   |3 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|8 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  156 +-
 src/mesa/program/ir_to_mesa.cpp|   12 +-
 27 files changed, 2446 insertions(+), 16 deletions(-)
 create mode 100644 src/glsl/builtins/ir/packHalf2x16.ir
 create mode 100644 src/glsl/builtins/ir/packSnorm2x16.ir
 create mode 100644 src/glsl/builtins/ir/packUnorm2x16.ir
 create mode 100644 src/glsl/builtins/ir/unpackHalf2x16.ir
 create mode 100644 src/glsl/builtins/ir/unpackSnorm2x16.ir
 create mode 100644 src/glsl/builtins/ir/unpackUnorm2x16.ir
 create mode 100644 src/glsl/lower_packing_builtins.cpp

-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/10] glsl: Fix typo in comment

2013-01-10 Thread Chad Versace
Discovered because Eclipse failed to resolve the false reference.

s/ir_expression::num_operands/ir_expression::get_num_operands/

Signed-off-by: Chad Versace 
---
 src/glsl/ir.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 85fc5ce..c9c91e8 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -908,7 +908,7 @@ public:
unsigned write_mask:4;
 };
 
-/* Update ir_expression::num_operands() and operator_strs when
+/* Update ir_expression::get_num_operands() and operator_strs when
  * updating this list.
  */
 enum ir_expression_operation {
-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/10] glsl: Extend ir_expression_operation for GLSL 3.00 pack/unpack functions

2013-01-10 Thread Chad Versace
For each function {pack,unpack}{Snorm,Unorm,Half}2x16, add a corresponding
opcode to enum ir_expression_operation.  Validate the new opcodes in
ir_validate.cpp.

Also, add opcodes for scalarized variants of the Half2x16 functions.  (The
code generator for the i965 fragment shader requires that all vector
operations be scalarized.  A lowering pass, to be added later, will
scalarize the Half2x16 functions).

Signed-off-by: Chad Versace 
---
 src/glsl/ir.cpp | 27 +++
 src/glsl/ir.h   | 31 +++
 src/glsl/ir_validate.cpp| 26 ++
 src/mesa/program/ir_to_mesa.cpp | 12 +++-
 4 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 703f5ec..ddaf3c3 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -306,6 +306,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
   break;
 
case ir_unop_noise:
+   case ir_unop_unpack_half_2x16_split_x:
+   case ir_unop_unpack_half_2x16_split_y:
   this->type = glsl_type::float_type;
   break;
 
@@ -313,6 +315,18 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
   this->type = glsl_type::bool_type;
   break;
 
+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_pack_half_2x16:
+  this->type = glsl_type::uint_type;
+  break;
+
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
+  this->type = glsl_type::vec2_type;
+  break;
+
default:
   assert(!"not reached: missing automatic type setup for ir_expression");
   this->type = op0->type;
@@ -386,6 +400,10 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, 
ir_rvalue *op1)
   this->type = glsl_type::float_type;
   break;
 
+   case ir_binop_pack_half_2x16_split:
+  this->type = glsl_type::uint_type;
+  break;
+
case ir_binop_lshift:
case ir_binop_rshift:
   this->type = op0->type;
@@ -454,6 +472,14 @@ static const char *const operator_strs[] = {
"cos_reduced",
"dFdx",
"dFdy",
+   "packSnorm2x16",
+   "packUnorm2x16",
+   "packHalf2x16",
+   "unpackSnorm2x16",
+   "unpackUnorm2x16",
+   "unpackHalf2x16",
+   "unpackHalf2x16_split_x",
+   "unpackHalf2x16_split_y",
"noise",
"+",
"-",
@@ -480,6 +506,7 @@ static const char *const operator_strs[] = {
"min",
"max",
"pow",
+   "packHalf2x16_split",
"ubo_load",
"vector",
 };
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index c9c91e8..c1fbe7c 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -969,6 +969,28 @@ enum ir_expression_operation {
ir_unop_dFdy,
/*@}*/
 
+   /**
+* \name Floating point pack and unpack operations.
+*/
+   /*@{*/
+   ir_unop_pack_snorm_2x16,
+   ir_unop_pack_unorm_2x16,
+   ir_unop_pack_half_2x16,
+   ir_unop_unpack_snorm_2x16,
+   ir_unop_unpack_unorm_2x16,
+   ir_unop_unpack_half_2x16,
+   /*@}*/
+
+   /**
+* \name Lowered floating point unpacking operations.
+*
+* \see lower_packing_builtins_visitor::split_unpack_half_2x16
+*/
+   /*@{*/
+   ir_unop_unpack_half_2x16_split_x,
+   ir_unop_unpack_half_2x16_split_y,
+   /*@}*/
+
ir_unop_noise,
 
/**
@@ -1036,6 +1058,15 @@ enum ir_expression_operation {
ir_binop_pow,
 
/**
+* \name Lowered floating point packing operations.
+*
+* \see lower_packing_builtins_visitor::split_pack_half_2x16
+*/
+   /*@{*/
+   ir_binop_pack_half_2x16_split,
+   /*@}*/
+
+   /**
 * Load a value the size of a given GLSL type from a uniform block.
 *
 * operand0 is the ir_constant uniform block index in the linked shader.
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index ad57a31..9019637 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -329,6 +329,26 @@ ir_validate::visit_leave(ir_expression *ir)
   assert(ir->operands[0]->type == ir->type);
   break;
 
+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_pack_half_2x16:
+  assert(ir->type == glsl_type::uint_type);
+  assert(ir->operands[0]->type == glsl_type::vec2_type);
+  break;
+
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
+  assert(ir->type == glsl_type::vec2_type);
+  assert(ir->operands[0]->type == glsl_type::uint_type);
+  break;
+
+   case ir_unop_unpack_half_2x16_split_x:
+   case ir_unop_unpack_half_2x16_split_y:
+  assert(ir->type == glsl_type::float_type);
+  assert(ir->operands[0]->type == glsl_type::uint_type);
+  break;
+
case ir_unop_noise:
   /* XXX what can we assert here? */
   break;
@@ -423,6 +443,12 @@ ir_validate::visit_leave(ir_expression *ir)
   assert(ir->operands[0]->type == ir->operands[1]->type);
   break;
 
+   case ir_binop_pack_half_2x16_split:
+  assert(ir->type == glsl_type::uint_type);
+  asse

[Mesa-dev] [PATCH 06/10] i965: Lower the GLSL ES 3.00 pack/unpack operations

2013-01-10 Thread Chad Versace
On gen < 7, we fully lower all operations to arithmetic and bitwise
operations.

On gen >= 7, we fully lower the Snorm2x16 and Unorm2x16 operations, and
partially lower the Half2x16 operations.

Signed-off-by: Chad Versace 
---
 src/glsl/lower_packing_builtins.cpp  |  1 +
 src/mesa/drivers/dri/i965/brw_shader.cpp | 32 
 2 files changed, 33 insertions(+)

diff --git a/src/glsl/lower_packing_builtins.cpp 
b/src/glsl/lower_packing_builtins.cpp
index cd84084..f965a27 100644
--- a/src/glsl/lower_packing_builtins.cpp
+++ b/src/glsl/lower_packing_builtins.cpp
@@ -1013,6 +1013,7 @@ private:
  new(mem_ctx) ir_variable(glsl_type::vec2_type,
   "tmp_split_pack_half_2x16_v",
   ir_var_temporary);
+  insert_instruction(v);
   insert_instruction(
  new(mem_ctx) ir_assignment(
 new(mem_ctx) ir_dereference_variable(v),
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 1e8d574..65f8e7d 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -75,6 +75,34 @@ brw_shader_precompile(struct gl_context *ctx, struct 
gl_shader_program *prog)
return true;
 }
 
+static void
+brw_lower_packing_builtins(struct brw_context *brw,
+   gl_shader_type shader_type,
+   exec_list *ir)
+{
+   int ops = LOWER_PACK_SNORM_2x16
+   | LOWER_UNPACK_SNORM_2x16
+   | LOWER_PACK_UNORM_2x16
+   | LOWER_UNPACK_UNORM_2x16;
+
+   if (brw->intel.gen >= 7) {
+  switch (shader_type) {
+  case MESA_SHADER_FRAGMENT:
+ /* Scalarize these operations. */
+ ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
+ |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
+ break;
+  default:
+ break;
+  }
+   } else {
+  ops |= LOWER_PACK_HALF_2x16
+  |  LOWER_UNPACK_HALF_2x16;
+   }
+
+   lower_packing_builtins(ir, ops);
+}
+
 GLboolean
 brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 {
@@ -113,6 +141,10 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
   shader->ir = new(shader) exec_list;
   clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
 
+  /* lower_packing_builtins() inserts arithmetic instructions, so it
+   * must precede lower_instructions().
+   */
+  brw_lower_packing_builtins(brw, (gl_shader_type) stage, shader->ir);
   do_mat_op_to_vec(shader->ir);
   lower_instructions(shader->ir,
 MOD_TO_FRACT |
-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/10] i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations

2013-01-10 Thread Chad Versace
Signed-off-by: Chad Versace 
---
 src/mesa/drivers/dri/i965/brw_defines.h|  1 +
 src/mesa/drivers/dri/i965/brw_fs.h |  7 ++
 .../dri/i965/brw_fs_channel_expressions.cpp| 29 +++-
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp  | 39 ++-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 78 +-
 5 files changed, 149 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 22d3e98..1c43d68 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -713,6 +713,7 @@ enum opcode {
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
FS_OPCODE_SET_GLOBAL_OFFSET,
+   FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
 
VS_OPCODE_URB_WRITE,
VS_OPCODE_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index bcf38f3..59aa28d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -355,6 +355,10 @@ public:
fs_reg fix_math_operand(fs_reg src);
fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
+   void emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y);
+   void emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0);
+   void emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0);
+
void emit_minmax(uint32_t conditionalmod, fs_reg dst,
 fs_reg src0, fs_reg src1);
bool try_emit_saturate(ir_expression *ir);
@@ -541,6 +545,9 @@ private:
struct brw_reg src,
struct brw_reg offset);
void generate_discard_jump(fs_inst *inst);
+   void generate_unpack_half_2x16_split_y(fs_inst *inst,
+  struct brw_reg dst,
+  struct brw_reg src);
 
void patch_discard_jumps_to_fb_writes();
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 58521ee..7081511 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -76,8 +76,21 @@ channel_expressions_predicate(ir_instruction *ir)
   return false;
 
for (i = 0; i < expr->get_num_operands(); i++) {
-  if (expr->operands[i]->type->is_vector())
-return true;
+  if (expr->operands[i]->type->is_vector()) {
+ switch (expr->operation) {
+ case ir_binop_pack_half_2x16_split:
+ case ir_unop_pack_half_2x16:
+ case ir_unop_unpack_half_2x16:
+ case ir_unop_unpack_half_2x16_split_x:
+ case ir_unop_unpack_half_2x16_split_y:
+assert(!"WTF");
+break;
+ default:
+break;
+ }
+
+ return true;
+  }
}
 
return false;
@@ -342,9 +355,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment 
*ir)
   assert(!"not yet supported");
   break;
 
+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_pack_half_2x16:
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
case ir_quadop_vector:
   assert(!"should have been lowered");
   break;
+
+   case ir_unop_unpack_half_2x16_split_x:
+   case ir_unop_unpack_half_2x16_split_y:
+   case ir_binop_pack_half_2x16_split:
+  assert("!not reached: expression operates on scalars only");
+  break;
}
 
ir->remove();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 63f09fe..46e2409 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -920,6 +920,34 @@ fs_generator::generate_set_global_offset(fs_inst *inst,
 }
 
 void
+fs_generator::generate_unpack_half_2x16_split_y(fs_inst *inst,
+struct brw_reg dst,
+struct brw_reg src)
+{
+   assert(intel->gen >= 7);
+
+   /* src has the form of unpackHalf2x16's input:
+*
+* w z y  x
+*   |undef|undef|undef|0xhhhhllll|
+*
+* We wish to access only the "hhhh" bits of the source register, and hence
+* must access it with a 16 bit subregister offset.  To do so, we must
+* halve the size of the source data type from UD to UW and compensate by
+* doubling the stride.
+*/
+   assert(src.type == BRW_REGISTER_TYPE_UD);
+   src.type = BRW_REGISTER_TYPE_UW;
+   if (src.vstride > 0)
+  ++src.vstride;
+   if (src.hstride > 0)
+  ++src.hstride;
+   src.subnr += 2;
+
+   brw_F16TO32(p, dst, src);
+}
+
+void
 fs_generator::generate_code(exec_list *instructions)
 {
int last_native_insn_offset = p->next_

[Mesa-dev] [PATCH 09/10] i965/vs/gen7: Emit code for GLSL ES 3.00 pack/unpack operations

2013-01-10 Thread Chad Versace
Signed-off-by: Chad Versace 
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |   3 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|   8 ++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 156 -
 3 files changed, 166 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index e65b92c..43d0454 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -431,6 +431,9 @@ public:
void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
src_reg fix_math_operand(src_reg src);
 
+   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
+   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
+
void swizzle_result(ir_texture *ir, src_reg orig_val, int sampler);
 
void emit_ndc_computation();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 9fa742d..a38bb02 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -803,6 +803,14 @@ vec4_generator::generate_code(exec_list *instructions)
 brw_DP2(p, dst, src[0], src[1]);
 break;
 
+  case BRW_OPCODE_F32TO16:
+ brw_F32TO16(p, dst, src[0]);
+ break;
+
+  case BRW_OPCODE_F16TO32:
+ brw_F16TO32(p, dst, src[0]);
+ break;
+
   case BRW_OPCODE_IF:
 if (inst->src[0].file != BAD_FILE) {
/* The instruction has an embedded compare (only allowed on gen6) */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 02feff6..96376c4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -348,6 +348,143 @@ vec4_visitor::emit_math(enum opcode opcode,
 }
 
 void
+vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
+{
+   if (intel->gen < 7)
+  assert(!"ir_unop_pack_half_2x16 should be lowered");
+
+   /* uint dst; */
+   assert(dst.type == BRW_REGISTER_TYPE_UD);
+
+   /* vec2 src0; */
+   assert(src0.type == BRW_REGISTER_TYPE_F);
+
+   /* uvec2 tmp;
+*
+* The PRM lists the destination type of f32to16 as W.  However, I've
+* experimentally confirmed on gen7 that it must be a 32-bit size, such as
+* UD, in align16 mode.
+*/
+   dst_reg tmp_dst(this, glsl_type::uvec2_type);
+   src_reg tmp_src(tmp_dst);
+
+   /* tmp.xy = f32to16(src0); */
+   tmp_dst.writemask = WRITEMASK_XY;
+   emit(new(mem_ctx) vec4_instruction(this, BRW_OPCODE_F32TO16,
+  tmp_dst, src0));
+
+   /* The result's high 16 bits are in the low 16 bits of the temporary
+* register's Y channel.  The result's low 16 bits are in the low 16 bits
+* of the X channel.
+*
+* In experiments on gen7 I've found that, in the temporary register,
+* the high 16 bits of the X and Y channels are zeros. This is critical
+* for the SHL and OR instructions below to work as expected.
+*/
+
+   /* dst = tmp.y << 16; */
+   tmp_src.swizzle = SWIZZLE_Y;
+   emit(new(mem_ctx) vec4_instruction(this, BRW_OPCODE_SHL,
+  dst, tmp_src, src_reg(16u)));
+   /* dst |= tmp.x; */
+   tmp_src.swizzle = SWIZZLE_X;
+   emit(new(mem_ctx) vec4_instruction(this, BRW_OPCODE_OR,
+  dst, src_reg(dst), tmp_src));
+
+
+   /* Idea for reducing the above number of registers and instructions
+* 
+*
+* It should be possible to remove the temporary register and replace the
+* SHL and OR instructions above with a single MOV instruction in
+* align1 mode that uses clever register region addressing. (It is
+* impossible to specify the necessary register regions in align16 mode).
+* Unfortunately, it is difficult to emit an align1 instruction here.
+*
+* In particular, I want to do this:
+*
+*   # Give dst the form:
+*   #
+*   #w z  y  x w z  y  x
+*   #  |0|0|0x0000hhhh|0x0000llll|0|0|0x0000hhhh|0x0000llll|
+*   #
+*   f32to16(8) dst<1>.xy:UD src<4;4,1>:F {align16}
+*
+*   # Transform dst into the form of packHalf2x16's output.
+*   #
+*   #w z  y  x w z  y  x
+*   #  |0|0|0x00000000|0xhhhhllll|0|0|0x00000000|0xhhhhllll|
+*   #
+*   # Use width=2 in order to move the Y channel's high 16 bits
+*   # into the low 16 bits, thus clearing the Y channel to zero.
+*   #
+*   mov(4) dst.1<1>:UW dst.2<8;2,1>:UW {align1}
+*/
+}
+
+void
+vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
+{
+   if (intel->gen < 7)
+  assert(!"ir_unop_unpack_half_2x16 should be lowered");
+
+   /* vec2 dst; */
+   assert(dst.type == BRW_REGISTER_TYPE_F);
+
+   /* uint src0; */
+   assert(src0.type 

[Mesa-dev] [Bug 59187] [Steam] Black screen but audio song On TF2 (Intel HM 55/ Ironlake Mobile)

2013-01-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=59187

Lukas M  changed:

   What|Removed |Added

 CC||l...@slyon.de

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Revert "configure.ac: Disable compiler optimizations when --enable-debug is set"

2013-01-10 Thread Marek Olšák
Maybe we should split --enable-debug into --enable-assertions and
--enable-optimized. That's what LLVM does.

Marek

On Thu, Jan 10, 2013 at 4:03 AM, Chia-I Wu  wrote:
> On Wed, Jan 9, 2013 at 5:32 PM, Michel Dänzer  wrote:
>> On Die, 2013-01-08 at 09:31 -0800, Brian Paul wrote:
>>> On 01/08/2013 08:38 AM, Marek Olšák wrote:
>>> >
>>> > Those who do not want optimizations should set the CFLAGS and CXXFLAGS
>>> > environment variables. In my opinion, --enable-debug should only set 
>>> > -DDEBUG.
>>>
>>> Previously, if we set --enable-debug we got -g -O2 which was pretty
>>> much useless if you planned to use gdb (and why else would you use
>>> --enable-debug?).
>>
>> I'm using it because src/gallium/auxiliary/util/u_debug.h disables
>> assertions without DEBUG. Maybe that should be fixed to only disable
>> them with NDEBUG.
> I agree with Michel here.  I was surprised to find out that assert()
> is compiled to no-op in gallium without --enable-debug.
>
>>
>> --
>> Earthling Michel Dänzer   |   http://www.amd.com
>> Libre software enthusiast |  Debian, X and DRI developer
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
>
> --
> o...@lunarg.com
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Revert "configure.ac: Disable compiler optimizations when --enable-debug is set"

2013-01-10 Thread Török Edwin
On 01/08/2013 10:17 PM, Kenneth Graunke wrote:
> On 01/08/2013 12:06 PM, Matt Turner wrote:
>> On Tue, Jan 8, 2013 at 8:38 AM, Marek Olšák  wrote:
>>> Some distributions use --enable-debug for testing packages and the commit
>>> results in terrible CPU performance. It can be embarrassing for us.
>>
>> If distros want debug symbols, they should put -g in their CFLAGS
>> instead of --enable-debug. --enable-debug turns on a lot of other
>> things.
> 
> I concur.
> 
> Personally I think that --enable-debug ought to make it possible to use a 
> debugger sanely (which requires -O0).  But I override CFLAGS/CXXFLAGS anyway.

Would it make sense to show something in the OpenGL renderer string about the 
status of these flags?
#ifdef DEBUG
 " (ASSERTIONS)"
#endif
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
 " (UNOPTIMIZED)"
#endif

At least it'll be obvious what the problem is if someone misconfigured Mesa.

Best regards,
--Edwin




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 19/23] i965 teximage: don't call _mesa_store_teximage if format/type==GL_NONE

2013-01-10 Thread Eric Anholt
Jordan Justen  writes:

> On Wed, Jan 9, 2013 at 11:11 AM, Eric Anholt  wrote:
>> Jordan Justen  writes:
>>
>>> Mesa core's copyteximage calls the driver with format/type==GL_NONE
>>> to "Allocate texture memory". In this case, we shouldn't call
>>> _mesa_store_teximage.
>>
>> I'm not sure if GL_NONE/GL_NONE really makes sense to test for here.  I
>> bet the actual problem is that the app's pixel unpack state (like having
>> a PBO bound, and thus pixels == NULL still meaning to upload data) is
>> leaking into this path through the use of &ctx->Unpack instead of
>> ctx->DefaultPacking.
>
> There is an assert during the _mesa_store_teximage, because at some
> point someone tries to get the bytes/pixel of GL_NONE/GL_NONE.
> (Indeed, a few levels under a call to _mesa_validate_pbo_teximage).
>
> I'm not sure I understand your alternative approach to fix this.

Either use Driver.AllocateTexImageBuffer instead of Driver->TexImage()
(since we never want to upload anything), or use ctx->DefaultPacking in
the Driver->TexImage() call instead of the user-specified ctx->Unpack,
since it's a user PBO being bound in ctx->Unpack that's causing
_mesa_store_teximage() to actually try to store something.  I think
AllocateTexImageBuffer() is the right way to go.



pgpfElKZEAWyr.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 17/23] readpix: for implentation format/type, ignore int vs. non-int check

2013-01-10 Thread Ian Romanick

On 01/09/2013 11:02 AM, Eric Anholt wrote:

Jordan Justen  writes:


In ES or GL+GL_ARB_ES2_compatibility, the usage of
format = IMPLEMENTATION_COLOR_READ_FORMAT +
type = IMPLEMENTATION_COLOR_READ_TYPE
can function, even if the src/dst int vs. non-int types
differ.


This seems bogus -- why should you get to read to formats that ought to
be invalid?  I bet what we're missing is an implementation of this bit
of spec: "The implementation-chosen format may vary depending on the
format of the selected read buffer of the currently bound read
framebuffer." (section 4.3 of the gles3 spec) which lets us actually
specify a format that makes sense for an int framebuffer.


I think you're right.  This is also why tests that try to 
glReadPixels(GL_RED) from a GL_RED FBO work on other implementations but 
generate an error on ours.  I think we should have the 
IMPLEMENTATION_COLOR_READ_* match the format / type of the FBO itself.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/23] readpix: raise priority of FBO completeness error

2013-01-10 Thread Ian Romanick

On 01/09/2013 12:47 PM, Jordan Justen wrote:

On Tue, Jan 8, 2013 at 9:21 AM, Jordan Justen  wrote:

On Mon, Jan 7, 2013 at 1:19 PM, Ian Romanick  wrote:

On 01/04/2013 06:41 PM, Jordan Justen wrote:


GTF/gles3 test suite wants this error to have higher priority
than the type checking.



If the ReadPixels call in the test has multiple errors, the test should be
changed.  There is no requirement to generate a specific error when multiple
problems exist.  Is my understanding of the test case correct?


Okay, it sounds like it is a test bug. I'll drop this patch and look
at the test instead.


Looks like I'm wrong about the test. It seems like the fb-inc error is
the only valid error.

But, in patch 12, I added:
rb = _mesa_get_read_renderbuffer(ctx, internalFormat);
if (rb == NULL) {
   _mesa_error(ctx, GL_INVALID_OPERATION,
   "glCopyTexImage%dD(read buffer)", dimensions);
   return GL_TRUE;
}

This is done because we need to look at the read-buffer format type
for gles3. But, this code will also cause an error when the
framebuffer is not complete, only I had it return invalid-op.

So, I think this patch is needed to check the framebuffer completeness
before this code tries to get the read-buffer. I propose a new log
message:

readpix: check FBO completeness before trying to access the read-buffer


Ah... that makes sense.

Reviewed-by: Ian Romanick 


-Jordan


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/23] copytexture: update error checking for GLES3

2013-01-10 Thread Ian Romanick

On 01/09/2013 12:54 AM, Anuj Phogat wrote:

On Wed, Jan 9, 2013 at 3:38 AM, Jordan Justen  wrote:

On Mon, Jan 7, 2013 at 12:32 AM, Anuj Phogat  wrote:

On Sat, Jan 5, 2013 at 8:11 AM, Jordan Justen  wrote:

Changes based on GTF/gles3 conformance test suite.

Signed-off-by: Jordan Justen 
---
  src/mesa/main/teximage.c |   62 +-
  1 file changed, 45 insertions(+), 17 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 169e768..cb0084a 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2370,6 +2370,9 @@ copytexture_error_check( struct gl_context *ctx, GLuint 
dimensions,
   GLint width, GLint height, GLint border )
  {
 GLint baseFormat;
+   GLint rb_base_format;
+   struct gl_renderbuffer *rb;
+   GLenum rb_internal_format;

 /* check target */
 if (!legal_texsubimage_target(ctx, dimensions, target)) {
@@ -2414,31 +2417,56 @@ copytexture_error_check( struct gl_context *ctx, GLuint 
dimensions,
return GL_TRUE;
 }

-   /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
-* internalFormat.
-*/
-   if (_mesa_is_gles(ctx) && !_mesa_is_gles3(ctx)) {
-  switch (internalFormat) {
-  case GL_ALPHA:
-  case GL_RGB:
-  case GL_RGBA:
-  case GL_LUMINANCE:
-  case GL_LUMINANCE_ALPHA:
- break;
-  default:
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glCopyTexImage%dD(internalFormat)", dimensions);
- return GL_TRUE;
-  }
+   rb = _mesa_get_read_renderbuffer(ctx, internalFormat);
+   if (rb == NULL) {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glCopyTexImage%dD(read buffer)", dimensions);
+  return GL_TRUE;
 }

+   rb_internal_format = rb->InternalFormat;
 baseFormat = _mesa_base_tex_format(ctx, internalFormat);
+   rb_base_format = _mesa_base_tex_format(ctx, rb->InternalFormat);
 if (baseFormat < 0) {
-  _mesa_error(ctx, GL_INVALID_VALUE,
+  _mesa_error(ctx, GL_INVALID_OPERATION,
"glCopyTexImage%dD(internalFormat)", dimensions);
return GL_TRUE;
 }

+   if (_mesa_is_color_format(internalFormat)) {
+  if (rb_base_format < 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "glCopyTexImage%dD(internalFormat)", dimensions);
+ return GL_TRUE;
+  }
+   }
+
+   if (_mesa_is_gles(ctx)) {
+  bool valid = true;
+  if (_mesa_base_format_component_count(baseFormat) >
+  _mesa_base_format_component_count(rb_base_format)) {
+ valid = false;
+  }
+  if (baseFormat == GL_DEPTH_COMPONENT ||
+  baseFormat == GL_DEPTH_STENCIL ||
+  rb_base_format == GL_DEPTH_COMPONENT ||
+  rb_base_format == GL_DEPTH_STENCIL ||
+  ((baseFormat == GL_LUMINANCE_ALPHA ||
+baseFormat == GL_ALPHA) &&
+   rb_base_format != GL_RGBA) ||
+  internalFormat == GL_RGB9_E5) {
+ valid = false;
+  }
+  if (internalFormat == GL_RGB9_E5) {
+ valid = false;
+  }
+  if (!valid) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glCopyTexImage%dD(internalFormat)", dimensions);
+ return GL_TRUE;
+  }
+   }
+

This looks incorrect as baseFormat=GL_DEPTH_COMPONENT is allowed in
gles3. It was not allowed in gles 1.1 and 2.0. You need to put an additional
check to test  ctx->Version < 30.


I think depth/stencil is allowed for GLES3 with TexImage, but not for
CopyTexImage. This code will cause CopyTexImage to be rejected on any
GLES when the source or dest is depth/stencil.

Does that seem correct?

Yes, Table 3.15 in gles 3.0 spec doesn't allow depth/stencil for CopyTexImage.
I was referring to gles 3.0 reference pages which now looks incorrect:
http://www.khronos.org/opengles/sdk/docs/man3/


I've submitted a bug for this.  Good catch.


I'll modify meta implementation of BlitFramebuffer() to account for this change.


-Jordan


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/10] glsl: Extend ir_expression_operation for GLSL 3.00 pack/unpack functions

2013-01-10 Thread Ian Romanick

On 01/10/2013 12:10 AM, Chad Versace wrote:

For each function {pack,unpack}{Snorm,Unorm,Half}2x16, add a corresponding
opcode to enum ir_expression_operation.  Validate the new opcodes in
ir_validate.cpp.

Also, add opcodes for scalarized variants of the Half2x16 functions.  (The
code generator for the i965 fragment shader requires that all vector
operations be scalarized.  A lowering pass, to be added later, will
scalarize the Half2x16 functions).

Signed-off-by: Chad Versace 


One nit below...


---
  src/glsl/ir.cpp | 27 +++
  src/glsl/ir.h   | 31 +++
  src/glsl/ir_validate.cpp| 26 ++
  src/mesa/program/ir_to_mesa.cpp | 12 +++-
  4 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 703f5ec..ddaf3c3 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -306,6 +306,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
break;

 case ir_unop_noise:
+   case ir_unop_unpack_half_2x16_split_x:
+   case ir_unop_unpack_half_2x16_split_y:
this->type = glsl_type::float_type;
break;

@@ -313,6 +315,18 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
this->type = glsl_type::bool_type;
break;

+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_pack_half_2x16:
+  this->type = glsl_type::uint_type;
+  break;
+
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
+  this->type = glsl_type::vec2_type;
+  break;
+
 default:
assert(!"not reached: missing automatic type setup for ir_expression");
this->type = op0->type;
@@ -386,6 +400,10 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, 
ir_rvalue *op1)
this->type = glsl_type::float_type;
break;

+   case ir_binop_pack_half_2x16_split:
+  this->type = glsl_type::uint_type;
+  break;
+
 case ir_binop_lshift:
 case ir_binop_rshift:
this->type = op0->type;
@@ -454,6 +472,14 @@ static const char *const operator_strs[] = {
 "cos_reduced",
 "dFdx",
 "dFdy",
+   "packSnorm2x16",
+   "packUnorm2x16",
+   "packHalf2x16",
+   "unpackSnorm2x16",
+   "unpackUnorm2x16",
+   "unpackHalf2x16",
+   "unpackHalf2x16_split_x",
+   "unpackHalf2x16_split_y",
 "noise",
 "+",
 "-",
@@ -480,6 +506,7 @@ static const char *const operator_strs[] = {
 "min",
 "max",
 "pow",
+   "packHalf2x16_split",
 "ubo_load",
 "vector",
  };
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index c9c91e8..c1fbe7c 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -969,6 +969,28 @@ enum ir_expression_operation {
 ir_unop_dFdy,
 /*@}*/

+   /**
+* \name Floating point pack and unpack operations.
+*/
+   /*@{*/
+   ir_unop_pack_snorm_2x16,
+   ir_unop_pack_unorm_2x16,
+   ir_unop_pack_half_2x16,
+   ir_unop_unpack_snorm_2x16,
+   ir_unop_unpack_unorm_2x16,
+   ir_unop_unpack_half_2x16,
+   /*@}*/
+
+   /**
+* \name Lowered floating point unpacking operations.
+*
+* \see lower_packing_builtins_visitor::split_unpack_half_2x16
+*/
+   /*@{*/
+   ir_unop_unpack_half_2x16_split_x,
+   ir_unop_unpack_half_2x16_split_y,
+   /*@}*/
+
 ir_unop_noise,

 /**
@@ -1036,6 +1058,15 @@ enum ir_expression_operation {
 ir_binop_pow,

 /**
+* \name Lowered floating point packing operations.
+*
+* \see lower_packing_builtins_visitor::split_pack_half_2x16
+*/
+   /*@{*/
+   ir_binop_pack_half_2x16_split,
+   /*@}*/
+
+   /**
  * Load a value the size of a given GLSL type from a uniform block.
  *
  * operand0 is the ir_constant uniform block index in the linked shader.
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index ad57a31..9019637 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -329,6 +329,26 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->operands[0]->type == ir->type);
break;

+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_pack_half_2x16:
+  assert(ir->type == glsl_type::uint_type);
+  assert(ir->operands[0]->type == glsl_type::vec2_type);
+  break;
+
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
+  assert(ir->type == glsl_type::vec2_type);
+  assert(ir->operands[0]->type == glsl_type::uint_type);
+  break;
+
+   case ir_unop_unpack_half_2x16_split_x:
+   case ir_unop_unpack_half_2x16_split_y:
+  assert(ir->type == glsl_type::float_type);
+  assert(ir->operands[0]->type == glsl_type::uint_type);
+  break;
+
 case ir_unop_noise:
/* XXX what can we assert here? */
break;
@@ -423,6 +443,12 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->operands[0]->type == ir->operands[1]->type);
bre

Re: [Mesa-dev] [PATCH 04/10] glsl: Evaluate constant GLSL ES 3.00 pack/unpack operations

2013-01-10 Thread Ian Romanick

On 01/10/2013 12:10 AM, Chad Versace wrote:

That is, evaluate constant expressions of the following functions:
   packSnorm2x16  unpackSnorm2x16
   packUnorm2x16  unpackUnorm2x16
   packHalf2x16   unpackHalf2x16


I believe that there is already code somewhere in Mesa for doing 
float/half conversion.  Could we just re-use that?



Signed-off-by: Chad Versace 
---
  src/glsl/ir_constant_expression.cpp | 362 
  1 file changed, 362 insertions(+)

diff --git a/src/glsl/ir_constant_expression.cpp 
b/src/glsl/ir_constant_expression.cpp
index 17b54b9..2038498 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -94,6 +94,332 @@ bitcast_f2u(float f)
 return u;
  }

+/**
+ * Evaluate one component of a floating-point 2x16 packing function.
+ */
+typedef uint16_t
+(*pack_1x16_func_t)(float);
+
+/**
+ * Evaluate one component of a floating-point 2x16 unpacking function.
+ */
+typedef void
+(*unpack_1x16_func_t)(uint16_t, float*);
+
+/**
+ * Evaluate a 2x16 floating-point packing function.
+ */
+static uint32_t
+pack_2x16(pack_1x16_func_t pack_1x16,
+  float x, float y)
+{
+   /* From section 8.4 of the GLSL ES 3.00 spec:
+*
+*packSnorm2x16
+*-
+*The first component of the vector will be written to the least
+*significant bits of the output; the last component will be written to
+*the most significant bits.
+*
+* The specifications for the other packing functions contain similar
+* language.
+*/
+   uint32_t u = 0;
+   u |= ((uint32_t) pack_1x16(x) << 0);
+   u |= ((uint32_t) pack_1x16(y) << 16);
+   return u;
+}
+
+/**
+ * Evaluate a 2x16 floating-point unpacking function.
+ */
+static void
+unpack_2x16(unpack_1x16_func_t unpack_1x16,
+uint32_t u,
+float *x, float *y)
+{
+/* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ *unpackSnorm2x16
+ *---
+ *The first component of the returned vector will be extracted from
+ *the least significant bits of the input; the last component will be
+ *extracted from the most significant bits.
+ *
+ * The specifications for the other unpacking functions contain similar
+ * language.
+ */
+   unpack_1x16((uint16_t) (u & 0xffff), x);
+   unpack_1x16((uint16_t) (u >> 16), y);
+}
+
+/**
+ * Evaluate one component of packSnorm2x16.
+ */
+static uint16_t
+pack_snorm_1x16(float x)
+{
+/* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ *packSnorm2x16
+ *---
+ *The conversion for component c of v to fixed point is done as
+ *follows:
+ *
+ *  packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
+ */
+   return (uint16_t) round_to_even(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
+}
+
+/**
+ * Evaluate one component of unpackSnorm2x16.
+ */
+static void
+unpack_snorm_1x16(uint16_t u, float *f)
+{
+/* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ *unpackSnorm2x16
+ *---
+ *The conversion for unpacked fixed-point value f to floating point is
+ *done as follows:
+ *
+ *   unpackSnorm2x16: clamp(f / 32767.0, -1,+1)
+ */
+   *f = CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component of packUnorm2x16.
+ */
+static uint16_t
+pack_unorm_1x16(float x)
+{
+/* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ *packUnorm2x16
+ *---
+ *The conversion for component c of v to fixed point is done as
+ *follows:
+ *
+ *   packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
+ */
+   return (uint16_t) round_to_even(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
+}
+
+
+/**
+ * Evaluate one component of unpackUnorm2x16.
+ */
+static void
+unpack_unorm_1x16(uint16_t u, float *f)
+{
+/* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ *unpackUnorm2x16
+ *---
+ *The conversion for unpacked fixed-point value f to floating point is
+ *done as follows:
+ *
+ *   unpackUnorm2x16: f / 65535.0
+ */
+   *f = (float) u / 65535.0f;
+}
+
+
+/**
+ * Evaluate one component of packHalf2x16.
+ */
+static uint16_t
+pack_half_1x16(float x)
+{
+   /* The bit layout of a float16 is:
+*   sign: 15
+*   exponent: 10:14
+*   mantissa: 0:9
+*
+* The sign, exponent, and mantissa of a float16 determine its value thus:
+*
+*  if e = 0 and m = 0, then zero:   (-1)^s * 0
+*  if e = 0 and m != 0, then subnormal: (-1)^s * 2^(e - 14) * (m / 2^10)
+*  if 0 < e < 31, then normal:  (-1)^s * 2^(e - 15) * (1 + m / 2^10)
+*  if e = 31 and m = 0, then inf:   (-1)^s * inf
+*  if e = 31 and m != 0, then NaN
+*
+*  where 0 <= m < 2^10 .
+*/
+
+   /* Calculate the resultant float16's sign, exponent, and mantissa
+* bits.
+*/
+   const int s = (co

Re: [Mesa-dev] [PATCH 06/10] i965: Lower the GLSL ES 3.00 pack/unpack operations

2013-01-10 Thread Ian Romanick

On 01/10/2013 12:10 AM, Chad Versace wrote:

On gen < 7, we fully lower all operations to arithmetic and bitwise
operations.

On gen >= 7, we fully lower the Snorm2x16 and Unorm2x16 operations, and
partially lower the Half2x16 operations.

Signed-off-by: Chad Versace 
---
  src/glsl/lower_packing_builtins.cpp  |  1 +
  src/mesa/drivers/dri/i965/brw_shader.cpp | 32 
  2 files changed, 33 insertions(+)

diff --git a/src/glsl/lower_packing_builtins.cpp 
b/src/glsl/lower_packing_builtins.cpp
index cd84084..f965a27 100644
--- a/src/glsl/lower_packing_builtins.cpp
+++ b/src/glsl/lower_packing_builtins.cpp
@@ -1013,6 +1013,7 @@ private:
   new(mem_ctx) ir_variable(glsl_type::vec2_type,
"tmp_split_pack_half_2x16_v",
ir_var_temporary);
+  insert_instruction(v);
insert_instruction(
   new(mem_ctx) ir_assignment(
  new(mem_ctx) ir_dereference_variable(v),


Shouldn't this hunk be in the previous patch?


diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 1e8d574..65f8e7d 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -75,6 +75,34 @@ brw_shader_precompile(struct gl_context *ctx, struct 
gl_shader_program *prog)
 return true;
  }

+static void
+brw_lower_packing_builtins(struct brw_context *brw,
+   gl_shader_type shader_type,
+   exec_list *ir)
+{
+   int ops = LOWER_PACK_SNORM_2x16
+   | LOWER_UNPACK_SNORM_2x16
+   | LOWER_PACK_UNORM_2x16
+   | LOWER_UNPACK_UNORM_2x16;
+
+   if (brw->intel.gen >= 7) {
+  switch (shader_type) {
+  case MESA_SHADER_FRAGMENT:
+ /* Scalarize these operations. */
+ ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
+ |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
+ break;


Do we think other shader types are going to need similar treatment? 
Otherwise an if-statement would be better.



+  default:
+ break;
+  }
+   } else {
+  ops |= LOWER_PACK_HALF_2x16
+  |  LOWER_UNPACK_HALF_2x16;
+   }
+
+   lower_packing_builtins(ir, ops);
+}
+
  GLboolean
  brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
  {
@@ -113,6 +141,10 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
shader->ir = new(shader) exec_list;
clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

+  /* lower_packing_builtins() inserts arithmetic instructions, so it
+   * must precede lower_instructions().
+   */
+  brw_lower_packing_builtins(brw, (gl_shader_type) stage, shader->ir);
do_mat_op_to_vec(shader->ir);
lower_instructions(shader->ir,
 MOD_TO_FRACT |



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/10] glsl: Extend ir_expression_operation for GLSL 3.00 pack/unpack functions

2013-01-10 Thread Chad Versace
On 01/10/2013 10:28 AM, Ian Romanick wrote:
> On 01/10/2013 12:10 AM, Chad Versace wrote:
>> For each function {pack,unpack}{Snorm,Unorm,Half}2x16, add a corresponding
>> opcode to enum ir_expression_operation.  Validate the new opcodes in
>> ir_validate.cpp.
>>
>> Also, add opcodes for scalarized variants of the Half2x16 functions.  (The
>> code generator for the i965 fragment shader requires that all vector
>> operations be scalarized.  A lowering pass, to be added later, will
>> scalarize the Half2x16 functions).
>>
>> Signed-off-by: Chad Versace 
> 
> One nit below...
> 
>> ---
>>   src/glsl/ir.cpp | 27 +++
>>   src/glsl/ir.h   | 31 +++
>>   src/glsl/ir_validate.cpp| 26 ++
>>   src/mesa/program/ir_to_mesa.cpp | 12 +++-
>>   4 files changed, 95 insertions(+), 1 deletion(-)


>> +   case ir_unop_pack_snorm_2x16:
>> +   case ir_unop_pack_unorm_2x16:
>> +   case ir_unop_pack_half_2x16:
>> +   case ir_unop_unpack_snorm_2x16:
>> +   case ir_unop_unpack_unorm_2x16:
>> +   case ir_unop_unpack_half_2x16:
>> +   case ir_unop_unpack_half_2x16_split_x:
>> +   case ir_unop_unpack_half_2x16_split_y:
>> +   case ir_binop_pack_half_2x16_split:
>> +  assert(!"should be handled by lower_packing_builtins");
>> +  break;
> 
> ir_to_mesa can't handle the shifts, etc. that are generated by the lowering 
> pass
> either, so this message is kind of a lie. :)

From that I infer that drivers that use ir_to_mesa do not support GL 3.0,
correct?

How about changing it to: assert(!"not supported")



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 57044] build failure: make[4]: *** No rule to make target `../../src/mesa/program/libprogram.la', needed by `libmesagallium.la'. Stop.

2013-01-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=57044

Kenneth Graunke  changed:

   What|Removed |Added

 Status|REOPENED|RESOLVED
 Resolution|--- |FIXED

--- Comment #10 from Kenneth Graunke  ---
I went ahead and committed it.  Presumably no one had bothered to do it because
it didn't affect their builds.

commit c5e9396424bb470ec55a2a29e3ef95aa76fed738
Author: Quentin Glidic 
Date:   Wed Nov 28 16:33:47 2012 +0100

mesa/program: Fix both Classic and Gallium build

Follow-up for 907844107252260c646aca361191ef7f121f3d23 and
3a5ad21cd3f026579eeacc25b39513711556c7ee

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57044
Tested-by: Fabio Pedretti 
Tested-by: Brad King 

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/10] glsl: Extend ir_expression_operation for GLSL 3.00 pack/unpack functions

2013-01-10 Thread Ian Romanick

On 01/10/2013 10:42 AM, Chad Versace wrote:

On 01/10/2013 10:28 AM, Ian Romanick wrote:

On 01/10/2013 12:10 AM, Chad Versace wrote:

For each function {pack,unpack}{Snorm,Unorm,Half}2x16, add a corresponding
opcode to enum ir_expression_operation.  Validate the new opcodes in
ir_validate.cpp.

Also, add opcodes for scalarized variants of the Half2x16 functions.  (The
code generator for the i965 fragment shader requires that all vector
operations be scalarized.  A lowering pass, to be added later, will
scalarize the Half2x16 functions).

Signed-off-by: Chad Versace 


One nit below...


---
   src/glsl/ir.cpp | 27 +++
   src/glsl/ir.h   | 31 +++
   src/glsl/ir_validate.cpp| 26 ++
   src/mesa/program/ir_to_mesa.cpp | 12 +++-
   4 files changed, 95 insertions(+), 1 deletion(-)




+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_pack_half_2x16:
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
+   case ir_unop_unpack_half_2x16_split_x:
+   case ir_unop_unpack_half_2x16_split_y:
+   case ir_binop_pack_half_2x16_split:
+  assert(!"should be handled by lower_packing_builtins");
+  break;


ir_to_mesa can't handle the shifts, etc. that are generated by the lowering pass
either, so this message is kind of a lie. :)


 From that I infer that drivers that use ir_to_mesa do not support GL 3.0,
correct?


Correct.  i915 and (classic) swrast do not support GLSL 1.30.


How about changing it to: assert(!"not supported")


That sounds good to me.  I believe there are other, similar cases 
elsewhere in that switch.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/10] i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations

2013-01-10 Thread Ian Romanick

On 01/10/2013 12:10 AM, Chad Versace wrote:

Signed-off-by: Chad Versace 
---
  src/mesa/drivers/dri/i965/brw_defines.h|  1 +
  src/mesa/drivers/dri/i965/brw_fs.h |  7 ++
  .../dri/i965/brw_fs_channel_expressions.cpp| 29 +++-
  src/mesa/drivers/dri/i965/brw_fs_emit.cpp  | 39 ++-
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 78 +-
  5 files changed, 149 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 22d3e98..1c43d68 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -713,6 +713,7 @@ enum opcode {
 FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
 FS_OPCODE_DISCARD_JUMP,
 FS_OPCODE_SET_GLOBAL_OFFSET,
+   FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,

 VS_OPCODE_URB_WRITE,
 VS_OPCODE_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index bcf38f3..59aa28d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -355,6 +355,10 @@ public:
 fs_reg fix_math_operand(fs_reg src);
 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
+   void emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y);
+   void emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0);
+   void emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0);
+
 void emit_minmax(uint32_t conditionalmod, fs_reg dst,
  fs_reg src0, fs_reg src1);
 bool try_emit_saturate(ir_expression *ir);
@@ -541,6 +545,9 @@ private:
 struct brw_reg src,
 struct brw_reg offset);
 void generate_discard_jump(fs_inst *inst);
+   void generate_unpack_half_2x16_split_y(fs_inst *inst,
+  struct brw_reg dst,
+  struct brw_reg src);

 void patch_discard_jumps_to_fb_writes();

diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 58521ee..7081511 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -76,8 +76,21 @@ channel_expressions_predicate(ir_instruction *ir)
return false;

 for (i = 0; i < expr->get_num_operands(); i++) {
-  if (expr->operands[i]->type->is_vector())
-return true;
+  if (expr->operands[i]->type->is_vector()) {
+ switch (expr->operation) {
+ case ir_binop_pack_half_2x16_split:
+ case ir_unop_pack_half_2x16:
+ case ir_unop_unpack_half_2x16:
+ case ir_unop_unpack_half_2x16_split_x:
+ case ir_unop_unpack_half_2x16_split_y:
+assert(!"WTF");


Classy. :)  Maybe (like below)

assert(!"not reached: expression operates on scalars only");



+break;
+ default:
+break;
+ }
+
+ return true;
+  }
 }

 return false;
@@ -342,9 +355,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment 
*ir)
assert(!"not yet supported");
break;

+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_pack_half_2x16:
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
 case ir_quadop_vector:
assert(!"should have been lowered");
break;
+
+   case ir_unop_unpack_half_2x16_split_x:
+   case ir_unop_unpack_half_2x16_split_y:
+   case ir_binop_pack_half_2x16_split:
+  assert(!"not reached: expression operates on scalars only");
+  break;
 }

 ir->remove();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 63f09fe..46e2409 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -920,6 +920,34 @@ fs_generator::generate_set_global_offset(fs_inst *inst,
  }

  void
+fs_generator::generate_unpack_half_2x16_split_y(fs_inst *inst,
+struct brw_reg dst,
+struct brw_reg src)
+{
+   assert(intel->gen >= 7);
+
+   /* src has the form of unpackHalf2x16's input:
+*
+* w z y  x
+*   |undef|undef|undef|0x|
+*
+* We wish to access only the "" bits of the source register, and hence
+* must access it with a 16 bit subregister offset.  To do so, we must
+* halve the size of the source data type from UD to UW and compensate by
+* doubling the stride.
+*/
+   assert(src.type == BRW_REGISTER_TYPE_UD);
+   src.type = BRW_REGISTER_TYPE_UW;
+   if (src.vstride > 0)
+  ++src.vstride;
+   if (src.hstride > 0)
+  ++s

Re: [Mesa-dev] [PATCH 04/10] glsl: Evaluate constant GLSL ES 3.00 pack/unpack operations

2013-01-10 Thread Chad Versace
Oliver,

Could you please clarify on the claim below in the comments to
bitcast_u2f: "[memcpy] is the only [method] supported by gcc" for
bitcasting uint to float.

On 01/10/2013 10:30 AM, Ian Romanick wrote:
> On 01/10/2013 12:10 AM, Chad Versace wrote:
>> That is, evaluate constant expressions of the following functions:
>>packSnorm2x16  unpackSnorm2x16
>>packUnorm2x16  unpackUnorm2x16
>>packHalf2x16   unpackHalf2x16
> 
> I believe that there is already code somewhere in Mesa for doing float/half
> conversion.  Could we just re-use that?
> 
>> Signed-off-by: Chad Versace 
>> ---
>>   src/glsl/ir_constant_expression.cpp | 362 
>> 
>>   1 file changed, 362 insertions(+)

I should have known that float/half conversions existed.

I just reviewed _mesa_half_to_float and _mesa_float_to_half, and they look like
suitable replacements except for one aspect: they use a union to cast between
uint and float, yet ir_constant_expression.cpp:bitcast_u2f contains this
comment:

/* This method is the only one supported by gcc.  Unions in particular
 * are iffy, and read-through-converted-pointer is killed by strict
 * aliasing.  OTOH, the compiler sees through the memcpy, so the
 * resulting asm is reasonable.
 */
static float
bitcast_u2f(unsigned int u)
{
   assert(sizeof(float) == sizeof(unsigned int));
   float f;
   memcpy(&f, &u, sizeof(f));
   return f;
}

The claim that memcpy "is the only [method] supported by gcc" for float/uint
conversion is news to me. Does that seem reasonable to you? I've CC'd the
author of the comment for clarification.

If the claim holds up, then I can easily update _mesa_float_to_half and
_mesa_half_to_float to use these functions.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/10] glsl: Add lowering pass for GLSL ES 3.00 pack/unpack operations

2013-01-10 Thread Chad Versace
On 01/10/2013 10:36 AM, Ian Romanick wrote:
> On 01/10/2013 12:10 AM, Chad Versace wrote:
>> Lower them to arithmetic and bit manipulation expressions.
>>
>> Signed-off-by: Chad Versace 
>> ---
>>   src/glsl/Makefile.sources   |1 +
>>   src/glsl/ir_optimization.h  |   18 +
>>   src/glsl/lower_packing_builtins.cpp | 1566 
>> +++
>>   3 files changed, 1585 insertions(+)
>>   create mode 100644 src/glsl/lower_packing_builtins.cpp


>> +  switch (expr->operation) {
>> +  case ir_unop_pack_snorm_2x16:
>> + assert((*rvalue)->type == glsl_type::uint_type);
>> + assert(op0->type == glsl_type::vec2_type);
> 
> Aren't all of these assertions already handled by ir_validate?

You're right. I'll remove them.

>> +   /**
>> +* \brief Pack two uint16's into a single uint32.
>> +*
>> +* Interpret the given uvec2 as a uint16 pair. Pack the pair into a 
>> uint32
>> +* where the least significant bits specify the first element of the 
>> pair.
>> +* Return the uint32 as a uint rvalue.
>> +*
>> +* This function generates IR that approximates the following GLSL:
>> +*
>> +*uvec2 *u = UVEC2_RVAL;
>> +*return (u.y << 16) | (u.x & 0xffff);
>> +*/
>> +   ir_rvalue*
>> +   pack_uvec2_to_uint(void *mem_ctx, ir_rvalue *uvec2_rval)
>> +   {
>> +  assert(uvec2_rval->type == glsl_type::uvec2_type);
>> +
>> +  /* uvec2 u = uvec2_rval; */
>> +  ir_variable *u2 =
>> + new(mem_ctx) ir_variable(glsl_type::uvec2_type,
>> +  "tmp_pack_uvec2_to_uint",
>> +  ir_var_temporary);
>> +  insert_instruction(u2);
>> +  insert_instruction(
>> + new(mem_ctx) ir_assignment(
>> +new(mem_ctx) ir_dereference_variable(u2),
>> +uvec2_rval));
>> +
>> +  /* return (u.y << 16) | (u.x & 0xffff); */
>> +  return
>> + new(mem_ctx) ir_expression(ir_binop_bit_or,
>> +   new(mem_ctx) ir_expression(ir_binop_lshift,
>> + new(mem_ctx) ir_swizzle(
>> +new(mem_ctx) ir_dereference_variable(u2),
>> +1, 0, 0, 0, 1),
>> + new(mem_ctx) ir_constant(16u)),
>> +   new(mem_ctx) ir_expression(ir_binop_bit_and,
>> + new(mem_ctx) ir_swizzle(
>> +new(mem_ctx) ir_dereference_variable(u2),
>> +0, 0, 0, 0, 1),
>> + new(mem_ctx) ir_constant(0xffffu)));
> 
> Reading this just turned my brain to mush.  I can't imagine what writing it did 
> to
> yours. :)  ir_builder, perhaps?  You may need to add a couple methods (lsr, 
> lsl,
> etc.), but that doesn't seem like a bad thing...

Good idea.

Even though all this brain-warping code will be replaced by ir_builder chains,
I'm glad I wrote it. I learned much about nuances of the IR while struggling
to write this mess.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 17/23] readpix: for implementation format/type, ignore int vs. non-int check

2013-01-10 Thread Jordan Justen
On Thu, Jan 10, 2013 at 10:13 AM, Ian Romanick  wrote:
> On 01/09/2013 11:02 AM, Eric Anholt wrote:
>>
>> Jordan Justen  writes:
>>
>>> In ES or GL+GL_ARB_ES2_compatibility, the usage of
>>> format = IMPLEMENTATION_COLOR_READ_FORMAT +
>>> type = IMPLEMENTATION_COLOR_READ_TYPE
>>> can function, even if the src/dst int vs. non-int types
>>> differ.
>>
>>
>> This seems bogus -- why should you get to read to formats that ought to
>> be invalid?  I bet what we're missing is an implementation of this bit
>> of spec: "The implementation-chosen format may vary depending on the
>> format of the selected read buffer of the currently bound read
>> framebuffer." (section 4.3 of the gles3 spec) which lets us actually
>> specify a format that makes sense for an int framebuffer.
>
> I think you're right.  This is also why tests that try to
> glReadPixels(GL_RED) from a GL_RED FBO work on other implementations but
> generate an error on ours.  I think we should have the
> IMPLEMENTATION_COLOR_READ_* match the format / type of the FBO itself.

I'll try to update _mesa_get_color_read_format and
_mesa_get_color_read_type for this and see what happens on gles3-gtf
and piglit.

-Jordan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/10] glsl: Evaluate constant GLSL ES 3.00 pack/unpack operations

2013-01-10 Thread Ian Romanick

On 01/10/2013 10:57 AM, Chad Versace wrote:

Oliver,

Could you please clarify on the claim below in the comments to
bitcast_u2f: "[memcpy] is the only [method] supported by gcc" for
bitcasting uint to float.

On 01/10/2013 10:30 AM, Ian Romanick wrote:

On 01/10/2013 12:10 AM, Chad Versace wrote:

That is, evaluate constant expressions of the following functions:
packSnorm2x16  unpackSnorm2x16
packUnorm2x16  unpackUnorm2x16
packHalf2x16   unpackHalf2x16


I believe that there is already code somewhere in Mesa for doing float/half
conversion.  Could we just re-use that?


Signed-off-by: Chad Versace 
---
   src/glsl/ir_constant_expression.cpp | 362 

   1 file changed, 362 insertions(+)


I should have known that float/half conversions existed.

I just reviewed _mesa_half_to_float and _mesa_float_to_half, and they look like
suitable replacements except for one aspect: they use a union to cast between
uint and float, yet ir_constant_expression.cpp:bitcast_u2f contains this
comment:

/* This method is the only one supported by gcc.  Unions in particular
  * are iffy, and read-through-converted-pointer is killed by strict
  * aliasing.  OTOH, the compiler sees through the memcpy, so the
  * resulting asm is reasonable.
  */
static float
bitcast_u2f(unsigned int u)
{
assert(sizeof(float) == sizeof(unsigned int));
float f;
memcpy(&f, &u, sizeof(f));
return f;
}

The claim that memcpy "is the only [method] supported by gcc" for float/uint
conversion is news to me. Does that seem reasonable to you? I've CC'd the
author of the comment for clarification.


I was also not aware of this.


If the claim holds up, then I can easily update _mesa_float_to_half and
_mesa_half_to_float to use these functions.


That sounds like a good plan.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] intel: Clean up confusion between logical and physical surface dimensions.

2013-01-10 Thread Ian Romanick

On 01/08/2013 02:27 PM, Paul Berry wrote:

In most cases, the width, height, and depth of the physical surface
used by the driver to implement a texture or renderbuffer is equal to
the logical width, height, and depth exposed to the client through
functions such as glTexImage3D().  However, there are two exceptions:
cube maps (which have a physical depth of 6 but a logical depth of 1)
and multisampled renderbuffers (which have larger physical dimensions
than logical dimensions to allow multiple samples per pixel).

Previous to this patch, we accounted for the difference between
physical and logical surface dimensions at inconsistent places in the
call graph (multisampling was accounted for in
intel_miptree_create_for_renderbuffer(), and cubemaps were accounted
for in intel_miptree_create_internal()).  As a result, it wasn't
always clear, when calling a miptree creation function, whether
physical or logical dimensions were needed.  Also, we weren't
consistent about storing logical dimensions in the intel_mipmap_tree
structure (we only did so in the
intel_miptree_create_for_renderbuffer() code path, and we did not
store depth).

This patch refactors things so that intel_miptree_create_internal() is
responsible for converting logical to physical dimensions and for
storing both the physical and logical dimensions in the
intel_mipmap_tree structure.  As a result, all miptree creation
functions interpret their arguments as logical dimensions, and both
physical and logical dimensions are always available to functions that
work with intel_mipmap_trees.

In addition, it renames the fields in intel_mipmap_tree used to store
the dimensions, so that it is clear from the name whether physical or
logical dimensions are being referred to.

This should fix the following bugs:

- When creating a separate stencil surface for a depthstencil cubemap,
   we would erroneously try to convert the depth from 1 to 6 twice,
   resulting in an assertion failure.

- When creating an MCS buffer for compressed multisampling, we used
   physical dimensions instead of logical dimensions, resulting in
   wasted memory.

In addition, this should considerably simplify the implementation of
ARB_texture_multisample, because it moves the code to compute the
physical size of multisampled surfaces out of renderbuffer-only code.
---
  src/mesa/drivers/dri/i915/i915_tex_layout.c |  36 ++---
  src/mesa/drivers/dri/i965/brw_tex_layout.c  |  20 +--
  src/mesa/drivers/dri/intel/intel_fbo.c  |   1 -
  src/mesa/drivers/dri/intel/intel_mipmap_tree.c  | 191 +++-
  src/mesa/drivers/dri/intel/intel_mipmap_tree.h  |  28 ++--
  src/mesa/drivers/dri/intel/intel_tex_image.c|   1 -
  src/mesa/drivers/dri/intel/intel_tex_layout.c   |  18 +--
  src/mesa/drivers/dri/intel/intel_tex_validate.c |   1 -
  8 files changed, 143 insertions(+), 153 deletions(-)

diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c 
b/src/mesa/drivers/dri/i915/i915_tex_layout.c
index 1e3cfad..90911a6 100644
--- a/src/mesa/drivers/dri/i915/i915_tex_layout.c
+++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c
@@ -114,9 +114,9 @@ static GLint bottom_offsets[6] = {
  static void
  i915_miptree_layout_cube(struct intel_mipmap_tree * mt)
  {
-   const GLuint dim = mt->width0;
+   const GLuint dim = mt->physical_width0;
 GLuint face;
-   GLuint lvlWidth = mt->width0, lvlHeight = mt->height0;
+   GLuint lvlWidth = mt->physical_width0, lvlHeight = mt->physical_height0;
 GLint level;

 assert(lvlWidth == lvlHeight); /* cubemap images are square */
@@ -156,14 +156,14 @@ i915_miptree_layout_cube(struct intel_mipmap_tree * mt)
  static void
  i915_miptree_layout_3d(struct intel_mipmap_tree * mt)
  {
-   GLuint width = mt->width0;
-   GLuint height = mt->height0;
-   GLuint depth = mt->depth0;
+   GLuint width = mt->physical_width0;
+   GLuint height = mt->physical_height0;
+   GLuint depth = mt->physical_depth0;
 GLuint stack_height = 0;
 GLint level;

 /* Calculate the size of a single slice. */
-   mt->total_width = mt->width0;
+   mt->total_width = mt->physical_width0;

 /* XXX: hardware expects/requires 9 levels at minimum. */
 for (level = mt->first_level; level <= MAX2(8, mt->last_level); level++) {
@@ -178,7 +178,7 @@ i915_miptree_layout_3d(struct intel_mipmap_tree * mt)
 }

 /* Fixup depth image_offsets: */
-   depth = mt->depth0;
+   depth = mt->physical_depth0;
 for (level = mt->first_level; level <= mt->last_level; level++) {
GLuint i;
for (i = 0; i < depth; i++) {
@@ -193,18 +193,18 @@ i915_miptree_layout_3d(struct intel_mipmap_tree * mt)
  * remarkable how wasteful of memory the i915 texture layouts
  * are.  They are largely fixed in the i945.
  */
-   mt->total_height = stack_height * mt->depth0;
+   mt->total_height = stack_height * mt->physical_depth0;
  }

  static void
  i915_miptree_layout_2d(struct intel_mipmap_tree * mt)
  {
-   GLuint width = mt->width0;
-   GLuint he

Re: [Mesa-dev] [PATCH 3/3] intel: Clean up confusion between logical and physical surface dimensions.

2013-01-10 Thread Paul Berry
On 10 January 2013 12:01, Ian Romanick  wrote:

> On 01/08/2013 02:27 PM, Paul Berry wrote:
>
>> In most cases, the width, height, and depth of the physical surface
>> used by the driver to implement a texture or renderbuffer is equal to
>> the logical width, height, and depth exposed to the client through
>> functions such as glTexImage3D().  However, there are two exceptions:
>> cube maps (which have a physical depth of 6 but a logical depth of 1)
>> and multisampled renderbuffers (which have larger physical dimensions
>> than logical dimensions to allow multiple samples per pixel).
>>
>> Previous to this patch, we accounted for the difference between
>> physical and logical surface dimensions at inconsistent places in the
>> call graph (multisampling was accounted for in
>> intel_miptree_create_for_**renderbuffer(), and cubemaps were accounted
>> for in intel_miptree_create_internal(**)).  As a result, it wasn't
>> always clear, when calling a miptree creation function, whether
>> physical or logical dimensions were needed.  Also, we weren't
>> consistent about storing logical dimensions in the intel_mipmap_tree
>> structure (we only did so in the
>> intel_miptree_create_for_**renderbuffer() code path, and we did not
>> store depth).
>>
>> This patch refactors things so that intel_miptree_create_internal(**) is
>> responsible for converting logical to physical dimensions and for
>> storing both the physical and logical dimensions in the
>> intel_mipmap_tree structure.  As a result, all miptree creation
>> functions interpret their arguments as logical dimensions, and both
>> physical and logical dimensions are always available to functions that
>> work with intel_mipmap_trees.
>>
>> In addition, it renames the fields in intel_mipmap_tree used to store
>> the dimensions, so that it is clear from the name whether physical or
>> logical dimensions are being referred to.
>>
>> This should fix the following bugs:
>>
>> - When creating a separate stencil surface for a depthstencil cubemap,
>>we would erroneously try to convert the depth from 1 to 6 twice,
>>resulting in an assertion failure.
>>
>> - When creating an MCS buffer for compressed multisampling, we used
>>physical dimensions instead of logical dimensions, resulting in
>>wasted memory.
>>
>> In addition, this should considerably simplify the implementation of
>> ARB_texture_multisample, because it moves the code to compute the
>> physical size of multisampled surfaces out of renderbuffer-only code.
>> ---
>>   src/mesa/drivers/dri/i915/**i915_tex_layout.c |  36 ++---
>>   src/mesa/drivers/dri/i965/brw_**tex_layout.c  |  20 +--
>>   src/mesa/drivers/dri/intel/**intel_fbo.c  |   1 -
>>   src/mesa/drivers/dri/intel/**intel_mipmap_tree.c  | 191
>> +++-
>>   src/mesa/drivers/dri/intel/**intel_mipmap_tree.h  |  28 ++--
>>   src/mesa/drivers/dri/intel/**intel_tex_image.c|   1 -
>>   src/mesa/drivers/dri/intel/**intel_tex_layout.c   |  18 +--
>>   src/mesa/drivers/dri/intel/**intel_tex_validate.c |   1 -
>>   8 files changed, 143 insertions(+), 153 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i915/**i915_tex_layout.c
>> b/src/mesa/drivers/dri/i915/**i915_tex_layout.c
>> index 1e3cfad..90911a6 100644
>> --- a/src/mesa/drivers/dri/i915/**i915_tex_layout.c
>> +++ b/src/mesa/drivers/dri/i915/**i915_tex_layout.c
>> @@ -114,9 +114,9 @@ static GLint bottom_offsets[6] = {
>>   static void
>>   i915_miptree_layout_cube(**struct intel_mipmap_tree * mt)
>>   {
>> -   const GLuint dim = mt->width0;
>> +   const GLuint dim = mt->physical_width0;
>>  GLuint face;
>> -   GLuint lvlWidth = mt->width0, lvlHeight = mt->height0;
>> +   GLuint lvlWidth = mt->physical_width0, lvlHeight =
>> mt->physical_height0;
>>  GLint level;
>>
>>  assert(lvlWidth == lvlHeight); /* cubemap images are square */
>> @@ -156,14 +156,14 @@ i915_miptree_layout_cube(**struct
>> intel_mipmap_tree * mt)
>>   static void
>>   i915_miptree_layout_3d(struct intel_mipmap_tree * mt)
>>   {
>> -   GLuint width = mt->width0;
>> -   GLuint height = mt->height0;
>> -   GLuint depth = mt->depth0;
>> +   GLuint width = mt->physical_width0;
>> +   GLuint height = mt->physical_height0;
>> +   GLuint depth = mt->physical_depth0;
>>  GLuint stack_height = 0;
>>  GLint level;
>>
>>  /* Calculate the size of a single slice. */
>> -   mt->total_width = mt->width0;
>> +   mt->total_width = mt->physical_width0;
>>
>>  /* XXX: hardware expects/requires 9 levels at minimum. */
>>  for (level = mt->first_level; level <= MAX2(8, mt->last_level);
>> level++) {
>> @@ -178,7 +178,7 @@ i915_miptree_layout_3d(struct intel_mipmap_tree * mt)
>>  }
>>
>>  /* Fixup depth image_offsets: */
>> -   depth = mt->depth0;
>> +   depth = mt->physical_depth0;
>>  for (level = mt->first_level; level <= mt->last_level; level++) {
>> GLuint i;
>> for (i = 0; i < depth; i++) {
>> @@ -193,18 +193,18 @@ i915_miptree_layout_3

Re: [Mesa-dev] [PATCH 3/3] intel: Clean up confusion between logical and physical surface dimensions.

2013-01-10 Thread Ian Romanick

On 01/10/2013 01:03 PM, Paul Berry wrote:

On 10 January 2013 12:01, Ian Romanick <i...@freedesktop.org> wrote:

On 01/08/2013 02:27 PM, Paul Berry wrote:

In most cases, the width, height, and depth of the physical surface
used by the driver to implement a texture or renderbuffer is
equal to
the logical width, height, and depth exposed to the client through
functions such as glTexImage3D().  However, there are two
exceptions:
cube maps (which have a physical depth of 6 but a logical depth
of 1)
and multisampled renderbuffers (which have larger physical
dimensions
than logical dimensions to allow multiple samples per pixel).

Previous to this patch, we accounted for the difference between
physical and logical surface dimensions at inconsistent places
in the
call graph (multisampling was accounted for in
intel_miptree_create_for___renderbuffer(), and cubemaps were
accounted
for in intel_miptree_create_internal(__)).  As a result, it wasn't
always clear, when calling a miptree creation function, whether
physical or logical dimensions were needed.  Also, we weren't
consistent about storing logical dimensions in the intel_mipmap_tree
structure (we only did so in the
intel_miptree_create_for___renderbuffer() code path, and we did not
store depth).

This patch refactors things so that
intel_miptree_create_internal(__) is
responsible for converting logical to physical dimensions and for
storing both the physical and logical dimensions in the
intel_mipmap_tree structure.  As a result, all miptree creation
functions interpret their arguments as logical dimensions, and both
physical and logical dimensions are always available to
functions that
work with intel_mipmap_trees.

In addition, it renames the fields in intel_mipmap_tree used to
store
the dimensions, so that it is clear from the name whether
physical or
logical dimensions are being referred to.

This should fix the following bugs:

- When creating a separate stencil surface for a depthstencil
cubemap,
we would erroneously try to convert the depth from 1 to 6 twice,
resulting in an assertion failure.

- When creating an MCS buffer for compressed multisampling, we used
physical dimensions instead of logical dimensions, resulting in
wasted memory.

In addition, this should considerably simplify the implementation of
ARB_texture_multisample, because it moves the code to compute the
physical size of multisampled surfaces out of renderbuffer-only
code.
---
   src/mesa/drivers/dri/i915/__i915_tex_layout.c |  36 ++---
   src/mesa/drivers/dri/i965/brw___tex_layout.c  |  20 +--
   src/mesa/drivers/dri/intel/__intel_fbo.c  |   1 -
   src/mesa/drivers/dri/intel/__intel_mipmap_tree.c  | 191
+++-
   src/mesa/drivers/dri/intel/__intel_mipmap_tree.h  |  28 ++--
   src/mesa/drivers/dri/intel/__intel_tex_image.c|   1 -
   src/mesa/drivers/dri/intel/__intel_tex_layout.c   |  18 +--
   src/mesa/drivers/dri/intel/__intel_tex_validate.c |   1 -
   8 files changed, 143 insertions(+), 153 deletions(-)

diff --git a/src/mesa/drivers/dri/i915/__i915_tex_layout.c
b/src/mesa/drivers/dri/i915/__i915_tex_layout.c
index 1e3cfad..90911a6 100644
--- a/src/mesa/drivers/dri/i915/__i915_tex_layout.c
+++ b/src/mesa/drivers/dri/i915/__i915_tex_layout.c
@@ -114,9 +114,9 @@ static GLint bottom_offsets[6] = {
   static void
   i915_miptree_layout_cube(__struct intel_mipmap_tree * mt)
   {
-   const GLuint dim = mt->width0;
+   const GLuint dim = mt->physical_width0;
  GLuint face;
-   GLuint lvlWidth = mt->width0, lvlHeight = mt->height0;
+   GLuint lvlWidth = mt->physical_width0, lvlHeight =
mt->physical_height0;
  GLint level;

  assert(lvlWidth == lvlHeight); /* cubemap images are square */
@@ -156,14 +156,14 @@ i915_miptree_layout_cube(__struct
intel_mipmap_tree * mt)
   static void
   i915_miptree_layout_3d(struct intel_mipmap_tree * mt)
   {
-   GLuint width = mt->width0;
-   GLuint height = mt->height0;
-   GLuint depth = mt->depth0;
+   GLuint width = mt->physical_width0;
+   GLuint height = mt->physical_height0;
+   GLuint depth = mt->physical_depth0;
  GLuint stack_height = 0;
  GLint level;

  /* Calculate the size of a single slice. */
-   mt->total_width = mt->width

[Mesa-dev] [PATCH] mesa: Print more informative debug for _mesa_do_init_remap_table().

2013-01-10 Thread Eric Anholt
This is the same logic from _mesa_map_function_array().
---
 src/mesa/main/remap.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/remap.c b/src/mesa/main/remap.c
index c89fba4..a098705 100644
--- a/src/mesa/main/remap.c
+++ b/src/mesa/main/remap.c
@@ -208,8 +208,10 @@ _mesa_do_init_remap_table(const char *pool,
   offset = _mesa_map_function_spec(spec);
   /* store the dispatch offset in the remap table */
   driDispatchRemapTable[i] = offset;
-  if (offset < 0)
- _mesa_warning(NULL, "failed to remap index %d", i);
+  if (offset < 0) {
+ const char *name = spec + strlen(spec) + 1;
+ _mesa_warning(NULL, "failed to remap %s", name);
+  }
}
 }
 
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] intel: Clean up confusion between logical and physical surface dimensions.

2013-01-10 Thread Paul Berry
On 10 January 2013 13:23, Ian Romanick  wrote:

> On 01/10/2013 01:03 PM, Paul Berry wrote:
>
>> On 10 January 2013 12:01, Ian Romanick wrote:
>>
>> On 01/08/2013 02:27 PM, Paul Berry wrote:
>>
>> In most cases, the width, height, and depth of the physical
>> surface
>> used by the driver to implement a texture or renderbuffer is
>> equal to
>> the logical width, height, and depth exposed to the client through
>> functions such as glTexImage3D().  However, there are two
>> exceptions:
>> cube maps (which have a physical depth of 6 but a logical depth
>> of 1)
>> and multisampled renderbuffers (which have larger physical
>> dimensions
>> than logical dimensions to allow multiple samples per pixel).
>>
>> Previous to this patch, we accounted for the difference between
>> physical and logical surface dimensions at inconsistent places
>> in the
>> call graph (multisampling was accounted for in
>> intel_miptree_create_for_renderbuffer(), and cubemaps were
>> accounted
>> for in intel_miptree_create_internal()).  As a result, it
>> wasn't
>>
>> always clear, when calling a miptree creation function, whether
>> physical or logical dimensions were needed.  Also, we weren't
>> consistent about storing logical dimensions in the
>> intel_mipmap_tree
>> structure (we only did so in the
>> intel_miptree_create_for_renderbuffer() code path, and we
>> did not
>>
>> store depth).
>>
>> This patch refactors things so that
>> intel_miptree_create_internal() is
>>
>> responsible for converting logical to physical dimensions and for
>> storing both the physical and logical dimensions in the
>> intel_mipmap_tree structure.  As a result, all miptree creation
>> functions interpret their arguments as logical dimensions, and
>> both
>> physical and logical dimensions are always available to
>> functions that
>> work with intel_mipmap_trees.
>>
>> In addition, it renames the fields in intel_mipmap_tree used to
>> store
>> the dimensions, so that it is clear from the name whether
>> physical or
>> logical dimensions are being referred to.
>>
>> This should fix the following bugs:
>>
>> - When creating a separate stencil surface for a depthstencil
>> cubemap,
>> we would erroneously try to convert the depth from 1 to 6
>> twice,
>> resulting in an assertion failure.
>>
>> - When creating an MCS buffer for compressed multisampling, we
>> used
>> physical dimensions instead of logical dimensions, resulting
>> in
>> wasted memory.
>>
>> In addition, this should considerably simplify the implementation
>> of
>> ARB_texture_multisample, because it moves the code to compute the
>> physical size of multisampled surfaces out of renderbuffer-only
>> code.
>> ---
>>src/mesa/drivers/dri/i915/i915_tex_layout.c |  36
>> ++---
>>src/mesa/drivers/dri/i965/brw_tex_layout.c  |  20 +--
>>src/mesa/drivers/dri/intel/intel_fbo.c  |   1 -
>>src/mesa/drivers/dri/intel/intel_mipmap_tree.c  | 191
>> +++-
>>src/mesa/drivers/dri/intel/intel_mipmap_tree.h  |  28 ++--
>>src/mesa/drivers/dri/intel/intel_tex_image.c|   1 -
>>src/mesa/drivers/dri/intel/intel_tex_layout.c   |  18 +--
>>src/mesa/drivers/dri/intel/intel_tex_validate.c |   1 -
>>
>>8 files changed, 143 insertions(+), 153 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c
>> b/src/mesa/drivers/dri/i915/i915_tex_layout.c
>> index 1e3cfad..90911a6 100644
>> --- a/src/mesa/drivers/dri/i915/i915_tex_layout.c
>> +++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c
>>
>> @@ -114,9 +114,9 @@ static GLint bottom_offsets[6] = {
>>static void
>>i915_miptree_layout_cube(struct intel_mipmap_tree * mt)
>>
>>{
>> -   const GLuint dim = mt->width0;
>> +   const GLuint dim = mt->physical_width0;
>>   GLuint face;
>> -   GLuint lvlWidth = mt->width0, lvlHeight = mt->height0;
>> +   GLuint lvlWidth = mt->physical_width0, lvlHeight =
>> mt->physical_height0;
>>   GLint level;
>>
>>   assert(lvlWidth == lvlHeight); /* cubemap images are square
>> */
>> @@ -156,14 +156,14 @@ i915_miptree_layout_cube(struct
>>
>> intel_mipmap_tree * mt)
>>static void
>>i915_miptree_layout_3d(struct intel_mipmap_tree * mt)
>>{
>>   

Re: [Mesa-dev] [PATCH 06/10] i965: Lower the GLSL ES 3.00 pack/unpack operations

2013-01-10 Thread Chad Versace
On 01/10/2013 10:39 AM, Ian Romanick wrote:
> On 01/10/2013 12:10 AM, Chad Versace wrote:
>> On gen < 7, we fully lower all operations to arithmetic and bitwise
>> operations.
>>
>> On gen >= 7, we fully lower the Snorm2x16 and Unorm2x16 operations, and
>> partially lower the Half2x16 operations.
>>
>> Signed-off-by: Chad Versace 
>> ---
>>   src/glsl/lower_packing_builtins.cpp  |  1 +
>>   src/mesa/drivers/dri/i965/brw_shader.cpp | 32 
>> 
>>   2 files changed, 33 insertions(+)
>>
>> diff --git a/src/glsl/lower_packing_builtins.cpp
>> b/src/glsl/lower_packing_builtins.cpp
>> index cd84084..f965a27 100644
>> --- a/src/glsl/lower_packing_builtins.cpp
>> +++ b/src/glsl/lower_packing_builtins.cpp
>> @@ -1013,6 +1013,7 @@ private:
>>new(mem_ctx) ir_variable(glsl_type::vec2_type,
>> "tmp_split_pack_half_2x16_v",
>> ir_var_temporary);
>> +  insert_instruction(v);
>> insert_instruction(
>>new(mem_ctx) ir_assignment(
>>   new(mem_ctx) ir_dereference_variable(v),
> 
> Shouldn't this hunk be in the previous patch?

Right. My mistake.

>> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp
>> b/src/mesa/drivers/dri/i965/brw_shader.cpp
>> index 1e8d574..65f8e7d 100644
>> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
>> @@ -75,6 +75,34 @@ brw_shader_precompile(struct gl_context *ctx, struct
>> gl_shader_program *prog)
>>  return true;
>>   }
>>
>> +static void
>> +brw_lower_packing_builtins(struct brw_context *brw,
>> +   gl_shader_type shader_type,
>> +   exec_list *ir)
>> +{
>> +   int ops = LOWER_PACK_SNORM_2x16
>> +   | LOWER_UNPACK_SNORM_2x16
>> +   | LOWER_PACK_UNORM_2x16
>> +   | LOWER_UNPACK_UNORM_2x16;
>> +
>> +   if (brw->intel.gen >= 7) {
>> +  switch (shader_type) {
>> +  case MESA_SHADER_FRAGMENT:
>> + /* Scalarize these operations. */
>> + ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
>> + |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
>> + break;
> 
> Do we think other shader types are going to need similar treatment? Otherwise 
> an
> if-statement would be better.

Only SOA code will require splitting the Half2x16 functions. That rules out the
vs and gs, at least for now.

I don't really care either way about switch-vs-if. So, if you think it's easier
to read with an if-statement, I can do that. It'll look like this:

if (brw->intel.gen >= 7 && shader_type == MESA_SHADER_FRAGMENT) {
   ops |= ...;
} else {
   ops |= ...;
}

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] intel: Clean up confusion between logical and physical surface dimensions.

2013-01-10 Thread Ian Romanick

On 01/10/2013 01:48 PM, Paul Berry wrote:

On 10 January 2013 13:23, Ian Romanick <i...@freedesktop.org> wrote:

On 01/10/2013 01:03 PM, Paul Berry wrote:

On 10 January 2013 12:01, Ian Romanick <i...@freedesktop.org>
wrote:

 On 01/08/2013 02:27 PM, Paul Berry wrote:

 In most cases, the width, height, and depth of the
physical surface
 used by the driver to implement a texture or
renderbuffer is
 equal to
 the logical width, height, and depth exposed to the
client through
 functions such as glTexImage3D().  However, there are two
 exceptions:
 cube maps (which have a physical depth of 6 but a
logical depth
 of 1)
 and multisampled renderbuffers (which have larger physical
 dimensions
 than logical dimensions to allow multiple samples per
pixel).

 Previous to this patch, we accounted for the difference
between
 physical and logical surface dimensions at inconsistent
places
 in the
 call graph (multisampling was accounted for in
 intel_miptree_create_for_renderbuffer(), and
cubemaps were
 accounted
 for in intel_miptree_create_internal()).  As a
result, it wasn't

 always clear, when calling a miptree creation function,
whether
 physical or logical dimensions were needed.  Also, we
weren't
 consistent about storing logical dimensions in the
intel_mipmap_tree
 structure (we only did so in the
 intel_miptree_create_for_renderbuffer() code path,
and we did not

 store depth).

 This patch refactors things so that
 intel_miptree_create_internal() is

 responsible for converting logical to physical
dimensions and for
 storing both the physical and logical dimensions in the
 intel_mipmap_tree structure.  As a result, all miptree
creation
 functions interpret their arguments as logical
dimensions, and both
 physical and logical dimensions are always available to
 functions that
 work with intel_mipmap_trees.

 In addition, it renames the fields in intel_mipmap_tree
used to
 store
 the dimensions, so that it is clear from the name whether
 physical or
 logical dimensions are being referred to.

 This should fix the following bugs:

 - When creating a separate stencil surface for a
depthstencil
 cubemap,
 we would erroneously try to convert the depth from
1 to 6 twice,
 resulting in an assertion failure.

 - When creating an MCS buffer for compressed
multisampling, we used
 physical dimensions instead of logical dimensions,
resulting in
 wasted memory.

 In addition, this should considerably simplify the
implementation of
 ARB_texture_multisample, because it moves the code to
compute the
 physical size of multisampled surfaces out of
renderbuffer-only
 code.
 ---
src/mesa/drivers/dri/i915/i915_tex_layout.c
|  36 ++---
src/mesa/drivers/dri/i965/brw_tex_layout.c
  |  20 +--
src/mesa/drivers/dri/intel/intel_fbo.c
  |   1 -
src/mesa/drivers/dri/intel/intel_mipmap_tree.c
  | 191
 +++-
src/mesa/drivers/dri/intel/intel_mipmap_tree.h
  |  28 ++--
src/mesa/drivers/dri/intel/intel_tex_image.c
  |   1 -
src/mesa/drivers/dri/intel/intel_tex_layout.c
|  18 +--
src/mesa/drivers/dri/intel/intel_tex_validate.c
|   1 -

8 files changed, 143 insertions(+), 153 deletions(-)

 diff --git
a/src/mesa/drivers/dri/i915/i915_tex_layout.c
 b/src/mesa/drivers/dri/i915/i915_tex_layout.c
 index 1e3cfad..90911a6 100644
 --- a/src/mesa/drivers/dri/i915/i915_tex_layout.c
 +++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c

 @@ -114,9 +114,9 @@ static GLint bottom_offsets[6] = {

Re: [Mesa-dev] [PATCH 10/10] i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations

2013-01-10 Thread Chad Versace
On 01/10/2013 10:45 AM, Ian Romanick wrote:
> On 01/10/2013 12:10 AM, Chad Versace wrote:
>> Signed-off-by: Chad Versace 
>> ---
>>   src/mesa/drivers/dri/i965/brw_defines.h|  1 +
>>   src/mesa/drivers/dri/i965/brw_fs.h |  7 ++
>>   .../dri/i965/brw_fs_channel_expressions.cpp| 29 +++-
>>   src/mesa/drivers/dri/i965/brw_fs_emit.cpp  | 39 ++-
>>   src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 78 
>> +-
>>   5 files changed, 149 insertions(+), 5 deletions(-)


>> +  if (expr->operands[i]->type->is_vector()) {
>> + switch (expr->operation) {
>> + case ir_binop_pack_half_2x16_split:
>> + case ir_unop_pack_half_2x16:
>> + case ir_unop_unpack_half_2x16:
>> + case ir_unop_unpack_half_2x16_split_x:
>> + case ir_unop_unpack_half_2x16_split_y:
>> +assert(!"WTF");
> 
> Classy. :)  Maybe (like below)
> 
> assert(!"not reached: expression operates on scalars only");
> 

Um... thanks. I forgot to fix that.
(The WTF was from a particularly frustrating debugging session).

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/10] i965: Lower the GLSL ES 3.00 pack/unpack operations

2013-01-10 Thread Ian Romanick

On 01/10/2013 01:51 PM, Chad Versace wrote:

On 01/10/2013 10:39 AM, Ian Romanick wrote:

On 01/10/2013 12:10 AM, Chad Versace wrote:

On gen < 7, we fully lower all operations to arithmetic and bitwise
operations.

On gen >= 7, we fully lower the Snorm2x16 and Unorm2x16 operations, and
partially lower the Half2x16 operations.

Signed-off-by: Chad Versace 
---
   src/glsl/lower_packing_builtins.cpp  |  1 +
   src/mesa/drivers/dri/i965/brw_shader.cpp | 32 

   2 files changed, 33 insertions(+)

diff --git a/src/glsl/lower_packing_builtins.cpp
b/src/glsl/lower_packing_builtins.cpp
index cd84084..f965a27 100644
--- a/src/glsl/lower_packing_builtins.cpp
+++ b/src/glsl/lower_packing_builtins.cpp
@@ -1013,6 +1013,7 @@ private:
new(mem_ctx) ir_variable(glsl_type::vec2_type,
 "tmp_split_pack_half_2x16_v",
 ir_var_temporary);
+  insert_instruction(v);
 insert_instruction(
new(mem_ctx) ir_assignment(
   new(mem_ctx) ir_dereference_variable(v),


Shouldn't this hunk be in the previous patch?


Right. My mistake.


diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 1e8d574..65f8e7d 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -75,6 +75,34 @@ brw_shader_precompile(struct gl_context *ctx, struct
gl_shader_program *prog)
  return true;
   }

+static void
+brw_lower_packing_builtins(struct brw_context *brw,
+   gl_shader_type shader_type,
+   exec_list *ir)
+{
+   int ops = LOWER_PACK_SNORM_2x16
+   | LOWER_UNPACK_SNORM_2x16
+   | LOWER_PACK_UNORM_2x16
+   | LOWER_UNPACK_UNORM_2x16;
+
+   if (brw->intel.gen >= 7) {
+  switch (shader_type) {
+  case MESA_SHADER_FRAGMENT:
+ /* Scalarize these operations. */
+ ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
+ |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
+ break;


Do we think other shader types are going to need similar treatment? Otherwise an
if-statement would be better.


Only SOA code will require splitting the Half2x16 functions. That rules out the
vs and gs, at least for now.

I don't really care either way about switch-vs-if. So, if you think it's easier
to read with an if-statement, I can do that. It'll look like this:

if (brw->intel.gen >= 7 && shader_type == MESA_SHADER_FRAGMENT) {
ops |= ...;
} else {
ops |= ...;
}


That's different from what you currently have.  What you currently have is

   if (brw->intel.gen >= 7) {
  if (shader_type == MESA_SHADER_FRAGMENT) {
 ops |= ...;
  }
   } else {
  ops |= ...;
   }

Right?  I think these if-statements with the comment about SOA code are good.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/10] i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations

2013-01-10 Thread Ian Romanick

On 01/10/2013 01:54 PM, Chad Versace wrote:

On 01/10/2013 10:45 AM, Ian Romanick wrote:

On 01/10/2013 12:10 AM, Chad Versace wrote:

Signed-off-by: Chad Versace 
---
   src/mesa/drivers/dri/i965/brw_defines.h|  1 +
   src/mesa/drivers/dri/i965/brw_fs.h |  7 ++
   .../dri/i965/brw_fs_channel_expressions.cpp| 29 +++-
   src/mesa/drivers/dri/i965/brw_fs_emit.cpp  | 39 ++-
   src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 78 
+-
   5 files changed, 149 insertions(+), 5 deletions(-)




+  if (expr->operands[i]->type->is_vector()) {
+ switch (expr->operation) {
+ case ir_binop_pack_half_2x16_split:
+ case ir_unop_pack_half_2x16:
+ case ir_unop_unpack_half_2x16:
+ case ir_unop_unpack_half_2x16_split_x:
+ case ir_unop_unpack_half_2x16_split_y:
+assert(!"WTF");


Classy. :)  Maybe (like below)

 assert(!"not reached: expression operates on scalars only");



Um... thanks. I forgot to fix that.
(The WTF was from a particularly frustrating debugging session).


Yeah, I figured it was something like that.  Your "WTF" is way better 
than what you usually use. :)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Mesa 9.1 stable branch

2013-01-10 Thread Ian Romanick

It's getting to be about that time again. :)

I'd like to make the 9.1 branch on Wednesday, January 25th (a week from 
next Wednesday).  I'd like to shoot for doing the 9.1 release on Friday, 
February 22nd.


Does this sound workable to folks?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] build fail after recent changes

2013-01-10 Thread Andy Furniss

make distclean
git clean -dfx
git pull

./autogen.sh --prefix=/usr --disable-egl --enable-texture-float 
--enable-gallium-g3dvl --enable-r600-llvm-compiler 
--with-gallium-drivers=r600,swrast --with-dri-drivers= && make -j5


(get fail and run just make again)


Making all in dri-r600
make[3]: Entering directory 
`/mnt/sdb1/Src64/Mesa-git/mesa/src/gallium/targets/dri-r600'

  GEN  r600_dri.la
../../../../src/gallium/drivers/r600/.libs/libr600.a(cso_cache.o): In 
function `hash_key':
/mnt/sdb1/Src64/Mesa-git/mesa/src/gallium/auxiliary/cso_cache/cso_cache.c:57: 
multiple definition of `cso_construct_key'
../../../../src/gallium/auxiliary/.libs/libgallium.a(cso_cache.o):/mnt/sdb1/Src64/Mesa-git/mesa/src/gallium/auxiliary/cso_cache/cso_cache.c:57: 
first defined here
../../../../src/gallium/drivers/r600/.libs/libr600.a(cso_cache.o): In 
function `_cso_hash_for_type':
/mnt/sdb1/Src64/Mesa-git/mesa/src/gallium/auxiliary/cso_cache/cso_cache.c:86: 
multiple definition of `cso_insert_state'


1000s of similar lines

/mnt/sdb1/Src64/Mesa-git/mesa/src/gallium/auxiliary/vl/vl_zscan.c:571: 
multiple definition of `vl_zscan_render'
../../../../src/gallium/auxiliary/.libs/libgallium.a(vl_zscan.o):/mnt/sdb1/Src64/Mesa-git/mesa/src/gallium/auxiliary/vl/vl_zscan.c:571: 
first defined here
../../../../src/gallium/drivers/r600/.libs/libr600.a(vl_zscan.o):(.rodata+0x0): 
multiple definition of `vl_zscan_alternate'
../../../../src/gallium/auxiliary/.libs/libgallium.a(vl_zscan.o):(.rodata+0x0): 
first defined here
../../../../src/gallium/drivers/r600/.libs/libr600.a(vl_zscan.o):(.rodata+0x100): 
multiple definition of `vl_zscan_normal'
../../../../src/gallium/auxiliary/.libs/libgallium.a(vl_zscan.o):(.rodata+0x100): 
first defined here
../../../../src/gallium/drivers/r600/.libs/libr600.a(vl_zscan.o):(.rodata+0x200): 
multiple definition of `vl_zscan_linear'
../../../../src/gallium/auxiliary/.libs/libgallium.a(vl_zscan.o):(.rodata+0x200): 
first defined here

collect2: ld returned 1 exit status
make[3]: *** [r600_dri.la] Error 1
make[3]: Leaving directory 
`/mnt/sdb1/Src64/Mesa-git/mesa/src/gallium/targets/dri-r600'

make[2]: *** [all-recursive] Error 1
make[2]: Leaving directory 
`/mnt/sdb1/Src64/Mesa-git/mesa/src/gallium/targets'

make[1]: *** [all-recursive] Error 1
make[1]: Leaving directory `/mnt/sdb1/Src64/Mesa-git/mesa/src'
make: *** [all-recursive] Error 1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] intel: Make intel_blit.c take pitches in bytes.

2013-01-10 Thread Eric Anholt
As we gain support for NPOT cpp, a pitch may not divide by cpp cleanly.
---
 src/mesa/drivers/dri/intel/intel_blit.c |5 -
 src/mesa/drivers/dri/intel/intel_mipmap_tree.c  |8 
 src/mesa/drivers/dri/intel/intel_pixel_bitmap.c |2 +-
 src/mesa/drivers/dri/intel/intel_pixel_read.c   |4 ++--
 src/mesa/drivers/dri/intel/intel_regions.c  |4 ++--
 src/mesa/drivers/dri/intel/intel_tex_copy.c |4 ++--
 src/mesa/drivers/dri/intel/intel_tex_image.c|3 ++-
 src/mesa/drivers/dri/intel/intel_tex_subimage.c |4 ++--
 8 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_blit.c 
b/src/mesa/drivers/dri/intel/intel_blit.c
index 867d7b3..9351df7 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -144,9 +144,6 @@ intelEmitCopyBlit(struct intel_context *intel,
src_buffer, src_pitch, src_offset, src_x, src_y,
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
 
-   src_pitch *= cpp;
-   dst_pitch *= cpp;
-
/* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to 
drop
 * the low bits.
 */
@@ -421,8 +418,6 @@ intelEmitImmediateColorExpandBlit(struct intel_context 
*intel,
if (w < 0 || h < 0)
   return true;
 
-   dst_pitch *= cpp;
-
DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
__FUNCTION__,
dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 7542219..ad43781 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -727,9 +727,9 @@ intel_miptree_copy_slice(struct intel_context *intel,
 
if (!intelEmitCopyBlit(intel,
  dst_mt->region->cpp,
- src_mt->region->pitch, src_mt->region->bo,
+ src_mt->region->pitch * src_mt->region->cpp, 
src_mt->region->bo,
  0, src_mt->region->tiling,
- dst_mt->region->pitch, dst_mt->region->bo,
+ dst_mt->region->pitch * dst_mt->region->cpp, 
dst_mt->region->bo,
  0, dst_mt->region->tiling,
  src_x, src_y,
  dst_x, dst_y,
@@ -1165,9 +1165,9 @@ intel_miptree_map_blit(struct intel_context *intel,
 
if (!intelEmitCopyBlit(intel,
  mt->region->cpp,
- mt->region->pitch, mt->region->bo,
+ mt->region->pitch * mt->region->cpp, mt->region->bo,
  0, mt->region->tiling,
- map->stride / mt->region->cpp, map->bo,
+ map->stride, map->bo,
  0, I915_TILING_NONE,
  x, y,
  0, 0,
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c 
b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 954dfc5..6105b42 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -274,7 +274,7 @@ do_blit_bitmap( struct gl_context *ctx,
(GLubyte *)stipple,
sz,
color,
-   dst->pitch,
+   dst->pitch * dst->cpp,
dst->bo,
0,
dst->tiling,
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c 
b/src/mesa/drivers/dri/intel/intel_pixel_read.c
index ab4e581..1e524cd 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c
@@ -151,8 +151,8 @@ do_blit_readpixels(struct gl_context * ctx,
 
if (!intelEmitCopyBlit(intel,
  src->cpp,
- src->pitch, src->bo, 0, src->tiling,
- rowLength, dst_buffer, dst_offset, false,
+ src->pitch * src->cpp, src->bo, 0, src->tiling,
+ rowLength * src->cpp, dst_buffer, dst_offset, false,
  x, y,
  dst_x, dst_y,
  width, height,
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c 
b/src/mesa/drivers/dri/intel/intel_regions.c
index 7cb008c..d9e026b 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -391,8 +391,8 @@ intel_region_copy(struct intel_context *intel,
 
return intelEmitCopyBlit(intel,
dst->cpp,
-   src_pitch, src->bo, src_offset, src->tiling,
-  

[Mesa-dev] [PATCH 2/4] intel: Make intel_region's pitch be bytes instead of pixels.

2013-01-10 Thread Eric Anholt
We almost never want a stride in pixels -- if you're doing anything with
a stride, you're specifying an offset or incrementing a pointer, and in
both cases you had to multiply by cpp to get the bytes value you wanted.
But worse, on the way to creating a region from a new tiled BO, we
divided by cpp to get pitch in pixels, and for an RGB32 buffer (an
upcoming change) the pitch wouldn't divide exactly, and we'd end up with
a wrong stride in our region.
---
 src/mesa/drivers/dri/i915/i830_texstate.c |2 +-
 src/mesa/drivers/dri/i915/i915_texstate.c |5 ++---
 src/mesa/drivers/dri/i915/i915_vtbl.c |2 +-
 src/mesa/drivers/dri/i965/brw_misc_state.c|6 +++---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c  |4 ++--
 src/mesa/drivers/dri/i965/gen6_blorp.cpp  |8 +++
 src/mesa/drivers/dri/i965/gen7_blorp.cpp  |8 +++
 src/mesa/drivers/dri/i965/gen7_misc_state.c   |6 +++---
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |7 +++---
 src/mesa/drivers/dri/intel/intel_blit.c   |8 +++
 src/mesa/drivers/dri/intel/intel_context.c|2 +-
 src/mesa/drivers/dri/intel/intel_mipmap_tree.c|   24 +++--
 src/mesa/drivers/dri/intel/intel_pixel_bitmap.c   |2 +-
 src/mesa/drivers/dri/intel/intel_pixel_read.c |2 +-
 src/mesa/drivers/dri/intel/intel_regions.c|   10 -
 src/mesa/drivers/dri/intel/intel_regions.h|2 +-
 src/mesa/drivers/dri/intel/intel_screen.c |   17 ++-
 src/mesa/drivers/dri/intel/intel_tex_copy.c   |4 ++--
 src/mesa/drivers/dri/intel/intel_tex_image.c  |9 
 src/mesa/drivers/dri/intel/intel_tex_subimage.c   |6 ++
 src/mesa/drivers/dri/intel/intel_tex_validate.c   |   10 +
 21 files changed, 67 insertions(+), 77 deletions(-)

diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c 
b/src/mesa/drivers/dri/i915/i830_texstate.c
index 73ab55e6..f186fac 100644
--- a/src/mesa/drivers/dri/i915/i830_texstate.c
+++ b/src/mesa/drivers/dri/i915/i830_texstate.c
@@ -150,7 +150,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint 
unit, GLuint ss3)
 
drm_intel_bo_reference(intelObj->mt->region->bo);
i830->state.tex_buffer[unit] = intelObj->mt->region->bo;
-   pitch = intelObj->mt->region->pitch * intelObj->mt->cpp;
+   pitch = intelObj->mt->region->pitch;
 
/* XXX: This calculation is probably broken for tiled images with
 * a non-page-aligned offset.
diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c 
b/src/mesa/drivers/dri/i915/i915_texstate.c
index 94a8e55..2f32869 100644
--- a/src/mesa/drivers/dri/i915/i915_texstate.c
+++ b/src/mesa/drivers/dri/i915/i915_texstate.c
@@ -141,7 +141,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint 
unit, GLuint ss3)
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage;
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
-   GLuint *state = i915->state.Tex[unit], format, pitch;
+   GLuint *state = i915->state.Tex[unit], format;
GLint lodbias, aniso = 0;
GLubyte border[4];
GLfloat maxlod;
@@ -169,7 +169,6 @@ i915_update_tex_unit(struct intel_context *intel, GLuint 
unit, GLuint ss3)
 
format = translate_texture_format(firstImage->TexFormat,
 tObj->DepthMode);
-   pitch = intelObj->mt->region->pitch * intelObj->mt->cpp;
 
state[I915_TEXREG_MS3] =
   (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) |
@@ -187,7 +186,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint 
unit, GLuint ss3)
 */
maxlod = MIN2(sampler->MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
state[I915_TEXREG_MS4] =
-  ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) |
+  ((((intelObj->mt->region->pitch / 4) - 1) << MS4_PITCH_SHIFT) |
MS4_CUBE_FACE_ENA_MASK |
(U_FIXED(CLAMP(maxlod, 0.0, 11.0), 2) << MS4_MAX_LOD_SHIFT) |
((firstImage->Depth - 1) << MS4_VOLUME_DEPTH_SHIFT));
diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c 
b/src/mesa/drivers/dri/i915/i915_vtbl.c
index e78dbc8..91fde55 100644
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -531,7 +531,7 @@ i915_set_buf_info_for_region(uint32_t *state, struct 
intel_region *region,
state[1] = buffer_id;
 
if (region != NULL) {
-  state[1] |= BUF_3D_PITCH(region->pitch * region->cpp);
+  state[1] |= BUF_3D_PITCH(region->pitch);
 
   if (region->tiling != I915_TILING_NONE) {
 state[1] |= BUF_3D_TILED_SURFACE;
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c
index d7724e3..1024c42 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -656,7 +656,7 @@ static void emit_depthbuffer(struct brw_context *brw)
 
   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_DEPT

[Mesa-dev] [PATCH 4/4] i965: Add support for GL_ARB_texture_buffer_object_rgb32.

2013-01-10 Thread Eric Anholt
Tested with piglit ARB_texture_buffer_object/formats.
---
 docs/GL3.txt  |2 +-
 src/mesa/drivers/dri/intel/intel_extensions.c |1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 34cce72..e367c07 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -95,7 +95,7 @@ GL_ARB_gpu_shader_fp64   not 
started
 GL_ARB_sample_shadingnot started
 GL_ARB_shader_subroutine not started
 GL_ARB_tessellation_shader   not started
-GL_ARB_texture_buffer_object_rgb32   DONE (softpipe)
+GL_ARB_texture_buffer_object_rgb32   DONE (i965, softpipe)
 GL_ARB_texture_cube_map_arrayDONE (i965, softpipe)
 GL_ARB_texture_gathernot started
 GL_ARB_transform_feedback2   DONE
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c 
b/src/mesa/drivers/dri/intel/intel_extensions.c
index 5c6b651..32c96fc 100755
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -103,6 +103,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_draw_buffers_blend = true;
   ctx->Extensions.ARB_uniform_buffer_object = true;
   ctx->Extensions.ARB_texture_buffer_object = true;
+  ctx->Extensions.ARB_texture_buffer_object_rgb32 = true;
   ctx->Extensions.ARB_texture_cube_map_array = true;
}
 
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] i965: Add support for MESA_FORMAT_RGB_FLOAT32 surfaces.

2013-01-10 Thread Eric Anholt
This is for GL_ARB_texture_buffer_object_rgb32 support, but it also
causes the format to get used for float32 rgb textures as well on
Ironlake and later.  Since that came with some surprises, separate
the change from the enable commit.
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 233cd9b..5e99592 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -365,7 +365,7 @@ brw_format_for_mesa_format(gl_format mesa_format)
 
   [MESA_FORMAT_RGBA_FLOAT32] = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
   [MESA_FORMAT_RGBA_FLOAT16] = BRW_SURFACEFORMAT_R16G16B16A16_FLOAT,
-  [MESA_FORMAT_RGB_FLOAT32] = 0,
+  [MESA_FORMAT_RGB_FLOAT32] = BRW_SURFACEFORMAT_R32G32B32_FLOAT,
   [MESA_FORMAT_RGB_FLOAT16] = 0,
   [MESA_FORMAT_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_A32_FLOAT,
   [MESA_FORMAT_ALPHA_FLOAT16] = BRW_SURFACEFORMAT_A16_FLOAT,
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] r600g glsl 1.40 streamout with no position test

2013-01-10 Thread Dave Airlie
On Thu, Dec 27, 2012 at 12:29 PM, Marek Olšák  wrote:
> On Thu, Dec 27, 2012 at 12:54 AM, Dave Airlie  wrote:
>> So I've got a persistent hang with the glsl-1.40-tf-no-position when I
>> enabled ubo/tbo and glsl 1.40.
>>
>> My original thoughts were there was no param export from the vertex
>> shader, but I was wrong on that count, the
>> vertex shader exports a param along with the stream output.
>>
>> Below is the TGSI/r600g dump from my evergreen for the offending
>> program, it reliably takes out all the EG cards I tested on (haven't
>> tried anything else).
>>
>> This is blocking me from enabling UBO/TBO on evergreen as I don't want
>> to start having piglit lockups once I enable it.
>>
>> Dave.
>> __
>> --
>> VERT
>> DCL IN[0]
>> DCL OUT[0], GENERIC[12]
>> DCL TEMP[0], LOCAL
>>   0: MOV TEMP[0].x, IN[0].xxxx
>>   1: MOV OUT[0], TEMP[0]
>>   2: END
>> STREAMOUT
>>   0: MEM_STREAM0_BUF0 OUT[0].x___
>
> This looks like the "copy_buffer" shader, which is known to work. The
> only differences are that there is no position output and OUT[0] is
> declared as GENERIC[12].
>
> My bet is that the problem is with the declaration, not with streamout
> itself. Maybe the POSITION output should be added artificially if it's
> missing in the shader, or maybe something else goes wrong after the
> vertex shader. (SPI?)

Just FYI, if I disable transform feedback in the test (comment out the
glBeginTF and glEndTF)

I don't get a hang anymore, not sure this helps.

Dave.
>
> Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Fix out of bounds writes when uncompressing non-block-aligned ETC1.

2013-01-10 Thread Eric Anholt
Fixes a crash in GLB2.1 offscreen on the glthread branch.
---
 src/mesa/main/texcompress_etc_tmp.h |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/texcompress_etc_tmp.h 
b/src/mesa/main/texcompress_etc_tmp.h
index 8bbb2cd..5497566 100644
--- a/src/mesa/main/texcompress_etc_tmp.h
+++ b/src/mesa/main/texcompress_etc_tmp.h
@@ -153,9 +153,9 @@ etc1_unpack_rgba(uint8_t *dst_row,
   for (x = 0; x < width; x+= bw) {
  etc1_parse_block(&block, src);
 
- for (j = 0; j < bh; j++) {
+ for (j = 0; j < MIN2(bh, height - y); j++) {
 uint8_t *dst = dst_row + (y + j) * dst_stride + x * comps;
-for (i = 0; i < bw; i++) {
+for (i = 0; i < MIN2(bw, width - x); i++) {
etc1_fetch_texel(&block, i, j, dst);
dst[3] = 255;
dst += comps;
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] intel: Clean up confusion between logical and physical surface dimensions.

2013-01-10 Thread Eric Anholt
Paul Berry  writes:

> On 10 January 2013 12:01, Ian Romanick  wrote:
>
>> On 01/08/2013 02:27 PM, Paul Berry wrote:
>> This is basically converting depth0 from logical to physical.  We had
>> discussed that this could cause problems with future cubemap arrays.  I may
>> not be following the code completely, but does this potential future
>> problem still loom?
>>
>>
> I think we're ok w.r.t. cubemap arrays.  Once we get around to supporting
> them, all we should have to do is remove the "assert(depth0 == 1)" line and
> replace "depth0 = 6" with "depth0 *= 6".

"get around to supporting them"?  We have GL_ARB_texture_cube_map_array
as of December.


pgpOyyLNx8tmI.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallivm: more integer texture format fetch fixes

2013-01-10 Thread sroland
From: Roland Scheidegger 

Change the texel type to int/uint instead of float throughout the sampling
code which makes it easier to catch errors (as llvm will complain about wrong
types if we mistakenly treat these values as real floats somewhere).
This should also get things like e.g. sampler swizzles (for unused channels)
right.
This fixes piglit texture_integer_glsl130 test.
Border color not working (crashing) yet.
(These formats are not exposed yet in llvmpipe.)

v2: couple cleanups according to José's comments

Reviewed-by: José Fonseca 
---
 src/gallium/drivers/llvmpipe/lp_bld_blend.h |2 +-
 src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c |6 +++---
 src/gallium/drivers/llvmpipe/lp_state_fs.c  |   22 +++---
 src/gallium/drivers/llvmpipe/lp_test_blend.c|2 +-
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h 
b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
index 0a1cea1..3a3be81 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -56,7 +56,7 @@ lp_build_blend(struct lp_build_context *bld,
 LLVMValueRef
 lp_build_blend_aos(struct gallivm_state *gallivm,
const struct pipe_blend_state *blend,
-   const enum pipe_format *cbuf_format,
+   const enum pipe_format cbuf_format,
struct lp_type type,
unsigned rt,
LLVMValueRef src,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c 
b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index 641c253..2dc2082 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -266,7 +266,7 @@ lp_build_blend_factor(struct lp_build_blend_aos_context 
*bld,
  * @param blend the blend state of the shader variant
  * @param cbuf_format   format of the colour buffer
  * @param type  data type of the pixel vector
- * @param rtrt number
+ * @param rtblend state index number
  * @param src   blend src
  * @param dst   blend dst
  * @param mask  optional mask to apply to the blending result
@@ -278,7 +278,7 @@ lp_build_blend_factor(struct lp_build_blend_aos_context 
*bld,
 LLVMValueRef
 lp_build_blend_aos(struct gallivm_state *gallivm,
const struct pipe_blend_state *blend,
-   const enum pipe_format *cbuf_format,
+   const enum pipe_format cbuf_format,
struct lp_type type,
unsigned rt,
LLVMValueRef src,
@@ -298,7 +298,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
unsigned i;
 
-   desc = util_format_description(cbuf_format[rt]);
+   desc = util_format_description(cbuf_format);
 
/* Setup build context */
memset(&bld, 0, sizeof bld);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 5a8351b..551fba6 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1010,7 +1010,7 @@ lp_blend_type_from_format_desc(const struct 
util_format_description *format_desc
 
 
 /**
- * Scale a normalised value from src_bits to dst_bits
+ * Scale a normalized value from src_bits to dst_bits
  */
 static INLINE LLVMValueRef
 scale_bits(struct gallivm_state *gallivm,
@@ -1578,6 +1578,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 unsigned pixels = block_size / src_count;
 unsigned channels = pad_inline ? TGSI_NUM_CHANNELS : dst_channels;
 unsigned alpha_span = 1;
+LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 
 /* Check if we need 2 src_alphas for our shuffles */
 if (pixels > alpha_type.length) {
@@ -1585,8 +1586,15 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 }
 
 /* Broadcast alpha across all channels, e.g. a1a2 to 
a1a1a1a1a2a2a2a2 */
+for (j = 0; j < row_type.length; ++j) {
+   if (j < pixels * channels) {
+  shuffles[j] = lp_build_const_int32(gallivm, j / channels);
+   } else {
+  shuffles[j] = 
LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
+   }
+}
+
 for (i = 0; i < src_count; ++i) {
-   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
unsigned idx1 = i, idx2 = i;
 
if (alpha_span > 1){
@@ -1594,14 +1602,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
   idx2 = idx1 + 1;
}
 
-   for (j = 0; j < row_type.length; ++j) {
-  if (j < pixels * channels) {
- shuffles[j] = lp_build_const_int32(gallivm, j / channels);
-  } else {
-   

Re: [Mesa-dev] [PATCH] gallivm: more integer texture format fetch fixes

2013-01-10 Thread Roland Scheidegger

Oops sorry that was totally the wrong commit message, ignore it...

Roland



On 01/10/2013 06:22 PM, srol...@vmware.com wrote:

From: Roland Scheidegger 

Change the texel type to int/uint instead of float throughout the sampling
code which makes it easier to catch errors (as llvm will complain about wrong
types if we mistakenly treat these values as real floats somewhere).
This should also get things like e.g. sampler swizzles (for unused channels)
right.
This fixes piglit texture_integer_glsl130 test.
Border color not working (crashing) yet.
(These formats are not exposed yet in llvmpipe.)

v2: couple cleanups according to José's comments

Reviewed-by: José Fonseca 
---
  src/gallium/drivers/llvmpipe/lp_bld_blend.h |2 +-
  src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c |6 +++---
  src/gallium/drivers/llvmpipe/lp_state_fs.c  |   22 +++---
  src/gallium/drivers/llvmpipe/lp_test_blend.c|2 +-
  4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h 
b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
index 0a1cea1..3a3be81 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -56,7 +56,7 @@ lp_build_blend(struct lp_build_context *bld,
  LLVMValueRef
  lp_build_blend_aos(struct gallivm_state *gallivm,
 const struct pipe_blend_state *blend,
-   const enum pipe_format *cbuf_format,
+   const enum pipe_format cbuf_format,
 struct lp_type type,
 unsigned rt,
 LLVMValueRef src,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c 
b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index 641c253..2dc2082 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -266,7 +266,7 @@ lp_build_blend_factor(struct lp_build_blend_aos_context 
*bld,
   * @param blend the blend state of the shader variant
   * @param cbuf_format   format of the colour buffer
   * @param type  data type of the pixel vector
- * @param rtrt number
+ * @param rtblend state index number
   * @param src   blend src
   * @param dst   blend dst
   * @param mask  optional mask to apply to the blending result
@@ -278,7 +278,7 @@ lp_build_blend_factor(struct lp_build_blend_aos_context 
*bld,
  LLVMValueRef
  lp_build_blend_aos(struct gallivm_state *gallivm,
 const struct pipe_blend_state *blend,
-   const enum pipe_format *cbuf_format,
+   const enum pipe_format cbuf_format,
 struct lp_type type,
 unsigned rt,
 LLVMValueRef src,
@@ -298,7 +298,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
 unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
 unsigned i;

-   desc = util_format_description(cbuf_format[rt]);
+   desc = util_format_description(cbuf_format);

 /* Setup build context */
 memset(&bld, 0, sizeof bld);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 5a8351b..551fba6 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1010,7 +1010,7 @@ lp_blend_type_from_format_desc(const struct 
util_format_description *format_desc


  /**
- * Scale a normalised value from src_bits to dst_bits
+ * Scale a normalized value from src_bits to dst_bits
   */
  static INLINE LLVMValueRef
  scale_bits(struct gallivm_state *gallivm,
@@ -1578,6 +1578,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
  unsigned pixels = block_size / src_count;
  unsigned channels = pad_inline ? TGSI_NUM_CHANNELS : dst_channels;
  unsigned alpha_span = 1;
+LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

  /* Check if we need 2 src_alphas for our shuffles */
  if (pixels > alpha_type.length) {
@@ -1585,8 +1586,15 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
  }

  /* Broadcast alpha across all channels, e.g. a1a2 to 
a1a1a1a1a2a2a2a2 */
+for (j = 0; j < row_type.length; ++j) {
+   if (j < pixels * channels) {
+  shuffles[j] = lp_build_const_int32(gallivm, j / channels);
+   } else {
+  shuffles[j] = 
LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
+   }
+}
+
  for (i = 0; i < src_count; ++i) {
-   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 unsigned idx1 = i, idx2 = i;

 if (alpha_span > 1){
@@ -1594,14 +1602,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
idx2 = idx1 + 1;
 }

-   for (j = 0; j < row_type.length; ++j) {
-   

[Mesa-dev] [PATCH] llvmpipe: fix using wrong format with MRT in blend code

2013-01-10 Thread sroland
From: Roland Scheidegger 

We were passing in the rt index however this was always 0 for non-independent
blend case. (The format was only actually used to decide if the color mask
covered all channels so this went unnoticed and was discovered by accident.)
(Also do some trivial cleanup.)
---
 src/gallium/drivers/llvmpipe/lp_bld_blend.h |2 +-
 src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c |6 +++---
 src/gallium/drivers/llvmpipe/lp_state_fs.c  |   22 +++---
 src/gallium/drivers/llvmpipe/lp_test_blend.c|2 +-
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h 
b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
index 0a1cea1..3a3be81 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -56,7 +56,7 @@ lp_build_blend(struct lp_build_context *bld,
 LLVMValueRef
 lp_build_blend_aos(struct gallivm_state *gallivm,
const struct pipe_blend_state *blend,
-   const enum pipe_format *cbuf_format,
+   const enum pipe_format cbuf_format,
struct lp_type type,
unsigned rt,
LLVMValueRef src,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c 
b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index 641c253..2dc2082 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -266,7 +266,7 @@ lp_build_blend_factor(struct lp_build_blend_aos_context 
*bld,
  * @param blend the blend state of the shader variant
  * @param cbuf_format   format of the colour buffer
  * @param type  data type of the pixel vector
- * @param rtrt number
+ * @param rtblend state index number
  * @param src   blend src
  * @param dst   blend dst
  * @param mask  optional mask to apply to the blending result
@@ -278,7 +278,7 @@ lp_build_blend_factor(struct lp_build_blend_aos_context 
*bld,
 LLVMValueRef
 lp_build_blend_aos(struct gallivm_state *gallivm,
const struct pipe_blend_state *blend,
-   const enum pipe_format *cbuf_format,
+   const enum pipe_format cbuf_format,
struct lp_type type,
unsigned rt,
LLVMValueRef src,
@@ -298,7 +298,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
unsigned i;
 
-   desc = util_format_description(cbuf_format[rt]);
+   desc = util_format_description(cbuf_format);
 
/* Setup build context */
memset(&bld, 0, sizeof bld);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 5a8351b..551fba6 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1010,7 +1010,7 @@ lp_blend_type_from_format_desc(const struct 
util_format_description *format_desc
 
 
 /**
- * Scale a normalised value from src_bits to dst_bits
+ * Scale a normalized value from src_bits to dst_bits
  */
 static INLINE LLVMValueRef
 scale_bits(struct gallivm_state *gallivm,
@@ -1578,6 +1578,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 unsigned pixels = block_size / src_count;
 unsigned channels = pad_inline ? TGSI_NUM_CHANNELS : dst_channels;
 unsigned alpha_span = 1;
+LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
 
 /* Check if we need 2 src_alphas for our shuffles */
 if (pixels > alpha_type.length) {
@@ -1585,8 +1586,15 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 }
 
 /* Broadcast alpha across all channels, e.g. a1a2 to 
a1a1a1a1a2a2a2a2 */
+for (j = 0; j < row_type.length; ++j) {
+   if (j < pixels * channels) {
+  shuffles[j] = lp_build_const_int32(gallivm, j / channels);
+   } else {
+  shuffles[j] = 
LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
+   }
+}
+
 for (i = 0; i < src_count; ++i) {
-   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
unsigned idx1 = i, idx2 = i;
 
if (alpha_span > 1){
@@ -1594,14 +1602,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
   idx2 = idx1 + 1;
}
 
-   for (j = 0; j < row_type.length; ++j) {
-  if (j < pixels * channels) {
- shuffles[j] = lp_build_const_int32(gallivm, j / channels);
-  } else {
- shuffles[j] = 
LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-  }
-   }
-
src_alpha[i] = LLVMBuildShuffleVector(builder,
  src_alpha[idx1],

Re: [Mesa-dev] [PATCH 3/3] intel: Clean up confusion between logical and physical surface dimensions.

2013-01-10 Thread Paul Berry
On 10 January 2013 18:12, Eric Anholt  wrote:

> Paul Berry  writes:
>
> > On 10 January 2013 12:01, Ian Romanick  wrote:
> >
> >> On 01/08/2013 02:27 PM, Paul Berry wrote:
> >> This is basically converting depth0 from logical to physical.  We had
> >> discussed that this could cause problems with future cubemap arrays.  I
> may
> >> not be following the code completely, but does this potential future
> >> problem still loom?
> >>
> >>
> > I think we're ok w.r.t. cubemap arrays.  Once we get around to supporting
> > them, all we should have to do is remove the "assert(depth0 == 1)" line
> and
> > replace "depth0 = 6" with "depth0 *= 6".
>
> "get around to supporting them"?  We have GL_ARB_texture_cube_map_array
> as of December.
>

Yeah, Ken pointed out my mistake at the office today.  What's weird is that
the depth0 == 1 assertion pre-dated my patch series, which makes me wonder,
do they work?  Do we have tests for them?  It's hard for me to imagine how
they could possibly work with this assertion present.  I'll look into this
first thing tomorrow.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600: always export a position from vertex shader

2013-01-10 Thread Dave Airlie
From: Dave Airlie 

This stops piglit glsl-1.40-tf-no-position from hanging the GPU, on my rv635
at least.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index cd511ae..1244a30 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1761,6 +1761,23 @@ static int r600_shader_from_tgsi(struct r600_screen 
*rscreen,
}
}
 
+/* add fake position export */
+   if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) {
+   memset(&output[j], 0, sizeof(struct 
r600_bytecode_output));
+   output[j].gpr = 0;
+   output[j].elem_size = 3;
+   output[j].swizzle_x = 7;
+   output[j].swizzle_y = 7;
+   output[j].swizzle_z = 7;
+   output[j].swizzle_w = 7;
+   output[j].burst_count = 1;
+   output[j].barrier = 1;
+   output[j].type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+   output[j].array_base = next_pos_base;
+   output[j].inst = BC_INST(ctx.bc, 
V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
+   j++;
+   }
+
/* add fake param output for vertex shader if no param is exported */
if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
memset(&output[j], 0, sizeof(struct 
r600_bytecode_output));
-- 
1.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] intel: Clean up confusion between logical and physical surface dimensions.

2013-01-10 Thread Chris Forbes
Hi Paul,

They do work -- they just take the same path as TEXTURE_2D_ARRAY by
this point. It's possible that we're missing some assert(depth0 % 6 ==
0) for sanity there.

Have a look at this piglit:
tests/spec/arb_texture_cube_map_array/cubemap.c; it uses a 2-layer
cubemap array, and depth0 is 12 in intel_miptree_create_internal.

-- Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 59225] New: SIGSEGV src/mesa/program/symbol_table.c:200

2013-01-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=59225

  Priority: medium
Bug ID: 59225
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: SIGSEGV src/mesa/program/symbol_table.c:200
  Severity: critical
Classification: Unclassified
OS: Linux (All)
  Reporter: v...@freedesktop.org
  Hardware: x86-64 (AMD64)
Status: NEW
   Version: git
 Component: Mesa core
   Product: Mesa

mesa: babab2876080af0fe65249dff559244aebd0b87e (master)

Run piglit arb_es2_compatibility-releaseshadercompiler-ge7.

$ ./bin/arb_es2_compatibility-releaseshadercompiler-ge7 -auto
Segmentation fault (core dumped)


(gdb) bt
#0  0x7fa4fa1fe182 in find_symbol (table=0x0, name=0x1cda0c0 "@") at
src/mesa/program/symbol_table.c:200
#1  0x7fa4fa1fe470 in _mesa_symbol_table_find_symbol (table=0x0,
name_space=-1, name=0x1cda0c0 "@") at src/mesa/program/symbol_table.c:306
#2  0x7fa4fa245cdf in glsl_symbol_table::get_entry (this=0x1c56cf0,
name=0x1cda0c0 "@") at src/glsl/glsl_symbol_table.cpp:164
#3  0x7fa4fa245c99 in glsl_symbol_table::get_function (this=0x1c56cf0,
name=0x1cda0c0 "@") at src/glsl/glsl_symbol_table.cpp:157
#4  0x7fa4fa25c9d3 in find_matching_signature (name=0x1cda0c0 "@",
actual_parameters=0x1c9d520, shader_list=0x1d19eb0, num_shaders=4,
use_builtin=true)
at src/glsl/link_functions.cpp:254
#5  0x7fa4fa25ccb1 in call_link_visitor::visit_enter (this=0x7fff8f090eb0,
ir=0x1c9d4f0) at src/glsl/link_functions.cpp:90
#6  0x7fa4fa254672 in ir_call::accept (this=0x1c9d4f0, v=0x7fff8f090eb0) at
src/glsl/ir_hv_accept.cpp:332
#7  0x7fa4fa253a7b in visit_list_elements (v=0x7fff8f090eb0, l=0x1d12848,
statement_list=true) at src/glsl/ir_hv_accept.cpp:56
#8  0x7fa4fa253d6d in ir_function_signature::accept (this=0x1d12800,
v=0x7fff8f090eb0) at src/glsl/ir_hv_accept.cpp:136
#9  0x7fa4fa253a7b in visit_list_elements (v=0x7fff8f090eb0, l=0x1ccee18,
statement_list=false) at src/glsl/ir_hv_accept.cpp:56
#10 0x7fa4fa253e01 in ir_function::accept (this=0x1ccedf0,
v=0x7fff8f090eb0) at src/glsl/ir_hv_accept.cpp:148
#11 0x7fa4fa253a7b in visit_list_elements (v=0x7fff8f090eb0, l=0x1c56cf0,
statement_list=true) at src/glsl/ir_hv_accept.cpp:56
#12 0x7fa4fa2539a4 in ir_hierarchical_visitor::run (this=0x7fff8f090eb0,
instructions=0x1c56cf0) at src/glsl/ir_hierarchical_visitor.cpp:291
#13 0x7fa4fa25ca93 in link_function_calls (prog=0x1c3b030, main=0x1c412e0,
shader_list=0x1d19eb0, num_shaders=4) at src/glsl/link_functions.cpp:284
#14 0x7fa4fa25a8bb in link_intrastage_shaders (mem_ctx=0x1c56fb0,
ctx=0x1b52650, prog=0x1c3b030, shader_list=0x1c56d10, num_shaders=1) at
src/glsl/linker.cpp:1018
#15 0x7fa4fa25bc4b in link_shaders (ctx=0x1b52650, prog=0x1c3b030) at
src/glsl/linker.cpp:1635
#16 0x7fa4fa0dab44 in _mesa_glsl_link_shader (ctx=0x1b52650,
prog=0x1c3b030) at src/mesa/program/ir_to_mesa.cpp:3155
#17 0x7fa4fa0995ae in link_program (ctx=0x1b52650, program=3) at
src/mesa/main/shaderapi.c:753
#18 0x7fa4fa09a788 in _mesa_LinkProgram (programObj=3) at
src/mesa/main/shaderapi.c:1249
#19 0x7fa4fc7da999 in stub_glLinkProgram (program=3) at
piglit/tests/util/generated_dispatch.c:13975
#20 0x7fa4fc80c61f in piglit_link_simple_program (vs=1, fs=2) at
piglit/tests/util/piglit-shader.c:247
#21 0x00400fbd in draw () at
piglit/tests/spec/arb_es2_compatibility/arb_es2_compatibility-releaseshadercompiler-ge7.c:70
#22 0x0040105f in piglit_display () at
piglit/tests/spec/arb_es2_compatibility/arb_es2_compatibility-releaseshadercompiler-ge7.c:93
#23 0x7fa4fc7bd408 in display () at
piglit/tests/util/piglit-framework-gl/piglit_glut_framework.c:60
#24 0x7fa4fc177137 in fghRedrawWindow (window=0x1ae3690) at
freeglut_main.c:210
#25 fghcbDisplayWindow (window=0x1ae3690, enumerator=0x7fff8f0912e0) at
freeglut_main.c:227
#26 0x7fa4fc17a889 in fgEnumWindows (enumCallback=0x7fa4fc1770d0
, enumerator=0x7fff8f0912e0) at freeglut_structure.c:394
#27 0x7fa4fc1775fa in fghDisplayAll () at freeglut_main.c:249
#28 glutMainLoopEvent () at freeglut_main.c:1450
#29 0x7fa4fc177f05 in glutMainLoop () at freeglut_main.c:1498
#30 0x7fa4fc7bd5e0 in run_test (gl_fw=0x7fa4fca877e0, argc=1,
argv=0x7fff8f0916a8) at
piglit/tests/util/piglit-framework-gl/piglit_glut_framework.c:127
#31 0x7fa4fc7bb726 in piglit_gl_test_run (argc=1, argv=0x7fff8f0916a8,
config=0x7fff8f091590) at piglit/tests/util/piglit-framework-gl.c:127
#32 0x00400f60 in main (argc=2, argv=0x7fff8f0916a8) at
piglit/tests/spec/arb_es2_compatibility/arb_es2_compatibility-releaseshadercompiler-ge7.c:51
(gdb) frame 0
#0  0x7fa4fa1fe182 in find_symbol (table=0x0, name=0x1cda0c0 "@") at
src/mesa/program/symbol_table.c:200
200return (struct symbol_header *) hash_table_find(table->ht, name);
(gdb) print table
$1 = (struct _mesa_symbol_table *) 0x0

-- 
You are receiving this mail because:

[Mesa-dev] [Bug 59226] New: softpipe automake build does not load properly

2013-01-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=59226

  Priority: medium
Bug ID: 59226
  Keywords: regression
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: softpipe automake build does not load properly
  Severity: blocker
Classification: Unclassified
OS: Linux (All)
  Reporter: v...@freedesktop.org
  Hardware: x86-64 (AMD64)
Status: NEW
   Version: git
 Component: Mesa core
   Product: Mesa

mesa: 5eeedb852b8a422f31d4d4f018187020c713fffa (master)

$ ./autogen.sh --with-gallium-drivers=swrast --with-dri-drivers=
--enable-gallium-llvm=no
[...]
$ LIBGL_DEBUG=verbose LIBGL_DRIVERS_PATH=lib/gallium glxinfo
[...]
libGL: OpenDriver: trying lib/gallium/tls/swrast_dri.so
libGL: OpenDriver: trying lib/gallium/swrast_dri.so
libGL error: dlopen lib/gallium/swrast_dri.so failed
(lib/gallium/swrast_dri.so: undefined symbol:
_ZTVN10__cxxabiv120__si_class_type_infoE)
libGL error: unable to load driver: swrast_dri.so
libGL error: reverting to indirect rendering

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 54626] [PATCH] llvmpipe's Makefile: CXXFLAGS, CPPFLAGS and LDFLAGS are not properly propagated

2013-01-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=54626

Alexandre Demers  changed:

   What|Removed |Added

 Status|NEW |ASSIGNED
   Assignee|mesa-dev@lists.freedesktop. |alexandre.f.dem...@gmail.co
   |org |m
  Attachment #71708|0   |1
is obsolete||

--- Comment #3 from Alexandre Demers  ---
Created attachment 72834
  --> https://bugs.freedesktop.org/attachment.cgi?id=72834&action=edit
Fix llvmpipe crosscompilation

It fixes the architecture used by the linker when using --enable-XY-bit options.

Please, review and commit (if the patch is OK). Thank you.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 59226] softpipe automake build does not load properly

2013-01-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=59226

Vinson Lee  changed:

   What|Removed |Added

 CC||matts...@gmail.com

--- Comment #1 from Vinson Lee  ---
b3068d87cb26fa4f5225382f6c43600ce3956ba5 is the first bad commit
commit b3068d87cb26fa4f5225382f6c43600ce3956ba5
Author: Matt Turner 
Date:   Wed Sep 5 11:17:33 2012 -0700

targets/dri-swrast: Convert to automake

v2: Andreas Boll 
- Provide compatibility with scripts for the old Mesa build system

:100644 100644 661a0b2f42bf59a577cac4c3152b80ec5cb442b8
23df744783958e77f859ca593b77306f662dab2a Mconfigure.ac
:04 04 70474633089b1b840a62734bbfe41d3977d7cc75
f8cbc77f6c22c64abd7537e465159cda83d0a70c Msrc
bisect run success

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 59226] softpipe automake build does not load properly

2013-01-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=59226

Matt Turner  changed:

   What|Removed |Added

 CC||andreas.boll@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Fix out of bounds writes when uncompressing non-block-aligned ETC1.

2013-01-10 Thread Kenneth Graunke

On 01/10/2013 06:06 PM, Eric Anholt wrote:

Fixes a crash in GLB2.1 offscreen on the glthread branch.
---
  src/mesa/main/texcompress_etc_tmp.h |4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/texcompress_etc_tmp.h 
b/src/mesa/main/texcompress_etc_tmp.h
index 8bbb2cd..5497566 100644
--- a/src/mesa/main/texcompress_etc_tmp.h
+++ b/src/mesa/main/texcompress_etc_tmp.h
@@ -153,9 +153,9 @@ etc1_unpack_rgba(uint8_t *dst_row,
for (x = 0; x < width; x+= bw) {
   etc1_parse_block(&block, src);

- for (j = 0; j < bh; j++) {
+ for (j = 0; j < MIN2(bh, height - y); j++) {
  uint8_t *dst = dst_row + (y + j) * dst_stride + x * comps;
-for (i = 0; i < bw; i++) {
+for (i = 0; i < MIN2(bw, width - x); i++) {
 etc1_fetch_texel(&block, i, j, dst);
 dst[3] = 255;
 dst += comps;


Nice find.

Reviewed-by: Kenneth Graunke 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] targets/dri-swrast: Force c++ linker in all cases.

2013-01-10 Thread Johannes Obermayr
Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=59226
---
 src/gallium/targets/dri-swrast/Makefile.am |   10 ++
 1 Datei geändert, 2 Zeilen hinzugefügt(+), 8 Zeilen entfernt(-)

diff --git a/src/gallium/targets/dri-swrast/Makefile.am 
b/src/gallium/targets/dri-swrast/Makefile.am
index 82fcfd2..62b5922 100644
--- a/src/gallium/targets/dri-swrast/Makefile.am
+++ b/src/gallium/targets/dri-swrast/Makefile.am
@@ -58,17 +58,11 @@ swrast_dri_la_LIBADD = \
$(top_builddir)/src/gallium/drivers/rbug/librbug.la \
$(GALLIUM_DRI_LIB_DEPS)
 
-if HAVE_MESA_LLVM
-swrast_dri_la_LINK = $(CXXLINK) $(swrast_dri_la_LDFLAGS)
-# Mention a dummy pure C++ file to trigger generation of the $(LINK) variable
-nodist_EXTRA_swrast_dri_la_SOURCES = dummy-cpp.cpp
+nodist_EXTRA_swrast_dri_la_SOURCES = dummy.cpp
 
+if HAVE_MESA_LLVM
 AM_CPPFLAGS += -DGALLIUM_LLVMPIPE
 swrast_dri_la_LIBADD += 
$(top_builddir)/src/gallium/drivers/llvmpipe/libllvmpipe.la $(LLVM_LIBS)
-else
-swrast_dri_la_LINK = $(LINK) $(swrast_dri_la_LDFLAGS)
-# Mention a dummy pure C file to trigger generation of the $(LINK) variable
-nodist_EXTRA_swrast_dri_la_SOURCES = dummy-c.c
 endif
 
 # Provide compatibility with scripts for the old Mesa build system for
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 59226] softpipe automake build does not load properly

2013-01-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=59226

--- Comment #2 from Matt Turner  ---
Created attachment 72835
  --> https://bugs.freedesktop.org/attachment.cgi?id=72835&action=edit
patch

Does this help? (I didn't think non-LLVM softpipe needed C++ linking)

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] targets/dri-swrast: Force c++ linker in all cases.

2013-01-10 Thread Matt Turner
On Thu, Jan 10, 2013 at 9:06 PM, Johannes Obermayr
 wrote:
> Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=59226
> ---
>  src/gallium/targets/dri-swrast/Makefile.am |   10 ++
>  1 Datei geändert, 2 Zeilen hinzugefügt(+), 8 Zeilen entfernt(-)
>
> diff --git a/src/gallium/targets/dri-swrast/Makefile.am 
> b/src/gallium/targets/dri-swrast/Makefile.am
> index 82fcfd2..62b5922 100644
> --- a/src/gallium/targets/dri-swrast/Makefile.am
> +++ b/src/gallium/targets/dri-swrast/Makefile.am
> @@ -58,17 +58,11 @@ swrast_dri_la_LIBADD = \
> $(top_builddir)/src/gallium/drivers/rbug/librbug.la \
> $(GALLIUM_DRI_LIB_DEPS)
>
> -if HAVE_MESA_LLVM
> -swrast_dri_la_LINK = $(CXXLINK) $(swrast_dri_la_LDFLAGS)
> -# Mention a dummy pure C++ file to trigger generation of the $(LINK) variable
> -nodist_EXTRA_swrast_dri_la_SOURCES = dummy-cpp.cpp
> +nodist_EXTRA_swrast_dri_la_SOURCES = dummy.cpp
>
> +if HAVE_MESA_LLVM
>  AM_CPPFLAGS += -DGALLIUM_LLVMPIPE
>  swrast_dri_la_LIBADD += 
> $(top_builddir)/src/gallium/drivers/llvmpipe/libllvmpipe.la $(LLVM_LIBS)
> -else
> -swrast_dri_la_LINK = $(LINK) $(swrast_dri_la_LDFLAGS)
> -# Mention a dummy pure C file to trigger generation of the $(LINK) variable
> -nodist_EXTRA_swrast_dri_la_SOURCES = dummy-c.c
>  endif
>
>  # Provide compatibility with scripts for the old Mesa build system for
> --
> 1.7.10.4

Oh cool, same thing I came up with. Any idea why non-LLVM softpipe
needs C++ linking? Maybe because of C++ code in libmesagallium.la or
something?

Reviewed-by: Matt Turner 

(I'll commit when vlee confirms this fixes it)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] i965: Add support for GL_ARB_texture_buffer_object_rgb32.

2013-01-10 Thread Kenneth Graunke

On 01/10/2013 04:14 PM, Eric Anholt wrote:

Tested with piglit ARB_texture_buffer_object/formats.
---
  docs/GL3.txt  |2 +-
  src/mesa/drivers/dri/intel/intel_extensions.c |1 +
  2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 34cce72..e367c07 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -95,7 +95,7 @@ GL_ARB_gpu_shader_fp64   not 
started
  GL_ARB_sample_shadingnot started
  GL_ARB_shader_subroutine not started
  GL_ARB_tessellation_shader   not started
-GL_ARB_texture_buffer_object_rgb32   DONE (softpipe)
+GL_ARB_texture_buffer_object_rgb32   DONE (i965, softpipe)
  GL_ARB_texture_cube_map_arrayDONE (i965, softpipe)
  GL_ARB_texture_gathernot started
  GL_ARB_transform_feedback2   DONE
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c 
b/src/mesa/drivers/dri/intel/intel_extensions.c
index 5c6b651..32c96fc 100755
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -103,6 +103,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_draw_buffers_blend = true;
ctx->Extensions.ARB_uniform_buffer_object = true;
ctx->Extensions.ARB_texture_buffer_object = true;
+  ctx->Extensions.ARB_texture_buffer_object_rgb32 = true;
ctx->Extensions.ARB_texture_cube_map_array = true;
 }


I looked over this series and it looked good, though probably not as 
carefully as I ought to have.


Reviewed-by: Kenneth Graunke 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: attempt v1 at tbo on r600/700

2013-01-10 Thread Dave Airlie
From: Dave Airlie 

This passes the TBO tests and the textureSize tests fine.
---
 src/gallium/drivers/r600/r600_pipe.c |  4 +-
 src/gallium/drivers/r600/r600_pipe.h |  8 +--
 src/gallium/drivers/r600/r600_shader.c   | 73 ++--
 src/gallium/drivers/r600/r600_shader.h   |  1 +
 src/gallium/drivers/r600/r600_state.c| 47 +++
 src/gallium/drivers/r600/r600_state_common.c | 85 +++-
 6 files changed, 194 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 4d2fc2d..8ba94f0 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -416,6 +416,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_COMPUTE:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return 1;
 
 case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
@@ -425,7 +426,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
return 256;
 
case PIPE_CAP_GLSL_FEATURE_LEVEL:
-   return family >= CHIP_CEDAR ? 140 : 130;
+return 140;
 
case PIPE_CAP_TEXTURE_MULTISAMPLE:
return rscreen->msaa_texture_support != 
MSAA_TEXTURE_SAMPLE_ZERO;
@@ -439,7 +440,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
/* Supported on Evergreen. */
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_CUBE_MAP_ARRAY:
-   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return family >= CHIP_CEDAR ? 1 : 0;
 
/* Unsupported features. */
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 6a13dc0..d983718 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -46,7 +46,7 @@
 /* start driver buffers after user buffers */
 #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
 #define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
-#define R600_BUFFER_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
 
 #define R600_MAX_CONST_BUFFER_SIZE 4096
 
@@ -331,7 +331,7 @@ struct r600_samplerview_state {
uint32_tcompressed_depthtex_mask; /* which 
textures are depth */
uint32_tcompressed_colortex_mask;
boolean dirty_txq_constants;
-   boolean dirty_buffer_txq_constants;
+   boolean dirty_buffer_constants;
 };
 
 struct r600_sampler_states {
@@ -349,8 +349,8 @@ struct r600_textures_info {
 
/* cube array txq workaround */
uint32_t*txq_constants;
-   /* buffer txq workaround */
-   uint32_t*buffer_txq_constants;
+   /* buffer related workarounds */
+   uint32_t*buffer_constants;
 };
 
 struct r600_fence {
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 620da85..d7651be 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -3902,6 +3902,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, 
boolean src_requires_l
struct r600_bytecode_alu alu;
struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
int src_gpr, r, i;
+int id = tgsi_tex_get_src_gpr(ctx, 1);
 
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
if (src_requires_loading) {
@@ -3923,7 +3924,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, 
boolean src_requires_l
 
memset(&vtx, 0, sizeof(vtx));
vtx.inst = 0;
-   vtx.buffer_id = tgsi_tex_get_src_gpr(ctx, 1) + R600_MAX_CONST_BUFFERS;;
+   vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
vtx.src_gpr = src_gpr;
vtx.mega_fetch_count = 16;
@@ -3937,6 +3938,58 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx 
*ctx, boolean src_requires_l
 
if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
return r;
+
+if (ctx->bc->chip_class >= EVERGREEN)
+   return 0;
+
+for (i = 0; i < 4; i++) {
+int lasti = 
tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+continue;
+
+memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
+
+alu.dst.chan = i;
+alu.dst.sel = vtx.dst_gpr;
+alu.dst.write = 1;

Re: [Mesa-dev] [PATCH] targets/dri-swrast: Force c++ linker in all cases.

2013-01-10 Thread Michel Dänzer
On Don, 2013-01-10 at 21:11 -0800, Matt Turner wrote: 
> On Thu, Jan 10, 2013 at 9:06 PM, Johannes Obermayr
>  wrote:
> > Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=59226
> > ---
> >  src/gallium/targets/dri-swrast/Makefile.am |   10 ++
> >  1 Datei geändert, 2 Zeilen hinzugefügt(+), 8 Zeilen entfernt(-)
> >
> > diff --git a/src/gallium/targets/dri-swrast/Makefile.am 
> > b/src/gallium/targets/dri-swrast/Makefile.am
> > index 82fcfd2..62b5922 100644
> > --- a/src/gallium/targets/dri-swrast/Makefile.am
> > +++ b/src/gallium/targets/dri-swrast/Makefile.am
> > @@ -58,17 +58,11 @@ swrast_dri_la_LIBADD = \
> > $(top_builddir)/src/gallium/drivers/rbug/librbug.la \
> > $(GALLIUM_DRI_LIB_DEPS)
> >
> > -if HAVE_MESA_LLVM
> > -swrast_dri_la_LINK = $(CXXLINK) $(swrast_dri_la_LDFLAGS)
> > -# Mention a dummy pure C++ file to trigger generation of the $(LINK) 
> > variable
> > -nodist_EXTRA_swrast_dri_la_SOURCES = dummy-cpp.cpp
> > +nodist_EXTRA_swrast_dri_la_SOURCES = dummy.cpp
> >
> > +if HAVE_MESA_LLVM
> >  AM_CPPFLAGS += -DGALLIUM_LLVMPIPE
> >  swrast_dri_la_LIBADD += 
> > $(top_builddir)/src/gallium/drivers/llvmpipe/libllvmpipe.la $(LLVM_LIBS)
> > -else
> > -swrast_dri_la_LINK = $(LINK) $(swrast_dri_la_LDFLAGS)
> > -# Mention a dummy pure C file to trigger generation of the $(LINK) variable
> > -nodist_EXTRA_swrast_dri_la_SOURCES = dummy-c.c
> >  endif
> >
> >  # Provide compatibility with scripts for the old Mesa build system for
> > --
> > 1.7.10.4
> 
> Oh cool, same thing I came up with. Any idea why non-LLVM softpipe
> needs C++ linking? Maybe because of C++ code in libmesagallium.la or
> something?

Probably because of the GLSL compiler code?


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast |  Debian, X and DRI developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 50754] Building 32 bit mesa on 64 bit OS fails since change for automake

2013-01-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=50754

--- Comment #19 from Alexandre Demers  ---
Even with today's changes to Mesa, it's not fixed. We still need to add -m32 to
CFLAGS and CXXFLAGS for everything to build successfully. OSMesa is the first
component I'm hitting a bug with.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 59226] softpipe automake build does not load properly

2013-01-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=59226

--- Comment #3 from Vinson Lee  ---
Attachment 72835 fixes the bug for me.

Tested-by: Vinson Lee 

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev