[Mesa-dev] [v3] egl: dri2: support for creating images out of dma buffers

2013-04-30 Thread Topi Pohjolainen
v2:
   - upon success close the given file descriptors

Signed-off-by: Topi Pohjolainen 
---
 src/egl/drivers/dri2/egl_dri2.c | 276 
 1 file changed, 276 insertions(+)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 10fdcef..bbdf888 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1170,6 +1171,279 @@ dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, 
_EGLContext *ctx,
return dri2_create_image(disp, dri_image);
 }
 
+static EGLBoolean
+dri2_check_dma_buf_attribs(const _EGLImageAttribs *attrs)
+{
+   unsigned i;
+
+   /**
+ * The spec says:
+ *
+ * "Required attributes and their values are as follows:
+ *
+ *  * EGL_WIDTH & EGL_HEIGHT: The logical dimensions of the buffer in 
pixels
+ *
+ *  * EGL_LINUX_DRM_FOURCC_EXT: The pixel format of the buffer, as 
specified
+ *by drm_fourcc.h and used as the pixel_format parameter of the
+ *drm_mode_fb_cmd2 ioctl."
+ *
+ *  * EGL_DMA_BUF_PLANE0_FD_EXT: The dma_buf file descriptor of plane 0 of
+ *the image.
+ *
+ *  * EGL_DMA_BUF_PLANE0_OFFSET_EXT: The offset from the start of the
+ *dma_buf of the first sample in plane 0, in bytes.
+ * 
+ *  * EGL_DMA_BUF_PLANE0_PITCH_EXT: The number of bytes between the start 
of
+ *subsequent rows of samples in plane 0. May have special meaning for
+ *non-linear formats."
+ *
+ * "* If <target> is EGL_LINUX_DMA_BUF_EXT, and the list of attributes is
+ *incomplete, EGL_BAD_PARAMETER is generated."
+ */
+   if (attrs->Width <= 0 || attrs->Height <= 0 ||
+   !attrs->DMABufFourCC.IsPresent ||
+   !attrs->DMABufPlaneFds[0].IsPresent ||
+   !attrs->DMABufPlaneOffsets[0].IsPresent ||
+   !attrs->DMABufPlanePitches[0].IsPresent) {
+  _eglError(EGL_BAD_PARAMETER, "attribute(s) missing");
+  return EGL_FALSE;
+   }
+
+   /**
+* Also:
+*
+* "If <target> is EGL_LINUX_DMA_BUF_EXT and one or more of the values
+*  specified for a plane's pitch or offset isn't supported by EGL,
+*  EGL_BAD_ACCESS is generated."
+*/
+   for (i = 0; i < sizeof(attrs->DMABufPlanePitches) /
+   sizeof(attrs->DMABufPlanePitches[0]); ++i) {
+  if (attrs->DMABufPlanePitches[i].IsPresent &&
+  attrs->DMABufPlanePitches[i].Value <= 0) {
+ _eglError(EGL_BAD_ACCESS, "invalid pitch");
+ return EGL_FALSE;
+  }
+   }
+
+   return EGL_TRUE;
+}
+
+/* Returns the total number of file descriptors; zero indicates an error. */
+static unsigned
+dri2_check_dma_buf_format(const _EGLImageAttribs *attrs)
+{
+   switch (attrs->DMABufFourCC.Value) {
+   case DRM_FORMAT_RGB332:
+   case DRM_FORMAT_BGR233:
+   case DRM_FORMAT_XRGB4444:
+   case DRM_FORMAT_XBGR4444:
+   case DRM_FORMAT_RGBX4444:
+   case DRM_FORMAT_BGRX4444:
+   case DRM_FORMAT_ARGB4444:
+   case DRM_FORMAT_ABGR4444:
+   case DRM_FORMAT_RGBA4444:
+   case DRM_FORMAT_BGRA4444:
+   case DRM_FORMAT_XRGB1555:
+   case DRM_FORMAT_XBGR1555:
+   case DRM_FORMAT_RGBX5551:
+   case DRM_FORMAT_BGRX5551:
+   case DRM_FORMAT_ARGB1555:
+   case DRM_FORMAT_ABGR1555:
+   case DRM_FORMAT_RGBA5551:
+   case DRM_FORMAT_BGRA5551:
+   case DRM_FORMAT_RGB565:
+   case DRM_FORMAT_BGR565:
+   case DRM_FORMAT_RGB888:
+   case DRM_FORMAT_BGR888:
+   case DRM_FORMAT_XRGB8888:
+   case DRM_FORMAT_XBGR8888:
+   case DRM_FORMAT_RGBX8888:
+   case DRM_FORMAT_BGRX8888:
+   case DRM_FORMAT_ARGB8888:
+   case DRM_FORMAT_ABGR8888:
+   case DRM_FORMAT_RGBA8888:
+   case DRM_FORMAT_BGRA8888:
+   case DRM_FORMAT_XRGB2101010:
+   case DRM_FORMAT_XBGR2101010:
+   case DRM_FORMAT_RGBX1010102:
+   case DRM_FORMAT_BGRX1010102:
+   case DRM_FORMAT_ARGB2101010:
+   case DRM_FORMAT_ABGR2101010:
+   case DRM_FORMAT_RGBA1010102:
+   case DRM_FORMAT_BGRA1010102:
+   case DRM_FORMAT_YUYV:
+   case DRM_FORMAT_YVYU:
+   case DRM_FORMAT_UYVY:
+   case DRM_FORMAT_VYUY:
+  /* There must be one and only one plane present */
+  if (attrs->DMABufPlaneFds[0].IsPresent &&
+  attrs->DMABufPlaneOffsets[0].IsPresent &&
+  attrs->DMABufPlanePitches[0].IsPresent &&
+  !attrs->DMABufPlaneFds[1].IsPresent &&
+  !attrs->DMABufPlaneOffsets[1].IsPresent &&
+  !attrs->DMABufPlanePitches[1].IsPresent &&
+  !attrs->DMABufPlaneFds[2].IsPresent &&
+  !attrs->DMABufPlaneOffsets[2].IsPresent &&
+  !attrs->DMABufPlanePitches[2].IsPresent)
+  return 1;
+   case DRM_FORMAT_NV12:
+   case DRM_FORMAT_NV21:
+   case DRM_FORMAT_NV16:
+   case DRM_FORMAT_NV61:
+  /* There must be two and only two planes present */
+  if (attrs->DMABufPlaneFds[0].IsPresent &&
+  attrs->DMABufPlaneOffsets[0].IsPresent &&
+  attrs->DMABufPlanePitches[0].IsPresent &&
+  attrs->DMABufPlaneFds[1].IsPresent &&
+   

Re: [Mesa-dev] [PATCH mesa] wayland: Disable prime support on buggy kernels

2013-04-30 Thread Ander Conselvan de Oliveira

On 04/29/2013 07:15 PM, Kristian Høgsberg wrote:

On Tue, Apr 23, 2013 at 9:54 AM, Ander Conselvan de Oliveira
 wrote:

Linux kernel 3.8 shipped with a bug in the prime fd passing code that
makes it unreliable. As of this writing, it seems unlikely that 3.9
will contain the fix for the issue.

This patch disables prime support when running on top of those kernels,
in order to prevent unexpected behavior when running a Wayland
compositor.

Commit be8a42ae60addd8b6092535c11b42d099d6470ec in Linus tree introduces
the problem, which can be fixed by the patch below (not upstream yet):
http://lists.freedesktop.org/archives/dri-devel/2013-April/037716.html


What about the idea of using the self-ref test at init time to
determine if the kernel is buggy or not instead of the hard-coded
version check?


The test isn't reliable enough. It depends on the memory allocation for 
a dma_buf reusing the memory for an old dma_buf.


Ander



Kristian


---
  src/egl/drivers/dri2/egl_dri2.c |   36 +++-
  1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 06a21d7..13b9107 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -38,6 +38,7 @@
  #include 
  #include 
  #include 
+#include <sys/utsname.h>

  #include "egl_dri2.h"

@@ -1557,6 +1558,38 @@ static struct wayland_drm_callbacks wl_drm_callbacks = {
  };

  static EGLBoolean
+kernel_prime_support_not_buggy()
+{
+   struct utsname un;
+
+   /* Linux kernel 3.8 shipped with a bug in the prime fd passing code that
+* makes it unreliable. As of this writing, it seems unlikely that 3.9
+* will contain the fix for the issue. Disable prime support when running
+* on top of those kernels, in order to prevent unexpected behavior when
+* running a Wayland compositor.
+*
+* Commit be8a42ae60addd8b6092535c11b42d099d6470ec in Linus tree introduces
+* the problem, which can be fixed by the patch below (not upstream yet):
+* http://lists.freedesktop.org/archives/dri-devel/2013-April/037716.html
+*/
+   if (uname(&un) == -1) {
+  _eglLog(_EGL_INFO,
+  "DRI2: wayland prime support disabled: unknown kernel version");
+  return EGL_FALSE;
+   }
+
+   if (strncmp(un.sysname, "Linux", strlen("Linux")) == 0 &&
+   (strncmp(un.release, "3.8.", strlen("3.8.")) == 0 ||
+strncmp(un.release, "3.9.", strlen("3.9.")) == 0)) {
+ _eglLog(_EGL_INFO,
+ "DRI2: wayland prime support disabled: buggy kernel");
+ return EGL_FALSE;
+   }
+
+   return EGL_TRUE;
+}
+
+static EGLBoolean
  dri2_bind_wayland_display_wl(_EGLDriver *drv, _EGLDisplay *disp,
  struct wl_display *wl_dpy)
  {
@@ -1575,7 +1608,8 @@ dri2_bind_wayland_display_wl(_EGLDriver *drv, _EGLDisplay 
*disp,
 ret = drmGetCap(dri2_dpy->fd, DRM_CAP_PRIME, &cap);
 if (ret == 0 && cap == (DRM_PRIME_CAP_IMPORT | DRM_PRIME_CAP_EXPORT) &&
 dri2_dpy->image->base.version >= 7 &&
-   dri2_dpy->image->createImageFromFds != NULL)
+   dri2_dpy->image->createImageFromFds != NULL &&
+   kernel_prime_support_not_buggy())
flags |= WAYLAND_DRM_PRIME;

 dri2_dpy->wl_server_drm =
--
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


-
Intel Finland Oy
Registered Address: PL 281, 00181 Helsinki 
Business Identity Code: 0357606 - 4 
Domiciled in Helsinki 


This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: Ignore redundant prototypes after a function's been defined.

2013-04-30 Thread Kenneth Graunke
Consider the following shader:

vec4 f(vec4 v) { return v; }
vec4 f(vec4 v);

The prototype exactly matches the signature of the earlier definition,
so there's absolutely no point in it.  However, it doesn't appear to
be illegal.  The GLSL 4.30 specification offers two relevant quotes:

"If a function name is declared twice with the same parameter types,
 then the return types and all qualifiers must also match, and it is the
 same function being declared."

"User-defined functions can have multiple declarations, but only one
 definition."

In this case the same function was declared twice, and there's only one
definition, which fits both pieces of text.  There doesn't appear to be
any text saying late prototypes are illegal, so presumably it's valid.

Unfortunately, it currently triggers an assertion failure:
ir_dereference_variable @  specifies undeclared variable `v' @ 

When we process the second line, we look for an existing exact match so
we can enforce the one-definition rule.  We then leave sig set to that
existing function, and hit sig->replace_parameters(&hir_parameters),
unfortunately nuking our existing definition's parameters (which have
actual dereferences) with the prototype's bogus unused parameters.

Simply bailing out and ignoring such late prototypes is the safest
thing to do.

Fixes Piglit's late-proto.vert as well as 3DMark/Ice Storm for Android.

NOTE: This is a candidate for stable branches.
Cc: Tapani Pälli 
Cc: Ian Romanick 
Signed-off-by: Kenneth Graunke 
---
 src/glsl/ast_to_hir.cpp | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 2638411..e595110 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -3205,10 +3205,17 @@ ast_function::hir(exec_list *instructions,
 "match prototype", name);
 }
 
-if (is_definition && sig->is_defined) {
-   YYLTYPE loc = this->get_location();
-
-   _mesa_glsl_error(& loc, state, "function `%s' redefined", name);
+ if (sig->is_defined) {
+if (is_definition) {
+   YYLTYPE loc = this->get_location();
+   _mesa_glsl_error(& loc, state, "function `%s' redefined", name);
+} else {
+   /* We just encountered a prototype that exactly matches a
+* function that's already been defined.  This is redundant,
+* and we should ignore it.
+*/
+   return NULL;
+}
 }
   }
} else {
-- 
1.8.2.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Ignore redundant prototypes after a function's been defined.

2013-04-30 Thread Tapani Pälli

On 04/30/2013 11:52 AM, Kenneth Graunke wrote:

Consider the following shader:

 vec4 f(vec4 v) { return v; }
 vec4 f(vec4 v);

The prototype exactly matches the signature of the earlier definition,
so there's absolutely no point in it.  However, it doesn't appear to
be illegal.  The GLSL 4.30 specification offers two relevant quotes:

"If a function name is declared twice with the same parameter types,
  then the return types and all qualifiers must also match, and it is the
  same function being declared."

"User-defined functions can have multiple declarations, but only one
  definition."

In this case the same function was declared twice, and there's only one
definition, which fits both pieces of text.  There doesn't appear to be
any text saying late prototypes are illegal, so presumably it's valid.

Unfortunately, it currently triggers an assertion failure:
ir_dereference_variable @  specifies undeclared variable `v' @ 

When we process the second line, we look for an existing exact match so
we can enforce the one-definition rule.  We then leave sig set to that
existing function, and hit sig->replace_parameters(&hir_parameters),
unfortunately nuking our existing definition's parameters (which have
actual dereferences) with the prototype's bogus unused parameters.

Simply bailing out and ignoring such late prototypes is the safest
thing to do.

Fixes Piglit's late-proto.vert as well as 3DMark/Ice Storm for Android.

NOTE: This is a candidate for stable branches.
Cc: Tapani Pälli 
Cc: Ian Romanick 
Signed-off-by: Kenneth Graunke 
---
  src/glsl/ast_to_hir.cpp | 15 +++
  1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 2638411..e595110 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -3205,10 +3205,17 @@ ast_function::hir(exec_list *instructions,
 "match prototype", name);
 }
  
-	 if (is_definition && sig->is_defined) {

-   YYLTYPE loc = this->get_location();
-
-   _mesa_glsl_error(& loc, state, "function `%s' redefined", name);
+ if (sig->is_defined) {
+if (is_definition) {
+   YYLTYPE loc = this->get_location();
+   _mesa_glsl_error(& loc, state, "function `%s' redefined", name);
+} else {
+   /* We just encountered a prototype that exactly matches a
+* function that's already been defined.  This is redundant,
+* and we should ignore it.
+*/
+   return NULL;
+}
 }
}
 } else {


Works for me, now I can remove my rather awful hack getting around this :)

Tested-by: Tapani Pälli 

// Tapani

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64084] New: Requesting git commit access to mesa

2013-04-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64084

  Priority: medium
Bug ID: 64084
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: Requesting git commit access to mesa
  Severity: normal
Classification: Unclassified
OS: All
  Reporter: rob...@sixbynine.org
  Hardware: Other
Status: NEW
   Version: unspecified
 Component: Other
   Product: Mesa

I'd like to request for my freedesktop.org account (rib) to have git commit
access to the mesa repo please. I'm working with mesa quite closely on the Cogl
project and have made a few small patches including some recent
EGL_EXT_swap_buffers_with_damage patches which could be nice to land soon, now
that they have been reviewed.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] draw: don't crash if GS doesn't emit anything

2013-04-30 Thread Brian Paul

On 04/27/2013 06:57 AM, Zack Rusin wrote:

Technically it's legal for geometry shader to not emit any
vertices. It's silly, but perfectly legal, so lets make draw
stop crashing if it happens.

Signed-off-by: Zack Rusin
---
  src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c |9 +
  .../auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c|9 +
  2 files changed, 18 insertions(+)

diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c 
b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 7b871c2..8d66c88 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -299,6 +299,15 @@ static void fetch_pipeline_generic( struct 
draw_pt_middle_end *middle,
   }
}
 }
+   if (prim_info->count == 0) {
+  debug_printf("GS/IA didn't emit any vertices!\n");


What is "IA"?

-Brian
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Google summer code

2013-04-30 Thread yew chong Ng
Dear Mr Ian Romanick,

I am a third-year undergraduate at the National University of Singapore,
School of Computing. I am interested in this year's Google Summer of Code
program. This is my first time applying and also my first time seeing most
of the accepted organisations for this year's program. After much browsing,
I handpicked a few projects which I think I might be able to do. To be
honest, as this is my first time applying, the two main reasons I am
interested in X.org's project idea are the difficulty level of the project
idea, "Improved application of GLSL compiler optimizations", and the part
stating that X.org treats GSoC as an opportunity to teach new
developers.

A brief summary of my software skills: I have learnt C++ and Java. I am more
confident with media software such as Adobe Photoshop, Illustrator,
Premiere Pro, After Effects, Maya, Blender and Unity.

I would like to know more about the details and my chances of getting selected
for the project. Thank you for your time.

Regards,
Yew Chong
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] draw: don't crash if GS doesn't emit anything

2013-04-30 Thread Alex Deucher
On Tue, Apr 30, 2013 at 9:40 AM, Brian Paul  wrote:
> On 04/27/2013 06:57 AM, Zack Rusin wrote:
>>
>> Technically it's legal for geometry shader to not emit any
>> vertices. It's silly, but perfectly legal, so lets make draw
>> stop crashing if it happens.
>>
>> Signed-off-by: Zack Rusin
>> ---
>>   src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c |9
>> +
>>   .../auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c|9
>> +
>>   2 files changed, 18 insertions(+)
>>
>> diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
>> b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
>> index 7b871c2..8d66c88 100644
>> --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
>> +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
>> @@ -299,6 +299,15 @@ static void fetch_pipeline_generic( struct
>> draw_pt_middle_end *middle,
>>}
>> }
>>  }
>> +   if (prim_info->count == 0) {
>> +  debug_printf("GS/IA didn't emit any vertices!\n");
>
>
> What is "IA"?

Input Assembler I assume.  First part of the DX11 pipeline.

Alex
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] draw: don't crash if GS doesn't emit anything

2013-04-30 Thread Jose Fonseca


- Original Message -
> On 04/27/2013 06:57 AM, Zack Rusin wrote:
> > Technically it's legal for geometry shader to not emit any
> > vertices. It's silly, but perfectly legal, so lets make draw
> > stop crashing if it happens.
> >
> > Signed-off-by: Zack Rusin
> > ---
> >   src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c |9
> >   +
> >   .../auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c|9
> >   +
> >   2 files changed, 18 insertions(+)
> >
> > diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
> > b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
> > index 7b871c2..8d66c88 100644
> > --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
> > +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
> > @@ -299,6 +299,15 @@ static void fetch_pipeline_generic( struct
> > draw_pt_middle_end *middle,
> >}
> > }
> >  }
> > +   if (prim_info->count == 0) {
> > +  debug_printf("GS/IA didn't emit any vertices!\n");
> 
> What is "IA"?

"Input Assembly", a D3D10 term that roughly matches pipe_vertex_elements / 
pipe_vertex_buffer state.

BTW, I think that Chris Forbes makes a good  point -- a GS might choose to not 
emit any vertices (e.g., that does fancy culling) -- so maybe this debugging 
message should be silent by default.

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g/llvm: Undefines unrequired texture coord values

2013-04-30 Thread Vincent Lejeune
This is a port of "r600g:mask unused source components for SAMPLE"
patch from Vadim Girlin.
---
 src/gallium/drivers/r600/r600_llvm.c | 25 -
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 83d7340..a94faf2 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -429,9 +429,32 @@ static void llvm_emit_tex(
}
}
 
+   if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX) {
+   LLVMValueRef Vector[4] = {
+   LLVMBuildExtractElement(gallivm->builder, 
emit_data->args[0], lp_build_const_int32(gallivm, 0), ""),
+   LLVMBuildExtractElement(gallivm->builder, 
emit_data->args[0], lp_build_const_int32(gallivm, 1), ""),
+   LLVMBuildExtractElement(gallivm->builder, 
emit_data->args[0], lp_build_const_int32(gallivm, 2), ""),
+   LLVMBuildExtractElement(gallivm->builder, 
emit_data->args[0], lp_build_const_int32(gallivm, 3), ""),
+   };
+   switch (emit_data->inst->Texture.Texture) {
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+   Vector[2] = Vector[3] = 
LLVMGetUndef(bld_base->base.elem_type);
+   break;
+   case TGSI_TEXTURE_1D:
+   Vector[1] = Vector[2] = Vector[3] = 
LLVMGetUndef(bld_base->base.elem_type);
+   break;
+   default:
+   break;
+   }
+   args[0] = lp_build_gather_values(gallivm, Vector, 4);
+   } else {
+   args[0] = emit_data->args[0];
+   }
+
assert(emit_data->arg_count + 2 <= Elements(args));
 
-   for (c = 0; c < emit_data->arg_count; ++c)
+   for (c = 1; c < emit_data->arg_count; ++c)
args[c] = emit_data->args[c];
 
sampler_src = emit_data->inst->Instruction.NumSrcRegs-1;
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/17] swrast: Factor out texture slice counting.

2013-04-30 Thread Brian Paul

On 04/22/2013 10:14 AM, Eric Anholt wrote:

This function is going to get used a lot more in upcoming patches.
---
  src/mesa/swrast/s_texture.c |   16 
  1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/mesa/swrast/s_texture.c b/src/mesa/swrast/s_texture.c
index 51048be..36a90dd 100644
--- a/src/mesa/swrast/s_texture.c
+++ b/src/mesa/swrast/s_texture.c
@@ -58,6 +58,14 @@ _swrast_delete_texture_image(struct gl_context *ctx,
 _mesa_delete_texture_image(ctx, texImage);
  }

+static unsigned int
+texture_slices(struct gl_texture_image *texImage)
+{
+   if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY)
+  return texImage->Height;
+   else
+  return texImage->Depth;
+}

  /**
   * Called via ctx->Driver.AllocTextureImageBuffer()
@@ -83,11 +91,11 @@ _swrast_alloc_texture_image_buffer(struct gl_context *ctx,
  * We allocate the array for 1D/2D textures too in order to avoid special-
  * case code in the texstore routines.
  */
-   swImg->ImageOffsets = malloc(texImage->Depth * sizeof(GLuint));
+   swImg->ImageOffsets = malloc(texture_slices(texImage) * sizeof(GLuint));
 if (!swImg->ImageOffsets)
return GL_FALSE;

-   for (i = 0; i<  texImage->Depth; i++) {
+   for (i = 0; i<  texture_slices(texImage); i++) {
swImg->ImageOffsets[i] = i * texImage->Width * texImage->Height;
 }



Maybe save the result of texture_slices(texImage) in a local var so it 
doesn't get called for each loop iteration.  Not a big deal though.




@@ -209,20 +217,20 @@ _swrast_map_teximage(struct gl_context *ctx,

 map = swImage->Buffer;

+   assert(slice<  texture_slices(texImage));
+
 if (texImage->TexObject->Target == GL_TEXTURE_3D ||
 texImage->TexObject->Target == GL_TEXTURE_2D_ARRAY) {
GLuint sliceSize = _mesa_format_image_size(texImage->TexFormat,
   texImage->Width,
   texImage->Height,
   1);
-  assert(slice<  texImage->Depth);
map += slice * sliceSize;
 } else if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY) {
GLuint sliceSize = _mesa_format_image_size(texImage->TexFormat,
   texImage->Width,
   1,
   1);
-  assert(slice<  texImage->Height);
map += slice * sliceSize;
 }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/17] swrast: Clean up and explain the mapping process.

2013-04-30 Thread Brian Paul

On 04/22/2013 10:14 AM, Eric Anholt wrote:

---
  src/mesa/swrast/s_texture.c |   17 -
  1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/mesa/swrast/s_texture.c b/src/mesa/swrast/s_texture.c
index 36a90dd..b6dd8cb 100644
--- a/src/mesa/swrast/s_texture.c
+++ b/src/mesa/swrast/s_texture.c
@@ -218,18 +218,17 @@ _swrast_map_teximage(struct gl_context *ctx,
 map = swImage->Buffer;

 assert(slice<  texture_slices(texImage));
+   if (slice != 0) {
+  int sliceHeight = texImage->Height;
+  /* For 1D array textures, the slices are all 1 pixel high, and Height is
+   * the number of slices.
+   */
+  if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY)
+ sliceHeight = 1;


Maybe move this logic into a slice_height() helper function as you did 
for texture_slices().





-   if (texImage->TexObject->Target == GL_TEXTURE_3D ||
-   texImage->TexObject->Target == GL_TEXTURE_2D_ARRAY) {
GLuint sliceSize = _mesa_format_image_size(texImage->TexFormat,
   texImage->Width,
- texImage->Height,
- 1);
-  map += slice * sliceSize;
-   } else if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY) {
-  GLuint sliceSize = _mesa_format_image_size(texImage->TexFormat,
- texImage->Width,
- 1,
+ sliceHeight,
   1);
map += slice * sliceSize;
 }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/17] swrast: Replace ImageOffsets with an ImageSlices pointer.

2013-04-30 Thread Brian Paul

On 04/22/2013 10:14 AM, Eric Anholt wrote:

This is a step toward allowing drivers to use their normal mapping paths,
instead of requiring that all slice mappings come from an aligned offset
from the first slice's map.

This incidentally fixes missing slice handling in FXT1 swrast.
---
  src/mesa/drivers/dri/intel/intel_tex_validate.c |   37 
  src/mesa/drivers/dri/radeon/radeon_texture.c|   13 ++---
  src/mesa/main/texcompress.c |2 +-
  src/mesa/main/texcompress.h |3 +-
  src/mesa/main/texcompress_etc.c |   51 +++--
  src/mesa/main/texcompress_fxt1.c|8 +--
  src/mesa/main/texcompress_rgtc.c|   70 +--
  src/mesa/main/texcompress_s3tc.c|   56 --
  src/mesa/swrast/s_context.h |2 +-
  src/mesa/swrast/s_texfetch.c|5 +-
  src/mesa/swrast/s_texfetch_tmp.h|4 +-
  src/mesa/swrast/s_texrender.c   |   14 +
  src/mesa/swrast/s_texture.c |   54 +
  13 files changed, 127 insertions(+), 192 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c 
b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index c880bce..6068733 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -163,34 +163,19 @@ intel_tex_map_image_for_swrast(struct intel_context 
*intel,
 for (int i = 0; i<  mt->level[level].depth; i++)
intel_miptree_slice_resolve_depth(intel, mt, level, i);

-   if (mt->target == GL_TEXTURE_3D ||
-   mt->target == GL_TEXTURE_2D_ARRAY ||
-   mt->target == GL_TEXTURE_1D_ARRAY) {
-  int i;
-
-  /* ImageOffsets[] is only used for swrast's fetch_texel_3d, so we can't
-   * share code with the normal path.
-   */
-  for (i = 0; i<  mt->level[level].depth; i++) {
-intel_miptree_get_image_offset(mt, level, i,&x,&y);
-intel_image->base.ImageOffsets[i] = x + y * (mt->region->pitch /
-  mt->region->cpp);
-  }
-
-  DBG("%s \n", __FUNCTION__);
-
-  intel_image->base.Map = intel_miptree_map_raw(intel, mt);
-   } else {
-  assert(intel_image->base.Base.Depth == 1);
-  intel_miptree_get_image_offset(mt, level, face,&x,&y);
-
-  DBG("%s: (%d,%d) ->  (%d, %d)/%d\n",
- __FUNCTION__, face, level, x, y, mt->region->pitch);
-
-  intel_image->base.Map = intel_miptree_map_raw(intel, mt) +
-x * mt->cpp + y * mt->region->pitch;
+   void *map = intel_miptree_map_raw(intel, mt);
+
+   for (int i = 0; i<  mt->level[level].depth; i++) {
+  intel_miptree_get_image_offset(mt, level, i,&x,&y);
+  intel_image->base.ImageSlices[i] = (map +
+  y * mt->region->pitch +
+  x * mt->cpp);
+  DBG("%s: (%d,%d,%d) ->  (%d, %d)/%d\n",
+ __FUNCTION__, face, level, i, x, y, mt->region->pitch);
 }

+   intel_image->base.Map = intel_image->base.ImageSlices[0];
+
 assert(mt->region->pitch % mt->region->cpp == 0);
 intel_image->base.RowStride = mt->region->pitch / mt->region->cpp;
  }
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c 
b/src/mesa/drivers/dri/radeon/radeon_texture.c
index 23942cb..aa2f734 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -638,7 +638,6 @@ radeon_swrast_map_image(radeonContextPtr rmesa,
radeon_mipmap_tree *mt;
GLuint texel_size;
radeon_mipmap_level *lvl;
-   int rs;

if (!image || !image->mt)
return;
@@ -650,18 +649,16 @@ radeon_swrast_map_image(radeonContextPtr rmesa,

lvl =&image->mt->levels[level];

-   rs = lvl->rowstride / texel_size;
-
radeon_bo_map(mt->bo, 1);

image->base.Map = mt->bo->ptr + lvl->faces[face].offset;
-   if (mt->target == GL_TEXTURE_3D) {
-   int i;

-   for (i = 0; i<  mt->levels[level].depth; i++)
-   image->base.ImageOffsets[i] = rs * lvl->height * i;
+   for (int i = 0; i<  mt->levels[level].depth; i++) {
+   image->base.ImageSlices[i] =
+   image->base.Map + (lvl->rowstride * lvl->height * i);
}
-   image->base.RowStride = rs;
+
+   image->base.RowStride = lvl->rowstride / texel_size;
  }

  static void
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index f74ac5d..1afd51c 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -587,7 +587,7 @@ _mesa_decompress_image(gl_format format, GLuint width, 
GLuint height,

 for (j = 0; j<  height; j++) {
for (i = 0; i<  width; i++) {
- fetch(src, NULL, stride, i, j, 0, dest);
+ fetch(src, stride, i, j, dest);

Re: [Mesa-dev] [PATCH 12/17] swrast: Always use MapTextureImage for mapping textures for swrast.

2013-04-30 Thread Brian Paul

On 04/22/2013 10:14 AM, Eric Anholt wrote:

Now that everything goes through ImageSlices[], we can rely on the
driver's existing texture mapping function.

A big block of code goes away on Radeon that looks like it was to deal with
the validate that happened at SpanRenderStart, which no longer occurs since we
don't need validation for the MapTextureImage hook.
---
  src/mesa/drivers/dri/i915/intel_tris.c   |2 -
  src/mesa/drivers/dri/intel/intel_span.c  |   80 ++-
  src/mesa/drivers/dri/intel/intel_span.h  |2 -
  src/mesa/drivers/dri/intel/intel_tex.h   |6 --
  src/mesa/drivers/dri/intel/intel_tex_validate.c  |   90 --
  src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c |   21 -
  src/mesa/drivers/dri/radeon/radeon_span.c|   18 +
  src/mesa/drivers/dri/radeon/radeon_texture.c |   74 --
  src/mesa/drivers/dri/radeon/radeon_texture.h |3 -
  src/mesa/swrast/s_texture.c  |   61 ---
  10 files changed, 59 insertions(+), 298 deletions(-)

diff --git a/src/mesa/drivers/dri/i915/intel_tris.c 
b/src/mesa/drivers/dri/i915/intel_tris.c
index 4516db6..30eb6ac 100644
--- a/src/mesa/drivers/dri/i915/intel_tris.c
+++ b/src/mesa/drivers/dri/i915/intel_tris.c
@@ -1096,11 +1096,9 @@ intelRunPipeline(struct gl_context * ctx)
intel->NewGLState = 0;
 }

-   intel_map_vertex_shader_textures(ctx);
 intel->tnl_pipeline_running = true;
 _tnl_run_pipeline(ctx);
 intel->tnl_pipeline_running = false;
-   intel_unmap_vertex_shader_textures(ctx);

 _mesa_unlock_context_textures(ctx);
  }
diff --git a/src/mesa/drivers/dri/intel/intel_span.c 
b/src/mesa/drivers/dri/intel/intel_span.c
index d7eaa41..e74398d 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -105,31 +105,8 @@ intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, 
bool swizzled)
  }

  /**
- * Map the regions needed by intelSpanRenderStart().
- */
-static void
-intel_span_map_buffers(struct intel_context *intel)
-{
-   struct gl_context *ctx =&intel->ctx;
-   struct intel_texture_object *tex_obj;
-
-   for (int i = 0; i<  ctx->Const.MaxTextureImageUnits; i++) {
-  if (!ctx->Texture.Unit[i]._ReallyEnabled)
-continue;
-  tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
-  intel_finalize_mipmap_tree(intel, i);
-  intel_tex_map_images(intel, tex_obj,
-  GL_MAP_READ_BIT | GL_MAP_WRITE_BIT);
-   }
-
-   _swrast_map_renderbuffers(ctx);
-}
-
-/**
   * Prepare for software rendering.  Map current read/draw framebuffers'
- * renderbuffes and all currently bound texture objects.
- *
- * Old note: Moved locking out to get reasonable span performance.
+ * renderbuffers and all currently bound texture objects.
   */
  void
  intelSpanRenderStart(struct gl_context * ctx)
@@ -139,7 +116,9 @@ intelSpanRenderStart(struct gl_context * ctx)
 intel_flush(ctx);
 intel_prepare_render(intel);
 intel_flush(ctx);
-   intel_span_map_buffers(intel);
+
+   _swrast_map_textures(ctx);
+   _swrast_map_renderbuffers(ctx);
  }

  /**
@@ -149,18 +128,8 @@ intelSpanRenderStart(struct gl_context * ctx)
  void
  intelSpanRenderFinish(struct gl_context * ctx)
  {
-   struct intel_context *intel = intel_context(ctx);
-   GLuint i;
-
 _swrast_flush(ctx);
-
-   for (i = 0; i<  ctx->Const.MaxTextureImageUnits; i++) {
-  if (ctx->Texture.Unit[i]._ReallyEnabled) {
- struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
- intel_tex_unmap_images(intel, intel_texture_object(texObj));
-  }
-   }
-
+   _swrast_unmap_textures(ctx);
 _swrast_unmap_renderbuffers(ctx);
  }

@@ -174,42 +143,3 @@ intelInitSpanFuncs(struct gl_context * ctx)
swdd->SpanRenderFinish = intelSpanRenderFinish;
 }
  }
-
-void
-intel_map_vertex_shader_textures(struct gl_context *ctx)
-{
-   struct intel_context *intel = intel_context(ctx);
-   int i;
-
-   if (ctx->VertexProgram._Current == NULL)
-  return;
-
-   for (i = 0; i<  ctx->Const.MaxTextureImageUnits; i++) {
-  if (ctx->Texture.Unit[i]._ReallyEnabled&&
- ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) {
- struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
-
- intel_tex_map_images(intel, intel_texture_object(texObj),
-  GL_MAP_READ_BIT | GL_MAP_WRITE_BIT);
-  }
-   }
-}
-
-void
-intel_unmap_vertex_shader_textures(struct gl_context *ctx)
-{
-   struct intel_context *intel = intel_context(ctx);
-   int i;
-
-   if (ctx->VertexProgram._Current == NULL)
-  return;
-
-   for (i = 0; i<  ctx->Const.MaxTextureImageUnits; i++) {
-  if (ctx->Texture.Unit[i]._ReallyEnabled&&
- ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) {
- struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
-
- intel_tex_unmap_images(

Re: [Mesa-dev] swrast MapTextureImage fetches

2013-04-30 Thread Brian Paul

On 04/22/2013 10:14 AM, Eric Anholt wrote:

  34 files changed, 311 insertions(+), 889 deletions(-)

Also, swrast_dri.so now passes two FXT1 tests that failed before, and
i915's vertex shader texturing works.  I haven't tested the radeon/nouveau
code, nor have I been as invasive as I could be, because I don't have any
AGP systems left.  nouveau's got a giant pile of code to be deleted if
anyone gets around to doing AllocTextureImageBuffer for it.

The way swrast renderbuffers are linked to swrast textures is still pretty
hokey, but it's the best I could do at the moment.  I think we should have
MapRenderbuffer in general call MapTextureImage for texture renderbuffers,
without drivers having to manually do so.  To do that we'd want the
gl_renderbuffer to have a pointer to its TextureImage/slice instead of
that being in the gl_renderbuffer_attachment.

Branch is at swrast-texture-mapping of my tree.


I just had a handful of small comments.

Reviewed-by: Brian Paul 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] vbo code and flush explicit mapping

2013-04-30 Thread Jose Fonseca
- Original Message -
> On Mon, Apr 29, 2013 at 2:52 PM, Dave Airlie  wrote:
> > I've been playing with a gallium driver, and started looking at some
> > weird gears behaviour,
> >
> > The vbo code maps the buffer with GL_MAP_FLUSH_EXPLICIT_BIT which to
> > me requires that at some point
> > we call ctx->Driver.FlushMappedBufferRange. Now the code attempts to
> > call it in vbo_exec_vtx_unmap
> > but from what I can see the length is always 0 in there and so we
> > never get called.
> >
> > GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) *
> > sizeof(float);
> >
> > but I can not see buffer_ptr != buffer_map when we hit this code, I'm
> > a bit lost at this point, maybe someone else might know more.
> 
> Okay maybe I'm missing something, I'm just tracing gears with softpipe
> and realised the map/unmap
> pairs occur due to the Material calls but they don't actually seem to
> modify the VBOs.

That's weird.

I thought that material changes would just translate to constant buffer updates.

It looks like we replace the glBegin .. glEnd with VBOs, but we still translate 
Material calls literally.

Now that no Mesa driver handles fixed function directly, we could probably 
simplify some of these things.


Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] vbo code and flush explicit mapping

2013-04-30 Thread Brian Paul

On 04/30/2013 09:03 AM, Jose Fonseca wrote:

- Original Message -

On Mon, Apr 29, 2013 at 2:52 PM, Dave Airlie  wrote:

I've been playing with a gallium driver, and started looking at some
weird gears behaviour,

The vbo code maps the buffer with GL_MAP_FLUSH_EXPLICIT_BIT which to
me requires that at some point
we call ctx->Driver.FlushMappedBufferRange. Now the code attempts to
call it in vbo_exec_vtx_unmap
but from what I can see the length is always 0 in there and so we
never get called.

GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) *
sizeof(float);

but I can not see buffer_ptr != buffer_map when we hit this code, I'm
a bit lost at this point, maybe someone else might know more.


Okay maybe I'm missing something, I'm just tracing gears with softpipe
and realised the map/unmap
pairs occur due to the Material calls but they don't actually seem to
modify the VBOs.


That's weird.

I thought that material changes would just translate to constant buffer updates.

It looks like we replace the glBegin .. glEnd with VBOs, but we still translate 
Material calls literally.

Now that no Mesa driver handles fixed function directly, we could probably 
simplify some of these things.



glMaterial is complicated.  It's handled differently depending on 
whether it's called inside or outside glBegin/End or in a display list.


Even though I've been working in the VBO code lately, it would take me 
a while to re-learn how glMaterial is handled.


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] draw: don't crash if GS doesn't emit anything

2013-04-30 Thread Zack Rusin
> > What is "IA"?
> 
> "Input Assembly", a D3D10 term that roughly matches pipe_vertex_elements /
> pipe_vertex_buffer state.
> 
> BTW, I think that Chris Forbes makes a good point -- a GS might choose to
> not emit any vertices (e.g., that does fancy culling) -- so maybe this
> debugging message should be silent by default.

That's still a silly thing to do, because you're running a high cost operation 
on the slowest part of the pipeline. Ideally we'd connect to arb_debug_output 
for this stuff, but currently the most frequent cause of gs/ia not emitting any 
primitives is that something broke (e.g. one of our instructions is not fully 
compliant causing vs or gs errors in outputs) and the debug message is quite 
helpful in figuring that out. Of course I don't have a strong sentimental 
attachment to a debugging output but this particular one does make my job 
easier.

z
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/7] i965/vs: Do round-robin register allocation on gen6+ like we do in the FS.

2013-04-30 Thread Eric Anholt
This will free instruction scheduling to make better choices.  No
statistically significant performance difference on GLB2.7 (n=93).
---
 src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp |4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index ac3d401..7149d46 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -102,6 +102,8 @@ brw_alloc_reg_set_for_classes(struct brw_context *brw,
  int class_count,
  int base_reg_count)
 {
+   struct intel_context *intel = &brw->intel;
+
/* Compute the total number of registers across all classes. */
int ra_reg_count = 0;
for (int i = 0; i < class_count; i++) {
@@ -112,6 +114,8 @@ brw_alloc_reg_set_for_classes(struct brw_context *brw,
brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
ralloc_free(brw->vs.regs);
brw->vs.regs = ra_alloc_reg_set(brw, ra_reg_count);
+   if (intel->gen >= 6)
+  ra_set_allocate_round_robin(brw->vs.regs);
ralloc_free(brw->vs.classes);
brw->vs.classes = ralloc_array(brw, int, class_count + 1);
 
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/7] i965: Share the register file enum between the two backends.

2013-04-30 Thread Eric Anholt
I need this so I can look at vec4 and fs registers' files from the same
.cpp file without namespaces.  As far as I can tell we never rely on the
particular numerical values of the files, though I thought it sounded like
a good idea when doing the VS (it turns out having 0 be BAD_FILE is nicer).
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   |   12 ++--
 src/mesa/drivers/dri/i965/brw_fs.h |   10 --
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp  |2 +-
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |4 ++--
 src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp |8 
 src/mesa/drivers/dri/i965/brw_shader.h |   11 +++
 src/mesa/drivers/dri/i965/brw_vec4.h   |   11 ---
 7 files changed, 24 insertions(+), 34 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 9a76408..8411675 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -218,7 +218,7 @@ fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1, 
uint32_t condition)
 */
if (intel->gen == 4) {
   dst.type = src0.type;
-  if (dst.file == FIXED_HW_REG)
+  if (dst.file == HW_REG)
 dst.fixed_hw_reg.type = dst.type;
}
 
@@ -405,7 +405,7 @@ fs_reg::fs_reg(uint32_t u)
 fs_reg::fs_reg(struct brw_reg fixed_hw_reg)
 {
init();
-   this->file = FIXED_HW_REG;
+   this->file = HW_REG;
this->fixed_hw_reg = fixed_hw_reg;
this->type = fixed_hw_reg.type;
 }
@@ -1212,7 +1212,7 @@ fs_visitor::assign_curb_setup()
  constant_nr / 8,
  constant_nr % 8);
 
-   inst->src[i].file = FIXED_HW_REG;
+   inst->src[i].file = HW_REG;
inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type);
 }
   }
@@ -1280,12 +1280,12 @@ fs_visitor::assign_urb_setup()
   fs_inst *inst = (fs_inst *)node;
 
   if (inst->opcode == FS_OPCODE_LINTERP) {
-assert(inst->src[2].file == FIXED_HW_REG);
+assert(inst->src[2].file == HW_REG);
 inst->src[2].fixed_hw_reg.nr += urb_start;
   }
 
   if (inst->opcode == FS_OPCODE_CINTERP) {
-assert(inst->src[0].file == FIXED_HW_REG);
+assert(inst->src[0].file == HW_REG);
 inst->src[0].fixed_hw_reg.nr += urb_start;
   }
}
@@ -2402,7 +2402,7 @@ clear_deps_for_inst_src(fs_inst *inst, int 
dispatch_width, bool *deps,
   int grf;
   if (inst->src[i].file == GRF) {
  grf = inst->src[i].reg;
-  } else if (inst->src[i].file == FIXED_HW_REG &&
+  } else if (inst->src[i].file == HW_REG &&
  inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
  grf = inst->src[i].fixed_hw_reg.nr;
   } else {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index bf76357..436a97a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -55,16 +55,6 @@ namespace {
struct acp_entry;
 }
 
-enum register_file {
-   BAD_FILE,
-   ARF,
-   GRF,
-   MRF,
-   IMM,
-   FIXED_HW_REG, /* a struct brw_reg */
-   UNIFORM, /* prog_data->params[reg] */
-};
-
 class fs_reg {
 public:
/* Callers of this ralloc-based new need not call delete. It's
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 0f6b715..5a5044e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -854,7 +854,7 @@ brw_reg_from_fs_reg(fs_reg *reg)
 break;
   }
   break;
-   case FIXED_HW_REG:
+   case HW_REG:
   brw_reg = reg->fixed_hw_reg;
   break;
case BAD_FILE:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index b9b0303..fa1a938 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -258,7 +258,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
* the start (see interp_reg()).
*/
   for (int i = 0; i < 3; i++) {
- if (inst->src[i].file == FIXED_HW_REG &&
+ if (inst->src[i].file == HW_REG &&
  inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
 int node_nr = inst->src[i].fixed_hw_reg.nr / reg_width;
 if (node_nr >= payload_node_count)
@@ -288,7 +288,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
   */
  if (intel->gen >= 6) {
 int delta_x_arg = 0;
-if (inst->src[delta_x_arg].file == FIXED_HW_REG &&
+if (inst->src[delta_x_arg].file == HW_REG &&
 inst->src[delta_x_arg].fixed_hw_reg.file ==
 BRW_GENERAL_REGISTER_FILE) {
int secha

[Mesa-dev] [PATCH 2/7] i965: Make dump_instructions be a virtual method of the visitor.

2013-04-30 Thread Eric Anholt
---
 src/mesa/drivers/dri/i965/brw_fs.cpp |   15 +++
 src/mesa/drivers/dri/i965/brw_fs.h   |3 +--
 src/mesa/drivers/dri/i965/brw_shader.cpp |   11 +++
 src/mesa/drivers/dri/i965/brw_shader.h   |3 +++
 src/mesa/drivers/dri/i965/brw_vec4.cpp   |   15 +++
 src/mesa/drivers/dri/i965/brw_vec4.h |3 +--
 6 files changed, 22 insertions(+), 28 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a8610ee..9a76408 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2665,8 +2665,10 @@ fs_visitor::lower_uniform_pull_constant_loads()
 }
 
 void
-fs_visitor::dump_instruction(fs_inst *inst)
+fs_visitor::dump_instruction(backend_instruction *be_inst)
 {
+   fs_inst *inst = (fs_inst *)be_inst;
+
if (inst->predicate) {
   printf("(%cf0.%d) ",
  inst->predicate_inverse ? '-' : '+',
@@ -2769,17 +2771,6 @@ fs_visitor::dump_instruction(fs_inst *inst)
printf("\n");
 }
 
-void
-fs_visitor::dump_instructions()
-{
-   int ip = 0;
-   foreach_list(node, &this->instructions) {
-  fs_inst *inst = (fs_inst *)node;
-  printf("%d: ", ip++);
-  dump_instruction(inst);
-   }
-}
-
 /**
  * Possibly returns an instruction that set up @param reg.
  *
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index c9c9856..bf76357 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -422,8 +422,7 @@ public:
void setup_builtin_uniform_values(ir_variable *ir);
int implied_mrf_writes(fs_inst *inst);
 
-   void dump_instructions();
-   void dump_instruction(fs_inst *inst);
+   void dump_instruction(backend_instruction *inst);
 
struct gl_fragment_program *fp;
struct brw_wm_compile *c;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index a820952..9968ee5 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -553,3 +553,14 @@ backend_instruction::is_control_flow()
   return false;
}
 }
+
+void
+backend_visitor::dump_instructions()
+{
+   int ip = 0;
+   foreach_list(node, &this->instructions) {
+  backend_instruction *inst = (backend_instruction *)node;
+  printf("%d: ", ip++);
+  dump_instruction(inst);
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
b/src/mesa/drivers/dri/i965/brw_shader.h
index 5189fdc..4b2b399 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -56,6 +56,9 @@ public:
 * backend_instruction)
 */
exec_list instructions;
+
+   virtual void dump_instruction(backend_instruction *inst) = 0;
+   void dump_instructions();
 };
 
 int brw_type_for_base_type(const struct glsl_type *type);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index ab4668f..a3ae4a1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1064,8 +1064,10 @@ vec4_visitor::split_virtual_grfs()
 }
 
 void
-vec4_visitor::dump_instruction(vec4_instruction *inst)
+vec4_visitor::dump_instruction(backend_instruction *be_inst)
 {
+   vec4_instruction *inst = (vec4_instruction *)be_inst;
+
printf("%s ", brw_instruction_name(inst->opcode));
 
switch (inst->dst.file) {
@@ -1146,17 +1148,6 @@ vec4_visitor::dump_instruction(vec4_instruction *inst)
printf("\n");
 }
 
-void
-vec4_visitor::dump_instructions()
-{
-   int ip = 0;
-   foreach_list_safe(node, &this->instructions) {
-  vec4_instruction *inst = (vec4_instruction *)node;
-  printf("%d: ", ip++);
-  dump_instruction(inst);
-   }
-}
-
 /**
  * Replace each register of type ATTR in this->instructions with a reference
  * to a fixed HW register.
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index a4fca2d..cb97a86 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -477,8 +477,7 @@ public:
 
bool process_move_condition(ir_rvalue *ir);
 
-   void dump_instruction(vec4_instruction *inst);
-   void dump_instructions();
+   void dump_instruction(backend_instruction *inst);
 
 protected:
void emit_vertex();
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/7] i965: Pull a couple of FS scheduling functions out to methods.

2013-04-30 Thread Eric Anholt
These will get virtualized as we add VS scheduling support.
---
 .../drivers/dri/i965/brw_schedule_instructions.cpp |  132 
 1 file changed, 77 insertions(+), 55 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp 
b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 5affedf..af8af1d 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -364,6 +364,17 @@ public:
void calculate_deps();
void schedule_instructions(fs_inst *next_block_header);
 
+   schedule_node *choose_instruction_to_schedule();
+
+   /**
+* Returns how many cycles it takes the instruction to issue.
+*
+* Instructions in gen hardware are handled one simd4 vector at a time,
+* with 1 cycle per vector dispatched.  Thus 8-wide pixel shaders take 2
+* cycles to dispatch and 16-wide (compressed) instructions take 4.
+*/
+   int issue_time(fs_inst *inst);
+
bool is_compressed(fs_inst *inst);
 
void *mem_ctx;
@@ -709,6 +720,67 @@ instruction_scheduler::calculate_deps()
}
 }
 
+schedule_node *
+instruction_scheduler::choose_instruction_to_schedule()
+{
+   schedule_node *chosen = NULL;
+
+   if (post_reg_alloc) {
+  int chosen_time = 0;
+
+  /* Of the instructions closest ready to execute or the closest to
+   * being ready, choose the oldest one.
+   */
+  foreach_list(node, &instructions) {
+ schedule_node *n = (schedule_node *)node;
+
+ if (!chosen || n->unblocked_time < chosen_time) {
+chosen = n;
+chosen_time = n->unblocked_time;
+ }
+  }
+   } else {
+  /* Before register allocation, we don't care about the latencies of
+   * instructions.  All we care about is reducing live intervals of
+   * variables so that we can avoid register spilling, or get 16-wide
+   * shaders which naturally do a better job of hiding instruction
+   * latency.
+   *
+   * To do so, schedule our instructions in a roughly LIFO/depth-first
+   * order: when new instructions become available as a result of
+   * scheduling something, choose those first so that our result
+   * hopefully is consumed quickly.
+   *
+   * The exception is messages that generate more than one result
+   * register (AKA texturing).  In those cases, the LIFO search would
+   * normally tend to choose them quickly (because scheduling the
+   * previous message not only unblocked the children using its result,
+   * but also the MRF setup for the next sampler message, which in turn
+   * unblocks the next sampler message).
+   */
+  for (schedule_node *node = (schedule_node *)instructions.get_tail();
+   node != instructions.get_head()->prev;
+   node = (schedule_node *)node->prev) {
+ schedule_node *n = (schedule_node *)node;
+
+ chosen = n;
+ if (chosen->inst->regs_written <= 1)
+break;
+  }
+   }
+
+   return chosen;
+}
+
+int
+instruction_scheduler::issue_time(fs_inst *inst)
+{
+   if (is_compressed(inst))
+  return 4;
+   else
+  return 2;
+}
+
 void
 instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
 {
@@ -722,52 +794,7 @@ instruction_scheduler::schedule_instructions(fs_inst 
*next_block_header)
}
 
while (!instructions.is_empty()) {
-  schedule_node *chosen = NULL;
-  int chosen_time = 0;
-
-  if (post_reg_alloc) {
- /* Of the instructions closest ready to execute or the closest to
-  * being ready, choose the oldest one.
-  */
- foreach_list(node, &instructions) {
-schedule_node *n = (schedule_node *)node;
-
-if (!chosen || n->unblocked_time < chosen_time) {
-   chosen = n;
-   chosen_time = n->unblocked_time;
-}
- }
-  } else {
- /* Before register allocation, we don't care about the latencies of
-  * instructions.  All we care about is reducing live intervals of
-  * variables so that we can avoid register spilling, or get 16-wide
-  * shaders which naturally do a better job of hiding instruction
-  * latency.
-  *
-  * To do so, schedule our instructions in a roughly LIFO/depth-first
-  * order: when new instructions become available as a result of
-  * scheduling something, choose those first so that our result
-  * hopefully is consumed quickly.
-  *
-  * The exception is messages that generate more than one result
-  * register (AKA texturing).  In those cases, the LIFO search would
-  * normally tend to choose them quickly (because scheduling the
-  * previous message not only unblocked the children using its result,
-  * but also the MRF setup for the next sampler message, which in turn
-  * unblocks the next 

[Mesa-dev] [PATCH 7/7] i965/vs: Add instruction scheduling.

2013-04-30 Thread Eric Anholt
While this is ignorant of dependency control, it's still good for a 0.39%
+/- 0.08% performance improvement on GLBenchmark 2.7 (n=548)

v2: Rewrite as a subclass of the base class for the FS instruction
scheduler, inheriting the same latency information.
---
 .../drivers/dri/i965/brw_schedule_instructions.cpp |  219 
 src/mesa/drivers/dri/i965/brw_vec4.cpp |9 +
 src/mesa/drivers/dri/i965/brw_vec4.h   |1 +
 3 files changed, 229 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp 
b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 94fdf3e..6a52754 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -26,10 +26,13 @@
  */
 
 #include "brw_fs.h"
+#include "brw_vec4.h"
 #include "glsl/glsl_types.h"
 #include "glsl/ir_optimization.h"
 #include "glsl/ir_print_visitor.h"
 
+using namespace brw;
+
 /** @file brw_fs_schedule_instructions.cpp
  *
  * List scheduling of FS instructions.
@@ -297,6 +300,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
 
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+   case VS_OPCODE_PULL_CONSTANT_LOAD:
   /* testing using varying-index pull constants:
*
* 16 cycles:
@@ -405,6 +409,23 @@ 
fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v,
 {
 }
 
+class vec4_instruction_scheduler : public instruction_scheduler
+{
+public:
+   vec4_instruction_scheduler(vec4_visitor *v, int grf_count);
+   void calculate_deps();
+   schedule_node *choose_instruction_to_schedule();
+   int issue_time(backend_instruction *inst);
+   vec4_visitor *v;
+};
+
+vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v,
+   int grf_count)
+   : instruction_scheduler(v, grf_count, true),
+ v(v)
+{
+}
+
 void
 instruction_scheduler::add_inst(backend_instruction *inst)
 {
@@ -739,6 +760,163 @@ fs_instruction_scheduler::calculate_deps()
}
 }
 
+void
+vec4_instruction_scheduler::calculate_deps()
+{
+   schedule_node *last_grf_write[grf_count];
+   schedule_node *last_mrf_write[BRW_MAX_MRF];
+   schedule_node *last_conditional_mod = NULL;
+   /* Fixed HW registers are assumed to be separate from the virtual
+* GRFs, so they can be tracked separately.  We don't really write
+* to fixed GRFs much, so don't bother tracking them on a more
+* granular level.
+*/
+   schedule_node *last_fixed_grf_write = NULL;
+
+   /* The last instruction always needs to still be the last instruction.
+* Either it's flow control (IF, ELSE, ENDIF, DO, WHILE) and scheduling
+* other things after it would disturb the basic block, or it's the EOT
+* URB_WRITE and we should do a better job at dead code eliminating
+* anything that could have been scheduled after it.
+*/
+   schedule_node *last = (schedule_node *)instructions.get_tail();
+   add_barrier_deps(last);
+
+   memset(last_grf_write, 0, sizeof(last_grf_write));
+   memset(last_mrf_write, 0, sizeof(last_mrf_write));
+
+   /* top-to-bottom dependencies: RAW and WAW. */
+   foreach_list(node, &instructions) {
+  schedule_node *n = (schedule_node *)node;
+  vec4_instruction *inst = (vec4_instruction *)n->inst;
+
+  /* read-after-write deps. */
+  for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file == GRF) {
+add_dep(last_grf_write[inst->src[i].reg], n);
+ } else if (inst->src[i].file == HW_REG &&
+(inst->src[i].fixed_hw_reg.file ==
+ BRW_GENERAL_REGISTER_FILE)) {
+add_dep(last_fixed_grf_write, n);
+ } else if (inst->src[i].file != BAD_FILE &&
+inst->src[i].file != IMM &&
+inst->src[i].file != UNIFORM) {
+/* No reads from MRF, and ATTR is already translated away */
+assert(inst->src[i].file != MRF &&
+   inst->src[i].file != ATTR);
+add_barrier_deps(n);
+ }
+  }
+
+  for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+  * instruction once it's sent, not when the result comes
+  * back.
+  */
+ add_dep(last_mrf_write[inst->base_mrf + i], n);
+  }
+
+  if (inst->predicate) {
+ assert(last_conditional_mod);
+ add_dep(last_conditional_mod, n);
+  }
+
+  /* write-after-write deps. */
+  if (inst->dst.file == GRF) {
+ add_dep(last_grf_write[inst->dst.reg], n);
+ last_grf_write[inst->dst.reg] = n;
+  } else if (inst->dst.file == MRF) {
+ add_dep(last_mrf_write[inst->dst.reg], n);
+ last_mrf_write[inst->dst.reg] = n;
+ } else if (inst->dst.file == HW_REG &&
+ inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ last_fixed_grf_

[Mesa-dev] [PATCH 6/7] i965: Move most of the FS instruction scheduler code to a general class.

2013-04-30 Thread Eric Anholt
About half of this is shareable with the VS code.
---
 .../drivers/dri/i965/brw_schedule_instructions.cpp |  124 
 1 file changed, 75 insertions(+), 49 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp 
b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index af8af1d..94fdf3e 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -57,7 +57,7 @@ static bool debug = false;
 class schedule_node : public exec_node
 {
 public:
-   schedule_node(fs_inst *inst, const struct intel_context *intel)
+   schedule_node(backend_instruction *inst, const struct intel_context *intel)
{
   this->inst = inst;
   this->child_array_size = 0;
@@ -79,7 +79,7 @@ public:
void set_latency_gen4();
void set_latency_gen7(bool is_haswell);
 
-   fs_inst *inst;
+   backend_instruction *inst;
schedule_node **children;
int *child_latency;
int child_count;
@@ -341,15 +341,15 @@ schedule_node::set_latency_gen7(bool is_haswell)
 
 class instruction_scheduler {
 public:
-   instruction_scheduler(fs_visitor *v, void *mem_ctx, int grf_count,
- bool post_reg_alloc)
+   instruction_scheduler(backend_visitor *v, int grf_count, bool 
post_reg_alloc)
{
-  this->v = v;
-  this->mem_ctx = ralloc_context(mem_ctx);
+  this->bv = v;
+  this->mem_ctx = ralloc_context(v->mem_ctx);
   this->grf_count = grf_count;
   this->instructions.make_empty();
   this->instructions_to_schedule = 0;
   this->post_reg_alloc = post_reg_alloc;
+  this->time = 0;
}
 
~instruction_scheduler()
@@ -360,11 +360,10 @@ public:
void add_dep(schedule_node *before, schedule_node *after, int latency);
void add_dep(schedule_node *before, schedule_node *after);
 
-   void add_inst(fs_inst *inst);
-   void calculate_deps();
-   void schedule_instructions(fs_inst *next_block_header);
-
-   schedule_node *choose_instruction_to_schedule();
+   void run(exec_list *instructions);
+   void add_inst(backend_instruction *inst);
+   virtual void calculate_deps() = 0;
+   virtual schedule_node *choose_instruction_to_schedule() = 0;
 
/**
 * Returns how many cycles it takes the instruction to issue.
@@ -373,23 +372,43 @@ public:
 * with 1 cycle per vector dispatched.  Thus 8-wide pixel shaders take 2
 * cycles to dispatch and 16-wide (compressed) instructions take 4.
 */
-   int issue_time(fs_inst *inst);
+   virtual int issue_time(backend_instruction *inst) = 0;
 
-   bool is_compressed(fs_inst *inst);
+   void schedule_instructions(backend_instruction *next_block_header);
 
void *mem_ctx;
 
bool post_reg_alloc;
int instructions_to_schedule;
int grf_count;
+   int time;
exec_list instructions;
+   backend_visitor *bv;
+};
+
+class fs_instruction_scheduler : public instruction_scheduler
+{
+public:
+   fs_instruction_scheduler(fs_visitor *v, int grf_count, bool post_reg_alloc);
+   void calculate_deps();
+   bool is_compressed(fs_inst *inst);
+   schedule_node *choose_instruction_to_schedule();
+   int issue_time(backend_instruction *inst);
fs_visitor *v;
 };
 
+fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v,
+   int grf_count,
+   bool post_reg_alloc)
+   : instruction_scheduler(v, grf_count, post_reg_alloc),
+ v(v)
+{
+}
+
 void
-instruction_scheduler::add_inst(fs_inst *inst)
+instruction_scheduler::add_inst(backend_instruction *inst)
 {
-   schedule_node *n = new(mem_ctx) schedule_node(inst, v->intel);
+   schedule_node *n = new(mem_ctx) schedule_node(inst, bv->intel);
 
assert(!inst->is_head_sentinel());
assert(!inst->is_tail_sentinel());
@@ -480,7 +499,7 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
  * actually writes 2 MRFs.
  */
 bool
-instruction_scheduler::is_compressed(fs_inst *inst)
+fs_instruction_scheduler::is_compressed(fs_inst *inst)
 {
return (v->dispatch_width == 16 &&
   !inst->force_uncompressed &&
@@ -488,7 +507,7 @@ instruction_scheduler::is_compressed(fs_inst *inst)
 }
 
 void
-instruction_scheduler::calculate_deps()
+fs_instruction_scheduler::calculate_deps()
 {
/* Pre-register-allocation, this tracks the last write per VGRF (so
 * different reg_offsets within it can interfere when they shouldn't).
@@ -521,7 +540,7 @@ instruction_scheduler::calculate_deps()
/* top-to-bottom dependencies: RAW and WAW. */
foreach_list(node, &instructions) {
   schedule_node *n = (schedule_node *)node;
-  fs_inst *inst = n->inst;
+  fs_inst *inst = (fs_inst *)n->inst;
 
   if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT)
  add_barrier_deps(n);
@@ -629,7 +648,7 @@ instruction_scheduler::calculate_deps()
!node->is_head_sentinel();
node = prev, prev = node->prev) {
   schedule_node *n = (schedule_no

Re: [Mesa-dev] [PATCH 2/2] mesa/program: Don't copy propagate from swizzles.

2013-04-30 Thread Ian Romanick

On 04/24/2013 04:30 PM, Fabian Bieler wrote:

Do not propagate a copy if source and destination are identical.

Otherwise code like

MOV TEMP[0].xyzw, TEMP[0].wzyx
MOV TEMP[1].xyzw, TEMP[0].xyzw

is changed to

MOV TEMP[0].xyzw, TEMP[0].wzyx
MOV TEMP[1].xyzw, TEMP[0].wzyx

This fixes Piglit test shaders/glsl-copy-propagation-self-2 for drivers that
use Mesa IR.


D'oh.  Good catch.  Please add

NOTE: This is a candidate for stable branches.

to the commit message.

Reviewed-by: Ian Romanick 


---
  src/mesa/program/ir_to_mesa.cpp | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 14cf5ba..dff1762 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2757,6 +2757,8 @@ ir_to_mesa_visitor::copy_propagate(void)
/* If this is a copy, add it to the ACP. */
if (inst->op == OPCODE_MOV &&
  inst->dst.file == PROGRAM_TEMPORARY &&
+ !(inst->dst.file == inst->src[0].file &&
+   inst->dst.index == inst->src[0].index) &&
  !inst->dst.reladdr &&
  !inst->saturate &&
  !inst->src[0].reladdr &&



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3 v2] mesa: Add a script to generate the list of fixed bugs

2013-04-30 Thread Ian Romanick

On 04/18/2013 12:38 AM, Andreas Boll wrote:

This list appears in the fixed bugs section of the release notes.

v2: Add usage examples

NOTE: This is a candidate for the stable branches.


The series is

Reviewed-by: Ian Romanick 


---
  bin/bugzilla_mesa.sh |   52 ++
  1 file changed, 52 insertions(+)
  create mode 100755 bin/bugzilla_mesa.sh

diff --git a/bin/bugzilla_mesa.sh b/bin/bugzilla_mesa.sh
new file mode 100755
index 000..491ca0e
--- /dev/null
+++ b/bin/bugzilla_mesa.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# This script is used to generate the list of fixed bugs that
+# appears in the release notes files, with HTML formatting.
+#
+# Note: This script could take a while until all details have
+#   been fetched from bugzilla.
+#
+# Usage examples:
+#
+# $ bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3
+# $ bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 > bugfixes
+# $ bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 | tee bugfixes
+# $ DRYRUN=yes bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3
+# $ DRYRUN=yes bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 | wc -l
+
+
+# regex pattern: trim before url
+trim_before='s/.*\(http\)/\1/'
+
+# regex pattern: trim after url
+trim_after='s/\(show_bug.cgi?id=[0-9]*\).*/\1/'
+
+# regex pattern: always use https
+use_https='s/http:/https:/'
+
+# extract fdo urls from commit log
+urls=$(git log $* | grep 'bugs.freedesktop.org/show_bug' | sed -e $trim_before 
-e $trim_after -e $use_https | sort | uniq)
+
+# if DRYRUN is set to "yes", simply print the URLs and don't fetch the
+# details from fdo bugzilla.
+#DRYRUN=yes
+
+if [ "x$DRYRUN" = xyes ]; then
+   for i in $urls
+   do
+   echo $i
+   done
+else
+   echo ""
+   echo ""
+
+   for i in $urls
+   do
+   id=$(echo $i | cut -d'=' -f2)
+   summary=$(wget --quiet -O - $i | grep -e '.*' | sed -e 's/ 
*Bug [0-9]\+ – \(.*\)<\/title>/\1/')
+   echo "Bug $id - $summary"
+   echo ""
+   done
+
+   echo ""
+fi



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [GSoC 13]Interested in the idea: Improved application of GLSL complier optimizations

2013-04-30 Thread Ian Romanick

On 04/19/2013 06:04 AM, pkucoin wrote:

Hi,
My name is Sida Li and I am a senior student from Peking University in
China. I am interested in the project idea
"Improved application of GLSL compiler optimizations".
I have downloaded the source code and read some parts of it. First
let me talk about my understanding about the problem.
The loop we try to improve should be in the main.cpp:
  do {
  progress = do_common_optimization(shader->ir, false, false, 32);
  } while (progress);
In the function do_common_optimization, all optimization passes are
called in a certain order. After the loop has run a few times, some
optimization passes no longer contribute to optimizing the code, since the
original unoptimized code has already been optimized and no more code that can
be optimized by these passes is generated by
other optimization passes. So the time spent on these optimization passes
is wasted.
The goal is to find a static ordering, with possible repeats, of
optimization passes that does not compromise the quality of the
generated code. I find it difficult to start because a single
optimization pass or a combo of optimization passes may generate some
code that can be optimized by another optimization pass, which cannot be
determined in advance.


Since I suggested the project, I'll offer some opinions.

There are a few ways to attack this problem, I think.  Certain 
optimizations can generate opportunities for other optimizations to do 
work.  For example, copy propagation can "create" dead code.  Using this 
sort of analysis, it should be possible to come up with an initial 
ordering of passes that still uses the "while progress" loop.


Once that is done, it should be possible to instrument the code to see 
which optimization passes make progress on each pass.  This will let us 
know, for example, if do_algebraic never does anything after the first 
pass.  Then it's a matter of running the compiler on a large set of 
shaders and seeing what happens.



Can you give me some advice?
Best regards,
Sida Li


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] st/vdpau: fix background handling in the mixer

2013-04-30 Thread Christian König
From: Christian König 

Signed-off-by: Christian König 
---
 src/gallium/state_trackers/vdpau/mixer.c |   19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/mixer.c 
b/src/gallium/state_trackers/vdpau/mixer.c
index 1d2ae49..26db5c8 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -221,7 +221,7 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
 
vlVdpVideoMixer *vmixer;
vlVdpSurface *surf;
-   vlVdpOutputSurface *dst;
+   vlVdpOutputSurface *dst, *bg = NULL;
 
struct vl_compositor *compositor;
 
@@ -250,20 +250,21 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
if (!dst)
   return VDP_STATUS_INVALID_HANDLE;
 
-   pipe_mutex_lock(vmixer->device->mutex);
-   vlVdpResolveDelayedRendering(vmixer->device, NULL, NULL);
if (background_surface != VDP_INVALID_HANDLE) {
-  vlVdpOutputSurface *bg = vlGetDataHTAB(background_surface);
-  if (!bg) {
- pipe_mutex_unlock(vmixer->device->mutex);
+  bg = vlGetDataHTAB(background_surface);
+  if (!bg)
  return VDP_STATUS_INVALID_HANDLE;
-  }
-  vl_compositor_set_rgba_layer(&vmixer->cstate, compositor, layer++, 
bg->sampler_view,
-   RectToPipe(background_source_rect, &rect), 
NULL, NULL);
}
 
+   pipe_mutex_lock(vmixer->device->mutex);
+   vlVdpResolveDelayedRendering(vmixer->device, NULL, NULL);
+
vl_compositor_clear_layers(&vmixer->cstate);
 
+   if (bg)
+  vl_compositor_set_rgba_layer(&vmixer->cstate, compositor, layer++, 
bg->sampler_view,
+   RectToPipe(background_source_rect, &rect), 
NULL, NULL);
+
switch (current_picture_structure) {
case VDP_VIDEO_MIXER_PICTURE_STRUCTURE_TOP_FIELD:
   deinterlace = VL_COMPOSITOR_BOB_TOP;
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] vl/buffer: use 2D_ARRAY instead of 3D textures

2013-04-30 Thread Christian König
From: Christian König 

Signed-off-by: Christian König 
---
 src/gallium/auxiliary/vl/vl_compositor.c|   12 ++--
 src/gallium/auxiliary/vl/vl_video_buffer.c  |   23 ---
 src/gallium/auxiliary/vl/vl_video_buffer.h  |7 ---
 src/gallium/drivers/r600/r600_uvd.c |   14 +++---
 src/gallium/drivers/radeonsi/radeonsi_uvd.c |   14 +++---
 src/gallium/state_trackers/vdpau/surface.c  |8 
 6 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
b/src/gallium/auxiliary/vl/vl_compositor.c
index 1c393a9..62f593a 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -152,7 +152,7 @@ create_frag_shader_video_buffer(struct vl_compositor *c)
 * fragment = csc * texel
 */
for (i = 0; i < 3; ++i)
-  ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), 
TGSI_TEXTURE_3D, tc, sampler[i]);
+  ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), 
TGSI_TEXTURE_2D_ARRAY, tc, sampler[i]);
 
ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_W), 
ureg_imm1f(shader, 1.0f));
 
@@ -207,7 +207,7 @@ create_frag_shader_weave(struct vl_compositor *c)
i_tc[i], ureg_imm1f(shader, 0.5f));
   ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), 
ureg_src(t_tc[i]));
   ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_W),
-   ureg_imm1f(shader, i ? -0.25f : 0.25f));
+   ureg_imm1f(shader, i ? 1.0f : 0.0f));
   ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ),
ureg_src(t_tc[i]), ureg_imm1f(shader, 0.5f));
   ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Y),
@@ -227,7 +227,7 @@ create_frag_shader_weave(struct vl_compositor *c)
 TGSI_SWIZZLE_X, j ? TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y, 
TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
 
  ureg_TEX(shader, ureg_writemask(t_texel[i], TGSI_WRITEMASK_X << j),
-  TGSI_TEXTURE_3D, src, sampler[j]);
+  TGSI_TEXTURE_2D_ARRAY, src, sampler[j]);
   }
 
/* calculate linear interpolation factor
@@ -558,7 +558,7 @@ static INLINE struct u_rect
 default_rect(struct vl_compositor_layer *layer)
 {
struct pipe_resource *res = layer->sampler_views[0]->texture;
-   struct u_rect rect = { 0, res->width0, 0, res->height0 * res->depth0 };
+   struct u_rect rect = { 0, res->width0, 0, res->height0 * res->array_size };
return rect;
 }
 
@@ -902,14 +902,14 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state 
*s,
  break;
 
   case VL_COMPOSITOR_BOB_TOP:
- s->layers[layer].zw.x = 0.25f;
+ s->layers[layer].zw.x = 0.0f;
  s->layers[layer].src.tl.y += half_a_line;
  s->layers[layer].src.br.y += half_a_line;
  s->layers[layer].fs = c->fs_video_buffer;
  break;
 
   case VL_COMPOSITOR_BOB_BOTTOM:
- s->layers[layer].zw.x = 0.75f;
+ s->layers[layer].zw.x = 1.0f;
  s->layers[layer].src.tl.y -= half_a_line;
  s->layers[layer].src.br.y -= half_a_line;
  s->layers[layer].fs = c->fs_video_buffer;
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c 
b/src/gallium/auxiliary/vl/vl_video_buffer.c
index d61dab2..220c3ea 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -216,15 +216,16 @@ void
 vl_vide_buffer_template(struct pipe_resource *templ,
 const struct pipe_video_buffer *tmpl,
 enum pipe_format resource_format,
-unsigned depth, unsigned usage, unsigned plane)
+unsigned array_size, unsigned usage,
+unsigned plane)
 {
memset(templ, 0, sizeof(*templ));
-   templ->target = depth > 1 ? PIPE_TEXTURE_3D : PIPE_TEXTURE_2D;
+   templ->target = array_size > 1 ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
templ->format = resource_format;
templ->width0 = tmpl->width;
templ->height0 = tmpl->height;
-   templ->depth0 = depth;
-   templ->array_size = 1;
+   templ->depth0 = 1;
+   templ->array_size = array_size;
templ->bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
templ->usage = usage;
 
@@ -349,15 +350,15 @@ vl_video_buffer_surfaces(struct pipe_video_buffer *buffer)
struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer;
struct pipe_surface surf_templ;
struct pipe_context *pipe;
-   unsigned i, j, depth, surf;
+   unsigned i, j, array_size, surf;
 
assert(buf);
 
pipe = buf->base.context;
 
-   depth = buffer->interlaced ? 2 : 1;
+   array_size = buffer->interlaced ? 2 : 1;
for (i = 0, surf = 0; i < VL_NUM_COMPONENTS; ++i) {
-  for (j = 0; j < depth; ++j, ++surf) {
+  for (j = 0; j < array_size; ++j, ++surf) {
  assert(surf < (VL_NUM_COMPONENTS * 2));
 
  if (!buf->resources[i]) {
@@ -433,7 +434,7 @@ st

[Mesa-dev] [PATCH 1/3] vl/compositor: cleanup background clearing

2013-04-30 Thread Christian König
From: Christian König 

Add an extra parameter to specify if we should clear the render target.

Signed-off-by: Christian König 
---
 src/gallium/auxiliary/vl/vl_compositor.c|7 ---
 src/gallium/auxiliary/vl/vl_compositor.h|3 ++-
 src/gallium/state_trackers/vdpau/device.c   |2 +-
 src/gallium/state_trackers/vdpau/mixer.c|2 +-
 src/gallium/state_trackers/vdpau/output.c   |   12 
 src/gallium/state_trackers/vdpau/presentation.c |2 +-
 src/gallium/state_trackers/xvmc/surface.c   |2 +-
 7 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
b/src/gallium/auxiliary/vl/vl_compositor.c
index 0df2b57..1c393a9 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -986,7 +986,8 @@ void
 vl_compositor_render(struct vl_compositor_state *s,
  struct vl_compositor   *c,
  struct pipe_surface*dst_surface,
- struct u_rect  *dirty_area)
+ struct u_rect  *dirty_area,
+ boolclear_dirty)
 {
assert(c);
assert(dst_surface);
@@ -1004,8 +1005,8 @@ vl_compositor_render(struct vl_compositor_state *s,
 
gen_vertex_data(c, s, dirty_area);
 
-   if (dirty_area && (dirty_area->x0 < dirty_area->x1 ||
-  dirty_area->y0 < dirty_area->y1)) {
+   if (clear_dirty && dirty_area &&
+   (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {
 
   c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
0, 0, dst_surface->width, 
dst_surface->height);
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h 
b/src/gallium/auxiliary/vl/vl_compositor.h
index 6de6ca0..2a1f66c 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -224,7 +224,8 @@ void
 vl_compositor_render(struct vl_compositor_state *state,
  struct vl_compositor   *compositor,
  struct pipe_surface*dst_surface,
- struct u_rect  *dirty_area);
+ struct u_rect  *dirty_area,
+ boolclear_dirty);
 
 /**
  * destroy this compositor
diff --git a/src/gallium/state_trackers/vdpau/device.c 
b/src/gallium/state_trackers/vdpau/device.c
index dd586f5..c530f43 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -279,7 +279,7 @@ vlVdpResolveDelayedRendering(vlVdpDevice *dev, struct 
pipe_surface *surface, str
   dirty_area = &vlsurface->dirty_area;
}
 
-   vl_compositor_render(cstate, &dev->compositor, surface, dirty_area);
+   vl_compositor_render(cstate, &dev->compositor, surface, dirty_area, true);
 
dev->delayed_rendering.surface = VDP_INVALID_HANDLE;
dev->delayed_rendering.cstate = NULL;
diff --git a/src/gallium/state_trackers/vdpau/mixer.c 
b/src/gallium/state_trackers/vdpau/mixer.c
index 81a5c29..1d2ae49 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -312,7 +312,7 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
if (!vmixer->noise_reduction.filter && !vmixer->sharpness.filter)
   vlVdpSave4DelayedRendering(vmixer->device, destination_surface, 
&vmixer->cstate);
else {
-  vl_compositor_render(&vmixer->cstate, compositor, dst->surface, 
&dst->dirty_area);
+  vl_compositor_render(&vmixer->cstate, compositor, dst->surface, 
&dst->dirty_area, true);
 
   /* applying the noise reduction after scaling is actually not very
  clever, but currently we should avoid to copy around the image
diff --git a/src/gallium/state_trackers/vdpau/output.c 
b/src/gallium/state_trackers/vdpau/output.c
index df0f458..7266cdb 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -382,8 +382,7 @@ vlVdpOutputSurfacePutBitsIndexed(VdpOutputSurface surface,
vl_compositor_clear_layers(cstate);
vl_compositor_set_palette_layer(cstate, compositor, 0, sv_idx, sv_tbl, 
NULL, NULL, false);
vl_compositor_set_layer_dst_area(cstate, 0, RectToPipe(destination_rect, 
&dst_rect));
-   vl_compositor_render(cstate, compositor, vlsurface->surface, NULL);
-   vl_compositor_reset_dirty_area(&vlsurface->dirty_area);
+   vl_compositor_render(cstate, compositor, vlsurface->surface, 
&vlsurface->dirty_area, false);
 
pipe_sampler_view_reference(&sv_idx, NULL);
pipe_sampler_view_reference(&sv_tbl, NULL);
@@ -489,8 +488,7 @@ vlVdpOutputSurfacePutBitsYCbCr(VdpOutputSurface surface,
vl_compositor_clear_layers(cstate);
vl_compositor_set_buffer_layer(cstate, compositor, 0, vbuffer, NULL, NULL, 
VL_COMPOSITOR_WEAVE);
vl_compositor_set_layer_dst_area(cstate, 0, RectToPip

Re: [Mesa-dev] GSOC 2013

2013-04-30 Thread Ian Romanick

On 04/20/2013 10:26 PM, Piyush Tiwari wrote:

Hello,
I am really interested in doing the GSOC 2013 project "Find common
patterns in real GLSL shaders".


Implementation:
Algorithm:- Max-miner algorithm as it uses the same data structure as
Apriori i.e. hash tree.


I've only skimmed the Bayardo paper on Max-Miner, and I think it may be 
overkill.  It is optimized for finding very long patterns in a database. 
 In this context "very long" is likely longer than any GLSL shader our 
compiler has ever encountered.  That's not to say it's a bad idea, it 
just might be more work to implement than is necessary for this problem. 
 Doing a quick search, I don't see any papers about applying this 
algorithm to this problem, so, from a pure research perspective, it may 
be interesting nonetheless.


I think the difficulty of this project will be finding a representation 
of programs that will allow them to be mined.  We need to be able to 
detect that "a + b * c" in one shader is the same pattern as "d + e * f" 
in another shader.  For longer programs with lots of variables, this 
becomes challenging.



The following implementation has been found faster than normal ways:
Max-Miner uses the hash tree to quickly look up all candidate groups
whose head appears in the transaction. Then, for each candidate
group "g" identified, it traverses down its tail items one by one.
(Efficiently mining long patterns from database).

I would like some reviews on my idea.

Thanks
Piyush
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Ignore redundant prototypes after a function's been defined.

2013-04-30 Thread Chad Versace

On 04/30/2013 01:52 AM, Kenneth Graunke wrote:

Consider the following shader:

 vec4 f(vec4 v) { return v; }
 vec4 f(vec4 v);

The prototype exactly matches the signature of the earlier definition,
so there's absolutely no point in it.  However, it doesn't appear to
be illegal.  The GLSL 4.30 specification offers two relevant quotes:

"If a function name is declared twice with the same parameter types,
  then the return types and all qualifiers must also match, and it is the
  same function being declared."

"User-defined functions can have multiple declarations, but only one
  definition."

In this case the same function was declared twice, and there's only one
definition, which fits both pieces of text.  There doesn't appear to be
any text saying late prototypes are illegal, so presumably it's valid.

Unfortunately, it currently triggers an assertion failure:
ir_dereference_variable @  specifies undeclared variable `v' @ 

When we process the second line, we look for an existing exact match so
we can enforce the one-definition rule.  We then leave sig set to that
existing function, and hit sig->replace_parameters(&hir_parameters),
unfortunately nuking our existing definition's parameters (which have
actual dereferences) with the prototype's bogus unused parameters.

Simply bailing out and ignoring such late prototypes is the safest
thing to do.

Fixes Piglit's late-proto.vert as well as 3DMark/Ice Storm for Android.

NOTE: This is a candidate for stable branches.
Cc: Tapani Pälli 
Cc: Ian Romanick 
Signed-off-by: Kenneth Graunke 


Reviewed-by: Chad Versace 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Ignore redundant prototypes after a function's been defined.

2013-04-30 Thread Jordan Justen
Reviewed-by: Jordan Justen 

On Tue, Apr 30, 2013 at 1:52 AM, Kenneth Graunke  wrote:
> Consider the following shader:
>
> vec4 f(vec4 v) { return v; }
> vec4 f(vec4 v);
>
> The prototype exactly matches the signature of the earlier definition,
> so there's absolutely no point in it.  However, it doesn't appear to
> be illegal.  The GLSL 4.30 specification offers two relevant quotes:
>
> "If a function name is declared twice with the same parameter types,
>  then the return types and all qualifiers must also match, and it is the
>  same function being declared."
>
> "User-defined functions can have multiple declarations, but only one
>  definition."
>
> In this case the same function was declared twice, and there's only one
> definition, which fits both pieces of text.  There doesn't appear to be
> any text saying late prototypes are illegal, so presumably it's valid.
>
> Unfortunately, it currently triggers an assertion failure:
> ir_dereference_variable @  specifies undeclared variable `v' @ 
>
> When we process the second line, we look for an existing exact match so
> we can enforce the one-definition rule.  We then leave sig set to that
> existing function, and hit sig->replace_parameters(&hir_parameters),
> unfortunately nuking our existing definition's parameters (which have
> actual dereferences) with the prototype's bogus unused parameters.
>
> Simply bailing out and ignoring such late prototypes is the safest
> thing to do.
>
> Fixes Piglit's late-proto.vert as well as 3DMark/Ice Storm for Android.
>
> NOTE: This is a candidate for stable branches.
> Cc: Tapani Pälli 
> Cc: Ian Romanick 
> Signed-off-by: Kenneth Graunke 
> ---
>  src/glsl/ast_to_hir.cpp | 15 +++
>  1 file changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
> index 2638411..e595110 100644
> --- a/src/glsl/ast_to_hir.cpp
> +++ b/src/glsl/ast_to_hir.cpp
> @@ -3205,10 +3205,17 @@ ast_function::hir(exec_list *instructions,
>  "match prototype", name);
>  }
>
> -if (is_definition && sig->is_defined) {
> -   YYLTYPE loc = this->get_location();
> -
> -   _mesa_glsl_error(& loc, state, "function `%s' redefined", name);
> + if (sig->is_defined) {
> +if (is_definition) {
> +   YYLTYPE loc = this->get_location();
> +   _mesa_glsl_error(& loc, state, "function `%s' redefined", 
> name);
> +} else {
> +   /* We just encountered a prototype that exactly matches a
> +* function that's already been defined.  This is redundant,
> +* and we should ignore it.
> +*/
> +   return NULL;
> +}
>  }
>}
> } else {
> --
> 1.8.2.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] glsl: add AMD_vertex_shader_layer support

2013-04-30 Thread Ian Romanick

On 04/27/2013 04:32 PM, Jordan Justen wrote:

This GLSL extension requires that AMD_vertex_shader_layer be
enabled by the driver.


Most (all?) extensions also add a preprocessor define.  Can you poke at 
AMD's driver to see if GL_AMD_vertex_shader_layer is defined?  If so, 
then we need to add it too.



Signed-off-by: Jordan Justen 
---
  src/glsl/builtin_variables.cpp  |   31 +++
  src/glsl/glsl_parser_extras.cpp |1 +
  src/glsl/glsl_parser_extras.h   |2 ++
  3 files changed, 34 insertions(+)

diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index b0c7a20..098c3f1 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -39,6 +39,12 @@ generate_ARB_draw_instanced_variables(exec_list *,
struct _mesa_glsl_parse_state *,
bool, _mesa_glsl_parser_targets);

+static void
+generate_AMD_vertex_shader_layer_variables(exec_list *instructions,
+   struct _mesa_glsl_parse_state 
*state,
+   bool warn,
+   _mesa_glsl_parser_targets target);
+
  struct builtin_variable {
 enum ir_variable_mode mode;
 int slot;
@@ -818,6 +824,8 @@ generate_130_vs_variables(exec_list *instructions,
"gl_ClipDistance", clip_distance_array_type, ir_var_shader_out,
  VARYING_SLOT_CLIP_DIST0);

+   generate_AMD_vertex_shader_layer_variables(instructions, state, false,
+  vertex_shader);
  }


@@ -1020,6 +1028,29 @@ generate_ARB_draw_instanced_variables(exec_list 
*instructions,
 }
  }

+static void
+generate_AMD_vertex_shader_layer_variables(exec_list *instructions,
+   struct _mesa_glsl_parse_state 
*state,
+   bool warn,
+   _mesa_glsl_parser_targets target)
+{
+   /* gl_Layer is only available in the vertex shader for the
+* AMD_vertex_shader_layer extension. It will also be available in the
+* geometry shader when GLSL 1.50 is supported.
+*/
+   if (target != vertex_shader)
+  return;
+
+   if (state->AMD_vertex_shader_layer_enable) {
+  ir_variable *inst =
+ add_variable(instructions, state->symbols,
+  "gl_Layer", glsl_type::int_type,
+  ir_var_shader_out, VARYING_SLOT_LAYER);
+
+  if (warn)
+ inst->warn_extension = "GL_AMD_vertex_shader_layer";
+   }
+}

  static void
  generate_ARB_shader_stencil_export_variables(exec_list *instructions,
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 0992294..e419264 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -468,6 +468,7 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
 EXT(ARB_shading_language_packing,   true,  false, true,  true,  false, 
ARB_shading_language_packing),
 EXT(ARB_texture_multisample,true,  false, true,  true,  false, 
ARB_texture_multisample),
 EXT(ARB_texture_query_lod,  false, false, true,  true,  false, 
ARB_texture_query_lod),
+   EXT(AMD_vertex_shader_layer,true,  false, false, true,  false, 
AMD_vertex_shader_layer),
  };

  #undef EXT
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 95891b5..3386365 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -284,6 +284,8 @@ struct _mesa_glsl_parse_state {
 bool ARB_texture_multisample_warn;
 bool ARB_texture_query_lod_enable;
 bool ARB_texture_query_lod_warn;
+   bool AMD_vertex_shader_layer_enable;
+   bool AMD_vertex_shader_layer_warn;
 /*@}*/

 /** Extensions supported by the OpenGL implementation. */



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] glsl: add AMD_vertex_shader_layer support

2013-04-30 Thread Jordan Justen
On Tue, Apr 30, 2013 at 9:57 AM, Ian Romanick  wrote:
> On 04/27/2013 04:32 PM, Jordan Justen wrote:
>>
>> This GLSL extension requires that AMD_vertex_shader_layer be
>> enabled by the driver.
>
> Most (all?) extensions also add a preprocessor define.  Can you poke at
> AMD's driver to see if GL_AMD_vertex_shader_layer is defined?  If so, then
> we need to add it too.

Unfortunately, I don't have an AMD card.

Anuj tried my piglit test with his AMD card, and found the extension
wasn't available.

-Jordan

>> Signed-off-by: Jordan Justen 
>> ---
>>   src/glsl/builtin_variables.cpp  |   31 +++
>>   src/glsl/glsl_parser_extras.cpp |1 +
>>   src/glsl/glsl_parser_extras.h   |2 ++
>>   3 files changed, 34 insertions(+)
>>
>> diff --git a/src/glsl/builtin_variables.cpp
>> b/src/glsl/builtin_variables.cpp
>> index b0c7a20..098c3f1 100644
>> --- a/src/glsl/builtin_variables.cpp
>> +++ b/src/glsl/builtin_variables.cpp
>> @@ -39,6 +39,12 @@ generate_ARB_draw_instanced_variables(exec_list *,
>> struct _mesa_glsl_parse_state *,
>> bool, _mesa_glsl_parser_targets);
>>
>> +static void
>> +generate_AMD_vertex_shader_layer_variables(exec_list *instructions,
>> +   struct _mesa_glsl_parse_state
>> *state,
>> +   bool warn,
>> +   _mesa_glsl_parser_targets
>> target);
>> +
>>   struct builtin_variable {
>>  enum ir_variable_mode mode;
>>  int slot;
>> @@ -818,6 +824,8 @@ generate_130_vs_variables(exec_list *instructions,
>> "gl_ClipDistance", clip_distance_array_type,
>> ir_var_shader_out,
>>   VARYING_SLOT_CLIP_DIST0);
>>
>> +   generate_AMD_vertex_shader_layer_variables(instructions, state, false,
>> +  vertex_shader);
>>   }
>>
>>
>> @@ -1020,6 +1028,29 @@ generate_ARB_draw_instanced_variables(exec_list
>> *instructions,
>>  }
>>   }
>>
>> +static void
>> +generate_AMD_vertex_shader_layer_variables(exec_list *instructions,
>> +   struct _mesa_glsl_parse_state
>> *state,
>> +   bool warn,
>> +   _mesa_glsl_parser_targets
>> target)
>> +{
>> +   /* gl_Layer is only available in the vertex shader for the
>> +* AMD_vertex_shader_layer extension. It will also be available in the
>> +* geometry shader when GLSL 1.50 is supported.
>> +*/
>> +   if (target != vertex_shader)
>> +  return;
>> +
>> +   if (state->AMD_vertex_shader_layer_enable) {
>> +  ir_variable *inst =
>> + add_variable(instructions, state->symbols,
>> +  "gl_Layer", glsl_type::int_type,
>> +  ir_var_shader_out, VARYING_SLOT_LAYER);
>> +
>> +  if (warn)
>> + inst->warn_extension = "GL_AMD_vertex_shader_layer";
>> +   }
>> +}
>>
>>   static void
>>   generate_ARB_shader_stencil_export_variables(exec_list *instructions,
>> diff --git a/src/glsl/glsl_parser_extras.cpp
>> b/src/glsl/glsl_parser_extras.cpp
>> index 0992294..e419264 100644
>> --- a/src/glsl/glsl_parser_extras.cpp
>> +++ b/src/glsl/glsl_parser_extras.cpp
>> @@ -468,6 +468,7 @@ static const _mesa_glsl_extension
>> _mesa_glsl_supported_extensions[] = {
>>  EXT(ARB_shading_language_packing,   true,  false, true,  true,
>> false, ARB_shading_language_packing),
>>  EXT(ARB_texture_multisample,true,  false, true,  true,
>> false, ARB_texture_multisample),
>>  EXT(ARB_texture_query_lod,  false, false, true,  true,
>> false, ARB_texture_query_lod),
>> +   EXT(AMD_vertex_shader_layer,true,  false, false, true,  false,
>> AMD_vertex_shader_layer),
>>   };
>>
>>   #undef EXT
>> diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
>> index 95891b5..3386365 100644
>> --- a/src/glsl/glsl_parser_extras.h
>> +++ b/src/glsl/glsl_parser_extras.h
>> @@ -284,6 +284,8 @@ struct _mesa_glsl_parse_state {
>>  bool ARB_texture_multisample_warn;
>>  bool ARB_texture_query_lod_enable;
>>  bool ARB_texture_query_lod_warn;
>> +   bool AMD_vertex_shader_layer_enable;
>> +   bool AMD_vertex_shader_layer_warn;
>>  /*@}*/
>>
>>  /** Extensions supported by the OpenGL implementation. */
>>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] mesa: implement glFramebufferTexture

2013-04-30 Thread Ian Romanick

On 04/27/2013 04:37 PM, Jordan Justen wrote:

Signed-off-by: Jordan Justen 
---
  src/mapi/glapi/gen/GL3x.xml |2 +-
  src/mesa/main/fbobject.c|   17 +
  src/mesa/main/fbobject.h|4 
  3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/mapi/glapi/gen/GL3x.xml b/src/mapi/glapi/gen/GL3x.xml
index 9ca3d47..5078f7b 100644
--- a/src/mapi/glapi/gen/GL3x.xml
+++ b/src/mapi/glapi/gen/GL3x.xml
@@ -607,7 +607,7 @@
  


-  
+  
  
  
  
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 419e871..32dcc75 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -2402,6 +2402,23 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum 
attachment,


  void GLAPIENTRY
+_mesa_FramebufferTexture(GLenum target, GLenum attachment,
+ GLuint texture, GLint level)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if ((_mesa_is_desktop_gl(ctx) && ctx->Version >= 32) ||
+   ctx->Extensions.ARB_geometry_shader4) {


This should be

if (_mesa_is_desktop_gl(ctx) && (ctx->Version >= 32 ||
 ctx->Extensions.ARB_geometry_shader4))

If a driver sets the ARB_geometry_shader4 bit in an ES context, this 
function should still generate GL_INVALID_OPERATION.


Actually... is the _mesa_is_desktop_gl check even necessary?  This 
function shouldn't get put in the dispatch table for an ES context at 
all.  Right?



+  framebuffer_texture(ctx, "Layer", target, attachment, 0, texture,
+  level, 0, GL_TRUE);
+   } else {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "unsupported function (glFramebufferTexture) called");
+   }
+}
+
+
+void GLAPIENTRY
  _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment,
   GLenum renderbufferTarget,
   GLuint renderbuffer)
diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h
index ba013fd..2d88001 100644
--- a/src/mesa/main/fbobject.h
+++ b/src/mesa/main/fbobject.h
@@ -192,6 +192,10 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum 
attachment,
   GLuint texture, GLint level, GLint layer);

  extern void GLAPIENTRY
+_mesa_FramebufferTexture(GLenum target, GLenum attachment,
+ GLuint texture, GLint level);
+
+extern void GLAPIENTRY
  _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment,
   GLenum renderbuffertarget,
   GLuint renderbuffer);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Ignore redundant prototypes after a function's been defined.

2013-04-30 Thread Ian Romanick

On 04/30/2013 01:52 AM, Kenneth Graunke wrote:

Consider the following shader:

 vec4 f(vec4 v) { return v; }
 vec4 f(vec4 v);

The prototype exactly matches the signature of the earlier definition,
so there's absolutely no point in it.  However, it doesn't appear to
be illegal.  The GLSL 4.30 specification offers two relevant quotes:

"If a function name is declared twice with the same parameter types,
  then the return types and all qualifiers must also match, and it is the
  same function being declared."

"User-defined functions can have multiple declarations, but only one
  definition."

In this case the same function was declared twice, and there's only one
definition, which fits both pieces of text.  There doesn't appear to be
any text saying late prototypes are illegal, so presumably it's valid.

Unfortunately, it currently triggers an assertion failure:
ir_dereference_variable @  specifies undeclared variable `v' @ 


OMG.  I wonder if this is the bug with Second Life.

https://bugs.freedesktop.org/show_bug.cgi?id=39251
https://bugs.freedesktop.org/show_bug.cgi?id=61773


When we process the second line, we look for an existing exact match so
we can enforce the one-definition rule.  We then leave sig set to that
existing function, and hit sig->replace_parameters(&hir_parameters),
unfortunately nuking our existing definition's parameters (which have
actual dereferences) with the prototype's bogus unused parameters.

Simply bailing out and ignoring such late prototypes is the safest
thing to do.

Fixes Piglit's late-proto.vert as well as 3DMark/Ice Storm for Android.

NOTE: This is a candidate for stable branches.
Cc: Tapani Pälli 
Cc: Ian Romanick 


Reviewed-by: Ian Romanick 


Signed-off-by: Kenneth Graunke 
---
  src/glsl/ast_to_hir.cpp | 15 +++
  1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 2638411..e595110 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -3205,10 +3205,17 @@ ast_function::hir(exec_list *instructions,
 "match prototype", name);
 }

-if (is_definition && sig->is_defined) {
-   YYLTYPE loc = this->get_location();
-
-   _mesa_glsl_error(& loc, state, "function `%s' redefined", name);
+ if (sig->is_defined) {
+if (is_definition) {
+   YYLTYPE loc = this->get_location();
+   _mesa_glsl_error(& loc, state, "function `%s' redefined", name);
+} else {
+   /* We just encountered a prototype that exactly matches a
+* function that's already been defined.  This is redundant,
+* and we should ignore it.
+*/
+   return NULL;
+}
 }
}
 } else {



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] mesa: implement glFramebufferTexture

2013-04-30 Thread Jordan Justen
On Tue, Apr 30, 2013 at 10:04 AM, Ian Romanick  wrote:
> On 04/27/2013 04:37 PM, Jordan Justen wrote:
>>
>> Signed-off-by: Jordan Justen 
>> ---
>>   src/mapi/glapi/gen/GL3x.xml |2 +-
>>   src/mesa/main/fbobject.c|   17 +
>>   src/mesa/main/fbobject.h|4 
>>   3 files changed, 22 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/mapi/glapi/gen/GL3x.xml b/src/mapi/glapi/gen/GL3x.xml
>> index 9ca3d47..5078f7b 100644
>> --- a/src/mapi/glapi/gen/GL3x.xml
>> +++ b/src/mapi/glapi/gen/GL3x.xml
>> @@ -607,7 +607,7 @@
>>   
>> 
>>
>> -  
>> +  
>>   
>>   
>>   
>> diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
>> index 419e871..32dcc75 100644
>> --- a/src/mesa/main/fbobject.c
>> +++ b/src/mesa/main/fbobject.c
>> @@ -2402,6 +2402,23 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum
>> attachment,
>>
>>
>>   void GLAPIENTRY
>> +_mesa_FramebufferTexture(GLenum target, GLenum attachment,
>> + GLuint texture, GLint level)
>> +{
>> +   GET_CURRENT_CONTEXT(ctx);
>> +
>> +   if ((_mesa_is_desktop_gl(ctx) && ctx->Version >= 32) ||
>> +   ctx->Extensions.ARB_geometry_shader4) {
>
>
> This should be
>
> if (_mesa_is_desktop_gl(ctx) && (ctx->Version >= 32 ||
>  ctx->Extensions.ARB_geometry_shader4))
>
> If a driver sets the ARB_geometry_shader4 bit in an ES context, this
> function should still generate GL_INVALID_OPERATION.
>
> Actually... is the _mesa_is_desktop_gl check even necessary?  This function
> shouldn't get put in the dispatch table for an ES context at all.  Right?

You are right. The _mesa_is_desktop_gl check is not necessary.

Hopefully at some point we can update the api_exec.c to handle GL
versions / extensions as well.

-Jordan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeon/uvd: fix quant scan order for mpeg2

2013-04-30 Thread Christian König
From: Christian König 

Signed-off-by: Christian König 
---
 src/gallium/drivers/radeon/radeon_uvd.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_uvd.c 
b/src/gallium/drivers/radeon/radeon_uvd.c
index 5a7eab6..bcd282e 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -514,6 +514,7 @@ static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, 
struct pipe_video_buff
 static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec,
   struct pipe_mpeg12_picture_desc *pic)
 {
+   const int *zscan = pic->alternate_scan ? vl_zscan_alternate : 
vl_zscan_normal;
struct ruvd_mpeg2 result;
unsigned i;
 
@@ -524,8 +525,11 @@ static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder 
*dec,
 
result.load_intra_quantiser_matrix = 1;
result.load_nonintra_quantiser_matrix = 1;
-   memcpy(&result.intra_quantiser_matrix, pic->intra_matrix, 64);
-   memcpy(&result.nonintra_quantiser_matrix, pic->non_intra_matrix, 64);
+
+   for (i = 0; i < 64; ++i) {
+   result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];
+   result.nonintra_quantiser_matrix[i] = 
pic->non_intra_matrix[zscan[i]];
+   }
 
result.profile_and_level_indication = 0;
result.chroma_format = 0x1;
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [v2 07/10] dri: propagate extra dma_buf import attributes to the drivers

2013-04-30 Thread Ian Romanick

On 04/29/2013 04:08 AM, Topi Pohjolainen wrote:

Signed-off-by: Topi Pohjolainen 
---
  include/GL/internal/dri_interface.h| 23 +++
  src/egl/drivers/dri2/egl_dri2.c|  1 +
  src/mesa/drivers/dri/intel/intel_regions.h |  7 +++
  src/mesa/drivers/dri/intel/intel_screen.c  |  9 +
  4 files changed, 40 insertions(+)

diff --git a/include/GL/internal/dri_interface.h 
b/include/GL/internal/dri_interface.h
index 30ce175..2691893 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -1025,6 +1025,25 @@ struct __DRIdri2ExtensionRec {
  * 7+. Each query will return a
  * new fd. */

+enum __DRIYUVColorSpace {
+   __DRI_YUV_COLOR_SPACE_UNDEFINED = 0,
+   __DRI_YUV_COLOR_SPACE_ITU_REC601 = 0x327F,
+   __DRI_YUV_COLOR_SPACE_ITU_REC709 = 0x3280,
+   __DRI_YUV_COLOR_SPACE_ITU_REC2020 = 0x3281
+};
+
+enum __DRISampleRange {
+   __DRI_YUV_RANGE_UNDEFINED = 0,
+   __DRI_YUV_FULL_RANGE = 0x3282,
+   __DRI_YUV_NARROW_RANGE = 0x3283
+};
+
+enum __DRIChromaSiting {
+   __DRI_YUV_CHROMA_SITING_UNDEFINED = 0,
+   __DRI_YUV_CHROMA_SITING_0 = 0x3284,
+   __DRI_YUV_CHROMA_SITING_0_5 = 0x3285
+};
+
  /**
   * \name Reasons that __DRIimageExtensionRec::createImageFromTexture might 
fail
   */
@@ -1129,6 +1148,10 @@ struct __DRIimageExtensionRec {
   int width, int height, int fourcc,
   int *fds, int num_fds,
   int *strides, int *offsets,
+ enum __DRIYUVColorSpace yuv_color_space,
+ enum __DRISampleRange sample_range,
+ enum __DRIChromaSiting horizontal_siting,
+ enum __DRIChromaSiting vertical_siting,
   void *loaderPrivate);


Uh... you absolutely cannot change existing functions.  This is ABI!  If 
you run an updated driver against an old libEGL (or vice versa), it will 
explode.


You need to add a new function and bump the version of DRI_IMAGE to 8. 
egl_dri2.c can use the version of the DRI_IMAGE extension from the driver 
to determine whether to enable EGL_EXT_image_dma_buf_import.



  };

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 1011f27..10fdcef 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -1523,6 +1523,7 @@ dri2_wl_reference_buffer(void *user_data, uint32_t name, 
int fd,
  &fd, 1,
  buffer->stride,
  buffer->offset,
+0, 0, 0, 0,
  NULL);

 if (img == NULL)
diff --git a/src/mesa/drivers/dri/intel/intel_regions.h 
b/src/mesa/drivers/dri/intel/intel_regions.h
index e610f6b..323f834 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.h
+++ b/src/mesa/drivers/dri/intel/intel_regions.h
@@ -40,6 +40,7 @@

  #include "main/mtypes.h"
  #include "intel_bufmgr.h"
+#include 

  #ifdef __cplusplus
  extern "C" {
@@ -156,6 +157,12 @@ struct __DRIimageRec {
 GLuint tile_y;
 bool has_depthstencil;

+   /* Provided by dma_buf import extension */
+   enum __DRIYUVColorSpace yuv_color_space;
+   enum __DRISampleRange sample_range;
+   enum __DRIChromaSiting horizontal_siting;
+   enum __DRIChromaSiting vertical_siting;
+
 void *data;
  };

diff --git a/src/mesa/drivers/dri/intel/intel_screen.c 
b/src/mesa/drivers/dri/intel/intel_screen.c
index e912bc7..75dbb5a 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -703,6 +703,10 @@ static __DRIimage *
  intel_create_image_from_fds(__DRIscreen *screen,
  int width, int height, int fourcc,
  int *fds, int num_fds, int *strides, int *offsets,
+enum __DRIYUVColorSpace yuv_color_space,
+enum __DRISampleRange sample_range,
+enum __DRIChromaSiting horizontal_siting,
+enum __DRIChromaSiting vertical_siting,
  void *loaderPrivate)
  {
 struct intel_screen *intelScreen = screen->driverPrivate;
@@ -737,6 +741,11 @@ intel_create_image_from_fds(__DRIscreen *screen,
}
 }

+   image->yuv_color_space = yuv_color_space;
+   image->sample_range = sample_range;
+   image->horizontal_siting = horizontal_siting;
+   image->vertical_siting = vertical_siting;
+
 return image;
  }




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [v2 08/10] egl: definitions for EXT_image_dma_buf_import

2013-04-30 Thread Ian Romanick

On 04/29/2013 04:08 AM, Topi Pohjolainen wrote:

As specified in:

http://www.khronos.org/registry/egl/extensions/EXT/EGL_EXT_image_dma_buf_import.txt

Checking for valid fourcc values is left to the drivers, which avoids a
dependency on the drm header files here.

v2:
- enforce EGL_NO_CONTEXT

Signed-off-by: Topi Pohjolainen 
---
  include/EGL/eglext.h   | 33 
  src/egl/main/eglapi.c  |  7 -
  src/egl/main/egldisplay.h  |  1 +
  src/egl/main/eglimage.c| 76 ++
  src/egl/main/eglimage.h| 15 +
  src/mesa/main/extensions.c |  1 +
  src/mesa/main/mtypes.h |  1 +
  7 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/include/EGL/eglext.h b/include/EGL/eglext.h
index b2b5a80..9b9be8f 100644
--- a/include/EGL/eglext.h
+++ b/include/EGL/eglext.h
@@ -532,6 +532,39 @@ typedef EGLint (EGLAPIENTRYP 
PFNEGLDUPNATIVEFENCEFDANDROIDPROC)(EGLDisplay dpy,
  #define EGL_BUFFER_AGE_EXT0x313D
  #endif

+#define EGL_LINUX_DMA_BUF_EXT 0x3270 /* eglCreateImageKHR target */
+
+/* Attributes for eglCreateImageKHR. */
+#define EGL_LINUX_DRM_FOURCC_EXT  0x3271
+#define EGL_DMA_BUF_PLANE0_FD_EXT 0x3272
+#define EGL_DMA_BUF_PLANE0_OFFSET_EXT 0x3273
+#define EGL_DMA_BUF_PLANE0_PITCH_EXT  0x3274
+#define EGL_DMA_BUF_PLANE1_FD_EXT 0x3275
+#define EGL_DMA_BUF_PLANE1_OFFSET_EXT 0x3276
+#define EGL_DMA_BUF_PLANE1_PITCH_EXT  0x3277
+#define EGL_DMA_BUF_PLANE2_FD_EXT 0x3278
+#define EGL_DMA_BUF_PLANE2_OFFSET_EXT 0x3279
+#define EGL_DMA_BUF_PLANE2_PITCH_EXT  0x327A
+#define EGL_YUV_COLOR_SPACE_HINT_EXT  0x327B
+#define EGL_SAMPLE_RANGE_HINT_EXT 0x327C
+#define EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT  0x327D
+#define EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT0x327E
+
+/* Accepted values for EGL_YUV_COLOR_SPACE_HINT_EXT attribute */
+#define EGL_ITU_REC601_EXT   0x327F
+#define EGL_ITU_REC709_EXT   0x3280
+#define EGL_ITU_REC2020_EXT  0x3281
+
+/* Accepted values for EGL_SAMPLE_RANGE_HINT_EXT attribute */
+#define EGL_YUV_FULL_RANGE_EXT0x3282
+#define EGL_YUV_NARROW_RANGE_EXT  0x3283
+
+/* Accepted values for attributes EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT
+ * and EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT
+ */
+#define EGL_YUV_CHROMA_SITING_0_EXT0x3284
+#define EGL_YUV_CHROMA_SITING_0_5_EXT  0x3285
+
  #include 

  #ifdef __cplusplus
diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index bcc5465..2355d45 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -1310,7 +1310,12 @@ eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, 
EGLenum target,
 _EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv);
 if (!disp->Extensions.KHR_image_base)
RETURN_EGL_EVAL(disp, EGL_NO_IMAGE_KHR);
-   if (!context && ctx != EGL_NO_CONTEXT)
+
+   /**
+* "If <target> is EGL_LINUX_DMA_BUF_EXT, <dpy> must be a valid display,
+*  <ctx> must be EGL_NO_CONTEXT..."
+*/
+   if (ctx != EGL_NO_CONTEXT && (!context || target == EGL_LINUX_DMA_BUF_EXT))
RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_NO_IMAGE_KHR);

 img = drv->API.CreateImageKHR(drv,
diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h
index 4b33470..5a21f78 100644
--- a/src/egl/main/egldisplay.h
+++ b/src/egl/main/egldisplay.h
@@ -115,6 +115,7 @@ struct _egl_extensions

 EGLBoolean EXT_create_context_robustness;
 EGLBoolean EXT_buffer_age;
+   EGLBoolean EXT_image_dma_buf_import;
  };


diff --git a/src/egl/main/eglimage.c b/src/egl/main/eglimage.c
index bfae709..1cede31 100644
--- a/src/egl/main/eglimage.c
+++ b/src/egl/main/eglimage.c
@@ -93,6 +93,82 @@ _eglParseImageAttribList(_EGLImageAttribs *attrs, 
_EGLDisplay *dpy,
   attrs->PlaneWL = val;
   break;

+  case EGL_LINUX_DRM_FOURCC_EXT:
+ attrs->DMABufFourCC.Value = val;
+ attrs->DMABufFourCC.IsPresent = EGL_TRUE;
+ break;
+  case EGL_DMA_BUF_PLANE0_FD_EXT:
+ attrs->DMABufPlaneFds[0].Value = val;
+ attrs->DMABufPlaneFds[0].IsPresent = EGL_TRUE;
+ break;
+  case EGL_DMA_BUF_PLANE0_OFFSET_EXT:
+ attrs->DMABufPlaneOffsets[0].Value = val;
+ attrs->DMABufPlaneOffsets[0].IsPresent = EGL_TRUE;
+ break;
+  case EGL_DMA_BUF_PLANE0_PITCH_EXT:
+ attrs->DMABufPlanePitches[0].Value = val;
+ attrs->DMABufPlanePitches[0].IsPresent = EGL_TRUE;
+ break;
+  case EGL_DMA_BUF_PLANE1_FD_EXT:
+ attrs->DMABufPlaneFds[1].Value = val;
+ attrs->DMABufPlaneFds[1].IsPresent = EGL_TRUE;
+ break;
+  case EGL_DMA_BUF_PLANE1_OFFSET_EXT:
+ attrs->DMABufPlaneOffsets[1].Value = val;
+ attrs->DMABufPlaneOffsets[1].IsPresent = EGL_TRUE;
+ break;
+  case EGL_DMA_BUF_PLANE1_PITCH_EXT:
+ attrs->DMABufPlanePitches[1].Value = val;
+ attrs->DMABufPlan

[Mesa-dev] [Bug 64091] New: piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter

2013-04-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64091

  Priority: medium
Bug ID: 64091
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: piglit glean/readPixSanity testcase fails on ppc64
with Radeon adapter
  Severity: normal
Classification: Unclassified
OS: Linux (All)
  Reporter: kleb...@linux.vnet.ibm.com
  Hardware: PowerPC
Status: NEW
   Version: 9.1
 Component: Mesa core
   Product: Mesa

I'm performing the bring-up of a Radeon adapter (AMD FirePro 2270) using Fedora
19 on a ppc64 machine. When running the sanity test profile from piglit, the
glean/readPixSanity testcase fails. I've also run the test with
LIBGL_ALWAYS_SOFTWARE=1 and it also fails some of the tests, which might
indicate that the problem is not in the Radeon Mesa driver itself.

I will attach to this bug the output from glxinfo and glean/readPixSanity for
both Radeon driver and software rasterizer.

Steps to reproduce:
1. Install piglit
2. Run: # ./piglit-run.py tests/sanity.tests results/sanity.results

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter

2013-04-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64091

--- Comment #1 from Kleber Sacilotto de Souza  ---
Created attachment 78661
  --> https://bugs.freedesktop.org/attachment.cgi?id=78661&action=edit
glxinfo using the radeon driver

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter

2013-04-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64091

--- Comment #2 from Kleber Sacilotto de Souza  ---
Created attachment 78662
  --> https://bugs.freedesktop.org/attachment.cgi?id=78662&action=edit
readPixSanity output using the radeon driver

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter

2013-04-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64091

--- Comment #3 from Kleber Sacilotto de Souza  ---
Created attachment 78663
  --> https://bugs.freedesktop.org/attachment.cgi?id=78663&action=edit
glxinfo using software rasterizer

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter

2013-04-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64091

--- Comment #4 from Kleber Sacilotto de Souza  ---
Created attachment 78664
  --> https://bugs.freedesktop.org/attachment.cgi?id=78664&action=edit
readPixSanity output using software rasterizer

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter

2013-04-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64091

Brian King  changed:

   What|Removed |Added

 CC||brk...@linux.vnet.ibm.com

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swrast: add casts for ImageSlices pointer arithmetic

2013-04-30 Thread Brian Paul
MSVC doesn't like pointer arithmetic with void * so use GLubyte *.
---
 src/mesa/swrast/s_texfetch_tmp.h |8 
 src/mesa/swrast/s_texfilter.c|2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mesa/swrast/s_texfetch_tmp.h b/src/mesa/swrast/s_texfetch_tmp.h
index c9991cd..714226c 100644
--- a/src/mesa/swrast/s_texfetch_tmp.h
+++ b/src/mesa/swrast/s_texfetch_tmp.h
@@ -44,15 +44,15 @@
 #if DIM == 1
 
 #define TEXEL_ADDR( type, image, i, j, k, size ) \
-   ((void) (j), (void) (k), ((type *)(image)->ImageSlices[0] + (i) * 
(size)))
+   ((void) (j), (void) (k), ((type *)((GLubyte *) (image)->ImageSlices[0]) 
+ (i) * (size)))
 
 #define FETCH(x) fetch_texel_1d_##x
 
 #elif DIM == 2
 
 #define TEXEL_ADDR( type, image, i, j, k, size )   \
-   ((void) (k),\
-((type *)((image)->ImageSlices[0] + (image)->RowStride * (j)) + \
+   ((void) (k),\
+((type *)((GLubyte *) (image)->ImageSlices[0] + (image)->RowStride * 
(j)) + \
   (i) * (size)))
 
 #define FETCH(x) fetch_texel_2d_##x
@@ -60,7 +60,7 @@
 #elif DIM == 3
 
 #define TEXEL_ADDR( type, image, i, j, k, size )   \
-   ((type *)((image)->ImageSlices[k] + \
+((type *)((GLubyte *) (image)->ImageSlices[k] +  \
   (image)->RowStride * (j)) + (i) * (size))
 
 #define FETCH(x) fetch_texel_3d_##x
diff --git a/src/mesa/swrast/s_texfilter.c b/src/mesa/swrast/s_texfilter.c
index c8ea26a..fba8e6c 100644
--- a/src/mesa/swrast/s_texfilter.c
+++ b/src/mesa/swrast/s_texfilter.c
@@ -1436,7 +1436,7 @@ opt_sample_rgb_2d(struct gl_context *ctx,
   GLint i = IFLOOR(texcoords[k][0] * width) & colMask;
   GLint j = IFLOOR(texcoords[k][1] * height) & rowMask;
   GLint pos = (j << shift) | i;
-  GLubyte *texel = swImg->ImageSlices[0] + 3 * pos;
+  GLubyte *texel = (GLubyte *) swImg->ImageSlices[0] + 3 * pos;
   rgba[k][RCOMP] = UBYTE_TO_FLOAT(texel[2]);
   rgba[k][GCOMP] = UBYTE_TO_FLOAT(texel[1]);
   rgba[k][BCOMP] = UBYTE_TO_FLOAT(texel[0]);
-- 
1.7.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] i965: Use brw_blorp_blit_miptrees() for CopyTexSubImage().

2013-04-30 Thread Eric Anholt
Now that depth resolves are handled there, we don't need to make the
temporary renderbuffer.
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 84 ++--
 src/mesa/drivers/dri/intel/intel_fbo.c   | 30 --
 src/mesa/drivers/dri/intel/intel_fbo.h   |  4 --
 3 files changed, 41 insertions(+), 77 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 32da141..c3ef054 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -169,19 +169,9 @@ do_blorp_blit(struct intel_context *intel, GLbitfield 
buffer_bit,
intel_renderbuffer_set_needs_downsample(dst_irb);
 }
 
-
 static bool
-formats_match(GLbitfield buffer_bit, struct intel_renderbuffer *src_irb,
-  struct intel_renderbuffer *dst_irb)
+color_formats_match(gl_format src_format, gl_format dst_format)
 {
-   /* Note: don't just check gl_renderbuffer::Format, because in some cases
-* multiple gl_formats resolve to the same native type in the miptree (for
-* example MESA_FORMAT_X8_Z24 and MESA_FORMAT_S8_Z24), and we can blit
-* between those formats.
-*/
-   gl_format src_format = find_miptree(buffer_bit, src_irb)->format;
-   gl_format dst_format = find_miptree(buffer_bit, dst_irb)->format;
-
gl_format linear_src_format = _mesa_get_srgb_format_linear(src_format);
gl_format linear_dst_format = _mesa_get_srgb_format_linear(dst_format);
 
@@ -197,6 +187,21 @@ formats_match(GLbitfield buffer_bit, struct 
intel_renderbuffer *src_irb,
 }
 
 static bool
+formats_match(GLbitfield buffer_bit, struct intel_renderbuffer *src_irb,
+  struct intel_renderbuffer *dst_irb)
+{
+   /* Note: don't just check gl_renderbuffer::Format, because in some cases
+* multiple gl_formats resolve to the same native type in the miptree (for
+* example MESA_FORMAT_X8_Z24 and MESA_FORMAT_S8_Z24), and we can blit
+* between those formats.
+*/
+   gl_format src_format = find_miptree(buffer_bit, src_irb)->format;
+   gl_format dst_format = find_miptree(buffer_bit, dst_irb)->format;
+
+   return color_formats_match(src_format, dst_format);
+}
+
+static bool
 try_blorp_blit(struct intel_context *intel,
GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
@@ -313,29 +318,21 @@ brw_blorp_copytexsubimage(struct intel_context *intel,
 {
struct gl_context *ctx = &intel->ctx;
struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
-   struct intel_renderbuffer *dst_irb;
+   struct intel_texture_image *intel_image = intel_texture_image(dst_image);
+
+   /* Sync up the state of window system buffers.  We need to do this before
+* we go looking at the src renderbuffer's miptree.
+*/
+   intel_prepare_render(intel);
+
+   struct intel_mipmap_tree *src_mt = src_irb->mt;
+   struct intel_mipmap_tree *dst_mt = intel_image->mt;
 
/* BLORP is not supported before Gen6. */
if (intel->gen < 6)
   return false;
 
-   /* Create a fake/wrapper renderbuffer to allow us to use do_blorp_blit(). */
-   dst_irb = intel_create_fake_renderbuffer_wrapper(intel, dst_image);
-   if (!dst_irb)
-  return false;
-
-   struct gl_renderbuffer *dst_rb = &dst_irb->Base.Base;
-
-   /* Unlike BlitFramebuffer, CopyTexSubImage doesn't have a buffer bit.
-* It's only used by find_miptee() to decide whether to dereference the
-* separate stencil miptree.  In the case of packed depth/stencil, core
-* Mesa hands us the depth attachment as src_rb (not stencil), so assume
-* non-stencil for now.  A buffer bit of 0 works for both color and depth.
-*/
-   GLbitfield buffer_bit = 0;
-
-   if (!formats_match(buffer_bit, src_irb, dst_irb)) {
-  dst_rb->Delete(ctx, dst_rb);
+   if (!color_formats_match(src_mt->format, dst_mt->format)) {
   return false;
}
 
@@ -353,11 +350,6 @@ brw_blorp_copytexsubimage(struct intel_context *intel,
int dstX1 = dstX0 + width;
int dstY1 = dstY0 + height;
 
-   /* Sync up the state of window system buffers.  We need to do this before
-* we go looking for the buffers.
-*/
-   intel_prepare_render(intel);
-
/* Account for the fact that in the system framebuffer, the origin is at
 * the lower left.
 */
@@ -369,23 +361,29 @@ brw_blorp_copytexsubimage(struct intel_context *intel,
   mirror_y = true;
}
 
-   do_blorp_blit(intel, buffer_bit, src_irb, dst_irb,
- srcX0, srcY0, dstX0, dstY0, dstX1, dstY1, false, mirror_y);
+   brw_blorp_blit_miptrees(intel,
+   src_mt, src_irb->mt_level, src_irb->mt_layer,
+   dst_mt, dst_image->Level, dst_image->Face,
+   srcX0, srcY0, dstX0, dstY0, dstX1, dstY1,
+   false, mirror_y);
 
-   /* If we're copying a packed depth stencil texture, the above do_blorp_blit
-* 

[Mesa-dev] [PATCH 1/6] i965: Move blorp resolve setup into brw_blorp_blit_miptrees().

2013-04-30 Thread Eric Anholt
There was some comment about trying to avoid marking resolves in
updownsample, but if the downsample is never actually rendered to, then
the required resolve tracked in the downsample will never be executed, so
who cares?
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp   | 12 +---
 src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 11 ---
 2 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index d4b1fda..32da141 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -132,6 +132,9 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
 int dst_x1, int dst_y1,
 bool mirror_x, bool mirror_y)
 {
+   intel_miptree_slice_resolve_depth(intel, src_mt, src_level, src_layer);
+   intel_miptree_slice_resolve_depth(intel, dst_mt, dst_level, dst_layer);
+
brw_blorp_blit_params params(brw_context(&intel->ctx),
 src_mt, src_level, src_layer,
 dst_mt, dst_level, dst_layer,
@@ -140,6 +143,8 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
 dst_x1, dst_y1,
 mirror_x, mirror_y);
brw_blorp_exec(intel, &params);
+
+   intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, dst_layer);
 }
 
 static void
@@ -154,12 +159,6 @@ do_blorp_blit(struct intel_context *intel, GLbitfield 
buffer_bit,
struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
struct intel_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb);
 
-   /* Get ready to blit.  This includes depth resolving the src and dst
-* buffers if necessary.
-*/
-   intel_renderbuffer_resolve_depth(intel, src_irb);
-   intel_renderbuffer_resolve_depth(intel, dst_irb);
-
/* Do the blit */
brw_blorp_blit_miptrees(intel,
src_mt, src_irb->mt_level, src_irb->mt_layer,
@@ -167,7 +166,6 @@ do_blorp_blit(struct intel_context *intel, GLbitfield 
buffer_bit,
srcX0, srcY0, dstX0, dstY0, dstX1, dstY1,
mirror_x, mirror_y);
 
-   intel_renderbuffer_set_needs_hiz_resolve(dst_irb);
intel_renderbuffer_set_needs_downsample(dst_irb);
 }
 
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 12a4a22..586599e 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -1289,9 +1289,6 @@ intel_miptree_updownsample(struct intel_context *intel,
int dst_x0 = 0;
int dst_y0 = 0;
 
-   intel_miptree_slice_resolve_depth(intel, src, 0, 0);
-   intel_miptree_slice_resolve_depth(intel, dst, 0, 0);
-
brw_blorp_blit_miptrees(intel,
src, 0 /* level */, 0 /* layer */,
dst, 0 /* level */, 0 /* layer */,
@@ -1339,13 +1336,6 @@ intel_miptree_downsample(struct intel_context *intel,
   mt->logical_width0,
   mt->logical_height0);
mt->need_downsample = false;
-
-   /* Strictly speaking, after a downsample on a depth miptree, a hiz
-* resolve is needed on the singlesample miptree. However, since the
-* singlesample miptree is never rendered to, the hiz resolve will never
-* occur. Therefore we do not mark the needed hiz resolve after
-* downsampling.
-*/
 }
 
 /**
@@ -1365,7 +1355,6 @@ intel_miptree_upsample(struct intel_context *intel,
   mt->singlesample_mt, mt,
   mt->logical_width0,
   mt->logical_height0);
-   intel_miptree_slice_set_needs_hiz_resolve(mt, 0, 0);
 }
 
 void *
-- 
1.8.3.rc0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] intel: Simplify renderbuffer-for-texture width setup.

2013-04-30 Thread Eric Anholt
We're looking for the logical width of our level, which is what
image->Width2/Height2 is.  The previous code relied on MSAA textures being
only level 0.
---
 src/mesa/drivers/dri/intel/intel_fbo.c | 13 ++---
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c 
b/src/mesa/drivers/dri/intel/intel_fbo.c
index f44cb4d..a3817eb 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -493,17 +493,8 @@ intel_renderbuffer_update_wrapper(struct intel_context 
*intel,
rb->InternalFormat = image->InternalFormat;
rb->_BaseFormat = image->_BaseFormat;
rb->NumSamples = mt->num_samples;
-
-   if (mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE) {
-  assert(level == 0);
-  rb->Width = mt->logical_width0;
-  rb->Height = mt->logical_height0;
-   }
-   else {
-  rb->Width = mt->level[level].width;
-  rb->Height = mt->level[level].height;
-   }
-
+   rb->Width = image->Width2;
+   rb->Height = image->Height2;
rb->Delete = intel_delete_renderbuffer;
rb->AllocStorage = intel_nop_alloc_storage;
 
-- 
1.8.3.rc0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] mesa: Make core Mesa allocate the texture renderbuffer wrapper.

2013-04-30 Thread Eric Anholt
Every driver did the same thing.
---
 src/mesa/drivers/dri/intel/intel_fbo.c | 17 +--
 src/mesa/drivers/dri/nouveau/nouveau_fbo.c | 10 ---
 src/mesa/drivers/dri/radeon/radeon_fbo.c   | 14 -
 src/mesa/main/fbobject.c   | 48 ++
 src/mesa/main/fbobject.h   |  5 
 src/mesa/main/teximage.c   |  3 +-
 src/mesa/state_tracker/st_cb_fbo.c | 22 ++
 src/mesa/swrast/s_texrender.c  | 38 +++
 8 files changed, 56 insertions(+), 101 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c 
b/src/mesa/drivers/dri/intel/intel_fbo.c
index 1d247c7..f44cb4d 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -606,28 +606,13 @@ intel_render_texture(struct gl_context * ctx,
   /* Fallback on drawing to a texture that doesn't have a miptree
* (has a border, width/height 0, etc.)
*/
-  _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
   _swrast_render_texture(ctx, fb, att);
   return;
}
-   else if (!irb) {
-  intel_miptree_check_level_layer(mt, att->TextureLevel, layer);
 
-  irb = (struct intel_renderbuffer *)intel_new_renderbuffer(ctx, ~0);
-
-  if (irb) {
- /* bind the wrapper to the attachment point */
- _mesa_reference_renderbuffer(&att->Renderbuffer, &irb->Base.Base);
-  }
-  else {
- /* fallback to software rendering */
- _swrast_render_texture(ctx, fb, att);
- return;
-  }
-   }
+   intel_miptree_check_level_layer(mt, att->TextureLevel, layer);
 
if (!intel_renderbuffer_update_wrapper(intel, irb, image, layer)) {
-   _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
_swrast_render_texture(ctx, fb, att);
return;
}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c 
b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
index b487009..adead3d 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
@@ -270,16 +270,6 @@ nouveau_render_texture(struct gl_context *ctx, struct 
gl_framebuffer *fb,
struct gl_texture_image *ti =
att->Texture->Image[att->CubeMapFace][att->TextureLevel];
 
-   /* Allocate a renderbuffer object for the texture if we
-* haven't already done so. */
-   if (!rb) {
-   rb = nouveau_renderbuffer_new(ctx, ~0);
-   assert(rb);
-
-   rb->AllocStorage = NULL;
-   _mesa_reference_renderbuffer(&att->Renderbuffer, rb);
-   }
-
/* Update the renderbuffer fields from the texture. */
set_renderbuffer_format(rb, get_tex_format(ti));
rb->Width = ti->Width;
diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c 
b/src/mesa/drivers/dri/radeon/radeon_fbo.c
index eb592db..5f996c5 100644
--- a/src/mesa/drivers/dri/radeon/radeon_fbo.c
+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
@@ -835,25 +835,11 @@ radeon_render_texture(struct gl_context * ctx,
if (!radeon_image->mt) {
   /* Fallback on drawing to a texture without a miptree.
*/
-  _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
   _swrast_render_texture(ctx, fb, att);
   return;
}
-   else if (!rrb) {
-  rrb = radeon_wrap_texture(ctx, newImage);
-  if (rrb) {
- /* bind the wrapper to the attachment point */
- _mesa_reference_renderbuffer(&att->Renderbuffer, &rrb->base.Base);
-  }
-  else {
- /* fallback to software rendering */
- _swrast_render_texture(ctx, fb, att);
- return;
-  }
-   }
 
if (!radeon_update_wrapper(ctx, rrb, newImage)) {
-   _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
_swrast_render_texture(ctx, fb, att);
return;
}
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 645a8a3..26d1cce 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -341,6 +341,47 @@ _mesa_remove_attachment(struct gl_context *ctx,
att->Complete = GL_TRUE;
 }
 
+/**
+ * Create a renderbuffer which will be set up by the driver to wrap the
+ * texture image slice.
+ *
+ * By using a gl_renderbuffer (like user-allocated renderbuffers), drivers get
+ * to share most of their framebuffer rendering code between winsys,
+ * renderbuffer, and texture attachments.
+ *
+ * The allocated renderbuffer uses a non-zero Name so that drivers can check
+ * it for determining vertical orientation, but we use ~0 to make it fairly
+ * unambiguous with actual user (non-texture) renderbuffers.
+ */
+void
+_mesa_update_texture_renderbuffer(struct gl_context *ctx,
+  struct gl_framebuffer *fb,
+  struct gl_renderbuffer_attachment *att)
+{
+   struct gl_texture_image *texImage;
+   struct gl_renderbuffer *rb;
+
+   texImage = _mesa_get_attachment_teximage(att);
+ 

[Mesa-dev] [PATCH 5/6] mesa: Make Mesa core set up wrapped texture renderbuffer state.

2013-04-30 Thread Eric Anholt
Everyone was doing effectively the same thing, except for some funky code
reuse in Intel, and swrast mistakenly recomputing _BaseFormat instead of
using the texture's _BaseFormat.  swrast's sRGB handling is left in place,
though it should be done by using _mesa_get_render_format() at render time
instead (as-is, it will miss updates to GL_FRAMEBUFFER_SRGB).
---
 src/mesa/drivers/dri/intel/intel_fbo.c |  6 --
 src/mesa/drivers/dri/nouveau/nouveau_fbo.c | 18 --
 src/mesa/main/fbobject.c   |  7 +++
 src/mesa/state_tracker/st_cb_fbo.c |  5 -
 src/mesa/swrast/s_texrender.c  |  5 -
 5 files changed, 7 insertions(+), 34 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c 
b/src/mesa/drivers/dri/intel/intel_fbo.c
index a3817eb..f037445 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -489,12 +489,6 @@ intel_renderbuffer_update_wrapper(struct intel_context 
*intel,
struct intel_mipmap_tree *mt = intel_image->mt;
int level = image->Level;
 
-   rb->Format = image->TexFormat;
-   rb->InternalFormat = image->InternalFormat;
-   rb->_BaseFormat = image->_BaseFormat;
-   rb->NumSamples = mt->num_samples;
-   rb->Width = image->Width2;
-   rb->Height = image->Height2;
rb->Delete = intel_delete_renderbuffer;
rb->AllocStorage = intel_nop_alloc_storage;
 
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c 
b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
index adead3d..a692051 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
@@ -247,21 +247,6 @@ nouveau_framebuffer_renderbuffer(struct gl_context *ctx, 
struct gl_framebuffer *
context_dirty(ctx, FRAMEBUFFER);
 }
 
-static GLenum
-get_tex_format(struct gl_texture_image *ti)
-{
-   switch (ti->TexFormat) {
-   case MESA_FORMAT_ARGB:
-   return GL_RGBA8;
-   case MESA_FORMAT_XRGB:
-   return GL_RGB8;
-   case MESA_FORMAT_RGB565:
-   return GL_RGB5;
-   default:
-   return GL_NONE;
-   }
-}
-
 static void
 nouveau_render_texture(struct gl_context *ctx, struct gl_framebuffer *fb,
   struct gl_renderbuffer_attachment *att)
@@ -271,9 +256,6 @@ nouveau_render_texture(struct gl_context *ctx, struct 
gl_framebuffer *fb,
att->Texture->Image[att->CubeMapFace][att->TextureLevel];
 
/* Update the renderbuffer fields from the texture. */
-   set_renderbuffer_format(rb, get_tex_format(ti));
-   rb->Width = ti->Width;
-   rb->Height = ti->Height;
nouveau_surface_ref(&to_nouveau_teximage(ti)->surface,
&to_nouveau_renderbuffer(rb)->surface);
 
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 26d1cce..d88c062 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -380,6 +380,13 @@ _mesa_update_texture_renderbuffer(struct gl_context *ctx,
   rb->AllocStorage = NULL;
}
 
+   rb->_BaseFormat = texImage->_BaseFormat;
+   rb->Format = texImage->TexFormat;
+   rb->InternalFormat = texImage->InternalFormat;
+   rb->Width = texImage->Width2;
+   rb->Height = texImage->Height2;
+   rb->NumSamples = texImage->NumSamples;
+
ctx->Driver.RenderTexture(ctx, fb, att);
 }
 
diff --git a/src/mesa/state_tracker/st_cb_fbo.c 
b/src/mesa/state_tracker/st_cb_fbo.c
index affe656..aa245d3 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -414,11 +414,6 @@ st_render_texture(struct gl_context *ctx,
strb->rtt_level = att->TextureLevel;
strb->rtt_face = att->CubeMapFace;
strb->rtt_slice = att->Zoffset;
-   rb->NumSamples = texImage->NumSamples;
-   rb->Width = texImage->Width2;
-   rb->Height = texImage->Height2;
-   rb->_BaseFormat = texImage->_BaseFormat;
-   rb->InternalFormat = texImage->InternalFormat;
 
pipe_resource_reference( &strb->texture, pt );
 
diff --git a/src/mesa/swrast/s_texrender.c b/src/mesa/swrast/s_texrender.c
index f56a0d5..00b3ca5 100644
--- a/src/mesa/swrast/s_texrender.c
+++ b/src/mesa/swrast/s_texrender.c
@@ -50,11 +50,6 @@ update_wrapper(struct gl_context *ctx, struct 
gl_renderbuffer_attachment *att)
   zOffset = att->Zoffset;
}
 
-   rb->Width = swImage->Base.Width;
-   rb->Height = swImage->Base.Height;
-   rb->InternalFormat = swImage->Base.InternalFormat;
-   rb->_BaseFormat = _mesa_get_format_base_format(format);
-
/* Want to store linear values, not sRGB */
rb->Format = _mesa_get_srgb_format_linear(format);
 
-- 
1.8.3.rc0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] intel: Remove renderbuffer delete setup from texture wrapping.

2013-04-30 Thread Eric Anholt
This is already set by intel_new_renderbuffer().
---
 src/mesa/drivers/dri/intel/intel_fbo.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c 
b/src/mesa/drivers/dri/intel/intel_fbo.c
index f037445..45424a9 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -489,7 +489,6 @@ intel_renderbuffer_update_wrapper(struct intel_context 
*intel,
struct intel_mipmap_tree *mt = intel_image->mt;
int level = image->Level;
 
-   rb->Delete = intel_delete_renderbuffer;
rb->AllocStorage = intel_nop_alloc_storage;
 
intel_miptree_check_level_layer(mt, level, layer);
-- 
1.8.3.rc0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter

2013-04-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64091

--- Comment #5 from Jerome Glisse  ---
For radeon my guess is that depth/stencil readback needs some fiddling with
texture format :

mesa/src/gallium/drivers/r600/r600_blit.c

r600_blit_decompress_depth()

surf_tmpl.format = flushed_depth_texture->resource.b.b.format;

I would look at what format you got there and then try to play with some other
format.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter

2013-04-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64091

--- Comment #6 from Alex Deucher  ---
Gallium is big endian safe at the moment.  See these discussion threads for
what needs to be done:
http://lists.freedesktop.org/archives/mesa-dev/2013-January/033590.html
http://lists.freedesktop.org/archives/mesa-dev/2013-February/034047.html

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter

2013-04-30 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=64091

--- Comment #7 from Alex Deucher  ---
(In reply to comment #6)
> Gallium is big endian safe at the moment.  See these discussion threads for
> what needs to be done:
> http://lists.freedesktop.org/archives/mesa-dev/2013-January/033590.html
> http://lists.freedesktop.org/archives/mesa-dev/2013-February/034047.html

*Gallium is NOT big endian safe at the moment.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] fixup! glsl: Add a pass to lower bitfield-insert into bfm+bfi.

2013-04-30 Thread Matt Turner
---
Does this squashed in seem okay, Eric?

 src/glsl/lower_instructions.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 6c70a4a..d32ec80 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -321,9 +321,10 @@ 
lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir)
ir_rvalue *base_expr = ir->operands[0];
 
ir->operation = ir_triop_bfi;
-   ir->operands[0] = new(ir) ir_expression(ir_binop_bfm, ir->type,
-   swizzle_(ir->operands[3]),
-   swizzle_(ir->operands[2]));
+   ir->operands[0] = new(ir) ir_expression(ir_binop_bfm,
+   ir->type->get_base_type(),
+   ir->operands[3],
+   ir->operands[2]);
/* ir->operands[1] is still the value to insert. */
ir->operands[2] = base_expr;
ir->operands[3] = NULL;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Mesa 9.1.2 release

2013-04-30 Thread Ian Romanick

Mesa 9.1.2 has been released.  Mesa 9.1.2 is a bug fix release which
fixes bugs found since the 9.1.1 release.

The tag in the GIT repository for Mesa 9.1.2 is 'mesa-9.1.2'.

Mesa 9.1.2 is available for download at
ftp://freedesktop.org/pub/mesa/9.1.2/

md5sums:

df2aab86ff4a510ce5b0d074caa0a59f  MesaLib-9.1.2.tar.bz2
415c2bc3a9eb571aafbfa474ebf5a2e0  MesaLib-9.1.2.tar.gz
b1ae5a4d9255953980bc9254f5323420  MesaLib-9.1.2.zip

I have verified building from the .tar.bz2 file by doing:

tar -xjf MesaLib-9.1.2.tar.bz2
cd Mesa-9.1.2
./configure --enable-gallium-llvm --with-llvm-shared-libs
make -j6
make install

I have also verified that I pushed the tag.

Better never than late, I always say!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/llvm: Use LLVM C API for compiling LLVM IR to ISA.

2013-04-30 Thread Tom Stellard
On Sat, Apr 27, 2013 at 10:33:29AM +0200, Mathias Fröhlich wrote:
> 
> Hi,
> 
> On Thursday, April 25, 2013 10:29:27 Jose Fonseca wrote:
> > - There are a bunch of options that need to be set via globals, (see
> > lp_set_target_options), so app/drivers could tamper with each other
> > options.
> >
> > - llvm::cl::ParseCommandLineOptions will complain if called multiple times
> > -- I think we no longer need to call it these days though
> >
> > In short, LLVM was not designed for multiple users in the same process.
> Yep.
> 
> Also llvm is still emerging too fast to assume a specific version to be 
> available. At least with r600 we do currently need a somewhat recent version 
> and kind of have this assumption.
> But due to the api not kept strictly backwards compatible and all the 
> pitfalls 
> that happen while emerging fast it's very likely that a potential application 
> that also tries to make use of the driver modules just brings its own 
> probably 
> incompatible llvm version in some way. So shielding this in any way makes 
> sense ...
> 
> 
> For the Mesa wrappers:
> I have attached a shell script again as a rapid proof that is able to build a 
> linker script that builds up a wrapper shared library that contains a private 
> llvm copy. That's again non optimal - it contains just all static libs that I 
> have in my current test environment... It's just to sketch how this could 
> work.
> 
> The MesaLLVM-with-prefix.link script can be used with the command
> 
> g++ -shared -o libMesaLLVM.so MesaLLVM-with-prefix.link
> 
> to produce a libMesaLLVM.so that contains all C symbols starting with LLVM 
> from libLLVMCore.a. All of them get prefixed with Mesa and are the only 
> exported symbols then.
> That's close to Jose's suggestion but with less work to be done in sources.
> 
> The MesaLLVM-with-version.link script can be used with the command
> 
> g++ -shared -o libMesaLLVM.so MesaLLVM-with-version.link
> 
> to produce a libMesaLLVM.so shared library that uses symbol versioning to 
> distinguish between the llvm versions. I got this idea yesterday and this 
> might simplify the problem a lot.
> By this variant we do not even need to prefix all the callers by Mesa. What 
> this does is to explicitly assign a symbol version to all these calls. At 
> static link symbol resolve time with this libMesaLLVM.so, this symbol version 
>  
> (the 'A MesaLLVM_1.0' entry) is then pulled out of this shared object and all 
> users, in libllvmradeon.so for example, are linked against 
> LLVMCreateContext@MesaLLVM_1.0 instead of just LLVMCreateContext. So we 
> should 
> get a private copy of llvm in libMesaLLVM.so with just the same call names 
> than usual source code wise.
> That's to be tested and verified, but if this works like I think it should, 
> this is the easiest way to get our own LLVM version on linux at least.
> 
> And sorry for just doing this crude proof-of-concept stuff ...
> 

Hi Mathias,

I took the linker script from your email and took a shot at creating
libMesaLLVM.so within Mesa.  I've pushed my initial code here:
http://cgit.freedesktop.org/~tstellar/mesa/log/?h=libmesallvm

I ran into a few minor issues:

I had to export all the LLVM symbols in libMesaLLVM.so, because gallivm
still uses some C++ functions, and I was unsure how to handle the name
mangling in the linker script.

Clover still has a number of undefined symbols.  I'm still not quite
sure what the problem is, but I think the problem has something
to do with the LLVM symbols in the clang libraries clover is using.

I didn't do much testing yet, but glxgears works for me with r600g and
llvmpipe.

Also, note that there are 4 new commits in that repo, the first three
are just variations from my previous C API patches for drivers/radeon.
The biggest change is that I moved the static initializer that calls
the llvm_multithreaded* functions into gallivm/lp_bld_misc.cpp

Let me know if you have any questions, concerns or other ideas.

Thanks,
Tom


> /* Mesa llvm linker script */
> EXTERN(
> LLVMAddAlias
> LLVMAddAttribute
> LLVMAddCase
> LLVMAddClause
> LLVMAddDestination
> LLVMAddFunction
> LLVMAddFunctionAttr
> LLVMAddGlobal
> LLVMAddGlobalInAddressSpace
> LLVMAddIncoming
> LLVMAddInstrAttribute
> LLVMAddNamedMetadataOperand
> LLVMAddTargetDependentFunctionAttr
> LLVMAlignOf
> LLVMAppendBasicBlock
> LLVMAppendBasicBlockInContext
> LLVMArrayType
> LLVMBasicBlockAsValue
> LLVMBlockAddress
> LLVMBuildAdd
> LLVMBuildAggregateRet
> LLVMBuildAlloca
> LLVMBuildAnd
> LLVMBuildArrayAlloca
> LLVMBuildArrayMalloc
> LLVMBuildAShr
> LLVMBuildAtomicRMW
> LLVMBuildBinOp
> LLVMBuildBitCast
> LLVMBuildBr
> LLVMBuildCall
> LLVMBuildCast
> LLVMBuildCondBr
> LLVMBuildExactSDiv
> LLVMBuildExtractElement
> LLVMBuildExtractValue
> LLVMBuildFAdd
> LLVMBuildFCmp
> LLVMBuildFDiv
> LLVMBuildFMul
> LLVMBuildFNeg
> LLVMBuildFPCast
> LLVMBuildFPExt
> LLVMBuildFPToSI
> LLVMBuildFPToUI
> LLVMBuildFPTrunc
> LLVMBuildFree
> LLVMBuildFRem
> LLVMBuildFSub
> LLVMBuildGE

Re: [Mesa-dev] [PATCH] fixup! glsl: Add a pass to lower bitfield-insert into bfm+bfi.

2013-04-30 Thread Eric Anholt
Matt Turner  writes:

> ---
> Does this squashed in seem okay, Eric?

Yeah, though it seems like there could be a bit more ir_validation that
things stay the way we think -- that a bfm on scalars makes a scalar,
and that bfi's operands[0] is scalar.



pgpIR7hwIhr2b.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965/fs: Make virtual grf live intervals actually cover their used range.

2013-04-30 Thread Eric Anholt
Previously, we would sometimes not consider a write to a register to
extend the end of the interval, nor would we consider a read before a
write to extend the start.  This made for a bunch of complicated logic
related to how to treat the results when dead code might be present.
Instead, just extend the interval and fix dead code elimination to know
how to remove it.

Interestingly, this actually results in a tiny bit more optimization:
total instructions in shared programs: 1391220 -> 1390799 (-0.03%)
instructions in affected programs: 14037 -> 13616 (-3.00%)
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 21 +++---
 src/mesa/drivers/dri/i965/brw_fs.h |  4 +-
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp   |  2 +-
 .../drivers/dri/i965/brw_fs_live_variables.cpp | 76 ++
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |  3 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   |  4 +-
 6 files changed, 38 insertions(+), 72 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a8610ee..0821c05 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1449,8 +1449,8 @@ fs_visitor::compact_virtual_grfs()
  remap_table[i] = new_index;
  virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
  if (live_intervals_valid) {
-virtual_grf_use[new_index] = virtual_grf_use[i];
-virtual_grf_def[new_index] = virtual_grf_def[i];
+virtual_grf_start[new_index] = virtual_grf_start[i];
+virtual_grf_end[new_index] = virtual_grf_end[i];
  }
  ++new_index;
   }
@@ -1764,10 +1764,8 @@ fs_visitor::opt_algebraic()
 }
 
 /**
- * Must be called after calculate_live_intervales() to remove unused
- * writes to registers -- register allocation will fail otherwise
- * because something deffed but not used won't be considered to
- * interfere with other regs.
+ * Removes any instructions writing a VGRF where that VGRF is not used by any
+ * later instruction.
  */
 bool
 fs_visitor::dead_code_eliminate()
@@ -1780,9 +1778,12 @@ fs_visitor::dead_code_eliminate()
foreach_list_safe(node, &this->instructions) {
   fs_inst *inst = (fs_inst *)node;
 
-  if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) 
{
-inst->remove();
-progress = true;
+  if (inst->dst.file == GRF) {
+ assert(this->virtual_grf_end[inst->dst.reg] >= pc);
+ if (this->virtual_grf_end[inst->dst.reg] == pc) {
+inst->remove();
+progress = true;
+ }
   }
 
   pc++;
@@ -2194,7 +2195,7 @@ fs_visitor::compute_to_mrf()
   /* Can't compute-to-MRF this GRF if someone else was going to
* read it later.
*/
-  if (this->virtual_grf_use[inst->src[0].reg] > ip)
+  if (this->virtual_grf_end[inst->src[0].reg] > ip)
 continue;
 
   /* Found a move of a GRF to a MRF.  Let's see if we can go
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index c9c9856..3df2ce1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -434,8 +434,8 @@ public:
int *virtual_grf_sizes;
int virtual_grf_count;
int virtual_grf_array_size;
-   int *virtual_grf_def;
-   int *virtual_grf_use;
+   int *virtual_grf_start;
+   int *virtual_grf_end;
bool live_intervals_valid;
 
/* This is the map from UNIFORM hw_reg + reg_offset as generated by
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index b5c2200..9b60d9b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -194,7 +194,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 /* Kill any AEB entries using registers that don't get reused any
  * more -- a sure sign they'll fail operands_match().
  */
-if (src_reg->file == GRF && virtual_grf_use[src_reg->reg] < ip) {
+if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) {
entry->remove();
ralloc_free(entry);
   break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index fdcfac6..dd8923e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -167,16 +167,16 @@ fs_visitor::calculate_live_intervals()
if (this->live_intervals_valid)
   return;
 
-   int *def = ralloc_array(mem_ctx, int, num_vars);
-   int *use = ralloc_array(mem_ctx, int, num_vars);
-   ralloc_free(this->virtual_grf_def);
-   ralloc_free(this->virtual_grf_use);
-   this->virtual_grf_def = def;
-   this->virtual_grf_use = use;
+   int *start = ralloc_array(mem_ctx, int, num_vars);
+   int *end = ralloc_array(me

[Mesa-dev] [PATCH 2/2] i965/vs: Make virtual grf live intervals actually cover their used range.

2013-04-30 Thread Eric Anholt
This is the same change as the previous commit to the FS.  A very few VSes
are regressed by 1 or 2 instructions, which look recoverable with a bit
more dead code elimination.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 11 ++--
 src/mesa/drivers/dri/i965/brw_vec4.h   |  4 +-
 .../drivers/dri/i965/brw_vec4_live_variables.cpp   | 75 ++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  4 +-
 4 files changed, 31 insertions(+), 63 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index ab4668f..75f446d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -298,9 +298,12 @@ vec4_visitor::dead_code_eliminate()
foreach_list_safe(node, &this->instructions) {
   vec4_instruction *inst = (vec4_instruction *)node;
 
-  if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) 
{
-inst->remove();
-progress = true;
+  if (inst->dst.file == GRF) {
+ assert(this->virtual_grf_end[inst->dst.reg] >= pc);
+ if (this->virtual_grf_end[inst->dst.reg] == pc) {
+inst->remove();
+progress = true;
+ }
   }
 
   pc++;
@@ -825,7 +828,7 @@ vec4_visitor::opt_register_coalesce()
   /* Can't coalesce this GRF if someone else was going to
* read it later.
*/
-  if (this->virtual_grf_use[inst->src[0].reg] > ip)
+  if (this->virtual_grf_end[inst->src[0].reg] > ip)
 continue;
 
   /* We need to check interference with the final destination between this
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index a4fca2d..6fdeaeb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -249,8 +249,8 @@ public:
int virtual_grf_array_size;
int first_non_payload_grf;
unsigned int max_grf;
-   int *virtual_grf_def;
-   int *virtual_grf_use;
+   int *virtual_grf_start;
+   int *virtual_grf_end;
dst_reg userplane[MAX_CLIP_PLANES];
 
/**
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index f34111c..db3787b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -183,8 +183,8 @@ vec4_live_variables::~vec4_live_variables()
  * We could expose per-channel live intervals to the consumer based on the
  * information we computed in vec4_live_variables, except that our only
  * current user is virtual_grf_interferes().  So we instead union the
- * per-channel ranges into a per-vgrf range for virtual_grf_def[] and
- * virtual_grf_use[].
+ * per-channel ranges into a per-vgrf range for virtual_grf_start[] and
+ * virtual_grf_end[].
  *
  * We could potentially have virtual_grf_interferes() do the test per-channel,
  * which would let some interesting register allocation occur (particularly on
@@ -200,16 +200,16 @@ vec4_visitor::calculate_live_intervals()
if (this->live_intervals_valid)
   return;
 
-   int *def = ralloc_array(mem_ctx, int, this->virtual_grf_count);
-   int *use = ralloc_array(mem_ctx, int, this->virtual_grf_count);
-   ralloc_free(this->virtual_grf_def);
-   ralloc_free(this->virtual_grf_use);
-   this->virtual_grf_def = def;
-   this->virtual_grf_use = use;
+   int *start = ralloc_array(mem_ctx, int, this->virtual_grf_count);
+   int *end = ralloc_array(mem_ctx, int, this->virtual_grf_count);
+   ralloc_free(this->virtual_grf_start);
+   ralloc_free(this->virtual_grf_end);
+   this->virtual_grf_start = start;
+   this->virtual_grf_end = end;
 
for (int i = 0; i < this->virtual_grf_count; i++) {
-  def[i] = MAX_INSTRUCTION;
-  use[i] = -1;
+  start[i] = MAX_INSTRUCTION;
+  end[i] = -1;
}
 
/* Start by setting up the intervals with no knowledge of control
@@ -223,14 +223,16 @@ vec4_visitor::calculate_live_intervals()
 if (inst->src[i].file == GRF) {
int reg = inst->src[i].reg;
 
-   use[reg] = ip;
+start[reg] = MIN2(start[reg], ip);
+end[reg] = ip;
 }
   }
 
   if (inst->dst.file == GRF) {
  int reg = inst->dst.reg;
 
- def[reg] = MIN2(def[reg], ip);
+ start[reg] = MIN2(start[reg], ip);
+ end[reg] = ip;
   }
 
   ip++;
@@ -247,60 +249,23 @@ vec4_visitor::calculate_live_intervals()
for (int b = 0; b < cfg.num_blocks; b++) {
   for (int i = 0; i < livevars.num_vars; i++) {
 if (livevars.bd[b].livein[i]) {
-   def[i / 4] = MIN2(def[i / 4], cfg.blocks[b]->start_ip);
-   use[i / 4] = MAX2(use[i / 4], cfg.blocks[b]->start_ip);
+   start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->start_ip);
+   end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->start_ip);
 }
 
 if (livevars.bd[b].liveout[i]) {
-   def[i / 4] = MI