[Mesa-dev] [PATCH] winsys/radeon: cleanup CS offloading

2013-10-19 Thread Christian König
From: Christian König 

Using atomic function for ncs is superfluous since it is
protected by a mutex anyway. Also lock the mutex only once
while retrieving the next CS for submission.

Signed-off-by: Christian König 
---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 31 ---
 1 file changed, 10 insertions(+), 21 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 4f43093..f8aeb96 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -542,13 +542,12 @@ void radeon_drm_ws_queue_cs(struct radeon_drm_winsys *ws, 
struct radeon_drm_cs *
 {
 retry:
 pipe_mutex_lock(ws->cs_stack_lock);
-if (p_atomic_read(&ws->ncs) >= RING_LAST) {
+if (ws->ncs >= RING_LAST) {
 /* no room left for a flush */
 pipe_mutex_unlock(ws->cs_stack_lock);
 goto retry;
 }
-ws->cs_stack[p_atomic_read(&ws->ncs)] = cs;
-p_atomic_inc(&ws->ncs);
+ws->cs_stack[ws->ncs++] = cs;
 pipe_mutex_unlock(ws->cs_stack_lock);
 pipe_semaphore_signal(&ws->cs_queued);
 }
@@ -557,41 +556,31 @@ static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, 
param)
 {
 struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)param;
 struct radeon_drm_cs *cs;
-unsigned i, empty_stack;
+unsigned i;
 
 while (1) {
 pipe_semaphore_wait(&ws->cs_queued);
 if (ws->kill_thread)
 break;
-next:
+
 pipe_mutex_lock(ws->cs_stack_lock);
 cs = ws->cs_stack[0];
+for (i = 1; i < ws->ncs; i++)
+ws->cs_stack[i - 1] = ws->cs_stack[i];
+ws->cs_stack[--ws->ncs] = NULL;
 pipe_mutex_unlock(ws->cs_stack_lock);
 
 if (cs) {
 radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
-
-pipe_mutex_lock(ws->cs_stack_lock);
-for (i = 1; i < p_atomic_read(&ws->ncs); i++) {
-ws->cs_stack[i - 1] = ws->cs_stack[i];
-}
-ws->cs_stack[p_atomic_read(&ws->ncs) - 1] = NULL;
-empty_stack = p_atomic_dec_zero(&ws->ncs);
-pipe_mutex_unlock(ws->cs_stack_lock);
-
 pipe_semaphore_signal(&cs->flush_completed);
-
-if (!empty_stack) {
-goto next;
-}
 }
 }
 pipe_mutex_lock(ws->cs_stack_lock);
-for (i = 0; i < p_atomic_read(&ws->ncs); i++) {
+for (i = 0; i < ws->ncs; i++) {
 pipe_semaphore_signal(&ws->cs_stack[i]->flush_completed);
 ws->cs_stack[i] = NULL;
 }
-p_atomic_set(&ws->ncs, 0);
+ws->ncs = 0;
 pipe_mutex_unlock(ws->cs_stack_lock);
 return NULL;
 }
@@ -655,7 +644,7 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd)
 pipe_mutex_init(ws->cmask_owner_mutex);
 pipe_mutex_init(ws->cs_stack_lock);
 
-p_atomic_set(&ws->ncs, 0);
+ws->ncs = 0;
 pipe_semaphore_init(&ws->cs_queued, 0);
 if (ws->num_cpus > 1 && debug_get_option_thread())
 ws->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, ws);
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] mesa: remove remnants of GL_MESA_shader_debug

2013-10-19 Thread Brian Paul
This extension never saw any real use so remove it.
---
 include/GL/gl.h   |   20 
 src/mapi/glapi/gen/gl_API.xml |   32 
 2 files changed, 52 deletions(-)

diff --git a/include/GL/gl.h b/include/GL/gl.h
index babb746..968032c 100644
--- a/include/GL/gl.h
+++ b/include/GL/gl.h
@@ -2086,26 +2086,6 @@ typedef void (APIENTRYP PFNGLMULTITEXCOORD4SVARBPROC) 
(GLenum target, const GLsh
 
 
 
-#if GL_ARB_shader_objects
-
-#ifndef GL_MESA_shader_debug
-#define GL_MESA_shader_debug 1
-
-#define GL_DEBUG_OBJECT_MESA  0x8759
-#define GL_DEBUG_PRINT_MESA   0x875A
-#define GL_DEBUG_ASSERT_MESA  0x875B
-
-GLAPI GLhandleARB GLAPIENTRY glCreateDebugObjectMESA (void);
-GLAPI void GLAPIENTRY glClearDebugLogMESA (GLhandleARB obj, GLenum logType, 
GLenum shaderType);
-GLAPI void GLAPIENTRY glGetDebugLogMESA (GLhandleARB obj, GLenum logType, 
GLenum shaderType, GLsizei maxLength,
- GLsizei *length, GLcharARB *debugLog);
-GLAPI GLsizei GLAPIENTRY glGetDebugLogLengthMESA (GLhandleARB obj, GLenum 
logType, GLenum shaderType);
-
-#endif /* GL_MESA_shader_debug */
-
-#endif /* GL_ARB_shader_objects */
-
-
 /*
  * ???. GL_MESA_packed_depth_stencil
  * XXX obsolete
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 48fce36..30ab9c9 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -13027,38 +13027,6 @@
 
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
 
 
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] mesa: remove GL_MESA_program_debug bits from gl.h

2013-10-19 Thread Brian Paul
The code for this was removed from Mesa some time ago.
---
 include/GL/gl.h |   21 -
 1 file changed, 21 deletions(-)

diff --git a/include/GL/gl.h b/include/GL/gl.h
index 968032c..6b94e3f 100644
--- a/include/GL/gl.h
+++ b/include/GL/gl.h
@@ -2102,27 +2102,6 @@ typedef void (APIENTRYP PFNGLMULTITEXCOORD4SVARBPROC) 
(GLenum target, const GLsh
 #endif /* GL_MESA_packed_depth_stencil */
 
 
-#ifndef GL_MESA_program_debug
-#define GL_MESA_program_debug 1
-
-#define GL_FRAGMENT_PROGRAM_POSITION_MESA   0x8bb0
-#define GL_FRAGMENT_PROGRAM_CALLBACK_MESA   0x8bb1
-#define GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA  0x8bb2
-#define GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA  0x8bb3
-#define GL_VERTEX_PROGRAM_POSITION_MESA 0x8bb4
-#define GL_VERTEX_PROGRAM_CALLBACK_MESA 0x8bb5
-#define GL_VERTEX_PROGRAM_CALLBACK_FUNC_MESA0x8bb6
-#define GL_VERTEX_PROGRAM_CALLBACK_DATA_MESA0x8bb7
-
-typedef void (*GLprogramcallbackMESA)(GLenum target, GLvoid *data);
-
-GLAPI void GLAPIENTRY glProgramCallbackMESA(GLenum target, 
GLprogramcallbackMESA callback, GLvoid *data);
-
-GLAPI void GLAPIENTRY glGetProgramRegisterfvMESA(GLenum target, GLsizei len, 
const GLubyte *name, GLfloat *v);
-
-#endif /* GL_MESA_program_debug */
-
-
 #ifndef GL_MESA_texture_array
 #define GL_MESA_texture_array 1
 
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: fix a couple issues with U_FIXED, I_FIXED macros

2013-10-19 Thread Brian Paul
Silence a bunch of MSVC type conversion warnings.

Changed return type of S_FIXED to int32_t (signed).  The result
is the same.  It just seems more intuitive that a signed conversion
function should return a signed value.
---
 src/mesa/main/macros.h |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/macros.h b/src/mesa/main/macros.h
index 880c656..379f756 100644
--- a/src/mesa/main/macros.h
+++ b/src/mesa/main/macros.h
@@ -193,7 +193,7 @@ static INLINE uint32_t
 U_FIXED(float value, uint32_t frac_bits)
 {
value *= (1 << frac_bits);
-   return value < 0 ? 0 : value;
+   return value < 0.0f ? 0 : (uint32_t) value;
 }
 
 /**
@@ -201,10 +201,10 @@ U_FIXED(float value, uint32_t frac_bits)
  *
  * \param frac_bits   The number of bits used to store the fractional part.
  */
-static INLINE uint32_t
+static INLINE int32_t
 S_FIXED(float value, uint32_t frac_bits)
 {
-   return value * (1 << frac_bits);
+   return (int32_t) (value * (1 << frac_bits));
 }
 /*@}*/
 
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gallivm: implement seamless cube filtering

2013-10-19 Thread Brian Paul

On 10/18/2013 01:47 PM, srol...@vmware.com wrote:

From: Roland Scheidegger 

For seamless cube filtering it is necessary to determine new faces and new
coords per sample. The logic for this is _seriously_ complex (what needs
to happen is very "asymmetric" wrt face, x/y under/overflow), further
complicated by the fact that if the 4 samples are in a corner (meaning we
only have actually 3 samples, and all 3 are on different faces) then
falling off the edge is happening _both_ on x and y axis simultaneously.
There was a noticeable performance hit in mesa's cubemap demo when seamless
filtering was forced on (just below 10 percent or so in a debug build, when
disabling all filtering hacks, otherwise it would probably be a bit more) and
when always doing the logic, hence use a branch which it only does it if any
of the pixels in a quad (or in two quads) actually hit this. With that there
was no measurable performance hit in the cubemap demo (neither in a debug nor
release buidl), but this will vary (cubemap demo very rarely hits edges).
Might also be different on other cpus, as this forces SoA sampling path which
potentially can be quite a bit slower.
Note that as for corners, this code gets all the 3 samples which actually
exist right, and the 4th texel will simply be the same as one of the others,
meaning that filter weights will be a bit wrong. This however should be
enough for full OpenGL (but not d3d10) compliance.
---


That's some pretty dense code.  I doubt I could spot an error in it but 
it looks OK otherwise.


For both:
Reviewed-by: Brian Paul 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/8] i965/gs: Fix up gl_PointSize input swizzling for DUAL_INSTANCED gs.

2013-10-19 Thread Paul Berry
On 18 October 2013 17:04, Eric Anholt  wrote:

> Paul Berry  writes:
>
> > Geometry shaders that run in "DUAL_INSTANCED" mode store their inputs
> > in vec4's.  This means that when compiling gl_PointSize input
> > swizzling (a MOV instruction which uses a geometry shader input as
> > both source and destination), we need to do two things:
> >
> > - Set force_writemask_all to ensure that the MOV happens regardless of
> >   which channels are enabled.
> >
> > - Set the source register region to <4;4,1> (instead of <0;4,1> to
> >   satisfy register region restrictions.
>
> This sure sounds like something you empirically found, but I'm confused.
> I would have assumed that DUAL_INSTANCED with an instance count of 1
> meant that the channel enables the hardware gave you had the first 4
> enabled and the second 4 disabled.  And since the dst.width (and thus
> execsize) is 4, whether or not the second 4 are disabled wouldn't
> matter.  In that case, why do you need the writemask forced, since just
> the 4 channels you care about will be affected, anyway?
>

Setting force_writemask_all was not empirical--I did it out of general
caution. I couldn't find any documentation in the bspec to guarantee that
an instance count of 1 would get dispatched to the first 4 channels rather
than the second 4, and I didn't want to rely on undocumented behaviour.


>
> And, if dst.width == 4, then execsize == 4, and I'm confused what
> register region restriction is being honored by promoting the hstride to
> 4.
>

This part was empirical.  I discovered that if I don't set the hstride to
4, then I get an assertion failure here (in validate_reg() in
brw_eu_emit.c):

   /* 4. */
   if (execsize == width && hstride != 0) {
  assert(vstride == -1 || vstride == width * hstride);
   }

I don't know why the hardware cares about this, but I double-checked the
bspec and this restriction is really there.

Side note: I forgot to mention it in the comments, but in addition to
fixing up <0;4,1> -> <4;4,1>, this code fixes up <8;8,1> -> <4;4,1>.
That's necessary for a stupid reason: in the vec4 back-end we represent
GRFs as <8;8,1> so that guess_execution_size() will correctly guess an
execution size of 8.  However, in align16 mode, the hardware assumes a
width of 4, so it really needs to be <4;4,1>.  Normally, we fix that up in
brw_set_src0() and brw_set_src1(), but we do it *after* the call to
validate_reg().  So to avoid validate_reg() asserting on these width-4
instructions, we need to change the source register from <8;8,1> to <4;4,1>
before emitting the instruction.  One of these days, I swear I'm going to
get rid of guess_execution_size() so we can end this sort of madness.


>
> Putting these fixups for a couple of weird cases in just MOV and ADD
> feels wrong to me, but maybe when I understand better what's going on
> it'll seem more natural.
>

Another possibility I'd be equally happy with would be to put the fixup at
the top of vec4_generator::generate_vec4_instruction(), before the switch
statement.  It would look something like this:

   if (dst.width == BRW_WIDTH_4) {
  /* This happens in attribute fixups for "dual instanced" geometry
   * shaders, since they use attributes that are vec4's.  Since the exec
   * width is only 4, it's essential that the caller set
   * force_writemask_all in order to make sure the instruction is
executed
   * regardless of which channels are enabled.
   */
  assert(inst->force_writemask_all);

  /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to
satisfy
   * the following register region restrictions (from Graphics BSpec:
   * 3D-Media-GPGPU Engine > EU Overview > Registers and Register
Regions
   * > Register Region Restrictions)
   *
   * 1. ExecSize must be greater than or equal to Width.
   *
   * 2. If ExecSize = Width and HorzStride != 0, VertStride must be
set
   *to Width * HorzStride."
   */
  for (int i = 0; i < 3; i++) {
 if (src[i].file == BRW_GENERAL_REGISTER_FILE)
src[i] = stride(src[i], 4, 4, 1);
  }
   }

Does that seem better to you?  I actually think I like it slightly better
because by making the assertion more general, I caught another case where I
think I should be setting force_writemask_all to be on the safe side (the
"clear r0.2" instruction in the gs prolog).
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/8] i965/gs: If a DUAL_OBJECT gs would spill, fall back to DUAL_INSTANCED.

2013-10-19 Thread Paul Berry
On 18 October 2013 16:46, Eric Anholt  wrote:

> Paul Berry  writes:
> > Since most geometry shaders used in piglit testing are small,
> > DUAL_INSTANCED mode won't get exercised very much in a normal piglit
> > run.  To force DUAL_INSTANCED mode to be used for all geometry
> > shaders, set INTEL_DEBUG=nogualobj.
>
>^ nodualobj
>

Whoops.  Fixed, thanks.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl/linker: Allow mixing of desktop GLSL versions.

2013-10-19 Thread Paul Berry
On 18 October 2013 11:07, Ian Romanick  wrote:

> On 10/17/2013 08:07 PM, Paul Berry wrote:
> > Previously, Mesa followed the linkage rules outlined in the GLSL
> > 1.20-1.40 specs, which (collectively) said that GLSL versions 1.10 and
> > 1.20 could be linked together, but no other versions could be linked.
> >
> > In GLSL 4.30, the linkage rules were relaxed so that any two desktop
> > GLSL versions can be linked together.  This change was made because it
> > reflected the behaviour of nearly all existing implementations (see
> > Khronos bug 8463).  Mesa was one of the few (perhaps the only)
> > exceptions to prohibit cross-linking of some GLSL versions.
> >
> > Since the GLSL linkage rules were deliberately relaxed in order to
> > match the behaviour of existing implementations, it seems appropriate
> > to relax the rules in Mesa too (even though Mesa doesn't support GLSL
> > 4.30 yet).
> >
> > Note that linking ES and desktop shaders is still prohibited, as is
> > linking ES shaders having different GLSL versions.
> >
> > Fixes piglit tests "shaders/version-mixing {interstage,intrastage}".
>
> Are there any piglit tests that now fail?  It seems like we may have had
> one or two tests that verify the (old) spec behavior...
>

I would have thought so, but I just did a full piglit run and didn't get
any regressions, so I guess not.


>
> You should ping the reporter of this bug:
>
> https://bugs.freedesktop.org/show_bug.cgi?id=70261
>
> This should fix his problem as well.
>

Good call.  I'll do that once I've landed the patch.


>
> > ---
> >  src/glsl/linker.cpp | 10 +++---
> >  1 file changed, 3 insertions(+), 7 deletions(-)
> >
> > diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
> > index 9095a40..0a949b4 100644
> > --- a/src/glsl/linker.cpp
> > +++ b/src/glsl/linker.cpp
> > @@ -2057,14 +2057,10 @@ link_shaders(struct gl_context *ctx, struct
> gl_shader_program *prog)
> >}
> > }
> >
> > -   /* Previous to GLSL version 1.30, different compilation units could
> mix and
> > -* match shading language versions.  With GLSL 1.30 and later, the
> versions
> > -* of all shaders must match.
> > -*
> > -* GLSL ES has never allowed mixing of shading language versions.
> > +   /* In desktop GLSL, different shader versions may be linked
> together.  In
> > +* GLSL ES, all shader versions must be the same.
> >  */
> > -   if ((is_es_prog || max_version >= 130)
> > -   && min_version != max_version) {
> > +   if (is_es_prog && min_version != max_version) {
> >linker_error(prog, "all shaders must use same shading "
> >  "language version\n");
> >goto done;
> >
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Remove error when calling glGenQueries/glDeleteQueries while a query is active

2013-10-19 Thread Brian Paul

On 10/17/2013 02:35 PM, Carl Worth wrote:

There is nothing in the OpenGL specification which prevents the user from
calling glGenQueries to generate a new query object while another object is
active. Neither is there anything in the Mesa implementation which prevents
this. So remove the INVALID_OPERATION errors in this case.

Similarly, it is explicitly allowed by the OpenGL specification to delete an
active query, so remove the assertion for that case and be sure to call the
driver's EndQuery hook.

CC: 
---

Brian Paul  writes:

On 10/17/2013 12:14 PM, Carl Worth wrote:
But from http://www.opengl.org/registry/specs/ARB/occlusion_query.txt:

"""
  Calling either GenQueriesARB or DeleteQueriesARB while any query of
  any target is active causes an INVALID_OPERATION error to be
  generated.
"""
(it's about half-way down in the file)  It's also mentioned in the
"Errors" section.


Thanks, Brian. That certainly does justify where the original code came
from.


Maybe that was rescinded since that spec was done.  If so, I'm fine with
removing the code.


I can't find any similar error requirement in the OpenGL 4.4 (Core)
specification. (And with the increased number of different query types,
it doesn't seem that the error requirement makes sense.) I did find the
following sentence in the specification (section 4.2):

If an active query object is deleted its name immediately
becomes unused, but the underlying object is not deleted until
it is no longer active.

This sentence presumes the possibility of deleting an active query, so I
think it is reasonable to remove the error.


However, I wouldn't be surprised if our drivers crashed and burned if an
active query is deleted.  gl_query_object isn't referenced counted.


Thanks for the catch. My revised patch below calls the driver's EndQuery
hook, which will hopefully avoid this problem. This does inactivate the
query immediately at delete time, which might seem inconsistent with the
sentence I quoted from the specification above. But I think this is fine
since once the object is deleted the user has no visibility into whether
the object is active or not.

-Carl

  src/mesa/main/queryobj.c | 19 ---
  1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c
index a180133..ebdb71c 100644
--- a/src/mesa/main/queryobj.c
+++ b/src/mesa/main/queryobj.c
@@ -202,13 +202,6 @@ _mesa_GenQueries(GLsizei n, GLuint *ids)
return;
 }

-   /* No query objects can be active at this time! */
-   if (ctx->Query.CurrentOcclusionObject ||
-   ctx->Query.CurrentTimerObject) {
-  _mesa_error(ctx, GL_INVALID_OPERATION, "glGenQueriesARB");
-  return;
-   }
-
 first = _mesa_HashFindFreeKeyBlock(ctx->Query.QueryObjects, n);
 if (first) {
GLsizei i;
@@ -241,18 +234,14 @@ _mesa_DeleteQueries(GLsizei n, const GLuint *ids)
return;
 }

-   /* No query objects can be active at this time! */
-   if (ctx->Query.CurrentOcclusionObject ||
-   ctx->Query.CurrentTimerObject) {
-  _mesa_error(ctx, GL_INVALID_OPERATION, "glDeleteQueriesARB");
-  return;
-   }
-
 for (i = 0; i < n; i++) {
if (ids[i] > 0) {
   struct gl_query_object *q = _mesa_lookup_query_object(ctx, ids[i]);
   if (q) {
-ASSERT(!q->Active); /* should be caught earlier */
+if (q->Active) {
+   q->Active = GL_FALSE;
+   ctx->Driver.EndQuery(ctx, q);


Valgrind found an invalid pointer (and crashed!) when I modified your 
piglit test (see other msg).  We also need to make sure that the 
ctx->Query.CurrentFoo binding point is cleared.  Something like this:


if (q->Active) {
   struct gl_query_object **bindpt =
  get_query_binding_point(ctx, q->Target);
   assert(bindpt);  /* _should_ be non-null if q is active */
   if (bindpt) {
  *bindpt = NULL;
   }
   ...



+}
  _mesa_HashRemove(ctx->Query.QueryObjects, ids[i]);
  ctx->Driver.DeleteQuery(ctx, q);
   }




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g/sb: Initialize shader::dce_flags.

2013-10-19 Thread Vadim Girlin

On 10/19/2013 06:18 AM, Vinson Lee wrote:

Fixes "Uninitialized scalar field" defect reported by Coverity.

Signed-off-by: Vinson Lee 


Reviewed-by: Vadim Girlin 


---
  src/gallium/drivers/r600/sb/sb_shader.cpp | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp 
b/src/gallium/drivers/r600/sb/sb_shader.cpp
index 98e52b1..38617a8 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.cpp
+++ b/src/gallium/drivers/r600/sb/sb_shader.cpp
@@ -39,7 +39,8 @@ shader::shader(sb_context &sctx, shader_target t, unsigned id)
coal(*this), bbs(),
target(t), vt(ex), ex(*this), root(),
compute_interferences(),
-  has_alu_predication(), uses_gradients(), safe_math(), ngpr(), nstack() {}
+  has_alu_predication(),
+  uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {}

  bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/8] i965: Implement FS backend for ARB_sample_shading

2013-10-19 Thread Paul Berry
On 18 October 2013 10:30, Anuj Phogat  wrote:

> I know we can specify stride if we have a brw_reg :-
> fs_reg (stride(brw_vec1_grf(0, 0), 2, 4, 0));
>
> But I could not  find a reliable way to use stride if we have a fs_reg.
> That's why I used OR to get the desired result.
>

The right way to do this, IMHO, is to create a special back-end instruction
opcode to do the work.  That way you can put the call to stride() in
fs_generator, and it will happen after registers have been assigned and
everything has been turned into brw_reg's.  See for example what happens in
FS_OPCODE_SET_SIMD4X2_OFFSET, which does a similar thing except that
instead of changing the stride of the register it changes its width.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/8] i965: Implement FS backend for ARB_sample_shading

2013-10-19 Thread Paul Berry
On 14 October 2013 10:12, Anuj Phogat  wrote:

> Implement the FS backend for new builtins added by the extension:
> in vec2 gl_SamplePosition
> in int gl_SampleID
> in int gl_NumSamples
> out int gl_SampleMask[]
>

There is a lot going on in this one patch, and it's getting hard to follow
all the patch review that's going on.  If you wind up re-spinning this
patch series, can you please break it into four separate patches, one to
add support for each builtin?


>
> Signed-off-by: Anuj Phogat 
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 109
> +++
>  src/mesa/drivers/dri/i965/brw_fs.h   |   4 +
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  23 ++
>  src/mesa/drivers/dri/i965/brw_wm.h   |   1 +
>  4 files changed, 137 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index e5d6e4b..e4f7745 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1115,6 +1115,109 @@
> fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
> return reg;
>  }
>
> +void
> +fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
> +{
> +   int num_samples = ctx->DrawBuffer->Visual.samples;
>

This isn't safe.  When compilation depends on a piece of GL state, you need
to include the state in brw_wm_prog_key.  Otherwise the program won't get
recompiled when the value changes.

To avoid unnecessary recompiles, I'd recommend adding a boolean to
brw_wm_prog_key which is:
- true if ctx->Multisample.Enabled && num_samples != 0 && (shader accesses
gl_SamplePosition)
- false otherwise


> +   assert(num_samples >= 0 && num_samples <= 8);
> +
> +   /* From arb_sample_shading specification:
> +* "When rendering to a non-multisample buffer, or if multisample
> +*  rasterization is disabled, gl_SamplePosition will always be
> +*  (0.5, 0.5).
> +*/
> +   if (!ctx->Multisample.Enabled || num_samples == 0) {
> +  emit(BRW_OPCODE_MOV, dst, fs_reg(0.5f));
>

It looks like you're using the old, more verbose style of emitting
instructions.  Can we convert this (and the other instructions in this
patch) to the more compact style:

emit(MOV(dst, fs_reg(0.5f)));



> +   }
> +   else {
> +  /* For num_samples = {4, 8} */
> +  emit(BRW_OPCODE_MOV, dst, int_sample_pos);
> +  emit(BRW_OPCODE_MUL, dst, dst, fs_reg(1 / 16.0f));
>

Like Ken, I was confused as to why we needed a separate MOV before the
MUL.  The assertion Ken recommended would have been a useful clue, but I'd
prefer to just have explicit comments explaining why we need the MOV.  How
about this:

/* Convert int_sample_pos to floating point */
emit(BRW_OPCODE_MOV, dst, int_sample_pos);
/* Scale to the range [0, 1] */
emit(BRW_OPCODE_MUL, dst, dst, fs_reg(1 / 16.0f));


> +   }
> +}
> +
> +fs_reg *
> +fs_visitor::emit_samplepos_interpolation(ir_variable *ir)
> +{
> +   assert(brw->gen >= 6);
> +
> +   this->current_annotation = "compute sample position";
> +   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
>

Since this code assigns to two consecutive registers, it relies on the fact
that ir->type is vec2.  Just to make that explicit, can we add:

assert(ir->type == glsl_type::vec2_type);


> +   fs_reg pos = *reg;
> +   fs_reg int_sample_x = fs_reg(this, glsl_type::int_type);
> +   fs_reg int_sample_y = fs_reg(this, glsl_type::int_type);
> +
> +   /* WM will be run in MSDISPMODE_PERSAMPLE. So, only SIMD8 mode will be
> +* enabled. The X, Y sample positions come in as bytes in  thread
> payload.
> +* Sample IDs and sample positions remain same for all four slots in a
> +* subspan. So, read the positions using vstride=2, width=4, hstride=0.
>

I have similar concerns to Ken about why MSDISPMODE_PERSAMPLE implies that
only SIMD8 mode will be enabled.  I'm assuming the two of you have
adequately resolved that.


> +*/
> +   emit(BRW_OPCODE_AND, int_sample_x,
> +fs_reg(stride(retype(brw_vec1_grf(c->sample_pos_reg, 0),
> + BRW_REGISTER_TYPE_D), 2, 4, 0)),
> +fs_reg(brw_imm_d(0xff)));
>

If I understand correctly, this is creating the instruction

AND(8) int_sample_x<1>:d sample_pos_reg<2;4,0>:d 0x00ff:d

I think this works, but it would be more straightforward to do this, which
just selects the X coordinate bytes from the sample_pos_reg register:

MOV(8) int_sample_x<1>:d sample_pos_reg<16;8,2>:b

That would have the advantage that it doesn't rely on the fact that sample
IDs and sample positions are the same for all four slots in a subspan.

(Note: there are some weird restrictions on byte operations, and I can't
remember all the details.  So you might need to try this in the simulator
and tweak it to get it to work.)


> +
> +   /* Compute gl_SamplePosition.x */
> +   compute_sample_position(pos, int_sample_x);
> +   pos.reg_offset++;
>

I think if we change this to:

pos.reg_offset += dispatch_width / 8;

t

[Mesa-dev] [PATCH] radeonsi: Optimize out exports to unbound color buffers

2013-10-19 Thread Jay Cornwall
This patch identifies shader exports to unbound CBs and removes them during
TGSI to LLVM IR lowering. The method is identical to the one used in the
gallium/r600 driver.

The GLSL lower_output_reads pass generates temporary copies for writes to
shader outputs. In the case of gl_FragData, this results in writes to every
MRT when one or more elements are written in the shader. When these MRTs
are unbound and masked out there is still a performance loss equivalent to
exports to bound, unmasked MRTs on SI.

Signed-off-by: Jay Cornwall 
---
 src/gallium/drivers/radeonsi/radeonsi_pipe.h   |  1 +
 src/gallium/drivers/radeonsi/radeonsi_shader.c |  6 ++
 src/gallium/drivers/radeonsi/si_state.c| 15 +--
 src/gallium/drivers/radeonsi/si_state.h|  1 +
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h 
b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index 26f7e09..bede043 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -148,6 +148,7 @@ struct r600_context {
unsignedfb_compressed_cb_mask;
unsignedpa_sc_line_stipple;
unsignedpa_su_sc_mode_cntl;
+   boolean dual_src_blend;
/* for saving when using blitter */
struct pipe_stencil_ref stencil_ref;
struct si_pipe_shader_selector  *ps_shader;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c 
b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 80ee325..e4a9f56 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -995,6 +995,12 @@ handle_semantic:
semantic_name);
}
 
+   /* Shader is keyed on nr_cbufs, optimize out exports to 
unbound CBs */
+   if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT &&
+   semantic_name == TGSI_SEMANTIC_COLOR &&
+   color_count > 
si_shader_ctx->shader->key.ps.nr_cbufs)
+   continue;
+
si_llvm_init_export_args(bld_base, d, index, target, 
args);
 
if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index da7c3d0..8109bde 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -27,6 +27,7 @@
 #include "util/u_memory.h"
 #include "util/u_framebuffer.h"
 #include "util/u_blitter.h"
+#include "util/u_dual_blend.h"
 #include "util/u_helpers.h"
 #include "util/u_math.h"
 #include "util/u_pack_color.h"
@@ -168,6 +169,8 @@ static void si_update_fb_blend_state(struct r600_context 
*rctx)
si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
 
si_pm4_set_state(rctx, fb_blend, pm4);
+
+   rctx->dual_src_blend = blend->dual_src_blend;
 }
 
 /*
@@ -309,6 +312,9 @@ static void *si_create_blend_state_mode(struct pipe_context 
*ctx,
si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, 
blend_cntl);
}
 
+   /* only MRT0 has dual src blend */
+   blend->dual_src_blend = util_blend_state_is_dual(state, 0);
+
return blend;
 }
 
@@ -2097,8 +2103,13 @@ static INLINE void si_shader_selector_key(struct 
pipe_context *ctx,
if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf)
key->vs.ucps_enabled |= 0x1;
} else if (sel->type == PIPE_SHADER_FRAGMENT) {
-   if (sel->fs_write_all)
-   key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
+   /* Key on nr_cbufs to optimize unused EXPORTs. */
+   key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
+
+   /* Dual-source blending only makes sense with nr_cbufs == 1. */
+   if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend)
+   key->ps.nr_cbufs = 2;
+
key->ps.export_16bpc = rctx->export_16bpc;
 
if (rctx->queued.named.rasterizer) {
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 6dbf880..ca51496 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -34,6 +34,7 @@ struct si_state_blend {
struct si_pm4_state pm4;
uint32_tcb_target_mask;
boolalpha_to_one;
+   booldual_src_blend;
 };
 
 struct si_state_viewport {
-- 
1.8.4.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: Optimize out exports to unbound color buffers

2013-10-19 Thread Marek Olšák
As per the discussion on IRC, this is trying to fix an issue with a
GLSL compiler pass lower_output_reads that always generates 8 output
writes for any shader which writes gl_FragData. I'll fix this in the
GLSL compiler. NAK.

Marek

On Sun, Oct 20, 2013 at 2:33 AM, Jay Cornwall  wrote:
> This patch identifies shader exports to unbound CBs and removes them during
> TGSI to LLVM IR lowering. The method is identical to the one used in the
> gallium/r600 driver.
>
> The GLSL lower_output_reads pass generates temporary copies for writes to
> shader outputs. In the case of gl_FragData, this results in writes to every
> MRT when one or more elements are written in the shader. When these MRTs
> are unbound and masked out there is still a performance loss equivalent to
> exports to bound, unmasked MRTs on SI.
>
> Signed-off-by: Jay Cornwall 
> ---
>  src/gallium/drivers/radeonsi/radeonsi_pipe.h   |  1 +
>  src/gallium/drivers/radeonsi/radeonsi_shader.c |  6 ++
>  src/gallium/drivers/radeonsi/si_state.c| 15 +--
>  src/gallium/drivers/radeonsi/si_state.h|  1 +
>  4 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h 
> b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> index 26f7e09..bede043 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> @@ -148,6 +148,7 @@ struct r600_context {
> unsignedfb_compressed_cb_mask;
> unsignedpa_sc_line_stipple;
> unsignedpa_su_sc_mode_cntl;
> +   boolean dual_src_blend;
> /* for saving when using blitter */
> struct pipe_stencil_ref stencil_ref;
> struct si_pipe_shader_selector  *ps_shader;
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c 
> b/src/gallium/drivers/radeonsi/radeonsi_shader.c
> index 80ee325..e4a9f56 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
> +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
> @@ -995,6 +995,12 @@ handle_semantic:
> semantic_name);
> }
>
> +   /* Shader is keyed on nr_cbufs, optimize out exports 
> to unbound CBs */
> +   if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT &&
> +   semantic_name == TGSI_SEMANTIC_COLOR &&
> +   color_count > 
> si_shader_ctx->shader->key.ps.nr_cbufs)
> +   continue;
> +
> si_llvm_init_export_args(bld_base, d, index, target, 
> args);
>
> if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
> diff --git a/src/gallium/drivers/radeonsi/si_state.c 
> b/src/gallium/drivers/radeonsi/si_state.c
> index da7c3d0..8109bde 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -27,6 +27,7 @@
>  #include "util/u_memory.h"
>  #include "util/u_framebuffer.h"
>  #include "util/u_blitter.h"
> +#include "util/u_dual_blend.h"
>  #include "util/u_helpers.h"
>  #include "util/u_math.h"
>  #include "util/u_pack_color.h"
> @@ -168,6 +169,8 @@ static void si_update_fb_blend_state(struct r600_context 
> *rctx)
> si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
>
> si_pm4_set_state(rctx, fb_blend, pm4);
> +
> +   rctx->dual_src_blend = blend->dual_src_blend;
>  }
>
>  /*
> @@ -309,6 +312,9 @@ static void *si_create_blend_state_mode(struct 
> pipe_context *ctx,
> si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, 
> blend_cntl);
> }
>
> +   /* only MRT0 has dual src blend */
> +   blend->dual_src_blend = util_blend_state_is_dual(state, 0);
> +
> return blend;
>  }
>
> @@ -2097,8 +2103,13 @@ static INLINE void si_shader_selector_key(struct 
> pipe_context *ctx,
> if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf)
> key->vs.ucps_enabled |= 0x1;
> } else if (sel->type == PIPE_SHADER_FRAGMENT) {
> -   if (sel->fs_write_all)
> -   key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
> +   /* Key on nr_cbufs to optimize unused EXPORTs. */
> +   key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
> +
> +   /* Dual-source blending only makes sense with nr_cbufs == 1. 
> */
> +   if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend)
> +   key->ps.nr_cbufs = 2;
> +
> key->ps.export_16bpc = rctx->export_16bpc;
>
> if (rctx->queued.named.rasterizer) {
> diff --git a/src/gallium/drivers/radeonsi/si_state.h 
> b/src/gallium/drivers/radeonsi/si_state.h
> index 6dbf880..ca51496 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -34,6 +34,7 @@ struct si_st

[Mesa-dev] [PATCH] i965: Only emit interpolation setup if there are actual FS inputs.

2013-10-19 Thread Kenneth Graunke
Dead code elimination would get rid of the extra instructions, but
skipping this saves iterations through the optimization loop.

>From shader-db:

  N Min MaxMedian   AvgStddev
x 14672   3  16 3 3.13345150.59904168
+ 14672   1  16 3 2.89551530.77732963
Difference at 95.0% confidence
-0.237936 +/- 0.0158798
-7.59342% +/- 0.506783%
(Student's t, pooled s = 0.693935)

Embarassingly, the classic shadow mapping shader:

   void main() { }

used to require three iterations through the optimization loop.
With this patch, it only requires one (which makes no progress).

Signed-off-by: Kenneth Graunke 
Cc: Matt Turner 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 65a4b66..a3268fb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3025,10 +3025,12 @@ fs_visitor::run()
  emit_shader_time_begin();
 
   calculate_urb_setup();
-  if (brw->gen < 6)
-emit_interpolation_setup_gen4();
-  else
-emit_interpolation_setup_gen6();
+  if (fp->Base.InputsRead > 0) {
+ if (brw->gen < 6)
+emit_interpolation_setup_gen4();
+ else
+emit_interpolation_setup_gen6();
+  }
 
   /* We handle discards by keeping track of the still-live pixels in f0.1.
* Initialize it with the dispatched pixels.
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev