[Mesa-dev] [Bug 97231] GL_DEPTH_CLAMP doesn't clamp to the far plane

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97231

--- Comment #12 from Jules Blok  ---
Created attachment 125624
  --> https://bugs.freedesktop.org/attachment.cgi?id=125624&action=edit
apitrace file version 3

I've attached an apitrace where I've forced Dolphin to use GLSL version 1.50.
Hopefully that will be enough to be able to run it on your system. But it's
possible that I'm still using extensions not available on your system.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ddebug: dump driver states and shaders for apitrace calls

2016-08-09 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 

On 09.08.2016 00:15, Marek Olšák wrote:

From: Marek Olšák 

I think this was an oversight when the PIPE_DUMP flags were added.
---
 src/gallium/drivers/ddebug/dd_draw.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index 97325e4..4ea3129 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -1084,21 +1084,24 @@ dd_after_draw(struct dd_context *dctx, struct dd_call 
*call)
  dd_pipelined_process_draw(dctx, call);
  break;
   case DD_DUMP_ALL_CALLS:
  if (!dscreen->no_flush)
 pipe->flush(pipe, NULL, 0);
  dd_write_report(dctx, call, 0, false);
  break;
   case DD_DUMP_APITRACE_CALL:
  if (dscreen->apitrace_dump_call ==
  dctx->draw_state.apitrace_call_number) {
-dd_write_report(dctx, call, 0, false);
+dd_write_report(dctx, call,
+PIPE_DUMP_CURRENT_STATES |
+PIPE_DUMP_CURRENT_SHADERS,
+false);
 /* No need to continue. */
 exit(0);
  }
  break;
   default:
  assert(0);
   }
}

++dctx->num_draw_calls;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: define ST_NEW_ flags as uint64_t values, not enums

2016-08-09 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 

On 09.08.2016 00:41, Brian Paul wrote:

MSVC doesn't support 64-bit enum values, at least not with C code.
The compiler was warning:

c:\users\brian\projects\mesa\src\mesa\state_tracker\st_atom_list.h(43) : warning
 C4309: 'initializing' : truncation of constant value
c:\users\brian\projects\mesa\src\mesa\state_tracker\st_atom_list.h(44) : warning
 C4309: 'initializing' : truncation of constant value
...

And at runtime we crashed since the high 32 bits of the 'dirty' bitmask
were always 0xffffffff and the 32+u_bit_scan() index went out of bounds of
the atoms[] array.
---
 src/mesa/state_tracker/st_atom.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index febd7ce..971ea35 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -68,12 +68,12 @@ enum {
 #undef ST_STATE
 };

-/* Define ST_NEW_xxx */
-enum {
-#define ST_STATE(FLAG, st_update) FLAG = 1llu << FLAG##_INDEX,
+/* Define ST_NEW_xxx values as static const uint64_t values.
+ * We can't use an enum type because MSVC doesn't allow 64-bit enum values.
+ */
+#define ST_STATE(FLAG, st_update) static const uint64_t FLAG = 1llu << 
FLAG##_INDEX;
 #include "st_atom_list.h"
 #undef ST_STATE
-};

 /* Add extern struct declarations. */
 #define ST_STATE(FLAG, st_update) extern const struct st_tracked_state 
st_update;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] gallium: add render_condition_enable param to clear_render_target/depth_stencil

2016-08-09 Thread Nicolai Hähnle

On 09.08.2016 00:59, Marek Olšák wrote:

From: Marek Olšák 

---
 src/gallium/auxiliary/util/u_clear.h|  5 +++--
 src/gallium/auxiliary/vl/vl_bicubic_filter.c|  2 +-
 src/gallium/auxiliary/vl/vl_compositor.c|  2 +-
 src/gallium/docs/source/context.rst |  7 ---
 src/gallium/drivers/ddebug/dd_draw.c| 12 
 src/gallium/drivers/freedreno/freedreno_draw.c  |  6 --
 src/gallium/drivers/i915/i915_surface.c | 12 
 src/gallium/drivers/ilo/ilo_blit.c  |  6 --
 src/gallium/drivers/llvmpipe/lp_surface.c   |  6 --
 src/gallium/drivers/noop/noop_pipe.c|  6 --
 src/gallium/drivers/nouveau/nv30/nv30_clear.c   |  6 --
 src/gallium/drivers/nouveau/nv50/nv50_surface.c | 10 ++
 src/gallium/drivers/nouveau/nv50/nv84_video.c   |  6 +++---
 src/gallium/drivers/nouveau/nvc0/nvc0_surface.c |  6 --
 src/gallium/drivers/r300/r300_blit.c|  6 --
 src/gallium/drivers/r600/r600_blit.c|  6 --
 src/gallium/drivers/radeonsi/si_blit.c  |  6 --
 src/gallium/drivers/rbug/rbug_context.c | 12 
 src/gallium/drivers/softpipe/sp_surface.c   |  6 --
 src/gallium/drivers/swr/swr_clear.cpp   |  6 --
 src/gallium/drivers/trace/tr_context.c  | 14 ++
 src/gallium/drivers/vc4/vc4_draw.c  |  6 --
 src/gallium/include/pipe/p_context.h|  6 --
 src/gallium/state_trackers/nine/device9.c   |  6 +++---
 src/gallium/state_trackers/nine/surface9.c  |  2 +-
 src/gallium/state_trackers/vdpau/surface.c  |  2 +-
 26 files changed, 109 insertions(+), 61 deletions(-)


[snip]

diff --git a/src/gallium/drivers/vc4/vc4_draw.c 
b/src/gallium/drivers/vc4/vc4_draw.c
index cf3f5e0..773caf7 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -490,29 +490,31 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
 vc4->draw_max_y = vc4->framebuffer.height;
 vc4->cleared |= buffers;
 vc4->resolve |= buffers;

 vc4_start_draw(vc4, 0);
 }

 static void
 vc4_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
 const union pipe_color_union *color,
-unsigned x, unsigned y, unsigned w, unsigned h)
+unsigned x, unsigned y, unsigned w, unsigned h,
+   bool render_condition_enabled)
 {
 fprintf(stderr, "unimpl: clear RT\n");
 }

 static void
 vc4_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
 unsigned buffers, double depth, unsigned stencil,
-unsigned x, unsigned y, unsigned w, unsigned h)
+unsigned x, unsigned y, unsigned w, unsigned h,
+   bool render_condition_enabled)


The whitespace looks inconsistent here and above. Apart from that:

Reviewed-by: Nicolai Hähnle 


 {
 fprintf(stderr, "unimpl: clear DS\n");
 }

 void
 vc4_draw_init(struct pipe_context *pctx)
 {
 pctx->draw_vbo = vc4_draw_vbo;
 pctx->clear = vc4_clear;
 pctx->clear_render_target = vc4_clear_render_target;
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index f1de189..5359164 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -409,35 +409,37 @@ struct pipe_context {
  unsigned stencil);

/**
 * Clear a color rendertarget surface.
 * \param color  pointer to an union of fiu array for each of r, g, b, a.
 */
void (*clear_render_target)(struct pipe_context *pipe,
struct pipe_surface *dst,
const union pipe_color_union *color,
unsigned dstx, unsigned dsty,
-   unsigned width, unsigned height);
+   unsigned width, unsigned height,
+   bool render_condition_enabled);

/**
 * Clear a depth-stencil surface.
 * \param clear_flags  bitfield of PIPE_CLEAR_DEPTH/STENCIL values.
 * \param depth  depth clear value in [0,1].
 * \param stencil  stencil clear value
 */
void (*clear_depth_stencil)(struct pipe_context *pipe,
struct pipe_surface *dst,
unsigned clear_flags,
double depth,
unsigned stencil,
unsigned dstx, unsigned dsty,
-   unsigned width, unsigned height);
+   unsigned width, unsigned height,
+   bool render_condition_enabled);

/**
 * Clear the texture with the specified texel. Not guaranteed to be a
 * renderable format.

Re: [Mesa-dev] [PATCH] vl/rbsp: add a check for emulation prevention three byte

2016-08-09 Thread Christian König

Am 08.08.2016 um 22:10 schrieb Leo Liu:

This is the case when the "00 00 03" sequence is very close to the beginning
of the NAL unit header


I see where the problem is, but the fix is incorrect.

You always search for the emulation prevention three byte even when the 
previous fill has done so already. So it could happen in theory that you 
revert the escaping twice, i.e. remove valid bits.


You need to add this extra check to the end of vl_rbsp_init() after we 
searched for the end of the NAL unit.


Regards,
Christian.



Signed-off-by: Leo Liu 
---
  src/gallium/auxiliary/vl/vl_rbsp.h | 10 +++++++++-
  1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_rbsp.h 
b/src/gallium/auxiliary/vl/vl_rbsp.h
index 7867238..c134d31 100644
--- a/src/gallium/auxiliary/vl/vl_rbsp.h
+++ b/src/gallium/auxiliary/vl/vl_rbsp.h
@@ -77,8 +77,16 @@ static inline void vl_rbsp_fillbits(struct vl_rbsp *rbsp)
 unsigned i, bits;
  
 /* abort if we still have enough bits */

-   if (valid >= 32)
+   if (valid >= 32) {
+  /* search for the emulation prevention three byte */
+  for (i = 24; i <= valid; i += 8) {
+ if ((vl_vlc_peekbits(&rbsp->nal, i) & 0xff) == 0x3) {
+vl_vlc_removebits(&rbsp->nal, i - 8, 8);
+i += 8;
+ }
+  }
return;
+   }
  
 vl_vlc_fillbits(&rbsp->nal);
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] gallium/radeon: implement ARB_clear_texture (v3)

2016-08-09 Thread Nicolai Hähnle

Patches 2 & 3:

Reviewed-by: Nicolai Hähnle 

On 09.08.2016 00:59, Marek Olšák wrote:

From: Marek Olšák 

Some ideas copied from Jakob Sinclair's implementation, but the color
clearing is completely different.

v2: remove leftover code, disable conditional rendering
disable render condition cleanly
---
 docs/GL3.txt  |  2 +-
 docs/relnotes/12.1.0.html |  1 +
 src/gallium/drivers/r600/r600_pipe.c  |  2 +-
 src/gallium/drivers/radeon/r600_texture.c | 67 +++
 src/gallium/drivers/radeonsi/si_pipe.c|  2 +-
 5 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index c185c69..5dcfc31 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -185,21 +185,21 @@ GL 4.3, GLSL 4.30 -- all DONE: nvc0, radeonsi
   GL_ARB_texture_query_levels   DONE (all drivers that 
support GLSL 1.30)
   GL_ARB_texture_storage_multisampleDONE (all drivers that 
support GL_ARB_texture_multisample)
   GL_ARB_texture_view   DONE (i965, nv50, 
r600, llvmpipe, softpipe, swr)
   GL_ARB_vertex_attrib_binding  DONE (all drivers)


 GL 4.4, GLSL 4.40:

   GL_MAX_VERTEX_ATTRIB_STRIDE   DONE (all drivers)
   GL_ARB_buffer_storage DONE (i965, nv50, 
nvc0, r600, radeonsi)
-  GL_ARB_clear_texture  DONE (i965, nv50, nvc0)
+  GL_ARB_clear_texture  DONE (i965, nv50, 
nvc0, r600, radeonsi)
   GL_ARB_enhanced_layouts   DONE (i965)
   - compile-time constant expressions   DONE
   - explicit byte offsets for blocksDONE
   - forced alignment within blocks  DONE
   - specified vec4-slot component numbers   DONE (i965)
   - specified transform/feedback layout DONE
   - input/output block locationsDONE
   GL_ARB_multi_bind DONE (all drivers)
   GL_ARB_query_buffer_objectDONE (i965/hsw+, nvc0)
   GL_ARB_texture_mirror_clamp_to_edge   DONE (i965, nv50, 
nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
diff --git a/docs/relnotes/12.1.0.html b/docs/relnotes/12.1.0.html
index 3935bb0..ed98d13 100644
--- a/docs/relnotes/12.1.0.html
+++ b/docs/relnotes/12.1.0.html
@@ -37,20 +37,21 @@ TBD.
 


 New features

 
 Note: some of the new features are only available with certain drivers.
 

 
+GL_ARB_clear_texture on r600, radeonsi
 GL_ARB_enhanced_layouts on i965
 GL_ARB_shader_group_vote on nvc0
 GL_ARB_ES3_1_compatibility on i965
 GL_EXT_window_rectangles on nv50, nvc0
 GL_KHR_texture_compression_astc_sliced_3d on i965
 

 Bug fixes

 TBD.
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 39a310a..5f69a5d 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -276,20 +276,21 @@ static int r600_get_param(struct pipe_screen* pscreen, 
enum pipe_cap param)
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_MEMORY_INFO:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
+   case PIPE_CAP_CLEAR_TEXTURE:
return 1;

case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return rscreen->b.info.drm_major == 2 && rscreen->b.info.drm_minor 
>= 43;

case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
return !R600_BIG_ENDIAN && rscreen->b.info.has_userptr;

case PIPE_CAP_COMPUTE:
return rscreen->b.chip_class > R700;
@@ -348,21 +349,20 @@ static int r600_get_param(struct pipe_screen* pscreen, 
enum pipe_cap param)
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
-   case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_

Re: [Mesa-dev] [PATCH 6/6] gallium/radeon: add HUD queries for mapped VRAM/GTT

2016-08-09 Thread Nicolai Hähnle
The objection to patch 1 makes sense. Adding the marketing name (in 
_addition_ to the family name) is something that I'd support as well.


Patches 2 - 6:

Reviewed-by: Nicolai Hähnle 

On 06.08.2016 20:05, Marek Olšák wrote:

From: Marek Olšák 

mainly for monitoring visible VRAM congestion
---
 src/gallium/drivers/radeon/r600_query.c | 10 ++++++++++
 src/gallium/drivers/radeon/r600_query.h |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index c203439..592cec1 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -44,20 +44,22 @@ static void r600_query_sw_destroy(struct 
r600_common_context *rctx,

screen->fence_reference(screen, &query->fence, NULL);
FREE(query);
 }

 static enum radeon_value_id winsys_id_from_type(unsigned type)
 {
switch (type) {
case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
+   case R600_QUERY_MAPPED_VRAM: return RADEON_MAPPED_VRAM;
+   case R600_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT;
case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
case R600_QUERY_NUM_CS_FLUSHES: return RADEON_NUM_CS_FLUSHES;
case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
default: unreachable("query type does not correspond to winsys id");
}
@@ -82,20 +84,22 @@ static bool r600_query_sw_begin(struct r600_common_context 
*rctx,
query->begin_result = rctx->num_compute_calls;
break;
case R600_QUERY_SPILL_COMPUTE_CALLS:
query->begin_result = rctx->num_spill_compute_calls;
break;
case R600_QUERY_DMA_CALLS:
query->begin_result = rctx->num_dma_calls;
break;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
+   case R600_QUERY_MAPPED_VRAM:
+   case R600_QUERY_MAPPED_GTT:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
query->begin_result = 0;
break;
case R600_QUERY_BUFFER_WAIT_TIME:
case R600_QUERY_NUM_CS_FLUSHES:
@@ -147,20 +151,22 @@ static bool r600_query_sw_end(struct r600_common_context 
*rctx,
query->end_result = rctx->num_compute_calls;
break;
case R600_QUERY_SPILL_COMPUTE_CALLS:
query->end_result = rctx->num_spill_compute_calls;
break;
case R600_QUERY_DMA_CALLS:
query->end_result = rctx->num_dma_calls;
break;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
+   case R600_QUERY_MAPPED_VRAM:
+   case R600_QUERY_MAPPED_GTT:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_BUFFER_WAIT_TIME:
case R600_QUERY_NUM_CS_FLUSHES:
case R600_QUERY_NUM_BYTES_MOVED: {
enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
@@ -1168,20 +1174,22 @@ err:
 static struct pipe_driver_query_info r600_driver_query_list[] = {
X("num-compilations", NUM_COMPILATIONS,   UINT64, 
CUMULATIVE),
X("num-shaders-created",  NUM_SHADERS_CREATED,UINT64, 
CUMULATIVE),
X("draw-calls",   DRAW_CALLS, UINT64, 
AVERAGE),
X("spill-draw-calls", SPILL_DRAW_CALLS,   UINT64, AVERAGE),
X("compute-calls",COMPUTE_CALLS,  UINT64, AVERAGE),
X("spill-compute-calls",  SPILL_COMPUTE_CALLS,UINT64, AVERAGE),
X("dma-calls",DMA_CALLS,  UINT64, 
AVERAGE),
X("requested-VRAM",   REQUESTED_VRAM, BYTES, AVERAGE),
X("requested-GTT",REQUESTED_GTT,  BYTES, AVERAGE),
+   X("mapped-VRAM",  MAPPED_VRAM,BYTES, AVERAGE),
+   X("mapped-GTT",   MAPPED_GTT, BYTES, 
AVERAGE),
X("buffer-wait-time", BUFFER_WAIT_TIME,   MICROSECONDS, 
CUMULATIVE),
X("num-cs-flushes",

Re: [Mesa-dev] [PATCH 1/3] gallium: add a pipe_context parameter to fence_finish

2016-08-09 Thread Nicolai Hähnle
Skimming through this, I wonder if PIPE_QUERY_GPU_FINISHED shouldn't use 
deferred fences. context.rst does say that it "does not imply 
serialization". But it only seems to be used by nine, and I'm not 
familiar with D3D expectations.


Anyway, this patch is

Reviewed-by: Nicolai Hähnle 

On 06.08.2016 17:32, Marek Olšák wrote:

From: Marek Olšák 

required by glClientWaitSync (GL 4.5 Core spec) that can optionally flush
the context
---
 src/gallium/docs/source/context.rst  | 10 --
 src/gallium/drivers/ddebug/dd_draw.c |  2 +-
 src/gallium/drivers/ddebug/dd_screen.c   |  4 +++-
 src/gallium/drivers/freedreno/freedreno_fence.c  |  5 +++--
 src/gallium/drivers/freedreno/freedreno_fence.h  |  5 +++--
 src/gallium/drivers/i915/i915_screen.c   |  1 +
 src/gallium/drivers/ilo/ilo_screen.c |  1 +
 src/gallium/drivers/llvmpipe/lp_flush.c  |  3 ++-
 src/gallium/drivers/llvmpipe/lp_screen.c |  1 +
 src/gallium/drivers/nouveau/nouveau_screen.c |  1 +
 src/gallium/drivers/r300/r300_screen.c   |  1 +
 src/gallium/drivers/radeon/r600_pipe_common.c|  1 +
 src/gallium/drivers/radeon/r600_query.c  |  2 +-
 src/gallium/drivers/rbug/rbug_screen.c   |  6 +++---
 src/gallium/drivers/softpipe/sp_fence.c  |  1 +
 src/gallium/drivers/softpipe/sp_flush.c  |  2 +-
 src/gallium/drivers/svga/svga_context.c  |  4 ++--
 src/gallium/drivers/svga/svga_screen.c   |  1 +
 src/gallium/drivers/swr/swr_context.cpp  |  4 ++--
 src/gallium/drivers/swr/swr_draw.cpp |  2 +-
 src/gallium/drivers/swr/swr_fence.cpp|  1 +
 src/gallium/drivers/swr/swr_fence.h  |  1 +
 src/gallium/drivers/swr/swr_query.cpp|  4 ++--
 src/gallium/drivers/swr/swr_screen.cpp   |  6 +++---
 src/gallium/drivers/swr/swr_state.cpp|  2 +-
 src/gallium/drivers/trace/tr_screen.c|  5 -
 src/gallium/drivers/vc4/vc4_fence.c  |  1 +
 src/gallium/drivers/virgl/virgl_screen.c |  1 +
 src/gallium/include/pipe/p_screen.h  | 15 ---
 src/gallium/state_trackers/clover/core/event.cpp |  4 ++--
 src/gallium/state_trackers/dri/dri2.c|  6 +++---
 src/gallium/state_trackers/dri/dri_drawable.c|  2 +-
 src/gallium/state_trackers/glx/xlib/xm_api.c |  2 +-
 src/gallium/state_trackers/nine/swapchain9.c |  6 +++---
 src/gallium/state_trackers/vdpau/presentation.c  |  4 ++--
 src/gallium/state_trackers/xa/xa_context.c   |  2 +-
 src/gallium/state_trackers/xvmc/surface.c|  2 +-
 src/mesa/state_tracker/st_cb_flush.c |  2 +-
 src/mesa/state_tracker/st_cb_syncobj.c   |  4 ++--
 39 files changed, 77 insertions(+), 50 deletions(-)

diff --git a/src/gallium/docs/source/context.rst 
b/src/gallium/docs/source/context.rst
index e646ea0..af3d266 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -459,26 +459,24 @@ processed even where they're known to be obscured.


 Flushing
 

 ``flush``

 PIPE_FLUSH_END_OF_FRAME: Whether the flush marks the end of frame.

 PIPE_FLUSH_DEFERRED: It is not required to flush right away, but it is required
-to return a valid fence. The behavior of fence_finish or any other call isn't
-changed. The only side effect can be that fence_finish will wait a little
-longer. No guidance is given as to how drivers should implement fence_finish
-with deferred flushes. If some drivers can't do deferred flushes safely, they
-should just ignore the flag.
-
+to return a valid fence. If fence_finish is called with the returned fence
+and the context is still unflushed, and the ctx parameter of fence_finish is
+equal to the context where the fence was created, fence_finish will flush
+the context.


 ``flush_resource``

 Flush the resource cache, so that the resource can be used
 by an external client. Possible usage:
 - flushing a resource before presenting it on the screen
 - flushing a resource if some other process or device wants to use it
 This shouldn't be used to flush caches if the resource is only managed
 by a single pipe_screen and is not shared with another process.
diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index c1bfdaf..97325e4 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -541,21 +541,21 @@ dd_flush_and_check_hang(struct dd_context *dctx,
bool idle;

assert(timeout_ms > 0);

pipe->flush(pipe, &fence, flush_flags);
if (flush_fence)
   screen->fence_reference(screen, flush_fence, fence);
if (!fence)
   return false;

-   idle = screen->fence_finish(screen, fence, timeout_ms * 100);
+   idle = screen->fence_finish(screen, NULL, fence, timeout_ms * 100);
screen->fence_reference(screen, &fence, NULL);
if (!idle)
   fprintf(stderr, "dd: GPU hang detected!\n");
retu

Re: [Mesa-dev] [PATCH 2/3] st/mesa: set the ctx parameter of fence_finish

2016-08-09 Thread Nicolai Hähnle

On 06.08.2016 17:32, Marek Olšák wrote:

From: Marek Olšák 

for deferred flushes
---
 src/mesa/state_tracker/st_cb_syncobj.c | 25 ++---
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_syncobj.c 
b/src/mesa/state_tracker/st_cb_syncobj.c
index 1fa1403..123925a 100644
--- a/src/mesa/state_tracker/st_cb_syncobj.c
+++ b/src/mesa/state_tracker/st_cb_syncobj.c
@@ -71,53 +71,64 @@ static void st_fence_sync(struct gl_context *ctx, struct 
gl_sync_object *obj,
struct st_sync_object *so = (struct st_sync_object*)obj;

assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE && flags == 0);
assert(so->fence == NULL);

pipe->flush(pipe, &so->fence, PIPE_FLUSH_DEFERRED);
 }

 static void st_check_sync(struct gl_context *ctx, struct gl_sync_object *obj)
 {
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
+   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_screen *screen = pipe->screen;
struct st_sync_object *so = (struct st_sync_object*)obj;

/* If the fence doesn't exist, assume it's signalled. */
if (!so->fence) {
   so->b.StatusFlag = GL_TRUE;
   return;
}

-   if (screen->fence_finish(screen, NULL, so->fence, 0)) {
+   if (screen->fence_finish(screen, pipe, so->fence, 0)) {
   screen->fence_reference(screen, &so->fence, NULL);
   so->b.StatusFlag = GL_TRUE;
}
 }

 static void st_client_wait_sync(struct gl_context *ctx,
 struct gl_sync_object *obj,
 GLbitfield flags, GLuint64 timeout)
 {
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
+   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_screen *screen = pipe->screen;
struct st_sync_object *so = (struct st_sync_object*)obj;

/* If the fence doesn't exist, assume it's signalled. */
if (!so->fence) {
   so->b.StatusFlag = GL_TRUE;
   return;
}

-   /* We don't care about GL_SYNC_FLUSH_COMMANDS_BIT, because flush is
-* already called when creating a fence. */
-
+   /* Section 4.1.2 of OpenGL 4.5 (Compatibility Profile) says:
+*[...] if ClientWaitSync is called and all of the following are true:
+*- the SYNC_FLUSH_COMMANDS_BIT bit is set in flags,
+*- sync is unsignaled when ClientWaitSync is called,
+*- and the calls to ClientWaitSync and FenceSync were issued from
+*  the same context,
+*then the GL will behave as if the equivalent of Flush were inserted
+*immediately after the creation of sync.
+*
+* Assume GL_SYNC_FLUSH_COMMANDS_BIT is always set, because applications
+* forget to set it.


I wonder if we shouldn't have a dri_config flag for this, so that we 
don't punish people who actually know what they're doing. But that can 
be left for another time.


Reviewed-by: Nicolai Hähnle 


+*/
if (so->fence &&
-   screen->fence_finish(screen, NULL, so->fence, timeout)) {
+   screen->fence_finish(screen, pipe, so->fence, timeout)) {
   screen->fence_reference(screen, &so->fence, NULL);
   so->b.StatusFlag = GL_TRUE;
}
 }

 static void st_server_wait_sync(struct gl_context *ctx,
 struct gl_sync_object *obj,
 GLbitfield flags, GLuint64 timeout)
 {
/* NO-OP.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] gallium/radeon: use unflushed fences for deferred flushes (v2)

2016-08-09 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 

On 06.08.2016 17:32, Marek Olšák wrote:

From: Marek Olšák 

+23% Bioshock Infinite performance.

v2: - use the new fence_finish interface
- allow deferred fences with multiple contexts
- clear the ctx pointer after a deferred flush
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 44 ++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 119fdf5..1c56e6e 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -40,20 +40,26 @@
 #include 

 #ifndef HAVE_LLVM
 #define HAVE_LLVM 0
 #endif

 struct r600_multi_fence {
struct pipe_reference reference;
struct pipe_fence_handle *gfx;
struct pipe_fence_handle *sdma;
+
+   /* If the context wasn't flushed at fence creation, this is non-NULL. */
+   struct {
+   struct r600_common_context *ctx;
+   unsigned ib_index;
+   } gfx_unflushed;
 };

 /*
  * shader binary helpers.
  */
 void radeon_shader_binary_init(struct radeon_shader_binary *b)
 {
memset(b, 0, sizeof(*b));
 }

@@ -255,42 +261,59 @@ void r600_postflush_resume_features(struct 
r600_common_context *ctx)

 static void r600_flush_from_st(struct pipe_context *ctx,
   struct pipe_fence_handle **fence,
   unsigned flags)
 {
struct pipe_screen *screen = ctx->screen;
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
unsigned rflags = 0;
struct pipe_fence_handle *gfx_fence = NULL;
struct pipe_fence_handle *sdma_fence = NULL;
+   bool deferred_fence = false;

if (flags & PIPE_FLUSH_END_OF_FRAME)
rflags |= RADEON_FLUSH_END_OF_FRAME;
if (flags & PIPE_FLUSH_DEFERRED)
rflags |= RADEON_FLUSH_ASYNC;

if (rctx->dma.cs) {
rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
}
-   rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+
+   /* Instead of flushing, create a deferred fence. Constraints:
+* - The state tracker must allow a deferred flush.
+* - The state tracker must request a fence.
+* Thread safety in fence_finish must be ensured by the state tracker.
+*/
+   if (flags & PIPE_FLUSH_DEFERRED && fence) {
+   gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
+   deferred_fence = true;
+   } else {
+   rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+   }

/* Both engines can signal out of order, so we need to keep both 
fences. */
if (gfx_fence || sdma_fence) {
struct r600_multi_fence *multi_fence =
CALLOC_STRUCT(r600_multi_fence);
if (!multi_fence)
return;

multi_fence->reference.count = 1;
multi_fence->gfx = gfx_fence;
multi_fence->sdma = sdma_fence;

+   if (deferred_fence) {
+   multi_fence->gfx_unflushed.ctx = rctx;
+   multi_fence->gfx_unflushed.ib_index = 
rctx->num_gfx_cs_flushes;
+   }
+
screen->fence_reference(screen, fence, NULL);
*fence = (struct pipe_fence_handle*)multi_fence;
}
 }

 static void r600_flush_dma_ring(void *ctx, unsigned flags,
struct pipe_fence_handle **fence)
 {
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct radeon_winsys_cs *cs = rctx->dma.cs;
@@ -953,36 +976,55 @@ static void r600_fence_reference(struct pipe_screen 
*screen,
 *rdst = rsrc;
 }

 static boolean r600_fence_finish(struct pipe_screen *screen,
 struct pipe_context *ctx,
 struct pipe_fence_handle *fence,
 uint64_t timeout)
 {
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
+   struct r600_common_context *rctx =
+   ctx ? (struct r600_common_context*)ctx : NULL;
int64_t abs_timeout = os_time_get_absolute_timeout(timeout);

if (rfence->sdma) {
if (!rws->fence_wait(rws, rfence->sdma, timeout))
return false;

/* Recompute the timeout after waiting. */
if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
int64_t time = os_time_get_nano();
timeout = abs_timeout > time ? abs_timeout - time : 0;
}
}

if (!rfence->gfx)
return true;

+   /* Flush the gfx IB if it hasn't been flushed yet. */
+   if (r

[Mesa-dev] [Bug 97214] X not running with error "Failed to make EGL context current"

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97214

--- Comment #3 from Nicolas Boichat  ---
Ok, thanks for trying those out.

Can you try again, setting LIBGL_DEBUG=verbose ? Hopefully that'll give us more
logs and a better idea of what is going on...

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/omx/dec/h264: pass default scaling lists in raster format

2016-08-09 Thread Christian König
From: Indrajit Das 

---
 src/gallium/state_trackers/omx/vid_dec_h264.c | 40 +--
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c 
b/src/gallium/state_trackers/omx/vid_dec_h264.c
index bc7feaa..10f2959 100644
--- a/src/gallium/state_trackers/omx/vid_dec_h264.c
+++ b/src/gallium/state_trackers/omx/vid_dec_h264.c
@@ -50,35 +50,35 @@ struct dpb_list {
 };
 
 static const uint8_t Default_4x4_Intra[16] = {
-6, 13, 13, 20, 20, 20, 28, 28,
-   28, 28, 32, 32, 32, 37, 37, 42
+6, 13, 20, 28, 13, 20, 28, 32,
+   20, 28, 32, 37, 28, 32, 37, 42
 };
 
 static const uint8_t Default_4x4_Inter[16] = {
-   10, 14, 14, 20, 20, 20, 24, 24,
-   24, 24, 27, 27, 27, 30, 30, 34
+   10, 14, 20, 24, 14, 20, 24, 27,
+   20, 24, 27, 30, 24, 27, 30, 34
 };
 
 static const uint8_t Default_8x8_Intra[64] = {
-6, 10, 10, 13, 11, 13, 16, 16,
-   16, 16, 18, 18, 18, 18, 18, 23,
-   23, 23, 23, 23, 23, 25, 25, 25,
-   25, 25, 25, 25, 27, 27, 27, 27,
-   27, 27, 27, 27, 29, 29, 29, 29,
-   29, 29, 29, 31, 31, 31, 31, 31,
-   31, 33, 33, 33, 33, 33, 36, 36,
-   36, 36, 38, 38, 38, 40, 40, 42
+6, 10, 13, 16, 18, 23, 25, 27,
+   10, 11, 16, 18, 23, 25, 27, 29,
+   13, 16, 18, 23, 25, 27, 29, 31,
+   16, 18, 23, 25, 27, 29, 31, 33,
+   18, 23, 25, 27, 29, 31, 33, 36,
+   23, 25, 27, 29, 31, 33, 36, 38,
+   25, 27, 29, 31, 33, 36, 38, 40,
+   27, 29, 31, 33, 36, 38, 40, 42
 };
 
 static const uint8_t Default_8x8_Inter[64] = {
-9, 13, 13, 15, 13, 15, 17, 17,
-   17, 17, 19, 19, 19, 19, 19, 21,
-   21, 21, 21, 21, 21, 22, 22, 22,
-   22, 22, 22, 22, 24, 24, 24, 24,
-   24, 24, 24, 24, 25, 25, 25, 25,
-   25, 25, 25, 27, 27, 27, 27, 27,
-   27, 28, 28, 28, 28, 28, 30, 30,
-   30, 30, 32, 32, 32, 33, 33, 35
+9, 13, 15, 17, 19, 21, 22, 24,
+   13, 13, 17, 19, 21, 22, 24, 25,
+   15, 17, 19, 21, 22, 24, 25, 27,
+   17, 19, 21, 22, 24, 25, 27, 28,
+   19, 21, 22, 24, 25, 27, 28, 30,
+   21, 22, 24, 25, 27, 28, 30, 32,
+   22, 24, 25, 27, 28, 30, 32, 33,
+   24, 25, 27, 28, 30, 32, 33, 35
 };
 
 static void vid_dec_h264_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, 
unsigned min_bits_left);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/19] st_glsl_types: add st_glsl_type_usagemask

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Determine which components of the underlying vec4 storage will be used.
---
 src/mesa/state_tracker/st_glsl_types.cpp | 33 
 src/mesa/state_tracker/st_glsl_types.h   |  1 +
 2 files changed, 34 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_types.cpp 
b/src/mesa/state_tracker/st_glsl_types.cpp
index 857e143..89ec2cf 100644
--- a/src/mesa/state_tracker/st_glsl_types.cpp
+++ b/src/mesa/state_tracker/st_glsl_types.cpp
@@ -87,15 +87,48 @@ st_glsl_attrib_type_size(const struct glsl_type *type, bool 
is_vs_input)
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_FUNCTION:
   assert(!"Invalid type in type_size");
   break;
}
return 0;
 }
 
+/**
+ * Returns a mask of which channels of a vec4 are used for this type.
+ */
+unsigned
+st_glsl_type_usagemask(const struct glsl_type* type)
+{
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+  return (1u << type->vector_elements) - 1;
+   case GLSL_TYPE_DOUBLE:
+  if (type->vector_elements <= 1)
+ return 3;
+  else
+ return 0xf;
+   case GLSL_TYPE_ARRAY:
+  return st_glsl_type_usagemask(type->fields.array);
+   case GLSL_TYPE_STRUCT: {
+  unsigned mask = 0;
+  unsigned i;
+  for (i = 0; i < type->length && mask != 0xf; i++) {
+ mask |= st_glsl_type_usagemask(type->fields.structure[i].type);
+  }
+  return mask;
+   }
+   default:
+  assert(!"Invalid type in st_glsl_type_usagemask");
+  return 0xf;
+   }
+}
+
 int
 st_glsl_type_size(const struct glsl_type *type)
 {
return st_glsl_attrib_type_size(type, false);
 }
diff --git a/src/mesa/state_tracker/st_glsl_types.h 
b/src/mesa/state_tracker/st_glsl_types.h
index 3a39cee..a144bc0 100644
--- a/src/mesa/state_tracker/st_glsl_types.h
+++ b/src/mesa/state_tracker/st_glsl_types.h
@@ -27,18 +27,19 @@
 #ifndef __ST_GLSL_TYPES_H__
 #define __ST_GLSL_TYPES_H__
 
 #include "compiler/glsl_types.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 int st_glsl_attrib_type_size(const struct glsl_type *type, bool is_vs_input);
+unsigned st_glsl_type_usagemask(const struct glsl_type* type);
 int st_glsl_type_size(const struct glsl_type *type);
 
 
 #ifdef __cplusplus
 }
 #endif
 
 #endif /* __ST_GLSL_TYPES_H__ */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/19] tgsi/ureg: allow specifying the UsageMask of array temporaries

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/auxiliary/tgsi/tgsi_ureg.c | 14 ++
 src/gallium/auxiliary/tgsi/tgsi_ureg.h |  1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  2 +-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c 
b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index b67c383..3ddbd4b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -170,21 +170,24 @@ struct ureg_program
   unsigned index;
   bool atomic;
} buffer[PIPE_MAX_SHADER_BUFFERS];
unsigned nr_buffers;
 
struct util_bitmask *free_temps;
struct util_bitmask *local_temps;
struct util_bitmask *decl_temps;
unsigned nr_temps;
 
-   unsigned array_temps[UREG_MAX_ARRAY_TEMPS];
+   struct {
+  unsigned start;
+  unsigned usagemask;
+   } array_temps[UREG_MAX_ARRAY_TEMPS];
unsigned nr_array_temps;
 
struct const_decl const_decls;
struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS];
 
unsigned properties[TGSI_PROPERTY_COUNT];
 
unsigned nr_addrs;
unsigned nr_preds;
unsigned nr_instructions;
@@ -556,39 +559,41 @@ struct ureg_dst ureg_DECL_temporary( struct ureg_program 
*ureg )
return alloc_temporary(ureg, FALSE);
 }
 
 struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg )
 {
return alloc_temporary(ureg, TRUE);
 }
 
 struct ureg_dst ureg_DECL_array_temporary( struct ureg_program *ureg,
unsigned size,
+   unsigned usagemask,
boolean local )
 {
unsigned i = ureg->nr_temps;
struct ureg_dst dst = ureg_dst_register( TGSI_FILE_TEMPORARY, i );
 
if (local)
   util_bitmask_set(ureg->local_temps, i);
 
/* Always start a new declaration at the start */
util_bitmask_set(ureg->decl_temps, i);
 
ureg->nr_temps += size;
 
/* and also at the end of the array */
util_bitmask_set(ureg->decl_temps, ureg->nr_temps);
 
if (ureg->nr_array_temps < UREG_MAX_ARRAY_TEMPS) {
-  ureg->array_temps[ureg->nr_array_temps++] = i;
-  dst.ArrayID = ureg->nr_array_temps;
+  ureg->array_temps[ureg->nr_array_temps].start = i;
+  ureg->array_temps[ureg->nr_array_temps].usagemask = usagemask;
+  dst.ArrayID = ++ureg->nr_array_temps;
}
 
return dst;
 }
 
 void ureg_release_temporary( struct ureg_program *ureg,
  struct ureg_dst tmp )
 {
if(tmp.File == TGSI_FILE_TEMPORARY)
   util_bitmask_set(ureg->free_temps, tmp.Index);
@@ -1543,20 +1548,21 @@ emit_decl_temps( struct ureg_program *ureg,
out[0].decl.File = TGSI_FILE_TEMPORARY;
out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
out[0].decl.Local = local;
 
out[1].value = 0;
out[1].decl_range.First = first;
out[1].decl_range.Last = last;
 
if (arrayid) {
   out[0].decl.Array = 1;
+  out[0].decl.UsageMask = ureg->array_temps[arrayid - 1].usagemask;
   out[2].value = 0;
   out[2].array.ArrayID = arrayid;
}
 }
 
 static void emit_decl_range( struct ureg_program *ureg,
  unsigned file,
  unsigned first,
  unsigned count )
 {
@@ -1894,21 +1900,21 @@ static void emit_decls( struct ureg_program *ureg )
 
if (ureg->nr_temps) {
   unsigned array = 0;
   for (i = 0; i < ureg->nr_temps;) {
  boolean local = util_bitmask_get(ureg->local_temps, i);
  unsigned first = i;
  i = util_bitmask_get_next_index(ureg->decl_temps, i + 1);
  if (i == UTIL_BITMASK_INVALID_INDEX)
 i = ureg->nr_temps;
 
- if (array < ureg->nr_array_temps && ureg->array_temps[array] == first)
+ if (array < ureg->nr_array_temps && ureg->array_temps[array].start == 
first)
 emit_decl_temps( ureg, first, i - 1, local, ++array );
  else
 emit_decl_temps( ureg, first, i - 1, local, 0 );
   }
}
 
if (ureg->nr_addrs) {
   emit_decl_range( ureg,
TGSI_FILE_ADDRESS,
0, ureg->nr_addrs );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h 
b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index b4258fd..e7b0f25 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -291,20 +291,21 @@ ureg_DECL_temporary( struct ureg_program * );
  */
 struct ureg_dst
 ureg_DECL_local_temporary( struct ureg_program * );
 
 /**
  * Declare "size" continuous temporary registers.
  */
 struct ureg_dst
 ureg_DECL_array_temporary( struct ureg_program *,
unsigned size,
+   unsigned usagemask,
boolean local );
 
 void 
 ureg_release_temporary( struct ureg_program *ureg,
 struct ureg_dst tmp );
 
 struct ureg_dst
 ureg_DECL_address( struct ureg_program *

[Mesa-dev] [PATCH 11/19] gallium/radeon: more descriptive names for LLVM temporaries in debug builds

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 7b96a58..22ff18e 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -31,20 +31,21 @@
 #include "gallivm/lp_bld_init.h"
 #include "gallivm/lp_bld_intr.h"
 #include "gallivm/lp_bld_misc.h"
 #include "gallivm/lp_bld_swizzle.h"
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_debug.h"
 
+#include <stdio.h>
 #include <llvm-c/Core.h>
 #include <llvm-c/Transforms/Scalar.h>
 
 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
  enum tgsi_opcode_type type)
 {
LLVMContextRef ctx = bld_base->base.gallivm->context;
 
switch (type) {
case TGSI_TYPE_UNSIGNED:
@@ -421,20 +422,21 @@ static void emit_declaration(struct lp_build_tgsi_context 
*bld_base,
 ctx->soa.addr[idx][chan] = 
si_build_alloca_undef(
&ctx->gallivm,
ctx->soa.bld_base.uint_bld.elem_type, 
"");
}
}
break;
}
 
case TGSI_FILE_TEMPORARY:
{
+   char name[16] = "";
LLVMValueRef array_alloca = NULL;
unsigned decl_size;
first = decl->Range.First;
last = decl->Range.Last;
decl_size = 4 * ((last - first) + 1);
if (decl->Declaration.Array) {
unsigned id = decl->Array.ArrayID - 1;
if (!ctx->arrays) {
int size = 
bld_base->info->array_max[TGSI_FILE_TEMPORARY];
ctx->arrays = CALLOC(size, 
sizeof(ctx->arrays[0]));
@@ -458,34 +460,42 @@ static void emit_declaration(struct lp_build_tgsi_context 
*bld_base,
ctx->arrays[id].alloca = array_alloca;
}
}
 
if (!ctx->temps_count) {
ctx->temps_count = 
bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
ctx->temps = MALLOC(TGSI_NUM_CHANNELS * 
ctx->temps_count * sizeof(LLVMValueRef));
}
if (!array_alloca) {
for (i = 0; i < decl_size; ++i) {
+#ifdef DEBUG
+   snprintf(name, sizeof(name), "TEMP%d.%c",
+first + i / 4, "xyzw"[i % 4]);
+#endif
ctx->temps[first * TGSI_NUM_CHANNELS + i] =

si_build_alloca_undef(bld_base->base.gallivm,
  
bld_base->base.vec_type,
- "temp");
+ name);
}
} else {
LLVMValueRef idxs[2] = {
bld_base->uint_bld.zero,
NULL
};
for (i = 0; i < decl_size; ++i) {
+#ifdef DEBUG
+   snprintf(name, sizeof(name), "TEMP%d.%c",
+first + i / 4, "xyzw"[i % 4]);
+#endif
idxs[1] = 
lp_build_const_int32(bld_base->base.gallivm, i);
ctx->temps[first * TGSI_NUM_CHANNELS + i] =
-   LLVMBuildGEP(builder, array_alloca, 
idxs, 2, "temp");
+   LLVMBuildGEP(builder, array_alloca, 
idxs, 2, name);
}
}
break;
}
case TGSI_FILE_INPUT:
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
if (ctx->load_input)
ctx->load_input(ctx, idx, decl);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/19] gallium/radeon: simplify radeon_llvm_emit_fetch for direct array addressing

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

We can use the pointer stored in the temps array directly.
---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 41f24d3..e084248 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -352,25 +352,20 @@ LLVMValueRef radeon_llvm_emit_fetch(struct 
lp_build_tgsi_context *bld_base,
case TGSI_FILE_TEMPORARY:
if (reg->Register.Index >= ctx->temps_count)
return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + 
swizzle];
if (tgsi_type_is_64bit(type)) {
ptr2 = ctx->temps[reg->Register.Index * 
TGSI_NUM_CHANNELS + swizzle + 1];
return radeon_llvm_emit_fetch_64bit(bld_base, type,
 LLVMBuildLoad(builder, ptr, 
""),
 LLVMBuildLoad(builder, ptr2, 
""));
}
-   LLVMValueRef array = get_alloca_for_array(bld_base, 
reg->Register.File, reg->Register.Index);
-   if (array) {
-   return bitcast(bld_base, type, 
load_value_from_array(bld_base, reg->Register.File, type,
-   swizzle, reg->Register.Index, NULL));
-   }
result = LLVMBuildLoad(builder, ptr, "");
break;
 
case TGSI_FILE_OUTPUT:
ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
if (tgsi_type_is_64bit(type)) {
ptr2 = lp_get_output_ptr(bld, reg->Register.Index, 
swizzle + 1);
return radeon_llvm_emit_fetch_64bit(bld_base, type,
 LLVMBuildLoad(builder, ptr, 
""),
 LLVMBuildLoad(builder, ptr2, 
""));
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/19] gallium, radeonsi: temporary array improvements

2016-08-09 Thread Nicolai Hähnle
Hi,

this series was originally motivated by fixing a VM fault and ended up
growing a bit larger :-)

The goal of patches 1-7 is to change st/mesa so that it sets the UsageMask
field in temporary array declarations. This ends up being helpful for
lowering float and vecN arrays with N <= 3.

The remaining patches are radeon (really radeonsi) specific. They begin
with a bunch of cleanups, and then do two things: first, when alloca is
used for arrays, make use of the UsageMask to allocate smaller arrays
when possible. Second, add explicit bounds checks when accessing those
arrays to prevent VM faults -- those temporary array accesses are not
protected by limits in buffer descriptors.

Note that the radeon part of the series exposes some pre-existing LLVM
bugs in Piglit, at least one of which has already been encountered
elsewhere, see http://reviews.llvm.org/D22556 and
http://reviews.llvm.org/D23303.

Please review!

Thanks,
Nicolai

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/19] st_glsl_to_tgsi: declare temp arrays at the start of translation

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Unused arrays should have been removed earlier anyway, and declaring the
arrays up front allows us to remove st_translate::array_sizes.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 4a19fe6..1df4864 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5143,21 +5143,20 @@ struct st_translate {
int num_immediates;
struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
struct ureg_dst address[3];
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
struct ureg_src images[PIPE_MAX_SHADER_IMAGES];
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
struct ureg_src shared_memory;
struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
-   unsigned *array_sizes;
struct array_decl *input_arrays;
struct array_decl *output_arrays;
 
const GLuint *inputMapping;
const GLuint *outputMapping;
 
/* For every instruction that contains a label (eg CALL), keep
 * details so that we can go back afterwards and emit the correct
 * tgsi instruction number for each label.
 */
@@ -5350,24 +5349,20 @@ dst_register(struct st_translate *t, gl_register_file 
file, unsigned index,
   if (ureg_dst_is_undef(t->temps[index]))
  t->temps[index] = ureg_DECL_local_temporary(t->ureg);
 
   return t->temps[index];
 
case PROGRAM_ARRAY:
   array = index >> 16;
 
   assert(array < t->num_temp_arrays);
 
-  if (ureg_dst_is_undef(t->arrays[array]))
- t->arrays[array] = ureg_DECL_array_temporary(
-t->ureg, t->array_sizes[array], TGSI_WRITEMASK_XYZW, TRUE);
-
   return ureg_dst_array_offset(t->arrays[array],
(int)(index & 0xFFFF) - 0x8000);
 
case PROGRAM_OUTPUT:
   if (!array_id) {
  if (t->procType == PIPE_SHADER_FRAGMENT)
 assert(index < FRAG_RESULT_MAX);
  else if (t->procType == PIPE_SHADER_TESS_CTRL ||
   t->procType == PIPE_SHADER_TESS_EVAL)
 assert(index < VARYING_SLOT_TESS_MAX);
@@ -6028,24 +6023,32 @@ st_translate_program(
t = CALLOC_STRUCT(st_translate);
if (!t) {
   ret = PIPE_ERROR_OUT_OF_MEMORY;
   goto out;
}
 
t->procType = procType;
t->inputMapping = inputMapping;
t->outputMapping = outputMapping;
t->ureg = ureg;
+
+   /*
+* Declare temporary arrays.
+*/
t->num_temp_arrays = program->next_array;
-   if (t->num_temp_arrays)
+   if (t->num_temp_arrays) {
   t->arrays = (struct ureg_dst*)
   calloc(1, sizeof(t->arrays[0]) * t->num_temp_arrays);
+  for (i = 0; i < t->num_temp_arrays; ++i)
+ t->arrays[i] = ureg_DECL_array_temporary(
+t->ureg, program->array_sizes[i], TGSI_WRITEMASK_XYZW, TRUE);
+   }
 
/*
 * Declare input attributes.
 */
switch (procType) {
case PIPE_SHADER_FRAGMENT:
   for (i = 0; i < numInputs; i++) {
  unsigned array_id = 0;
  unsigned array_size;
 
@@ -6243,21 +6246,20 @@ st_translate_program(
 if (procType == PIPE_SHADER_FRAGMENT &&
 semName == TGSI_SEMANTIC_POSITION)
emit_wpos(st_context(ctx), t, proginfo, ureg,
  program->wpos_transform_const);
 
 sysInputs &= ~(1 << i);
  }
   }
}
 
-   t->array_sizes = program->array_sizes;
t->input_arrays = program->input_arrays;
t->output_arrays = program->output_arrays;
 
/* Emit constants and uniforms.  TGSI uses a single index space for these,
 * so we put all the translated regs in t->constants.
 */
if (proginfo->Parameters) {
   t->constants = (struct ureg_src *)
  calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0]));
   if (t->constants == NULL) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/19] st_glsl_to_tgsi: rename st_translate::arrays to temp_arrays

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Analogous to {input,output}_arrays.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 1df4864..aa83f0a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5128,21 +5128,21 @@ struct label {
 
 /**
  * Intermediate state used during shader translation.
  */
 struct st_translate {
struct ureg_program *ureg;
 
unsigned temps_size;
struct ureg_dst *temps;
 
-   struct ureg_dst *arrays;
+   struct ureg_dst *temp_arrays;
unsigned num_temp_arrays;
struct ureg_src *constants;
int num_constants;
struct ureg_src *immediates;
int num_immediates;
struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
struct ureg_dst address[3];
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
@@ -5349,21 +5349,21 @@ dst_register(struct st_translate *t, gl_register_file 
file, unsigned index,
   if (ureg_dst_is_undef(t->temps[index]))
  t->temps[index] = ureg_DECL_local_temporary(t->ureg);
 
   return t->temps[index];
 
case PROGRAM_ARRAY:
   array = index >> 16;
 
   assert(array < t->num_temp_arrays);
 
-  return ureg_dst_array_offset(t->arrays[array],
+  return ureg_dst_array_offset(t->temp_arrays[array],
(int)(index & 0xFFFF) - 0x8000);
 
case PROGRAM_OUTPUT:
   if (!array_id) {
  if (t->procType == PIPE_SHADER_FRAGMENT)
 assert(index < FRAG_RESULT_MAX);
  else if (t->procType == PIPE_SHADER_TESS_CTRL ||
   t->procType == PIPE_SHADER_TESS_EVAL)
 assert(index < VARYING_SLOT_TESS_MAX);
  else
@@ -5568,21 +5568,21 @@ translate_tex_offset(struct st_translate *t,
   offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
   offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
   offset.Padding = 0;
   break;
case PROGRAM_ARRAY:
   array = in_offset->index >> 16;
 
   assert(array >= 0);
   assert(array < (int)t->num_temp_arrays);
 
-  dst = t->arrays[array];
+  dst = t->temp_arrays[array];
   offset.File = dst.File;
   offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000;
   offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
   offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
   offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
   offset.Padding = 0;
   break;
default:
   break;
}
@@ -6029,24 +6029,24 @@ st_translate_program(
t->procType = procType;
t->inputMapping = inputMapping;
t->outputMapping = outputMapping;
t->ureg = ureg;
 
/*
 * Declare temporary arrays.
 */
t->num_temp_arrays = program->next_array;
if (t->num_temp_arrays) {
-  t->arrays = (struct ureg_dst*)
-  calloc(1, sizeof(t->arrays[0]) * t->num_temp_arrays);
+  t->temp_arrays = (struct ureg_dst*)
+   calloc(1, sizeof(t->temp_arrays[0]) * 
t->num_temp_arrays);
   for (i = 0; i < t->num_temp_arrays; ++i)
- t->arrays[i] = ureg_DECL_array_temporary(
+ t->temp_arrays[i] = ureg_DECL_array_temporary(
 t->ureg, program->array_sizes[i], TGSI_WRITEMASK_XYZW, TRUE);
}
 
/*
 * Declare input attributes.
 */
switch (procType) {
case PIPE_SHADER_FRAGMENT:
   for (i = 0; i < numInputs; i++) {
  unsigned array_id = 0;
@@ -6423,21 +6423,21 @@ st_translate_program(
 
 ureg_set_next_shader_processor(ureg, next);
 break;
  }
   }
   break;
}
 
 out:
if (t) {
-  free(t->arrays);
+  free(t->temp_arrays);
   free(t->temps);
   free(t->insn);
   free(t->labels);
   free(t->constants);
   t->num_constants = 0;
   free(t->immediates);
   t->num_immediates = 0;
 
   if (t->error) {
  debug_printf("%s: translate error flag set\n", __func__);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/19] gallium/radeon: extract common getelementptr logic into get_pointer_into_array

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 105 +
 1 file changed, 66 insertions(+), 39 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 531a8fe..87fc07e 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -180,20 +180,55 @@ emit_array_index(struct lp_build_tgsi_soa_context *bld,
 {
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
 
if (!reg) {
return lp_build_const_int32(gallivm, offset);
}
LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, 
bld->addr[reg->Index][reg->Swizzle], "");
return LLVMBuildAdd(gallivm->builder, addr, 
lp_build_const_int32(gallivm, offset), "");
 }
 
+/**
+ * For indirect registers, construct a pointer directly to the requested
+ * element using getelementptr if possible.
+ *
+ * Returns NULL if the insertelement/extractelement fallback for array access
+ * must be used.
+ */
+static LLVMValueRef
+get_pointer_into_array(struct radeon_llvm_context *ctx,
+  unsigned file,
+  unsigned swizzle,
+  unsigned reg_index,
+  const struct tgsi_ind_register *reg_indirect)
+{
+   const struct radeon_llvm_array *array;
+   struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef idxs[2];
+   LLVMValueRef index;
+
+   if (file != TGSI_FILE_TEMPORARY)
+   return NULL;
+
+   array = get_temp_array(&ctx->soa.bld_base, reg_index, reg_indirect);
+   if (!array || !array->alloca)
+   return NULL;
+
+   index = emit_array_index(&ctx->soa, reg_indirect, reg_index - 
array->range.First);
+   index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, 
TGSI_NUM_CHANNELS), "");
+   index = LLVMBuildAdd(builder, index, lp_build_const_int32(gallivm, 
swizzle), "");
+   idxs[0] = ctx->soa.bld_base.uint_bld.zero;
+   idxs[1] = index;
+   return LLVMBuildGEP(builder, array->alloca, idxs, 2, "");
+}
+
 LLVMValueRef
 radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
 enum tgsi_opcode_type type,
 LLVMValueRef ptr,
 LLVMValueRef ptr2)
 {
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMValueRef result;
 
result = 
LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context,
 32), bld_base->base.type.length * 2));
@@ -236,80 +271,72 @@ emit_array_fetch(struct lp_build_tgsi_context *bld_base,
 }
 
 static LLVMValueRef
 load_value_from_array(struct lp_build_tgsi_context *bld_base,
  unsigned file,
  enum tgsi_opcode_type type,
  unsigned swizzle,
  unsigned reg_index,
  const struct tgsi_ind_register *reg_indirect)
 {
+   struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
-   struct tgsi_declaration_range range = get_array_range(bld_base, file, 
reg_index, reg_indirect);
-   LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - 
range.First);
-   LLVMValueRef array = get_alloca_for_array(bld_base, file, reg_index, 
reg_indirect);
-   LLVMValueRef ptr, val, indices[2];
-
-   if (!array) {
-   /* Handle the case where the array is stored as a vector. */
-   return LLVMBuildExtractElement(builder,
-   emit_array_fetch(bld_base, file, type, range, 
swizzle),
-   index, "");
-   }
+   LLVMValueRef ptr;
 
-   index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, 
TGSI_NUM_CHANNELS), "");
-   index = LLVMBuildAdd(builder, index, lp_build_const_int32(gallivm, 
swizzle), "");
-   indices[0] = bld_base->uint_bld.zero;
-   indices[1] = index;
-   ptr = LLVMBuildGEP(builder, array, indices, 2, "");
-   val = LLVMBuildLoad(builder, ptr, "");
-   if (tgsi_type_is_64bit(type)) {
-   LLVMValueRef ptr_hi, val_hi;
-   indices[0] = lp_build_const_int32(gallivm, 1);
-   ptr_hi = LLVMBuildGEP(builder, ptr, indices, 1, "");
-   val_hi = LLVMBuildLoad(builder, ptr_hi, "");
-   val = radeon_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
+   ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, 
reg_indirect);
+   if (ptr) {
+   LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
+   if (tgsi_type_is_64

[Mesa-dev] [PATCH 13/19] gallium/radeon: extract common lookup code into get_temp_array function

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 73 --
 1 file changed, 40 insertions(+), 33 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index e4bfa74..994c7da 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -104,72 +104,79 @@ static LLVMValueRef emit_swizzle(struct 
lp_build_tgsi_context *bld_base,
swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
 
return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
  value,
  LLVMGetUndef(LLVMTypeOf(value)),
  LLVMConstVector(swizzles, 4), "");
 }
 
+/**
+ * Return the description of the array covering the given temporary register
+ * index.
+ */
+static const struct radeon_llvm_array *
+get_temp_array(struct lp_build_tgsi_context *bld_base,
+  unsigned reg_index,
+  const struct tgsi_ind_register *reg)
+{
+   struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
+   unsigned num_arrays = 
ctx->soa.bld_base.info->array_max[TGSI_FILE_TEMPORARY];
+   unsigned i;
+
+   if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
+   return &ctx->arrays[reg->ArrayID - 1];
+
+   for (i = 0; i < num_arrays; i++) {
+   const struct radeon_llvm_array *array = &ctx->arrays[i];
+
+   if (reg_index >= array->range.First && reg_index <= 
array->range.Last)
+   return array;
+   }
+
+   return NULL;
+}
+
 static struct tgsi_declaration_range
 get_array_range(struct lp_build_tgsi_context *bld_base,
unsigned File, unsigned reg_index,
const struct tgsi_ind_register *reg)
 {
-   struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
+   struct tgsi_declaration_range range;
 
-   if (!reg) {
-   unsigned i;
-   unsigned num_arrays = 
bld_base->info->array_max[TGSI_FILE_TEMPORARY];
-   for (i = 0; i < num_arrays; i++) {
-   const struct tgsi_declaration_range *range =
-   &ctx->arrays[i].range;
-
-   if (reg_index >= range->First && reg_index <= 
range->Last) {
-   return ctx->arrays[i].range;
-   }
-   }
+   if (File == TGSI_FILE_TEMPORARY) {
+   const struct radeon_llvm_array *array =
+   get_temp_array(bld_base, reg_index, reg);
+   if (array)
+   return array->range;
}
 
-   if (File != TGSI_FILE_TEMPORARY || !reg || reg->ArrayID == 0 ||
-   reg->ArrayID > bld_base->info->array_max[TGSI_FILE_TEMPORARY]) {
-   struct tgsi_declaration_range range;
-   range.First = 0;
-   range.Last = bld_base->info->file_max[File];
-   return range;
-   }
-
-   return ctx->arrays[reg->ArrayID - 1].range;
+   range.First = 0;
+   range.Last = bld_base->info->file_max[File];
+   return range;
 }
 
 static LLVMValueRef get_alloca_for_array(struct lp_build_tgsi_context 
*bld_base,
 unsigned file,
 unsigned index)
 {
-   unsigned i;
-   unsigned num_arrays;
-   struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
+   const struct radeon_llvm_array *array;
 
if (file != TGSI_FILE_TEMPORARY)
return NULL;
 
-   num_arrays = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
-   for (i = 0; i < num_arrays; i++) {
-   const struct tgsi_declaration_range *range =
-   &ctx->arrays[i].range;
+   array = get_temp_array(bld_base, index, NULL);
+   if (!array)
+   return NULL;
 
-   if (index >= range->First && index <= range->Last) {
-   return ctx->arrays[i].alloca;
-   }
-   }
-   return NULL;
+   return array->alloca;
 }
 
 static LLVMValueRef
 emit_array_index(struct lp_build_tgsi_soa_context *bld,
 const struct tgsi_ind_register *reg,
 unsigned offset)
 {
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
 
if (!reg) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/19] st_glsl_to_tgsi: apply usagemasks to array temporaries

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 0913dbb..f2663bc 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -341,20 +341,21 @@ static st_dst_reg undef_dst = 
st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_T
 
 struct array_decl {
unsigned mesa_index;
unsigned array_id;
unsigned array_size;
enum glsl_base_type array_type;
 };
 
 struct temp_array_decl {
unsigned size;
+   unsigned usagemask;
 };
 
 static enum glsl_base_type
 find_array_type(struct array_decl *arrays, unsigned count, unsigned array_id)
 {
unsigned i;
 
for (i = 0; i < count; i++) {
   struct array_decl *decl = &arrays[i];
 
@@ -1221,20 +1222,21 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) {
   if (num_temp_arrays >= max_num_temp_arrays) {
  max_num_temp_arrays += 32;
  temp_arrays = (temp_array_decl*)
 realloc(temp_arrays, sizeof(temp_arrays[0]) * max_num_temp_arrays);
   }
 
   src.file = PROGRAM_ARRAY;
   src.index = num_temp_arrays << 16 | 0x8000;
   temp_arrays[num_temp_arrays].size = type_size(type);
+  temp_arrays[num_temp_arrays].usagemask = st_glsl_type_usagemask(type);
   ++num_temp_arrays;
 
} else {
   src.file = PROGRAM_TEMPORARY;
   src.index = next_temp;
   next_temp += type_size(type);
}
 
if (type->is_array() || type->is_record()) {
   src.swizzle = SWIZZLE_NOOP;
@@ -6037,21 +6039,22 @@ st_translate_program(
 
/*
 * Declare temporary arrays.
 */
t->num_temp_arrays = program->num_temp_arrays;
if (t->num_temp_arrays) {
   t->temp_arrays = (struct ureg_dst*)
calloc(1, sizeof(t->temp_arrays[0]) * 
t->num_temp_arrays);
   for (i = 0; i < t->num_temp_arrays; ++i)
  t->temp_arrays[i] = ureg_DECL_array_temporary(
-t->ureg, program->temp_arrays[i].size, TGSI_WRITEMASK_XYZW, TRUE);
+t->ureg, program->temp_arrays[i].size,
+program->temp_arrays[i].usagemask, TRUE);
}
 
/*
 * Declare input attributes.
 */
switch (procType) {
case PIPE_SHADER_FRAGMENT:
   for (i = 0; i < numInputs; i++) {
  unsigned array_id = 0;
  unsigned array_size;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/19] st_glsl_to_tgsi: add temp_array_decl structure

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

We will use it to save the type-based usagemask.

While we're at it, rename the variables to temp_arrays etc., analogous to
{input,output}_arrays.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 36 +-
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index aa83f0a..0913dbb 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -339,20 +339,24 @@ public:
 static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, 
GLSL_TYPE_ERROR);
 static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, 
GLSL_TYPE_ERROR);
 
 struct array_decl {
unsigned mesa_index;
unsigned array_id;
unsigned array_size;
enum glsl_base_type array_type;
 };
 
+struct temp_array_decl {
+   unsigned size;
+};
+
 static enum glsl_base_type
 find_array_type(struct array_decl *arrays, unsigned count, unsigned array_id)
 {
unsigned i;
 
for (i = 0; i < count; i++) {
   struct array_decl *decl = &arrays[i];
 
   if (array_id == decl->array_id) {
  return decl->array_type;
@@ -374,23 +378,23 @@ public:
function_entry *current_function;
 
struct gl_context *ctx;
struct gl_program *prog;
struct gl_shader_program *shader_program;
struct gl_linked_shader *shader;
struct gl_shader_compiler_options *options;
 
int next_temp;
 
-   unsigned *array_sizes;
-   unsigned max_num_arrays;
-   unsigned next_array;
+   struct temp_array_decl *temp_arrays;
+   unsigned max_num_temp_arrays;
+   unsigned num_temp_arrays;
 
struct array_decl input_arrays[PIPE_MAX_SHADER_INPUTS];
unsigned num_input_arrays;
struct array_decl output_arrays[PIPE_MAX_SHADER_OUTPUTS];
unsigned num_output_arrays;
 
int num_address_regs;
uint32_t samplers_used;
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of TGSI_TEXTURE_* */
@@ -1208,30 +1212,30 @@ type_has_array_or_matrix(const glsl_type *type)
 st_src_reg
 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
 {
st_src_reg src;
 
src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
src.reladdr = NULL;
src.negate = 0;
 
if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) {
-  if (next_array >= max_num_arrays) {
- max_num_arrays += 32;
- array_sizes = (unsigned*)
-realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays);
+  if (num_temp_arrays >= max_num_temp_arrays) {
+ max_num_temp_arrays += 32;
+ temp_arrays = (temp_array_decl*)
+realloc(temp_arrays, sizeof(temp_arrays[0]) * max_num_temp_arrays);
   }
 
   src.file = PROGRAM_ARRAY;
-  src.index = next_array << 16 | 0x8000;
-  array_sizes[next_array] = type_size(type);
-  ++next_array;
+  src.index = num_temp_arrays << 16 | 0x8000;
+  temp_arrays[num_temp_arrays].size = type_size(type);
+  ++num_temp_arrays;
 
} else {
   src.file = PROGRAM_TEMPORARY;
   src.index = next_temp;
   next_temp += type_size(type);
}
 
if (type->is_array() || type->is_record()) {
   src.swizzle = SWIZZLE_NOOP;
} else {
@@ -4273,23 +4277,23 @@ glsl_to_tgsi_visitor::visit(ir_barrier *ir)
 
emit_asm(ir, TGSI_OPCODE_BARRIER);
 }
 
 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
 {
STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS);
 
result.file = PROGRAM_UNDEFINED;
next_temp = 1;
-   array_sizes = NULL;
-   max_num_arrays = 0;
-   next_array = 0;
+   temp_arrays = NULL;
+   max_num_temp_arrays = 0;
+   num_temp_arrays = 0;
num_input_arrays = 0;
num_output_arrays = 0;
next_signature_id = 1;
num_immediates = 0;
current_function = NULL;
num_address_regs = 0;
samplers_used = 0;
buffers_used = 0;
images_used = 0;
indirect_addr_consts = false;
@@ -4302,21 +4306,21 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
shader_program = NULL;
shader = NULL;
options = NULL;
have_sqrt = false;
have_fma = false;
use_shared_memory = false;
 }
 
 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
 {
-   free(array_sizes);
+   free(temp_arrays);
ralloc_free(mem_ctx);
 }
 
 extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
 {
delete v;
 }
 
 
 /**
@@ -6027,27 +6031,27 @@ st_translate_program(
}
 
t->procType = procType;
t->inputMapping = inputMapping;
t->outputMapping = outputMapping;
t->ureg = ureg;
 
/*
 * Declare temporary arrays.
 */
-   t->num_temp_arrays = program->next_array;
+   t->num_temp_arrays = program->num_temp_arrays;
if (t->num_temp_arrays) {
   t->temp_arrays = (struct ureg_dst*)
calloc(1, sizeof(t->temp_arrays[0]) * 
t->num_temp_arrays);
   for (i = 0; i < t->num_temp_arrays; ++i)
 

[Mesa-dev] [PATCH 17/19] gallium/radeon: reduce alloca of temporaries based on usagemask

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/drivers/radeon/radeon_llvm.h   |  1 +
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 47 +-
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index 13f3336..4c946b5 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -45,20 +45,21 @@ struct radeon_llvm_branch {
unsigned has_else;
 };
 
 struct radeon_llvm_loop {
LLVMBasicBlockRef loop_block;
LLVMBasicBlockRef endloop_block;
 };
 
 struct radeon_llvm_array {
struct tgsi_declaration_range range;
+   ubyte usagemask;
LLVMValueRef alloca;
 };
 
 struct radeon_llvm_context {
struct lp_build_tgsi_soa_context soa;
 
/*=== Front end configuration ===*/
 
/* Instructions that are not described by any of the TGSI opcodes. */
 
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index d8ab5b0..73e4ce2 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -191,22 +191,30 @@ get_pointer_into_array(struct radeon_llvm_context *ctx,
LLVMValueRef index;
 
if (file != TGSI_FILE_TEMPORARY)
return NULL;
 
array = get_temp_array(&ctx->soa.bld_base, reg_index, reg_indirect);
if (!array || !array->alloca)
return NULL;
 
index = emit_array_index(&ctx->soa, reg_indirect, reg_index - 
array->range.First);
-   index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, 
TGSI_NUM_CHANNELS), "");
-   index = LLVMBuildAdd(builder, index, lp_build_const_int32(gallivm, 
swizzle), "");
+   index = LLVMBuildMul(
+   builder, index,
+   lp_build_const_int32(gallivm, util_bitcount(array->usagemask)),
+   "");
+   index = LLVMBuildAdd(
+   builder, index,
+   lp_build_const_int32(
+   gallivm,
+   util_bitcount(array->usagemask & ((1 << swizzle) - 1))),
+   "");
idxs[0] = ctx->soa.bld_base.uint_bld.zero;
idxs[1] = index;
return LLVMBuildGEP(builder, array->alloca, idxs, 2, "");
 }
 
 LLVMValueRef
 radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
 enum tgsi_opcode_type type,
 LLVMValueRef ptr,
 LLVMValueRef ptr2)
@@ -466,54 +474,61 @@ static void emit_declaration(struct lp_build_tgsi_context 
*bld_base,
}
}
break;
}
 
case TGSI_FILE_TEMPORARY:
{
char name[16] = "";
LLVMValueRef array_alloca = NULL;
unsigned decl_size;
+   unsigned usagemask = decl->Declaration.UsageMask;
first = decl->Range.First;
last = decl->Range.Last;
decl_size = 4 * ((last - first) + 1);
+
if (decl->Declaration.Array) {
unsigned id = decl->Array.ArrayID - 1;
+   unsigned array_size;
+
if (!ctx->arrays) {
int size = 
bld_base->info->array_max[TGSI_FILE_TEMPORARY];
ctx->arrays = CALLOC(size, 
sizeof(ctx->arrays[0]));
}
 
ctx->arrays[id].range = decl->Range;
+   ctx->arrays[id].usagemask = usagemask;
+   array_size = ((last - first) + 1) * 
util_bitcount(usagemask);
 
/* If the array has more than 16 elements, store it
 * in memory using an alloca that spans the entire
 * array.
 *
 * Otherwise, store each array element individually.
 * We will then generate vectors (per-channel, up to
-* <4 x float>) for indirect addressing.
+* <16 x float> if the usagemask is a single bit) for
+* indirect addressing.
 *
 * Note that 16 is the number of vector elements that
 * LLVM will store in a register, so theoretically an
 * array with up to 4 * 16 = 64 elements could be
 * handled this way, but whether that's a good idea
 * depends on VGPR register pressure elsewhere.
 *
 * FIXME: We shouldn't need to have the non-alloca
 * code path for arrays. LLVM should be smart enough to
 * promote allocas into registers when pr

[Mesa-dev] [PATCH 18/19] gallium/radeon: add radeon_llvm_bound_index for bounds checking

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/drivers/radeon/radeon_llvm.h   |  4 +++
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 29 ++
 src/gallium/drivers/radeonsi/si_shader.c   | 19 +-
 3 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index 4c946b5..c3efd7d 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -112,20 +112,24 @@ struct radeon_llvm_context {
 
struct gallivm_state gallivm;
 };
 
 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
  enum tgsi_opcode_type type);
 
 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
 enum tgsi_opcode_type type, LLVMValueRef value);
 
+LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx,
+LLVMValueRef index,
+unsigned num);
+
 void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context 
*bld_base,
  struct lp_build_emit_data *emit_data,
  LLVMValueRef *coords_arg,
  LLVMValueRef *derivs_arg);
 
 void radeon_llvm_context_init(struct radeon_llvm_context *ctx,
   const char *triple);
 
 void radeon_llvm_create_func(struct radeon_llvm_context *ctx,
 LLVMTypeRef *return_types, unsigned 
num_return_elems,
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 73e4ce2..e3b04ee 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -66,20 +66,49 @@ LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
 {
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
 
if (dst_type)
return LLVMBuildBitCast(builder, value, dst_type, "");
else
return value;
 }
 
+/**
+ * Return a value that is equal to the given i32 \p index if it lies in [0,num)
+ * or an undefined value in the same interval otherwise.
+ */
+LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx,
+LLVMValueRef index,
+unsigned num)
+{
+   struct gallivm_state *gallivm = &ctx->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef c_max = lp_build_const_int32(gallivm, num - 1);
+   LLVMValueRef cc;
+
+   if (util_is_power_of_two(num)) {
+   index = LLVMBuildAnd(builder, index, c_max, "");
+   } else {
+   /* In theory, this MAX pattern should result in code that is
+* as good as the bit-wise AND above.
+*
+* In practice, LLVM generates worse code (at the time of
+* writing), because its value tracking is not strong enough.
+*/
+   cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
+   index = LLVMBuildSelect(builder, cc, index, c_max, "");
+   }
+
+   return index;
+}
+
 static struct radeon_llvm_loop *get_current_loop(struct radeon_llvm_context 
*ctx)
 {
return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
 }
 
 static struct radeon_llvm_branch *get_current_branch(struct 
radeon_llvm_context *ctx)
 {
return ctx->branch_depth > 0 ?
ctx->branch + (ctx->branch_depth - 1) : NULL;
 }
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 2de20cb..5f02fcd 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -558,47 +558,30 @@ static LLVMValueRef get_indirect_index(struct 
si_shader_context *ctx,
 }
 
 /**
  * Like get_indirect_index, but restricts the return value to a (possibly
  * undefined) value inside [0..num).
  */
 static LLVMValueRef get_bounded_indirect_index(struct si_shader_context *ctx,
   const struct tgsi_ind_register 
*ind,
   int rel_index, unsigned num)
 {
-   struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
-   LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef result = get_indirect_index(ctx, ind, rel_index);
-   LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
-   LLVMValueRef cc;
 
/* LLVM 3.8: If indirect resource indexing is used:
 * - SI & CIK hang
 * - VI crashes
 */
if (HAVE_LLVM <= 0x0308)
return LLVMGetUndef(ctx->i32);
 
-   if (util_is_power_of_two(num)) {
-   

[Mesa-dev] [PATCH 07/19] st_glsl_to_tgsi: use calloc the way it's meant to be used

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f2663bc..f3da77d 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -6036,21 +6036,21 @@ st_translate_program(
t->inputMapping = inputMapping;
t->outputMapping = outputMapping;
t->ureg = ureg;
 
/*
 * Declare temporary arrays.
 */
t->num_temp_arrays = program->num_temp_arrays;
if (t->num_temp_arrays) {
   t->temp_arrays = (struct ureg_dst*)
-   calloc(1, sizeof(t->temp_arrays[0]) * 
t->num_temp_arrays);
+   calloc(sizeof(t->temp_arrays[0]), t->num_temp_arrays);
   for (i = 0; i < t->num_temp_arrays; ++i)
  t->temp_arrays[i] = ureg_DECL_array_temporary(
 t->ureg, program->temp_arrays[i].size,
 program->temp_arrays[i].usagemask, TRUE);
}
 
/*
 * Declare input attributes.
 */
switch (procType) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/19] gallium/radeon: simplify radeon_llvm_emit_store for direct array addressing

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

We can use the pointer stored in the temps array directly.
---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index e084248..7b96a58 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -624,30 +624,23 @@ void radeon_llvm_emit_store(struct lp_build_tgsi_context 
*bld_base,
} else {
switch(reg->Register.File) {
case TGSI_FILE_OUTPUT:
temp_ptr = 
bld->outputs[reg->Register.Index][chan_index];
if (tgsi_type_is_64bit(dtype))
temp_ptr2 = 
bld->outputs[reg->Register.Index][chan_index + 1];
break;
 
case TGSI_FILE_TEMPORARY:
{
-   LLVMValueRef array;
if (reg->Register.Index >= ctx->temps_count)
continue;
-   array = get_alloca_for_array(bld_base, 
reg->Register.File, reg->Register.Index);
 
-   if (array) {
-   store_value_to_array(bld_base, value, 
reg->Register.File, chan_index, reg->Register.Index,
-   NULL);
-   continue;
-   }
temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * 
reg->Register.Index + chan_index];
if (tgsi_type_is_64bit(dtype))
temp_ptr2 = ctx->temps[ 
TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
 
break;
}
default:
return;
}
if (!tgsi_type_is_64bit(dtype))
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/19] gallium/radeon: pass indirect register info into get_alloca_for_array

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

To have the same signature as get_array_range.
---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 994c7da..531a8fe 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -151,28 +151,29 @@ get_array_range(struct lp_build_tgsi_context *bld_base,
return array->range;
}
 
range.First = 0;
range.Last = bld_base->info->file_max[File];
return range;
 }
 
 static LLVMValueRef get_alloca_for_array(struct lp_build_tgsi_context 
*bld_base,
 unsigned file,
-unsigned index)
+unsigned index,
+const struct tgsi_ind_register *reg)
 {
const struct radeon_llvm_array *array;
 
if (file != TGSI_FILE_TEMPORARY)
return NULL;
 
-   array = get_temp_array(bld_base, index, NULL);
+   array = get_temp_array(bld_base, index, reg);
if (!array)
return NULL;
 
return array->alloca;
 }
 
 static LLVMValueRef
 emit_array_index(struct lp_build_tgsi_soa_context *bld,
 const struct tgsi_ind_register *reg,
 unsigned offset)
@@ -240,21 +241,21 @@ load_value_from_array(struct lp_build_tgsi_context 
*bld_base,
  enum tgsi_opcode_type type,
  unsigned swizzle,
  unsigned reg_index,
  const struct tgsi_ind_register *reg_indirect)
 {
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
struct tgsi_declaration_range range = get_array_range(bld_base, file, 
reg_index, reg_indirect);
LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - 
range.First);
-   LLVMValueRef array = get_alloca_for_array(bld_base, file, reg_index);
+   LLVMValueRef array = get_alloca_for_array(bld_base, file, reg_index, 
reg_indirect);
LLVMValueRef ptr, val, indices[2];
 
if (!array) {
/* Handle the case where the array is stored as a vector. */
return LLVMBuildExtractElement(builder,
emit_array_fetch(bld_base, file, type, range, 
swizzle),
index, "");
}
 
index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, 
TGSI_NUM_CHANNELS), "");
@@ -280,21 +281,21 @@ store_value_to_array(struct lp_build_tgsi_context 
*bld_base,
 unsigned file,
 unsigned chan_index,
 unsigned reg_index,
 const struct tgsi_ind_register *reg_indirect)
 {
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
struct tgsi_declaration_range range = get_array_range(bld_base, file, 
reg_index, reg_indirect);
LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - 
range.First);
-   LLVMValueRef array = get_alloca_for_array(bld_base, file, reg_index);
+   LLVMValueRef array = get_alloca_for_array(bld_base, file, reg_index, 
reg_indirect);
 
if (array) {
LLVMValueRef indices[2];
index = LLVMBuildMul(builder, index, 
lp_build_const_int32(gallivm, TGSI_NUM_CHANNELS), "");
index = LLVMBuildAdd(builder, index, 
lp_build_const_int32(gallivm, chan_index), "");
indices[0] = bld_base->uint_bld.zero;
indices[1] = index;
LLVMValueRef pointer = LLVMBuildGEP(builder, array, indices, 2, 
"");
LLVMBuildStore(builder, value, pointer);
return NULL;
@@ -617,21 +618,21 @@ void radeon_llvm_emit_store(struct lp_build_tgsi_context 
*bld_base,
 
if (reg->Register.Indirect) {
struct tgsi_declaration_range range = 
get_array_range(bld_base,
reg->Register.File, reg->Register.Index, 
&reg->Indirect);
 
unsigned i, size = range.Last - range.First + 1;
unsigned file = reg->Register.File;
unsigned reg_index = reg->Register.Index;
LLVMValueRef array = store_value_to_array(bld_base, 
value, file, chan_index,
  reg_index, 
&reg->Indirect);
-   if (get_alloca_for_array(bld_base, file, reg_index)) {
+   if (get_all

[Mesa-dev] [PATCH 08/19] gallium/radeon: clean up emit_declaration for temporaries

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

In the alloca'd array case, no longer create redundant and unused allocas
for the individual elements; create getelementptrs instead.
---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 27 ++
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index d75311e..41f24d3 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -408,81 +408,90 @@ static LLVMValueRef si_build_alloca_undef(struct 
gallivm_state *gallivm,
LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
return ptr;
 }
 
 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 const struct tgsi_full_declaration *decl)
 {
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-   unsigned first, last, i, idx;
+   unsigned first, last, i;
switch(decl->Declaration.File) {
case TGSI_FILE_ADDRESS:
{
 unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 ctx->soa.addr[idx][chan] = 
si_build_alloca_undef(
&ctx->gallivm,
ctx->soa.bld_base.uint_bld.elem_type, 
"");
}
}
break;
}
 
case TGSI_FILE_TEMPORARY:
{
+   LLVMValueRef array_alloca = NULL;
unsigned decl_size;
first = decl->Range.First;
last = decl->Range.Last;
decl_size = 4 * ((last - first) + 1);
if (decl->Declaration.Array) {
unsigned id = decl->Array.ArrayID - 1;
if (!ctx->arrays) {
int size = 
bld_base->info->array_max[TGSI_FILE_TEMPORARY];
ctx->arrays = CALLOC(size, 
sizeof(ctx->arrays[0]));
-   for (i = 0; i < size; ++i) {
-   assert(!ctx->arrays[i].alloca);}
}
 
ctx->arrays[id].range = decl->Range;
 
/* If the array is more than 16 elements (each element
 * is 32-bits), then store it in a vector.  Storing the
 * array in a vector will causes the compiler to store
 * the array in registers and access it using indirect
 * addressing.  16 is number of vector elements that
 * LLVM will store in a register.
 * FIXME: We shouldn't need to do this.  LLVM should be
 * smart enough to promote allocas int registers when
 * profitable.
 */
if (decl_size > 16) {
-   ctx->arrays[id].alloca = 
LLVMBuildAlloca(builder,
+   array_alloca = LLVMBuildAlloca(builder,
LLVMArrayType(bld_base->base.vec_type, 
decl_size),"array");
+   ctx->arrays[id].alloca = array_alloca;
}
}
-   first = decl->Range.First;
-   last = decl->Range.Last;
+
if (!ctx->temps_count) {
ctx->temps_count = 
bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
ctx->temps = MALLOC(TGSI_NUM_CHANNELS * 
ctx->temps_count * sizeof(LLVMValueRef));
}
-   for (idx = first; idx <= last; idx++) {
-   for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
-   ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
+   if (!array_alloca) {
+   for (i = 0; i < decl_size; ++i) {
+   ctx->temps[first * TGSI_NUM_CHANNELS + i] =

si_build_alloca_undef(bld_base->base.gallivm,
  
bld_base->base.vec_type,
  "temp");
}
+   } else {
+   LLVMValueRef idxs[2] = {
+   bld_base->uint_bld.zero,
+   NULL
+   };
+   for (i = 0; i < decl_size; ++i) {
+   idxs[1] = 
lp_build_const_int32(bld_base->base.gall

[Mesa-dev] [PATCH 12/19] gallium/radeon: clarify the comment on the array alloca heuristic

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 29 ++
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 22ff18e..e4bfa74 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -437,33 +437,42 @@ static void emit_declaration(struct lp_build_tgsi_context 
*bld_base,
decl_size = 4 * ((last - first) + 1);
if (decl->Declaration.Array) {
unsigned id = decl->Array.ArrayID - 1;
if (!ctx->arrays) {
int size = 
bld_base->info->array_max[TGSI_FILE_TEMPORARY];
ctx->arrays = CALLOC(size, 
sizeof(ctx->arrays[0]));
}
 
ctx->arrays[id].range = decl->Range;
 
-   /* If the array is more than 16 elements (each element
-* is 32-bits), then store it in a vector.  Storing the
-* array in a vector will causes the compiler to store
-* the array in registers and access it using indirect
-* addressing.  16 is number of vector elements that
-* LLVM will store in a register.
-* FIXME: We shouldn't need to do this.  LLVM should be
-* smart enough to promote allocas int registers when
-* profitable.
+   /* If the array has more than 16 elements, store it
+* in memory using an alloca that spans the entire
+* array.
+*
+* Otherwise, store each array element individually.
+* We will then generate vectors (per-channel, up to
+* <4 x float>) for indirect addressing.
+*
+* Note that 16 is the number of vector elements that
+* LLVM will store in a register, so theoretically an
+* array with up to 4 * 16 = 64 elements could be
+* handled this way, but whether that's a good idea
+* depends on VGPR register pressure elsewhere.
+*
+* FIXME: We shouldn't need to have the non-alloca
+* code path for arrays. LLVM should be smart enough to
+* promote allocas into registers when profitable.
 */
if (decl_size > 16) {
array_alloca = LLVMBuildAlloca(builder,
-   LLVMArrayType(bld_base->base.vec_type, 
decl_size),"array");
+   LLVMArrayType(bld_base->base.vec_type,
+ decl_size), "array");
ctx->arrays[id].alloca = array_alloca;
}
}
 
if (!ctx->temps_count) {
ctx->temps_count = 
bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
ctx->temps = MALLOC(TGSI_NUM_CHANNELS * 
ctx->temps_count * sizeof(LLVMValueRef));
}
if (!array_alloca) {
for (i = 0; i < decl_size; ++i) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/19] gallium/radeon: protect against out of bounds temporary array accesses

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

They can lead to VM faults and worse, which goes against the GL robustness
promises.
---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index e3b04ee..6a010d5 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -220,20 +220,35 @@ get_pointer_into_array(struct radeon_llvm_context *ctx,
LLVMValueRef index;
 
if (file != TGSI_FILE_TEMPORARY)
return NULL;
 
array = get_temp_array(&ctx->soa.bld_base, reg_index, reg_indirect);
if (!array || !array->alloca)
return NULL;
 
index = emit_array_index(&ctx->soa, reg_indirect, reg_index - 
array->range.First);
+
+   /* Ensure that the index is within a valid range, to guard against
+* VM faults and overwriting critical data (e.g. spilled resource
+* descriptors).
+*
+* TODO It should be possible to avoid the additional instructions
+* if LLVM is changed so that it guarantees:
+* 1. the scratch space descriptor isolates the current wave (this
+*could even save the scratch offset SGPR at the cost of an
+*additional SALU instruction)
+* 2. the memory for allocas must be allocated at the _end_ of the
+*scratch space (after spilled registers)
+*/
+   index = radeon_llvm_bound_index(ctx, index, array->range.Last - 
array->range.First + 1);
+
index = LLVMBuildMul(
builder, index,
lp_build_const_int32(gallivm, util_bitcount(array->usagemask)),
"");
index = LLVMBuildAdd(
builder, index,
lp_build_const_int32(
gallivm,
util_bitcount(array->usagemask & ((1 << swizzle) - 1))),
"");
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/19] gallium/radeon: always do the full store in store_value_to_array

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Doing the write-back of the temporary vector in radeon_llvm_emit_store makes
no sense.

This also allows us to get rid of get_alloca_for_array.
---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 77 --
 1 file changed, 28 insertions(+), 49 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 87fc07e..d8ab5b0 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -149,37 +149,20 @@ get_array_range(struct lp_build_tgsi_context *bld_base,
get_temp_array(bld_base, reg_index, reg);
if (array)
return array->range;
}
 
range.First = 0;
range.Last = bld_base->info->file_max[File];
return range;
 }
 
-static LLVMValueRef get_alloca_for_array(struct lp_build_tgsi_context 
*bld_base,
-unsigned file,
-unsigned index,
-const struct tgsi_ind_register *reg)
-{
-   const struct radeon_llvm_array *array;
-
-   if (file != TGSI_FILE_TEMPORARY)
-   return NULL;
-
-   array = get_temp_array(bld_base, index, reg);
-   if (!array)
-   return NULL;
-
-   return array->alloca;
-}
-
 static LLVMValueRef
 emit_array_index(struct lp_build_tgsi_soa_context *bld,
 const struct tgsi_ind_register *reg,
 unsigned offset)
 {
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
 
if (!reg) {
return lp_build_const_int32(gallivm, offset);
}
@@ -299,44 +282,67 @@ load_value_from_array(struct lp_build_tgsi_context 
*bld_base,
struct tgsi_declaration_range range =
get_array_range(bld_base, file, reg_index, 
reg_indirect);
LLVMValueRef index =
emit_array_index(bld, reg_indirect, reg_index - 
range.First);
LLVMValueRef array =
emit_array_fetch(bld_base, file, type, range, swizzle);
return LLVMBuildExtractElement(builder, array, index, "");
}
 }
 
-static LLVMValueRef
+static void
 store_value_to_array(struct lp_build_tgsi_context *bld_base,
 LLVMValueRef value,
 unsigned file,
 unsigned chan_index,
 unsigned reg_index,
 const struct tgsi_ind_register *reg_indirect)
 {
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef ptr;
 
ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, 
reg_indirect);
if (ptr) {
LLVMBuildStore(builder, value, ptr);
-   return NULL;
} else {
+   unsigned i, size;
struct tgsi_declaration_range range = get_array_range(bld_base, 
file, reg_index, reg_indirect);
LLVMValueRef index = emit_array_index(bld, reg_indirect, 
reg_index - range.First);
LLVMValueRef array =
emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, 
range, chan_index);
-   return LLVMBuildInsertElement(builder, array, value, index, "");
+   LLVMValueRef temp_ptr;
+
+   array = LLVMBuildInsertElement(builder, array, value, index, 
"");
+
+   size = range.Last - range.First + 1;
+   for (i = 0; i < size; ++i) {
+   switch(file) {
+   case TGSI_FILE_OUTPUT:
+   temp_ptr = bld->outputs[i + 
range.First][chan_index];
+   break;
+
+   case TGSI_FILE_TEMPORARY:
+   if (range.First + i >= ctx->temps_count)
+   continue;
+   temp_ptr = ctx->temps[(i + range.First) * 
TGSI_NUM_CHANNELS + chan_index];
+   break;
+
+   default:
+   continue;
+   }
+   value = LLVMBuildExtractElement(builder, array,
+   lp_build_const_int32(gallivm, i), "");
+   LLVMBuildStore(builder, value, temp_ptr);
+   }
}
 }
 
 LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle)
 {
struct 

[Mesa-dev] [PATCH 1/3] gallivm: add create_builder_at_entry helper function

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Reduces code duplication.
---
 src/gallium/auxiliary/gallivm/lp_bld_flow.c | 45 ++---
 1 file changed, 22 insertions(+), 23 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c 
b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index f3b3eab..9183f45 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -447,20 +447,40 @@ lp_build_endif(struct lp_build_if_state *ifthen)
   /* no else clause */
   LLVMBuildCondBr(builder, ifthen->condition,
   ifthen->true_block, ifthen->merge_block);
}
 
/* Resume building code at end of the ifthen->merge_block */
LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
 }
 
 
+static LLVMBuilderRef
+create_builder_at_entry(struct gallivm_state *gallivm)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
+   LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+   LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
+   LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
+   LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(gallivm->context);
+
+   if (first_instr) {
+  LLVMPositionBuilderBefore(first_builder, first_instr);
+   } else {
+  LLVMPositionBuilderAtEnd(first_builder, first_block);
+   }
+
+   return first_builder;
+}
+
+
 /**
  * Allocate a scalar (or vector) variable.
  *
  * Although not strictly part of control flow, control flow has deep impact in
  * how variables should be allocated.
  *
  * The mem2reg optimization pass is the recommended way to dealing with mutable
  * variables, and SSA. It looks for allocas and if it can handle them, it
  * promotes them, but only looks for alloca instructions in the entry block of
  * the function. Being in the entry block guarantees that the alloca is only
@@ -468,33 +488,23 @@ lp_build_endif(struct lp_build_if_state *ifthen)
  *
  * See also:
  * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory
  */
 LLVMValueRef
 lp_build_alloca(struct gallivm_state *gallivm,
 LLVMTypeRef type,
 const char *name)
 {
LLVMBuilderRef builder = gallivm->builder;
-   LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
-   LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
-   LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
-   LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
-   LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(gallivm->context);
+   LLVMBuilderRef first_builder = create_builder_at_entry(gallivm);
LLVMValueRef res;
 
-   if (first_instr) {
-  LLVMPositionBuilderBefore(first_builder, first_instr);
-   } else {
-  LLVMPositionBuilderAtEnd(first_builder, first_block);
-   }
-
res = LLVMBuildAlloca(first_builder, type, name);
LLVMBuildStore(builder, LLVMConstNull(type), res);
 
LLVMDisposeBuilder(first_builder);
 
return res;
 }
 
 
 /**
@@ -510,30 +520,19 @@ lp_build_alloca(struct gallivm_state *gallivm,
  *
  * See also:
  * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory
  */
 LLVMValueRef
 lp_build_array_alloca(struct gallivm_state *gallivm,
   LLVMTypeRef type,
   LLVMValueRef count,
   const char *name)
 {
-   LLVMBuilderRef builder = gallivm->builder;
-   LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
-   LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
-   LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
-   LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
-   LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(gallivm->context);
+   LLVMBuilderRef first_builder = create_builder_at_entry(gallivm);
LLVMValueRef res;
 
-   if (first_instr) {
-  LLVMPositionBuilderBefore(first_builder, first_instr);
-   } else {
-  LLVMPositionBuilderAtEnd(first_builder, first_block);
-   }
-
res = LLVMBuildArrayAlloca(first_builder, type, count, name);
 
LLVMDisposeBuilder(first_builder);
 
return res;
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] gallium/radeon: use lp_build_alloca_undef

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Avoid building all those store 0 / store undef instruction pairs that
end up getting removed anyway.
---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 6a010d5..b419add 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -483,43 +483,34 @@ static LLVMValueRef fetch_system_value(struct 
lp_build_tgsi_context *bld_base,
struct gallivm_state *gallivm = bld_base->base.gallivm;
 
LLVMValueRef cval = ctx->system_values[reg->Register.Index];
if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
cval = LLVMBuildExtractElement(gallivm->builder, cval,
   lp_build_const_int32(gallivm, 
swizzle), "");
}
return bitcast(bld_base, type, cval);
 }
 
-static LLVMValueRef si_build_alloca_undef(struct gallivm_state *gallivm,
- LLVMTypeRef type,
- const char *name)
-{
-   LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
-   LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
-   return ptr;
-}
-
 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 const struct tgsi_full_declaration *decl)
 {
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
unsigned first, last, i;
switch(decl->Declaration.File) {
case TGSI_FILE_ADDRESS:
{
 unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-ctx->soa.addr[idx][chan] = 
si_build_alloca_undef(
+ctx->soa.addr[idx][chan] = 
lp_build_alloca_undef(
&ctx->gallivm,
ctx->soa.bld_base.uint_bld.elem_type, 
"");
}
}
break;
}
 
case TGSI_FILE_TEMPORARY:
{
char name[16] = "";
@@ -574,21 +565,21 @@ static void emit_declaration(struct lp_build_tgsi_context 
*bld_base,
ctx->temps_count = 
bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
ctx->temps = MALLOC(TGSI_NUM_CHANNELS * 
ctx->temps_count * sizeof(LLVMValueRef));
}
if (!array_alloca) {
for (i = 0; i < decl_size; ++i) {
 #ifdef DEBUG
snprintf(name, sizeof(name), "TEMP%d.%c",
 first + i / 4, "xyzw"[i % 4]);
 #endif
ctx->temps[first * TGSI_NUM_CHANNELS + i] =
-   
si_build_alloca_undef(bld_base->base.gallivm,
+   
lp_build_alloca_undef(bld_base->base.gallivm,
  
bld_base->base.vec_type,
  name);
}
} else {
LLVMValueRef idxs[2] = {
bld_base->uint_bld.zero,
NULL
};
LLVMValueRef undef = NULL;
unsigned j = 0;
@@ -633,21 +624,21 @@ static void emit_declaration(struct lp_build_tgsi_context 
*bld_base,
}
break;
 
case TGSI_FILE_OUTPUT:
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
unsigned chan;
assert(idx < RADEON_LLVM_MAX_OUTPUTS);
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-   ctx->soa.outputs[idx][chan] = 
si_build_alloca_undef(
+   ctx->soa.outputs[idx][chan] = 
lp_build_alloca_undef(
&ctx->gallivm,
ctx->soa.bld_base.base.elem_type, "");
}
}
break;
}
 
case TGSI_FILE_MEMORY:
ctx->declare_memory_region(ctx, decl);
break;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] gallivm: add lp_build_alloca_undef

2016-08-09 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/gallium/auxiliary/gallivm/lp_bld_flow.c | 19 +++
 src/gallium/auxiliary/gallivm/lp_bld_flow.h |  5 +
 2 files changed, 24 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c 
b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index 9183f45..3c3f16c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -501,20 +501,39 @@ lp_build_alloca(struct gallivm_state *gallivm,
res = LLVMBuildAlloca(first_builder, type, name);
LLVMBuildStore(builder, LLVMConstNull(type), res);
 
LLVMDisposeBuilder(first_builder);
 
return res;
 }
 
 
 /**
+ * Like lp_build_alloca, but do not zero-initialize the variable.
+ */
+LLVMValueRef
+lp_build_alloca_undef(struct gallivm_state *gallivm,
+  LLVMTypeRef type,
+  const char *name)
+{
+   LLVMBuilderRef first_builder = create_builder_at_entry(gallivm);
+   LLVMValueRef res;
+
+   res = LLVMBuildAlloca(first_builder, type, name);
+
+   LLVMDisposeBuilder(first_builder);
+
+   return res;
+}
+
+
+/**
  * Allocate an array of scalars/vectors.
  *
  * mem2reg pass is not capable of promoting structs or arrays to registers, but
  * we still put it in the first block anyway as failure to put allocas in the
  * first block may prevent the X86 backend from successfully align the stack as
  * required.
  *
  * Also the scalarrepl pass is supposedly more powerful and can promote
  * arrays in many cases.
  *
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h 
b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
index 083b0ad..674fc18 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
@@ -189,20 +189,25 @@ lp_build_endif(struct lp_build_if_state *ctx);
 
 LLVMBasicBlockRef
 lp_build_insert_new_block(struct gallivm_state *gallivm, const char *name);
 
 LLVMValueRef
 lp_build_alloca(struct gallivm_state *gallivm,
 LLVMTypeRef type,
 const char *name);
 
 LLVMValueRef
+lp_build_alloca_undef(struct gallivm_state *gallivm,
+  LLVMTypeRef type,
+  const char *name);
+
+LLVMValueRef
 lp_build_array_alloca(struct gallivm_state *gallivm,
   LLVMTypeRef type,
   LLVMValueRef count,
   const char *name);
 
 #ifdef __cplusplus
 }
 #endif
 
 #endif /* !LP_BLD_FLOW_H */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/8] st/mesa: _NEW_TEXTURE & CONSTANTS shouldn't flag states that aren't used

2016-08-09 Thread Nicolai Hähnle



On 07.08.2016 03:12, Marek Olšák wrote:

From: Marek Olšák 

---
 src/mesa/state_tracker/st_context.c | 64 -
 src/mesa/state_tracker/st_context.h |  6 
 2 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index 1ff0355..b9fc9e7 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -116,20 +116,65 @@ st_query_memory_info(struct gl_context *ctx, struct 
gl_memory_info *out)

out->total_device_memory = info.total_device_memory;
out->avail_device_memory = info.avail_device_memory;
out->total_staging_memory = info.total_staging_memory;
out->avail_staging_memory = info.avail_staging_memory;
out->device_memory_evicted = info.device_memory_evicted;
out->nr_device_memory_evictions = info.nr_device_memory_evictions;
 }


+uint64_t
+st_get_active_states(struct gl_context *ctx)
+{
+   struct st_vertex_program *vp =
+  st_vertex_program(ctx->VertexProgram._Current);
+   struct st_tessctrl_program *tcp =
+  st_tessctrl_program(ctx->TessCtrlProgram._Current);
+   struct st_tesseval_program *tep =
+  st_tesseval_program(ctx->TessEvalProgram._Current);
+   struct st_geometry_program *gp =
+  st_geometry_program(ctx->GeometryProgram._Current);
+   struct st_fragment_program *fp =
+  st_fragment_program(ctx->FragmentProgram._Current);
+   struct st_compute_program *cp =
+  st_compute_program(ctx->ComputeProgram._Current);
+
+   uint64_t active_shader_states = 0;
+   uint64_t all_shader_resources;
+
+   if (vp)
+  active_shader_states |= vp->affected_states;
+   if (tcp)
+  active_shader_states |= tcp->affected_states;
+   if (tep)
+  active_shader_states |= tep->affected_states;
+   if (gp)
+  active_shader_states |= gp->affected_states;
+   if (fp)
+  active_shader_states |= fp->affected_states;
+   if (cp)
+  active_shader_states |= cp->affected_states;
+
+   all_shader_resources = ST_NEW_SAMPLER_VIEWS |
+  ST_NEW_SAMPLERS |
+  ST_NEW_CONSTANTS |
+  ST_NEW_UNIFORM_BUFFER |
+  ST_NEW_ATOMIC_BUFFER |
+  ST_NEW_STORAGE_BUFFER |
+  ST_NEW_IMAGE_UNITS;


This should probably be a #define in st_atom.h.

Nicolai


+
+   /* Mark non-shader-resource shader states as "always active". */
+   return active_shader_states | ~all_shader_resources;
+}
+
+
 /**
  * Called via ctx->Driver.UpdateState()
  */
 void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state)
 {
struct st_context *st = st_context(ctx);

if (new_state & _NEW_BUFFERS) {
   st->dirty |= ST_NEW_DSA |
ST_NEW_FB_STATE |
@@ -197,41 +242,44 @@ void st_invalidate_state(struct gl_context * ctx, 
GLbitfield new_state)
st_user_clip_planes_enabled(ctx))
   st->dirty |= ST_NEW_CLIP_STATE;

if (new_state & _NEW_COLOR)
   st->dirty |= ST_NEW_BLEND |
ST_NEW_DSA;

if (new_state & _NEW_PIXEL)
   st->dirty |= ST_NEW_PIXEL_TRANSFER;

-   if (new_state & _NEW_TEXTURE)
-  st->dirty |= ST_NEW_SAMPLER_VIEWS |
-   ST_NEW_SAMPLERS |
-   ST_NEW_IMAGE_UNITS;
-
if (new_state & _NEW_CURRENT_ATTRIB)
   st->dirty |= ST_NEW_VERTEX_ARRAYS;

-   if (new_state & _NEW_PROGRAM_CONSTANTS)
-  st->dirty |= ST_NEW_CONSTANTS;
-
/* Update the vertex shader if ctx->Light._ClampVertexColor was changed. */
if (st->clamp_vert_color_in_shader && (new_state & _NEW_LIGHT))
   st->dirty |= ST_NEW_VS_STATE;

/* Which shaders are dirty will be determined manually. */
if (new_state & _NEW_PROGRAM) {
   st->gfx_shaders_may_be_dirty = true;
   st->compute_shader_may_be_dirty = true;
+  /* This will mask out unused shader resources. */
+  st->active_states = st_get_active_states(ctx);
}

+   if (new_state & _NEW_TEXTURE)
+  st->dirty |= st->active_states &
+   (ST_NEW_SAMPLER_VIEWS |
+ST_NEW_SAMPLERS |
+ST_NEW_IMAGE_UNITS);
+
+   if (new_state & _NEW_PROGRAM_CONSTANTS)
+  st->dirty |= st->active_states & ST_NEW_CONSTANTS;
+
/* This is the only core Mesa module we depend upon.
 * No longer use swrast, swsetup, tnl.
 */
_vbo_InvalidateState(ctx, new_state);
 }


 static void
 st_destroy_context_priv(struct st_context *st)
 {
diff --git a/src/mesa/state_tracker/st_context.h 
b/src/mesa/state_tracker/st_context.h
index 556b9c9..f82cf3a 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -133,20 +133,23 @@ struct st_context
   GLuint poly_stipple[32];  /**< In OpenGL's bottom-to-top order */

   GLuint fb_orientation;
} state;

char vendor[100];
char renderer[100];

uint64_t dirty; /**< dirty states */

+   /** This masks out unus

Re: [Mesa-dev] [PATCH 4/8] st/mesa: determine states used or affected by shaders at compile time

2016-08-09 Thread Nicolai Hähnle

On 07.08.2016 03:12, Marek Olšák wrote:

From: Marek Olšák 

At compile time, each shader determines which ST_NEW flags should be set
at shader bind time.

This just sets the new field for all shaders. The next commit will use it.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 175 -
 src/mesa/state_tracker/st_program.c|  37 +-
 src/mesa/state_tracker/st_program.h|   6 +
 3 files changed, 215 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 362559f..fd14766 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -,31 +,202 @@ get_mesa_program_tgsi(struct gl_context *ctx,

 static struct gl_program *
 get_mesa_program(struct gl_context *ctx,
  struct gl_shader_program *shader_program,
  struct gl_linked_shader *shader)
 {
struct pipe_screen *pscreen = ctx->st->pipe->screen;
unsigned ptarget = st_shader_stage_to_ptarget(shader->Stage);
enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
   pscreen->get_shader_param(pscreen, ptarget, 
PIPE_SHADER_CAP_PREFERRED_IR);
+   struct gl_program *prog = NULL;
+
if (preferred_ir == PIPE_SHADER_IR_NIR) {
   /* TODO only for GLSL VS/FS for now: */
   switch (shader->Stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_FRAGMENT:
- return st_nir_get_mesa_program(ctx, shader_program, shader);
+ prog = st_nir_get_mesa_program(ctx, shader_program, shader);
   default:
  break;
   }
+   } else {
+  prog = get_mesa_program_tgsi(ctx, shader_program, shader);
+   }
+
+   if (prog) {
+  uint64_t *states;
+
+  /* This determines which states will be updated when the shader is
+   * bound.
+   */
+  switch (shader->Stage) {
+  case MESA_SHADER_VERTEX:
+ states = &((struct st_vertex_program*)prog)->affected_states;
+
+ *states = ST_NEW_VS_STATE |
+   ST_NEW_RASTERIZER |
+   ST_NEW_VERTEX_ARRAYS;
+
+ if (prog->Parameters->NumParameters)
+*states |= ST_NEW_VS_CONSTANTS;
+
+ if (shader->num_samplers)
+*states |= ST_NEW_VS_SAMPLER_VIEWS |
+   ST_NEW_RENDER_SAMPLERS;
+
+ if (shader->NumImages)
+*states |= ST_NEW_VS_IMAGES;
+
+ if (shader->NumUniformBlocks)
+*states |= ST_NEW_VS_UBOS;
+
+ if (shader->NumShaderStorageBlocks)
+*states |= ST_NEW_VS_SSBOS;
+
+ if (shader->NumAtomicBuffers)
+*states |= ST_NEW_VS_ATOMICS;


I'm not overly fond of the code duplication here. Perhaps these could 
all be expressed relative to a stage-specific base flag?


Nicolai



+ break;
+
+  case MESA_SHADER_TESS_CTRL:
+ states = &((struct st_tessctrl_program*)prog)->affected_states;
+
+ *states = ST_NEW_TCS_STATE;
+
+ if (prog->Parameters->NumParameters)
+*states |= ST_NEW_TCS_CONSTANTS;
+
+ if (shader->num_samplers)
+*states |= ST_NEW_TCS_SAMPLER_VIEWS |
+   ST_NEW_RENDER_SAMPLERS;
+
+ if (shader->NumImages)
+*states |= ST_NEW_TCS_IMAGES;
+
+ if (shader->NumUniformBlocks)
+*states |= ST_NEW_TCS_UBOS;
+
+ if (shader->NumShaderStorageBlocks)
+*states |= ST_NEW_TCS_SSBOS;
+
+ if (shader->NumAtomicBuffers)
+*states |= ST_NEW_TCS_ATOMICS;
+ break;
+
+  case MESA_SHADER_TESS_EVAL:
+ states = &((struct st_tesseval_program*)prog)->affected_states;
+
+ *states = ST_NEW_TES_STATE |
+   ST_NEW_RASTERIZER;
+
+ if (prog->Parameters->NumParameters)
+*states |= ST_NEW_TES_CONSTANTS;
+
+ if (shader->num_samplers)
+*states |= ST_NEW_TES_SAMPLER_VIEWS |
+   ST_NEW_RENDER_SAMPLERS;
+
+ if (shader->NumImages)
+*states |= ST_NEW_TES_IMAGES;
+
+ if (shader->NumUniformBlocks)
+*states |= ST_NEW_TES_UBOS;
+
+ if (shader->NumShaderStorageBlocks)
+*states |= ST_NEW_TES_SSBOS;
+
+ if (shader->NumAtomicBuffers)
+*states |= ST_NEW_TES_ATOMICS;
+ break;
+
+  case MESA_SHADER_GEOMETRY:
+ states = &((struct st_geometry_program*)prog)->affected_states;
+
+ *states = ST_NEW_GS_STATE |
+   ST_NEW_RASTERIZER;
+
+ if (prog->Parameters->NumParameters)
+*states |= ST_NEW_GS_CONSTANTS;
+
+ if (shader->num_samplers)
+*states |= ST_NEW_GS_SAMPLER_VIEWS |
+   ST_NEW_RENDER_SAMPLERS;
+
+ if (shader->NumImages)
+*states |= ST_NEW_GS_IMAGES;
+
+ if (shader->NumUniformBlocks)
+*states |= ST_NEW_GS_UBOS;
+
+ if (shader->NumShaderStorageBlocks)
+ 

Re: [Mesa-dev] [PATCH 0/8] More state optimizations for st/mesa

2016-08-09 Thread Nicolai Hähnle

I sent comments on patches 4 & 6. Apart from that, the series is

Reviewed-by: Nicolai Hähnle 

On 07.08.2016 03:12, Marek Olšák wrote:

PS: In order to make reviewing easier, all my patches have 10 lines
of context instead of 3. That will be the default for all my work
from now on.


I like that idea, I've made the same change to my git config.

Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] vl: add a lanczos interpolation filter v3

2016-08-09 Thread Christian König
I am more than happy to solve these problems, the Lanczos filtering 
was getting a little stale

anyway because I was not able to reproduce the problems Andy was facing.
Yeah thought so, the reason is probably that you don't have the 
necessary hardware.



Is that why I need to add a PIPE_BIND_LINEAR to a surface?

Yes exactly.

So I need to use maybe a couple of surfaces alternatively to read and 
write with the filters. This approach should work I guess.
Allocate a temporary surface for each step, apply the necessary filters 
using it and then use the temporary buffer as input for the next step.


See how the deinterlacing filter does this, you should use the same 
approach here.


I would use this order for doing things:
1. Median filter for noise reduction.
2. Sharpening/blur filter.
3. Deinterlacing.
4. Compositioning and CC conversion.
5. Advanced scaling.

Regards,
Christian.

Am 08.08.2016 um 16:32 schrieb Nayan Deshmukh:

Hi Christian,

I am more than happy to solve these problems, the Lanczos filtering 
was getting a little stale

anyway because I was not able to reproduce the problems Andy was facing.

On Mon, Aug 8, 2016 at 6:24 PM, Christian König 
mailto:christian.koe...@amd.com>> wrote:


Hi Nayan,

ok let's take a step back and put the lanczos filtering aside for
a moment. I have another task for you which is more urgent right now.

The order we do things in vlVdpVideoMixerRender() was never 100%
correct, so we have at least three problems here which need fixing:

1) The noise reduction and sharpness filter read and write to the
same surface at the same time. That only works because we use a
linear layout.

Is that why I need to add a PIPE_BIND_LINEAR to a surface? So I need 
to use maybe a couple of surfaces alternatively to read and write with 
the filters. This approach should work I guess.


2) We apply the noise reduction and sharpness filter after the
composition. That is rather odd and should be fixed so that we
apply those filters on the original video frame instead.

 So we need to apply the filter before the CSC conversion.

3) We use delayed rendering to render into output surfaces
directly. We should rather use the DRI3 capabilities to allocate
multiple output surfaces instead.

Could you take care of some of those issues? Especially #1 has
become a problem recently.

Surely, I will start working on the first 2 problems for now and look 
at the third problem a little later.


Regards,
Nayan.

Regards,
Christian.


Am 04.08.2016 um 19:22 schrieb Nayan Deshmukh:

Hi Andy,


On Thu, Aug 4, 2016 at 8:48 PM, Andy Furniss mailto:adf.li...@gmail.com>> wrote:

Nayan Deshmukh wrote:

Hi Andy,

Thanks for testing my patches.


NP


The scaling happens after CSC.


OK, thanks.


I believe there is some misunderstanding here, I was able
to see the
artifacts in the video that you sent me previously. But I
was not
able to replicate them


Yea, I got that - just thought you may want to see how they
had changed.

with the pendulum video on my system. Same case this time the
pendulum video plays fine this time too. I am attaching a
video of it
here


https://drive.google.com/file/d/0B1s62k5GtdBwOVAtOUVaU0V5c1E/view?usp=sharing




Hmm, that's interesting for a few reasons.

Though my monitor won't do 1366x768 I can replicate the same
scale
factor windowed with mplayer ... -xy 768/576 ...

At first glance only level 2 is obviously artifacted (though
very close
inspection shows others are slightly).

Levels: for some reason your vid has the black bars at 0 but
the content
isn't scaled - like your mplayer didn't expand tv to pc on
playback.
This shouldn't happen by default. Do you have some extra config
somewhere like in $HOME/.mplayer, if so maybe better to test
without.

Most important - though the vp9 compression may be to blame I
can't
really see any difference between the levels in that vid.

In fact closely comparing just your level 1 to my (admittedly
uncompressed) level 1 and 0 output scaled the same plus
unstretched
levels wise it looks to me like you are getting level 0 on
this test.


You are right, I am getting level 0 only. I have a PRIME
configuration
and I forgot to set DRI_PRIME to 1. But for some reason, I am not
able to create
a screen recording when I use my AMD card. So, for now, I can't
reproduce the artifacts
you are having so can't debug them too :(

Regards,
Nayan.






__

[Mesa-dev] [PATCH] Fix Android compilation when swrast is enabled

2016-08-09 Thread Mathieu Maret
Swrast adds a dependency on libdrm, but libdrm is not defined for host
builds (only for the targeted device). So host modules like mesa_gen_matypes
cannot find their libdrm dependency.

Signed-off-by: Mathieu Maret 
---
 Android.common.mk | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Android.common.mk b/Android.common.mk
index 26d2482..3903ebe 100644
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -100,10 +100,12 @@ LOCAL_CFLAGS += \
 endif
 
 # add libdrm if there are hardware drivers
+ifneq ($(strip $(LOCAL_IS_HOST_MODULE)),true)
 ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
 LOCAL_CFLAGS += -DHAVE_LIBDRM
 LOCAL_SHARED_LIBRARIES += libdrm
 endif
+endif
 
 LOCAL_CPPFLAGS += \
$(if $(filter true,$(MESA_LOLLIPOP_BUILD)),-std=c++11) \
-- 
2.9.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] vl/rbsp: add a check for emulation prevention three byte

2016-08-09 Thread Leo Liu



On 08/09/2016 04:26 AM, Christian König wrote:

Am 08.08.2016 um 22:10 schrieb Leo Liu:

This is the case when the "00 00 03" is very close to the beginning of
nal unit header


I see where the problem is, but the fix is incorrect.

You always search for the emulation prevention three byte even when 
the previous fill has done so already. So it could happen in theory 
that you revert the escaping twice, e.g. remove valid bits.


Right. Will send v2.

Thanks,
Leo



You need to add this extra check to the end of vl_rbsp_init() after we 
searched for the end of the NAL unit.


Regards,
Christian.



Signed-off-by: Leo Liu 
---
  src/gallium/auxiliary/vl/vl_rbsp.h | 10 +-
  1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_rbsp.h 
b/src/gallium/auxiliary/vl/vl_rbsp.h

index 7867238..c134d31 100644
--- a/src/gallium/auxiliary/vl/vl_rbsp.h
+++ b/src/gallium/auxiliary/vl/vl_rbsp.h
@@ -77,8 +77,16 @@ static inline void vl_rbsp_fillbits(struct vl_rbsp 
*rbsp)

 unsigned i, bits;
   /* abort if we still have enough bits */
-   if (valid >= 32)
+   if (valid >= 32) {
+  /* search for the emulation prevention three byte */
+  for (i = 24; i <= valid; i += 8) {
+ if ((vl_vlc_peekbits(&rbsp->nal, i) & 0xff) == 0x3) {
+vl_vlc_removebits(&rbsp->nal, i - 8, 8);
+i += 8;
+ }
+  }
return;
+   }
   vl_vlc_fillbits(&rbsp->nal);





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] gallium: Add c99_compat.h to u_bitcast.h

2016-08-09 Thread Brian Paul

On 08/09/2016 12:30 AM, Mathias Fröhlich wrote:

Hi Brian,

 > I don't know why my local build is failing while appveyor and our

 > in-house automated build seem OK. But applying your patch 3 alone fixes

 > things for me.

As it fixes something independent, should I push that already?


Sure.  For 1 & 3,
Reviewed-by: Brian Paul 
Tested-by: Brian Paul 




 > Yeah, I applied your whole series and the MSVC build seems OK. However,

 > I'm hitting a new runtime crash (even after fixing the unrelated issue

 > from Marek's rewrite of the state tracker validation code). It looks

 > like patch 2/3 is the problem. I'll try to dig deeper tomorrow...

Hmm, that part is meant as a nice addition on top, once we are at it.

But, if this patch is a problem then I am probably off by one in the
returned

index. You may 'return index + 1;' instead of just index.


I'll let you know soon...

-Brian


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Nicolai Hähnle

Hi everybody,

addrlib is the addressing and alignment calculator which is used by 
radeonsi. It's developed (and also used) internally at AMD, and so far 
we've had one open source copy living in the Mesa repository at 
src/gallium/winsys/amdgpu/drm/addrlib.


The question of using addrlib in non-Mesa parts of our open-source stack 
has come up, in particular in relation to compute. We'd obviously like 
to share the code rather than having multiple copies flying around. 
Since the interface of addrlib is slow-moving but unstable, shared 
linking is not an option.


I think the best way forward is to create a dedicated repository for 
addrlib which is then integrated into Mesa as a git submodule.


The point of this email is to gather feedback from the Mesa community on 
this plan, which is explicitly:


(0) Create an addrlib repository, say amd/addrlib on fd.o.
(1) Add it as a git submodule to the Mesa repository.
(2) Fix up whatever aspects of the build system that may be affected 
(perhaps for building source tarballs).
(3) Go back to mostly ignoring addrlib, except for trying to get better 
at syncing with the internal closed-source version.


From initial experiments, the impact on users interested in radeon is 
that they will have to run `git submodule init` and then occasionally 
`git submodule update`. Users who do not build radeonsi should be able 
to ignore the change completely.


There are alternatives. For example, ROCm uses Google's repo tool 
already. But for Mesa, git submodule looks like a lightweight, well 
supported and overall conservative option that everybody should already 
have installed. If there are good arguments for something else, let's 
hear them!


Another point: if we proceed with this plan, I think we should consider 
moving addrlib into src/amd/addrlib. There are two reasons: First, 
transitioning to a submodule *without* changing the directory is 
probably more fragile, i.e. what happens when you switch between 
checkouts before and after the transition. Second, if/when radv ends up 
being merged into Mesa master, it makes sense to have addrlib there anyway.


Thoughts? Complaints? Praise?
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97214] X not running with error "Failed to make EGL context current"

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97214

--- Comment #4 from Alexandr Zelinsky  ---
Created attachment 125637
  --> https://bugs.freedesktop.org/attachment.cgi?id=125637&action=edit
xinit log with LIBGL_DEBUG=verbose

This does not seem very useful.
What is your nickname on #dri-devel?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97261] vaapi u/v wrong order since vl/util: add copy func for yv12image to nv12surface

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97261

Bug ID: 97261
   Summary: vaapi u/v wrong order since vl/util: add copy func for
yv12image to nv12surface
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: adf.li...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 125638
  --> https://bugs.freedesktop.org/attachment.cgi?id=125638&action=edit
small test vid

As noted at the time, though Boyuan said he couldn't reproduce, for me

vl/util: add copy func for yv12image to nv12surface

gets u and v for both yv12 and I420 inputs reversed whether encoding or
playing.

Both gstreamer and mpv affected.

Testing playback using attached small test vid that instantly shows the issue
either

VAAPI_DISABLE_INTERLACE=true mpv --vo=vaapi uvtest.mkv

or

gst-launch-1.0 filesrc location=uvtest.mkv ! matroskademux ! avdec_h264 !
vaapisink

Of course any test that outputs nv12 works OK as it avoids the conversion.

It seems that the new util function expects input to be yuv, but it actually
gets yvu.

I sent a patch to the list for this -

https://lists.freedesktop.org/archives/mesa-dev/2016-July/124695.html

Filing this bug/test to see if anyone else can reproduce it.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Rob Clark
On Tue, Aug 9, 2016 at 9:47 AM, Nicolai Hähnle  wrote:
> Hi everybody,
>
> addrlib is the addressing and alignment calculator which is used by
> radeonsi. It's developed (and also used) internally at AMD, and so far we've
> had one open source copy living in the Mesa repository at
> src/gallium/winsys/amdgpu/drm/addrlib.
>
> The question of using addrlib in non-Mesa parts of our open-source stack has
> come up, in particular in relation to compute. We'd obviously like to share
> the code rather than having multiple copies flying around. Since the
> interface of addrlib is slow-moving but unstable, shared linking is not an
> option.
>
> I think the best way forward is to create a dedicated repository for addrlib
> which is then integrated into Mesa as a git submodule.
>
> The point of this email is to gather feedback from the Mesa community on
> this plan, which is explicitly:
>
> (0) Create an addrlib repository, say amd/addrlib on fd.o.
> (1) Add it as a git submodule to the Mesa repository.
> (2) Fix up whatever aspects of the build system that may be affected
> (perhaps for building source tarballs).
> (3) Go back to mostly ignoring addrlib, except for trying to get better at
> syncing with the internal closed-source version.
>
> From initial experiments, the impact on users interested in radeon is that
> they will have to run `git submodule init` and then occasionally `git
> submodule update`. Users who do not build radeonsi should be able to ignore
> the change completely.

tbh, git submodules are more annoying than they need to be, and I'm
not really terribly excited to use that for something that is a build
dependency.  Maybe just move it into libdrm instead?

BR,
-R

> There are alternatives. For example, ROCm uses Google's repo tool already.
> But for Mesa, git submodule looks like a lightweight, well supported and
> overall conservative option that everybody should already have installed. If
> there are good arguments for something else, let's hear them!
>
> Another point: if we proceed with this plan, I think we should consider
> moving addrlib into src/amd/addrlib. There are two reasons: First,
> transitioning to a submodule *without* changing the directory is probably
> more fragile, i.e. what happens when you switch between checkouts before and
> after the transition. Second, if/when radv ends up being merged into Mesa
> master, it makes sense to have addrlib there anyway.
>
> Thoughts? Complaints? Praise?
> Nicolai
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] vl/rbsp: add a check for emulation prevention three byte

2016-08-09 Thread Leo Liu
This is the case when the "00 00 03" is very close to the beginning of
nal unit header

v2: move the check to rbsp init

Signed-off-by: Leo Liu 
---
 src/gallium/auxiliary/vl/vl_rbsp.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/gallium/auxiliary/vl/vl_rbsp.h 
b/src/gallium/auxiliary/vl/vl_rbsp.h
index 7867238..c175e23 100644
--- a/src/gallium/auxiliary/vl/vl_rbsp.h
+++ b/src/gallium/auxiliary/vl/vl_rbsp.h
@@ -61,7 +61,18 @@ static inline void vl_rbsp_init(struct vl_rbsp *rbsp, struct 
vl_vlc *nal, unsign
while (vl_vlc_search_byte(nal, num_bits, 0x00)) {
   if (vl_vlc_peekbits(nal, 24) == 0x01 ||
   vl_vlc_peekbits(nal, 32) == 0x0001) {
+ unsigned valid;
+ int i;
+
  vl_vlc_limit(&rbsp->nal, bits_left - vl_vlc_bits_left(nal));
+ valid = vl_vlc_valid_bits(&rbsp->nal);
+ /* search for the emulation prevention three byte */
+ for (i = 24; i <= valid; i += 8) {
+if ((vl_vlc_peekbits(&rbsp->nal, i) & 0xff) == 0x3) {
+   vl_vlc_removebits(&rbsp->nal, i - 8, 8);
+   i += 8;
+}
+ }
  return;
   }
   vl_vlc_eatbits(nal, 8);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97214] X not running with error "Failed to make EGL context current"

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97214

--- Comment #5 from Michel Dänzer  ---
Try EGL_LOG_LEVEL=debug as well.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97214] X not running with error "Failed to make EGL context current"

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97214

--- Comment #6 from Alexandr Zelinsky  ---
Created attachment 125640
  --> https://bugs.freedesktop.org/attachment.cgi?id=125640&action=edit
EGL_LOG_LEVEL=debug

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97261] vaapi u/v wrong order since vl/util: add copy func for yv12image to nv12surface

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97261

Andy Furniss  changed:

   What|Removed |Added

 CC||deathsim...@vodafone.de

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] gallivm: add lp_build_alloca_undef

2016-08-09 Thread Roland Scheidegger
Am 09.08.2016 um 12:38 schrieb Nicolai Hähnle:
> From: Nicolai Hähnle 
> 
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_flow.c | 19 +++
>  src/gallium/auxiliary/gallivm/lp_bld_flow.h |  5 +
>  2 files changed, 24 insertions(+)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
> index 9183f45..3c3f16c 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
> @@ -501,20 +501,39 @@ lp_build_alloca(struct gallivm_state *gallivm,
> res = LLVMBuildAlloca(first_builder, type, name);
> LLVMBuildStore(builder, LLVMConstNull(type), res);
>  
> LLVMDisposeBuilder(first_builder);
>  
> return res;
>  }
>  
>  
>  /**
> + * Like lp_build_alloca_undef, but do not zero-initialize the variable.

Like lp_build_alloca

For the series:
Reviewed-by: Roland Scheidegger 


> + */
> +LLVMValueRef
> +lp_build_alloca_undef(struct gallivm_state *gallivm,
> +  LLVMTypeRef type,
> +  const char *name)
> +{
> +   LLVMBuilderRef first_builder = create_builder_at_entry(gallivm);
> +   LLVMValueRef res;
> +
> +   res = LLVMBuildAlloca(first_builder, type, name);
> +
> +   LLVMDisposeBuilder(first_builder);
> +
> +   return res;
> +}
> +
> +
> +/**
>   * Allocate an array of scalars/vectors.
>   *
>   * mem2reg pass is not capable of promoting structs or arrays to registers, 
> but
>   * we still put it in the first block anyway as failure to put allocas in the
>   * first block may prevent the X86 backend from successfully align the stack 
> as
>   * required.
>   *
>   * Also the scalarrepl pass is supposedly more powerful and can promote
>   * arrays in many cases.
>   *
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h 
> b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
> index 083b0ad..674fc18 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
> @@ -189,20 +189,25 @@ lp_build_endif(struct lp_build_if_state *ctx);
>  
>  LLVMBasicBlockRef
>  lp_build_insert_new_block(struct gallivm_state *gallivm, const char *name);
>  
>  LLVMValueRef
>  lp_build_alloca(struct gallivm_state *gallivm,
>  LLVMTypeRef type,
>  const char *name);
>  
>  LLVMValueRef
> +lp_build_alloca_undef(struct gallivm_state *gallivm,
> +  LLVMTypeRef type,
> +  const char *name);
> +
> +LLVMValueRef
>  lp_build_array_alloca(struct gallivm_state *gallivm,
>LLVMTypeRef type,
>LLVMValueRef count,
>const char *name);
>  
>  #ifdef __cplusplus
>  }
>  #endif
>  
>  #endif /* !LP_BLD_FLOW_H */
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] gallium: Add c99_compat.h to u_bitcast.h

2016-08-09 Thread Brian Paul

On 08/09/2016 07:46 AM, Brian Paul wrote:

On 08/09/2016 12:30 AM, Mathias Fröhlich wrote:

Hi Brian,

 > I don't know why my local build is failing while appveyor and our

 > in-house automated build seem OK. But applying your patch 3 alone
fixes

 > things for me.

As it fixes something independent, should I push that already?


Sure.  For 1 & 3,
Reviewed-by: Brian Paul 
Tested-by: Brian Paul 




 > Yeah, I applied your whole series and the MSVC build seems OK.
However,

 > I'm hitting a new runtime crash (even after fixing the unrelated issue

 > from Marek's rewrite of the state tracker validation code). It looks

 > like patch 2/3 is the problem. I'll try to dig deeper tomorrow...

Hmm, that part is meant as a nice addition on top, once we are at it.

But, if this patch is a problem then I am probably off by one in the
returned

index. You may 'return index + 1;' instead of just index.


I'll let you know soon...


Yes, we need to add one to the index.  I'll re-test that patch when you 
update it.


-Brian


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Nicolai Hähnle

On 09.08.2016 15:58, Rob Clark wrote:

On Tue, Aug 9, 2016 at 9:47 AM, Nicolai Hähnle  wrote:

Hi everybody,

addrlib is the addressing and alignment calculator which is used by
radeonsi. It's developed (and also used) internally at AMD, and so far we've
had one open source copy living in the Mesa repository at
src/gallium/winsys/amdgpu/drm/addrlib.

The question of using addrlib in non-Mesa parts of our open-source stack has
come up, in particular in relation to compute. We'd obviously like to share
the code rather than having multiple copies flying around. Since the
interface of addrlib is slow-moving but unstable, shared linking is not an
option.

I think the best way forward is to create a dedicated repository for addrlib
which is then integrated into Mesa as a git submodule.

The point of this email is to gather feedback from the Mesa community on
this plan, which is explicitly:

(0) Create an addrlib repository, say amd/addrlib on fd.o.
(1) Add it as a git submodule to the Mesa repository.
(2) Fix up whatever aspects of the build system that may be affected
(perhaps for building source tarballs).
(3) Go back to mostly ignoring addrlib, except for trying to get better at
syncing with the internal closed-source version.

From initial experiments, the impact on users interested in radeon is that
they will have to run `git submodule init` and then occasionally `git
submodule update`. Users who do not build radeonsi should be able to ignore
the change completely.


tbh, git submodules are more annoying than they need to be, and I'm
not really terribly excited to use that for something that is a build
dependency.  Maybe just move it into libdrm instead?


I know. That's what I would have proposed if the addrlib interface were 
stable. Unfortunately it isn't, and realistically speaking, that's not 
going to change.


So shared linking is right out.

Static linking or just including source files from a separate repository 
could be considered, but then what's the process for ensuring you have 
the right version?


The nice aspect of submodules is that every commit of the Mesa 
repository "knows" what the corresponding right version of addrlib is, 
and so git can update the submodule to the correct version for you 
automatically.


Cheers,
Nicolai


BR,
-R


There are alternatives. For example, ROCm uses Google's repo tool already.
But for Mesa, git submodule looks like a lightweight, well supported and
overall conservative option that everybody should already have installed. If
there are good arguments for something else, let's hear them!

Another point: if we proceed with this plan, I think we should consider
moving addrlib into src/amd/addrlib. There are two reasons: First,
transitioning to a submodule *without* changing the directory is probably
more fragile, i.e. what happens when you switch between checkouts before and
after the transition. Second, if/when radv ends up being merged into Mesa
master, it makes sense to have addrlib there anyway.

Thoughts? Complaints? Praise?
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Enrico Weigelt, metux IT consult
On 09.08.2016 15:47, Nicolai Hähnle wrote:

> I think the best way forward is to create a dedicated repository for
> addrlib which is then integrated into Mesa as a git submodule.

If you really wanna make a lot of people, especially dist-maintainers
very unhappy ...

> From initial experiments, the impact on users interested in radeon is
> that they will have to run `git submodule init` and then occasionally
> `git submodule update`.

Which requires additional, package specific, logic in build/rule files
of all the distros out there.

> There are alternatives. For example, ROCm uses Google's repo tool
> already. 

Even worse.


--mtx

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] vl/rbsp: add a check for emulation prevention three byte

2016-08-09 Thread Christian König

Am 09.08.2016 um 15:56 schrieb Leo Liu:

This is the case when the "00 00 03" is very close to the beginning of
nal unit header

v2: move the check to rbsp init

Signed-off-by: Leo Liu 
---
  src/gallium/auxiliary/vl/vl_rbsp.h | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/src/gallium/auxiliary/vl/vl_rbsp.h 
b/src/gallium/auxiliary/vl/vl_rbsp.h
index 7867238..c175e23 100644
--- a/src/gallium/auxiliary/vl/vl_rbsp.h
+++ b/src/gallium/auxiliary/vl/vl_rbsp.h
@@ -61,7 +61,18 @@ static inline void vl_rbsp_init(struct vl_rbsp *rbsp, struct 
vl_vlc *nal, unsign
 while (vl_vlc_search_byte(nal, num_bits, 0x00)) {
if (vl_vlc_peekbits(nal, 24) == 0x01 ||
vl_vlc_peekbits(nal, 32) == 0x0001) {
+ unsigned valid;
+ int i;
+
   vl_vlc_limit(&rbsp->nal, bits_left - vl_vlc_bits_left(nal));
+ valid = vl_vlc_valid_bits(&rbsp->nal);
+ /* search for the emulation prevention three byte */
+ for (i = 24; i <= valid; i += 8) {
+if ((vl_vlc_peekbits(&rbsp->nal, i) & 0xff) == 0x3) {
+   vl_vlc_removebits(&rbsp->nal, i - 8, 8);
+   i += 8;
+}
+ }


Mhm, I think that isn't 100% correct either.

We return inside the loop only when we find the next NAL unit after the 
current one, but if this is the last one that isn't the case.



   return;


I think that just replacing the return with a break and moving the 
code after the while should do the trick.


Regards,
Christian.


}
vl_vlc_eatbits(nal, 8);




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Enrico Weigelt, metux IT consult
On 09.08.2016 16:59, Nicolai Hähnle wrote:

> So shared linking is right out.

Not exactly. Just everything needs to be linked against the matching
versions. More a dist-layer problem.

The addrlib folks should learn to introduce proper versioning and
provide MVCC-capable build rules. That really isn't hard.
> Static linking or just including source files from a separate repository
> could be considered, but then what's the process for ensuring you have
> the right version?

pkgconfig ?

> The nice aspect of submodules is that every commit of the Mesa
> repository "knows" what the corresponding right version of addrlib is,
> and so git can update the submodule to the correct version for you
> automatically.

No, it can only checkout the ref'ed commit or anywhere else the user
tells it to. Just jumping to the head does exactly *not* jump to
anything like a correct version. And that's all that git can do
for you automatically.


--mtx

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] cairo as state tracker

2016-08-09 Thread Enrico Weigelt, metux IT consult
On 07.08.2016 12:50, Marek Olšák wrote:

> It would mainly be a futile task if it had to compete with their
> official Mesa driver.

Not quite. Would give us all of gallium's capabilities also for
the intel chips, for example having lots of different state trackers.
(coming back to my original intention of cairo as a gallium st)


--mtx

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] vl/rbsp: add a check for emulation prevention three byte

2016-08-09 Thread Leo Liu
This is the case when the "00 00 03" is very close to the beginning of
nal unit header

v2: move the check to rbsp init

Signed-off-by: Leo Liu 
---
 src/gallium/auxiliary/vl/vl_rbsp.h | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_rbsp.h 
b/src/gallium/auxiliary/vl/vl_rbsp.h
index 7867238..c8bebff 100644
--- a/src/gallium/auxiliary/vl/vl_rbsp.h
+++ b/src/gallium/auxiliary/vl/vl_rbsp.h
@@ -50,7 +50,8 @@ struct vl_rbsp {
  */
 static inline void vl_rbsp_init(struct vl_rbsp *rbsp, struct vl_vlc *nal, 
unsigned num_bits)
 {
-   unsigned bits_left = vl_vlc_bits_left(nal);
+   unsigned valid, bits_left = vl_vlc_bits_left(nal);
+   int i;
 
/* copy the position */
rbsp->nal = *nal;
@@ -62,10 +63,19 @@ static inline void vl_rbsp_init(struct vl_rbsp *rbsp, 
struct vl_vlc *nal, unsign
   if (vl_vlc_peekbits(nal, 24) == 0x01 ||
   vl_vlc_peekbits(nal, 32) == 0x0001) {
  vl_vlc_limit(&rbsp->nal, bits_left - vl_vlc_bits_left(nal));
- return;
+ break;
   }
   vl_vlc_eatbits(nal, 8);
}
+
+   valid = vl_vlc_valid_bits(&rbsp->nal);
+   /* search for the emulation prevention three byte */
+   for (i = 24; i <= valid; i += 8) {
+  if ((vl_vlc_peekbits(&rbsp->nal, i) & 0xff) == 0x3) {
+ vl_vlc_removebits(&rbsp->nal, i - 8, 8);
+ i += 8;
+  }
+   }
 }
 
 /**
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] cairo as state tracker

2016-08-09 Thread Rob Clark
On Tue, Aug 9, 2016 at 11:11 AM, Enrico Weigelt, metux IT consult
 wrote:
> On 07.08.2016 12:50, Marek Olšák wrote:
>
>> It would mainly be a futile task if it had to compete with their
>> official Mesa driver.
>
> Not quite. Would give us all of gallium's capabilities also for
> the intel chips, for example having lots of different state trackers.
> (coming back to my original intention of cairo as a gallium st)
>

If you don't realize the complexity of a gpu driver, or the 100's of
thousands of hours that have gone into i965, it's easy to say 'lets
throw that all away and start from scratch with a gallium driver' ;-)

There is ilo.. I suppose if someone cared enough they could add NIR
support and figure out how to share the compiler back-end with i965,
so it wouldn't be *completely* starting from scratch.  But there is
still a big delta between ilo and i965 in terms of features and
supported hw gen's.

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Nicolai Hähnle

On 09.08.2016 17:21, Marek Olšák wrote:

On Tue, Aug 9, 2016 at 3:47 PM, Nicolai Hähnle  wrote:

Hi everybody,

addrlib is the addressing and alignment calculator which is used by
radeonsi. It's developed (and also used) internally at AMD, and so far we've
had one open source copy living in the Mesa repository at
src/gallium/winsys/amdgpu/drm/addrlib.

The question of using addrlib in non-Mesa parts of our open-source stack has
come up, in particular in relation to compute. We'd obviously like to share
the code rather than having multiple copies flying around. Since the
interface of addrlib is slow-moving but unstable, shared linking is not an
option.

I think the best way forward is to create a dedicated repository for addrlib
which is then integrated into Mesa as a git submodule.

The point of this email is to gather feedback from the Mesa community on
this plan, which is explicitly:

(0) Create an addrlib repository, say amd/addrlib on fd.o.
(1) Add it as a git submodule to the Mesa repository.
(2) Fix up whatever aspects of the build system that may be affected
(perhaps for building source tarballs).
(3) Go back to mostly ignoring addrlib, except for trying to get better at
syncing with the internal closed-source version.

From initial experiments, the impact on users interested in radeon is that
they will have to run `git submodule init` and then occasionally `git
submodule update`. Users who do not build radeonsi should be able to ignore
the change completely.

There are alternatives. For example, ROCm uses Google's repo tool already.
But for Mesa, git submodule looks like a lightweight, well supported and
overall conservative option that everybody should already have installed. If
there are good arguments for something else, let's hear them!

Another point: if we proceed with this plan, I think we should consider
moving addrlib into src/amd/addrlib. There are two reasons: First,
transitioning to a submodule *without* changing the directory is probably
more fragile, i.e. what happens when you switch between checkouts before and
after the transition. Second, if/when radv ends up being merged into Mesa
master, it makes sense to have addrlib there anyway.

Thoughts? Complaints? Praise?


I don't know.

How does this ensure that Mesa and ROCm addrlib copies won't diverge?


They won't really be different copies, because both "copies" are really 
checkouts from the same repository. They will occasionally be checkouts 
of _different versions_ from the same repository -- usually that would 
happen after a sync with the internal copy, when one driver updates 
their pointer before the other does. But that's easy to reconcile. 
Usually it should just mean changing the version pointer in whichever 
driver uses the older version.




What issues can we expect if Mesa and ROCm addrlib copies diverge?


This is about software maintenance. If we _do_ have separate copies, and 
someone applies a bug fix in one copy, they may forget to apply it to 
the other. When we want to sync with the internal copy, that has to be 
done twice. Basically, all the usual frictions that go with having the 
same (or almost the same) piece of code in more than one place.




For texture sharing, the buffer metadata is set in a way that doesn't
leave any room for interpretation. I think it's possible to bypass
addrlib in this case.


Right, this is orthogonal to interoperability. Multiple drivers with 
different versions of addrlib can coexist in a system.


Nicolai



Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Christian König

Am 09.08.2016 um 15:47 schrieb Nicolai Hähnle:

Hi everybody,

addrlib is the addressing and alignment calculator which is used by 
radeonsi. It's developed (and also used) internally at AMD, and so far 
we've had one open source copy living in the Mesa repository at 
src/gallium/winsys/amdgpu/drm/addrlib.


The question of using addrlib in non-Mesa parts of our open-source 
stack has come up, in particular in relation to compute. We'd 
obviously like to share the code rather than having multiple copies 
flying around. Since the interface of addrlib is slow-moving but 
unstable, shared linking is not an option.


I think the best way forward is to create a dedicated repository for 
addrlib which is then integrated into Mesa as a git submodule.


The point of this email is to gather feedback from the Mesa community 
on this plan, which is explicitly:


(0) Create an addrlib repository, say amd/addrlib on fd.o.
(1) Add it as a git submodule to the Mesa repository.
(2) Fix up whatever aspects of the build system that may be affected 
(perhaps for building source tarballs).
(3) Go back to mostly ignoring addrlib, except for trying to get 
better at syncing with the internal closed-source version.


From initial experiments, the impact on users interested in radeon is 
that they will have to run `git submodule init` and then occasionally 
`git submodule update`. Users who do not build radeonsi should be able 
to ignore the change completely.


There are alternatives. For example, ROCm uses Google's repo tool 
already. But for Mesa, git submodule looks like a lightweight, well 
supported and overall conservative option that everybody should 
already have installed. If there are good arguments for something 
else, let's hear them!


Another point: if we proceed with this plan, I think we should 
consider moving addrlib into src/amd/addrlib. There are two reasons: 
First, transitioning to a submodule *without* changing the directory 
is probably more fragile, i.e. what happens when you switch between 
checkouts before and after the transition. Second, if/when radv ends 
up being merged into Mesa master, it makes sense to have addrlib there 
anyway.


Thoughts? Complaints? Praise?


Well using git submodule is a possibility and we had rather good 
experience with that in GStreamer.


But it would remove one major argument to beating the addrlib guys 
towards a stable interface and/or proper library version handling.


Moving it into libdrm is clearly not an option because then you would 
need to use versioning for the whole libdrm_amdgpu library which we 
don't want.


Christian.


Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Marek Olšák
On Tue, Aug 9, 2016 at 3:47 PM, Nicolai Hähnle  wrote:
> Hi everybody,
>
> addrlib is the addressing and alignment calculator which is used by
> radeonsi. It's developed (and also used) internally at AMD, and so far we've
> had one open source copy living in the Mesa repository at
> src/gallium/winsys/amdgpu/drm/addrlib.
>
> The question of using addrlib in non-Mesa parts of our open-source stack has
> come up, in particular in relation to compute. We'd obviously like to share
> the code rather than having multiple copies flying around. Since the
> interface of addrlib is slow-moving but unstable, shared linking is not an
> option.
>
> I think the best way forward is to create a dedicated repository for addrlib
> which is then integrated into Mesa as a git submodule.
>
> The point of this email is to gather feedback from the Mesa community on
> this plan, which is explicitly:
>
> (0) Create an addrlib repository, say amd/addrlib on fd.o.
> (1) Add it as a git submodule to the Mesa repository.
> (2) Fix up whatever aspects of the build system that may be affected
> (perhaps for building source tarballs).
> (3) Go back to mostly ignoring addrlib, except for trying to get better at
> syncing with the internal closed-source version.
>
> From initial experiments, the impact on users interested in radeon is that
> they will have to run `git submodule init` and then occasionally `git
> submodule update`. Users who do not build radeonsi should be able to ignore
> the change completely.
>
> There are alternatives. For example, ROCm uses Google's repo tool already.
> But for Mesa, git submodule looks like a lightweight, well supported and
> overall conservative option that everybody should already have installed. If
> there are good arguments for something else, let's hear them!
>
> Another point: if we proceed with this plan, I think we should consider
> moving addrlib into src/amd/addrlib. There are two reasons: First,
> transitioning to a submodule *without* changing the directory is probably
> more fragile, i.e. what happens when you switch between checkouts before and
> after the transition. Second, if/when radv ends up being merged into Mesa
> master, it makes sense to have addrlib there anyway.
>
> Thoughts? Complaints? Praise?

I don't know.

How does this ensure that Mesa and ROCm addrlib copies won't diverge?

What issues can we expect if Mesa and ROCm addrlib copies diverge?

For texture sharing, the buffer metadata is set in a way that doesn't
leave any room for interpretation. I think it's possible to bypass
addrlib in this case.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] vl/rbsp: add a check for emulation prevention three byte

2016-08-09 Thread Christian König

Am 09.08.2016 um 17:18 schrieb Leo Liu:

This is the case when the "00 00 03" is very close to the beginning of
nal unit header

v2: move the check to rbsp init

Signed-off-by: Leo Liu 


Reviewed-by: Christian König 


---
  src/gallium/auxiliary/vl/vl_rbsp.h | 14 --
  1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_rbsp.h 
b/src/gallium/auxiliary/vl/vl_rbsp.h
index 7867238..c8bebff 100644
--- a/src/gallium/auxiliary/vl/vl_rbsp.h
+++ b/src/gallium/auxiliary/vl/vl_rbsp.h
@@ -50,7 +50,8 @@ struct vl_rbsp {
   */
  static inline void vl_rbsp_init(struct vl_rbsp *rbsp, struct vl_vlc *nal, 
unsigned num_bits)
  {
-   unsigned bits_left = vl_vlc_bits_left(nal);
+   unsigned valid, bits_left = vl_vlc_bits_left(nal);
+   int i;
  
 /* copy the position */

 rbsp->nal = *nal;
@@ -62,10 +63,19 @@ static inline void vl_rbsp_init(struct vl_rbsp *rbsp, 
struct vl_vlc *nal, unsign
if (vl_vlc_peekbits(nal, 24) == 0x01 ||
vl_vlc_peekbits(nal, 32) == 0x0001) {
   vl_vlc_limit(&rbsp->nal, bits_left - vl_vlc_bits_left(nal));
- return;
+ break;
}
vl_vlc_eatbits(nal, 8);
 }
+
+   valid = vl_vlc_valid_bits(&rbsp->nal);
+   /* search for the emulation prevention three byte */
+   for (i = 24; i <= valid; i += 8) {
+  if ((vl_vlc_peekbits(&rbsp->nal, i) & 0xff) == 0x3) {
+ vl_vlc_removebits(&rbsp->nal, i - 8, 8);
+ i += 8;
+  }
+   }
  }
  
  /**



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] cairo as state tracker

2016-08-09 Thread Jason Ekstrand
On Tue, Aug 9, 2016 at 8:11 AM, Enrico Weigelt, metux IT consult <
enrico.weig...@gr13.net> wrote:

> On 07.08.2016 12:50, Marek Olšák wrote:
>
> > It would mainly be a futile task if it had to compete with their
> > official Mesa driver.
>
> Not quite. Would give us all of gallium's capabilities also for
> the intel chips, for example having lots of different state trackers.
> (coming back to my original intention of cairo as a gallium st)
>

Gallium isn't an API you support; it's a way you write your driver.
Switching to gallium is a fundamental change in the way the entire driver
is architected.  It could be done (and recent changes in our driver are
shrinking the dri portion so it's getting easier) but it's a massive pile
of work with mediocre benefit.  I could go on a very long rant about it but
the short version is: If we were writing a new driver or if something came
up that made gallium hugely beneficial, we might consider it but at the
moment, there's no good reason.

As far as cairo goes, I'd much rather see the GL backend improved or a
Vulkan backend added as those are both stable industry-supported APIs
rather than running directly on gallium.

--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Erik Faye-Lund
On Tue, Aug 9, 2016 at 4:59 PM, Nicolai Hähnle  wrote:
> On 09.08.2016 15:58, Rob Clark wrote:
>>
>> tbh, git submodules are more annoying than they need to be, and I'm
>> not really terribly excited to use that for something that is a build
>> dependency.  Maybe just move it into libdrm instead?
>
>
> I know. That's what I would have proposed if the addrlib interface were
> stable. Unfortunately it isn't, and realistically speaking, that's not going
> to change.
>
> So shared linking is right out.
>
> Static linking or just including source files from a separate repository
> could be considered, but then what's the process for ensuring you have the
> right version?
>
> The nice aspect of submodules is that every commit of the Mesa repository
> "knows" what the corresponding right version of addrlib is, and so git can
> update the submodule to the correct version for you automatically.

I'm not a huge fan of submodules either. They just don't deal with
distributed development properly, which should be a non-starter for
OSS IMO. You either set the submodule to point to an absolute URL, in
which case it's awkward to work with if you need to change the code,
or you use a relative URL, which forces everyone who has a fork to
fork the submodule also. Yuck. As a formerly active Git developer, my
impression is that nobody of the core-git developers really loved the
idea of git-submodule, it was mostly introduced into Git to help KDE
transition their gigantic SVN-external based source tree to Git.

IMO, a much better alternative would be to have addrlib live in its
own repository, and periodically do a git subtree-merge into mesa and
other dependent systems. That means that nobody really needs to deal
with the fact that the upstream is in a different repo, except when
submitting patches for upstream. This is what git.git itself does for
some of its subsystems.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97214] X not running with error "Failed to make EGL context current"

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97214

--- Comment #7 from Nicolas Boichat  ---
Created attachment 125643
  --> https://bugs.freedesktop.org/attachment.cgi?id=125643&action=edit
Add tracing to egl_dri2.c

libEGL debug: EGL user error 0x3001 (EGL_NOT_INITIALIZED) in eglMakeCurrent

Is due to the new code in dri2_make_current:
   if (!dri2_dpy)
  return _eglError(EGL_NOT_INITIALIZED, "eglMakeCurrent");

I don't quite understand how dri2_dpy could end up being uninitialized, unless
eglMakeCurrent is called after eglTerminate...

I'd start by adding traces in dri2_initialize, dri2_terminate,
dri2_make_current, which is what this patch does...

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] prog_hash_table: Convert to using util/hash_table.h.

2016-08-09 Thread Eric Anholt
Improves glretrace -b servo.trace (a trace of Mozilla's servo rendering
engine booting, rendering a page, and exiting) from 1.8s to 1.1s.  It uses
a large uniform array of structs, making a huge number of separate program
resources, and the fixed-size hash table was killing it.  Given how many
times we've improved performance by swapping the hash table to
util/hash_table.h, just do it once and for all.

This just rebases the old hash table API on top of util/, for minimal
diff.  Cleaning things up is left for later, particularly because I want
to fix up the new hash table API a little bit.
---
 src/mesa/program/hash_table.h  |  78 +++-
 src/mesa/program/prog_hash_table.c | 181 -
 2 files changed, 54 insertions(+), 205 deletions(-)

diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h
index aba5282fe9e5..362eb2ee0a78 100644
--- a/src/mesa/program/hash_table.h
+++ b/src/mesa/program/hash_table.h
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include "util/hash_table.h"
 
 struct string_to_uint_map;
 
@@ -44,8 +45,6 @@ struct string_to_uint_map;
 extern "C" {
 #endif
 
-struct hash_table;
-
 typedef unsigned (*hash_func_t)(const void *key);
 typedef bool (*hash_compare_func_t)(const void *key1, const void *key2);
 
@@ -60,26 +59,32 @@ typedef bool (*hash_compare_func_t)(const void *key1, const 
void *key2);
  * \param hash Function used to compute hash value of input keys.
  * \param compare  Function used to compare keys.
  */
-extern struct hash_table *hash_table_ctor(unsigned num_buckets,
-hash_func_t hash, hash_compare_func_t compare);
-
+static inline struct hash_table *hash_table_ctor(unsigned num_buckets,
+hash_func_t hash, hash_compare_func_t compare)
+{
+   return _mesa_hash_table_create(NULL, hash, compare);
+}
 
 /**
  * Release all memory associated with a hash table
  *
  * \warning
- * This function cannot release memory occupied either by keys or data.
+ * This function does not release memory occupied either by keys or data.
  */
-extern void hash_table_dtor(struct hash_table *ht);
-
+static inline void hash_table_dtor(struct hash_table *ht)
+{
+   return _mesa_hash_table_destroy(ht, NULL);
+}
 
 /**
  * Flush all entries from a hash table
  *
  * \param ht  Table to be cleared of its entries.
  */
-extern void hash_table_clear(struct hash_table *ht);
-
+static inline void hash_table_clear(struct hash_table *ht)
+{
+   return _mesa_hash_table_clear(ht, NULL);
+}
 
 /**
  * Search a hash table for a specific element
@@ -92,25 +97,28 @@ extern void hash_table_clear(struct hash_table *ht);
  * the matching key was added.  If no matching key exists in the table,
  * \c NULL is returned.
  */
-extern void *hash_table_find(struct hash_table *ht, const void *key);
-
+static inline void *hash_table_find(struct hash_table *ht, const void *key)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+   if (!entry)
+  return NULL;
+   return entry->data;
+}
 
 /**
  * Add an element to a hash table
  *
  * \warning
- * If \c key is already in the hash table, it will be added again.  Future
- * calls to \c hash_table_find and \c hash_table_remove will return or remove,
- * repsectively, the most recently added instance of \c key.
- *
- * \warning
  * The value passed by \c key is kept in the hash table and is used by later
  * calls to \c hash_table_find.
  *
  * \sa hash_table_replace
  */
-extern void hash_table_insert(struct hash_table *ht, void *data,
-const void *key);
+static inline void hash_table_insert(struct hash_table *ht, void *data,
+ const void *key)
+{
+   _mesa_hash_table_insert(ht, key, data);
+}
 
 /**
  * Add an element to a hash table with replacement
@@ -126,13 +134,29 @@ extern void hash_table_insert(struct hash_table *ht, void 
*data,
  *
  * \sa hash_table_insert
  */
-extern bool hash_table_replace(struct hash_table *ht, void *data,
-const void *key);
+static inline bool hash_table_replace(struct hash_table *ht, void *data,
+  const void *key)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+   if (entry) {
+  entry->data = data;
+  return true;
+   } else {
+  _mesa_hash_table_insert(ht, key, data);
+  return false;
+   }
+}
 
 /**
  * Remove a specific element from a hash table.
  */
-extern void hash_table_remove(struct hash_table *ht, const void *key);
+static inline void hash_table_remove(struct hash_table *ht, const void *key)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+
+   if (entry)
+  _mesa_hash_table_remove(ht, entry);
+}
 
 /**
  * Compute hash value of a string
@@ -180,12 +204,18 @@ hash_table_pointer_hash(const void *key);
 bool
 hash_table_pointer_compare(const void *key1, const void *key2);
 
-void
+static inline void
 hash_table_call_foreach(struct hash_table *ht,
void (*callback)(co

[Mesa-dev] [PATCH 1/4] nir: Drop an unused program/hash_table.h include.

2016-08-09 Thread Eric Anholt
---
 src/compiler/nir/nir_lower_samplers.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/compiler/nir/nir_lower_samplers.c 
b/src/compiler/nir/nir_lower_samplers.c
index 4a4326983a65..e878edd9b54b 100644
--- a/src/compiler/nir/nir_lower_samplers.c
+++ b/src/compiler/nir/nir_lower_samplers.c
@@ -25,7 +25,6 @@
 
 #include "nir.h"
 #include "nir_builder.h"
-#include "program/hash_table.h"
 #include "compiler/glsl/ir_uniform.h"
 
 #include "main/compiler.h"
-- 
2.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] mesa: Use a temporary set to track whether we've added a resource yet.

2016-08-09 Thread Eric Anholt
Saves another .1s on servo.trace.
---
 src/compiler/glsl/linker.cpp | 76 +---
 1 file changed, 50 insertions(+), 26 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index f4049133ee69..ceb86aa0a929 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -73,6 +73,7 @@
 #include "program.h"
 #include "program/hash_table.h"
 #include "program/prog_instruction.h"
+#include "util/set.h"
 #include "linker.h"
 #include "link_varyings.h"
 #include "ir_optimization.h"
@@ -3528,15 +3529,15 @@ should_add_buffer_variable(struct gl_shader_program 
*shProg,
 }
 
 static bool
-add_program_resource(struct gl_shader_program *prog, GLenum type,
- const void *data, uint8_t stages)
+add_program_resource(struct gl_shader_program *prog,
+ struct set *resource_set,
+ GLenum type, const void *data, uint8_t stages)
 {
assert(data);
 
/* If resource already exists, do not add it again. */
-   for (unsigned i = 0; i < prog->NumProgramResourceList; i++)
-  if (prog->ProgramResourceList[i].Data == data)
- return true;
+   if (_mesa_set_search(resource_set, data))
+  return true;
 
prog->ProgramResourceList =
   reralloc(prog,
@@ -3558,6 +3559,8 @@ add_program_resource(struct gl_shader_program *prog, 
GLenum type,
 
prog->NumProgramResourceList++;
 
+   _mesa_set_add(resource_set, data);
+
return true;
 }
 
@@ -3722,7 +3725,8 @@ create_shader_variable(struct gl_shader_program *shProg,
 }
 
 static bool
-add_shader_variable(struct gl_shader_program *shProg, unsigned stage_mask,
+add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
+unsigned stage_mask,
 GLenum programInterface, ir_variable *var,
 const char *name, const glsl_type *type,
 bool use_implicit_location, int location,
@@ -3750,7 +3754,8 @@ add_shader_variable(struct gl_shader_program *shProg, 
unsigned stage_mask,
   for (unsigned i = 0; i < type->length; i++) {
  const struct glsl_struct_field *field = &type->fields.structure[i];
  char *field_name = ralloc_asprintf(shProg, "%s.%s", name, 
field->name);
- if (!add_shader_variable(shProg, stage_mask, programInterface,
+ if (!add_shader_variable(shProg, resource_set,
+  stage_mask, programInterface,
   var, field_name, field->type,
   use_implicit_location, field_location,
   outermost_struct_type))
@@ -3792,13 +3797,15 @@ add_shader_variable(struct gl_shader_program *shProg, 
unsigned stage_mask,
   if (!sha_v)
  return false;
 
-  return add_program_resource(shProg, programInterface, sha_v, stage_mask);
+  return add_program_resource(shProg, resource_set,
+  programInterface, sha_v, stage_mask);
}
}
 }
 
 static bool
 add_interface_variables(struct gl_shader_program *shProg,
+struct set *resource_set,
 unsigned stage, GLenum programInterface)
 {
exec_list *ir = shProg->_LinkedShaders[stage]->ir;
@@ -3848,7 +3855,8 @@ add_interface_variables(struct gl_shader_program *shProg,
  (stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
  (stage == MESA_SHADER_FRAGMENT && var->data.mode == 
ir_var_shader_out);
 
-  if (!add_shader_variable(shProg, 1 << stage, programInterface,
+  if (!add_shader_variable(shProg, resource_set,
+   1 << stage, programInterface,
var, var->name, var->type, 
vs_input_or_fs_output,
var->data.location - loc_bias))
  return false;
@@ -3857,7 +3865,8 @@ add_interface_variables(struct gl_shader_program *shProg,
 }
 
 static bool
-add_packed_varyings(struct gl_shader_program *shProg, int stage, GLenum type)
+add_packed_varyings(struct gl_shader_program *shProg, struct set *resource_set,
+int stage, GLenum type)
 {
struct gl_linked_shader *sh = shProg->_LinkedShaders[stage];
GLenum iface;
@@ -3882,7 +3891,8 @@ add_packed_varyings(struct gl_shader_program *shProg, int 
stage, GLenum type)
  if (type == iface) {
 const int stage_mask =
build_stageref(shProg, var->name, var->data.mode);
-if (!add_shader_variable(shProg, stage_mask,
+if (!add_shader_variable(shProg, resource_set,
+ stage_mask,
  iface, var, var->name, var->type, false,
  var->data.location - VARYING_SLOT_VAR0))
return false;
@@ -3893,7 +3903,7 @@ add_packed_varyings(struct gl_shader_program *shProg, int 
stage, GLenum type)
 }

[Mesa-dev] [PATCH 2/4] prog_hash_table: Convert compare funcs to match util/hash_table.h.

2016-08-09 Thread Eric Anholt
I'm going to replace this hash table with util/hash_table.h, and the first
step is to compare things the same way.
---
 src/mesa/program/hash_table.h  | 9 -
 src/mesa/program/prog_hash_table.c | 9 +++--
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h
index d0a2abffa343..aba5282fe9e5 100644
--- a/src/mesa/program/hash_table.h
+++ b/src/mesa/program/hash_table.h
@@ -47,7 +47,7 @@ extern "C" {
 struct hash_table;
 
 typedef unsigned (*hash_func_t)(const void *key);
-typedef int (*hash_compare_func_t)(const void *key1, const void *key2);
+typedef bool (*hash_compare_func_t)(const void *key1, const void *key2);
 
 /**
  * Hash table constructor
@@ -151,12 +151,11 @@ extern unsigned hash_table_string_hash(const void *key);
 /**
  * Compare two strings used as keys
  *
- * This is just a macro wrapper around \c strcmp.
+ * This is just a wrapper around \c strcmp.
  *
  * \sa hash_table_string_hash
  */
-#define hash_table_string_compare ((hash_compare_func_t) strcmp)
-
+bool hash_table_string_compare(const void *a, const void *b);
 
 /**
  * Compute hash value of a pointer
@@ -178,7 +177,7 @@ hash_table_pointer_hash(const void *key);
  *
  * \sa hash_table_pointer_hash
  */
-int
+bool
 hash_table_pointer_compare(const void *key1, const void *key2);
 
 void
diff --git a/src/mesa/program/prog_hash_table.c 
b/src/mesa/program/prog_hash_table.c
index 5592b6fb8148..f8a7107eb5bd 100644
--- a/src/mesa/program/prog_hash_table.c
+++ b/src/mesa/program/prog_hash_table.c
@@ -228,6 +228,11 @@ hash_table_string_hash(const void *key)
 return hash;
 }
 
+bool hash_table_string_compare(const void *a, const void *b)
+{
+   return strcmp(a, b) == 0;
+}
+
 
 unsigned
 hash_table_pointer_hash(const void *key)
@@ -236,8 +241,8 @@ hash_table_pointer_hash(const void *key)
 }
 
 
-int
+bool
 hash_table_pointer_compare(const void *key1, const void *key2)
 {
-   return key1 == key2 ? 0 : 1;
+   return key1 == key2;
 }
-- 
2.8.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Marek Olšák
On Tue, Aug 9, 2016 at 5:35 PM, Nicolai Hähnle  wrote:
> On 09.08.2016 17:21, Marek Olšák wrote:
>>
>> On Tue, Aug 9, 2016 at 3:47 PM, Nicolai Hähnle  wrote:
>>>
>>> Hi everybody,
>>>
>>> addrlib is the addressing and alignment calculator which is used by
>>> radeonsi. It's developed (and also used) internally at AMD, and so far
>>> we've
>>> had one open source copy living in the Mesa repository at
>>> src/gallium/winsys/amdgpu/drm/addrlib.
>>>
>>> The question of using addrlib in non-Mesa parts of our open-source stack
>>> has
>>> come up, in particular in relation to compute. We'd obviously like to
>>> share
>>> the code rather than having multiple copies flying around. Since the
>>> interface of addrlib is slow-moving but unstable, shared linking is not
>>> an
>>> option.
>>>
>>> I think the best way forward is to create a dedicated repository for
>>> addrlib
>>> which is then integrated into Mesa as a git submodule.
>>>
>>> The point of this email is to gather feedback from the Mesa community on
>>> this plan, which is explicitly:
>>>
>>> (0) Create an addrlib repository, say amd/addrlib on fd.o.
>>> (1) Add it as a git submodule to the Mesa repository.
>>> (2) Fix up whatever aspects of the build system that may be affected
>>> (perhaps for building source tarballs).
>>> (3) Go back to mostly ignoring addrlib, except for trying to get better
>>> at
>>> syncing with the internal closed-source version.
>>>
>>> From initial experiments, the impact on users interested in radeon is
>>> that
>>> they will have to run `git submodule init` and then occasionally `git
>>> submodule update`. Users who do not build radeonsi should be able to
>>> ignore
>>> the change completely.
>>>
>>> There are alternatives. For example, ROCm uses Google's repo tool
>>> already.
>>> But for Mesa, git submodule looks like a lightweight, well supported and
>>> overall conservative option that everybody should already have installed.
>>> If
>>> there are good arguments for something else, let's hear them!
>>>
>>> Another point: if we proceed with this plan, I think we should consider
>>> moving addrlib into src/amd/addrlib. There are two reasons: First,
>>> transitioning to a submodule *without* changing the directory is probably
>>> more fragile, i.e. what happens when you switch between checkouts before
>>> and
>>> after the transition. Second, if/when radv ends up being merged into Mesa
>>> master, it makes sense to have addrlib there anyway.
>>>
>>> Thoughts? Complaints? Praise?
>>
>>
>> I don't know.
>>
>> How does this ensure that Mesa and ROCm addrlib copies won't diverge?
>
>
> They won't really be different copies, because both "copies" are really
> checkouts from the same repository. They will occasionally be checkouts of
> _different versions_ from the same repository -- usually that would happen
> after a sync with the internal copy, when one driver updates their pointer
> before the other does. But that's easy to reconcile. Usually it should just
> mean changing the version pointer in whichever driver uses the older
> version.
>
>
>> What issues can we expect if Mesa and ROCm addrlib copies diverge?
>
>
> This is about software maintenance. If we _do_ have separate copies, and
> someone applies a bug fix in one copy, they may forget to apply it to the
> other. When we want to sync with the internal copy, that has to be done
> twice. Basically, all the usual frictions that go with having the same (or
> almost the same) piece of code in more than one place.

Instead of introducing a new repo, can ROCm simply copy addrlib
directly from the Mesa tree?

If I understand it correctly, the only thing the git submodule would
allow is that ROCm developers wouldn't have to clone Mesa.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Nicolai Hähnle

On 09.08.2016 19:18, Marek Olšák wrote:

On Tue, Aug 9, 2016 at 5:35 PM, Nicolai Hähnle  wrote:

On 09.08.2016 17:21, Marek Olšák wrote:


On Tue, Aug 9, 2016 at 3:47 PM, Nicolai Hähnle  wrote:


Hi everybody,

addrlib is the addressing and alignment calculator which is used by
radeonsi. It's developed (and also used) internally at AMD, and so far
we've
had one open source copy living in the Mesa repository at
src/gallium/winsys/amdgpu/drm/addrlib.

The question of using addrlib in non-Mesa parts of our open-source stack
has
come up, in particular in relation to compute. We'd obviously like to
share
the code rather than having multiple copies flying around. Since the
interface of addrlib is slow-moving but unstable, shared linking is not
an
option.

I think the best way forward is to create a dedicated repository for
addrlib
which is then integrated into Mesa as a git submodule.

The point of this email is to gather feedback from the Mesa community on
this plan, which is explicitly:

(0) Create an addrlib repository, say amd/addrlib on fd.o.
(1) Add it as a git submodule to the Mesa repository.
(2) Fix up whatever aspects of the build system that may be affected
(perhaps for building source tarballs).
(3) Go back to mostly ignoring addrlib, except for trying to get better
at
syncing with the internal closed-source version.

From initial experiments, the impact on users interested in radeon is
that
they will have to run `git submodule init` and then occasionally `git
submodule update`. Users who do not build radeonsi should be able to
ignore
the change completely.

There are alternatives. For example, ROCm uses Google's repo tool
already.
But for Mesa, git submodule looks like a lightweight, well supported and
overall conservative option that everybody should already have installed.
If
there are good arguments for something else, let's hear them!

Another point: if we proceed with this plan, I think we should consider
moving addrlib into src/amd/addrlib. There are two reasons: First,
transitioning to a submodule *without* changing the directory is probably
more fragile, i.e. what happens when you switch between checkouts before
and
after the transition. Second, if/when radv ends up being merged into Mesa
master, it makes sense to have addrlib there anyway.

Thoughts? Complaints? Praise?



I don't know.

How does this ensure that Mesa and ROCm addrlib copies won't diverge?



They won't really be different copies, because both "copies" are really
checkouts from the same repository. They will occasionally be checkouts of
_different versions_ from the same repository -- usually that would happen
after a sync with the internal copy, when one driver updates their pointer
before the other does. But that's easy to reconcile. Usually it should just
mean changing the version pointer in whichever driver uses the older
version.



What issues can we expect if Mesa and ROCm addrlib copies diverge?



This is about software maintenance. If we _do_ have separate copies, and
someone applies a bug fix in one copy, they may forget to apply it to the
other. When we want to sync with the internal copy, that has to be done
twice. Basically, all the usual frictions that go with having the same (or
almost the same) piece of code in more than one place.


Instead of introducing a new repo, can ROCm simply copy addrlib
directly from the Mesa tree?

If I understand it correctly, the only thing the git submodule would
allow is that ROCm developers wouldn't have to clone Mesa.


They certainly can copy addrlib from the Mesa tree. The question is 
whether that's a good basis for staying synchronized in the future.


Nicolai



Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] vl: add a lanczos interpolation filter v3

2016-08-09 Thread Nayan Deshmukh
Hi Christian,

A few questions.



On Tue, Aug 9, 2016 at 5:10 PM, Christian König 
wrote:

> I am more than happy to solve these problems, the Lanczos filtering was
> getting a little stale
> anyway because I was not able to reproduce the problems Andy was facing.
>
> Yeah thought so, the reason is probably that you don't have the necessary
> hardware.
>
> Is that why I need to add a PIPE_BIND_LINEAR to a surface?
>
> Yes exactly.
>
> So I need to use maybe a couple of surfaces alternatively to read and
> write with the filters. This approach should work I guess.
>
> Allocate a temporary surface for each step, apply the necessary filters
> using it and then use the temporary buffer as input for the next step.
>
> See how the deinterlacing filter does this, you should use the same
> approach here.
>
> I would use this order for doing things:
> 1. Median filter for noise reduction.
> 2. Sharpening/blur filter.
> 3. Deinterlacing.
> 4. Compositioning and CC conversion.
> 5. Advanced scaling.
>
>
I need to provide the median filter and the blur filter with a sampler view
and the deint filter requires a pipe_video_buffer. I am not sure how to
achieve this. Any suggestions?

Also, right now deinterlacing is the first step and the other steps follow.
But if we perform the median and sharpening filters first, then we also need
to apply them to the past and future surfaces that we require for
deinterlacing. Am I right?

Regards,
Nayan.

Regards,
> Christian.
>
>
> Am 08.08.2016 um 16:32 schrieb Nayan Deshmukh:
>
> Hi Christian,
>
> I am more than happy to solve these problems, the Lanczos filtering was
> getting a little stale
> anyway because I was not able to reproduce the problems Andy was facing.
>
> On Mon, Aug 8, 2016 at 6:24 PM, Christian König 
> wrote:
>
>> Hi Nayan,
>>
>> ok let's take a step back and put the lanczos filtering aside for a
>> moment. I have another task for you which is more urgent right now.
>>
>> The order we do things in vlVdpVideoMixerRender() was never 100% correct,
>> so we have at least three problems here which need fixing:
>>
>> 1) The noise reduction and sharpness filter read and write to the same
>> surface at the same time. That only works because we use a linear layout.
>>
>> Is that why I need to add a PIPE_BIND_LINEAR to a surface? So I need to
> use maybe a couple of surfaces alternatively to read and write with the
> filters. This approach should work I guess.
>
> 2) We apply the noise reduction and sharpness filter after the
>> composition. That is rather odd and should be fixed so that we apply those
>> filters on the original video frame instead.
>>
>>  So we need to apply the filter before the CSC conversion.
>
>> 3) We use delayed rendering to render into output surfaces directly. We
>> should rather use the DRI3 capabilities to allocate multiple output
>> surfaces instead.
>>
>> Could you take care of some of those issues? Especially #1 has become a
>> problem recently.
>>
>> Surely, I will start working on the first 2 problems for now and look at
> the third problem a little later.
>
> Regards,
> Nayan.
>
>
>> Regards,
>> Christian.
>>
>>
>> Am 04.08.2016 um 19:22 schrieb Nayan Deshmukh:
>>
>> Hi Andy,
>>
>>
>> On Thu, Aug 4, 2016 at 8:48 PM, Andy Furniss  wrote:
>>
>>> Nayan Deshmukh wrote:
>>>
 Hi Andy,

 Thanks for testing my patches.

>>>
>>> NP
>>>
>>>
>>> The scaling happens after CSC.
>

>>> OK, thanks.
>>>
>>>
>>> I believe there is some misunderstanding here, I was able to see the
 artifacts in the video that you sent me previously. But I was not
 able to replicate them

>>>
>>> Yea, I got that - just thought you may want to see how they had changed.
>>>
>>> with the pendulum video on my system. Same case this time the
 pendulum video plays fine this time too. I am attaching a video of it
 here

 https://drive.google.com/file/d/0B1s62k5GtdBwOVAtOUVaU0V5c1E
 /view?usp=sharing

>>>
>>> Hmm, that's interesting for a few reasons.
>>>
>>> Though my monitor won't do 1366x768 I can replicate the same scale
>>> factor windowed with mplayer ... -xy 768/576 ...
>>>
>>> At first glance only level 2 is obviously artifacted (though very close
>>> inspection shows others are slightly).
>>>
>>> Levels: for some reason your vid has the black bars at 0 but the content
>>> isn't scaled - like your mplayer didn't expand tv to pc on playback.
>>> This shouldn't happen by default. Do you have some extra config
>>> somewhere like in $HOME/.mplayer, if so maybe better to test without.
>>>
>>> Most important - though the vp9 compression may be to blame I can't
>>> really see any difference between the levels in that vid.
>>>
>>> In fact closely comparing just your level 1 to my (admittedly
>>> uncompressed) level 1 and 0 output scaled the same plus unstretched
>>> levels wise it looks to me like you are getting level 0 on this test.
>>>
>>
>> You are right, I am getting level 0 only. I have a PRIME configuration
>> and I 

Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Nicolai Hähnle



On 09.08.2016 18:22, Erik Faye-Lund wrote:

On Tue, Aug 9, 2016 at 4:59 PM, Nicolai Hähnle  wrote:

On 09.08.2016 15:58, Rob Clark wrote:


tbh, git submodules are more annoying than they need to be, and I'm
not really terribly excited to use that for something that is a build
dependency.  Maybe just move it into libdrm instead?



I know. That's what I would have proposed if the addrlib interface were
stable. Unfortunately it isn't, and realistically speaking, that's not going
to change.

So shared linking is right out.

Static linking or just including source files from a separate repository
could be considered, but then what's the process for ensuring you have the
right version?

The nice aspect of submodules is that every commit of the Mesa repository
"knows" what the corresponding right version of addrlib is, and so git can
update the submodule to the correct version for you automatically.


I'm not a huge fan of submodules either. They just don't deal with
distributed development properly, which should be a non-starter for
OSS IMO. You either set the submodule to point to an absolute URL, in
which case it's awkward to work with if you need to change the code,
or you use a relative URL, which forces everyone who has a fork to
fork the submodule also. Yuck. As a formerly active Git developer, my
impression is that nobody of the core-git developers really loved the
idea of git-submodule, it was mostly introduced into Git to help KDE
transition their gigantic SVN-external based source tree to Git.

IMO, a much better alternative would be to have addrlib live in its
own repository, and periodically do a git subtree-merge into mesa and
other dependent system. That means that nobody really needs to deal
with the fact that the upstream is in a different repo, except when
submitting patches for upstream. This is what git.git itself does for
some of its subsystems.


That looks interesting. Are people using subtree-merge with the kind of 
linear history that Mesa uses?


Nicolai




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97214] X not running with error "Failed to make EGL context current"

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97214

--- Comment #8 from Alexandr Zelinsky  ---
Created attachment 125644
  --> https://bugs.freedesktop.org/attachment.cgi?id=125644&action=edit
EGL_LOG_LEVEL=debug with patches

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] freedreno/a2xx: add missing casts to silence notices

2016-08-09 Thread Francesco Ansanelli
Signed-off-by: Francesco Ansanelli 
---
 src/gallium/drivers/freedreno/a2xx/ir-a2xx.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c 
b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
index 2b62b3a..163c282 100644
--- a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
@@ -403,7 +403,7 @@ static int instr_emit_alu(struct ir2_instruction *instr, 
uint32_t *dwords,
assert((src2_reg->flags & IR2_REG_EXPORT) == 0);
assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4));
 
-   if (instr->alu.vector_opc == ~0) {
+   if (instr->alu.vector_opc == (instr_vector_opc_t)~0) {
alu->vector_opc  = MAXv;
alu->vector_write_mask   = 0;
} else {
@@ -431,7 +431,7 @@ static int instr_emit_alu(struct ir2_instruction *instr, 
uint32_t *dwords,
alu->vector_clamp= instr->alu.vector_clamp;
alu->scalar_clamp= instr->alu.scalar_clamp;
 
-   if (instr->alu.scalar_opc != ~0) {
+   if (instr->alu.scalar_opc != (instr_scalar_opc_t)~0) {
struct ir2_register *sdst_reg = instr->regs[reg++];
 
reg_update_stats(sdst_reg, info, true);
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] anv/gen7_pipeline: Set multisample state using shared function

2016-08-09 Thread Anuj Phogat
On Mon, Aug 8, 2016 at 5:09 PM, Jason Ekstrand  wrote:
> Does this fix any tests?  If so, we should say so in the commit message.
> With that updated,
>
No, it doesn't. It surprised me too, but it looks like the CTS doesn't have
enough multisampling tests.

> Reviewed-by: Jason Ekstrand 
>
> On Mon, Aug 8, 2016 at 2:57 PM, Anuj Phogat  wrote:
>>
>> Signed-off-by: Anuj Phogat 
>> ---
>>  src/intel/vulkan/gen7_pipeline.c | 16 +---
>>  1 file changed, 1 insertion(+), 15 deletions(-)
>>
>> diff --git a/src/intel/vulkan/gen7_pipeline.c
>> b/src/intel/vulkan/gen7_pipeline.c
>> index 5395e79..17d7ccc 100644
>> --- a/src/intel/vulkan/gen7_pipeline.c
>> +++ b/src/intel/vulkan/gen7_pipeline.c
>> @@ -81,21 +81,7 @@ genX(graphics_pipeline_create)(
>>   pCreateInfo->pRasterizationState, extra);
>> emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState);
>>
>> -   if (pCreateInfo->pMultisampleState &&
>> -   pCreateInfo->pMultisampleState->rasterizationSamples > 1)
>> -
>> anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO");
>> -
>> -   uint32_t samples = 1;
>> -   uint32_t log2_samples = __builtin_ffs(samples) - 1;
>> -
>> -   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
>> -  ms.PixelLocation= PIXLOC_CENTER;
>> -  ms.NumberofMultisamples = log2_samples;
>> -   }
>> -
>> -   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
>> -  sm.SampleMask = 0xff;
>> -   }
>> +   emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
>>
>> const struct brw_vs_prog_data *vs_prog_data =
>> get_vs_prog_data(pipeline);
>>
>> --
>> 2.5.5
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97214] X not running with error "Failed to make EGL context current"

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97214

Nicolas Boichat  changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
 Attachment #125643|0                           |1
        is obsolete|                            |

--- Comment #9 from Nicolas Boichat  ---
Created attachment 125647
  --> https://bugs.freedesktop.org/attachment.cgi?id=125647&action=edit
More tracing in egl_dri2.c

I see. When the second display is initialized, there is still an active
context, it seems (dri2_display_release should not be called on the first call
to dri2_make_current):

libEGL debug: Native platform type: drm (autodetected)
libEGL debug: dri2_initialize 0x90d7a0 0x9a2f10 (dri2_dpy=(null))
libEGL debug: the best driver is DRI2
libEGL debug: EGL user error 0x3009 (EGL_BAD_MATCH) in dri2_create_context
libEGL debug: dri2_make_current 0x90d7a0 0x9a2f10 (nil) (nil) 0x99dbf0
libEGL debug: dri2_display_release 0x9a2f10 2
refcount -> 1

This causes the reference count to drop to zero later on:
libEGL debug: dri2_make_current 0x90d7a0 0x9a2f10 (nil) (nil) (nil)
libEGL debug: dri2_display_release 0x9a2f10 1
refcount -> 0 => display is destroyed
libEGL debug: dri2_make_current 0x90d7a0 0x9a2f10 (nil) (nil) 0x99dbf0
libEGL debug: EGL user error 0x3001 (EGL_NOT_INITIALIZED) in eglMakeCurrent

One more patch to confirm this.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] anv/clear: Clear E5B9G9R9 images as R32_UINT

2016-08-09 Thread Nanley Chery
On Wed, Aug 03, 2016 at 01:06:10PM -0700, Jason Ekstrand wrote:
> We can't actually clear these images normally because we can't render to
> them.  Instead, we have to manually unpack the rgb9e5 color value on the
> CPU and clear it as R32_UINT.  We still have a bit of work to do to clear
> non-power-of-two images, but this should get all of the power-of-two clears
> working on at least Haswell.
> 
> Cc: "12.0" 
> ---
>  src/intel/vulkan/anv_meta_clear.c | 16 ++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_meta_clear.c 
> b/src/intel/vulkan/anv_meta_clear.c
> index fa07ec1..d8b5ce0 100644
> --- a/src/intel/vulkan/anv_meta_clear.c
> +++ b/src/intel/vulkan/anv_meta_clear.c
> @@ -25,6 +25,8 @@
>  #include "anv_private.h"
>  #include "nir/nir_builder.h"
>  
> +#include "gallium/auxiliary/util/u_format_rgb9e5.h"

I encountered a build failure on this patch. Please change the include to:

#include "util/format_rgb9e5.h"

to fix it.

With the above fixed, the spelling correction, and a mention of the
passing tests in the commit message, this series is,

Reviewed-by: Nanley Chery 

> +
>  /** Vertex attributes for color clears.  */
>  struct color_clear_vattrs {
> struct anv_vue_header vue_header;
> @@ -760,6 +762,16 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
>  {
> VkDevice device_h = anv_device_to_handle(cmd_buffer->device);
>  
> +   VkFormat vk_format = image->vk_format;
> +   if (vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
> +  /* We can't actually render to this format so we have to work around it
> +   * by manualy unpacking and using R32_UINT.
> +   */
> +  clear_value.color.uint32[0] =
> + float3_to_rgb9e5(clear_value.color.float32);
> +  vk_format = VK_FORMAT_R32_UINT;
> +   }
> +
> for (uint32_t r = 0; r < range_count; r++) {
>const VkImageSubresourceRange *range = &ranges[r];
>for (uint32_t l = 0; l < anv_get_levelCount(image, range); ++l) {
> @@ -773,7 +785,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
>.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
>.image = anv_image_to_handle(image),
>.viewType = anv_meta_get_view_type(image),
> -  .format = image->vk_format,
> +  .format = vk_format,
>.subresourceRange = {
>   .aspectMask = range->aspectMask,
>   .baseMipLevel = range->baseMipLevel + l,
> @@ -800,7 +812,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
> &fb);
>  
>  VkAttachmentDescription att_desc = {
> -   .format = iview.vk_format,
> +   .format = vk_format,
> .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
> .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
> .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] anv/gen7_pipeline: Set multisample state using shared function

2016-08-09 Thread Jason Ekstrand
On Tue, Aug 9, 2016 at 11:04 AM, Anuj Phogat  wrote:

> On Mon, Aug 8, 2016 at 5:09 PM, Jason Ekstrand 
> wrote:
> > Does this fix any tests?  If so, we should say so in the commit message.
> > With that updated,
> >
> No it doesn't. It surprised me too but it looks like cts don't have enough
> multisampling tests.
>

That's an understatement.  In that case, it looks like this is a strict
improvement, so go ahead with my R-B.


>
> > Reviewed-by: Jason Ekstrand 
> >
> > On Mon, Aug 8, 2016 at 2:57 PM, Anuj Phogat 
> wrote:
> >>
> >> Signed-off-by: Anuj Phogat 
> >> ---
> >>  src/intel/vulkan/gen7_pipeline.c | 16 +---
> >>  1 file changed, 1 insertion(+), 15 deletions(-)
> >>
> >> diff --git a/src/intel/vulkan/gen7_pipeline.c
> >> b/src/intel/vulkan/gen7_pipeline.c
> >> index 5395e79..17d7ccc 100644
> >> --- a/src/intel/vulkan/gen7_pipeline.c
> >> +++ b/src/intel/vulkan/gen7_pipeline.c
> >> @@ -81,21 +81,7 @@ genX(graphics_pipeline_create)(
> >>   pCreateInfo->pRasterizationState, extra);
> >> emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState);
> >>
> >> -   if (pCreateInfo->pMultisampleState &&
> >> -   pCreateInfo->pMultisampleState->rasterizationSamples > 1)
> >> -
> >> anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_
> STATE_CREATE_INFO");
> >> -
> >> -   uint32_t samples = 1;
> >> -   uint32_t log2_samples = __builtin_ffs(samples) - 1;
> >> -
> >> -   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
> >> -  ms.PixelLocation= PIXLOC_CENTER;
> >> -  ms.NumberofMultisamples = log2_samples;
> >> -   }
> >> -
> >> -   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
> >> -  sm.SampleMask = 0xff;
> >> -   }
> >> +   emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
> >>
> >> const struct brw_vs_prog_data *vs_prog_data =
> >> get_vs_prog_data(pipeline);
> >>
> >> --
> >> 2.5.5
> >>
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> >
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Erik Faye-Lund
On Tue, Aug 9, 2016 at 7:24 PM, Nicolai Hähnle  wrote:
>
> On 09.08.2016 18:22, Erik Faye-Lund wrote:
>>
>> On Tue, Aug 9, 2016 at 4:59 PM, Nicolai Hähnle  wrote:
>>>
>>> On 09.08.2016 15:58, Rob Clark wrote:


 tbh, git submodules are more annoying than they need to be, and I'm
 not really terribly excited to use that for something that is a build
 dependency.  Maybe just move it into libdrm instead?
>>>
>>>
>>>
>>> I know. That's what I would have proposed if the addrlib interface were
>>> stable. Unfortunately it isn't, and realistically speaking, that's not
>>> going
>>> to change.
>>>
>>> So shared linking is right out.
>>>
>>> Static linking or just including source files from a separate repository
>>> could be considered, but then what's the process for ensuring you have
>>> the
>>> right version?
>>>
>>> The nice aspect of submodules is that every commit of the Mesa repository
>>> "knows" what the corresponding right version of addrlib is, and so git
>>> can
>>> update the submodule to the correct version for you automatically.
>>
>>
>> I'm not a huge fan of submodules either. They just don't deal with
>> distributed development properly, which should be a non-starter for
>> OSS IMO. You either set the submodule to point to an absolute URL, in
>> which case it's awkward to work with if you need to change the code,
>> or you use a relative URL, which forces everyone who has a fork to
>> fork the submodule also. Yuck. As a formerly active Git developer, my
impression is that none of the core-git developers really loved the
>> idea of git-submodule, it was mostly introduced into Git to help KDE
>> transition their gigantic SVN-external based source tree to Git.
>>
>> IMO, a much better alternative would be to have addrlib live in its
>> own repository, and periodically do a git subtree-merge into mesa and
>> other dependent system. That means that nobody really needs to deal
>> with the fact that the upstream is in a different repo, except when
>> submitting patches for upstream. This is what git.git itself does for
>> some of its subsystems.
>
>
> That looks interesting. Are people using subtree-merge with the kind of
> linear history that Mesa uses?
>

I work mostly with branch-heavy work-flows these days, so I don't
really know. The subtree-merges themselves will obviously appear as
merges, but you can always keep the development in the upstream linear
if you want...
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] vl: add a lanczos interpolation filter v3

2016-08-09 Thread Christian König

Am 09.08.2016 um 19:21 schrieb Nayan Deshmukh:

Hi Christian,

A few questions.



On Tue, Aug 9, 2016 at 5:10 PM, Christian König 
mailto:deathsim...@vodafone.de>> wrote:



I am more than happy to solve these problems, the
Lanczos filtering was getting a little stale
anyway because I was not able to reproduce the problems Andy was
facing.

Yeah thought so, the reason is probably that you don't have the
necessary hardware.


Is that why I need to add a PIPE_BIND_LINEAR to a surface?

Yes exactly.


So I need to use maybe a couple of surfaces alternatively to read
and write with the filters. This approach should work I guess.

Allocate a temporary surface for each step, apply the necessary
filters using it and then use the temporary buffer as input for
the next step.

See how the deinterlacing filter does this, you should use the
same approach here.

I would use this order for doing things:
1. Median filter for noise reduction.
2. Sharpening/blur filter.
3. Deinterlacing.
4. Compositioning and CC conversion.
5. Advanced scaling.

I need to provide the median filter and the blur filter with a sampler 
view and the deint filter requires a pipe_video_buffer. I am not sure 
how to achieve this. Any suggestions?


video buffers are basically just a collection of sampler views and 
render targets (up to six). You just need to apply each filter to each 
plane separately.




Also right now deinterlacing is the first step and the other steps 
follow. But if we perform median and sharpening filter before then we 
also need to apply them on the past and the future surfaces that we 
require for deinterlacing. Am I right?


Oh, good point. Might be that we need to change the order to 1) 
Deinterlacing, 2) Median 3) Sharpening.


Otherwise the calculation overhead/memory bandwidth probably start to 
hit some limits on low end hardware.


Regards,
Christian.



Regards,
Nayan.

Regards,
Christian.


Am 08.08.2016 um 16:32 schrieb Nayan Deshmukh:

Hi Christian,

I am more than happy to solve these problems, the
Lanczos filtering was getting a little stale
anyway because I was not able to reproduce the problems Andy was
facing.

On Mon, Aug 8, 2016 at 6:24 PM, Christian König
mailto:christian.koe...@amd.com>> wrote:

Hi Nayan,

ok let's take a step back and put the lanczos filtering aside
for a moment. I have another task for you which is more
urgent right now.

The order we do things in vlVdpVideoMixerRender() was never
100% correct, so we have at least three problems here which
need fixing:

1) The noise reduction and sharpness filter read and write to
the same surface at the same time. That only works because we
use a linear layout.

Is that why I need to add a PIPE_BIND_LINEAR to a surface? So I
need to use maybe a couple of surfaces alternatively to read and
write with the filters. This approach should work I guess.

2) We apply the noise reduction and sharpness filter after
the composition. That is rather odd and should be fixed so
that we apply those filters on the original video frame instead.

 So we need to apply the filter before the CSC conversion.

3) We use delayed rendering to render into output surfaces
directly. We should rather use the DRI3 capabilities to
allocate multiple output surfaces instead.

Could you take care of some of those issues? Especially #1
has become a problem recently.

Surely, I will start working on the first 2 problems for now and
look at the third problem a little later.

Regards,
Nayan.

Regards,
Christian.


Am 04.08.2016 um 19:22 schrieb Nayan Deshmukh:

Hi Andy,


On Thu, Aug 4, 2016 at 8:48 PM, Andy Furniss
mailto:adf.li...@gmail.com>> wrote:

Nayan Deshmukh wrote:

Hi Andy,

Thanks for testing my patches.


NP


The scaling happens after CSC.


OK, thanks.


I believe there is some misunderstanding here, I was
able to see the
artifacts in the video that you sent me previously.
But I was not
able to replicate them


Yea, I got that - just thought you may want to see how
they had changed.

with the pendulum video on my system. Same case this
time the
pendulum video plays fine this time too. I am
attaching a video of it
here


https://drive.google.com/file/d/0B1s62k5GtdBwOVAtOUVaU0V5c1E/view?usp=sharing




Hmm, that's interesting for a few reasons.

[Mesa-dev] [Bug 97231] GL_DEPTH_CLAMP doesn't clamp to the far plane

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97231

Jules Blok  changed:

   What|Removed |Added

 Attachment #125589|0   |1
is obsolete||
 Attachment #125624|0   |1
is obsolete||

--- Comment #13 from Jules Blok  ---
Created attachment 125650
  --> https://bugs.freedesktop.org/attachment.cgi?id=125650&action=edit
api trace file version 4

I've added an apitrace that was captured on Linux, perhaps you will have less
problems running that trace.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] util: Use win32 intrinsics for util_last_bit if present.

2016-08-09 Thread Mathias . Froehlich
From: Mathias Fröhlich 

v2: Split into two patches.
v3: Fix off by one problem.

Signed-off-by: Mathias Fröhlich 
---
 src/util/bitscan.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/util/bitscan.h b/src/util/bitscan.h
index 0743fe7..8afef81 100644
--- a/src/util/bitscan.h
+++ b/src/util/bitscan.h
@@ -157,6 +157,12 @@ util_last_bit(unsigned u)
 {
 #if defined(HAVE___BUILTIN_CLZ)
return u == 0 ? 0 : 32 - __builtin_clz(u);
+#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64)
+   unsigned long index;
+   if (_BitScanReverse(&index, u))
+  return index + 1;
+   else
+  return 0;
 #else
unsigned r = 0;
while (u) {
@@ -177,6 +183,12 @@ util_last_bit64(uint64_t u)
 {
 #if defined(HAVE___BUILTIN_CLZLL)
return u == 0 ? 0 : 64 - __builtin_clzll(u);
+#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM || _M_IA64)
+   unsigned long index;
+   if (_BitScanReverse64(&index, u))
+  return index + 1;
+   else
+  return 0;
 #else
unsigned r = 0;
while (u) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] gallium: Add c99_compat.h to u_bitcast.h

2016-08-09 Thread Mathias Fröhlich
Hi Brian,

On Tuesday, 9 August 2016 08:23:41 CEST Brian Paul wrote:
> >> As it fixes something independent, should I push that already?
> >
> > Sure.  For 1 & 3,
> > Reviewed-by: Brian Paul 
> > Tested-by: Brian Paul 
Pushed.

> Yes, we need to add one to the index.  I'll re-test that patch when you 
> update it.
Is sent.

Thanks!

Mathias
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97214] X not running with error "Failed to make EGL context current"

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97214

--- Comment #10 from Nicolas Boichat  ---
Created attachment 125652
  --> https://bugs.freedesktop.org/attachment.cgi?id=125652&action=edit
Possible fix

Possible fix attached, please give it a try. Thanks!

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Dave Airlie
>
> tbh, git submodules are more annoying than they need to be, and I'm
> not really terribly excited to use that for something that is a build
> dependency.  Maybe just move it into libdrm instead?
>

I've only had to use git submodules once with spice project, and it
was a nightmare. It makes packaging etc a real pita.

Alternatives are something like a fetch external sources script,
that does git submodules but does it better, you'll see Vulkan-CTS
etc use something like that, it would have to be integrated with
the build system a bit better though.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] util: Use win32 intrinsics for util_last_bit if present.

2016-08-09 Thread Brian Paul

On 08/09/2016 01:41 PM, mathias.froehl...@gmx.net wrote:

From: Mathias Fröhlich 

v2: Split into two patches.
v3: Fix off by one problem.

Signed-off-by: Mathias Fröhlich 
---
  src/util/bitscan.h | 12 
  1 file changed, 12 insertions(+)

diff --git a/src/util/bitscan.h b/src/util/bitscan.h
index 0743fe7..8afef81 100644
--- a/src/util/bitscan.h
+++ b/src/util/bitscan.h
@@ -157,6 +157,12 @@ util_last_bit(unsigned u)
  {
  #if defined(HAVE___BUILTIN_CLZ)
 return u == 0 ? 0 : 32 - __builtin_clz(u);
+#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64)
+   unsigned long index;
+   if (_BitScanReverse(&index, u))
+  return index + 1;
+   else
+  return 0;
  #else
 unsigned r = 0;
 while (u) {
@@ -177,6 +183,12 @@ util_last_bit64(uint64_t u)
  {
  #if defined(HAVE___BUILTIN_CLZLL)
 return u == 0 ? 0 : 64 - __builtin_clzll(u);
+#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM || _M_IA64)
+   unsigned long index;
+   if (_BitScanReverse64(&index, u))
+  return index + 1;
+   else
+  return 0;
  #else
 unsigned r = 0;
 while (u) {



Reviewed-by: Brian Paul 
Tested-by: Brian Paul 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] egl: android: query native window default width and height

2016-08-09 Thread Haixia Shi
Pinging this thread - any objection to commit this? Thanks.

On Thu, Jul 28, 2016 at 8:58 PM, Tomasz Figa  wrote:

> On Fri, Jul 29, 2016 at 2:51 AM, Haixia Shi  wrote:
> > On android platform, the width and height of a native window surface may
> > be updated after initialization. It is therefore necessary to query
> android
> > framework for the current width and height.
> >
> > v2: remove Android specific #ifdef's and just implement the fallback
> directly
> > if the platform query_surface() callback is not provided.
> >
> > TEST=dEQP-EGL.functional.resize.surface_size#* on cyan-cheets
> >
> > Change-Id: I673f7d2f1d90c3bf572b30f63da537f2cae1496e
> > ---
> >  src/egl/drivers/dri2/egl_dri2.c | 11 +++
> >  src/egl/drivers/dri2/egl_dri2.h |  4 
> >  src/egl/drivers/dri2/platform_android.c | 27
> +++
> >  3 files changed, 42 insertions(+)
>
> Reviewed-by: Tomasz Figa 
>
> Best regards,
> Tomasz
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] gallium/radeon: change the vendor string to Advanced Micro Devices, Inc.

2016-08-09 Thread Marek Olšák
On Sun, Aug 7, 2016 at 12:01 AM, Axel Davy  wrote:
> Hi,
>
> This looks like it will perturb driver detection of already written games.
>
> For example from the dolphin sources, this change would make them detect
> catalyst as driver.
>
> As it is known workarounds are applied depending on the driver detected, or
> some features are disabled,
> I believe it isn't a good change.

OK. I'll keep it as-is for now.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH V2 2/3] anv/pipeline: Add sample locations for gen7-7.5

2016-08-09 Thread Anuj Phogat
V1: Add multisample positions (Nanley)
V2: Fix 8x sample positions to match OpenGL (Anuj)
V3: Vulkan has standard sample locations. They need not be same as
in OpenGL; they need not be the same. (Anuj)

Signed-off-by: Anuj Phogat 
Reviewed-by: Jason Ekstrand 
---
 src/intel/vulkan/genX_pipeline_util.h | 47 +++
 1 file changed, 47 insertions(+)

diff --git a/src/intel/vulkan/genX_pipeline_util.h 
b/src/intel/vulkan/genX_pipeline_util.h
index d9d8ca4..64b89cd 100644
--- a/src/intel/vulkan/genX_pipeline_util.h
+++ b/src/intel/vulkan/genX_pipeline_util.h
@@ -475,6 +475,7 @@ emit_ms_state(struct anv_pipeline *pipeline,
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
   ms.NumberofMultisamples   = log2_samples;
 
+#if GEN_GEN >= 8
   /* The PRM says that this bit is valid only for DX9:
*
*SW can choose to set this bit only for DX9 API. DX10/OGL API's
@@ -482,6 +483,52 @@ emit_ms_state(struct anv_pipeline *pipeline,
*/
   ms.PixelPositionOffsetEnable  = false;
   ms.PixelLocation  = CENTER;
+#else
+  ms.PixelLocation  = PIXLOC_CENTER;
+
+  switch (samples) {
+  case 1:
+ ms.Sample0XOffset  = 0.5;
+ ms.Sample0YOffset  = 0.5;
+ break;
+  case 2:
+ ms.Sample0XOffset  = 0.25;
+ ms.Sample0YOffset  = 0.25;
+ ms.Sample1XOffset  = 0.75;
+ ms.Sample1YOffset  = 0.75;
+ break;
+  case 4:
+ ms.Sample0XOffset  = 0.375;
+ ms.Sample0YOffset  = 0.125;
+ ms.Sample1XOffset  = 0.875;
+ ms.Sample1YOffset  = 0.375;
+ ms.Sample2XOffset  = 0.125;
+ ms.Sample2YOffset  = 0.625;
+ ms.Sample3XOffset  = 0.625;
+ ms.Sample3YOffset  = 0.875;
+ break;
+  case 8:
+ ms.Sample0XOffset  = 0.5625;
+ ms.Sample0YOffset  = 0.3125;
+ ms.Sample1XOffset  = 0.4375;
+ ms.Sample1YOffset  = 0.6875;
+ ms.Sample2XOffset  = 0.8125;
+ ms.Sample2YOffset  = 0.5625;
+ ms.Sample3XOffset  = 0.3125;
+ ms.Sample3YOffset  = 0.1875;
+ ms.Sample4XOffset  = 0.1875;
+ ms.Sample4YOffset  = 0.8125;
+ ms.Sample5XOffset  = 0.0625;
+ ms.Sample5YOffset  = 0.4375;
+ ms.Sample6XOffset  = 0.6875;
+ ms.Sample6YOffset  = 0.9375;
+ ms.Sample7XOffset  = 0.9375;
+ ms.Sample7YOffset  = 0.0625;
+ break;
+  default:
+ break;
+  }
+#endif
}
 
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 2/3] anv/pipeline: Add sample locations for gen7-7.5

2016-08-09 Thread Jason Ekstrand
LGTM

On Tue, Aug 9, 2016 at 2:41 PM, Anuj Phogat  wrote:

> V1: Add multisample positions (Nanley)
> V2: Fix 8x sample positions to match OpenGL (Anuj)
> V3: Vulkan has standard sample locations. They need not be same as
> in OpenGL. (Anuj)
>
> Signed-off-by: Anuj Phogat 
> Reviewed-by: Jason Ekstrand 
> ---
>  src/intel/vulkan/genX_pipeline_util.h | 47 ++
> +
>  1 file changed, 47 insertions(+)
>
> diff --git a/src/intel/vulkan/genX_pipeline_util.h
> b/src/intel/vulkan/genX_pipeline_util.h
> index d9d8ca4..64b89cd 100644
> --- a/src/intel/vulkan/genX_pipeline_util.h
> +++ b/src/intel/vulkan/genX_pipeline_util.h
> @@ -475,6 +475,7 @@ emit_ms_state(struct anv_pipeline *pipeline,
> anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
>ms.NumberofMultisamples   = log2_samples;
>
> +#if GEN_GEN >= 8
>/* The PRM says that this bit is valid only for DX9:
> *
> *SW can choose to set this bit only for DX9 API. DX10/OGL API's
> @@ -482,6 +483,52 @@ emit_ms_state(struct anv_pipeline *pipeline,
> */
>ms.PixelPositionOffsetEnable  = false;
>ms.PixelLocation  = CENTER;
> +#else
> +  ms.PixelLocation  = PIXLOC_CENTER;
> +
> +  switch (samples) {
> +  case 1:
> + ms.Sample0XOffset  = 0.5;
> + ms.Sample0YOffset  = 0.5;
> + break;
> +  case 2:
> + ms.Sample0XOffset  = 0.25;
> + ms.Sample0YOffset  = 0.25;
> + ms.Sample1XOffset  = 0.75;
> + ms.Sample1YOffset  = 0.75;
> + break;
> +  case 4:
> + ms.Sample0XOffset  = 0.375;
> + ms.Sample0YOffset  = 0.125;
> + ms.Sample1XOffset  = 0.875;
> + ms.Sample1YOffset  = 0.375;
> + ms.Sample2XOffset  = 0.125;
> + ms.Sample2YOffset  = 0.625;
> + ms.Sample3XOffset  = 0.625;
> + ms.Sample3YOffset  = 0.875;
> + break;
> +  case 8:
> + ms.Sample0XOffset  = 0.5625;
> + ms.Sample0YOffset  = 0.3125;
> + ms.Sample1XOffset  = 0.4375;
> + ms.Sample1YOffset  = 0.6875;
> + ms.Sample2XOffset  = 0.8125;
> + ms.Sample2YOffset  = 0.5625;
> + ms.Sample3XOffset  = 0.3125;
> + ms.Sample3YOffset  = 0.1875;
> + ms.Sample4XOffset  = 0.1875;
> + ms.Sample4YOffset  = 0.8125;
> + ms.Sample5XOffset  = 0.0625;
> + ms.Sample5YOffset  = 0.4375;
> + ms.Sample6XOffset  = 0.6875;
> + ms.Sample6YOffset  = 0.9375;
> + ms.Sample7XOffset  = 0.9375;
> + ms.Sample7YOffset  = 0.0625;
> + break;
> +  default:
> + break;
> +  }
> +#endif
> }
>
> anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
> --
> 2.5.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Moving amdgpu/addrlib into a git submodule

2016-08-09 Thread Jason Ekstrand
On Tue, Aug 9, 2016 at 1:12 PM, Dave Airlie  wrote:

> >
> > tbh, git submodules are more annoying than they need to be, and I'm
> > not really terribly excited to use that for something that is a build
> > dependency.  Maybe just move it into libdrm instead?
> >
>
> I've only had to use git submodules once with spice project, and it
> was a nightmare. It makes packaging etc a real pita.
>
> Alternatives are something like a fetch external sources script,
> that does git submodules but does it better, you'll see Vulkan-CTS
> etc use something like that, it would have to be integrated with
> the build system a bit better though.
>

This isn't a plug for submodules, but *please* don't base anything on the
Vulkan CTS fetch_sources script.  We've had no end of trouble with their
git hacks.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] glcpp: Track the actual version instead of just the version_resolved flag

2016-08-09 Thread Ian Romanick
From: Ian Romanick 

Signed-off-by: Ian Romanick 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/compiler/glsl/glcpp/glcpp-parse.y | 10 +-
 src/compiler/glsl/glcpp/glcpp.h   |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
b/src/compiler/glsl/glcpp/glcpp-parse.y
index ca376d9..05a76c7 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -396,13 +396,13 @@ control_line_success:
_glcpp_parser_skip_stack_pop (parser, & @1);
} NEWLINE
 |  HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE {
-   if (parser->version_resolved) {
+   if (parser->version != 0) {
glcpp_error(& @1, parser, "#version must appear on the 
first line");
}
_glcpp_parser_handle_version_declaration(parser, $3, NULL, 
true);
}
 |  HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE {
-   if (parser->version_resolved) {
+   if (parser->version != 0) {
glcpp_error(& @1, parser, "#version must appear on the 
first line");
}
_glcpp_parser_handle_version_declaration(parser, $3, $4, true);
@@ -1346,7 +1346,7 @@ glcpp_parser_create(glcpp_extension_iterator extensions, 
void *state, gl_api api
parser->extensions = extensions;
parser->state = state;
parser->api = api;
-   parser->version_resolved = false;
+   parser->version = 0;
 
parser->has_new_line_number = 0;
parser->new_line_number = 1;
@@ -2280,10 +2280,10 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
  const char *es_identifier,
  bool explicitly_set)
 {
-   if (parser->version_resolved)
+   if (parser->version != 0)
   return;
 
-   parser->version_resolved = true;
+   parser->version = version;
 
add_builtin_define (parser, "__VERSION__", version);
 
diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h
index 07eaf68..9f35b05 100644
--- a/src/compiler/glsl/glcpp/glcpp.h
+++ b/src/compiler/glsl/glcpp/glcpp.h
@@ -206,7 +206,7 @@ struct glcpp_parser {
glcpp_extension_iterator extensions;
void *state;
gl_api api;
-   bool version_resolved;
+   unsigned version;
bool has_new_line_number;
int new_line_number;
bool has_new_source_number;
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] glcpp: Only disallow #undef of pre-defined macros on GLSL ES >= 3.00 shaders

2016-08-09 Thread Ian Romanick
From: Ian Romanick 

Section 3.4 (Preprocessor) of the GLSL ES 3.00 spec says:

   It is an error to undefine or to redefine a built-in (pre-defined)
   macro name.

The GLSL ES 1.00 spec does not contain this text.

Section 3.3 (Preprocessor) of the GLSL 1.30 spec says:

   #define and #undef functionality are defined as is standard for C++
   preprocessors for macro definitions both with and without macro
   parameters.

At least as far as I can tell GCC allows '#undef __FILE__'.  Furthermore,
there are desktop OpenGL conformance tests that expect '#undef
__VERSION__' and '#undef GL_core_profile' to work.

Fixes:

GL45-CTS.shaders.preprocessor.definitions.undefine_version_vertex
GL45-CTS.shaders.preprocessor.definitions.undefine_version_fragment
GL45-CTS.shaders.preprocessor.definitions.undefine_core_profile_vertex
GL45-CTS.shaders.preprocessor.definitions.undefine_core_profile_fragment

Signed-off-by: Ian Romanick 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/compiler/glsl/glcpp/glcpp-parse.y | 32 
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
b/src/compiler/glsl/glcpp/glcpp-parse.y
index 05a76c7..eff53be 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -278,10 +278,34 @@ control_line_success:
HASH_TOKEN DEFINE_TOKEN define
 |  HASH_TOKEN UNDEF IDENTIFIER NEWLINE {
macro_t *macro;
-   if (strcmp("__LINE__", $3) == 0
-   || strcmp("__FILE__", $3) == 0
-   || strcmp("__VERSION__", $3) == 0
-   || strncmp("GL_", $3, 3) == 0)
+
+/* Section 3.4 (Preprocessor) of the GLSL ES 3.00 spec says:
+ *
+ *It is an error to undefine or to redefine a built-in
+ *(pre-defined) macro name.
+ *
+ * The GLSL ES 1.00 spec does not contain this text.
+ *
+ * Section 3.3 (Preprocessor) of the GLSL 1.30 spec says:
+ *
+ *#define and #undef functionality are defined as is
+ *standard for C++ preprocessors for macro definitions
+ *both with and without macro parameters.
+ *
+ * At least as far as I can tell GCC allow '#undef __FILE__'.
+ * Furthermore, there are desktop OpenGL conformance tests
+ * that expect '#undef __VERSION__' and '#undef
+ * GL_core_profile' to work.
+ *
+ * Only disallow #undef of pre-defined macros on GLSL ES >=
+ * 3.00 shaders.
+ */
+   if (parser->is_gles &&
+parser->version >= 300 &&
+(strcmp("__LINE__", $3) == 0
+ || strcmp("__FILE__", $3) == 0
+ || strcmp("__VERSION__", $3) == 0
+ || strncmp("GL_", $3, 3) == 0))
glcpp_error(& @1, parser, "Built-in (pre-defined)"
" macro names cannot be undefined.");
 
-- 
2.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97214] X not running with error "Failed to make EGL context current"

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97214

Chad Versace  changed:

   What|Removed |Added

 CC||c...@kiwitree.net

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] egl: android: query native window default width and height

2016-08-09 Thread Chad Versace
On 08/09/2016 01:49 PM, Haixia Shi wrote:
> Pinging this thread - any objection to commit this? Thanks.

I pushed it.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97270] [softpipe] piglit ext_framebuffer_multisample-fast-clear GL_ARB_texture_rg single-sample regression

2016-08-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97270

Bug ID: 97270
   Summary: [softpipe] piglit
ext_framebuffer_multisample-fast-clear
GL_ARB_texture_rg single-sample regression
   Product: Mesa
   Version: git
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Keywords: bisected, regression
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: v...@freedesktop.org
QA Contact: mesa-dev@lists.freedesktop.org
CC: mar...@gmail.com, nhaeh...@gmail.com

mesa: aa920736feeddd1793861651e95bcd09524e024c (12.1.0-devel)


$ ./bin/ext_framebuffer_multisample-fast-clear GL_ARB_texture_rg single-sample
-auto 
Using test set: GL_ARB_texture_rg
Testing GL_R8
Probe color at (0,0)
  Expected: 1.00 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 1.00 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.25 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.75 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.50 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 1.00 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Testing GL_R16
Probe color at (0,0)
  Expected: 1.00 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 1.00 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.25 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.75 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.50 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 1.00 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Testing GL_RG
Probe color at (0,0)
  Expected: 1.00 1.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 1.00 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.25 0.50 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.75 0.50 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.50 0.25 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 1.00 1.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Testing GL_RG8
Probe color at (0,0)
  Expected: 1.00 1.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 1.00 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.25 0.50 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.75 0.50 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.50 0.25 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 1.00 1.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Testing GL_RG16
Probe color at (0,0)
  Expected: 1.00 1.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 1.00 0.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.25 0.50 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.75 0.50 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 0.50 0.25 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
Probe color at (0,0)
  Expected: 1.00 1.00 0.00 1.00
  Observed: 0.00 0.00 0.00 1.00
PIGLIT: {"result": "fail" }


79dcd69afae4ada47fd4e746e9eec87c6d8028f0 is the first bad commit
commit 79dcd69afae4ada47fd4e746e9eec87c6d8028f0
Author: Marek Olšák 
Date:   Sun Jul 17 20:37:58 2016 +0200

st/mesa: remove excessive shader state dirtying

This just needs to be done by st_validate_state.

v2: add "shaders_may_be_dirty" flags for not skipping st_validate_state
on _NEW_PROGRAM to detect real shader changes

Reviewed-by: Nicolai Hähnle 

:04 04 4238d586367bef5a5a86b083a13b523b6f7e

Re: [Mesa-dev] [PATCH v2 00/27] i965: Rework the blorp API to use ISL

2016-08-09 Thread Chad Versace

On 07/26/2016 03:11 PM, Jason Ekstrand wrote:

This patch series builds on the previous one I just sent and reworks the
blorp API to be entirely ISL.  The last bits of intel_mipmap_tree are
removed from the ISL internals and shoved into brw_blorp.c/h which simply
serves as a wrapper around the ISL-centric brw_blorp.h file.  Eventually,
the plan is to completely separate the internals of blorp from the i965
driver and share it with the Vulkan driver.  This is just one more step on
the very long road to getting there.  This series can be found here:

https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=review/blorp-isl-pt2

The best place to start reviewing is by looking at patch 25/27 where we
make the final API changes.  That shows off where things are going.  That
commit can be found on cgit here:

https://cgit.freedesktop.org/~jekstrand/mesa/commit/?h=review/blorp-isl-pt2&id=b9a55af924d9cab317224ccb9b507b9f87b44c5d

Happy Reviewing!


Ping! What's the status of this series?

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] prog_hash_table: Convert to using util/hash_table.h.

2016-08-09 Thread Ian Romanick
On 08/09/2016 10:17 AM, Eric Anholt wrote:
> Improves glretrace -b servo.trace (a trace of Mozilla's servo rendering
> engine booting, rendering a page, and exiting) from 1.8s to 1.1s.  It uses
> a large uniform array of structs, making a huge number of separate program
> resources, and the fixed-size hash table was killing it.  Given how many
> times we've improved performance by swapping the hash table to
> util/hash_table.h, just do it once and for all.
> 
> This just rebases the old hash table API on top of util/, for minimal
> diff.  Cleaning things up is left for later, particularly because I want
> to fix up the new hash table API a little bit.
> ---
>  src/mesa/program/hash_table.h  |  78 +++-
>  src/mesa/program/prog_hash_table.c | 181 
> -
>  2 files changed, 54 insertions(+), 205 deletions(-)
> 
> diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h
> index aba5282fe9e5..362eb2ee0a78 100644
> --- a/src/mesa/program/hash_table.h
> +++ b/src/mesa/program/hash_table.h
> @@ -37,6 +37,7 @@
>  #include 
>  #include 
>  #include 
> +#include "util/hash_table.h"
>  
>  struct string_to_uint_map;
>  
> @@ -44,8 +45,6 @@ struct string_to_uint_map;
>  extern "C" {
>  #endif
>  
> -struct hash_table;
> -
>  typedef unsigned (*hash_func_t)(const void *key);
>  typedef bool (*hash_compare_func_t)(const void *key1, const void *key2);
>  
> @@ -60,26 +59,32 @@ typedef bool (*hash_compare_func_t)(const void *key1, 
> const void *key2);
>   * \param hash Function used to compute hash value of input keys.
>   * \param compare  Function used to compare keys.
>   */
> -extern struct hash_table *hash_table_ctor(unsigned num_buckets,
> -hash_func_t hash, hash_compare_func_t compare);
> -
> +static inline struct hash_table *hash_table_ctor(unsigned num_buckets,
^
Add UNUSED to the num_buckets parameter here to avoid piles of unused
parameter warnings in my build.

> +hash_func_t hash, hash_compare_func_t compare)
> +{
> +   return _mesa_hash_table_create(NULL, hash, compare);
> +}
>  
>  /**
>   * Release all memory associated with a hash table
>   *
>   * \warning
> - * This function cannot release memory occupied either by keys or data.
> + * This function does not release memory occupied either by keys or data.
>   */
> -extern void hash_table_dtor(struct hash_table *ht);
> -
> +static inline void hash_table_dtor(struct hash_table *ht)
> +{
> +   return _mesa_hash_table_destroy(ht, NULL);
> +}
>  
>  /**
>   * Flush all entries from a hash table
>   *
>   * \param ht  Table to be cleared of its entries.
>   */
> -extern void hash_table_clear(struct hash_table *ht);
> -
> +static inline void hash_table_clear(struct hash_table *ht)
> +{
> +   return _mesa_hash_table_clear(ht, NULL);
> +}
>  
>  /**
>   * Search a hash table for a specific element
> @@ -92,25 +97,28 @@ extern void hash_table_clear(struct hash_table *ht);
>   * the matching key was added.  If no matching key exists in the table,
>   * \c NULL is returned.
>   */
> -extern void *hash_table_find(struct hash_table *ht, const void *key);
> -
> +static inline void *hash_table_find(struct hash_table *ht, const void *key)
> +{
> +   struct hash_entry *entry = _mesa_hash_table_search(ht, key);
> +   if (!entry)
> +  return NULL;
> +   return entry->data;
> +}
>  
>  /**
>   * Add an element to a hash table
>   *
>   * \warning
> - * If \c key is already in the hash table, it will be added again.  Future
> - * calls to \c hash_table_find and \c hash_table_remove will return or 
> remove,
> - * repsectively, the most recently added instance of \c key.
> - *
> - * \warning
>   * The value passed by \c key is kept in the hash table and is used by later
>   * calls to \c hash_table_find.
>   *
>   * \sa hash_table_replace
>   */
> -extern void hash_table_insert(struct hash_table *ht, void *data,
> -const void *key);
> +static inline void hash_table_insert(struct hash_table *ht, void *data,
> + const void *key)
> +{
> +   _mesa_hash_table_insert(ht, key, data);
> +}
>  
>  /**
>   * Add an element to a hash table with replacement
> @@ -126,13 +134,29 @@ extern void hash_table_insert(struct hash_table *ht, 
> void *data,
>   *
>   * \sa hash_table_insert
>   */
> -extern bool hash_table_replace(struct hash_table *ht, void *data,
> -const void *key);
> +static inline bool hash_table_replace(struct hash_table *ht, void *data,
> +  const void *key)
> +{
> +   struct hash_entry *entry = _mesa_hash_table_search(ht, key);
> +   if (entry) {
> +  entry->data = data;
> +  return true;
> +   } else {
> +  _mesa_hash_table_insert(ht, key, data);
> +  return false;
> +   }
> +}
>  
>  /**
>   * Remove a specific element from a hash table.
>   */
> -extern void hash_table_remove(struct hash_table *ht, const void *key);
> +static inline void hash_table_remove(struct h

  1   2   >