[Mesa-dev] [Bug 105807] [Regression, bisected]: 3D Rendering not working correctly in Warhammer 40k: Dawn of War II

2018-04-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105807

Timothy Arceri  changed:

   What|Removed |Added

  Component|Mesa core   |Drivers/DRI/i965
 QA Contact|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org
   Assignee|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org

--- Comment #2 from Timothy Arceri  ---
Works correctly in radeonsi but doesn't work in i965. Probably requires more
restrictions when 3.1 compat is not available. Moving to i965 where it's less
likely to get forgotten about.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] ac/nir: Use an array instead of hashtable for SSA defs.

2018-04-10 Thread Bas Nieuwenhuizen
Saves about 2% of compile time for F1 2017, and reduces the code
size of an optimized libvulkan_radeon.so by about 1 KiB.

This still keeps the hashtable, as we also stored blocks in there.
---
 src/amd/common/ac_nir_to_llvm.c | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 053c19808f..7c2bd5c0cc 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -38,6 +38,8 @@ struct ac_nir_context {
 
gl_shader_stage stage;
 
+   LLVMValueRef *ssa_defs;
+
struct hash_table *defs;
struct hash_table *phis;
struct hash_table *vars;
@@ -87,8 +89,7 @@ static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
 static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
 {
assert(src.is_ssa);
-   struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa);
-   return (LLVMValueRef)entry->data;
+   return nir->ssa_defs[src.ssa->index];
 }
 
 static LLVMValueRef
@@ -1028,8 +1029,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
if (result) {
assert(instr->dest.dest.is_ssa);
result = ac_to_integer(&ctx->ac, result);
-   _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
-   result);
+   ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
}
 }
 
@@ -1062,7 +1062,7 @@ static void visit_load_const(struct ac_nir_context *ctx,
} else
value = values[0];
 
-   _mesa_hash_table_insert(ctx->defs, &instr->def, value);
+   ctx->ssa_defs[instr->def.index] = value;
 }
 
 static LLVMValueRef
@@ -3095,7 +3095,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
break;
}
if (result) {
-   _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
+   ctx->ssa_defs[instr->dest.ssa.index] = result;
}
 }
 
@@ -3596,7 +3596,7 @@ write_result:
if (result) {
assert(instr->dest.is_ssa);
result = ac_to_integer(&ctx->ac, result);
-   _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
+   ctx->ssa_defs[instr->dest.ssa.index] = result;
}
 }
 
@@ -3606,7 +3606,7 @@ static void visit_phi(struct ac_nir_context *ctx, 
nir_phi_instr *instr)
LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
 
-   _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
+   ctx->ssa_defs[instr->dest.ssa.index] = result;
_mesa_hash_table_insert(ctx->phis, instr, result);
 }
 
@@ -3644,7 +3644,7 @@ static void visit_ssa_undef(struct ac_nir_context *ctx,
else {
undef = LLVMGetUndef(LLVMVectorType(type, num_components));
}
-   _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
+   ctx->ssa_defs[instr->def.index] = undef;
 }
 
 static void visit_jump(struct ac_llvm_context *ctx,
@@ -3927,6 +3927,9 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct 
ac_shader_abi *abi,
 
func = (struct nir_function *)exec_list_get_head(&nir->functions);
 
+   nir_index_ssa_defs(func->impl);
+   ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
+
setup_locals(&ctx, func);
 
if (nir->info.stage == MESA_SHADER_COMPUTE)
@@ -3940,6 +3943,7 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct 
ac_shader_abi *abi,
  ctx.abi->outputs);
 
free(ctx.locals);
+   free(ctx.ssa_defs);
ralloc_free(ctx.defs);
ralloc_free(ctx.phis);
ralloc_free(ctx.vars);
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] egl/x11: Handle both depth 30 formats for eglCreateImage(). (v2)

2018-04-10 Thread Mario Kleiner
We need to distinguish if the backing storage of a pixmap
is XRGB2101010 or XBGR2101010, as different gpu hw supports
different formats. NVidia hw prefers XBGR, whereas AMD and
Intel are happy with XRGB.

Use the red channel mask of the first depth 30 visual of
the x-screen to distinguish which hw format to choose.

This fixes desktop composition of color depth 30 windows
when the X11 compositor uses EGL.

v2: Switch from using the visual of the root window to simply
using the first depth 30 visual for the x-screen, as testing
shows that each driver only exports either xrgb ordering or
xbgr ordering for the channel masks of its depth 30 visuals,
so this should be unambiguous and avoid trouble if X ever
supports depth 30 pixmaps on screens with a non-depth 30 root
window visual. This is per Michel's suggestion.

Signed-off-by: Mario Kleiner 
Cc: Michel Dänzer 
---
 src/egl/drivers/dri2/egl_dri2.h  |  7 +++
 src/egl/drivers/dri2/platform_x11.c  | 36 +++-
 src/egl/drivers/dri2/platform_x11_dri3.c | 12 +++
 3 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index adabc52..7e7032d 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -413,6 +413,8 @@ EGLBoolean
 dri2_initialize_x11(_EGLDriver *drv, _EGLDisplay *disp);
 void
 dri2_teardown_x11(struct dri2_egl_display *dri2_dpy);
+unsigned int
+dri2_x11_get_red_mask_for_depth(struct dri2_egl_display *dri2_dpy, int depth);
 #else
 static inline EGLBoolean
 dri2_initialize_x11(_EGLDriver *drv, _EGLDisplay *disp)
@@ -421,6 +423,11 @@ dri2_initialize_x11(_EGLDriver *drv, _EGLDisplay *disp)
 }
 static inline void
 dri2_teardown_x11(struct dri2_egl_display *dri2_dpy) {}
+static inline unsigned int
+dri2_x11_get_red_mask_for_depth(struct dri2_egl_display *dri2_dpy, int depth)
+{
+   return 0;
+}
 #endif
 
 #ifdef HAVE_DRM_PLATFORM
diff --git a/src/egl/drivers/dri2/platform_x11.c 
b/src/egl/drivers/dri2/platform_x11.c
index 6c287b4..47dd268 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -209,6 +209,36 @@ get_xcb_screen(xcb_screen_iterator_t iter, int screen)
 return NULL;
 }
 
+static xcb_visualtype_t *
+get_xcb_visualtype_for_depth(struct dri2_egl_display *dri2_dpy, int depth)
+{
+   xcb_visualtype_iterator_t visual_iter;
+   xcb_screen_t *screen = dri2_dpy->screen;
+   xcb_depth_iterator_t depth_iter = 
xcb_screen_allowed_depths_iterator(screen);
+
+   for (; depth_iter.rem; xcb_depth_next(&depth_iter)) {
+  if (depth_iter.data->depth != depth)
+ continue;
+
+  visual_iter = xcb_depth_visuals_iterator(depth_iter.data);
+  if (visual_iter.rem)
+ return visual_iter.data;
+   }
+
+   return NULL;
+}
+
+/* Get red channel mask for given depth. */
+unsigned int
+dri2_x11_get_red_mask_for_depth(struct dri2_egl_display *dri2_dpy, int depth)
+{
+   unsigned int red_mask = 0;
+   xcb_visualtype_t *visual = get_xcb_visualtype_for_depth(dri2_dpy, depth);
+   if (visual)
+  red_mask = visual->red_mask;
+
+   return red_mask;
+}
 
 /**
  * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface().
@@ -1050,7 +1080,11 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, 
_EGLContext *ctx,
   format = __DRI_IMAGE_FORMAT_XRGB;
   break;
case 30:
-  format = __DRI_IMAGE_FORMAT_XRGB2101010;
+  /* Different preferred formats for different hw */
+  if (dri2_x11_get_red_mask_for_depth(dri2_dpy, 30) == 0x3ff)
+ format = __DRI_IMAGE_FORMAT_XBGR2101010;
+  else
+ format = __DRI_IMAGE_FORMAT_XRGB2101010;
   break;
case 32:
   format = __DRI_IMAGE_FORMAT_ARGB;
diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c 
b/src/egl/drivers/dri2/platform_x11_dri3.c
index a41e401..6c522ae 100644
--- a/src/egl/drivers/dri2/platform_x11_dri3.c
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -40,7 +40,7 @@
 #include "loader_dri3_helper.h"
 
 static uint32_t
-dri3_format_for_depth(uint32_t depth)
+dri3_format_for_depth(struct dri2_egl_display *dri2_dpy, uint32_t depth)
 {
switch (depth) {
case 16:
@@ -48,7 +48,11 @@ dri3_format_for_depth(uint32_t depth)
case 24:
   return __DRI_IMAGE_FORMAT_XRGB;
case 30:
-  return __DRI_IMAGE_FORMAT_XRGB2101010;
+  /* Different preferred formats for different hw */
+  if (dri2_x11_get_red_mask_for_depth(dri2_dpy, 30) == 0x3ff)
+ return __DRI_IMAGE_FORMAT_XBGR2101010;
+  else
+ return __DRI_IMAGE_FORMAT_XRGB2101010;
case 32:
   return __DRI_IMAGE_FORMAT_ARGB;
default:
@@ -293,7 +297,7 @@ dri3_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext 
*ctx,
   return NULL;
}
 
-   format = dri3_format_for_depth(bp_reply->depth);
+   format = dri3_format_for_depth(dri2_dpy, bp_reply->depth);
if (format == __DRI_IMAGE_FORMAT_NONE) {
   _eglError(EGL_BAD_PARAMET

[Mesa-dev] [PATCH v2] dri3: Prevent multiple freeing of buffers.

2018-04-10 Thread Sergii Romantsov
Commit 3160cb86aa92 adds an optimization with the flag 'reallocate'.
Processing of that flag causes buffers to be freed while a pointer to them
is still held on the caller's stack, and that pointer is then freed again.

Fixes: 3160cb86aa92 "egl/x11: Re-allocate buffers if format is suboptimal"

v2:
 used flag 'busy' instead of introducing a new one.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105906
Signed-off-by: Sergii Romantsov 
Tested-by: Andriy Khulap 
---
 src/loader/loader_dri3_helper.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c
index fe17df1..a934db1 100644
--- a/src/loader/loader_dri3_helper.c
+++ b/src/loader/loader_dri3_helper.c
@@ -1688,6 +1688,7 @@ dri3_get_buffer(__DRIdrawable *driDrawable,
(buffer_type == loader_dri3_buffer_front && draw->have_fake_front))
   && buffer) {
 
+ buffer->busy = true;
  /* Fill the new buffer with data from an old buffer */
  dri3_fence_await(draw->conn, draw, buffer);
  if (!loader_dri3_blit_image(draw,
@@ -1731,6 +1732,7 @@ dri3_get_buffer(__DRIdrawable *driDrawable,
   draw->buffers[buf_id] = buffer;
}
dri3_fence_await(draw->conn, draw, buffer);
+   buffer = draw->buffers[buf_id];
 
/*
 * Do we need to preserve the content of a previous buffer?
@@ -1744,7 +1746,8 @@ dri3_get_buffer(__DRIdrawable *driDrawable,
if (buffer_type == loader_dri3_buffer_back &&
draw->cur_blit_source != -1 &&
draw->buffers[draw->cur_blit_source] &&
-   buffer != draw->buffers[draw->cur_blit_source]) {
+   buffer != draw->buffers[draw->cur_blit_source] &&
+   buffer != NULL) {
 
   struct loader_dri3_buffer *source = draw->buffers[draw->cur_blit_source];
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir: Use an array instead of hashtable for SSA defs.

2018-04-10 Thread Samuel Pitoiset

Reviewed-by: Samuel Pitoiset 

On 04/10/2018 09:33 AM, Bas Nieuwenhuizen wrote:

Saves about 2% of compile time for F1 2017, as well as reduce code
size of an optimized libvulkan_radeon.so by about 1 KiB.

This still keeps the hashtable, as we also stored blocks in there.
---
  src/amd/common/ac_nir_to_llvm.c | 22 +-
  1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 053c19808f..7c2bd5c0cc 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -38,6 +38,8 @@ struct ac_nir_context {
  
  	gl_shader_stage stage;
  
+	LLVMValueRef *ssa_defs;

+
struct hash_table *defs;
struct hash_table *phis;
struct hash_table *vars;
@@ -87,8 +89,7 @@ static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
  static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
  {
assert(src.is_ssa);
-   struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa);
-   return (LLVMValueRef)entry->data;
+   return nir->ssa_defs[src.ssa->index];
  }
  
  static LLVMValueRef

@@ -1028,8 +1029,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
if (result) {
assert(instr->dest.dest.is_ssa);
result = ac_to_integer(&ctx->ac, result);
-   _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
-   result);
+   ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
}
  }
  
@@ -1062,7 +1062,7 @@ static void visit_load_const(struct ac_nir_context *ctx,

} else
value = values[0];
  
-	_mesa_hash_table_insert(ctx->defs, &instr->def, value);

+   ctx->ssa_defs[instr->def.index] = value;
  }
  
  static LLVMValueRef

@@ -3095,7 +3095,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
break;
}
if (result) {
-   _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
+   ctx->ssa_defs[instr->dest.ssa.index] = result;
}
  }
  
@@ -3596,7 +3596,7 @@ write_result:

if (result) {
assert(instr->dest.is_ssa);
result = ac_to_integer(&ctx->ac, result);
-   _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
+   ctx->ssa_defs[instr->dest.ssa.index] = result;
}
  }
  
@@ -3606,7 +3606,7 @@ static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)

LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
  
-	_mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);

+   ctx->ssa_defs[instr->dest.ssa.index] = result;
_mesa_hash_table_insert(ctx->phis, instr, result);
  }
  
@@ -3644,7 +3644,7 @@ static void visit_ssa_undef(struct ac_nir_context *ctx,

else {
undef = LLVMGetUndef(LLVMVectorType(type, num_components));
}
-   _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
+   ctx->ssa_defs[instr->def.index] = undef;
  }
  
  static void visit_jump(struct ac_llvm_context *ctx,

@@ -3927,6 +3927,9 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct 
ac_shader_abi *abi,
  
  	func = (struct nir_function *)exec_list_get_head(&nir->functions);
  
+	nir_index_ssa_defs(func->impl);

+   ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
+
setup_locals(&ctx, func);
  
  	if (nir->info.stage == MESA_SHADER_COMPUTE)

@@ -3940,6 +3943,7 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct 
ac_shader_abi *abi,
  ctx.abi->outputs);
  
  	free(ctx.locals);

+   free(ctx.ssa_defs);
ralloc_free(ctx.defs);
ralloc_free(ctx.phis);
ralloc_free(ctx.vars);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105942] Graphical artefacts after update to mesa 18.0.0-2

2018-04-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105942

Samuel Pitoiset  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #5 from Samuel Pitoiset  ---
Should be fixed with
https://cgit.freedesktop.org/mesa/mesa/commit/?id=74b0b869ddd4dbd36482aa9bec3403d45396af2d

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: correct si_vgt_param_key on big endian machines

2018-04-10 Thread Michel Dänzer
On 2018-04-10 08:38 AM, Gert Wollny wrote:
> Am Montag, den 09.04.2018, 17:26 -0400 schrieb Marek Olšák:
>> On Mon, Apr 9, 2018 at 5:19 PM, Gert Wollny 
>> wrote:
>>>
>>> There is another option: Check at configuration time whether the
>>> bit field layout is like the low or the high endian layout you
>>> already implemented, and instead of basing the selection of the
>>> struct layout on the big/low-endianess of the architecture, base it
>>> on this test.
>>>
>>> It would probably be prudent to test both layouts and then fail
>>> configuration if none of the two reflects the actual layout (at which
>>> point one would have to think about how to implement all the bit
>>> shifting properly).
>>>
>
> Which would you prefer?
>

 I don't mind bitfields. They make the code nice and tiny. Shifts
 would decrease readability.
>>> The problem is that the layout of bitfields is compiler-dependent.
>>
>> We can fix it after we discover that it's a real problem on a
>> compiler we care about.
>>
> I don't think it is a good idea to rely on undefined behaviour, but if
> it is done, then the least one can do is to add a test that flags an
> upcoming problem before it can do any damage; one example how to do
> this I described above, another approach would be to add a unit test.

I agree. In particular, this has nothing to do with endianness, since
union si_vgt_param_key is only ever accessed by the CPU in its native
byte order.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: correct si_vgt_param_key on big endian machines

2018-04-10 Thread Bas Vermeulen
On Mon, Apr 9, 2018 at 11:19 PM, Gert Wollny  wrote:

> Am Montag, den 09.04.2018, 14:03 -0400 schrieb Marek Olšák:
> > On Mon, Apr 9, 2018 at 10:51 AM, Bas Vermeulen 
> > wrote:
> Which solution is better depends on what is done more often: reading
> the index or writing to the bit fields.
>

The bitfields are read and written, and the index is mostly read. I found
four instances of the bitfields being written after which the index needs to
be updated.


> > > I am working on a new version of this patch. I have one version
> > > which does away with all the bitfields, and uses functions to
> > > update the index.
> This emulates the code the compiler would create, but it requires that
> for each bit field setters (and getters?) must be implemented.
>

Yes. I have a git branch with this change ready if that's what's
wanted/needed.


> > > Another approach would be to change the union to a struct, and use
> > > a function to get the index.
> This method has the advantage that only the access to the index needs
> new implementation.
>

I can prepare a patch for this as well.


> > > Yet another approach would be to keep the contents of the union and
> > > the index in one struct, and use a function to
> > > (re)calculate the index.
> I don't think that would make much sense.
>

It adds four lines to the code, and every key->u.xxx has its "u." removed.
But future implementations need to remember to call that function if any of
the bitfields are changed, which can be annoying.

> There is another option: Check at configuration time whether the bit
> field layout is like the low or the high endian layout you already
> implemented, and instead of basing the selection of the struct layout
> on the big/low-endianess of the architecture, base it on this test.
>
> It would probably be prudent to test both layouts and then fail
> configuration if none of the two reflects the actual layout (at which
> point one would have to think about how to implement all the bit
> shifting properly).


Or just keep the union dependent on endianness, and add an assert/check/test
to make sure that everything works as expected.


> > >
> > > Which would you prefer?
> > >
> >
> > I don't mind bitfields. They make the code nice and tiny. Shifts
> > would decrease readability.
> > The problem is that the layout of bitfields is compiler-dependent.
>

Let me know which form you guys want in order to have this included. I just
want it fixed; I don't really care about the form. :)

Bas Vermeulen
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] docs/release-calendar: update to include 18.1 and 18.2

2018-04-10 Thread Juan A. Suarez Romero
On Mon, 2018-04-09 at 19:02 +0100, Emil Velikov wrote:
> From: Emil Velikov 
> 
> Dylan has kindly stepped up to help with 18.1.0, while I've taken the
> liberty to nominate Andres for 18.2.0 ;-)
> 

I would like to replace Andres for the 18.0.x releases. We already talked about
that and we both think it is a good idea.

With that change, the remaining proposal looks good to me.


Reviewed-by: Juan A. Suarez 



> As always, people are welcome to swap/adjust where needed.
> 
> Cc: Dylan Baker 
> Cc: Andres Gomez 
> Cc: Juan A. Suarez Romero 
> Signed-off-by: Emil Velikov 
> ---
>  docs/release-calendar.html | 84 
> --
>  1 file changed, 82 insertions(+), 2 deletions(-)
> 
> diff --git a/docs/release-calendar.html b/docs/release-calendar.html
> index 8f588ab46c..cbaed4d5d9 100644
> --- a/docs/release-calendar.html
> +++ b/docs/release-calendar.html
> @@ -43,10 +43,10 @@ if you'd like to nominate a patch in the next stable 
> release.
>  2018-04-06
>  17.3.9
>  Juan A. Suarez Romero
> -Final planned release for the 17.3 series
> +Last planned 17.3.x release
>  
>  
> -18.0
> +18.0
>  2018-04-06
>  18.0.1
>  Andres Gomez
> @@ -64,6 +64,86 @@ if you'd like to nominate a patch in the next stable 
> release.
>  Andres Gomez
>  
>  
> +
> +2018-05-18
> +18.0.4
> +Andres Gomez
> +Last planned 18.0.x release
> +
> +
> +18.1
> +2018-04-20
> +18.1.0rc1
> +Dylan Baker
> +
> +
> +
> +2018-04-27
> +18.1.0rc2
> +Dylan Baker
> +
> +
> +
> +2018-05-04
> +18.1.0rc3
> +Dylan Baker
> +
> +
> +
> +2018-05-11
> +18.1.0rc4
> +Dylan Baker
> +Last planned RC/Final release
> +
> +
> +TBD
> +18.1.1
> +Emil Velikov
> +
> +
> +
> +TBD
> +18.1.2
> +Emil Velikov
> +
> +
> +
> +TBD
> +18.1.3
> +Emil Velikov
> +
> +
> +
> +TBD
> +18.1.4
> +Emil Velikov
> +Last planned RC/Final release
> +
> +
> +18.2
> +2018-07-20
> +18.2.0rc1
> +Andres Gomez
> +
> +
> +
> +2018-07-27
> +18.2.0rc2
> +Andres Gomez
> +
> +
> +
> +2018-08-03
> +18.2.0rc3
> +Andres Gomez
> +
> +
> +
> +2018-08-10
> +18.2.0rc4
> +Andres Gomez
> +Last planned RC/Final release
> +
>  
>  
>  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: correct si_vgt_param_key on big endian machines

2018-04-10 Thread Michel Dänzer
On 2018-04-10 10:03 AM, Bas Vermeulen wrote:
> On Mon, Apr 9, 2018 at 11:19 PM, Gert Wollny  wrote:
>> Am Montag, den 09.04.2018, 14:03 -0400 schrieb Marek Olšák:
>>> On Mon, Apr 9, 2018 at 10:51 AM, Bas Vermeulen 
> 
>> There is another option: Check at configuration time whether the bit
>> field layout is like the low or the high endian layout you already
>> implemented, and instead of basing the selection of the struct layout
>> on the big/low-endianess of the architecture, base it on this test.
>>
>> It would probably be prudent to test both layouts and then fail
>> configuration if none of the two reflects the actual layout (at which
>> point one would have to think about how to implement all the bit
>> shifting properly).
> 
> Or just keep the union dependent on endianness, and add an assert/check/test
> to make sure that everything works as expected.

Again, it's nothing to do with endianness — the CPU always accesses the
union in its native byte order.

The issue is that the C standard doesn't define the memory layout of
bit-fields, and the Linux powerpc architecture uses a different layout
than x86.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] dri3: Prevent multiple freeing of buffers.

2018-04-10 Thread Sergii Romantsov
Hello,
I've simply updated the patch,
but it seems to require additional checking, because a call to
*dri3_handle_present_event* may potentially reset '*busy*' when the
condition '*buf->pixmap == ie->pixmap*' holds.

On Fri, Apr 6, 2018 at 9:03 PM, Thomas Hellstrom 
wrote:

> Hi,
>
>
> On 04/06/2018 04:51 PM, Daniel Stone wrote:
>
>> Hi Sergii,
>>
>> On 6 April 2018 at 09:12, Sergii Romantsov 
>> wrote:
>>
>>> Commit 3160cb86aa92 adds optimization with flag 'reallocate'.
>>> Processing of that flag causes buffers to be freed while a pointer to them
>>> is still held on the caller's stack, and that pointer is then freed again.
>>>
>> Thanks a lot for writing this. I take it the core of the problem is
>> that dri3_handle_present_event() can be called whilst we're inside
>> dri3_get_buffer(), which wasn't the case before.
>>
>> This was only introduced as of a727c804a2c1, and I'm not sure I fully
>> follow the rationale for that commit. Thomas, why do we need to
>> process the events? I guess we could also fake it by turning 'busy'
>> into a refcount, which would be incremented/decremented as it is today
>> when posting buffers and getting Idle events, but also when we're
>> holding a local pointer which we can't have stolen from under us.
>>
>> Cheers,
>> Daniel
>>
>
> The motivation for this commit IIRC was that with internal glretrace
> automated tests, we typically would end up with corrupt rendering due to
> invalid viewports after window resizes. The resize events were typically
> not picked up as fast with dri3 as with dri2, so due to the lack of
> documented strategy how to handle window- and viewport resizes with dri3
> clients, I tried to make it mimic dri2 where we had no such issues. The
> reason for the slow pick up was that dri3 was waiting for fences rather
> than on X replies...
>
> /Thomas
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir: Use an array instead of hashtable for SSA defs.

2018-04-10 Thread Timothy Arceri



On 10/04/18 17:33, Bas Nieuwenhuizen wrote:

Saves about 2% of compile time for F1 2017, as well as reduce code
size of an optimized libvulkan_radeon.so by about 1 KiB.

This still keeps the hashtable, as we also stored blocks in there.
---
  src/amd/common/ac_nir_to_llvm.c | 22 +-
  1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 053c19808f..7c2bd5c0cc 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -38,6 +38,8 @@ struct ac_nir_context {
  
  	gl_shader_stage stage;
  
+	LLVMValueRef *ssa_defs;

+
struct hash_table *defs;
struct hash_table *phis;
struct hash_table *vars;
@@ -87,8 +89,7 @@ static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
  static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
  {
assert(src.is_ssa);
-   struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa);
-   return (LLVMValueRef)entry->data;
+   return nir->ssa_defs[src.ssa->index];
  }
  
  static LLVMValueRef

@@ -1028,8 +1029,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
if (result) {
assert(instr->dest.dest.is_ssa);
result = ac_to_integer(&ctx->ac, result);
-   _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
-   result);
+   ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
}
  }
  
@@ -1062,7 +1062,7 @@ static void visit_load_const(struct ac_nir_context *ctx,

} else
value = values[0];
  
-	_mesa_hash_table_insert(ctx->defs, &instr->def, value);

+   ctx->ssa_defs[instr->def.index] = value;
  }
  
  static LLVMValueRef

@@ -3095,7 +3095,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
break;
}
if (result) {
-   _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
+   ctx->ssa_defs[instr->dest.ssa.index] = result;
}
  }
  
@@ -3596,7 +3596,7 @@ write_result:

if (result) {
assert(instr->dest.is_ssa);
result = ac_to_integer(&ctx->ac, result);
-   _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
+   ctx->ssa_defs[instr->dest.ssa.index] = result;
}
  }
  
@@ -3606,7 +3606,7 @@ static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)

LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
  
-	_mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);

+   ctx->ssa_defs[instr->dest.ssa.index] = result;
_mesa_hash_table_insert(ctx->phis, instr, result);
  }
  
@@ -3644,7 +3644,7 @@ static void visit_ssa_undef(struct ac_nir_context *ctx,

else {
undef = LLVMGetUndef(LLVMVectorType(type, num_components));
}
-   _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
+   ctx->ssa_defs[instr->def.index] = undef;
  }
  
  static void visit_jump(struct ac_llvm_context *ctx,

@@ -3927,6 +3927,9 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct 
ac_shader_abi *abi,
  
  	func = (struct nir_function *)exec_list_get_head(&nir->functions);
  
+	nir_index_ssa_defs(func->impl);

+   ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));


You don't seem to allocate enough memory here?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir: Use an array instead of hashtable for SSA defs.

2018-04-10 Thread Timothy Arceri



On 10/04/18 18:17, Timothy Arceri wrote:



On 10/04/18 17:33, Bas Nieuwenhuizen wrote:

Saves about 2% of compile time for F1 2017, as well as reduce code
size of an optimized libvulkan_radeon.so by about 1 KiB.

This still keeps the hashtable, as we also stored blocks in there.
---
  src/amd/common/ac_nir_to_llvm.c | 22 +-
  1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c 
b/src/amd/common/ac_nir_to_llvm.c

index 053c19808f..7c2bd5c0cc 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -38,6 +38,8 @@ struct ac_nir_context {
  gl_shader_stage stage;
+    LLVMValueRef *ssa_defs;
+
  struct hash_table *defs;
  struct hash_table *phis;
  struct hash_table *vars;
@@ -87,8 +89,7 @@ static LLVMTypeRef get_def_type(struct 
ac_nir_context *ctx,

  static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
  {
  assert(src.is_ssa);
-    struct hash_entry *entry = _mesa_hash_table_search(nir->defs, 
src.ssa);

-    return (LLVMValueRef)entry->data;
+    return nir->ssa_defs[src.ssa->index];
  }
  static LLVMValueRef
@@ -1028,8 +1029,7 @@ static void visit_alu(struct ac_nir_context 
*ctx, const nir_alu_instr *instr)

  if (result) {
  assert(instr->dest.dest.is_ssa);
  result = ac_to_integer(&ctx->ac, result);
-    _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
-    result);
+    ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
  }
  }
@@ -1062,7 +1062,7 @@ static void visit_load_const(struct 
ac_nir_context *ctx,

  } else
  value = values[0];
-    _mesa_hash_table_insert(ctx->defs, &instr->def, value);
+    ctx->ssa_defs[instr->def.index] = value;
  }
  static LLVMValueRef
@@ -3095,7 +3095,7 @@ static void visit_intrinsic(struct 
ac_nir_context *ctx,

  break;
  }
  if (result) {
-    _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
+    ctx->ssa_defs[instr->dest.ssa.index] = result;
  }
  }
@@ -3596,7 +3596,7 @@ write_result:
  if (result) {
  assert(instr->dest.is_ssa);
  result = ac_to_integer(&ctx->ac, result);
-    _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
+    ctx->ssa_defs[instr->dest.ssa.index] = result;
  }
  }
@@ -3606,7 +3606,7 @@ static void visit_phi(struct ac_nir_context 
*ctx, nir_phi_instr *instr)

  LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
  LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
-    _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
+    ctx->ssa_defs[instr->dest.ssa.index] = result;
  _mesa_hash_table_insert(ctx->phis, instr, result);
  }
@@ -3644,7 +3644,7 @@ static void visit_ssa_undef(struct 
ac_nir_context *ctx,

  else {
  undef = LLVMGetUndef(LLVMVectorType(type, num_components));
  }
-    _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
+    ctx->ssa_defs[instr->def.index] = undef;
  }
  static void visit_jump(struct ac_llvm_context *ctx,
@@ -3927,6 +3927,9 @@ void ac_nir_translate(struct ac_llvm_context 
*ac, struct ac_shader_abi *abi,

  func = (struct nir_function *)exec_list_get_head(&nir->functions);
+    nir_index_ssa_defs(func->impl);
+    ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));


You don't seem to allocate enough memory here?


blah never mind my bad.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] egl/x11: Handle both depth 30 formats for eglCreateImage().

2018-04-10 Thread Mario Kleiner

On 04/09/2018 12:12 PM, Michel Dänzer wrote:

On 2018-04-06 08:56 PM, Mario Kleiner wrote:

On 04/06/2018 06:41 PM, Michel Dänzer wrote:

On 2018-04-06 06:18 PM, Mario Kleiner wrote:

On Fri, Apr 6, 2018 at 12:01 PM, Michel Dänzer 
wrote:

On 2018-03-27 07:53 PM, Daniel Stone wrote:

On 12 March 2018 at 20:45, Mario Kleiner
 wrote:

We need to distinguish if a backing pixmap of a window is
XRGB2101010 or XBGR2101010, as different gpu hw supports
different formats. NVidia hw prefers XBGR, whereas AMD and
Intel are happy with XRGB.

We use the red channel mask of the visual to distinguish at
depth 30, but because we can't easily get the associated
visual of a Pixmap, we use the visual of the x-screens root
window instead as a proxy.

This fixes desktop composition of color depth 30 windows
when the X11 compositor uses EGL.


I have no reason to doubt your testing, so this patch is:
Acked-by: Daniel Stone 

But it does rather fill me with trepidation, given that X11 Pixmaps
are supposed to be a dumb 'bag of bits', doing nothing else than
providing the same number and size of channels to the actual client
data for the Visual associated with the Window.


As far as X11 is concerned, the number of channels and their sizes
don't
even matter; a pixmap is simply a container for an unsigned integer
of n
bits (where n is the pixmap depth) per pixel, with no inherent meaning
attached to those values.

That said, I'm not sure this is true for EGL as well. But even if it
isn't, there would have to be another mechanism to determine the
format,
e.g. a config associated with the EGL pixmap. The pixmap doesn't even
necessarily have the same depth as the root window, so using the
latter's visual doesn't make much sense.


Hi Michel. I thought with this patch i was implementing what you
proposed earlier as a heuristic on how to get around the "pixmaps
don't have an inherent format, only a depth" problem?


Do you have a pointer to that discussion?


Ok, apologies, i think i was just taking your comment too far as an
inspiration. The best i can find in my inbox atm. is this message of
yours from 24th November 2017 10:44 AM in a mesa-dev thread "Re:
[Mesa-dev] 10-bit Mesa/Gallium support":

"Apologies for the badly formatted followup before, let's try that again:

On 2017-11-23 07:31 PM, Mario Kleiner wrote:


3. In principle the clean solution for nouveau would be to upgrade the
ddx to drmAddFB2 ioctl, and use xbgr2101010 scanout to support
everything back to nv50+, but everything we have in X or Wayland is
meant for xrgb2101010 not xbgr2101010. And we run into ambiguities of
what, e.g., a depth 30 pixmap means in some extensions like
glx_texture_from_pixmap.


A pixmap itself never has a format per se, it's just a container for an
n-bit integer value per pixel (where n is the pixmap depth). A
compositor using GLX_EXT_texture_from_pixmap has to determine the format
from the corresponding window's visual.
"

There's nothing in there that suggests my root window solution.
I guess i thought given that we can not get the visual of the window 
corresponding to the pixmap, let's find some window which is a good enough 
proxy for onscreen windows with associated depth 30 pixmaps on the same 
x-screen.


A pixmap isn't necessarily associated with any window.



My (possibly inaccurate) understanding is that one can only create a
depth 30 pixmap if the x-screen runs at depth >= 30. It only exposes
depth 30 as supported pixmap format (xdpyinfo) if xorg.conf
DefaultDepth 30 is selected, whereas other depths like
1,4,8,15,16,24,32 are always supported at default depth 24.


That sounds like an X server issue. Just like 32, there's no fundamental
reason a pixmap couldn't have depth 30 despite the screen depth being lower.

Out of curiosity, can you share the output of xdpyinfo with nouveau at
depth 30?


[...]


At least i don't remember seeing any "depth 30, ..." line ever on any 
driver+gpu combo if i run X at default depth 24?


I'm not questioning that's currently the case, I'm saying there's no
particular reason for it, so expect it to change at some point.



Ah ok, thanks for the explanation.



I'm interested in the full xdpyinfo *at screen depth 30*, in particular
whether it lists only one variant of depth 30 visuals. If so, one
possibility for a kludge would be to just look at any depth 30 visual.



Ok, the fresh v2 patch implements that kludge. This one retested to work 
on nouveau, ati, intel.


On intel and nouveau we only get one channel mask for depth 30 visuals 
in xdpyinfo. On amd we get both masks for xrgb2101010 and xbgr2101010, 
as the amd gallium drivers expose both formats, but the ordering is 
xrgb2101010 first, so that's fine when picking the first depth 30 visual 
to get the channel mask for decisions.





The basic problem with EGL based compositing is that for eglCreateImageKHR() 
all we have is the EGLDisplay and EGLContext used for importing an image 
resource.


Is there no EGLConfig associated somehow?


I

Re: [Mesa-dev] [PATCH v4 6/6] i965: gl_BaseVertex must be zero for non-indexed draw calls

2018-04-10 Thread Antia Puentes

On 07/04/18 08:21, Jason Ekstrand wrote:

On Fri, Apr 6, 2018 at 2:53 PM, Ian Romanick wrote:


From: Antia Puentes <apuen...@igalia.com>

We keep 'firstvertex' as it is and move gl_BaseVertex to the drawID
vertex element. The previous Vertex Elements order was:

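  * VE 1: <firstvertex, BaseInstance, VertexID, InstanceID>
  * VE 2: <Draw ID>

and now it is:

  * VE 1: <firstvertex, BaseInstance, VertexID, InstanceID>
  * VE 2: <Draw ID, BaseVertex>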

Moving BaseVertex while keeping VE 1 as it is allows the vertex
buffer associated with VE 1 to keep pointing to the indirect buffer
for indirect draw calls.

From the OpenGL 4.6 (11.1.3.9 Shader Inputs) specification:

  "gl_BaseVertex holds the integer value passed to the baseVertex
  parameter to the command that resulted in the current shader
  invocation. In the case where the command has no baseVertex
parameter,
  the value of gl_BaseVertex is zero."

Fixes CTS tests:

  * KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysParameters
  *
KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysInstancedParameters
  *
KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysParameters
  *

KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysIndirectParameters
  *
KHR-GL45.shader_draw_parameters_tests.MultiDrawArraysIndirectCountParameters

v2 (idr): Make changes to brw_prepare_shader_draw_parameters matching
those in genX(emit_vertices).  Reformat commit message to 72 columns.

Signed-off-by: Ian Romanick <ian.d.roman...@intel.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102678

---
 src/intel/compiler/brw_nir.c                  | 14 +
 src/intel/compiler/brw_vec4.cpp               | 14 +
 src/mesa/drivers/dri/i965/brw_context.h       | 32
++-
 src/mesa/drivers/dri/i965/brw_draw.c          | 45
++-
 src/mesa/drivers/dri/i965/brw_draw_upload.c   | 14 -
 src/mesa/drivers/dri/i965/genX_state_upload.c | 38
+++---
 6 files changed, 97 insertions(+), 60 deletions(-)

diff --git a/src/intel/compiler/brw_nir.c
b/src/intel/compiler/brw_nir.c
index 16b0d86814f..16ab529737b 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -238,8 +238,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
     */
    const bool has_sgvs =
       nir->info.system_values_read &
-      (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
-       BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
+      (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
        BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
        BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
        BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));
@@ -279,7 +278,6 @@ brw_nir_lower_vs_inputs(nir_shader *nir,

                nir_intrinsic_set_base(load, num_inputs);
                switch (intrin->intrinsic) {
-               case nir_intrinsic_load_base_vertex:
                case nir_intrinsic_load_first_vertex:
                   nir_intrinsic_set_component(load, 0);
                   break;
@@ -293,11 +291,15 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
                   nir_intrinsic_set_component(load, 3);
                   break;
                case nir_intrinsic_load_draw_id:
-                  /* gl_DrawID is stored right after gl_VertexID
and friends
-                   * if any of them exist.
+               case nir_intrinsic_load_base_vertex:
+                  /* gl_DrawID and gl_BaseVertex are stored right
after
+                     gl_VertexID and friends if any of them exist.
                    */
                   nir_intrinsic_set_base(load, num_inputs +
has_sgvs);
-                  nir_intrinsic_set_component(load, 0);
+                  if (intrin->intrinsic ==
nir_intrinsic_load_draw_id)
+                     nir_intrinsic_set_component(load, 0);
+                  else
+                     nir_intrinsic_set_component(load, 1);
                   break;
                default:
                   unreachable("Invalid system value intrinsic");
diff --git a/src/intel/compiler/brw_vec4.cpp
b/src/intel/compiler/brw_vec4.cpp
index 1e384f5bf4d..d33caefdea9 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -2825,14 +2825,19 @@ brw_compile_vs(const struct brw_compiler
*compiler, void *log_data,
     * incoming vertex attribute.  So, add an extra slot.
     */
    if (shader->info.system_values_read &
-       (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
-        BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
+       (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
         BITFIELD64_BIT(SYSTEM_VALUE_

[Mesa-dev] [PATCH] i965: Remove brw_bo_alloc_tiled_2d from intel_detect_swizzling.

2018-04-10 Thread Kenneth Graunke
I'd like to drop this pre-isl function.  This drops one of the two uses.
---
 src/mesa/drivers/dri/i965/intel_screen.c | 14 --
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 29cb7ad57d7..7f3c82fab8d 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1828,24 +1828,18 @@ intel_init_bufmgr(struct intel_screen *screen)
 static bool
 intel_detect_swizzling(struct intel_screen *screen)
 {
-   struct brw_bo *buffer;
-   unsigned flags = 0;
-   uint32_t aligned_pitch;
uint32_t tiling = I915_TILING_X;
uint32_t swizzle_mode = 0;
-
-   buffer = brw_bo_alloc_tiled_2d(screen->bufmgr, "swizzle test",
-  64, 64, 4, tiling, &aligned_pitch, flags);
+   struct brw_bo *buffer =
+  brw_bo_alloc_tiled(screen->bufmgr, "swizzle test", 32768,
+ tiling, 512, 0);
if (buffer == NULL)
   return false;
 
brw_bo_get_tiling(buffer, &tiling, &swizzle_mode);
brw_bo_unreference(buffer);
 
-   if (swizzle_mode == I915_BIT_6_SWIZZLE_NONE)
-  return false;
-   else
-  return true;
+   return swizzle_mode != I915_BIT_6_SWIZZLE_NONE;
 }
 
 static int
-- 
2.16.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] egl/x11: Handle both depth 30 formats for eglCreateImage().

2018-04-10 Thread Michel Dänzer
On 2018-04-10 10:22 AM, Mario Kleiner wrote:
> On 04/09/2018 12:12 PM, Michel Dänzer wrote:
>> On 2018-04-06 08:56 PM, Mario Kleiner wrote:
>>
>> I'm interested in the full xdpyinfo *at screen depth 30*, in particular
>> whether it lists only one variant of depth 30 visuals. If so, one
>> possibility for a kludge would be to just look at any depth 30 visual.
> 
> Ok, the fresh v2 patch implements that kludge. This one retested to work
> on nouveau, ati, intel.
> 
> On intel and nouveau we only get one channel mask for depth 30 visuals
> in xdpyinfo. On amd we get both masks for xrgb2101010 and xbgr2101010,
> as the amd gallium drivers expose both formats, but the ordering is
> xrgb2101010 first, so that's fine when picking the first depth 30 visual
> to get the channel mask for decisions.

Hmm, that sounds fragile though when there are both variants; is there
any guarantee they can't appear in the opposite order?


>>> The basic problem with EGL based compositing is that for
>>> eglCreateImageKHR() all we have is the EGLDisplay and EGLContext used
>>> for importing an image resource.
>>
>> Is there no EGLConfig associated somehow?
> 
> I guess we could get the EGLConfig from the context, but i assume this
> context of the importing application (e.g., typically x11 compositor)
> could have an EGLConfig possibly unrelated to depth 30 pixmaps.

Really, the compositor should explicitly specify the format based on the
window visual in this case. But from discussion on IRC, EGL doesn't seem
to have a mechanism for that yet.


>> P.S. IME nouveau is in for a world of pain in general with a format
>> which doesn't start at bit 0. Once upon a time, I explored this approach
>> for depth 24 on big-endian hosts, but ran into lots of issues both in
>> xserver and on the client side.
> 
> Can you clarify for me what you mean with "doesn't start at bit 0"? The
> position of red and blue channel is swapped in nouveau's depth 30
> format, but the red channel fills the 10 LSBs. So if there are padding
> bits, they are the 2 MSBs.

Yeah, I misunderstood what the two variants are here, sorry for the noise.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Remove brw_bo_alloc_tiled_2d from intel_detect_swizzling.

2018-04-10 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga 

On Tue, 2018-04-10 at 01:33 -0700, Kenneth Graunke wrote:
> I'd like to drop this pre-isl function.  This drops one of the two
> uses.
> ---
>  src/mesa/drivers/dri/i965/intel_screen.c | 14 --
>  1 file changed, 4 insertions(+), 10 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index 29cb7ad57d7..7f3c82fab8d 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -1828,24 +1828,18 @@ intel_init_bufmgr(struct intel_screen
> *screen)
>  static bool
>  intel_detect_swizzling(struct intel_screen *screen)
>  {
> -   struct brw_bo *buffer;
> -   unsigned flags = 0;
> -   uint32_t aligned_pitch;
> uint32_t tiling = I915_TILING_X;
> uint32_t swizzle_mode = 0;
> -
> -   buffer = brw_bo_alloc_tiled_2d(screen->bufmgr, "swizzle test",
> -  64, 64, 4, tiling, &aligned_pitch,
> flags);
> +   struct brw_bo *buffer =
> +  brw_bo_alloc_tiled(screen->bufmgr, "swizzle test", 32768,
> + tiling, 512, 0);
> if (buffer == NULL)
>return false;
>  
> brw_bo_get_tiling(buffer, &tiling, &swizzle_mode);
> brw_bo_unreference(buffer);
>  
> -   if (swizzle_mode == I915_BIT_6_SWIZZLE_NONE)
> -  return false;
> -   else
> -  return true;
> +   return swizzle_mode != I915_BIT_6_SWIZZLE_NONE;
>  }
>  
>  static int
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105952] radv causes GPU hang on SI

2018-04-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105952

--- Comment #9 from Turo Lamminen  ---
Still happens in 4381be4648b9ebb15b0a06885489998d5daac482

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105952] radv causes GPU hang on SI

2018-04-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105952

--- Comment #10 from Turo Lamminen  ---
I did a little experiment, I rebased locally and removed the broken commit
(4ad7595f350462c704fbe5b2bd2ca406c904e78e) and then the followups (942fdfe357,
f1d7c16e85, 04e609f1f8) because they no longer applied cleanly. The resulting
mesa works and does not exhibit this bug.

So there are no other confounding issues and there's still some case in there
which you've missed on SI.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: move save/restore operations close to the slow clears

2018-04-10 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

I'm wondering though, doesn't this result in more saves/restores, as
we now do it for each part of a subpass clear separately?

On Mon, Apr 9, 2018 at 11:10 PM, Samuel Pitoiset
 wrote:
> This removes the emission of unnecessary states, for example
> when performing a fast depth stencil clear (ie. clearing htile),
> we don't need to save/restore the graphics pipeline.
>
> For GFX9 chips that have the scissor bug workaround, that
> should also reduce the number of partial flushes.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_meta_bufimage.c |  8 +
>  src/amd/vulkan/radv_meta_clear.c| 47 +
>  2 files changed, 22 insertions(+), 33 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta_bufimage.c 
> b/src/amd/vulkan/radv_meta_bufimage.c
> index 69e15d3213..5018ce1f2e 100644
> --- a/src/amd/vulkan/radv_meta_bufimage.c
> +++ b/src/amd/vulkan/radv_meta_bufimage.c
> @@ -1242,8 +1242,14 @@ radv_meta_clear_image_cs(struct radv_cmd_buffer 
> *cmd_buffer,
>  {
> VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline;
> struct radv_device *device = cmd_buffer->device;
> +   struct radv_meta_saved_state saved_state;
> struct radv_image_view dst_iview;
>
> +   radv_meta_save(&saved_state, cmd_buffer,
> +  RADV_META_SAVE_COMPUTE_PIPELINE |
> +  RADV_META_SAVE_CONSTANTS |
> +  RADV_META_SAVE_DESCRIPTORS);
> +
> create_iview(cmd_buffer, dst, &dst_iview);
> cleari_bind_descriptors(cmd_buffer, &dst_iview);
>
> @@ -1268,4 +1274,6 @@ radv_meta_clear_image_cs(struct radv_cmd_buffer 
> *cmd_buffer,
>   push_constants);
>
> radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, 
> dst->image->info.height, 1);
> +
> +   radv_meta_restore(&saved_state, cmd_buffer);
>  }
> diff --git a/src/amd/vulkan/radv_meta_clear.c 
> b/src/amd/vulkan/radv_meta_clear.c
> index 016c1ee296..833e3cebab 100644
> --- a/src/amd/vulkan/radv_meta_clear.c
> +++ b/src/amd/vulkan/radv_meta_clear.c
> @@ -342,6 +342,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
> unsigned fs_key = radv_format_meta_fs_key(iview->vk_format);
> VkClearColorValue clear_value = clear_att->clearValue.color;
> VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
> +   struct radv_meta_saved_state saved_state;
> VkPipeline pipeline;
>
> if (fs_key == -1) {
> @@ -359,6 +360,10 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
> assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
> assert(clear_att->colorAttachment < subpass->color_count);
>
> +   radv_meta_save(&saved_state, cmd_buffer,
> +  RADV_META_SAVE_GRAPHICS_PIPELINE |
> +  RADV_META_SAVE_CONSTANTS);
> +
> radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
>   device->meta_state.clear_color_p_layout,
>   VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
> @@ -397,6 +402,8 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
> }
>
> radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
> +
> +   radv_meta_restore(&saved_state, cmd_buffer);
>  }
>
>
> @@ -613,12 +620,17 @@ emit_depthstencil_clear(struct radv_cmd_buffer 
> *cmd_buffer,
> const uint32_t samples = iview->image->info.samples;
> const uint32_t samples_log2 = ffs(samples) - 1;
> VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
> +   struct radv_meta_saved_state saved_state;
>
> assert(pass_att != VK_ATTACHMENT_UNUSED);
>
> if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
> clear_value.depth = 1.0f;
>
> +   radv_meta_save(&saved_state, cmd_buffer,
> +  RADV_META_SAVE_GRAPHICS_PIPELINE |
> +  RADV_META_SAVE_CONSTANTS);
> +
> radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
>   device->meta_state.clear_depth_p_layout,
>   VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
> @@ -664,6 +676,8 @@ emit_depthstencil_clear(struct radv_cmd_buffer 
> *cmd_buffer,
> radv_CmdSetStencilReference(cmd_buffer_h, 
> VK_STENCIL_FACE_FRONT_BIT,
>   prev_reference);
> }
> +
> +   radv_meta_restore(&saved_state, cmd_buffer);
>  }
>
>  static bool
> @@ -1165,17 +1179,12 @@ void
>  radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
>  {
> struct radv_cmd_state *cmd_state = &cmd_buffer->state;
> -   struct radv_meta_saved_state saved_state;
> enum radv_cmd_flush_bits pre_flush = 0;
> enum radv_cmd_flush_bits post_flush = 0;
>
> if (!radv_subpass_needs_clear(cmd_buffer))
> return;
>
> -   radv_me

Re: [Mesa-dev] [PATCH 1/4] ac/surface: don't set the display flag for obviously unsupported cases (v2)

2018-04-10 Thread Michel Dänzer
On 2018-04-06 07:12 PM, Marek Olšák wrote:
> From: Marek Olšák 
> 
> This enables the tile swizzle for some cases of the displayable micro mode,
> and it also fixes an addrlib assertion failure on Vega.

[...]

> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
> index dd3189c67d0..ef6f1072abd 100644
> --- a/src/amd/vulkan/radv_image.c
> +++ b/src/amd/vulkan/radv_image.c
> @@ -919,20 +919,21 @@ radv_image_create(VkDevice _device,
>   if (!image)
>   return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
>  
>   image->type = pCreateInfo->imageType;
>   image->info.width = pCreateInfo->extent.width;
>   image->info.height = pCreateInfo->extent.height;
>   image->info.depth = pCreateInfo->extent.depth;
>   image->info.samples = pCreateInfo->samples;
>   image->info.array_size = pCreateInfo->arrayLayers;
>   image->info.levels = pCreateInfo->mipLevels;
> + image->info.num_channels = 4; /* TODO: set this correctly */

Maybe a radv developer can suggest something here? Anyway,

Reviewed-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] nv50/ir: make a copy of tex src if it's referenced multiple times

2018-04-10 Thread Karol Herbst
I guess this fixes a bug somewhere?

On Tue, Apr 10, 2018 at 6:11 AM, Ilia Mirkin  wrote:
> For nv50 we coalesce the srcs and defs into a single node. As such, we
> can end up with impossible constraints if the source is referenced
> after the tex operation (which, due to the coalescing of values, will
> have overwritten it).
>
> This logic already exists for inserting moves for MERGE/UNION sources.
> It's the exact same idea here, so leverage that code, which also
> includes a few optimizations around not extending live ranges
> unnecessarily.
>
> Signed-off-by: Ilia Mirkin 
> ---
>
> v1 -> v2: make use of existing logic in insertConstraintMoves
>
>  src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 86 
> --
>  1 file changed, 49 insertions(+), 37 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> index 3a0e56e1385..7d107aca68d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> @@ -257,6 +257,7 @@ private:
> private:
>virtual bool visit(BasicBlock *);
>
> +  void insertConstraintMove(Instruction *, int s);
>bool insertConstraintMoves();
>
>void condenseDefs(Instruction *);
> @@ -2216,6 +2217,8 @@ 
> RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex)
> for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) {
>if (!tex->srcExists(c))
>   tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue()));
> +  else
> + insertConstraintMove(tex, c);
>if (!tex->defExists(c))
>   tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue()));
> }
> @@ -2288,6 +2291,51 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
> return true;
>  }
>
> +void
> +RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int 
> s)
> +{
> +   const uint8_t size = cst->src(s).getSize();
> +
> +   assert(cst->getSrc(s)->defs.size() == 1); // still SSA
> +
> +   Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
> +   bool imm = defi->op == OP_MOV &&
> +  defi->src(0).getFile() == FILE_IMMEDIATE;
> +   bool load = defi->op == OP_LOAD &&
> +  defi->src(0).getFile() == FILE_MEMORY_CONST &&
> +  !defi->src(0).isIndirect(0);
> +   // catch some cases where don't really need MOVs
> +   if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) {
> +  if (imm || load) {
> + // Move the defi right before the cst. No point in expanding
> + // the range.
> + defi->bb->remove(defi);
> + cst->bb->insertBefore(cst, defi);
> +  }
> +  return;
> +   }
> +
> +   LValue *lval = new_LValue(func, cst->src(s).getFile());
> +   lval->reg.size = size;
> +
> +   Instruction *mov = new_Instruction(func, OP_MOV, typeOfSize(size));
> +   mov->setDef(0, lval);
> +   mov->setSrc(0, cst->getSrc(s));
> +
> +   if (load) {
> +  mov->op = OP_LOAD;
> +  mov->setSrc(0, defi->getSrc(0));
> +   } else if (imm) {
> +  mov->setSrc(0, defi->getSrc(0));
> +   }
> +
> +   if (defi->getPredicate())
> +  mov->setPredicate(defi->cc, defi->getPredicate());
> +
> +   cst->setSrc(s, mov->getDef(0));
> +   cst->bb->insertBefore(cst, mov);
> +}
> +
>  // Insert extra moves so that, if multiple register constraints on a value 
> are
>  // in conflict, these conflicts can be resolved.
>  bool
> @@ -2328,46 +2376,10 @@ 
> RegAlloc::InsertConstraintsPass::insertConstraintMoves()
> cst->bb->insertBefore(cst, mov);
> continue;
>  }
> -assert(cst->getSrc(s)->defs.size() == 1); // still SSA
> -
> -Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
> -bool imm = defi->op == OP_MOV &&
> -   defi->src(0).getFile() == FILE_IMMEDIATE;
> -bool load = defi->op == OP_LOAD &&
> -   defi->src(0).getFile() == FILE_MEMORY_CONST &&
> -   !defi->src(0).isIndirect(0);
> -// catch some cases where don't really need MOVs
> -if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) 
> {
> -   if (imm || load) {
> -  // Move the defi right before the cst. No point in 
> expanding
> -  // the range.
> -  defi->bb->remove(defi);
> -  cst->bb->insertBefore(cst, defi);
> -   }
> -   continue;
> -}
>
> -LValue *lval = new_LValue(func, cst->src(s).getFile());
> -lval->reg.size = size;
> -
> -mov = new_Instruction(func, OP_MOV, typeOfSize(size));
> -mov->setDef(0, lval);
> -mov->setSrc(0, cst->getSrc(s));
> -
> -if (load) {
> -   mov->op = OP_LOAD;
> -   mov->setSrc(0, defi->getSrc(0));
> -} else if (imm) {
> -   mov->setSrc

[Mesa-dev] [Bug 105832] radeonsi NIR missing bindless textures support

2018-04-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105832

--- Comment #19 from Karol Herbst  ---
how can you make dow2 use bindless_textures? This kind of looks like a mesa
issue and not really related to radeonsi at all (the shader compile fails)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105832] radeonsi NIR missing bindless textures support

2018-04-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105832

--- Comment #20 from Karol Herbst  ---
(In reply to Karol Herbst from comment #19)
> how can you make dow2 use bindless_textures? This kind of looks like a mesa
> issue and not really related to radeonsi at all (the shader compile fails)

ohh comment 17 has the info...

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105832] radeonsi NIR missing bindless textures support

2018-04-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105832

--- Comment #21 from Timothy Arceri  ---
(In reply to Karol Herbst from comment #19)
> how can you make dow2 use bindless_textures? This kind of looks like a mesa
> issue and not really related to radeonsi at all (the shader compile fails)

DoW3 requires bindless. I could be wrong but the shader compiles are likely
just a shader-db issue rather than a Mesa bug.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/22] VP9 support

2018-04-10 Thread Christian König

Nice work. Series is Acked-by: Christian König .

Christian.

Am 09.04.2018 um 18:35 schrieb Leo Liu:

This series will enable VP9 support for profile0 and profile2 on VCN.
It will support players with VP9 VA-API enabled.

Leo Liu (22):
   vl: add VP9 profile0 and format
   vl: add VP9 picture description
   radeon/vcn: add VP9 stream type for decoder
   radeon/vcn: add VP9 dpb buffer size
   vl: add VP9 probability tables
   radeon/vcn: add VP9 prob table buffer
   radeon/vcn: add VP9 message buffer interface
   radeon/vcn: fill probability table to prob buffers
   radeon/vcn: get VP9 msg buffer
   radeon/vcn: add VP9 context buffer
   radeonsi: cap VP9 support to Raven
   radeonsi: cap VP9 support to progressive buffer
   st/va: add VP9 picture to context
   st/va: add handles for VP9 buffers
   st/va: add picture parameter handling for VP9
   st/va: add slice parameter handling for VP9
   st/va: parse VP9 uncompressed frame header
   st/va: add VP9 config to enable profile0
   vl: add VP9 profile2 support
   radeon/vcn: add VP9 profile2 support
   radeonsi: use PIPE_FORMAT_P016 format for VP9 profile2
   st/va: add VP9 config to enable profile2

  src/gallium/auxiliary/Makefile.sources |   3 +-
  src/gallium/auxiliary/meson.build  |   1 +
  src/gallium/auxiliary/util/u_video.h   |   4 +
  src/gallium/auxiliary/vl/vl_probs_table.h  | 585 +
  src/gallium/drivers/radeon/radeon_vcn_dec.c| 317 +-
  src/gallium/drivers/radeon/radeon_vcn_dec.h| 139 ++
  src/gallium/drivers/radeonsi/si_get.c  |   9 +-
  src/gallium/include/pipe/p_video_enums.h   |   7 +-
  src/gallium/include/pipe/p_video_state.h   |  94 
  src/gallium/state_trackers/va/Makefile.sources |   1 +
  src/gallium/state_trackers/va/config.c |   2 +-
  src/gallium/state_trackers/va/context.c|   4 +
  src/gallium/state_trackers/va/meson.build  |   4 +-
  src/gallium/state_trackers/va/picture.c|  11 +
  src/gallium/state_trackers/va/picture_vp9.c| 348 +++
  src/gallium/state_trackers/va/va_private.h |  12 +
  16 files changed, 1515 insertions(+), 26 deletions(-)
  create mode 100644 src/gallium/auxiliary/vl/vl_probs_table.h
  create mode 100644 src/gallium/state_trackers/va/picture_vp9.c



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: fix glsl version mismatch in compat profile

2018-04-10 Thread Timothy Arceri
Drivers that only support compat 3.0 were reporting GLSL 1.40
support. This fixes issues with the menu of Dawn of War II.

Fixes: a0c8b49284ef "mesa: enable OpenGL 3.1 with ARB_compatibility"

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105807
---
 src/mesa/main/version.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index 0a4e7630da6..84babd69e2f 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -620,8 +620,11 @@ _mesa_compute_version(struct gl_context *ctx)
/* Make sure that the GLSL version lines up with the GL version. In some
 * cases it can be too high, e.g. if an extension is missing.
 */
-   if (_mesa_is_desktop_gl(ctx) && ctx->Version >= 31) {
+   if (_mesa_is_desktop_gl(ctx)) {
   switch (ctx->Version) {
+  case 30:
+ ctx->Const.GLSLVersion = 130;
+ break;
   case 31:
  ctx->Const.GLSLVersion = 140;
  break;
@@ -629,7 +632,8 @@ _mesa_compute_version(struct gl_context *ctx)
  ctx->Const.GLSLVersion = 150;
  break;
   default:
- ctx->Const.GLSLVersion = ctx->Version * 10;
+ if (ctx->Version >= 33)
+ctx->Const.GLSLVersion = ctx->Version * 10;
  break;
   }
}
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] nv50/ir: make a copy of tex src if it's referenced multiple times

2018-04-10 Thread Ilia Mirkin
On Tue, Apr 10, 2018 at 6:08 AM, Karol Herbst  wrote:
> I guess this fixes a bug somewhere?

Yeah... I describe it in the commit description, I thought. Here's the
situation:

%r1 = 5
%r2 = texsize %r1
%r3 = texsize %r1

Now, let's not worry about why those didn't get CSE'd. (Let's say they
refer to different textures, but query the same LOD.)

With the current code, r1, r2, and r3 all get joined to a single RIG
node with coalesceValues() which happens as part of the whole
JOIN_MASK_TEX thing (to make sure that src == dst regs for a tex op,
since nothing else can be encoded for nv50).

This is obviously bad - no way to make that RA happen -- the assigned
reg (to the RIG node) will overwrite the %r1 value after the first
texsize, and the second texsize will get a bogus LOD input in addition to
then also overwriting the result of the first texsize.

This is basically the same problem as a merge, and we get out of it
the same way as a merge -- adding extra copies. I refactored that
constraint code, although in hindsight perhaps I should have left it
alone and just pushed the tex onto the constrList and treated it like a
MERGE. I can go redo it that way too.

>
> On Tue, Apr 10, 2018 at 6:11 AM, Ilia Mirkin  wrote:
>> For nv50 we coalesce the srcs and defs into a single node. As such, we
>> can end up with impossible constraints if the source is referenced
>> after the tex operation (which, due to the coalescing of values, will
>> have overwritten it).
>>
>> This logic already exists for inserting moves for MERGE/UNION sources.
>> It's the exact same idea here, so leverage that code, which also
>> includes a few optimizations around not extending live ranges
>> unnecessarily.
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>
>> v1 -> v2: make use of existing logic in insertConstraintMoves
>>
>>  src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 86 
>> --
>>  1 file changed, 49 insertions(+), 37 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
>> index 3a0e56e1385..7d107aca68d 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
>> @@ -257,6 +257,7 @@ private:
>> private:
>>virtual bool visit(BasicBlock *);
>>
>> +  void insertConstraintMove(Instruction *, int s);
>>bool insertConstraintMoves();
>>
>>void condenseDefs(Instruction *);
>> @@ -2216,6 +2217,8 @@ 
>> RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex)
>> for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) {
>>if (!tex->srcExists(c))
>>   tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue()));
>> +  else
>> + insertConstraintMove(tex, c);
>>if (!tex->defExists(c))
>>   tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue()));
>> }
>> @@ -2288,6 +2291,51 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
>> return true;
>>  }
>>
>> +void
>> +RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int 
>> s)
>> +{
>> +   const uint8_t size = cst->src(s).getSize();
>> +
>> +   assert(cst->getSrc(s)->defs.size() == 1); // still SSA
>> +
>> +   Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
>> +   bool imm = defi->op == OP_MOV &&
>> +  defi->src(0).getFile() == FILE_IMMEDIATE;
>> +   bool load = defi->op == OP_LOAD &&
>> +  defi->src(0).getFile() == FILE_MEMORY_CONST &&
>> +  !defi->src(0).isIndirect(0);
>> +   // catch some cases where don't really need MOVs
>> +   if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) {
>> +  if (imm || load) {
>> + // Move the defi right before the cst. No point in expanding
>> + // the range.
>> + defi->bb->remove(defi);
>> + cst->bb->insertBefore(cst, defi);
>> +  }
>> +  return;
>> +   }
>> +
>> +   LValue *lval = new_LValue(func, cst->src(s).getFile());
>> +   lval->reg.size = size;
>> +
>> +   Instruction *mov = new_Instruction(func, OP_MOV, typeOfSize(size));
>> +   mov->setDef(0, lval);
>> +   mov->setSrc(0, cst->getSrc(s));
>> +
>> +   if (load) {
>> +  mov->op = OP_LOAD;
>> +  mov->setSrc(0, defi->getSrc(0));
>> +   } else if (imm) {
>> +  mov->setSrc(0, defi->getSrc(0));
>> +   }
>> +
>> +   if (defi->getPredicate())
>> +  mov->setPredicate(defi->cc, defi->getPredicate());
>> +
>> +   cst->setSrc(s, mov->getDef(0));
>> +   cst->bb->insertBefore(cst, mov);
>> +}
>> +
>>  // Insert extra moves so that, if multiple register constraints on a value 
>> are
>>  // in conflict, these conflicts can be resolved.
>>  bool
>> @@ -2328,46 +2376,10 @@ 
>> RegAlloc::InsertConstraintsPass::insertConstraintMoves()
>> cst->bb->insertBefore(cst, mov);
>> continue;
>>  }
>> -assert(cst->getSrc(s)->defs.size() == 1); // still SSA
>> -
>> -Instruction 

[Mesa-dev] [PATCH] radv: add shader BOs to the list at pipeline bind time

2018-04-10 Thread Samuel Pitoiset
Otherwise, the shader BOs are not added to the list on SI because
prefetching isn't supported. Calling radv_cs_add_buffer() in the
prefetch codepath was a bad idea.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105952
Fixes: 4ad7595f35 ("radv: rename radv_emit_prefetch() to radv_emit_prefetch_L2")
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 4e89969016..3b1d6aedc8 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -625,8 +625,6 @@ static void
 radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer,
  struct radv_shader_variant *shader)
 {
-   struct radeon_winsys *ws = cmd_buffer->device->ws;
-   struct radeon_winsys_cs *cs = cmd_buffer->cs;
uint64_t va;
 
if (!shader)
@@ -634,7 +632,6 @@ radv_emit_shader_prefetch(struct radv_cmd_buffer 
*cmd_buffer,
 
va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
 
-   radv_cs_add_buffer(ws, cs, shader->bo, 8);
si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
 }
 
@@ -702,6 +699,18 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer 
*cmd_buffer)
 
radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
 
+   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
+   if (!pipeline->shaders[i])
+   continue;
+
+   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
+  pipeline->shaders[i]->bo, 8);
+   }
+
+   if (radv_pipeline_has_gs(pipeline))
+   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
+  pipeline->gs_copy_shader->bo, 8);
+
if (unlikely(cmd_buffer->device->trace_bo))
radv_save_pipeline(cmd_buffer, pipeline, RING_GFX);
 
@@ -2280,6 +2289,9 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer 
*cmd_buffer)
  MAX2(cmd_buffer->compute_scratch_size_needed,
   pipeline->max_waves * 
pipeline->scratch_bytes_per_wave);
 
+   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
+  pipeline->shaders[MESA_SHADER_COMPUTE]->bo, 8);
+
if (unlikely(cmd_buffer->device->trace_bo))
radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE);
 }
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: add shader BOs to the list at pipeline bind time

2018-04-10 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Tue, Apr 10, 2018 at 2:09 PM, Samuel Pitoiset
 wrote:
> Otherwise, the shader BOs are not added to the list on SI because
> prefetching isn't supported. Calling radv_cs_add_buffer() in the
> prefetch codepath was a bad idea.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105952
> Fixes: 4ad7595f35 ("radv: rename radv_emit_prefetch() to 
> radv_emit_prefetch_L2")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 18 +++---
>  1 file changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 4e89969016..3b1d6aedc8 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -625,8 +625,6 @@ static void
>  radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer,
>   struct radv_shader_variant *shader)
>  {
> -   struct radeon_winsys *ws = cmd_buffer->device->ws;
> -   struct radeon_winsys_cs *cs = cmd_buffer->cs;
> uint64_t va;
>
> if (!shader)
> @@ -634,7 +632,6 @@ radv_emit_shader_prefetch(struct radv_cmd_buffer 
> *cmd_buffer,
>
> va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
>
> -   radv_cs_add_buffer(ws, cs, shader->bo, 8);
> si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
>  }
>
> @@ -702,6 +699,18 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer 
> *cmd_buffer)
>
> radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
>
> +   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
> +   if (!pipeline->shaders[i])
> +   continue;
> +
> +   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
> +  pipeline->shaders[i]->bo, 8);
> +   }
> +
> +   if (radv_pipeline_has_gs(pipeline))
> +   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
> +  pipeline->gs_copy_shader->bo, 8);
> +
> if (unlikely(cmd_buffer->device->trace_bo))
> radv_save_pipeline(cmd_buffer, pipeline, RING_GFX);
>
> @@ -2280,6 +2289,9 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer 
> *cmd_buffer)
>   
> MAX2(cmd_buffer->compute_scratch_size_needed,
>pipeline->max_waves * 
> pipeline->scratch_bytes_per_wave);
>
> +   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
> +  pipeline->shaders[MESA_SHADER_COMPUTE]->bo, 8);
> +
> if (unlikely(cmd_buffer->device->trace_bo))
> radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE);
>  }
> --
> 2.17.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Check result of make_surface() for miptree_create

2018-04-10 Thread Andrea Azzarone
CCing: Jason Ekstrand  and Topi Pohjolainen.

2018-04-09 13:36 GMT+02:00 :

> From: Andrea Azzarone 
>
> Since make_surface() can fail we need to check the result before
> dereferencing it.
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 23cb40f322..82baf7c356 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -714,6 +714,9 @@ miptree_create(struct brw_context *brw,
>   ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT,
>   BO_ALLOC_BUSY, 0, NULL);
>
> +  if (!mt)
> + return NULL;
> +
>if (needs_separate_stencil(brw, mt, format) &&
>!make_separate_stencil_surface(brw, mt)) {
>   intel_miptree_release(&mt);
> --
> 2.17.0
>
>


-- 
Andrea Azzarone
http://launchpad.net/~andyrock
http://wiki.ubuntu.com/AndreaAzzarone
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105846] Assertion failure @ st_atom_array.c:675 when playing Natural Selection 2

2018-04-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105846

l...@protonmail.ch changed:

   What|Removed |Added

 Status|NEEDINFO|RESOLVED
 Resolution|--- |WONTFIX

--- Comment #14 from l...@protonmail.ch ---
I haven't gotten an apitrace yet, although I am now 99% sure that it's a bug in
the game, since another user had the issue with the proprietary nvidia drivers.

I'll close this now, since it obviously isn't something Mesa should attempt to
fix.

Thread on the official forums by said user:
https://forums.unknownworlds.com/discussion/154226/linux-random-segfaults

I also got the same segfault in libc.so when I was just using the mesa libs
from the arch repository, probably because assertions weren't included in that
version.

Thanks for your help though, it was much appreciated!

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 057/104] nir,spirv: Rework function calls

2018-04-10 Thread Rob Clark
On Mon, Apr 9, 2018 at 10:52 PM, Jason Ekstrand  wrote:
> + A bunch of potentially interested parties.
>
> On Mon, Apr 9, 2018 at 4:25 PM, Caio Marcelo de Oliveira Filho
>  wrote:
>>
>> Hi,
>>
>> >  typedef struct {
>> > -   nir_parameter_type param_type;
>> > -   const struct glsl_type *type;
>> > +   uint8_t num_components;
>> > +   uint8_t bit_size;
>> >  } nir_parameter;
>>
>> (...)
>>
>> > @@ -683,18 +692,12 @@ validate_tex_instr(nir_tex_instr *instr,
>> > validate_state *state)
>> >  static void
>> >  validate_call_instr(nir_call_instr *instr, validate_state *state)
>> >  {
>> > -   if (instr->return_deref == NULL) {
>> > -  validate_assert(state,
>> > glsl_type_is_void(instr->callee->return_type));
>> > -   } else {
>> > -  validate_assert(state, instr->return_deref->deref.type ==
>> > instr->callee->return_type);
>> > -  validate_deref_var(instr, instr->return_deref, state);
>> > -   }
>> > -
>> > validate_assert(state, instr->num_params ==
>> > instr->callee->num_params);
>> >
>> > for (unsigned i = 0; i < instr->num_params; i++) {
>> > -  validate_assert(state, instr->callee->params[i].type ==
>> > instr->params[i]->deref.type);
>> > -  validate_deref_var(instr, instr->params[i], state);
>> > +  validate_src(&instr->params[i], state,
>> > +   instr->callee->params[i].bit_size,
>> > +   instr->callee->params[i].num_components);
>> > }
>> >  }
>>
>> Question: I might be misreading, but it seems like we are losing the
>> type information for functions. Isn't that something worth keeping,
>> maybe in some other way, e.g. load_param specifying the expected type?
>
>
> That's a very good question!  To be honest, I'm not sure what the answer is.
> At the moment, the type information is fairly useless for most of what we
> use functions for.  Really, all we need is something that NIR can inline.
> As it is, we're not really preserving the types from SPIR-V because of the
> gymnastics we're doing to handle pointers.
>
> If we did want to preserve types, we'd need to have more detailed type
> information.  In particular, we'd need to be able to provide pointer types
> and maybe combined image-sampler types.  And along with those pointer types,
> we'd need to somehow express those pointer's storage requirements.
>
> The philosophy behind this commit is that, if we don't have a good match to
> SPIR-V anyway, we might as well just chuck that information and do whatever
> makes our lives the easiest.  My philosophy here may be flawed and I'm happy
> to hear arguments in favor of keeping the information.  The best argument I
> can come up with for keeping the information is if we find ourselves wanting
> to do some sort of linking in the future where we have to match functions by
> both name and type.  If we want to do that, however, we'll need all the
> SPIR-V type information.
>

We do end up wanting the type information for cl kernels.  This is
maybe a slightly different case from calls within shader code (ie.
when both caller and callee are in shader).  Although I'd kinda like
to think that we don't need to make vtn aware of this distinction.

So just to throw out an idea.  What if vtn just used load_deref for
everything, and in the case of fxn params it just points to a local
var with type nir_var_param?  (Or something roughly like that.)  Then
lower_io lowers this to load_param.

This way clover could use its own pass to lower kernel entrypoint
load_deref's to load_param differently (ie. the offset becomes byte
offset into input buffer instead of idx)

BR,
-R


> Thoughts?
>
> --Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: fix picking the method for resolve subpass

2018-04-10 Thread Samuel Pitoiset
The source and destination image parameters were swapped.

No CTS changes on Polaris10, but I suspect this might
fix something.

Fixes: 2a04f5481df ("radv/meta: select resolve paths")
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_meta_resolve.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_meta_resolve.c 
b/src/amd/vulkan/radv_meta_resolve.c
index bee398378c5..e932976df28 100644
--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -621,7 +621,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer 
*cmd_buffer)
struct radv_image *dst_img = 
cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
struct radv_image *src_img = 
cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;
 
-   radv_pick_resolve_method_images(dst_img, src_img, 
dest_att.layout, cmd_buffer, &resolve_method);
+   radv_pick_resolve_method_images(src_img, dst_img, 
dest_att.layout, cmd_buffer, &resolve_method);
if (resolve_method == RESOLVE_FRAGMENT) {
break;
}
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: move save/restore operations close to the slow clears

2018-04-10 Thread Samuel Pitoiset



On 04/10/2018 11:20 AM, Bas Nieuwenhuizen wrote:

Reviewed-by: Bas Nieuwenhuizen 

I'm wondering though, doesn't this result in more saves/restores, as
we now do it for each part of a subpass clear separately?


Yes, possibly. I'm actually not sure myself if it's the right thing to 
do. I will postpone this change for later.




On Mon, Apr 9, 2018 at 11:10 PM, Samuel Pitoiset
 wrote:

This removes the emission of unnecessary states, for example
when performing a fast depth stencil clear (ie. clearing htile),
we don't need to save/restore the graphics pipeline.

For GFX9 chips that have the scissor bug workaround, that
should also reduce the number of partial flushes.

Signed-off-by: Samuel Pitoiset 
---
  src/amd/vulkan/radv_meta_bufimage.c |  8 +
  src/amd/vulkan/radv_meta_clear.c| 47 +
  2 files changed, 22 insertions(+), 33 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_bufimage.c 
b/src/amd/vulkan/radv_meta_bufimage.c
index 69e15d3213..5018ce1f2e 100644
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -1242,8 +1242,14 @@ radv_meta_clear_image_cs(struct radv_cmd_buffer 
*cmd_buffer,
  {
 VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline;
 struct radv_device *device = cmd_buffer->device;
+   struct radv_meta_saved_state saved_state;
 struct radv_image_view dst_iview;

+   radv_meta_save(&saved_state, cmd_buffer,
+  RADV_META_SAVE_COMPUTE_PIPELINE |
+  RADV_META_SAVE_CONSTANTS |
+  RADV_META_SAVE_DESCRIPTORS);
+
 create_iview(cmd_buffer, dst, &dst_iview);
 cleari_bind_descriptors(cmd_buffer, &dst_iview);

@@ -1268,4 +1274,6 @@ radv_meta_clear_image_cs(struct radv_cmd_buffer 
*cmd_buffer,
   push_constants);

 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, 
dst->image->info.height, 1);
+
+   radv_meta_restore(&saved_state, cmd_buffer);
  }
diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
index 016c1ee296..833e3cebab 100644
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -342,6 +342,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
 unsigned fs_key = radv_format_meta_fs_key(iview->vk_format);
 VkClearColorValue clear_value = clear_att->clearValue.color;
 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+   struct radv_meta_saved_state saved_state;
 VkPipeline pipeline;

 if (fs_key == -1) {
@@ -359,6 +360,10 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
 assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
 assert(clear_att->colorAttachment < subpass->color_count);

+   radv_meta_save(&saved_state, cmd_buffer,
+  RADV_META_SAVE_GRAPHICS_PIPELINE |
+  RADV_META_SAVE_CONSTANTS);
+
 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
   device->meta_state.clear_color_p_layout,
   VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
@@ -397,6 +402,8 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
 }

 radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
+
+   radv_meta_restore(&saved_state, cmd_buffer);
  }


@@ -613,12 +620,17 @@ emit_depthstencil_clear(struct radv_cmd_buffer 
*cmd_buffer,
 const uint32_t samples = iview->image->info.samples;
 const uint32_t samples_log2 = ffs(samples) - 1;
 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+   struct radv_meta_saved_state saved_state;

 assert(pass_att != VK_ATTACHMENT_UNUSED);

 if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
 clear_value.depth = 1.0f;

+   radv_meta_save(&saved_state, cmd_buffer,
+  RADV_META_SAVE_GRAPHICS_PIPELINE |
+  RADV_META_SAVE_CONSTANTS);
+
 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
   device->meta_state.clear_depth_p_layout,
   VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
@@ -664,6 +676,8 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 radv_CmdSetStencilReference(cmd_buffer_h, 
VK_STENCIL_FACE_FRONT_BIT,
   prev_reference);
 }
+
+   radv_meta_restore(&saved_state, cmd_buffer);
  }

  static bool
@@ -1165,17 +1179,12 @@ void
  radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
  {
 struct radv_cmd_state *cmd_state = &cmd_buffer->state;
-   struct radv_meta_saved_state saved_state;
 enum radv_cmd_flush_bits pre_flush = 0;
 enum radv_cmd_flush_bits post_flush = 0;

 if (!radv_subpass_needs_clear(cmd_buffer))
 return;

-   radv_met

[Mesa-dev] [PATCH] glsl: properly handle bindless sampler and image parameters

2018-04-10 Thread Karol Herbst
fixes a piglit test I sent to the list:
spec@arb_bindless_texture@execution@samplers@basic-arithmetic-func-call-uvec2-texture2D

Signed-off-by: Karol Herbst 
---
 src/compiler/glsl/opt_function_inlining.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/opt_function_inlining.cpp 
b/src/compiler/glsl/opt_function_inlining.cpp
index 04690b6cf45..3d00074bbc3 100644
--- a/src/compiler/glsl/opt_function_inlining.cpp
+++ b/src/compiler/glsl/opt_function_inlining.cpp
@@ -155,7 +155,7 @@ ir_call::generate_inline(ir_instruction *next_ir)
   ir_rvalue *param = (ir_rvalue *) actual_node;
 
   /* Generate a new variable for the parameter. */
-  if (sig_param->type->contains_opaque()) {
+  if (!sig_param->contains_bindless() && 
sig_param->type->contains_opaque()) {
 /* For opaque types, we want the inlined variable references
  * referencing the passed in variable, since that will have
  * the location information, which an assignment of an opaque
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105807] [Regression, bisected]: 3D Rendering not working correctly in Warhammer 40k: Dawn of War II

2018-04-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105807

Alan Swanson  changed:

   What|Removed |Added

   Assignee|intel-3d-bugs@lists.freedes |mesa-dev@lists.freedesktop.
   |ktop.org|org
  Component|Drivers/DRI/i965|Mesa core
 QA Contact|intel-3d-bugs@lists.freedes |mesa-dev@lists.freedesktop.
   |ktop.org|org

--- Comment #7 from Alan Swanson  ---
Just to confirm with Ben, this very definitely affects radeonsi not just i965.
Forcing MESA_GLSL_VERSION_OVERRIDE=130 resolves the issue, so perhaps a game issue
with GLSL 1.4?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: fix glsl version mismatch in compat profile

2018-04-10 Thread Ilia Mirkin
What about GL 2.1 and 2.0 (and earlier where you could still have GLSL
as an ext)? And does the GLSL version have to line up exactly for
those? Or does this just need to be

default:
if (ctx->Version < 31)
  ctx->Const.GLSLVersion = MIN2(ctx->Const.GLSLVersion, 130)
else
  ctx->Const.GLSLVersion = ctx->Version * 10

or something along those lines.

On Tue, Apr 10, 2018 at 7:40 AM, Timothy Arceri  wrote:
> Drivers that only support compat 3.0 were reporting GLSL 1.40
> support. This fixes issues with the menu of Dawn of War II.
>
> Fixes: a0c8b49284ef "mesa: enable OpenGL 3.1 with ARB_compatibility"
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105807
> ---
>  src/mesa/main/version.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
> index 0a4e7630da6..84babd69e2f 100644
> --- a/src/mesa/main/version.c
> +++ b/src/mesa/main/version.c
> @@ -620,8 +620,11 @@ _mesa_compute_version(struct gl_context *ctx)
> /* Make sure that the GLSL version lines up with the GL version. In some
>  * cases it can be too high, e.g. if an extension is missing.
>  */
> -   if (_mesa_is_desktop_gl(ctx) && ctx->Version >= 31) {
> +   if (_mesa_is_desktop_gl(ctx)) {
>switch (ctx->Version) {
> +  case 30:
> + ctx->Const.GLSLVersion = 130;
> + break;
>case 31:
>   ctx->Const.GLSLVersion = 140;
>   break;
> @@ -629,7 +632,8 @@ _mesa_compute_version(struct gl_context *ctx)
>   ctx->Const.GLSLVersion = 150;
>   break;
>default:
> - ctx->Const.GLSLVersion = ctx->Version * 10;
> + if (ctx->Version >= 33)
> +ctx->Const.GLSLVersion = ctx->Version * 10;
>   break;
>}
> }
> --
> 2.17.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 2/7] nir/spirv: Fix warning and add missing breaks.

2018-04-10 Thread Daniel Schürmann
---
 src/compiler/spirv/spirv_to_nir.c | 2 ++
 src/compiler/spirv/vtn_subgroup.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 78c1e9ff59..28274311c2 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -3361,10 +3361,12 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, 
SpvOp opcode,
 
   case SpvCapabilityGroupNonUniformQuad:
  spv_check_supported(subgroup_quad, cap);
+ break;
 
   case SpvCapabilityGroupNonUniformArithmetic:
   case SpvCapabilityGroupNonUniformClustered:
  spv_check_supported(subgroup_arithmetic, cap);
+ break;
 
   case SpvCapabilityVariablePointersStorageBuffer:
   case SpvCapabilityVariablePointers:
diff --git a/src/compiler/spirv/vtn_subgroup.c 
b/src/compiler/spirv/vtn_subgroup.c
index bd3143962b..ecec3aa62d 100644
--- a/src/compiler/spirv/vtn_subgroup.c
+++ b/src/compiler/spirv/vtn_subgroup.c
@@ -277,6 +277,8 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
   case 2:
  op = nir_intrinsic_quad_swap_diagonal;
  break;
+  default:
+ vtn_fail("Invalid constant value in OpGroupNonUniformQuadSwap");
   }
   vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[4]),
NULL, 0, 0);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 6/7] ac: handle subgroup intrinsics

2018-04-10 Thread Daniel Schürmann
---
 src/amd/common/ac_nir_to_llvm.c | 69 -
 1 file changed, 40 insertions(+), 29 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7c2bd5c0cc..3a3aa72988 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2805,36 +2805,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
break;
case nir_intrinsic_read_invocation:
-   case nir_intrinsic_read_first_invocation: {
-   LLVMValueRef args[2];
-
-   /* Value */
-   args[0] = get_src(ctx, instr->src[0]);
-
-   unsigned num_args;
-   const char *intr_name;
-   if (instr->intrinsic == nir_intrinsic_read_invocation) {
-   num_args = 2;
-   intr_name = "llvm.amdgcn.readlane";
-
-   /* Invocation */
-   args[1] = get_src(ctx, instr->src[1]);
-   } else {
-   num_args = 1;
-   intr_name = "llvm.amdgcn.readfirstlane";
-   }
-
-   /* We currently have no other way to prevent LLVM from lifting 
the icmp
-* calls to a dominating basic block.
-*/
-   ac_build_optimization_barrier(&ctx->ac, &args[0]);
-
-   result = ac_build_intrinsic(&ctx->ac, intr_name,
-   ctx->ac.i32, args, num_args,
-   AC_FUNC_ATTR_READNONE |
-   AC_FUNC_ATTR_CONVERGENT);
+   result = ac_build_readlane(&ctx->ac, get_src(ctx, 
instr->src[0]),
+   get_src(ctx, instr->src[1]));
+   break;
+   case nir_intrinsic_read_first_invocation:
+   result = ac_build_readlane(&ctx->ac, get_src(ctx, 
instr->src[0]), NULL);
break;
-   }
case nir_intrinsic_load_subgroup_invocation:
result = ac_get_thread_id(&ctx->ac);
break;
@@ -3088,6 +3064,41 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
break;
}
+   case nir_intrinsic_shuffle:
+   result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
+   get_src(ctx, instr->src[1]));
+   break;
+   case nir_intrinsic_reduce:
+   result = ac_build_reduce(&ctx->ac,
+   get_src(ctx, instr->src[0]),
+   instr->const_index[0],
+   instr->const_index[1]);
+   break;
+   case nir_intrinsic_inclusive_scan:
+   result = ac_build_inclusive_scan(&ctx->ac,
+   get_src(ctx, instr->src[0]),
+   instr->const_index[0]);
+   break;
+   case nir_intrinsic_exclusive_scan:
+   result = ac_build_exclusive_scan(&ctx->ac,
+   get_src(ctx, instr->src[0]),
+   instr->const_index[0]);
+   break;
+   case nir_intrinsic_quad_broadcast: {
+   unsigned lane = nir_src_as_const_value(instr->src[1])->u32[0];
+   result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, 
instr->src[0]),
+   lane, lane, lane, lane);
+   break;
+   }
+   case nir_intrinsic_quad_swap_horizontal:
+   result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, 
instr->src[0]), 1, 0, 3 ,2);
+   break;
+   case nir_intrinsic_quad_swap_vertical:
+   result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, 
instr->src[0]), 2, 3, 0 ,1);
+   break;
+   case nir_intrinsic_quad_swap_diagonal:
+   result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, 
instr->src[0]), 3, 2, 1 ,0);
+   break;
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 4/7] ac: make ballot and umsb capable of 64bit inputs

2018-04-10 Thread Daniel Schürmann
Reviewed-by: Marek Olšák 
---
 src/amd/common/ac_llvm_build.c | 34 +-
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 32d8a02f56..2fb8aeaac6 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -407,8 +407,7 @@ ac_build_ballot(struct ac_llvm_context *ctx,
 */
ac_build_optimization_barrier(ctx, &args[0]);
 
-   if (LLVMTypeOf(args[0]) != ctx->i32)
-   args[0] = LLVMBuildBitCast(ctx->builder, args[0], ctx->i32, "");
+   args[0] = ac_to_integer(ctx, args[0]);
 
return ac_build_intrinsic(ctx,
  "llvm.amdgcn.icmp.i32",
@@ -1291,23 +1290,40 @@ ac_build_umsb(struct ac_llvm_context *ctx,
  LLVMValueRef arg,
  LLVMTypeRef dst_type)
 {
-   LLVMValueRef args[2] = {
+   const char *intrin_name;
+   LLVMTypeRef type;
+   LLVMValueRef highest_bit;
+   LLVMValueRef zero;
+
+   if (ac_get_elem_bits(ctx, LLVMTypeOf(arg)) == 64) {
+   intrin_name = "llvm.ctlz.i64";
+   type = ctx->i64;
+   highest_bit = LLVMConstInt(ctx->i64, 63, false);
+   zero = ctx->i64_0;
+   } else {
+   intrin_name = "llvm.ctlz.i32";
+   type = ctx->i32;
+   highest_bit = LLVMConstInt(ctx->i32, 31, false);
+   zero = ctx->i32_0;
+   }
+
+   LLVMValueRef params[2] = {
arg,
ctx->i1true,
};
-   LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.ctlz.i32",
- dst_type, args, ARRAY_SIZE(args),
+
+   LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type,
+ params, 2,
  AC_FUNC_ATTR_READNONE);
 
/* The HW returns the last bit index from MSB, but TGSI/NIR wants
 * the index from LSB. Invert it by doing "31 - msb". */
-   msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
-  msb, "");
+   msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
+   msb = LLVMBuildTruncOrBitCast(ctx->builder, msb, ctx->i32, "");
 
/* check for zero */
return LLVMBuildSelect(ctx->builder,
-  LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg,
-LLVMConstInt(ctx->i32, 0, 0), ""),
+  LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, 
zero, ""),
   LLVMConstInt(ctx->i32, -1, true), msb, "");
 }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 0/7] radv: add support for new subgroup capabilities

2018-04-10 Thread Daniel Schürmann
Third version of the series:
- rebased to master
- lower_shuffle_to_32bit now only lowers shuffles and nothing else
- removed constant values from quad intrinsics

The previous version can be found here:
https://lists.freedesktop.org/archives/mesa-dev/2018-March/189116.html

Daniel Schürmann (7):
  nir: adjust subgroups instructions for 64bit ballot sizes
  nir/spirv: Fix warning and add missing breaks.
  nir: lower 64bit subgroup shuffle intrinsics
  ac: make ballot and umsb capable of 64bit inputs
  ac: add LLVM build functions for subgroup instrinsics
  ac: handle subgroup intrinsics
  radv: enable subgroup capabilities

 src/amd/common/ac_llvm_build.c | 490 -
 src/amd/common/ac_llvm_build.h |  30 +-
 src/amd/common/ac_nir_to_llvm.c|  69 +++--
 src/amd/vulkan/radv_device.c   |  10 +-
 src/amd/vulkan/radv_shader.c   |   7 +-
 src/compiler/nir/nir.h |   1 +
 src/compiler/nir/nir_lower_subgroups.c |  73 +++--
 src/compiler/nir/nir_opcodes.py|  12 +-
 src/compiler/spirv/spirv_to_nir.c  |   2 +
 src/compiler/spirv/vtn_subgroup.c  |   2 +
 10 files changed, 632 insertions(+), 64 deletions(-)

-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 7/7] radv: enable subgroup capabilities

2018-04-10 Thread Daniel Schürmann
---
 src/amd/vulkan/radv_device.c | 10 --
 src/amd/vulkan/radv_shader.c |  7 ++-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 4fc7392e65..e50b661cac 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -939,8 +939,14 @@ void radv_GetPhysicalDeviceProperties2(
(VkPhysicalDeviceSubgroupProperties*)ext;
properties->subgroupSize = 64;
properties->supportedStages = VK_SHADER_STAGE_ALL;
-   properties->supportedOperations = 
VK_SUBGROUP_FEATURE_BASIC_BIT;
-   properties->quadOperationsInAllStages = false;
+   properties->supportedOperations =
+   
VK_SUBGROUP_FEATURE_BASIC_BIT |
+   
VK_SUBGROUP_FEATURE_BALLOT_BIT |
+   
VK_SUBGROUP_FEATURE_QUAD_BIT |
+   
VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
+   
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
+   
VK_SUBGROUP_FEATURE_VOTE_BIT;
+   properties->quadOperationsInAllStages = true;
break;
}
case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index eaf24dcdee..9d49bc02a8 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -213,7 +213,11 @@ radv_shader_compile_to_nir(struct radv_device *device,
.tessellation = true,
.int64 = true,
.multiview = true,
+   .subgroup_ballot = true,
.subgroup_basic = true,
+   .subgroup_quad = true,
+   .subgroup_shuffle = true,
+   .subgroup_vote = true,
.variable_pointers = true,
.gcn_shader = true,
.trinary_minmax = true,
@@ -283,7 +287,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
.lower_to_scalar = 1,
.lower_subgroup_masks = 1,
.lower_shuffle = 1,
-   .lower_quad =  1,
+   .lower_shuffle_to_32bit = 1,
+   .lower_vote_eq_to_ballot = 1,
});
 
radv_optimize_nir(nir);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 1/7] nir: adjust subgroups instructions for 64bit ballot sizes

2018-04-10 Thread Daniel Schürmann
---
 src/compiler/nir/nir_lower_subgroups.c |  5 ++---
 src/compiler/nir/nir_opcodes.py| 12 ++--
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/compiler/nir/nir_lower_subgroups.c 
b/src/compiler/nir/nir_lower_subgroups.c
index 0d3c83b795..9dc7be7947 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -357,9 +357,8 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr 
*intrin,
   switch (intrin->intrinsic) {
   case nir_intrinsic_ballot_bitfield_extract:
  assert(intrin->src[1].is_ssa);
- return nir_i2b(b, nir_iand(b, nir_ushr(b, int_val,
-   intrin->src[1].ssa),
-   nir_imm_int(b, 1)));
+ return nir_i2b(b, nir_iand(b, nir_ushr(b, int_val, 
intrin->src[1].ssa),
+nir_imm_intN_t(b, 1, options->ballot_bit_size)));
   case nir_intrinsic_ballot_bit_count_reduce:
  return nir_bit_count(b, int_val);
   case nir_intrinsic_ballot_find_lsb:
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index a762fdd220..89a6c6becc 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -308,17 +308,17 @@ dst = 0;
 for (unsigned bit = 0; bit < 32; bit++)
dst |= ((src0 >> bit) & 1) << (31 - bit);
 """)
-unop("bit_count", tuint32, """
+unop_convert("bit_count", tuint32, tuint, """
 dst = 0;
-for (unsigned bit = 0; bit < 32; bit++) {
+for (unsigned bit = 0; bit < bit_size; bit++) {
if ((src0 >> bit) & 1)
   dst++;
 }
 """)
 
-unop_convert("ufind_msb", tint32, tuint32, """
+unop_convert("ufind_msb", tint32, tuint, """
 dst = -1;
-for (int bit = 31; bit >= 0; bit--) {
+for (int bit = bit_size - 1; bit >= 0; bit--) {
if ((src0 >> bit) & 1) {
   dst = bit;
   break;
@@ -340,9 +340,9 @@ for (int bit = 31; bit >= 0; bit--) {
 }
 """)
 
-unop("find_lsb", tint32, """
+unop_convert("find_lsb", tint32, tint, """
 dst = -1;
-for (unsigned bit = 0; bit < 32; bit++) {
+for (unsigned bit = 0; bit < bit_size; bit++) {
if ((src0 >> bit) & 1) {
   dst = bit;
   break;
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 5/7] ac: add LLVM build functions for subgroup instrinsics

2018-04-10 Thread Daniel Schürmann
Co-authored-by: Connor Abbott 
---
 src/amd/common/ac_llvm_build.c | 456 +
 src/amd/common/ac_llvm_build.h |  30 ++-
 2 files changed, 485 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 2fb8aeaac6..9a00bb1114 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2507,3 +2507,459 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context 
*ac, LLVMValueRef fmask,
addr[sample_chan] = LLVMBuildSelect(ac->builder, tmp, final_sample,
addr[sample_chan], "");
 }
+
+static LLVMValueRef
+_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef 
lane)
+{
+   ac_build_optimization_barrier(ctx, &src);
+   return ac_build_intrinsic(ctx,
+   lane == NULL ? "llvm.amdgcn.readfirstlane" : 
"llvm.amdgcn.readlane",
+   LLVMTypeOf(src), (LLVMValueRef []) {
+   src, lane },
+   lane == NULL ? 1 : 2,
+   AC_FUNC_ATTR_READNONE |
+   AC_FUNC_ATTR_CONVERGENT);
+}
+
+/**
+ * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
+ * @param ctx
+ * @param src
+ * @param lane - id of the lane or NULL for the first active lane
+ * @return value of the lane
+ */
+LLVMValueRef
+ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef 
lane)
+{
+   LLVMTypeRef src_type = LLVMTypeOf(src);
+   src = ac_to_integer(ctx, src);
+   unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
+   LLVMValueRef ret;
+
+   if (bits == 32) {
+   ret = _ac_build_readlane(ctx, src, lane);
+   } else {
+   assert(bits % 32 == 0);
+   LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
+   LLVMValueRef src_vector =
+   LLVMBuildBitCast(ctx->builder, src, vec_type, "");
+   ret = LLVMGetUndef(vec_type);
+   for (unsigned i = 0; i < bits / 32; i++) {
+   src = LLVMBuildExtractElement(ctx->builder, src_vector,
+   LLVMConstInt(ctx->i32, i, 0), 
"");
+   LLVMValueRef ret_comp = _ac_build_readlane(ctx, src, 
lane);
+   ret = LLVMBuildInsertElement(ctx->builder, ret, 
ret_comp,
+   LLVMConstInt(ctx->i32, i, 0), 
"");
+   }
+   }
+   return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+}
+
+LLVMValueRef
+ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef 
value, LLVMValueRef lane)
+{
+   /* TODO: Use the actual instruction when LLVM adds an intrinsic for it.
+*/
+   LLVMValueRef pred = LLVMBuildICmp(ctx->builder, LLVMIntEQ, lane,
+ ac_get_thread_id(ctx), "");
+   return LLVMBuildSelect(ctx->builder, pred, value, src, "");
+}
+
+LLVMValueRef
+ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
+{
+   LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask,
+LLVMVectorType(ctx->i32, 2),
+"");
+   LLVMValueRef mask_lo = LLVMBuildExtractElement(ctx->builder, mask_vec,
+  ctx->i32_0, "");
+   LLVMValueRef mask_hi = LLVMBuildExtractElement(ctx->builder, mask_vec,
+  ctx->i32_1, "");
+   LLVMValueRef val =
+   ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
+  (LLVMValueRef []) { mask_lo, ctx->i32_0 },
+  2, AC_FUNC_ATTR_READNONE);
+   val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32,
+(LLVMValueRef []) { mask_hi, val },
+2, AC_FUNC_ATTR_READNONE);
+   return val;
+}
+
+enum dpp_ctrl {
+   _dpp_quad_perm = 0x000,
+   _dpp_row_sl = 0x100,
+   _dpp_row_sr = 0x110,
+   _dpp_row_rr = 0x120,
+   dpp_wf_sl1 = 0x130,
+   dpp_wf_rl1 = 0x134,
+   dpp_wf_sr1 = 0x138,
+   dpp_wf_rr1 = 0x13C,
+   dpp_row_mirror = 0x140,
+   dpp_row_half_mirror = 0x141,
+   dpp_row_bcast15 = 0x142,
+   dpp_row_bcast31 = 0x143
+};
+
+static inline enum dpp_ctrl
+dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
+{
+   assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
+   return _dpp_quad_perm | lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 
6);
+}
+
+static inline enum dpp_ctrl
+dpp_row_sl(unsigned amount)
+{
+   assert(amount > 0 && amount < 16);
+   return _dpp_row_sl | amount;
+}
+
+static inline enum dpp_ctrl
+dpp_row_sr(unsigned amount)
+{
+   assert

[Mesa-dev] [PATCH v3 3/7] nir: lower 64bit subgroup shuffle intrinsics

2018-04-10 Thread Daniel Schürmann
---
 src/compiler/nir/nir.h |  1 +
 src/compiler/nir/nir_lower_subgroups.c | 68 +++---
 2 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f33049d713..f3326e6df9 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2609,6 +2609,7 @@ typedef struct nir_lower_subgroups_options {
bool lower_vote_eq_to_ballot:1;
bool lower_subgroup_masks:1;
bool lower_shuffle:1;
+   bool lower_shuffle_to_32bit:1;
bool lower_quad:1;
 } nir_lower_subgroups_options;
 
diff --git a/src/compiler/nir/nir_lower_subgroups.c 
b/src/compiler/nir/nir_lower_subgroups.c
index 9dc7be7947..8e59861f35 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -28,6 +28,38 @@
  * \file nir_opt_intrinsics.c
  */
 
+static nir_intrinsic_instr *
+lower_subgroups_64bit_split_intrinsic(nir_builder *b, nir_intrinsic_instr 
*intrin,
+  unsigned int component)
+{
+   nir_ssa_def *comp;
+   if (component == 0)
+  comp = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
+   else
+  comp = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);
+
+   nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, 
intrin->intrinsic);
+   nir_ssa_dest_init(&intr->instr, &intr->dest, 1, 32, NULL);
+   intr->const_index[0] = intrin->const_index[0];
+   intr->const_index[1] = intrin->const_index[1];
+   intr->src[0] = nir_src_for_ssa(comp);
+   if (nir_intrinsic_infos[intrin->intrinsic].num_srcs == 2)
+  nir_src_copy(&intr->src[1], &intrin->src[1], intr);
+
+   intr->num_components = 1;
+   nir_builder_instr_insert(b, &intr->instr);
+   return intr;
+}
+
+static nir_ssa_def *
+lower_subgroup_op_to_32bit(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   assert(intrin->src[0].ssa->bit_size == 64);
+   nir_intrinsic_instr *intr_x = lower_subgroups_64bit_split_intrinsic(b, 
intrin, 0);
+   nir_intrinsic_instr *intr_y = lower_subgroups_64bit_split_intrinsic(b, 
intrin, 1);
+   return nir_pack_64_2x32_split(b, &intr_x->dest.ssa, &intr_y->dest.ssa);
+}
+
 static nir_ssa_def *
 ballot_type_to_uint(nir_builder *b, nir_ssa_def *value, unsigned bit_size)
 {
@@ -80,7 +112,8 @@ uint_to_ballot_type(nir_builder *b, nir_ssa_def *value,
 }
 
 static nir_ssa_def *
-lower_subgroup_op_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin)
+lower_subgroup_op_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin,
+bool lower_to_32bit)
 {
/* This is safe to call on scalar things but it would be silly */
assert(intrin->dest.ssa.num_components > 1);
@@ -107,9 +140,12 @@ lower_subgroup_op_to_scalar(nir_builder *b, 
nir_intrinsic_instr *intrin)
   chan_intrin->const_index[0] = intrin->const_index[0];
   chan_intrin->const_index[1] = intrin->const_index[1];
 
-  nir_builder_instr_insert(b, &chan_intrin->instr);
-
-  reads[i] = &chan_intrin->dest.ssa;
+  if (lower_to_32bit && chan_intrin->src[0].ssa->bit_size == 64) {
+ reads[i] = lower_subgroup_op_to_32bit(b, chan_intrin);
+  } else {
+ nir_builder_instr_insert(b, &chan_intrin->instr);
+ reads[i] = &chan_intrin->dest.ssa;
+  }
}
 
return nir_vec(b, reads, intrin->num_components);
@@ -188,7 +224,7 @@ lower_vote_eq_to_ballot(nir_builder *b, nir_intrinsic_instr 
*intrin,
 
 static nir_ssa_def *
 lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
-  bool lower_to_scalar)
+  bool lower_to_scalar, bool lower_to_32bit)
 {
nir_ssa_def *index = nir_load_subgroup_invocation(b);
switch (intrin->intrinsic) {
@@ -241,7 +277,9 @@ lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
  intrin->dest.ssa.bit_size, NULL);
 
if (lower_to_scalar && shuffle->num_components > 1) {
-  return lower_subgroup_op_to_scalar(b, shuffle);
+  return lower_subgroup_op_to_scalar(b, shuffle, lower_to_32bit);
+   } else if (lower_to_32bit && shuffle->src[0].ssa->bit_size == 64) {
+  return lower_subgroup_op_to_32bit(b, shuffle);
} else {
   nir_builder_instr_insert(b, &shuffle->instr);
   return &shuffle->dest.ssa;
@@ -279,7 +317,7 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr 
*intrin,
case nir_intrinsic_read_invocation:
case nir_intrinsic_read_first_invocation:
   if (options->lower_to_scalar && intrin->num_components > 1)
- return lower_subgroup_op_to_scalar(b, intrin);
+ return lower_subgroup_op_to_scalar(b, intrin, false);
   break;
 
case nir_intrinsic_load_subgroup_eq_mask:
@@ -400,16 +438,20 @@ lower_subgroups_intrin(nir_builder *b, 
nir_intrinsic_instr *intrin,
 
case nir_intrinsic_shuffle:
   if (options->lower_to_scalar && intrin->num_components > 1)
- return lower_subgroup_op_to_scalar(b, intrin);
+ return lower_subgroup_op_to_scalar(b, intrin, 
options->lower_shuffle_t

[Mesa-dev] [Bug 105807] [Regression, bisected]: 3D Rendering not working correctly in Warhammer 40k: Dawn of War II

2018-04-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105807

--- Comment #8 from b...@besd.de  ---
These are from shaderdumps (good thing I have the sha naming in place otherwise
these would have been overwritten ;)

When GLSL is not forced

[require]
GLSL >= 0.00 // this is generated by OpenGL so maybe something is going on
there

[fragment shader]
#version 140
#extension GL_ARB_explicit_attrib_location : enable

When forcing GLSL 1.3

[require]
GLSL >= 1.30

[vertex shader]
#version 130
#extension GL_ARB_explicit_attrib_location : enable

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] ac: make use of if/loop build helpers

2018-04-10 Thread Juan A. Suarez Romero
On Tue, 2018-04-03 at 10:58 +0100, Alex Smith wrote:
> I don't know exactly what's causing it, no. I noticed the issue was fixed on 
> master so just bisected to this.
> 
> CC'ing stable to nominate:
> 42627dabb4db3011825a022325be7ae9b51103d6 - (1/3) ac: add if/loop build 
> helpers 
> 6e1a142863b368a032e333f09feb107241446053 - (2/3) radeonsi: make use of 
> if/loop build helpers in ac
> 99cdc019bf6fe11c135b7544ef6daf4ac964fa24 - (3/3) ac: make use of if/loop 
> build helpers
> 

Hi, Alex.

Are these 3 commits nominated for a specific stable branch? From the CC, I'm not
sure whether you want to nominate them for 17.3, 18.0, or both.


J.A.

> 
> 
> On 3 April 2018 at 10:45, Timothy Arceri  wrote:
> > I have no issue with these going in stable if they fix bugs. Ideally we 
> > should create a piglit test to catch this also but presumably you guys 
> > don't actually know the exact shader combination that's tripping things up?
> > 
> > 
> > On 03/04/18 19:36, Samuel Pitoiset wrote:
> > > This fixes a rendering issue with Wolfenstein 2 as well. A backport 
> > > sounds reasonable to me.
> > > 
> > > On 04/03/2018 11:33 AM, Alex Smith wrote:
> > > > Hi Timothy,
> > > > 
> > > > This patch fixes some rendering issues I see with RADV on SI.
> > > > 
> > > > It doesn't sound like it was really intended to fix anything, so 
> > > > possibly it's masking some other issue, but would you object to 
> > > > nominating the series for stable? Applying it on the 18.0 branch fixes 
> > > > the issue there as well.
> > > > 
> > > > Thanks,
> > > > Alex
> > > > 
> > > > On 7 March 2018 at 20:43, Marek Olšák  > > > > wrote:
> > > > 
> > > > For the series:
> > > > 
> > > > Reviewed-by: Marek Olšák  > > > >
> > > > 
> > > > Marek
> > > > 
> > > > On Tue, Mar 6, 2018 at 8:40 PM, Timothy Arceri
> > > > mailto:tarc...@itsqueeze.com>> wrote:
> > > >  > These helpers insert the basic block in the same order as they
> > > >  > appear in NIR making it easier to follow LLVM IR dumps. The 
> > > > helpers
> > > >  > also insert more useful labels onto the blocks.
> > > >  >
> > > >  > TGSI use the line number of the corresponding opcode in the TGSI
> > > >  > dump as the label id, here we use the corresponding block index
> > > >  > from NIR.
> > > >  > ---
> > > >  >  src/amd/common/ac_nir_to_llvm.c | 60
> > > > +
> > > >  >  1 file changed, 18 insertions(+), 42 deletions(-)
> > > >  >
> > > >  > diff --git a/src/amd/common/ac_nir_to_llvm.c
> > > > b/src/amd/common/ac_nir_to_llvm.c
> > > >  > index cda91fe8bf..dc463ed253 100644
> > > >  > --- a/src/amd/common/ac_nir_to_llvm.c
> > > >  > +++ b/src/amd/common/ac_nir_to_llvm.c
> > > >  > @@ -5237,17 +5237,15 @@ static void visit_ssa_undef(struct
> > > > ac_nir_context *ctx,
> > > >  > _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
> > > >  >  }
> > > >  >
> > > >  > -static void visit_jump(struct ac_nir_context *ctx,
> > > >  > +static void visit_jump(struct ac_llvm_context *ctx,
> > > >  >const nir_jump_instr *instr)
> > > >  >  {
> > > >  > switch (instr->type) {
> > > >  > case nir_jump_break:
> > > >  > -   LLVMBuildBr(ctx->ac.builder, ctx->break_block);
> > > >  > -   LLVMClearInsertionPosition(ctx->ac.builder);
> > > >  > +   ac_build_break(ctx);
> > > >  > break;
> > > >  > case nir_jump_continue:
> > > >  > -   LLVMBuildBr(ctx->ac.builder, 
> > > > ctx->continue_block);
> > > >  > -   LLVMClearInsertionPosition(ctx->ac.builder);
> > > >  > +   ac_build_continue(ctx);
> > > >  > break;
> > > >  > default:
> > > >  > fprintf(stderr, "Unknown NIR jump instr: ");
> > > >  > @@ -5285,7 +5283,7 @@ static void visit_block(struct
> > > > ac_nir_context *ctx, nir_block *block)
> > > >  > visit_ssa_undef(ctx,
> > > > nir_instr_as_ssa_undef(instr));
> > > >  > break;
> > > >  > case nir_instr_type_jump:
> > > >  > -   visit_jump(ctx, 
> > > > nir_instr_as_jump(instr));
> > > >  > +   visit_jump(&ctx->ac,
> > > > nir_instr_as_jump(instr));
> > > >  > break;
> > > >  > default:
> > > >  > fprintf(stderr, "Unknown NIR instr type: 
> > > > ");
> > > >  > @@ -5302,56 +5300,34 @@ static void visit_if(struct
> > > > ac_nir_context *ctx, nir_if *if_stmt)
> > > >  >  {
> > > >  > LLVMValueRef value = get_src(ctx, if_stmt->condition);
> > > >  >
> > > >  > -   LLVMValueRef fn =
> > > > LLVMGetBa

Re: [Mesa-dev] [PATCH v3 057/104] nir,spirv: Rework function calls

2018-04-10 Thread Jason Ekstrand
On Tue, Apr 10, 2018 at 6:20 AM, Rob Clark  wrote:

> On Mon, Apr 9, 2018 at 10:52 PM, Jason Ekstrand 
> wrote:
> > + A bunch of potentially interested parties.
> >
> > On Mon, Apr 9, 2018 at 4:25 PM, Caio Marcelo de Oliveira Filho
> >  wrote:
> >>
> >> Hi,
> >>
> >> >  typedef struct {
> >> > -   nir_parameter_type param_type;
> >> > -   const struct glsl_type *type;
> >> > +   uint8_t num_components;
> >> > +   uint8_t bit_size;
> >> >  } nir_parameter;
> >>
> >> (...)
> >>
> >> > @@ -683,18 +692,12 @@ validate_tex_instr(nir_tex_instr *instr,
> >> > validate_state *state)
> >> >  static void
> >> >  validate_call_instr(nir_call_instr *instr, validate_state *state)
> >> >  {
> >> > -   if (instr->return_deref == NULL) {
> >> > -  validate_assert(state,
> >> > glsl_type_is_void(instr->callee->return_type));
> >> > -   } else {
> >> > -  validate_assert(state, instr->return_deref->deref.type ==
> >> > instr->callee->return_type);
> >> > -  validate_deref_var(instr, instr->return_deref, state);
> >> > -   }
> >> > -
> >> > validate_assert(state, instr->num_params ==
> >> > instr->callee->num_params);
> >> >
> >> > for (unsigned i = 0; i < instr->num_params; i++) {
> >> > -  validate_assert(state, instr->callee->params[i].type ==
> >> > instr->params[i]->deref.type);
> >> > -  validate_deref_var(instr, instr->params[i], state);
> >> > +  validate_src(&instr->params[i], state,
> >> > +   instr->callee->params[i].bit_size,
> >> > +   instr->callee->params[i].num_components);
> >> > }
> >> >  }
> >>
> >> Question: I might be misreading, but it seems like we are losing the
> >> type information for functions. Isn't that something worth keeping,
> >> maybe in some other way, e.g. load_param specifying the expected type?
> >
> >
> > That's a very good question!  To be honest, I'm not sure what the answer
> is.
> > At the moment, the type information is fairly useless for most of what we
> > use functions for.  Really, all we need is something that NIR can inline.
> > As it is, we're not really preserving the types from SPIR-V because of
> the
> > gymnastics we're doing to handle pointers.
> >
> > If we did want to preserve types, we'd need to have more detailed type
> > information.  In particular, we'd need to be able to provide pointer
> types
> > and maybe combined image-sampler types.  And along with those pointer
> types,
> > we'd need to somehow express those pointer's storage requirements.
> >
> > The philosophy behind this commit is that, if we don't have a good match
> to
> > SPIR-V anyway, we might as well just chuck that information and do
> whatever
> > makes our lives the easiest.  My philosophy here may be flawed and I'm
> happy
> > to hear arguments in favor of keeping the information.  The best
> argument I
> > can come up with for keeping the information is if we find ourselves
> wanting
> > to do some sort of linking in the future where we have to match
> functions by
> > both name and type.  If we want to do that, however, we'll need all the
> > SPIR-V type information.
> >
>
> We do end up wanting the type information for cl kernels.  This is
> maybe a slightly different case from calls within shader code (ie.
> when both caller and callee are in shader).


Yes, I think it is.  Question: Is there a distinction in CL between
functions which are entrypoints callable from the API and functions which
are helpers?  i.e. Can you call an entrypoint as a helper?


> Although I'd kinda like
> to think that we don't need to make vtn aware of this distinction.
>

Someone has to be aware of it. :-)  There are lots of places in
spirv_to_nir where we take the SPIR-V and do something slightly different
with it than the obvious translation.  Also, using function parameters for
this is a significant anachronism because no other shader I/O in NIR has
ever worked that way.


> So just to throw out an idea.  What if vtn just used load_deref for
> everything, and in the case of fxn params it just points to a local
> var with type nir_var_param?  (Or something roughly like that.)  Then
> lower_io lowers this to load_param.
>

That's kind-of what the original thing did.  However, for SPIR-V helper
functions we have to be able to pass through pointers, SSA values with
arbitrary type, and image/sampler pointers.  SSA values can be handled by
just making a variable and storing them to it.  Pointers are tricky because
they're not really copy-in/out.  For images, samplers, and pointers, we
have a pile of "try to patch up the deref chain" code in
nir_inline_functions that's rather tricky.  The moral of the story is that
"just use variables" is not nearly as obvious of a choice as it looks.


> This way clover could use its own pass to lower kernel entrypoint
> load_deref's to load_param differently (ie. the offset becomes byte
> offset into input buffer instead of idx)
>
___
mesa-dev mailing list
mesa-dev@lists.fr

[Mesa-dev] [PATCH v2 0/3] nir: add support for ARB_bindless_texture texture handles

2018-04-10 Thread Karol Herbst
With this it should be possible to add support for texture handles for backends
using NIR.

changes since v1:
* dropped patch for image handles, still need to work on that

Karol Herbst (3):
  nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars
  nir: add support for bindless_texture samplers
  glsl/nir: fix variable type for image intrinsics and ubos

 src/compiler/glsl/glsl_to_nir.cpp   | 19 ---
 src/compiler/nir/nir.h  |  2 ++
 src/compiler/nir/nir_print.c|  6 ++
 src/compiler/nir/nir_split_var_copies.c |  8 +++-
 4 files changed, 31 insertions(+), 4 deletions(-)

-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-10 Thread Karol Herbst
v2: fix assertion for bindless to non bindless assignments

Signed-off-by: Karol Herbst 
---
 src/compiler/nir/nir_split_var_copies.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_split_var_copies.c 
b/src/compiler/nir/nir_split_var_copies.c
index bc3ceedbdb8..e592754d770 100644
--- a/src/compiler/nir/nir_split_var_copies.c
+++ b/src/compiler/nir/nir_split_var_copies.c
@@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct 
split_var_copies_state *state)
   nir_deref_var *src_head = intrinsic->variables[1];
   nir_deref *dest_tail = nir_deref_tail(&dest_head->deref);
   nir_deref *src_tail = nir_deref_tail(&src_head->deref);
+  enum glsl_base_type base_type = glsl_get_base_type(src_tail->type);
 
-  switch (glsl_get_base_type(src_tail->type)) {
+  switch (base_type) {
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT:
  split_var_copy_instr(intrinsic, dest_head, src_head,
@@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct 
split_var_copies_state *state)
 ralloc_steal(state->dead_ctx, instr);
  }
  break;
+  /* for bindless those are uint64 */
+  case GLSL_TYPE_IMAGE:
+  case GLSL_TYPE_SAMPLER:
+ assert(src_head->var->data.bindless ||
+glsl_get_base_type(src_head->var->type) == base_type);
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT16:
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/3] glsl/nir: fix variable type for image intrinsics and ubos

2018-04-10 Thread Karol Herbst
If the bindless image is passed through a struct we ended up getting the
glsl_type of the struct, not the image.

variable_referenced points to the declaration of the struct, so it won't work
for bindless images. So just drop it.

Signed-off-by: Karol Herbst 
---
 src/compiler/glsl/glsl_to_nir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 9f233637306..bb9ba3af04a 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -893,7 +893,7 @@ nir_visitor::visit(ir_call *ir)
  exec_node *param = ir->actual_parameters.get_head();
  ir_dereference *image = (ir_dereference *)param;
  const glsl_type *type =
-image->variable_referenced()->type->without_array();
+image->type->without_array();
 
  instr->variables[0] = evaluate_deref(&instr->instr, image);
  param = param->get_next();
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/3] nir: add support for bindless_texture samplers

2018-04-10 Thread Karol Herbst
v2: add both texture and sampler handles

Signed-off-by: Karol Herbst 
---
 src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
 src/compiler/nir/nir.h|  2 ++
 src/compiler/nir/nir_print.c  |  6 ++
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index dbb58d82e8f..9f233637306 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
 {
unsigned num_srcs;
nir_texop op;
+   bool bindless = ir->sampler->variable_referenced()->contains_bindless();
+
switch (ir->op) {
case ir_tex:
   op = nir_texop_tex;
@@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
   num_srcs++;
if (ir->offset != NULL)
   num_srcs++;
+   if (bindless)
+  num_srcs++;
 
nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
 
@@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
   unreachable("not reached");
}
 
-   instr->texture = evaluate_deref(&instr->instr, ir->sampler);
-
unsigned src_number = 0;
 
+   /* for bindless we use the texture handle src */
+   if (bindless) {
+  instr->texture = NULL;
+  instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->sampler));
+  instr->src[src_number].src_type = nir_tex_src_texture_handle;
+  src_number++;
+   } else {
+  instr->texture = evaluate_deref(&instr->instr, ir->sampler);
+   }
+
if (ir->coordinate != NULL) {
   instr->coord_components = ir->coordinate->type->vector_elements;
   instr->src[src_number].src =
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f33049d7134..e395352f89c 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1218,6 +1218,8 @@ typedef enum {
nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
nir_tex_src_plane,  /* < selects plane for planar textures */
+   nir_tex_src_texture_handle, /* < handle for bindless texture */
+   nir_tex_src_sampler_handle, /* < handle for bindless sampler */
nir_num_tex_src_types
 } nir_tex_src_type;
 
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 21f13097651..52f20b1eb10 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -778,6 +778,12 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
   case nir_tex_src_plane:
  fprintf(fp, "(plane)");
  break;
+  case nir_tex_src_texture_handle:
+ fprintf(fp, "(texture_handle)");
+ break;
+  case nir_tex_src_sampler_handle:
+ fprintf(fp, "(sampler_handle)");
+ break;
 
   default:
  unreachable("Invalid texture source type");
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/3] nir: add support for bindless_texture samplers

2018-04-10 Thread Jason Ekstrand
On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst  wrote:

> v2: add both texture and sampler handles
>
> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/glsl/glsl_to_nir.cpp | 17 +++--
>  src/compiler/nir/nir.h|  2 ++
>  src/compiler/nir/nir_print.c  |  6 ++
>  3 files changed, 23 insertions(+), 2 deletions(-)
>
> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
> b/src/compiler/glsl/glsl_to_nir.cpp
> index dbb58d82e8f..9f233637306 100644
> --- a/src/compiler/glsl/glsl_to_nir.cpp
> +++ b/src/compiler/glsl/glsl_to_nir.cpp
> @@ -1971,6 +1971,8 @@ nir_visitor::visit(ir_texture *ir)
>  {
> unsigned num_srcs;
> nir_texop op;
> +   bool bindless = ir->sampler->variable_referenced()->contains_
> bindless();
>

What happens if I have a uniform struct containing both a regular sampler
and a bindless sampler?  I think this should be possible.


> +
> switch (ir->op) {
> case ir_tex:
>op = nir_texop_tex;
> @@ -2044,6 +2046,8 @@ nir_visitor::visit(ir_texture *ir)
>num_srcs++;
> if (ir->offset != NULL)
>num_srcs++;
> +   if (bindless)
> +  num_srcs++;
>
> nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
>
> @@ -2069,10 +2073,19 @@ nir_visitor::visit(ir_texture *ir)
>unreachable("not reached");
> }
>
> -   instr->texture = evaluate_deref(&instr->instr, ir->sampler);
> -
> unsigned src_number = 0;
>
> +   /* for bindless we use the texture handle src */
> +   if (bindless) {
> +  instr->texture = NULL;
> +  instr->src[src_number].src =
> + nir_src_for_ssa(evaluate_rvalue(ir->sampler));
> +  instr->src[src_number].src_type = nir_tex_src_texture_handle;
> +  src_number++;
> +   } else {
> +  instr->texture = evaluate_deref(&instr->instr, ir->sampler);
> +   }
> +
> if (ir->coordinate != NULL) {
>instr->coord_components = ir->coordinate->type->vector_elements;
>instr->src[src_number].src =
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index f33049d7134..e395352f89c 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1218,6 +1218,8 @@ typedef enum {
> nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
> nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
> nir_tex_src_plane,  /* < selects plane for planar textures */
> +   nir_tex_src_texture_handle, /* < handle for bindless texture */
> +   nir_tex_src_sampler_handle, /* < handle for bindless sampler */
> nir_num_tex_src_types
>  } nir_tex_src_type;
>
> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
> index 21f13097651..52f20b1eb10 100644
> --- a/src/compiler/nir/nir_print.c
> +++ b/src/compiler/nir/nir_print.c
> @@ -778,6 +778,12 @@ print_tex_instr(nir_tex_instr *instr, print_state
> *state)
>case nir_tex_src_plane:
>   fprintf(fp, "(plane)");
>   break;
> +  case nir_tex_src_texture_handle:
> + fprintf(fp, "(texture_handle)");
> + break;
> +  case nir_tex_src_sampler_handle:
> + fprintf(fp, "(sampler_handle)");
> + break;
>
>default:
>   unreachable("Invalid texture source type");
> --
> 2.14.3
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 3/3] glsl/nir: fix variable type for image intrinsics and ubos

2018-04-10 Thread Jason Ekstrand
On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst  wrote:

> If the bindless image is passed through a struct we ended up getting the
> glsl_type of the struct, not the image.
>
> variable_referenced points to the declaration of the struct, so it won't
> work
> for bindless images. So just drop it.
>
> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/glsl/glsl_to_nir.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
> b/src/compiler/glsl/glsl_to_nir.cpp
> index 9f233637306..bb9ba3af04a 100644
> --- a/src/compiler/glsl/glsl_to_nir.cpp
> +++ b/src/compiler/glsl/glsl_to_nir.cpp
> @@ -893,7 +893,7 @@ nir_visitor::visit(ir_call *ir)
>   exec_node *param = ir->actual_parameters.get_head();
>   ir_dereference *image = (ir_dereference *)param;
>   const glsl_type *type =
> -image->variable_referenced()->type->without_array();
> +image->type->without_array();
>

I asked this question on the last version as well: Do we really need
without_array()?


>   instr->variables[0] = evaluate_deref(&instr->instr, image);
>   param = param->get_next();
> --
> 2.14.3
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-10 Thread Jason Ekstrand
I still don't see anything to make nir_validate not fail out on you if it
sees a read or a write to/from an IMAGE or SAMPLER.

On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst  wrote:

> v2: fix assertion for bindless to non bindless assignments
>
> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/nir/nir_split_var_copies.c | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/src/compiler/nir/nir_split_var_copies.c
> b/src/compiler/nir/nir_split_var_copies.c
> index bc3ceedbdb8..e592754d770 100644
> --- a/src/compiler/nir/nir_split_var_copies.c
> +++ b/src/compiler/nir/nir_split_var_copies.c
> @@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct
> split_var_copies_state *state)
>nir_deref_var *src_head = intrinsic->variables[1];
>nir_deref *dest_tail = nir_deref_tail(&dest_head->deref);
>nir_deref *src_tail = nir_deref_tail(&src_head->deref);
> +  enum glsl_base_type base_type = glsl_get_base_type(src_tail->type);
>
> -  switch (glsl_get_base_type(src_tail->type)) {
> +  switch (base_type) {
>case GLSL_TYPE_ARRAY:
>case GLSL_TYPE_STRUCT:
>   split_var_copy_instr(intrinsic, dest_head, src_head,
> @@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct
> split_var_copies_state *state)
>  ralloc_steal(state->dead_ctx, instr);
>   }
>   break;
> +  /* for bindless those are uint64 */
> +  case GLSL_TYPE_IMAGE:
> +  case GLSL_TYPE_SAMPLER:
> + assert(src_head->var->data.bindless ||
> +glsl_get_base_type(src_head->var->type) == base_type);
>case GLSL_TYPE_INT:
>case GLSL_TYPE_UINT:
>case GLSL_TYPE_INT16:
> --
> 2.14.3
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 1/7] nir: adjust subgroups instructions for 64bit ballot sizes

2018-04-10 Thread Jason Ekstrand
On Tue, Apr 10, 2018 at 7:37 AM, Daniel Schürmann <
daniel.schuerm...@campus.tu-berlin.de> wrote:

> ---
>  src/compiler/nir/nir_lower_subgroups.c |  5 ++---
>  src/compiler/nir/nir_opcodes.py| 12 ++--
>  2 files changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/src/compiler/nir/nir_lower_subgroups.c
> b/src/compiler/nir/nir_lower_subgroups.c
> index 0d3c83b795..9dc7be7947 100644
> --- a/src/compiler/nir/nir_lower_subgroups.c
> +++ b/src/compiler/nir/nir_lower_subgroups.c
> @@ -357,9 +357,8 @@ lower_subgroups_intrin(nir_builder *b,
> nir_intrinsic_instr *intrin,
>switch (intrin->intrinsic) {
>case nir_intrinsic_ballot_bitfield_extract:
>   assert(intrin->src[1].is_ssa);
> - return nir_i2b(b, nir_iand(b, nir_ushr(b, int_val,
> -   intrin->src[1].ssa),
> -   nir_imm_int(b, 1)));
> + return nir_i2b(b, nir_iand(b, nir_ushr(b, int_val,
> intrin->src[1].ssa),
> +nir_imm_intN_t(b, 1, options->ballot_bit_size)));
>

Oops.  This didn't automatically scale as I intended. :-/  The indentation
seems a bit weird but there's not much you can do.  This hunk (as its own
patch) looks good.


>case nir_intrinsic_ballot_bit_count_reduce:
>   return nir_bit_count(b, int_val);
>case nir_intrinsic_ballot_find_lsb:
> diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_
> opcodes.py
> index a762fdd220..89a6c6becc 100644
> --- a/src/compiler/nir/nir_opcodes.py
> +++ b/src/compiler/nir/nir_opcodes.py
> @@ -308,17 +308,17 @@ dst = 0;
>  for (unsigned bit = 0; bit < 32; bit++)
> dst |= ((src0 >> bit) & 1) << (31 - bit);
>  """)
> -unop("bit_count", tuint32, """
> +unop_convert("bit_count", tuint32, tuint, """
>  dst = 0;
> -for (unsigned bit = 0; bit < 32; bit++) {
> +for (unsigned bit = 0; bit < bit_size; bit++) {
> if ((src0 >> bit) & 1)
>dst++;
>  }
>  """)
>
> -unop_convert("ufind_msb", tint32, tuint32, """
> +unop_convert("ufind_msb", tint32, tuint, """
>  dst = -1;
> -for (int bit = 31; bit >= 0; bit--) {
> +for (int bit = bit_size - 1; bit >= 0; bit--) {
> if ((src0 >> bit) & 1) {
>dst = bit;
>break;
> @@ -340,9 +340,9 @@ for (int bit = 31; bit >= 0; bit--) {
>  }
>  """)
>
> -unop("find_lsb", tint32, """
> +unop_convert("find_lsb", tint32, tint, """
>  dst = -1;
> -for (unsigned bit = 0; bit < 32; bit++) {
> +for (unsigned bit = 0; bit < bit_size; bit++) {
>

These three changes should probably be their own patch.  They look fine to
me though.


> if ((src0 >> bit) & 1) {
>dst = bit;
>break;
> --
> 2.14.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 057/104] nir,spirv: Rework function calls

2018-04-10 Thread Rob Clark
On Tue, Apr 10, 2018 at 11:04 AM, Jason Ekstrand  wrote:
> On Tue, Apr 10, 2018 at 6:20 AM, Rob Clark  wrote:
>>
>> On Mon, Apr 9, 2018 at 10:52 PM, Jason Ekstrand 
>> wrote:
>> > + A bunch of potentially interested parties.
>> >
>> > On Mon, Apr 9, 2018 at 4:25 PM, Caio Marcelo de Oliveira Filho
>> >  wrote:
>> >>
>> >> Hi,
>> >>
>> >> >  typedef struct {
>> >> > -   nir_parameter_type param_type;
>> >> > -   const struct glsl_type *type;
>> >> > +   uint8_t num_components;
>> >> > +   uint8_t bit_size;
>> >> >  } nir_parameter;
>> >>
>> >> (...)
>> >>
>> >> > @@ -683,18 +692,12 @@ validate_tex_instr(nir_tex_instr *instr,
>> >> > validate_state *state)
>> >> >  static void
>> >> >  validate_call_instr(nir_call_instr *instr, validate_state *state)
>> >> >  {
>> >> > -   if (instr->return_deref == NULL) {
>> >> > -  validate_assert(state,
>> >> > glsl_type_is_void(instr->callee->return_type));
>> >> > -   } else {
>> >> > -  validate_assert(state, instr->return_deref->deref.type ==
>> >> > instr->callee->return_type);
>> >> > -  validate_deref_var(instr, instr->return_deref, state);
>> >> > -   }
>> >> > -
>> >> > validate_assert(state, instr->num_params ==
>> >> > instr->callee->num_params);
>> >> >
>> >> > for (unsigned i = 0; i < instr->num_params; i++) {
>> >> > -  validate_assert(state, instr->callee->params[i].type ==
>> >> > instr->params[i]->deref.type);
>> >> > -  validate_deref_var(instr, instr->params[i], state);
>> >> > +  validate_src(&instr->params[i], state,
>> >> > +   instr->callee->params[i].bit_size,
>> >> > +   instr->callee->params[i].num_components);
>> >> > }
>> >> >  }
>> >>
>> >> Question: I might be misreading, but it seems like we are losing the
>> >> type information for functions. Isn't that something worth keeping,
>> >> maybe in some other way, e.g. load_param specifying the expected type?
>> >
>> >
>> > That's a very good question!  To be honest, I'm not sure what the answer
>> > is.
>> > At the moment, the type information is fairly useless for most of what
>> > we
>> > use functions for.  Really, all we need is something that NIR can
>> > inline.
>> > As it is, we're not really preserving the types from SPIR-V because of
>> > the
>> > gymnastics we're doing to handle pointers.
>> >
>> > If we did want to preserve types, we'd need to have more detailed type
>> > information.  In particular, we'd need to be able to provide pointer
>> > types
>> > and maybe combined image-sampler types.  And along with those pointer
>> > types,
>> > we'd need to somehow express those pointer's storage requirements.
>> >
>> > The philosophy behind this commit is that, if we don't have a good match
>> > to
>> > SPIR-V anyway, we might as well just chuck that information and do
>> > whatever
>> > makes our lives the easiest.  My philosophy here may be flawed and I'm
>> > happy
>> > to hear arguments in favor of keeping the information.  The best
>> > argument I
>> > can come up with for keeping the information is if we find ourselves
>> > wanting
>> > to do some sort of linking in the future where we have to match
>> > functions by
>> > both name and type.  If we want to do that, however, we'll need all the
>> > SPIR-V type information.
>> >
>>
>> We do end up wanting the type information for cl kernels.  This is
>> maybe a slightly different case from calls within shader code (ie.
>> when both caller and callee are in shader).
>
>
> Yes, I think it is.  Question: Is there a distinction in CL between
> functions which are entrypoints callable from the API and functions which
> are helpers?  i.e. Can you call an entrypoint as a helper?
>

There is the __kernel annotation.  And you know the entry point name
when compiling.  However I'm not sure anything prevents one entry
point from calling another.

I'm not sure we want the calling convention to be the same internally
as for kernel entry points so in that case, if we aren't inlining
everything, we might end up generating two versions of a function (or
possibly a shim.. or possibly between the two based on size.. or??)


>>
>> Although I'd kinda like
>> to think that we don't need to make vtn aware of this distinction.
>
>
> Someone has to be aware of it. :-)  There are lots of places in spirv_to_nir
> where we take the SPIR-V and do something slightly different with it than the
> obvious translation.  Also, using function parameters for this is a
> significant anachronism because no other shader I/O in NIR has ever worked
> that way.
>
>>
>> So just to throw out an idea.  What if vtn just used load_deref for
>> everything, and in the case of fxn params it just points to a local
>> var with type nir_var_param?  (Or something roughly like that.)  Then
>> lower_io lowers this to load_param.
>
>
> That's kind-of what the original thing did.  However, for SPIR-V helper
> functions we have to be able to pass through pointers, SSA values with
> arbitrary type, and image/sampler pointers. 

Re: [Mesa-dev] [PATCH v3 2/7] nir/spirv: Fix warning and add missing breaks.

2018-04-10 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Tue, Apr 10, 2018 at 7:37 AM, Daniel Schürmann <
daniel.schuerm...@campus.tu-berlin.de> wrote:

> ---
>  src/compiler/spirv/spirv_to_nir.c | 2 ++
>  src/compiler/spirv/vtn_subgroup.c | 2 ++
>  2 files changed, 4 insertions(+)
>
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index 78c1e9ff59..28274311c2 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -3361,10 +3361,12 @@ vtn_handle_preamble_instruction(struct
> vtn_builder *b, SpvOp opcode,
>
>case SpvCapabilityGroupNonUniformQuad:
>   spv_check_supported(subgroup_quad, cap);
> + break;
>
>case SpvCapabilityGroupNonUniformArithmetic:
>case SpvCapabilityGroupNonUniformClustered:
>   spv_check_supported(subgroup_arithmetic, cap);
> + break;
>
>case SpvCapabilityVariablePointersStorageBuffer:
>case SpvCapabilityVariablePointers:
> diff --git a/src/compiler/spirv/vtn_subgroup.c b/src/compiler/spirv/vtn_
> subgroup.c
> index bd3143962b..ecec3aa62d 100644
> --- a/src/compiler/spirv/vtn_subgroup.c
> +++ b/src/compiler/spirv/vtn_subgroup.c
> @@ -277,6 +277,8 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp
> opcode,
>case 2:
>   op = nir_intrinsic_quad_swap_diagonal;
>   break;
> +  default:
> + vtn_fail("Invalid constant value in OpGroupNonUniformQuadSwap");
>}
>vtn_build_subgroup_instr(b, op, val->ssa, vtn_ssa_value(b, w[4]),
> NULL, 0, 0);
> --
> 2.14.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] docs/release-calendar: update to include 18.1 and 18.2

2018-04-10 Thread Emil Velikov
On 10 April 2018 at 09:06, Juan A. Suarez Romero  wrote:
> On Mon, 2018-04-09 at 19:02 +0100, Emil Velikov wrote:
>> From: Emil Velikov 
>>
>> Dylan has kindly stepped up to help with 18.1.0, while I've taken the
>> liberty to nominate Andres for 18.2.0 ;-)
>>
>
> I would like to replace Andres for the 18.0.x releases. We already talked 
> about
> that and we both think it is a good idea.
>
> With that change, the remaining proposal looks good to me.
>
>
> Reviewed-by: Juan A. Suarez 
>
Fair enough - swapped Andres with yourself for the 18.0.x series and
pushed to master.

AFAICT you'll be doing both 18.0.1 and 17.3.9 (last week?), which was
previously suggested as something we should avoid.
Please let us know if everything's OK and if we can help out.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/5] i965/tiled_memcpy: inline movntdqa loads in tiled_to_linear

2018-04-10 Thread Scott D Phillips
Chris Wilson  writes:

> Quoting Chris Wilson (2018-04-05 20:54:54)
> > Quoting Scott D Phillips (2018-04-03 21:05:42)

[...]

> > Ok, was hoping to see how you choose to use the streaming load, but I
> > guess that's the next patch.
> > 
> > Reviewed-by: Chris Wilson 
>
> Oh, one point Eric Anholt mentioned on another thread about movntdqa is
> that stale data inside the internal buffer is not automatically
> invalidated. We may need to emit explicit mfence before the copies if we
> are in doubt. A single mfence per tiled-copy is probably not enough to
> worry about optimising away.

Looking around, I found this errata about movntdqa not honoring the
ordering guarantees of locked instructions (VLP31 in the pdf):

https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/pentium-n3520-j2850-celeron-n2920-n2820-n2815-n2806-j1850-j1750-spec-update.pdf

So I added this code near the top of tiled_to_linear():

if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) {
   /* Various atom processors have errata where the movntdqa instruction
* (which is used in streaming_load_memcpy) may incorrectly be reordered
* before locked instructions. To work around that, we put an lfence
* here to manually wait for preceding loads to be completed.
*/
   __builtin_ia32_lfence();
}

It seems that an mfence won't suffice where the errata mentions you need
the lfence, by my hazy understanding. Do I have that right, or should
this be an mfence?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 024/104] nir: Support deref instructions in lower_system_values

2018-04-10 Thread Caio Marcelo de Oliveira Filho
> I took another swing at it, and this one seems to make Jenkins happy:
> 
> https://gitlab.freedesktop.org/jekstrand/mesa/commit/ad3cc9f301da3519d4f76767a6d9e98e5a5c118e

Reviewed-by: Caio Marcelo de Oliveira Filho 


Thanks,
Caio
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-10 Thread Karol Herbst
On Tue, Apr 10, 2018 at 5:12 PM, Jason Ekstrand  wrote:
> I still don't see anything to make nir_validate not fail out on you if it
> sees a read or a write to/from an IMAGE or SAMPLER.
>

what kind of glsl code are you talking about here? I wrote some tests
and things just seem to work out. I wasn't able to hit any other
issues.

> On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst  wrote:
>>
>> v2: fix assertion for bindless to non bindless assignments
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/compiler/nir/nir_split_var_copies.c | 8 +++-
>>  1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/compiler/nir/nir_split_var_copies.c
>> b/src/compiler/nir/nir_split_var_copies.c
>> index bc3ceedbdb8..e592754d770 100644
>> --- a/src/compiler/nir/nir_split_var_copies.c
>> +++ b/src/compiler/nir/nir_split_var_copies.c
>> @@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct
>> split_var_copies_state *state)
>>nir_deref_var *src_head = intrinsic->variables[1];
>>nir_deref *dest_tail = nir_deref_tail(&dest_head->deref);
>>nir_deref *src_tail = nir_deref_tail(&src_head->deref);
>> +  enum glsl_base_type base_type = glsl_get_base_type(src_tail->type);
>>
>> -  switch (glsl_get_base_type(src_tail->type)) {
>> +  switch (base_type) {
>>case GLSL_TYPE_ARRAY:
>>case GLSL_TYPE_STRUCT:
>>   split_var_copy_instr(intrinsic, dest_head, src_head,
>> @@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct
>> split_var_copies_state *state)
>>  ralloc_steal(state->dead_ctx, instr);
>>   }
>>   break;
>> +  /* for bindless those are uint64 */
>> +  case GLSL_TYPE_IMAGE:
>> +  case GLSL_TYPE_SAMPLER:
>> + assert(src_head->var->data.bindless ||
>> +glsl_get_base_type(src_head->var->type) == base_type);
>>case GLSL_TYPE_INT:
>>case GLSL_TYPE_UINT:
>>case GLSL_TYPE_INT16:
>> --
>> 2.14.3
>>
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: fix picking the method for resolve subpass

2018-04-10 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Tue, Apr 10, 2018 at 4:00 PM, Samuel Pitoiset
 wrote:
> The source and destination image parameters were swapped.
>
> No CTS changes on Polaris10, but I suspect this might
> fix something.
>
> Fixes: 2a04f5481df ("radv/meta: select resolve paths")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_meta_resolve.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_meta_resolve.c 
> b/src/amd/vulkan/radv_meta_resolve.c
> index bee398378c5..e932976df28 100644
> --- a/src/amd/vulkan/radv_meta_resolve.c
> +++ b/src/amd/vulkan/radv_meta_resolve.c
> @@ -621,7 +621,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer 
> *cmd_buffer)
> struct radv_image *dst_img = 
> cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
> struct radv_image *src_img = 
> cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;
>
> -   radv_pick_resolve_method_images(dst_img, src_img, 
> dest_att.layout, cmd_buffer, &resolve_method);
> +   radv_pick_resolve_method_images(src_img, dst_img, 
> dest_att.layout, cmd_buffer, &resolve_method);
> if (resolve_method == RESOLVE_FRAGMENT) {
> break;
> }
> --
> 2.17.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 3/3] glsl/nir: fix variable type for image intrinsics and ubos

2018-04-10 Thread Karol Herbst
On Tue, Apr 10, 2018 at 5:11 PM, Jason Ekstrand  wrote:
> On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst  wrote:
>>
>> If the bindless image is passed through a struct we ended up getting the
>> glsl_type of the struct, not the image.
>>
>> variable_referenced points to the declaration of the struct, so it won't
>> work
>> for bindless images. So just drop it.
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/compiler/glsl/glsl_to_nir.cpp | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
>> b/src/compiler/glsl/glsl_to_nir.cpp
>> index 9f233637306..bb9ba3af04a 100644
>> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> @@ -893,7 +893,7 @@ nir_visitor::visit(ir_call *ir)
>>   exec_node *param = ir->actual_parameters.get_head();
>>   ir_dereference *image = (ir_dereference *)param;
>>   const glsl_type *type =
>> -image->variable_referenced()->type->without_array();
>> +image->type->without_array();
>
>
> I asked this question on the last version as well: Do we really need
> without_array()?
>

I don't think so actually, because it should be the sampler type
already. I just forgot about that.

>>
>>   instr->variables[0] = evaluate_deref(&instr->instr, image);
>>   param = param->get_next();
>> --
>> 2.14.3
>>
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] ac: make use of if/loop build helpers

2018-04-10 Thread Alex Smith
On 10 April 2018 at 15:49, Juan A. Suarez Romero 
wrote:

> On Tue, 2018-04-03 at 10:58 +0100, Alex Smith wrote:
> > I don't know exactly what's causing it, no. I noticed the issue was
> fixed on master so just bisected to this.
> >
> > CC'ing stable to nominate:
> > 42627dabb4db3011825a022325be7ae9b51103d6 - (1/3) ac: add if/loop build
> helpers
> > 6e1a142863b368a032e333f09feb107241446053 - (2/3) radeonsi: make use of
> if/loop build helpers in ac
> > 99cdc019bf6fe11c135b7544ef6daf4ac964fa24 - (3/3) ac: make use of
> if/loop build helpers
> >
>
> Hi, Alex.
>
> Are these 3 commits nominated for a specific stable branch? From the CC
> not sure
> if you want to nominate them for 17.3, 18.0 or both.
>

They work for me on both 18.0 and 17.3, so I think they can be nominated
for both.

Thanks,
Alex


>
>
> J.A.
>
> >
> >
> > On 3 April 2018 at 10:45, Timothy Arceri  wrote:
> > > I have no issue with these going in stable if they fix bugs. Ideally
> we should create a piglit test to catch this also but presumably you guys
> don't actually know the exact shader combination that's tripping things up?
> > >
> > >
> > > On 03/04/18 19:36, Samuel Pitoiset wrote:
> > > > This fixes a rendering issue with Wolfenstein 2 as well. A backport
> sounds reasonable to me.
> > > >
> > > > On 04/03/2018 11:33 AM, Alex Smith wrote:
> > > > > Hi Timothy,
> > > > >
> > > > > This patch fixes some rendering issues I see with RADV on SI.
> > > > >
> > > > > It doesn't sound like it was really intended to fix anything, so
> possibly it's masking some other issue, but would you object to nominating
> the series for stable? Applying it on the 18.0 branch fixes the issue there
> as well.
> > > > >
> > > > > Thanks,
> > > > > Alex
> > > > >
> > > > > On 7 March 2018 at 20:43, Marek Olšák  mar...@gmail.com>> wrote:
> > > > >
> > > > > For the series:
> > > > >
> > > > > Reviewed-by: Marek Olšák  > > > > >
> > > > >
> > > > > Marek
> > > > >
> > > > > On Tue, Mar 6, 2018 at 8:40 PM, Timothy Arceri
> > > > > mailto:tarc...@itsqueeze.com>> wrote:
> > > > >  > These helpers insert the basic block in the same order as
> they
> > > > >  > appear in NIR making it easier to follow LLVM IR dumps. The
> helpers
> > > > >  > also insert more useful labels onto the blocks.
> > > > >  >
> > > > >  > TGSI use the line number of the corresponding opcode in the
> TGSI
> > > > >  > dump as the label id, here we use the corresponding block
> index
> > > > >  > from NIR.
> > > > >  > ---
> > > > >  >  src/amd/common/ac_nir_to_llvm.c | 60
> > > > > +
> > > > >  >  1 file changed, 18 insertions(+), 42 deletions(-)
> > > > >  >
> > > > >  > diff --git a/src/amd/common/ac_nir_to_llvm.c
> > > > > b/src/amd/common/ac_nir_to_llvm.c
> > > > >  > index cda91fe8bf..dc463ed253 100644
> > > > >  > --- a/src/amd/common/ac_nir_to_llvm.c
> > > > >  > +++ b/src/amd/common/ac_nir_to_llvm.c
> > > > >  > @@ -5237,17 +5237,15 @@ static void visit_ssa_undef(struct
> > > > > ac_nir_context *ctx,
> > > > >  > _mesa_hash_table_insert(ctx->defs, &instr->def,
> undef);
> > > > >  >  }
> > > > >  >
> > > > >  > -static void visit_jump(struct ac_nir_context *ctx,
> > > > >  > +static void visit_jump(struct ac_llvm_context *ctx,
> > > > >  >const nir_jump_instr *instr)
> > > > >  >  {
> > > > >  > switch (instr->type) {
> > > > >  > case nir_jump_break:
> > > > >  > -   LLVMBuildBr(ctx->ac.builder,
> ctx->break_block);
> > > > >  > -   LLVMClearInsertionPosition(
> ctx->ac.builder);
> > > > >  > +   ac_build_break(ctx);
> > > > >  > break;
> > > > >  > case nir_jump_continue:
> > > > >  > -   LLVMBuildBr(ctx->ac.builder,
> ctx->continue_block);
> > > > >  > -   LLVMClearInsertionPosition(
> ctx->ac.builder);
> > > > >  > +   ac_build_continue(ctx);
> > > > >  > break;
> > > > >  > default:
> > > > >  > fprintf(stderr, "Unknown NIR jump instr: ");
> > > > >  > @@ -5285,7 +5283,7 @@ static void visit_block(struct
> > > > > ac_nir_context *ctx, nir_block *block)
> > > > >  > visit_ssa_undef(ctx,
> > > > > nir_instr_as_ssa_undef(instr));
> > > > >  > break;
> > > > >  > case nir_instr_type_jump:
> > > > >  > -   visit_jump(ctx,
> nir_instr_as_jump(instr));
> > > > >  > +   visit_jump(&ctx->ac,
> > > > > nir_instr_as_jump(instr));
> > > > >  > break;
> > > > >  > default:
> > > > >  > fprintf(stderr, "Unknown NIR instr
> type: ");
> > > > >  

Re: [Mesa-dev] [PATCH v4 3/5] i965/miptree: Use cpu tiling/detiling when mapping

2018-04-10 Thread Scott D Phillips
Chris Wilson  writes:

> Quoting Scott D Phillips (2018-04-03 21:05:43)
>> Rename the (un)map_gtt functions to (un)map_map (map by
>> returning a map) and add new functions (un)map_tiled_memcpy that
>> return a shadow buffer populated with the intel_tiled_memcpy
>> functions.
>> 
>> Tiling/detiling with the cpu will be the only way to handle Yf/Ys
>> tiling, when support is added for those formats.
>> 
>> v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson)
>> 
>> v3: Add units to parameter names of tile_extents (Nanley Chery)
>> Use _mesa_align_malloc for the shadow copy (Nanley)
>> Continue using gtt maps on gen4 (Nanley)
>> 
>> v4: Use streaming_load_memcpy when detiling
>> ---
>>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 108 
>> --
>>  1 file changed, 100 insertions(+), 8 deletions(-)
>> 
>> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
>> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> index 23cb40f3226..58ffe868d0d 100644
>> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c

[...]

>> @@ -3093,11 +3094,93 @@ intel_miptree_map_gtt(struct brw_context *brw,
>>  }
>>  
>>  static void
>> -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
>> +intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
>>  {
>> intel_miptree_unmap_raw(mt);
>>  }
>>  
>> +/* Compute extent parameters for use with tiled_memcpy functions.
>> + * xs are in units of bytes and ys are in units of strides. */
>> +static inline void
>> +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
>> + unsigned int level, unsigned int slice, unsigned int *x1_B,
>> + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
>> +{
>> +   unsigned int block_width, block_height;
>> +   unsigned int x0_el, y0_el;
>> +
>> +   _mesa_get_format_block_size(mt->format, &block_width, &block_height);
>> +
>> +   assert(map->x % block_width == 0);
>> +   assert(map->y % block_height == 0);
>> +
>> +   intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
>> +   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
>> +   *y1_el = map->y / block_height + y0_el;
>> +   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
>> +   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
>> +}
>> +
>> +static void
>> +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
>> +   struct intel_mipmap_tree *mt,
>> +   struct intel_miptree_map *map,
>> +   unsigned int level, unsigned int slice)
>> +{
>> +   unsigned int x1, x2, y1, y2;
>> +   tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
>> +   map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16);
>> +
>> +   /* The tiling and detiling functions require that the linear buffer
>> +* has proper 16-byte alignment (that is, `x0` is 16-byte aligned).
>
> Throw in an "its" here, i.e. (that is, its `x0`...). I just spent a few
> moments wondering what x0 was before remembering it's the internal x0 of
> tiled_to_linear().
>
> We really want to move that knowledge back to intel_tiled_memcpy.c. A
> single user isn't enough to justify a lot of effort though (or be sure
> you get the interface right).

You mean putting the code that decides the stride and alignment
requirements next to the detiling code, in something like
alloc_linear_for_tiled?

>> +* Here we over-allocate the linear buffer by enough bytes to get
>> +* the proper alignment.
>> +*/
>> +   map->buffer = _mesa_align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 
>> 16);
>> +   map->ptr = (char *)map->buffer + (x1 & 0xf);
>> +   assert(map->buffer);
>> +
>> +   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
>> +  char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
>> +  src += mt->offset;
>> +
>> +  const mem_copy_fn fn =
>> +#if defined(USE_SSE41)
>> + cpu_has_sse4_1 ? (mem_copy_fn)_mesa_streaming_load_memcpy :
>> +#endif
>> + memcpy;
>
> So always use a streaming load and bypass cache, even coming from WB.
> Justifiable I believe, since there is no reason to keep it in cache as
> the modification is on map->buffer not the tiled bo.
>
> But do we want to use this path if !USE_SSE41 and WC? Let's see if
> that's excluded.

Presently the logic is to always do map_tiled_memcpy for tiled surfaces,
except on gen 4 where we finally could do a gtt map. You're saying we're
better off doing a gtt map if we do have a wc map and don't have
movntdqa? That sounds reasonable.

>>  static void
>>  intel_miptree_map_blit(struct brw_context *brw,
>>struct intel_mipmap_tree *mt,
>> @@ -3655,8 +3738,11 @@ intel_miptree_map(struct brw_context *brw,
>>(mt->surf.row_pitch % 16 == 0)) {
>>intel_miptree_map_movntdqa(brw, mt, map, level, slice);
>>  #endif
>> +   } else if (mt->su

Re: [Mesa-dev] [PATCH v2 5/5] i965/miptree: Don't gtt map from map_depthstencil

2018-04-10 Thread Scott D Phillips
Chris Wilson  writes:

> Quoting Scott D Phillips (2018-04-03 21:05:45)
>> Instead of gtt mapping, call out to other map functions (map_map
>> or map_tiled_memcpy) for the depth surface. Removes a place where
>> gtt mapping is used.
>> 
>> v2: add level, slice to debug print (Nanley)
>> ---
>> @@ -3549,16 +3555,21 @@ intel_miptree_unmap_depthstencil(struct brw_context 
>> *brw,
>> bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
>>  
>> if (map->mode & GL_MAP_WRITE_BIT) {
>> +  struct intel_miptree_map z_mt_map = {
>> + .mode = map->mode | GL_MAP_INVALIDATE_RANGE_BIT, .x = map->x,
>> + .y = map->y, .w = map->w, .h = map->h,
>> +  };
>> +  if (z_mt->surf.tiling == ISL_TILING_LINEAR)
>> + intel_miptree_map_map(brw, z_mt, &z_mt_map, level, slice);
>
> No easy way to use movntdqa to avoid WC reads?
>
> It feels like duplicating the decision tree from intel_miptree_map,
> which presumably should be already able to choose the preferred access
> for the internal planes. Or does that logic need to be refactored for
> reuse here?

I think I was lacking some piece of state to call into
intel_miptree_map, but you're right that the duplication is already
causing a split in the behavior, just one patch rev later. I'll go back
and see if I can get this to go into the regular map code.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 057/104] nir,spirv: Rework function calls

2018-04-10 Thread Jason Ekstrand
On Tue, Apr 10, 2018 at 8:17 AM, Rob Clark  wrote:

> On Tue, Apr 10, 2018 at 11:04 AM, Jason Ekstrand 
> wrote:
> > On Tue, Apr 10, 2018 at 6:20 AM, Rob Clark  wrote:
> >>
> >> On Mon, Apr 9, 2018 at 10:52 PM, Jason Ekstrand 
> >> wrote:
> >> > + A bunch of potentially interested parties.
> >> >
> >> > On Mon, Apr 9, 2018 at 4:25 PM, Caio Marcelo de Oliveira Filho
> >> >  wrote:
> >> >>
> >> >> Hi,
> >> >>
> >> >> >  typedef struct {
> >> >> > -   nir_parameter_type param_type;
> >> >> > -   const struct glsl_type *type;
> >> >> > +   uint8_t num_components;
> >> >> > +   uint8_t bit_size;
> >> >> >  } nir_parameter;
> >> >>
> >> >> (...)
> >> >>
> >> >> > @@ -683,18 +692,12 @@ validate_tex_instr(nir_tex_instr *instr,
> >> >> > validate_state *state)
> >> >> >  static void
> >> >> >  validate_call_instr(nir_call_instr *instr, validate_state *state)
> >> >> >  {
> >> >> > -   if (instr->return_deref == NULL) {
> >> >> > -  validate_assert(state,
> >> >> > glsl_type_is_void(instr->callee->return_type));
> >> >> > -   } else {
> >> >> > -  validate_assert(state, instr->return_deref->deref.type ==
> >> >> > instr->callee->return_type);
> >> >> > -  validate_deref_var(instr, instr->return_deref, state);
> >> >> > -   }
> >> >> > -
> >> >> > validate_assert(state, instr->num_params ==
> >> >> > instr->callee->num_params);
> >> >> >
> >> >> > for (unsigned i = 0; i < instr->num_params; i++) {
> >> >> > -  validate_assert(state, instr->callee->params[i].type ==
> >> >> > instr->params[i]->deref.type);
> >> >> > -  validate_deref_var(instr, instr->params[i], state);
> >> >> > +  validate_src(&instr->params[i], state,
> >> >> > +   instr->callee->params[i].bit_size,
> >> >> > +   instr->callee->params[i].num_components);
> >> >> > }
> >> >> >  }
> >> >>
> >> >> Question: I might be misreading, but it seems like we are losing the
> >> >> type information for functions. Isn't that something worth keeping,
> >> >> maybe in some other way, e.g. load_param specifying the expected
> >> >> type?
> >> >
> >> >
> >> > That's a very good question!  To be honest, I'm not sure what the
> >> > answer is.  At the moment, the type information is fairly useless for
> >> > most of what we use functions for.  Really, all we need is something
> >> > that NIR can inline.  As it is, we're not really preserving the types
> >> > from SPIR-V because of the gymnastics we're doing to handle pointers.
> >> >
> >> > If we did want to preserve types, we'd need to have more detailed type
> >> > information.  In particular, we'd need to be able to provide pointer
> >> > types and maybe combined image-sampler types.  And along with those
> >> > pointer types, we'd need to somehow express those pointers' storage
> >> > requirements.
> >> >
> >> > The philosophy behind this commit is that, if we don't have a good
> >> > match to SPIR-V anyway, we might as well just chuck that information
> >> > and do whatever makes our lives the easiest.  My philosophy here may be
> >> > flawed and I'm happy to hear arguments in favor of keeping the
> >> > information.  The best argument I can come up with for keeping the
> >> > information is if we find ourselves wanting to do some sort of linking
> >> > in the future where we have to match functions by both name and type.
> >> > If we want to do that, however, we'll need all the SPIR-V type
> >> > information.
> >> >
> >>
> >> We do end up wanting the type information for cl kernels.  This is
> >> maybe a slightly different case from calls within shader code (ie.
> >> when both caller and callee are in shader).
> >
> >
> > Yes, I think it is.  Question: Is there a distinction in CL between
> > functions which are entrypoints callable from the API and functions which
> > are helpers?  i.e. Can you call an entrypoint as a helper?
> >
>
> There is the __kernel annotation.  And you know the entry point name
> when compiling.  However I'm not sure anything prevents one entry
> point from calling another.
>

That would be worth investigating.


> I'm not sure we want the calling convention to be the same internally
> as for kernel entry points so in that case, if we aren't inlining
> everything, we might end up generating two versions of a function (or
> possibly a shim.. or possibly between the two based on size.. or??)
>

Having a shim seems like a reasonable plan.


> >>
> >> Although I'd kinda like
> >> to think that we don't need to make vtn aware of this distinction.
> >
> >
> > Someone has to be aware of it. :-)  There are lots of places in
> > spirv_to_nir where we take the SPIR-V and do something slightly different
> > with it than the obvious translation.  Also, using function parameters
> > for this is a significant anachronism because no other shader I/O in NIR
> > has ever worked that way.
> >
> >>
> >> So just to throw out an idea.  What if vtn just used load_deref

Re: [Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-10 Thread Jason Ekstrand
On Tue, Apr 10, 2018 at 8:35 AM, Karol Herbst  wrote:

> On Tue, Apr 10, 2018 at 5:12 PM, Jason Ekstrand 
> wrote:
> > I still don't see anything to make nir_validate not fail out on you if it
> > sees a read or a write to/from an IMAGE or SAMPLER.
> >
>
> what kind of glsl code are you talking about here? I wrote some tests
> and things just seem to work out. I wasn't able to hit any other
> issues.
>

Were they tests where GLSL was able to copy propagate such that NIR never
saw a write to the image/sampler variable?


> > On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst 
> wrote:
> >>
> >> v2: fix assertion for bindless to non bindless assignments
> >>
> >> Signed-off-by: Karol Herbst 
> >> ---
> >>  src/compiler/nir/nir_split_var_copies.c | 8 +++-
> >>  1 file changed, 7 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/src/compiler/nir/nir_split_var_copies.c
> >> b/src/compiler/nir/nir_split_var_copies.c
> >> index bc3ceedbdb8..e592754d770 100644
> >> --- a/src/compiler/nir/nir_split_var_copies.c
> >> +++ b/src/compiler/nir/nir_split_var_copies.c
> >> @@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct
> >> split_var_copies_state *state)
> >>nir_deref_var *src_head = intrinsic->variables[1];
> >>nir_deref *dest_tail = nir_deref_tail(&dest_head->deref);
> >>nir_deref *src_tail = nir_deref_tail(&src_head->deref);
> >> +  enum glsl_base_type base_type = glsl_get_base_type(src_tail->
> type);
> >>
> >> -  switch (glsl_get_base_type(src_tail->type)) {
> >> +  switch (base_type) {
> >>case GLSL_TYPE_ARRAY:
> >>case GLSL_TYPE_STRUCT:
> >>   split_var_copy_instr(intrinsic, dest_head, src_head,
> >> @@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct
> >> split_var_copies_state *state)
> >>  ralloc_steal(state->dead_ctx, instr);
> >>   }
> >>   break;
> >> +  /* for bindless those are uint64 */
> >> +  case GLSL_TYPE_IMAGE:
> >> +  case GLSL_TYPE_SAMPLER:
> >> + assert(src_head->var->data.bindless ||
> >> +glsl_get_base_type(src_head->var->type) == base_type);
> >>case GLSL_TYPE_INT:
> >>case GLSL_TYPE_UINT:
> >>case GLSL_TYPE_INT16:
> >> --
> >> 2.14.3
> >>
> >
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/5] i965/tiled_memcpy: inline movntdqa loads in tiled_to_linear

2018-04-10 Thread Chris Wilson
Quoting Scott D Phillips (2018-04-10 16:33:18)
> Chris Wilson  writes:
> 
> > Quoting Chris Wilson (2018-04-05 20:54:54)
> > > Quoting Scott D Phillips (2018-04-03 21:05:42)
> 
> [...]
> 
> > > Ok, was hoping to see how you choose to use the streaming load, but I
> > > guess that's the next patch.
> > > 
> > > Reviewed-by: Chris Wilson 
> >
> > Oh, one point Eric Anholt mentioned on another thread about movntdqa is
> > that stale data inside the internal buffer is not automatically
> > invalidated. We may need to emit an explicit mfence before the copies if we
> > are in doubt. A single mfence per tiled-copy is probably not enough to
> > worry about optimising away.
> 
> Looking around, I found this erratum about movntdqa not honoring the
> ordering guarantees of locked instructions (VLP31 in the pdf):
> 
> https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/pentium-n3520-j2850-celeron-n2920-n2820-n2815-n2806-j1850-j1750-spec-update.pdf
> 
> So I added this code near the top of tiled_to_linear():
> 
> if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) {
>    /* Various atom processors have errata where the movntdqa instruction
>     * (which is used in streaming_load_memcpy) may incorrectly be reordered
>     * before locked instructions. To work around that, we put an lfence
>     * here to manually wait for preceding loads to be completed.
>     */
>__builtin_ia32_lfence();
> }
> 
> It seems that an mfence won't suffice where the errata mentions you need
> the lfence, by my hazy understanding. Do I have that right, or should
> this be an mfence?

An lfence is a weaker version of mfence. We are not using locked
instructions for serialising access within the data, or at least not
from the perspective of serialising it with the GPU. Certainly it's not
been an issue for the kernel. *touch wood*

Note that you can use _mm_*fence() to keep using a consistent instruction set.
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 3/5] i965/miptree: Use cpu tiling/detiling when mapping

2018-04-10 Thread Chris Wilson
Quoting Scott D Phillips (2018-04-10 16:49:16)
> Chris Wilson  writes:
> 
> > Quoting Scott D Phillips (2018-04-03 21:05:43)
> >> Rename the (un)map_gtt functions to (un)map_map (map by
> >> returning a map) and add new functions (un)map_tiled_memcpy that
> >> return a shadow buffer populated with the intel_tiled_memcpy
> >> functions.
> >> 
> >> Tiling/detiling with the cpu will be the only way to handle Yf/Ys
> >> tiling, when support is added for those formats.
> >> 
> >> v2: Compute extents properly in the x|y-rounded-down case (Chris Wilson)
> >> 
> >> v3: Add units to parameter names of tile_extents (Nanley Chery)
> >> Use _mesa_align_malloc for the shadow copy (Nanley)
> >> Continue using gtt maps on gen4 (Nanley)
> >> 
> >> v4: Use streaming_load_memcpy when detiling
> >> ---
> >>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 108 
> >> --
> >>  1 file changed, 100 insertions(+), 8 deletions(-)
> >> 
> >> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> >> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> >> index 23cb40f3226..58ffe868d0d 100644
> >> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> >> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> 
> [...]
> 
> >> @@ -3093,11 +3094,93 @@ intel_miptree_map_gtt(struct brw_context *brw,
> >>  }
> >>  
> >>  static void
> >> -intel_miptree_unmap_gtt(struct intel_mipmap_tree *mt)
> >> +intel_miptree_unmap_map(struct intel_mipmap_tree *mt)
> >>  {
> >> intel_miptree_unmap_raw(mt);
> >>  }
> >>  
> >> +/* Compute extent parameters for use with tiled_memcpy functions.
> >> + * xs are in units of bytes and ys are in units of strides. */
> >> +static inline void
> >> +tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
> >> + unsigned int level, unsigned int slice, unsigned int *x1_B,
> >> + unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
> >> +{
> >> +   unsigned int block_width, block_height;
> >> +   unsigned int x0_el, y0_el;
> >> +
> >> +   _mesa_get_format_block_size(mt->format, &block_width, &block_height);
> >> +
> >> +   assert(map->x % block_width == 0);
> >> +   assert(map->y % block_height == 0);
> >> +
> >> +   intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
> >> +   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
> >> +   *y1_el = map->y / block_height + y0_el;
> >> +   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
> >> +   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
> >> +}
> >> +
> >> +static void
> >> +intel_miptree_map_tiled_memcpy(struct brw_context *brw,
> >> +   struct intel_mipmap_tree *mt,
> >> +   struct intel_miptree_map *map,
> >> +   unsigned int level, unsigned int slice)
> >> +{
> >> +   unsigned int x1, x2, y1, y2;
> >> +   tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
> >> +   map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16);
> >> +
> >> +   /* The tiling and detiling functions require that the linear buffer
> >> +* has proper 16-byte alignment (that is, `x0` is 16-byte aligned).
> >
> > Throw in an "its" here, i.e. (that is, its `x0`...). I just spent a few
> > moments wondering what x0 was before remembering it's the internal x0 of
> > tiled_to_linear().
> >
> > We really want to move that knowledge back to intel_tiled_memcpy.c. A
> > single user isn't enough to justify a lot of effort though (or be sure
> > you get the interface right).
> 
> You mean putting the code to decide the stride and alignment
> requirements by the detiling code, something like
> alloc_linear_for_tiled?

Something like that, but I don't think it's worth it unless you have some
other candidates.
 
> >> +* Here we over-allocate the linear buffer by enough bytes to get
> >> +* the proper alignment.
> >> +*/
> >> +   map->buffer = _mesa_align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 
> >> 16);
> >> +   map->ptr = (char *)map->buffer + (x1 & 0xf);
> >> +   assert(map->buffer);
> >> +
> >> +   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
> >> +  char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
> >> +  src += mt->offset;
> >> +
> >> +  const mem_copy_fn fn =
> >> +#if defined(USE_SSE41)
> >> + cpu_has_sse4_1 ? (mem_copy_fn)_mesa_streaming_load_memcpy :
> >> +#endif
> >> + memcpy;
> >
> > So always use a streaming load and bypass cache, even coming from WB.
> > Justifiable I believe, since there is no reason to keep it in cache as
> > the modification is on map->buffer not the tiled bo.
> >
> > But do we want to use this path if !USE_SSE41 and WC? Let's see if
> > that's excluded.
> 
> Presently the logic is to always do map_tiled_memcpy for tiled surfaces,
> except on gen 4 where we finally could do a gtt map. You're saying we're
> better off doing a gtt map if we do have a wc map and don't have
> movntdqa

[Mesa-dev] [PATCH] mesa: remove struct gl_extensions::ATI_separate_stencil

2018-04-10 Thread Emil Velikov
From: Emil Velikov 

Virtually every driver that supports ATI_separate_stencil
also supports EXT_stencil_two_side.

Use the latter boolean for both extensions. With that in mind we can drop
the explicit true from the drivers and the nasty comment in
compute_version().

Signed-off-by: Emil Velikov 
---
Noticed while skimming through compute_version(). I don't have a strong
opinion about this patch.
---
 src/mesa/drivers/dri/i915/intel_extensions.c |  1 -
 src/mesa/drivers/dri/i965/intel_extensions.c |  1 -
 src/mesa/main/extensions.c   |  1 -
 src/mesa/main/extensions_table.h |  2 +-
 src/mesa/main/mtypes.h   |  1 -
 src/mesa/main/version.c  | 10 +-
 src/mesa/state_tracker/st_extensions.c   |  1 -
 7 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i915/intel_extensions.c 
b/src/mesa/drivers/dri/i915/intel_extensions.c
index c85bd787fe..9c5398ef85 100644
--- a/src/mesa/drivers/dri/i915/intel_extensions.c
+++ b/src/mesa/drivers/dri/i915/intel_extensions.c
@@ -89,7 +89,6 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.EXT_texture_sRGB = true;
   ctx->Extensions.EXT_texture_sRGB_decode = true;
   ctx->Extensions.EXT_stencil_two_side = true;
-  ctx->Extensions.ATI_separate_stencil = true;
   ctx->Extensions.ATI_texture_env_combine3 = true;
   ctx->Extensions.NV_texture_env_combine4 = true;
 
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 73a6c73f53..a9a564d8a6 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -118,7 +118,6 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.KHR_robustness = true;
ctx->Extensions.AMD_seamless_cubemap_per_texture = true;
ctx->Extensions.APPLE_object_purgeable = true;
-   ctx->Extensions.ATI_separate_stencil = true;
ctx->Extensions.ATI_texture_env_combine3 = true;
ctx->Extensions.MESA_pack_invert = true;
ctx->Extensions.NV_conditional_render = true;
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 38fc52d020..10cd92708f 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -144,7 +144,6 @@ _mesa_enable_sw_extensions(struct gl_context *ctx)
ctx->Extensions.ATI_texture_compression_3dc = GL_TRUE;
ctx->Extensions.ATI_texture_env_combine3 = GL_TRUE;
ctx->Extensions.ATI_texture_mirror_once = GL_TRUE;
-   ctx->Extensions.ATI_separate_stencil = GL_TRUE;
ctx->Extensions.EXT_blend_color = GL_TRUE;
ctx->Extensions.EXT_blend_equation_separate = GL_TRUE;
ctx->Extensions.EXT_blend_func_separate = GL_TRUE;
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 492f7c3d20..199dec820f 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -187,7 +187,7 @@ EXT(ATI_blend_equation_separate , 
EXT_blend_equation_separate
 EXT(ATI_draw_buffers, dummy_true   
  , GLL,  x ,  x ,  x , 2002)
 EXT(ATI_fragment_shader , ATI_fragment_shader  
  , GLL,  x ,  x ,  x , 2001)
 EXT(ATI_meminfo , ATI_meminfo  
  , GLL, GLC,  x ,  x , 2009)
-EXT(ATI_separate_stencil, ATI_separate_stencil 
  , GLL,  x ,  x ,  x , 2006)
+EXT(ATI_separate_stencil, EXT_stencil_two_side 
  , GLL,  x ,  x ,  x , 2006)
 EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc  
  , GLL,  x ,  x ,  x , 2004)
 EXT(ATI_texture_env_combine3, ATI_texture_env_combine3 
  , GLL,  x ,  x ,  x , 2002)
 EXT(ATI_texture_float   , ARB_texture_float
  , GLL, GLC,  x ,  x , 2002)
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index b7a7b34a09..c61a4e9bb2 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -4333,7 +4333,6 @@ struct gl_extensions
GLboolean ATI_texture_mirror_once;
GLboolean ATI_texture_env_combine3;
GLboolean ATI_fragment_shader;
-   GLboolean ATI_separate_stencil;
GLboolean GREMEDY_string_marker;
GLboolean INTEL_conservative_rasterization;
GLboolean INTEL_performance_query;
diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index 0a4e7630da..4314556b65 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -261,15 +261,7 @@ compute_version(const struct gl_extensions *extensions,
  extensions->ARB_fragment_shader &&
  extensions->ARB_texture_non_power_of_two &&
  extensions->EXT_blend_equation_separate &&
-
- /* Technically, 2.0 requires the functionality of the
-  * EXT version.  Enable 2

Re: [Mesa-dev] [PATCH v4 6/6] i965: gl_BaseVertex must be zero for non-indexed draw calls

2018-04-10 Thread Jason Ekstrand
On Tue, Apr 10, 2018 at 1:28 AM, Antia Puentes  wrote:

> On 07/04/18 08:21, Jason Ekstrand wrote:
>
> On Fri, Apr 6, 2018 at 2:53 PM, Ian Romanick  wrote:
>
>> From: Antia Puentes 
>>
>> We keep 'firstvertex' as it is and move gl_BaseVertex to the drawID
>> vertex element. The previous Vertex Elements order was:
>>
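>>   * VE 1: <firstvertex, BaseInstance, VertexID, InstanceID>
>>   * VE 2: <Draw ID, 0, 0, 0>
>>
>> and now it is:
>>
>>   * VE 1: <firstvertex, BaseInstance, VertexID, InstanceID>
>>   * VE 2: <Draw ID, BaseVertex, 0, 0>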
>>
>> Moving BaseVertex while keeping VE 1 as it is allows us to keep pointing
>> the vertex buffer associated with VE 1 at the indirect buffer for indirect
>> draw calls.
>>
>> From the OpenGL 4.6 (11.1.3.9 Shader Inputs) specification:
>>
>>   "gl_BaseVertex holds the integer value passed to the baseVertex
>>   parameter to the command that resulted in the current shader
>>   invocation. In the case where the command has no baseVertex parameter,
>>   the value of gl_BaseVertex is zero."
>>
>> Fixes CTS tests:
>>
>>   * KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysParameters
>>   * KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysInstancedParameters
>>   * KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysParameters
>>   * KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysIndirectParameters
>>   * KHR-GL45.shader_draw_parameters_tests.MultiDrawArraysIndirectCountParameters
>>
>> v2 (idr): Make changes to brw_prepare_shader_draw_parameters matching
>> those in genX(emit_vertices).  Reformat commit message to 72 columns.
>>
>> Signed-off-by: Ian Romanick 
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102678
>> ---
>>  src/intel/compiler/brw_nir.c  | 14 +
>>  src/intel/compiler/brw_vec4.cpp   | 14 +
>>  src/mesa/drivers/dri/i965/brw_context.h   | 32 ++-
>>  src/mesa/drivers/dri/i965/brw_draw.c  | 45
>> ++-
>>  src/mesa/drivers/dri/i965/brw_draw_upload.c   | 14 -
>>  src/mesa/drivers/dri/i965/genX_state_upload.c | 38
>> +++---
>>  6 files changed, 97 insertions(+), 60 deletions(-)
>>
>> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
>> index 16b0d86814f..16ab529737b 100644
>> --- a/src/intel/compiler/brw_nir.c
>> +++ b/src/intel/compiler/brw_nir.c
>> @@ -238,8 +238,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>>  */
>> const bool has_sgvs =
>>nir->info.system_values_read &
>> -  (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
>> -   BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>> +  (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>> BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
>> BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>> BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));
>> @@ -279,7 +278,6 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>>
>> nir_intrinsic_set_base(load, num_inputs);
>> switch (intrin->intrinsic) {
>> -   case nir_intrinsic_load_base_vertex:
>> case nir_intrinsic_load_first_vertex:
>>nir_intrinsic_set_component(load, 0);
>>break;
>> @@ -293,11 +291,15 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>>nir_intrinsic_set_component(load, 3);
>>break;
>> case nir_intrinsic_load_draw_id:
>> -  /* gl_DrawID is stored right after gl_VertexID and
>> friends
>> -   * if any of them exist.
>> +   case nir_intrinsic_load_base_vertex:
>> +  /* gl_DrawID and gl_BaseVertex are stored right after
>> + gl_VertexID and friends if any of them exist.
>> */
>>nir_intrinsic_set_base(load, num_inputs + has_sgvs);
>> -  nir_intrinsic_set_component(load, 0);
>> +  if (intrin->intrinsic == nir_intrinsic_load_draw_id)
>> + nir_intrinsic_set_component(load, 0);
>> +  else
>> + nir_intrinsic_set_component(load, 1);
>>break;
>> default:
>>unreachable("Invalid system value intrinsic");
>> diff --git a/src/intel/compiler/brw_vec4.cpp
>> b/src/intel/compiler/brw_vec4.cpp
>> index 1e384f5bf4d..d33caefdea9 100644
>> --- a/src/intel/compiler/brw_vec4.cpp
>> +++ b/src/intel/compiler/brw_vec4.cpp
>> @@ -2825,14 +2825,19 @@ brw_compile_vs(const struct brw_compiler
>> *compiler, void *log_data,
>>  * incoming vertex attribute.  So, add an extra slot.
>>  */
>> if (shader->info.system_values_read &
>> -   (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
>> -BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>> +   (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>>  BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
>>  BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>>  BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
>>nr_attribute_slots++;
>> }
>>
>> +  

Re: [Mesa-dev] [PATCH] mesa: remove struct gl_extensions::ATI_separate_stencil

2018-04-10 Thread Roland Scheidegger
Yes, there is indeed plenty of hw (all with d3d heritage; d3d10 doesn't
support different ref/masks) which doesn't actually have full support for
two-sided stencil.
I think all drivers just cheat and fail though, since they really want to
expose GL 2 anyway.
So I suppose that's ok, albeit I don't really have an opinion on it.

Roland


Am 10.04.2018 um 18:11 schrieb Emil Velikov:
> From: Emil Velikov 
> 
> Virtually every driver that supports ATI_separate_stencil
> also supports EXT_stencil_two_side.
> 
> Use the latter boolean for both extensions. With that in mind we can drop
> the explicit true from the drivers and the nasty comment in
> compute_version().
> 
> Signed-off-by: Emil Velikov 
> ---
> Noticed while skimming through compute_version(). I don't have a strong
> opinion about this patch.
> ---
>  src/mesa/drivers/dri/i915/intel_extensions.c |  1 -
>  src/mesa/drivers/dri/i965/intel_extensions.c |  1 -
>  src/mesa/main/extensions.c   |  1 -
>  src/mesa/main/extensions_table.h |  2 +-
>  src/mesa/main/mtypes.h   |  1 -
>  src/mesa/main/version.c  | 10 +-
>  src/mesa/state_tracker/st_extensions.c   |  1 -
>  7 files changed, 2 insertions(+), 15 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i915/intel_extensions.c 
> b/src/mesa/drivers/dri/i915/intel_extensions.c
> index c85bd787fe..9c5398ef85 100644
> --- a/src/mesa/drivers/dri/i915/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i915/intel_extensions.c
> @@ -89,7 +89,6 @@ intelInitExtensions(struct gl_context *ctx)
>ctx->Extensions.EXT_texture_sRGB = true;
>ctx->Extensions.EXT_texture_sRGB_decode = true;
>ctx->Extensions.EXT_stencil_two_side = true;
> -  ctx->Extensions.ATI_separate_stencil = true;
>ctx->Extensions.ATI_texture_env_combine3 = true;
>ctx->Extensions.NV_texture_env_combine4 = true;
>  
> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
> b/src/mesa/drivers/dri/i965/intel_extensions.c
> index 73a6c73f53..a9a564d8a6 100644
> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> @@ -118,7 +118,6 @@ intelInitExtensions(struct gl_context *ctx)
> ctx->Extensions.KHR_robustness = true;
> ctx->Extensions.AMD_seamless_cubemap_per_texture = true;
> ctx->Extensions.APPLE_object_purgeable = true;
> -   ctx->Extensions.ATI_separate_stencil = true;
> ctx->Extensions.ATI_texture_env_combine3 = true;
> ctx->Extensions.MESA_pack_invert = true;
> ctx->Extensions.NV_conditional_render = true;
> diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
> index 38fc52d020..10cd92708f 100644
> --- a/src/mesa/main/extensions.c
> +++ b/src/mesa/main/extensions.c
> @@ -144,7 +144,6 @@ _mesa_enable_sw_extensions(struct gl_context *ctx)
> ctx->Extensions.ATI_texture_compression_3dc = GL_TRUE;
> ctx->Extensions.ATI_texture_env_combine3 = GL_TRUE;
> ctx->Extensions.ATI_texture_mirror_once = GL_TRUE;
> -   ctx->Extensions.ATI_separate_stencil = GL_TRUE;
> ctx->Extensions.EXT_blend_color = GL_TRUE;
> ctx->Extensions.EXT_blend_equation_separate = GL_TRUE;
> ctx->Extensions.EXT_blend_func_separate = GL_TRUE;
> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index 492f7c3d20..199dec820f 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -187,7 +187,7 @@ EXT(ATI_blend_equation_separate , 
> EXT_blend_equation_separate
>  EXT(ATI_draw_buffers, dummy_true 
> , GLL,  x ,  x ,  x , 2002)
>  EXT(ATI_fragment_shader , ATI_fragment_shader
> , GLL,  x ,  x ,  x , 2001)
>  EXT(ATI_meminfo , ATI_meminfo
> , GLL, GLC,  x ,  x , 2009)
> -EXT(ATI_separate_stencil, ATI_separate_stencil   
> , GLL,  x ,  x ,  x , 2006)
> +EXT(ATI_separate_stencil, EXT_stencil_two_side   
> , GLL,  x ,  x ,  x , 2006)
>  EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc
> , GLL,  x ,  x ,  x , 2004)
>  EXT(ATI_texture_env_combine3, ATI_texture_env_combine3   
> , GLL,  x ,  x ,  x , 2002)
>  EXT(ATI_texture_float   , ARB_texture_float  
> , GLL, GLC,  x ,  x , 2002)
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index b7a7b34a09..c61a4e9bb2 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -4333,7 +4333,6 @@ struct gl_extensions
> GLboolean ATI_texture_mirror_once;
> GLboolean ATI_texture_env_combine3;
> GLboolean ATI_fragment_shader;
> -   GLboolean ATI_separate_stencil;
> GLboolean GREMEDY_string_marker;
> GLboolean INTEL_conservative_rasterization;
> GLboolean INTEL_performance_query;

Re: [Mesa-dev] [PATCH] docs/release-calendar: update to include 18.1 and 18.2

2018-04-10 Thread Juan A. Suarez Romero
On Tue, 2018-04-10 at 16:23 +0100, Emil Velikov wrote:
> On 10 April 2018 at 09:06, Juan A. Suarez Romero  wrote:
> > On Mon, 2018-04-09 at 19:02 +0100, Emil Velikov wrote:
> > > From: Emil Velikov 
> > > 
> > > Dylan has kindly stepped up to help with 18.1.0, while I've taken the
> > > liberty to nominate Andres for 18.2.0 ;-)
> > > 
> > 
> > I would like to replace Andres for the 18.0.x releases. We already talked 
> > about
> > that and both think it is a good idea.
> > 
> > With that change, the remaining proposal looks good to me.
> > 
> > 
> > Reviewed-by: Juan A. Suarez 
> > 
> 
> Fair enough - swapped Andres with yourself for the 18.0.x series and
> pushed to master.
> 
> AFAICT you'll be doing both 18.0.1 and 17.3.9 (last week?), which was
> previously suggested as something we should avoid.
> Please let us know if everything's OK and if we can help out.
> 

Yup. This is an exceptional case, as 17.3.9 is the last one. Afterwards, I'll be
 doing only 18.0.x releases.


J.A.

> -Emil
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] egl/x11: Handle both depth 30 formats for eglCreateImage().

2018-04-10 Thread Ilia Mirkin
On Tue, Apr 10, 2018 at 4:42 AM, Michel Dänzer  wrote:
> On 2018-04-10 10:22 AM, Mario Kleiner wrote:
>> On 04/09/2018 12:12 PM, Michel Dänzer wrote:
>>> On 2018-04-06 08:56 PM, Mario Kleiner wrote:
>>>
>>> I'm interested in the full xdpyinfo *at screen depth 30*, in particular
>>> whether it lists only one variant of depth 30 visuals. If so, one
>>> possibility for a kludge would be to just look at any depth 30 visual.
>>
>> Ok, the fresh v2 patch implements that kludge. This one retested to work
>> on nouveau, ati, intel.
>>
>> On intel and nouveau we only get one channel mask for depth 30 visuals
>> in xdpyinfo. On amd we get both masks for xrgb2101010 and xbgr2101010,
>> as the amd gallium drivers expose both formats, but the ordering is
>> xrgb2101010 first, so that's fine when picking the first depth 30 visual
>> to get the channel mask for decisions.
>
> Hmm, that sounds fragile though when there are both variants; is there
> any guarantee they can't appear in the opposite order?

It seems like nouveau is stirring a bit of a hornet's nest here.
Unfortunately there's not a whole lot I can do about hw scanout format
support (rgb10x2 only, no bgr10x2 support until Kepler), but is there
something else that the DDX and/or mesa driver should be doing to
avoid some of this pain?

Should we get the *other* ddx's to avoid exposing both masks?

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: remove unreachable assert()

2018-04-10 Thread Ivan Kalvachev
On 3/28/18, Emil Velikov  wrote:
> From: Emil Velikov 
>
> Earlier commit enforced that we'll bail out if the number of terminators
> is different than 2. With that in mind, the assert() will never trigger.
>
> Fixes: 56b867395de ("glsl: fix infinite loop caused by bug in loop
> unrolling pass")
> Cc: Timothy Arceri 
> Signed-off-by: Emil Velikov 

Just a nitpick.
The explanation doesn't sound right to me.

Asserts are meant to never trigger.
They are used to check the internal logic of the code.

If this assert does trigger that would mean
that there is a bug in the code that makes sure
the number of terminators is different than 2.

It is better to catch a bug with an assert than
to silently do something wrong.

Also, sometimes compilers might use
the assert assumptions to optimize the code.
(Even when the assertion itself is disabled.)

Best Regards.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 057/104] nir,spirv: Rework function calls

2018-04-10 Thread Rob Clark
On Tue, Apr 10, 2018 at 11:55 AM, Jason Ekstrand  wrote:
> On Tue, Apr 10, 2018 at 8:17 AM, Rob Clark  wrote:
>>
>> On Tue, Apr 10, 2018 at 11:04 AM, Jason Ekstrand 
>> wrote:
>> > On Tue, Apr 10, 2018 at 6:20 AM, Rob Clark  wrote:
>> >>
>> >> On Mon, Apr 9, 2018 at 10:52 PM, Jason Ekstrand 
>> >> wrote:
>> >> > + A bunch of potentially interested parties.
>> >> >
>> >> > On Mon, Apr 9, 2018 at 4:25 PM, Caio Marcelo de Oliveira Filho
>> >> >  wrote:
>> >> >>
>> >> >> Hi,
>> >> >>
>> >> >> >  typedef struct {
>> >> >> > -   nir_parameter_type param_type;
>> >> >> > -   const struct glsl_type *type;
>> >> >> > +   uint8_t num_components;
>> >> >> > +   uint8_t bit_size;
>> >> >> >  } nir_parameter;
>> >> >>
>> >> >> (...)
>> >> >>
>> >> >> > @@ -683,18 +692,12 @@ validate_tex_instr(nir_tex_instr *instr,
>> >> >> > validate_state *state)
>> >> >> >  static void
>> >> >> >  validate_call_instr(nir_call_instr *instr, validate_state *state)
>> >> >> >  {
>> >> >> > -   if (instr->return_deref == NULL) {
>> >> >> > -  validate_assert(state,
>> >> >> > glsl_type_is_void(instr->callee->return_type));
>> >> >> > -   } else {
>> >> >> > -  validate_assert(state, instr->return_deref->deref.type ==
>> >> >> > instr->callee->return_type);
>> >> >> > -  validate_deref_var(instr, instr->return_deref, state);
>> >> >> > -   }
>> >> >> > -
>> >> >> > validate_assert(state, instr->num_params ==
>> >> >> > instr->callee->num_params);
>> >> >> >
>> >> >> > for (unsigned i = 0; i < instr->num_params; i++) {
>> >> >> > -  validate_assert(state, instr->callee->params[i].type ==
>> >> >> > instr->params[i]->deref.type);
>> >> >> > -  validate_deref_var(instr, instr->params[i], state);
>> >> >> > +  validate_src(&instr->params[i], state,
>> >> >> > +   instr->callee->params[i].bit_size,
>> >> >> > +   instr->callee->params[i].num_components);
>> >> >> > }
>> >> >> >  }
>> >> >>
>> >> >> Question: I might be misreading, but it seems like we are losing the
>> >> >> type information for functions. Isn't that something worth keeping,
>> >> >> maybe in some other way, e.g. load_param specifying the expected
>> >> >> type?
>> >> >
>> >> >
>> >> > That's a very good question!  To be honest, I'm not sure what the
>> >> > answer
>> >> > is.
>> >> > At the moment, the type information is fairly useless for most of
>> >> > what
>> >> > we
>> >> > use functions for.  Really, all we need is something that NIR can
>> >> > inline.
>> >> > As it is, we're not really preserving the types from SPIR-V because
>> >> > of
>> >> > the
>> >> > gymnastics we're doing to handle pointers.
>> >> >
>> >> > If we did want to preserve types, we'd need to have more detailed
>> >> > type
>> >> > information.  In particular, we'd need to be able to provide pointer
>> >> > types
>> >> > and maybe combined image-sampler types.  And along with those pointer
>> >> > types,
>> >> > we'd need to somehow express those pointer's storage requirements.
>> >> >
>> >> > The philosophy behind this commit is that, if we don't have a good
>> >> > match
>> >> > to
>> >> > SPIR-V anyway, we might as well just chuck that information and do
>> >> > whatever
>> >> > makes our lives the easiest.  My philosophy here may be flawed and
>> >> > I'm
>> >> > happy
>> >> > to hear arguments in favor of keeping the information.  The best
>> >> > argument I
>> >> > can come up with for keeping the information is if we find ourselves
>> >> > wanting
>> >> > to do some sort of linking in the future where we have to match
>> >> > functions by
>> >> > both name and type.  If we want to do that, however, we'll need all
>> >> > the
>> >> > SPIR-V type information.
>> >> >
>> >>
>> >> We do end up wanting the type information for cl kernels.  This is
>> >> maybe a slightly different case from calls within shader code (ie.
>> >> when both caller and callee are in shader).
>> >
>> >
>> > Yes, I think it is.  Question: Is there a distinction in CL between
>> > functions which are entrypoints callable from the API and functions
>> > which
>> > are helpers?  i.e. Can you call an entrypoint as a helper?
>> >
>>
>> There is the __kernel annotation.  And you know the entry point name
>> when compiling.  However I'm not sure anything prevents one entry
>> point from calling another.
>
>
> That would be worth investigating.
>

fwiw, at least the cl to spv compiler seems to allow it.  (Although in
my simple examples it also inlines the called function before things
end up in spv.)

>>
>> I'm not sure we want the calling convention to be the same internally
>> as for kernel entry points so in that case, if we aren't inlining
>> everything, we might end up generating two versions of a function (or
>> possibly a shim.. or possibly between the two based on size.. or??)
>
>
> Having a shim seems like a reasonable plan.
>
>>
>> >>
>> >> Although I'd kinda like
>> >> to think that we don't need to make vtn aware of this distinction.
>> >
>> >
>> > Som

Re: [Mesa-dev] [PATCH] glsl: remove unreachable assert()

2018-04-10 Thread Emil Velikov
On 10 April 2018 at 17:53, Ivan Kalvachev  wrote:
> On 3/28/18, Emil Velikov  wrote:
>> From: Emil Velikov 
>>
>> Earlier commit enforced that we'll bail out if the number of terminators
>> is different than 2. With that in mind, the assert() will never trigger.
>>
>> Fixes: 56b867395de ("glsl: fix infinite loop caused by bug in loop
>> unrolling pass")
>> Cc: Timothy Arceri 
>> Signed-off-by: Emil Velikov 
>
> Just a nitpick.
> The explanation doesn't sound right to me.
>
> Asserts are meant to never trigger.
> They are used to check the internal logic of the code.
>
> If this assert does trigger that would mean
> that there is a bug in the code that makes sure
> the number of terminators is different than 2.
>
> It is better to catch a bug with an assert than
> to silently do something wrong.
>
Right, the wording is not perfect. As-is, the assert provides a misleading
assumption considering the explicit check.

> Also, sometimes compilers might use
> the assert assumptions to optimize the code.
> (Even when the assertion itself is disabled.)
>
Fully aware of that, yet I doubt it will matter in this case.
If you want to give it a check, that would be appreciated.

JFYI I've pushed this ~2h before your reply. But if people prefer I
can revert it.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: remove unreachable assert()

2018-04-10 Thread Jason Ekstrand
On Tue, Apr 10, 2018 at 10:05 AM, Emil Velikov 
wrote:

> On 10 April 2018 at 17:53, Ivan Kalvachev  wrote:
> > On 3/28/18, Emil Velikov  wrote:
> >> From: Emil Velikov 
> >>
> >> Earlier commit enforced that we'll bail out if the number of terminators
> >> is different than 2. With that in mind, the assert() will never trigger.
> >>
> >> Fixes: 56b867395de ("glsl: fix infinite loop caused by bug in loop
> >> unrolling pass")
>

This doesn't fix anything.  Not triggering an assert is not a bug.

Removing a bogus assert that does get triggered by perfectly valid
code-paths would be a bug fix.


> >> Cc: Timothy Arceri 
> >> Signed-off-by: Emil Velikov 
> >
> > Just a nitpick.
> > The explanation doesn't sound right to me.
> >
> > Asserts are meant to never trigger.
> > They are used to check the internal logic of the code.
> >
> > If this assert does trigger that would mean
> > that there is a bug in the code that makes sure
> > the number of terminators is different than 2.
> >
> > It is better to catch a bug with an assert than
> > to silently do something wrong.
> >
> Right. wording is not perfect. As-is the assert provides misleading
> assumption considering the explicit check


What misleading information would that be?  In this particular case, we
have multiple cases of "if (term_count == 1) { ... } else { ... }"  so
knowing that term_count never goes above 2 is useful.  How is
"assert(term_count < 2)" misleading?

In general, the point of asserts is to declare assumptions made by the code
that follows.  This both serves as documentation to developers and ensures
that we find out if those assumptions are ever violated.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 057/104] nir,spirv: Rework function calls

2018-04-10 Thread Jason Ekstrand
On Tue, Apr 10, 2018 at 9:59 AM, Rob Clark  wrote:

> On Tue, Apr 10, 2018 at 11:55 AM, Jason Ekstrand 
> wrote:
> > On Tue, Apr 10, 2018 at 8:17 AM, Rob Clark  wrote:
> >>
> >> On Tue, Apr 10, 2018 at 11:04 AM, Jason Ekstrand 
> >> wrote:
> >> > On Tue, Apr 10, 2018 at 6:20 AM, Rob Clark 
> wrote:
> >> >>
> >> >> On Mon, Apr 9, 2018 at 10:52 PM, Jason Ekstrand <
> ja...@jlekstrand.net>
> >> >> wrote:
> >> >> > + A bunch of potentially interested parties.
> >> >> >
> >> >> > On Mon, Apr 9, 2018 at 4:25 PM, Caio Marcelo de Oliveira Filho
> >> >> >  wrote:
> >> >> >>
> >> >> >> Hi,
> >> >> >>
> >> >> >> >  typedef struct {
> >> >> >> > -   nir_parameter_type param_type;
> >> >> >> > -   const struct glsl_type *type;
> >> >> >> > +   uint8_t num_components;
> >> >> >> > +   uint8_t bit_size;
> >> >> >> >  } nir_parameter;
> >> >> >>
> >> >> >> (...)
> >> >> >>
> >> >> >> > @@ -683,18 +692,12 @@ validate_tex_instr(nir_tex_instr *instr,
> >> >> >> > validate_state *state)
> >> >> >> >  static void
> >> >> >> >  validate_call_instr(nir_call_instr *instr, validate_state
> *state)
> >> >> >> >  {
> >> >> >> > -   if (instr->return_deref == NULL) {
> >> >> >> > -  validate_assert(state,
> >> >> >> > glsl_type_is_void(instr->callee->return_type));
> >> >> >> > -   } else {
> >> >> >> > -  validate_assert(state, instr->return_deref->deref.type
> ==
> >> >> >> > instr->callee->return_type);
> >> >> >> > -  validate_deref_var(instr, instr->return_deref, state);
> >> >> >> > -   }
> >> >> >> > -
> >> >> >> > validate_assert(state, instr->num_params ==
> >> >> >> > instr->callee->num_params);
> >> >> >> >
> >> >> >> > for (unsigned i = 0; i < instr->num_params; i++) {
> >> >> >> > -  validate_assert(state, instr->callee->params[i].type ==
> >> >> >> > instr->params[i]->deref.type);
> >> >> >> > -  validate_deref_var(instr, instr->params[i], state);
> >> >> >> > +  validate_src(&instr->params[i], state,
> >> >> >> > +   instr->callee->params[i].bit_size,
> >> >> >> > +   instr->callee->params[i].num_components);
> >> >> >> > }
> >> >> >> >  }
> >> >> >>
> >> >> >> Question: I might be misreading, but it seems like we are losing
> the
> >> >> >> type information for functions. Isn't that something worth
> keeping,
> >> >> >> maybe in some other way, e.g. load_param specifying the expected
> >> >> >> type?
> >> >> >
> >> >> >
> >> >> > That's a very good question!  To be honest, I'm not sure what the
> >> >> > answer
> >> >> > is.
> >> >> > At the moment, the type information is fairly useless for most of
> >> >> > what
> >> >> > we
> >> >> > use functions for.  Really, all we need is something that NIR can
> >> >> > inline.
> >> >> > As it is, we're not really preserving the types from SPIR-V because
> >> >> > of
> >> >> > the
> >> >> > gymnastics we're doing to handle pointers.
> >> >> >
> >> >> > If we did want to preserve types, we'd need to have more detailed
> >> >> > type
> >> >> > information.  In particular, we'd need to be able to provide
> pointer
> >> >> > types
> >> >> > and maybe combined image-sampler types.  And along with those
> pointer
> >> >> > types,
> >> >> > we'd need to somehow express those pointer's storage requirements.
> >> >> >
> >> >> > The philosophy behind this commit is that, if we don't have a good
> >> >> > match
> >> >> > to
> >> >> > SPIR-V anyway, we might as well just chuck that information and do
> >> >> > whatever
> >> >> > makes our lives the easiest.  My philosophy here may be flawed and
> >> >> > I'm
> >> >> > happy
> >> >> > to hear arguments in favor of keeping the information.  The best
> >> >> > argument I
> >> >> > can come up with for keeping the information is if we find
> ourselves
> >> >> > wanting
> >> >> > to do some sort of linking in the future where we have to match
> >> >> > functions by
> >> >> > both name and type.  If we want to do that, however, we'll need all
> >> >> > the
> >> >> > SPIR-V type information.
> >> >> >
> >> >>
> >> >> We do end up wanting the type information for cl kernels.  This is
> >> >> maybe a slightly different case from calls within shader code (ie.
> >> >> when both caller and callee are in shader).
> >> >
> >> >
> >> > Yes, I think it is.  Question: Is there a distinction in CL between
> >> > functions which are entrypoints callable from the API and functions
> >> > which
> >> > are helpers?  i.e. Can you call an entrypoint as a helper?
> >> >
> >>
> >> There is the __kernel annotation.  And you know the entry point name
> >> when compiling.  However I'm not sure anything prevents one entry
> >> point from calling another.
> >
> >
> > That would be worth investigating.
> >
>
> fwiw, at least the cl to spv compiler seems to allow it.  (Although in
> my simple examples it also inlines the called function before things
> end up in spv.)
>

Interesting.


> >>
> >> I'm not sure we want the calling convention to be the same internally
> >> as for kernel entry points so in th

Re: [Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-10 Thread Karol Herbst
On Tue, Apr 10, 2018 at 6:01 PM, Jason Ekstrand  wrote:
> On Tue, Apr 10, 2018 at 8:35 AM, Karol Herbst  wrote:
>>
>> On Tue, Apr 10, 2018 at 5:12 PM, Jason Ekstrand 
>> wrote:
>> > I still don't see anything to make nir_validate not fail out on you if
>> > it
>> > sees a read or a write to/from an IMAGE or SAMPLER.
>> >
>>
>> what kind of glsl code are you talking about here? I wrote some tests
>> and things just seem to work out. I wasn't able to hit any other
>> issues.
>
>
> Were they tests where GLSL was able to copy propagate such that NIR never
> saw a write to the image/sampler variable?
>

Well the trivial one is where you directly consume the uniform.

>>
>> > On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst 
>> > wrote:
>> >>
>> >> v2: fix assertion for bindless to non bindless assignments
>> >>
>> >> Signed-off-by: Karol Herbst 
>> >> ---
>> >>  src/compiler/nir/nir_split_var_copies.c | 8 +++-
>> >>  1 file changed, 7 insertions(+), 1 deletion(-)
>> >>
>> >> diff --git a/src/compiler/nir/nir_split_var_copies.c
>> >> b/src/compiler/nir/nir_split_var_copies.c
>> >> index bc3ceedbdb8..e592754d770 100644
>> >> --- a/src/compiler/nir/nir_split_var_copies.c
>> >> +++ b/src/compiler/nir/nir_split_var_copies.c
>> >> @@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct
>> >> split_var_copies_state *state)
>> >>nir_deref_var *src_head = intrinsic->variables[1];
>> >>nir_deref *dest_tail = nir_deref_tail(&dest_head->deref);
>> >>nir_deref *src_tail = nir_deref_tail(&src_head->deref);
>> >> +  enum glsl_base_type base_type =
>> >> glsl_get_base_type(src_tail->type);
>> >>
>> >> -  switch (glsl_get_base_type(src_tail->type)) {
>> >> +  switch (base_type) {
>> >>case GLSL_TYPE_ARRAY:
>> >>case GLSL_TYPE_STRUCT:
>> >>   split_var_copy_instr(intrinsic, dest_head, src_head,
>> >> @@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct
>> >> split_var_copies_state *state)
>> >>  ralloc_steal(state->dead_ctx, instr);
>> >>   }
>> >>   break;
>> >> +  /* for bindless those are uint64 */
>> >> +  case GLSL_TYPE_IMAGE:
>> >> +  case GLSL_TYPE_SAMPLER:
>> >> + assert(src_head->var->data.bindless ||
>> >> +glsl_get_base_type(src_head->var->type) == base_type);
>> >>case GLSL_TYPE_INT:
>> >>case GLSL_TYPE_UINT:
>> >>case GLSL_TYPE_INT16:
>> >> --
>> >> 2.14.3
>> >>
>> >
>> >
>> > ___
>> > mesa-dev mailing list
>> > mesa-dev@lists.freedesktop.org
>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> >
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/3] nir/split_var_copies: handle IMAGE and SAMPLER for bindless vars

2018-04-10 Thread Jason Ekstrand
On Tue, Apr 10, 2018 at 10:20 AM, Karol Herbst  wrote:

> On Tue, Apr 10, 2018 at 6:01 PM, Jason Ekstrand 
> wrote:
> > On Tue, Apr 10, 2018 at 8:35 AM, Karol Herbst 
> wrote:
> >>
> >> On Tue, Apr 10, 2018 at 5:12 PM, Jason Ekstrand 
> >> wrote:
> >> > I still don't see anything to make nir_validate not fail out on you if
> >> > it
> >> > sees a read or a write to/from an IMAGE or SAMPLER.
> >> >
> >>
> >> what kind of glsl code are you talking about here? I wrote some tests
> >> and things just seem to work out. I wasn't able to hit any other
> >> issues.
> >
> >
> > Were they tests where GLSL was able to copy propagate such that NIR never
> > saw a write to the image/sampler variable?
> >
>
> Well the trivial one is where you directly consume the uniform.
>

You could probably do something like this

sampler2D s;
for (int i = 0; i < uniform_one; i++)
   s = some_sampler;

texture(s, coords);



> >>
> >> > On Tue, Apr 10, 2018 at 8:05 AM, Karol Herbst 
> >> > wrote:
> >> >>
> >> >> v2: fix assertion for bindless to non bindless assignments
> >> >>
> >> >> Signed-off-by: Karol Herbst 
> >> >> ---
> >> >>  src/compiler/nir/nir_split_var_copies.c | 8 +++-
> >> >>  1 file changed, 7 insertions(+), 1 deletion(-)
> >> >>
> >> >> diff --git a/src/compiler/nir/nir_split_var_copies.c
> >> >> b/src/compiler/nir/nir_split_var_copies.c
> >> >> index bc3ceedbdb8..e592754d770 100644
> >> >> --- a/src/compiler/nir/nir_split_var_copies.c
> >> >> +++ b/src/compiler/nir/nir_split_var_copies.c
> >> >> @@ -222,8 +222,9 @@ split_var_copies_block(nir_block *block, struct
> >> >> split_var_copies_state *state)
> >> >>nir_deref_var *src_head = intrinsic->variables[1];
> >> >>nir_deref *dest_tail = nir_deref_tail(&dest_head->deref);
> >> >>nir_deref *src_tail = nir_deref_tail(&src_head->deref);
> >> >> +  enum glsl_base_type base_type =
> >> >> glsl_get_base_type(src_tail->type);
> >> >>
> >> >> -  switch (glsl_get_base_type(src_tail->type)) {
> >> >> +  switch (base_type) {
> >> >>case GLSL_TYPE_ARRAY:
> >> >>case GLSL_TYPE_STRUCT:
> >> >>   split_var_copy_instr(intrinsic, dest_head, src_head,
> >> >> @@ -241,6 +242,11 @@ split_var_copies_block(nir_block *block, struct
> >> >> split_var_copies_state *state)
> >> >>  ralloc_steal(state->dead_ctx, instr);
> >> >>   }
> >> >>   break;
> >> >> +  /* for bindless those are uint64 */
> >> >> +  case GLSL_TYPE_IMAGE:
> >> >> +  case GLSL_TYPE_SAMPLER:
> >> >> + assert(src_head->var->data.bindless ||
> >> >> +glsl_get_base_type(src_head->var->type) ==
> base_type);
> >> >>case GLSL_TYPE_INT:
> >> >>case GLSL_TYPE_UINT:
> >> >>case GLSL_TYPE_INT16:
> >> >> --
> >> >> 2.14.3
> >> >>
> >> >
> >> >
> >> > ___
> >> > mesa-dev mailing list
> >> > mesa-dev@lists.freedesktop.org
> >> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >> >
> >
> >
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 057/104] nir,spirv: Rework function calls

2018-04-10 Thread Rob Clark
On Tue, Apr 10, 2018 at 1:17 PM, Jason Ekstrand  wrote:
> On Tue, Apr 10, 2018 at 9:59 AM, Rob Clark  wrote:
>>
>> On Tue, Apr 10, 2018 at 11:55 AM, Jason Ekstrand 
>> wrote:
>> > On Tue, Apr 10, 2018 at 8:17 AM, Rob Clark  wrote:
>> >>
>> >> On Tue, Apr 10, 2018 at 11:04 AM, Jason Ekstrand 
>> >> wrote:
>> >> > On Tue, Apr 10, 2018 at 6:20 AM, Rob Clark 
>> >> > wrote:
>> >> >>
>> >> >> On Mon, Apr 9, 2018 at 10:52 PM, Jason Ekstrand
>> >> >> 
>> >> >> wrote:
>> >> >> > + A bunch of potentially interested parties.
>> >> >> >
>> >> >> > On Mon, Apr 9, 2018 at 4:25 PM, Caio Marcelo de Oliveira Filho
>> >> >> >  wrote:
>> >> >> >>
>> >> >> >> Hi,
>> >> >> >>
>> >> >> >> >  typedef struct {
>> >> >> >> > -   nir_parameter_type param_type;
>> >> >> >> > -   const struct glsl_type *type;
>> >> >> >> > +   uint8_t num_components;
>> >> >> >> > +   uint8_t bit_size;
>> >> >> >> >  } nir_parameter;
>> >> >> >>
>> >> >> >> (...)
>> >> >> >>
>> >> >> >> > @@ -683,18 +692,12 @@ validate_tex_instr(nir_tex_instr *instr,
>> >> >> >> > validate_state *state)
>> >> >> >> >  static void
>> >> >> >> >  validate_call_instr(nir_call_instr *instr, validate_state
>> >> >> >> > *state)
>> >> >> >> >  {
>> >> >> >> > -   if (instr->return_deref == NULL) {
>> >> >> >> > -  validate_assert(state,
>> >> >> >> > glsl_type_is_void(instr->callee->return_type));
>> >> >> >> > -   } else {
>> >> >> >> > -  validate_assert(state, instr->return_deref->deref.type
>> >> >> >> > ==
>> >> >> >> > instr->callee->return_type);
>> >> >> >> > -  validate_deref_var(instr, instr->return_deref, state);
>> >> >> >> > -   }
>> >> >> >> > -
>> >> >> >> > validate_assert(state, instr->num_params ==
>> >> >> >> > instr->callee->num_params);
>> >> >> >> >
>> >> >> >> > for (unsigned i = 0; i < instr->num_params; i++) {
>> >> >> >> > -  validate_assert(state, instr->callee->params[i].type ==
>> >> >> >> > instr->params[i]->deref.type);
>> >> >> >> > -  validate_deref_var(instr, instr->params[i], state);
>> >> >> >> > +  validate_src(&instr->params[i], state,
>> >> >> >> > +   instr->callee->params[i].bit_size,
>> >> >> >> > +   instr->callee->params[i].num_components);
>> >> >> >> > }
>> >> >> >> >  }
>> >> >> >>
>> >> >> >> Question: I might be misreading, but it seems like we are losing
>> >> >> >> the
>> >> >> >> type information for functions. Isn't that something worth
>> >> >> >> keeping,
>> >> >> >> maybe in some other way, e.g. load_param specifying the expected
>> >> >> >> type?
>> >> >> >
>> >> >> >
>> >> >> > That's a very good question!  To be honest, I'm not sure what the
>> >> >> > answer
>> >> >> > is.
>> >> >> > At the moment, the type information is fairly useless for most of
>> >> >> > what
>> >> >> > we
>> >> >> > use functions for.  Really, all we need is something that NIR can
>> >> >> > inline.
>> >> >> > As it is, we're not really preserving the types from SPIR-V
>> >> >> > because
>> >> >> > of
>> >> >> > the
>> >> >> > gymnastics we're doing to handle pointers.
>> >> >> >
>> >> >> > If we did want to preserve types, we'd need to have more detailed
>> >> >> > type
>> >> >> > information.  In particular, we'd need to be able to provide
>> >> >> > pointer
>> >> >> > types
>> >> >> > and maybe combined image-sampler types.  And along with those
>> >> >> > pointer
>> >> >> > types,
>> >> >> > we'd need to somehow express those pointer's storage requirements.
>> >> >> >
>> >> >> > The philosophy behind this commit is that, if we don't have a good
>> >> >> > match
>> >> >> > to
>> >> >> > SPIR-V anyway, we might as well just chuck that information and do
>> >> >> > whatever
>> >> >> > makes our lives the easiest.  My philosophy here may be flawed and
>> >> >> > I'm
>> >> >> > happy
>> >> >> > to hear arguments in favor of keeping the information.  The best
>> >> >> > argument I
>> >> >> > can come up with for keeping the information is if we find
>> >> >> > ourselves
>> >> >> > wanting
>> >> >> > to do some sort of linking in the future where we have to match
>> >> >> > functions by
>> >> >> > both name and type.  If we want to do that, however, we'll need
>> >> >> > all
>> >> >> > the
>> >> >> > SPIR-V type information.
>> >> >> >
>> >> >>
>> >> >> We do end up wanting the type information for cl kernels.  This is
>> >> >> maybe a slightly different case from calls within shader code (ie.
>> >> >> when both caller and callee are in shader).
>> >> >
>> >> >
>> >> > Yes, I think it is.  Question: Is there a distinction in CL between
>> >> > functions which are entrypoints callable from the API and functions
>> >> > which
>> >> > are helpers?  i.e. Can you call an entrypoint as a helper?
>> >> >
>> >>
>> >> There is the __kernel annotation.  And you know the entry point name
>> >> when compiling.  However I'm not sure anything prevents one entry
>> >> point from calling another.
>> >
>> >
>> > That would be worth investigating.
>> >
>>
>> fwiw, at least the cl to spv compile

Re: [Mesa-dev] [PATCH v2 01/12] glsl/tests: reimplement warnings-test in python

2018-04-10 Thread Eric Anholt
Dylan Baker  writes:

> This reimplements the test in python with a shell script wrapper that
> allows autotools to continue to run the test without realizing that
> anything has changed.
>
> Using python has two advantages, first it's portable so this test can be
> run on windows as well as Linux since it just requires python, no more
> diff, pwd or sh. It's also no longer tied to autotools implementation
> details, like the environment variables $srcdir and $abs_builddir,
> though the autotools shell wrapper still uses those, which makes it
> possible to run the test in meson.
>
> Signed-off-by: Dylan Baker 
> ---
>  src/compiler/glsl/tests/warnings-test.sh | 57 +---
>  src/compiler/glsl/tests/warnings_test.py | 74 +-
>  2 files changed, 75 insertions(+), 56 deletions(-)
>  create mode 100755 src/compiler/glsl/tests/warnings_test.py
>
> diff --git a/src/compiler/glsl/tests/warnings-test.sh 
> b/src/compiler/glsl/tests/warnings-test.sh
> index d5dc3b5..debd6fd 100755
> --- a/src/compiler/glsl/tests/warnings-test.sh
> +++ b/src/compiler/glsl/tests/warnings-test.sh
> @@ -1,58 +1,3 @@
>  #!/bin/sh
>  
> -if [ -z "$srcdir" -o -z "$abs_builddir" ]; then
> -echo ""
> -echo "Warning: you're invoking the script manually and things may fail."
> -echo "Attempting to determine/set srcdir and abs_builddir variables."
> -echo ""
> -
> -# Variable should point to the Makefile.glsl.am
> -srcdir=./../../
> -cd `dirname "$0"`
> -# Variable should point to glsl_compiler
> -abs_builddir=`pwd`/../../
> -fi
> -
> -# Execute several shaders, and check that the InfoLog outcome is the 
> expected.
> -
> -compiler=$abs_builddir/glsl_compiler
> -total=0
> -pass=0
> -
> -if [ ! -x "$compiler" ]; then
> -echo "Could not find glsl_compiler. Ensure that it is build via make 
> check"
> -exit 1
> -fi
> -
> -tests_relative_dir="glsl/tests/warnings"
> -
> -echo "== Testing compilation output =="
> -for test in $srcdir/$tests_relative_dir/*.vert; do
> -test_output="$abs_builddir/$tests_relative_dir/`basename $test`"
> -mkdir -p $abs_builddir/$tests_relative_dir/
> -echo -n "Testing `basename $test`..."
> -$compiler --just-log --version 150 "$test" > "$test_output.out" 2>&1
> -total=$((total+1))
> -if diff "$test.expected" "$test_output.out" >/dev/null 2>&1; then
> -echo "PASS"
> -pass=$((pass+1))
> -else
> -echo "FAIL"
> -diff "$test.expected" "$test_output.out"
> -fi
> -done
> -
> -if [ $total -eq 0 ]; then
> -echo "Could not find any tests."
> -exit 1
> -fi
> -
> -echo ""
> -echo "$pass/$total tests returned correct results"
> -echo ""
> -
> -if [ $pass = $total ]; then
> -exit 0
> -else
> -exit 1
> -fi
> +$srcdir/glsl/tests/warnings_test.py --glsl-compiler 
> $abs_builddir/glsl_compiler --test-directory $srcdir/glsl/tests/warnings/

The other instance we have of calling python from a .sh
(optimization-test.sh) uses $PYTHON2 for the invocation.  Should we do
that here, too?

Other than that, r-b.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Plumb invariant output attrib thru TGSI

2018-04-10 Thread Marek Olšák
This doesn't change TGSI. It only changes utilities around it.

Marek

On Mon, Apr 9, 2018 at 6:02 PM, Joe M. Kniss  wrote:

> Add support for glsl 'invariant' modifier for output data declarations.
> Gallium drivers that use TGSI serialization currently lose invariant
> modifiers in glsl shaders.
>
> Tested: chromiumos on qemu with virglrenderer.
> Signed-off-by: Joe M. Kniss 
> ---
>  src/gallium/auxiliary/tgsi/tgsi_strings.c  |  2 ++
>  src/gallium/auxiliary/tgsi/tgsi_strings.h  |  2 ++
>  src/gallium/auxiliary/tgsi/tgsi_text.c | 18 +++
>  src/gallium/auxiliary/tgsi/tgsi_ureg.c | 27 ++
>  src/gallium/auxiliary/tgsi/tgsi_ureg.h |  4 +++-
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  8 +--
>  6 files changed, 45 insertions(+), 16 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c
> b/src/gallium/auxiliary/tgsi/tgsi_strings.c
> index 4f28b49ce8..434871273f 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
> @@ -185,6 +185,8 @@ const char 
> *tgsi_interpolate_locations[TGSI_INTERPOLATE_LOC_COUNT]
> =
> "SAMPLE",
>  };
>
> +const char *tgsi_invariant_name = "INVARIANT";
> +
>  const char *tgsi_primitive_names[PIPE_PRIM_MAX] =
>  {
> "POINTS",
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.h
> b/src/gallium/auxiliary/tgsi/tgsi_strings.h
> index bb2d3458dd..20e3f7127f 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_strings.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.h
> @@ -52,6 +52,8 @@ extern const char *tgsi_interpolate_names[TGSI_
> INTERPOLATE_COUNT];
>
>  extern const char *tgsi_interpolate_locations[
> TGSI_INTERPOLATE_LOC_COUNT];
>
> +extern const char *tgsi_invariant_name;
> +
>  extern const char *tgsi_primitive_names[PIPE_PRIM_MAX];
>
>  extern const char *tgsi_fs_coord_origin_names[2];
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c
> b/src/gallium/auxiliary/tgsi/tgsi_text.c
> index 02241a66bf..815b1ee65d 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_text.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
> @@ -1586,10 +1586,6 @@ static boolean parse_declaration( struct
> translate_ctx *ctx )
>  break;
>   }
>}
> -  if (i == TGSI_INTERPOLATE_COUNT) {
> - report_error( ctx, "Expected semantic or interpolate attribute"
> );
> - return FALSE;
> -  }
> }
>
> cur = ctx->cur;
> @@ -1609,6 +1605,20 @@ static boolean parse_declaration( struct
> translate_ctx *ctx )
>}
> }
>
> +   cur = ctx->cur;
> +   eat_opt_white( &cur );
> +   if (*cur == ',' && !is_vs_input) {
> +  cur++;
> +  eat_opt_white( &cur );
> +  if (str_match_nocase_whole( &cur, tgsi_invariant_name )) {
> + decl.Declaration.Invariant = 1;
> + ctx->cur = cur;
> +  } else {
> + report_error( ctx, "Expected semantic, interpolate attribute, or
> invariant ");
> + return FALSE;
> +  }
> +   }
> +
> advance = tgsi_build_full_declaration(
>&decl,
>ctx->tokens_cur,
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
> b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
> index 393e015001..f54e2229a7 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
> @@ -140,6 +140,7 @@ struct ureg_program
>unsigned first;
>unsigned last;
>unsigned array_id;
> +  unsigned invariant;
> } output[UREG_MAX_OUTPUT];
> unsigned nr_outputs, nr_output_regs;
>
> @@ -427,7 +428,8 @@ ureg_DECL_output_layout(struct ureg_program *ureg,
>  unsigned index,
>  unsigned usage_mask,
>  unsigned array_id,
> -unsigned array_size)
> +unsigned array_size,
> +unsigned invariant)
>  {
> unsigned i;
>
> @@ -455,6 +457,7 @@ ureg_DECL_output_layout(struct ureg_program *ureg,
>ureg->output[i].first = index;
>ureg->output[i].last = index + array_size - 1;
>ureg->output[i].array_id = array_id;
> +  ureg->output[i].invariant = invariant;
>ureg->nr_output_regs = MAX2(ureg->nr_output_regs, index +
> array_size);
>ureg->nr_outputs++;
> }
> @@ -480,7 +483,7 @@ ureg_DECL_output_masked(struct ureg_program *ureg,
>  unsigned array_size)
>  {
> return ureg_DECL_output_layout(ureg, name, index, 0,
> -  ureg->nr_output_regs, usage_mask,
> array_id, array_size);
> +  ureg->nr_output_regs, usage_mask,
> array_id, array_size, 0);
>  }
>
>
> @@ -1512,7 +1515,8 @@ emit_decl_semantic(struct ureg_program *ureg,
> unsigned semantic_index,
> unsigned streams,
> unsigned usage_mask,
> -   unsigned array_id)
> +   unsigned array_id,
> +   unsigned invariant)
>

Re: [Mesa-dev] [PATCH] gallium: move ddebug, noop, rbug, trace to auxiliary to improve build times

2018-04-10 Thread Marek Olšák
On Mon, Apr 9, 2018 at 5:37 PM, Dylan Baker  wrote:

> Are you building LLVM yourself, or is that a build that comes with your
> distro?
> Also, what is your distro?
>

Ubuntu 16.04. LLVM is installed in /usr/llvm/: its bin directory is not in
PATH by default, its include directory is not in the include path by
default, but its lib directory is in the ld path.

I build LLVM with shared libs myself.

Marek


>
> Quoting Marek Olšák (2018-04-09 14:27:10)
> > See:
> > https://cgit.freedesktop.org/mesa/mesa/commit/?id=
> > f55d1f806e6b6c33af559de166d08ec8fa3ebe90
> >
> > Marek
> >
> > On Mon, Apr 9, 2018 at 5:08 PM, Dylan Baker  wrote:
> >
> > Quoting Marek Olšák (2018-04-09 13:44:27)
> > > meson fails to link LLVM on my setup, so I can't use it, therefore
> all my
> > meson
> > > changes are untested.
> > >
> > > Even if meson worked, I have to use make, because that's what
> users use.
> > >
> > > This change simplifies the meson build too.
> > >
> > > Marek
> > >
> >
> > What happens with LLVM on your system?
> >
> > Dylan
> >
> >
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 11/12] meson: build tests for gallium mesa state tracker

2018-04-10 Thread Eric Anholt
Dylan Baker  writes:

> Signed-off-by: Dylan Baker 
> ---
>  src/mesa/state_tracker/tests/meson.build | 40 +-
>  src/meson.build  |  3 ++-
>  2 files changed, 43 insertions(+)
>  create mode 100644 src/mesa/state_tracker/tests/meson.build
>
> diff --git a/src/mesa/state_tracker/tests/meson.build 
> b/src/mesa/state_tracker/tests/meson.build
> new file mode 100644
> index 000..f62039e
> --- /dev/null
> +++ b/src/mesa/state_tracker/tests/meson.build
> @@ -0,0 +1,40 @@
> +# Copyright © 2018 Intel Corporation
> +
> +# Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> +# of this software and associated documentation files (the "Software"), to 
> deal
> +# in the Software without restriction, including without limitation the 
> rights
> +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> +# copies of the Software, and to permit persons to whom the Software is
> +# furnished to do so, subject to the following conditions:
> +
> +# The above copyright notice and this permission notice shall be included in
> +# all copies or substantial portions of the Software.
> +
> +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
> FROM,
> +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
> THE
> +# SOFTWARE.
> +
> +libmesa_st_test_common = static_library(
> +  'mesa_st_test_common',
> +  ['st_tests_common.cpp', ir_expression_operation_h],
> +  include_directories : inc_common,
> +  dependencies : dep_thread,
> +)
> +
> +test(
> +  'st_renumerate_test',
> +  executable(
> +'st_renumerate_test',
> +['test_glsl_to_tgsi_lifetime.cpp', ir_expression_operation_h],
> +include_directories : inc_common,
> +link_with : [
> +  libmesa_st_test_common, libmesa_gallium, libglapi, libgallium,
> +  libmesa_util,
> +],
> +dependencies : [idep_gtest, dep_thread, dep_llvm]
> +  )
> +)
> diff --git a/src/meson.build b/src/meson.build
> index 1900527..03b0656 100644
> --- a/src/meson.build
> +++ b/src/meson.build
> @@ -80,6 +80,9 @@ if with_egl
>  endif
>  if with_gallium
>subdir('gallium')
> +  # This has to be here since it requires libgallium, and subdir cannot
> +  # contains ..

"contain"

other than that, patches 2, 6-8, 11-12 get my r-b.

I don't think I'm going to find time to read all the python in patch 3.
If nobody else does in the next week or so, I think we should just put
my a-b on it and r-b on 4-5, and call it good.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: move ddebug, noop, rbug, trace to auxiliary to improve build times

2018-04-10 Thread Marek Olšák
If I understand correctly, you don't disagree with the patch.

Marek

On Tue, Apr 10, 2018 at 2:03 PM, Marek Olšák  wrote:

> On Mon, Apr 9, 2018 at 5:37 PM, Dylan Baker  wrote:
>
>> Are you building LLVM yourself, or is that a build that comes with your
>> distro?
>> Also, what is your distro?
>>
>
> Ubuntu 16.04. LLVM is in /usr/llvm/ bin is not in PATH by default,
> include is not in the include path by default, but lib is in the ld path.
>
> I build LLVM with shared libs myself.
>
> Marek
>
>
>>
>> Quoting Marek Olšák (2018-04-09 14:27:10)
>> > See:
>> > https://cgit.freedesktop.org/mesa/mesa/commit/?id=
>> > f55d1f806e6b6c33af559de166d08ec8fa3ebe90
>> >
>> > Marek
>> >
>> > On Mon, Apr 9, 2018 at 5:08 PM, Dylan Baker 
>> wrote:
>> >
>> > Quoting Marek Olšák (2018-04-09 13:44:27)
>> > > meson fails to link LLVM on my setup, so I can't use it,
>> therefore all my
>> > meson
>> > > changes are untested.
>> > >
>> > > Even if meson worked, I have to use make, because that's what
>> users use.
>> > >
>> > > This change simplifies the meson build too.
>> > >
>> > > Marek
>> > >
>> >
>> > What happens with LLVM on your system?
>> >
>> > Dylan
>> >
>> >
>>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/11] gallium: Use Array._DrawVAO in st_atom_array.c.

2018-04-10 Thread Marek Olšák
Generally, if you have to loop over all arrays to find common vertex
buffers, it's better not to do it. The default separate path is going to
perform best, because it's straightforward and interleaved arrays are super
rare.

Marek

On Mon, Apr 9, 2018 at 7:15 PM, Mathias Fröhlich 
wrote:

> Hi Marek,
>
> On Saturday, 7 April 2018 01:53:58 CEST Marek Olšák wrote:
> > So interleaved attribs are unsupported, right?
> >
> > is_interleaved_arrays was probably slowing things down, so I'm OK with
> that.
>
> I am currently away from all the source code and be back at about the 22.4.
>
> Off the top of my head: the main purpose of is_interleaved_arrays that I
> could spot is to minimize the VBOs that are sent down the pipeline. In the
> non-VBO case, the is_interleaved_arrays check did nothing that I could spot?
> The buffer itself is marked as user buffer and we need a new vbuffer
> because
> of the pointer value anyway? Correct?
>
> So, the VAO now contains all the redundancy information. And thanks to this
> bitmask sieves we can easily collect the arrays belonging to a specific
> precollapsed binding point.
> So, the is_interleaved is fully there in the vbo case. Even better as
> before.
> It sees even 4 attributes distributed across two pairwise interleaved vbo
> arrays.
>
> So even if you are fine, if you tell me that the user buffer code can make
> use
> of the same sharing finally, I can take a look at that and establish the
> same
> sort of sharing here.
>
> best
>
> Mathias
>
>
> >
> > Marek
> >
> > On Sun, Apr 1, 2018 at 2:13 PM,  wrote:
> > > From: Mathias Fröhlich 
> > >
> > > Finally make use of the binding information in the VAO when
> > > setting up arrays for draw.
> > >
> > > Signed-off-by: Mathias Fröhlich 
> > > ---
> > >
> > >  src/mesa/state_tracker/st_atom_array.c | 448
> > >
> > > +
> > >
> > >  1 file changed, 124 insertions(+), 324 deletions(-)
> > >
> > > diff --git a/src/mesa/state_tracker/st_atom_array.c
> > > b/src/mesa/state_tracker/st_atom_array.c
> > > index 2fd67e8d84..46934a718a 100644
> > > --- a/src/mesa/state_tracker/st_atom_array.c
> > > +++ b/src/mesa/state_tracker/st_atom_array.c
> > > @@ -48,6 +48,7 @@
> > >
> > >  #include "main/bufferobj.h"
> > >  #include "main/glformats.h"
> > >  #include "main/varray.h"
> > >
> > > +#include "main/arrayobj.h"
> > >
> > >  /* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1]
> */
> > >  static const uint16_t vertex_formats[][4][4] = {
> > >
> > > @@ -306,79 +307,6 @@ st_pipe_vertex_format(const struct
> > > gl_array_attributes *attrib)
> > >
> > > return vertex_formats[type - GL_BYTE][index][size-1];
> > >
> > >  }
> > >
> > > -static const struct gl_vertex_array *
> > > -get_client_array(const struct gl_vertex_array *arrays,
> > > - unsigned mesaAttr)
> > > -{
> > > -   /* st_program uses 0x to denote a double placeholder
> attribute
> > > */
> > > -   if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
> > > -  return NULL;
> > > -   return &arrays[mesaAttr];
> > > -}
> > > -
> > > -/**
> > > - * Examine the active arrays to determine if we have interleaved
> > > - * vertex arrays all living in one VBO, or all living in user space.
> > > - */
> > > -static GLboolean
> > > -is_interleaved_arrays(const struct st_vertex_program *vp,
> > > -  const struct gl_vertex_array *arrays,
> > > -  unsigned num_inputs)
> > > -{
> > > -   GLuint attr;
> > > -   const struct gl_buffer_object *firstBufObj = NULL;
> > > -   GLint firstStride = -1;
> > > -   const GLubyte *firstPtr = NULL;
> > > -   GLboolean userSpaceBuffer = GL_FALSE;
> > > -
> > > -   for (attr = 0; attr < num_inputs; attr++) {
> > > -  const struct gl_vertex_array *array;
> > > -  const struct gl_vertex_buffer_binding *binding;
> > > -  const struct gl_array_attributes *attrib;
> > > -  const GLubyte *ptr;
> > > -  const struct gl_buffer_object *bufObj;
> > > -  GLsizei stride;
> > > -
> > > -  array = get_client_array(arrays, vp->index_to_input[attr]);
> > > -  if (!array)
> > > -continue;
> > > -
> > > -  binding = array->BufferBinding;
> > > -  attrib = array->VertexAttrib;
> > > -  stride = binding->Stride; /* in bytes */
> > > -  ptr = _mesa_vertex_attrib_address(attrib, binding);
> > > -
> > > -  /* To keep things simple, don't allow interleaved zero-stride
> > > attribs. */
> > > -  if (stride == 0)
> > > - return false;
> > > -
> > > -  bufObj = binding->BufferObj;
> > > -  if (attr == 0) {
> > > - /* save info about the first array */
> > > - firstStride = stride;
> > > - firstPtr = ptr;
> > > - firstBufObj = bufObj;
> > > - userSpaceBuffer = !_mesa_is_bufferobj(bufObj);
> > > -  }
> > > -  else {
> > > - /* check if other arrays interleave with the first, in same
> > > buffer */
> > > - if (stride != firstStrid

Re: [Mesa-dev] [PATCH] RFC gallium: add 64 bit integer formats

2018-04-10 Thread Marek Olšák
On Mon, Apr 9, 2018 at 9:14 PM, Karol Herbst  wrote:

> On Tue, Apr 10, 2018 at 2:43 AM, Ilia Mirkin  wrote:
> > On Mon, Apr 9, 2018 at 8:39 PM, Karol Herbst  wrote:
> >> unsigneds are needed by ARB_bindless_texture 64 bit vertex attribs,
> both for
> >> NV_vertex_attrib_integer64.
> >>
> >> Fixes the new piglit sampler-vertex-attrib-input-output test I sent
> some days
> >> ago for bindless_texture.
> >>
> >> The change inside vbo_attrtype_to_double_flag is what I am most
> concerned
> >> about. Maybe I should add another flag for 64 bit ints. Or rework what
> Doubles
> >> mean in gl_array_attributes. Or Rename that to is64Bit and rework all
> users of
> >> Doubles.
> >>
> >> Any suggestions?
> >>
> >> Signed-off-by: Karol Herbst 
> >> ---
> >>  src/gallium/drivers/svga/svga_format.c |  8 
> >>  src/gallium/include/pipe/p_format.h|  9 +
> >>  src/mesa/main/glformats.c  |  3 +++
> >>  src/mesa/state_tracker/st_atom_array.c | 30
> +++---
> >>  src/mesa/vbo/vbo_private.h |  2 +-
> >>  5 files changed, 48 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/src/gallium/drivers/svga/svga_format.c
> b/src/gallium/drivers/svga/svga_format.c
> >> index 20a6e6b159f..f01a0e79c72 100644
> >> --- a/src/gallium/drivers/svga/svga_format.c
> >> +++ b/src/gallium/drivers/svga/svga_format.c
> >> @@ -369,6 +369,14 @@ static const struct vgpu10_format_entry
> format_conversion_table[] =
> >> { PIPE_FORMAT_A1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID,
> SVGA3D_FORMAT_INVALID,   0 },
> >> { PIPE_FORMAT_X1B5G5R5_UNORM,SVGA3D_FORMAT_INVALID,
> SVGA3D_FORMAT_INVALID,   0 },
> >> { PIPE_FORMAT_A4B4G4R4_UNORM,SVGA3D_FORMAT_INVALID,
> SVGA3D_FORMAT_INVALID,   0 },
> >> +   { PIPE_FORMAT_R64_UINT,  SVGA3D_FORMAT_INVALID,
> SVGA3D_FORMAT_INVALID,   0 },
> >> +   { PIPE_FORMAT_R64G64_UINT,   SVGA3D_FORMAT_INVALID,
> SVGA3D_FORMAT_INVALID,   0 },
> >> +   { PIPE_FORMAT_R64G64B64_UINT,SVGA3D_FORMAT_INVALID,
> SVGA3D_FORMAT_INVALID,   0 },
> >> +   { PIPE_FORMAT_R64G64B64A64_UINT, SVGA3D_FORMAT_INVALID,
> SVGA3D_FORMAT_INVALID,   0 },
> >> +   { PIPE_FORMAT_R64_SINT,  SVGA3D_FORMAT_INVALID,
> SVGA3D_FORMAT_INVALID,   0 },
> >> +   { PIPE_FORMAT_R64G64_SINT,   SVGA3D_FORMAT_INVALID,
> SVGA3D_FORMAT_INVALID,   0 },
> >> +   { PIPE_FORMAT_R64G64B64_SINT,SVGA3D_FORMAT_INVALID,
> SVGA3D_FORMAT_INVALID,   0 },
> >> +   { PIPE_FORMAT_R64G64B64A64_SINT, SVGA3D_FORMAT_INVALID,
> SVGA3D_FORMAT_INVALID,   0 },
> >>  };
> >>
> >>
> >> diff --git a/src/gallium/include/pipe/p_format.h
> b/src/gallium/include/pipe/p_format.h
> >> index 57399800fa4..df698856b70 100644
> >> --- a/src/gallium/include/pipe/p_format.h
> >> +++ b/src/gallium/include/pipe/p_format.h
> >> @@ -396,6 +396,15 @@ enum pipe_format {
> >> PIPE_FORMAT_X1B5G5R5_UNORM  = 310,
> >> PIPE_FORMAT_A4B4G4R4_UNORM  = 311,
> >>
> >> +   PIPE_FORMAT_R64_UINT= 312,
> >> +   PIPE_FORMAT_R64G64_UINT = 313,
> >> +   PIPE_FORMAT_R64G64B64_UINT  = 314,
> >> +   PIPE_FORMAT_R64G64B64A64_UINT   = 315,
> >> +   PIPE_FORMAT_R64_SINT= 316,
> >> +   PIPE_FORMAT_R64G64_SINT = 317,
> >> +   PIPE_FORMAT_R64G64B64_SINT  = 318,
> >> +   PIPE_FORMAT_R64G64B64A64_SINT   = 319,
> >> +
> >> PIPE_FORMAT_COUNT
> >>  };
> >>
> >> diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
> >> index 1e797c24c2a..feafd97f5ee 100644
> >> --- a/src/mesa/main/glformats.c
> >> +++ b/src/mesa/main/glformats.c
> >> @@ -543,6 +543,9 @@ _mesa_bytes_per_vertex_attrib(GLint comps, GLenum
> type)
> >> case GL_INT:
> >> case GL_UNSIGNED_INT:
> >>return comps * sizeof(GLint);
> >> +   /* ARB_bindless_texture */
> >> +   case GL_UNSIGNED_INT64_ARB:
> >> +  return comps * sizeof(GLuint64EXT);
> >> case GL_FLOAT:
> >>return comps * sizeof(GLfloat);
> >> case GL_HALF_FLOAT_ARB:
> >> diff --git a/src/mesa/state_tracker/st_atom_array.c
> b/src/mesa/state_tracker/st_atom_array.c
> >> index 2fd67e8d840..1c3f677d4bf 100644
> >> --- a/src/mesa/state_tracker/st_atom_array.c
> >> +++ b/src/mesa/state_tracker/st_atom_array.c
> >> @@ -230,6 +230,27 @@ static const uint16_t vertex_formats[][4][4] = {
> >>   PIPE_FORMAT_R32G32B32A32_FIXED
> >>},
> >> },
> >> +   {{0}}, /* gap */
> >> +   { /* GL_INT64_ARB */
> >> +  {0},
> >> +  {0},
> >> +  {
> >> + PIPE_FORMAT_R64_SINT,
> >> + PIPE_FORMAT_R64G64_SINT,
> >> + PIPE_FORMAT_R64G64B64_SINT,
> >> + PIPE_FORMAT_R64G64B64A64_SINT
> >> +  },
> >> +   },
> >> +   { /* GL_UNSIGNED_INT64_ARB */
> >> +  {0},
> >> +  {0},
> >> +  {
> >> + PIPE_FORMAT_R64_UINT,
> >> + PIPE_FORMAT_R64G64_UINT,
> >> + PIPE_FORMAT_R64G64B64_UINT,
> >> + PIPE_FORMAT_R64G64

Re: [Mesa-dev] [PATCH] i965/miptree: Initialize mcs buffer only until clear color

2018-04-10 Thread Nanley Chery
On Fri, Apr 06, 2018 at 07:04:01PM +0300, Pohjolainen, Topi wrote:
> On Fri, Apr 06, 2018 at 08:53:39AM -0700, Jason Ekstrand wrote:
> > On Fri, Apr 6, 2018 at 8:22 AM, Rafael Antognolli <
> > rafael.antogno...@intel.com> wrote:
> > 
> > > On Fri, Apr 06, 2018 at 06:07:52PM +0300, Topi Pohjolainen wrote:
> > > > Otherwise even the clear color gets initialised to 0xFF. This
> > > > allows enabling of color fast clears on ICL without regressing
> > > > multisampling tests.
> > > >
> > > > CC: Rafael Antognolli 
> > > > CC: Jason Ekstrand 
> > > > CC: Nanley Chery 
> > > > Signed-off-by: Topi Pohjolainen 
> > > > ---
> > > >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 ++-
> > > >  1 file changed, 6 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > > index 89074a6..25f901d 100644
> > > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > > @@ -1680,7 +1680,12 @@ intel_miptree_init_mcs(struct brw_context *brw,
> > > >return;
> > > > }
> > > > void *data = map;
> > > > -   memset(data, init_value, mt->mcs_buf->size);
> > > > +
> > > > +   /* Only initialize until clear color (if present). */
> > > > +   const unsigned aux_size = mt->mcs_buf->clear_color_offset ?
> > > > +mt->mcs_buf->clear_color_offset :
> > > > +mt->mcs_buf->size;
> > > > +   memset(data, init_value, aux_size);
> > >
> > 
> > Why not just use mt->mcs_buf->aux_surf.size?
> > 
> > Also, I think we probably want to memset the clear color to 0 in case we
> > get a recycled BO with unknown garbage in the clear value.
> 
> Good thinking, both points.
> 

I also agree with those points.

-Nanley

> > 
> > 
> > > Hmm... that's a good catch, and I think we definitely should not
> > > overwrite the clear color here.
> > >
> > > However, the initial value of the clear color shouldn't matter, right? I
> > > think there might still be a bug hidden somewhere...
> 
> I agree. I started to look into MCS in more detail - I don't think I fully
> understand how the clear color works there.
> 
> > >
> > > Regardless of that, this patch is
> > >
> > > Reviewed-by: Rafael Antognolli 
> > >
> > > > brw_bo_unmap(mt->mcs_buf->bo);
> > > >  }
> > > >
> > > > --
> > > > 2.7.4
> > > >
> > >
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/blorp/hiz: Emit CC viewport

2018-04-10 Thread Nanley Chery
On Tue, Apr 03, 2018 at 09:07:52PM +0300, Pohjolainen, Topi wrote:
> On Tue, Apr 03, 2018 at 09:04:48PM +0300, Pohjolainen, Topi wrote:
> > On Tue, Apr 03, 2018 at 07:40:34PM +0300, Topi Pohjolainen wrote:
> > > Otherwise simulator for ICL complains that:
> > > 
> > > B-spec CC_ViewPort Minimum Depth cannot be greater than Maximum Depth
> > 
> > And it looks that there is real reason why even HIZ ops should
> > specify it. In Bspec:
> > 
> > GT - 3D - vol2a.11 3D Pipeline - Windower - Depth Clear Value Format:
> > 
> > The clear value must be between the min and max depth values
> > (inclusive) defined in the CC_VIEWPORT.
> 
> And the same text is actually found in 3DSTATE_WM_HZ_OP_BODY -
> Depth Clear Value.
> 

Good find. Since this step isn't explicitly listed in the HZ OP
sequence, I think it would be helpful if we repeated what we did for the
multisample packet and included a comment explaining why we're updating
the viewport.

With or without the comment, this patch is
Reviewed-by: Nanley Chery 

> > 
> > > 
> > > CC: Jason Ekstrand 
> > > CC: Kenneth Graunke 
> > > Signed-off-by: Topi Pohjolainen 
> > > ---
> > >  src/intel/blorp/blorp_genX_exec.h | 1 +
> > >  1 file changed, 1 insertion(+)
> > > 
> > > diff --git a/src/intel/blorp/blorp_genX_exec.h 
> > > b/src/intel/blorp/blorp_genX_exec.h
> > > index 992bc99..e16d10c 100644
> > > --- a/src/intel/blorp/blorp_genX_exec.h
> > > +++ b/src/intel/blorp/blorp_genX_exec.h
> > > @@ -1570,6 +1570,7 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch,
> > >  * emit 3DSTATE_MULTISAMPLE.
> > >  */
> > > blorp_emit_3dstate_multisample(batch, params);
> > > +   blorp_emit_cc_viewport(batch);
> > >  
> > > /* If we can't alter the depth stencil config and multiple layers are
> > >  * involved, the HiZ op will fail. This is because the op requires 
> > > that a
> > > -- 
> > > 2.7.4
> > > 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 057/104] nir,spirv: Rework function calls

2018-04-10 Thread Rob Clark
On Tue, Apr 10, 2018 at 1:50 PM, Rob Clark  wrote:
> On Tue, Apr 10, 2018 at 1:17 PM, Jason Ekstrand  wrote:
>> On Tue, Apr 10, 2018 at 9:59 AM, Rob Clark  wrote:
>>>
>>> On Tue, Apr 10, 2018 at 11:55 AM, Jason Ekstrand 
>>> wrote:
>>> > On Tue, Apr 10, 2018 at 8:17 AM, Rob Clark  wrote:
>>> >>
>>> >> On Tue, Apr 10, 2018 at 11:04 AM, Jason Ekstrand 
>>> >> wrote:
>>> >> > On Tue, Apr 10, 2018 at 6:20 AM, Rob Clark 
>>> >> > wrote:
>>> >> >>
>>> >> >> On Mon, Apr 9, 2018 at 10:52 PM, Jason Ekstrand
>>> >> >> 
>>> >> >> wrote:
>>> >> >> > + A bunch of potentially interested parties.
>>> >> >> >
>>> >> >> > On Mon, Apr 9, 2018 at 4:25 PM, Caio Marcelo de Oliveira Filho
>>> >> >> >  wrote:
>>> >> >> >>
>>> >> >> >> Hi,
>>> >> >> >>
>>> >> >> >> >  typedef struct {
>>> >> >> >> > -   nir_parameter_type param_type;
>>> >> >> >> > -   const struct glsl_type *type;
>>> >> >> >> > +   uint8_t num_components;
>>> >> >> >> > +   uint8_t bit_size;
>>> >> >> >> >  } nir_parameter;
>>> >> >> >>
>>> >> >> >> (...)
>>> >> >> >>
>>> >> >> >> > @@ -683,18 +692,12 @@ validate_tex_instr(nir_tex_instr *instr,
>>> >> >> >> > validate_state *state)
>>> >> >> >> >  static void
>>> >> >> >> >  validate_call_instr(nir_call_instr *instr, validate_state
>>> >> >> >> > *state)
>>> >> >> >> >  {
>>> >> >> >> > -   if (instr->return_deref == NULL) {
>>> >> >> >> > -  validate_assert(state,
>>> >> >> >> > glsl_type_is_void(instr->callee->return_type));
>>> >> >> >> > -   } else {
>>> >> >> >> > -  validate_assert(state, instr->return_deref->deref.type
>>> >> >> >> > ==
>>> >> >> >> > instr->callee->return_type);
>>> >> >> >> > -  validate_deref_var(instr, instr->return_deref, state);
>>> >> >> >> > -   }
>>> >> >> >> > -
>>> >> >> >> > validate_assert(state, instr->num_params ==
>>> >> >> >> > instr->callee->num_params);
>>> >> >> >> >
>>> >> >> >> > for (unsigned i = 0; i < instr->num_params; i++) {
>>> >> >> >> > -  validate_assert(state, instr->callee->params[i].type ==
>>> >> >> >> > instr->params[i]->deref.type);
>>> >> >> >> > -  validate_deref_var(instr, instr->params[i], state);
>>> >> >> >> > +  validate_src(&instr->params[i], state,
>>> >> >> >> > +   instr->callee->params[i].bit_size,
>>> >> >> >> > +   instr->callee->params[i].num_components);
>>> >> >> >> > }
>>> >> >> >> >  }
>>> >> >> >>
>>> >> >> >> Question: I might be misreading, but it seems like we are losing
>>> >> >> >> the
>>> >> >> >> type information for functions. Isn't that something worth
>>> >> >> >> keeping,
>>> >> >> >> maybe in some other way, e.g. load_param specifying the expected
>>> >> >> >> type?
>>> >> >> >
>>> >> >> >
>>> >> >> > That's a very good question!  To be honest, I'm not sure what the
>>> >> >> > answer
>>> >> >> > is.
>>> >> >> > At the moment, the type information is fairly useless for most of
>>> >> >> > what
>>> >> >> > we
>>> >> >> > use functions for.  Really, all we need is something that NIR can
>>> >> >> > inline.
>>> >> >> > As it is, we're not really preserving the types from SPIR-V
>>> >> >> > because
>>> >> >> > of
>>> >> >> > the
>>> >> >> > gymnastics we're doing to handle pointers.
>>> >> >> >
>>> >> >> > If we did want to preserve types, we'd need to have more detailed
>>> >> >> > type
>>> >> >> > information.  In particular, we'd need to be able to provide
>>> >> >> > pointer
>>> >> >> > types
>>> >> >> > and maybe combined image-sampler types.  And along with those
>>> >> >> > pointer
>>> >> >> > types,
>>> >> >> > we'd need to somehow express those pointer's storage requirements.
>>> >> >> >
>>> >> >> > The philosophy behind this commit is that, if we don't have a good
>>> >> >> > match
>>> >> >> > to
>>> >> >> > SPIR-V anyway, we might as well just chuck that information and do
>>> >> >> > whatever
>>> >> >> > makes our lives the easiest.  My philosophy here may be flawed and
>>> >> >> > I'm
>>> >> >> > happy
>>> >> >> > to hear arguments in favor of keeping the information.  The best
>>> >> >> > argument I
>>> >> >> > can come up with for keeping the information is if we find
>>> >> >> > ourselves
>>> >> >> > wanting
>>> >> >> > to do some sort of linking in the future where we have to match
>>> >> >> > functions by
>>> >> >> > both name and type.  If we want to do that, however, we'll need
>>> >> >> > all
>>> >> >> > the
>>> >> >> > SPIR-V type information.
>>> >> >> >
>>> >> >>
>>> >> >> We do end up wanting the type information for cl kernels.  This is
>>> >> >> maybe a slightly different case from calls within shader code (ie.
>>> >> >> when both caller and callee are in shader).
>>> >> >
>>> >> >
>>> >> > Yes, I think it is.  Question: Is there a distinction in CL between
>>> >> > functions which are entrypoints callable from the API and functions
>>> >> > which
>>> >> > are helpers?  i.e. Can you call an entrypoint as a helper?
>>> >> >
>>> >>
>>> >> There is the __kernel annotation.  And you know the entry point name
>>> >> when compiling.  Ho

Re: [Mesa-dev] [PATCH] intel/blorp/hiz: Emit CC viewport

2018-04-10 Thread Pohjolainen, Topi
On Tue, Apr 10, 2018 at 11:32:19AM -0700, Nanley Chery wrote:
> On Tue, Apr 03, 2018 at 09:07:52PM +0300, Pohjolainen, Topi wrote:
> > On Tue, Apr 03, 2018 at 09:04:48PM +0300, Pohjolainen, Topi wrote:
> > > On Tue, Apr 03, 2018 at 07:40:34PM +0300, Topi Pohjolainen wrote:
> > > > Otherwise simulator for ICL complains that:
> > > > 
> > > > B-spec CC_ViewPort Minimum Depth cannot be greater than Maximum Depth
> > > 
> > > And it looks that there is real reason why even HIZ ops should
> > > specify it. In Bspec:
> > > 
> > > GT - 3D - vol2a.11 3D Pipeline - Windower - Depth Clear Value Format:
> > > 
> > > The clear value must be between the min and max depth values
> > > (inclusive) defined in the CC_VIEWPORT.
> > 
> > And the same text is actually found in 3DSTATE_WM_HZ_OP_BODY -
> > Depth Clear Value.
> > 
> 
> Good find. Since this step isn't explicitly listed in the HZ OP
> sequence, I think it would be helpful if we repeated what we did for the
> multisample packet and included a comment explaining why we're updating
> the viewport.

Sure, I'm happy to add a comment and bspec reference.

> 
> With or without the comment, this patch is
> Reviewed-by: Nanley Chery 
> 
> > > 
> > > > 
> > > > CC: Jason Ekstrand 
> > > > CC: Kenneth Graunke 
> > > > Signed-off-by: Topi Pohjolainen 
> > > > ---
> > > >  src/intel/blorp/blorp_genX_exec.h | 1 +
> > > >  1 file changed, 1 insertion(+)
> > > > 
> > > > diff --git a/src/intel/blorp/blorp_genX_exec.h 
> > > > b/src/intel/blorp/blorp_genX_exec.h
> > > > index 992bc99..e16d10c 100644
> > > > --- a/src/intel/blorp/blorp_genX_exec.h
> > > > +++ b/src/intel/blorp/blorp_genX_exec.h
> > > > @@ -1570,6 +1570,7 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch,
> > > >  * emit 3DSTATE_MULTISAMPLE.
> > > >  */
> > > > blorp_emit_3dstate_multisample(batch, params);
> > > > +   blorp_emit_cc_viewport(batch);
> > > >  
> > > > /* If we can't alter the depth stencil config and multiple layers 
> > > > are
> > > >  * involved, the HiZ op will fail. This is because the op requires 
> > > > that a
> > > > -- 
> > > > 2.7.4
> > > > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: move ddebug, noop, rbug, trace to auxiliary to improve build times

2018-04-10 Thread Dylan Baker
Quoting Marek Olšák (2018-04-10 11:03:59)
> On Mon, Apr 9, 2018 at 5:37 PM, Dylan Baker  wrote:
> 
> Are you building LLVM yourself, or is that a build that comes with your
> distro?
> Also, what is your distro?
> 
> 
> Ubuntu 16.04. LLVM is in /usr/llvm/; bin is not in PATH by default, include
> is not in the include path by default, but lib is in the ld path.
> 
> I build LLVM with shared libs myself.

with -DBUILD_SHARED_LIBS=1?

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: fix glsl version mismatch in compat profile

2018-04-10 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Apr 10, 2018 at 7:40 AM, Timothy Arceri 
wrote:

> Drivers that only support compat 3.0 were reporting GLSL 1.40
> support. This fixes issues with the menu of Dawn of War II.
>
> Fixes: a0c8b49284ef "mesa: enable OpenGL 3.1 with ARB_compatibility"
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105807
> ---
>  src/mesa/main/version.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
> index 0a4e7630da6..84babd69e2f 100644
> --- a/src/mesa/main/version.c
> +++ b/src/mesa/main/version.c
> @@ -620,8 +620,11 @@ _mesa_compute_version(struct gl_context *ctx)
> /* Make sure that the GLSL version lines up with the GL version. In
> some
>  * cases it can be too high, e.g. if an extension is missing.
>  */
> -   if (_mesa_is_desktop_gl(ctx) && ctx->Version >= 31) {
> +   if (_mesa_is_desktop_gl(ctx)) {
>switch (ctx->Version) {
> +  case 30:
> + ctx->Const.GLSLVersion = 130;
> + break;
>case 31:
>   ctx->Const.GLSLVersion = 140;
>   break;
> @@ -629,7 +632,8 @@ _mesa_compute_version(struct gl_context *ctx)
>   ctx->Const.GLSLVersion = 150;
>   break;
>default:
> - ctx->Const.GLSLVersion = ctx->Version * 10;
> + if (ctx->Version >= 33)
> +ctx->Const.GLSLVersion = ctx->Version * 10;
>   break;
>}
> }
> --
> 2.17.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: move ddebug, noop, rbug, trace to auxiliary to improve build times

2018-04-10 Thread Marek Olšák
cmake .. -G Ninja -DCMAKE_INSTALL_PREFIX=/usr/llvm/x86_64-linux-gnu
-DLLVM_TARGETS_TO_BUILD="X86;AMDGPU" -DLLVM_ENABLE_ASSERTIONS=ON \
  -DCMAKE_BUILD_TYPE=RelWithDebInfo
-DLLVM_BUILD_LLVM_DYLIB=ON -DLLVM_LINK_LLVM_DYLIB=ON \
  -DLLVM_APPEND_VC_REV=OFF
-DCMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO="-fuse-ld=gold" \
  -DCMAKE_C_FLAGS_RELWITHDEBINFO="-O2 -g
-fno-omit-frame-pointer" \
  -DCMAKE_CXX_FLAGS_RELWITHDEBINFO="-O2 -g
-fno-omit-frame-pointer"

Marek

On Tue, Apr 10, 2018 at 3:11 PM, Dylan Baker  wrote:

> Quoting Marek Olšák (2018-04-10 11:03:59)
> > On Mon, Apr 9, 2018 at 5:37 PM, Dylan Baker  wrote:
> >
> > Are you building LLVM yourself, or is that a build that comes with
> your
> > distro?
> > Also, what is your distro?
> >
> >
> > Ubuntu 16.04. LLVM is in /usr/llvm/; bin is not in PATH by default,
> include
> > is not in the include path by default, but lib is in the ld path.
> >
> > I build LLVM with shared libs myself.
>
> with -DBUILD_SHARED_LIBS=1?
>
> Dylan
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >