from:"Dave Airlie"

[Mesa-dev] [PATCH] tgsi: handle indirect sampler arrays. (v2)

2015-06-21 Thread Dave Airlie

This is required for ARB_gpu_shader5 support in softpipe.

v2: add support to txd/txf/txq paths.

Signed-off-by: Dave Airlie 
---

 src/gallium/auxiliary/tgsi/tgsi_exec.c | 42 ++
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index fde99b9..44000ff 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1988,6 +1988,35 @@ fetch_assign_deriv_channel(struct tgsi_exec_machine 
*mach,
derivs[1][3] = d.f[3];
 }
 
+static uint
+fetch_sampler_unit(struct tgsi_exec_machine *mach,
+   const struct tgsi_full_instruction *inst,
+   uint sampler)
+{
+   uint unit;
+
+   if (inst->Src[sampler].Register.Indirect) {
+  const struct tgsi_full_src_register *reg = &inst->Src[sampler];
+  union tgsi_exec_channel indir_index, index2;
+
+  index2.i[0] =
+  index2.i[1] =
+  index2.i[2] =
+  index2.i[3] = reg->Indirect.Index;
+
+  fetch_src_file_channel(mach,
+ 0,
+ reg->Indirect.File,
+ reg->Indirect.Swizzle,
+ &index2,
+ &ZeroVec,
+ &indir_index);
+  unit = inst->Src[sampler].Register.Index + indir_index.i[0];
+   } else {
+  unit = inst->Src[sampler].Register.Index;
+   }
+   return unit;
+}
 
 /*
  * execute a texture instruction.
@@ -2001,14 +2030,15 @@ exec_tex(struct tgsi_exec_machine *mach,
  const struct tgsi_full_instruction *inst,
  uint modifier, uint sampler)
 {
-   const uint unit = inst->Src[sampler].Register.Index;
const union tgsi_exec_channel *args[5], *proj = NULL;
union tgsi_exec_channel r[5];
enum tgsi_sampler_control control =  tgsi_sampler_lod_none;
uint chan;
+   uint unit;
int8_t offsets[3];
int dim, shadow_ref, i;
 
+   unit = fetch_sampler_unit(mach, inst, sampler);
/* always fetch all 3 offsets, overkill but keeps code simple */
fetch_texel_offsets(mach, inst, offsets);
 
@@ -2107,12 +2137,13 @@ static void
 exec_txd(struct tgsi_exec_machine *mach,
  const struct tgsi_full_instruction *inst)
 {
-   const uint unit = inst->Src[3].Register.Index;
union tgsi_exec_channel r[4];
float derivs[3][2][TGSI_QUAD_SIZE];
uint chan;
+   uint unit;
int8_t offsets[3];
 
+   unit = fetch_sampler_unit(mach, inst, 3);
/* always fetch all 3 offsets, overkill but keeps code simple */
fetch_texel_offsets(mach, inst, offsets);
 
@@ -2214,14 +2245,15 @@ static void
 exec_txf(struct tgsi_exec_machine *mach,
  const struct tgsi_full_instruction *inst)
 {
-   const uint unit = inst->Src[1].Register.Index;
union tgsi_exec_channel r[4];
uint chan;
+   uint unit;
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
int j;
int8_t offsets[3];
unsigned target;
 
+   unit = fetch_sampler_unit(mach, inst, 1);
/* always fetch all 3 offsets, overkill but keeps code simple */
fetch_texel_offsets(mach, inst, offsets);
 
@@ -2296,12 +2328,14 @@ static void
 exec_txq(struct tgsi_exec_machine *mach,
  const struct tgsi_full_instruction *inst)
 {
-   const uint unit = inst->Src[1].Register.Index;
int result[4];
union tgsi_exec_channel r[4], src;
uint chan;
+   uint unit;
int i,j;
 
+   unit = fetch_sampler_unit(mach, inst, 1);
+
fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
 
/* XXX: This interface can't return per-pixel values */
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] r600g: ignore sampler views for now.

2015-06-21 Thread Dave Airlie

From: Dave Airlie 

This fixes a regression in that r600 stopped working when
sampler views were pushed.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 07da167..af7622e 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -725,6 +725,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 
case TGSI_FILE_CONSTANT:
case TGSI_FILE_SAMPLER:
+   case TGSI_FILE_SAMPLER_VIEW:
case TGSI_FILE_ADDRESS:
break;
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] draw/gallivm: add invocation ID support for llvmpipe.

2015-06-21 Thread Dave Airlie

From: Dave Airlie 

This extends the draw code to add support for invocations.

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/draw/draw_gs.c| 3 ++-
 src/gallium/auxiliary/draw/draw_llvm.c  | 5 -
 src/gallium/auxiliary/draw/draw_llvm.h  | 3 ++-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 1 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 5 +
 5 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_gs.c 
b/src/gallium/auxiliary/draw/draw_gs.c
index 755e527..a1564f9 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -391,7 +391,8 @@ llvm_gs_run(struct draw_geometry_shader *shader,
   (struct vertex_header*)input,
   input_primitives,
   shader->draw->instance_id,
-  shader->llvm_prim_ids);
+  shader->llvm_prim_ids,
+  shader->invocation_id);
 
return ret;
 }
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 9629a8a..90a31bc 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -2069,7 +2069,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
struct gallivm_state *gallivm = variant->gallivm;
LLVMContextRef context = gallivm->context;
LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
-   LLVMTypeRef arg_types[6];
+   LLVMTypeRef arg_types[7];
LLVMTypeRef func_type;
LLVMValueRef variant_func;
LLVMValueRef context_ptr;
@@ -2105,6 +2105,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
arg_types[4] = int32_type;  /* instance_id */
arg_types[5] = LLVMPointerType(
   LLVMVectorType(int32_type, vector_length), 0);   /* prim_id_ptr */
+   arg_types[6] = int32_type;
 
func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
 
@@ -2125,6 +2126,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
num_prims = LLVMGetParam(variant_func, 3);
system_values.instance_id = LLVMGetParam(variant_func, 4);
prim_id_ptr   = LLVMGetParam(variant_func, 5);
+   system_values.invocation_id = LLVMGetParam(variant_func, 6);
 
lp_build_name(context_ptr, "context");
lp_build_name(input_array, "input");
@@ -2132,6 +2134,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
lp_build_name(num_prims, "num_prims");
lp_build_name(system_values.instance_id, "instance_id");
lp_build_name(prim_id_ptr, "prim_id_ptr");
+   lp_build_name(system_values.invocation_id, "invocation_id");
 
variant->context_ptr = context_ptr;
variant->io_ptr = io_ptr;
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h 
b/src/gallium/auxiliary/draw/draw_llvm.h
index 9565fc6..d48ed72 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -298,7 +298,8 @@ typedef int
 struct vertex_header *output,
 unsigned num_prims,
 unsigned instance_id,
-int *prim_ids);
+int *prim_ids,
+unsigned invocation_id);
 
 struct draw_llvm_variant_key
 {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 3f76b79..967373c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -165,6 +165,7 @@ struct lp_bld_tgsi_system_values {
LLVMValueRef vertex_id_nobase;
LLVMValueRef prim_id;
LLVMValueRef basevertex;
+   LLVMValueRef invocation_id;
 };
 
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 092bd18..268379e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1532,6 +1532,11 @@ emit_fetch_system_value(
   atype = TGSI_TYPE_UNSIGNED;
   break;
 
+   case TGSI_SEMANTIC_INVOCATIONID:
+  res = lp_build_broadcast_scalar(&bld_base->uint_bld, 
bld->system_values.invocation_id);
+  atype = TGSI_TYPE_UNSIGNED;
+  break;
+
default:
   assert(!"unexpected semantic in emit_fetch_system_value");
   res = bld_base->base.zero;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] draw/tgsi: implement geom shader invocation support.

2015-06-21 Thread Dave Airlie

From: Dave Airlie 

This is just for softpipe, llvmpipe won't work without
some changes.

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/draw/draw_gs.c   | 47 +-
 src/gallium/auxiliary/draw/draw_gs.h   |  2 ++
 src/gallium/auxiliary/tgsi/tgsi_scan.c |  2 ++
 src/gallium/auxiliary/tgsi/tgsi_scan.h |  1 +
 4 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_gs.c 
b/src/gallium/auxiliary/draw/draw_gs.c
index 6375d41..755e527 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -190,9 +190,15 @@ static void tgsi_gs_prepare(struct draw_geometry_shader 
*shader,
 const unsigned 
constants_size[PIPE_MAX_CONSTANT_BUFFERS])
 {
struct tgsi_exec_machine *machine = shader->machine;
-
+   int j;
tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
   constants, constants_size);
+
+   if (shader->info.uses_invocationid) {
+  unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INVOCATIONID];
+  for (j = 0; j < TGSI_QUAD_SIZE; j++)
+ machine->SystemValue[i].i[j] = shader->invocation_id;
+   }
 }
 
 static unsigned tgsi_gs_run(struct draw_geometry_shader *shader,
@@ -555,7 +561,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader,
 * overflown vertices into some area where they won't harm anyone */
unsigned total_verts_per_buffer = shader->primitive_boundary *
   num_in_primitives;
-
+   unsigned invocation;
//Assume at least one primitive
max_out_prims = MAX2(max_out_prims, 1);
 
@@ -564,7 +570,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader,
output_verts->stride = output_verts->vertex_size;
output_verts->verts =
   (struct vertex_header *)MALLOC(output_verts->vertex_size *
- total_verts_per_buffer);
+ total_verts_per_buffer * 
shader->num_invocations);
debug_assert(output_verts->verts);
 
 #if 0
@@ -592,7 +598,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader,
shader->input = input;
shader->input_info = input_info;
FREE(shader->primitive_lengths);
-   shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned));
+   shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned) * 
shader->num_invocations);
 
 
 #ifdef HAVE_LLVM
@@ -622,23 +628,26 @@ int draw_geometry_shader_run(struct draw_geometry_shader 
*shader,
}
 #endif
 
-   shader->prepare(shader, constants, constants_size);
+   for (invocation = 0; invocation < shader->num_invocations; invocation++) {
+  shader->invocation_id = invocation;
 
-   if (input_prim->linear)
-  gs_run(shader, input_prim, input_verts,
- output_prims, output_verts);
-   else
-  gs_run_elts(shader, input_prim, input_verts,
-  output_prims, output_verts);
+  shader->prepare(shader, constants, constants_size);
 
-   /* Flush the remaining primitives. Will happen if
-* num_input_primitives % 4 != 0
-*/
-   if (shader->fetched_prim_count > 0) {
-  gs_flush(shader);
-   }
+  if (input_prim->linear)
+ gs_run(shader, input_prim, input_verts,
+output_prims, output_verts);
+  else
+ gs_run_elts(shader, input_prim, input_verts,
+ output_prims, output_verts);
 
-   debug_assert(shader->fetched_prim_count == 0);
+  /* Flush the remaining primitives. Will happen if
+   * num_input_primitives % 4 != 0
+   */
+  if (shader->fetched_prim_count > 0) {
+ gs_flush(shader);
+  }
+  debug_assert(shader->fetched_prim_count == 0);
+   }
 
/* Update prim_info:
 */
@@ -771,6 +780,8 @@ draw_create_geometry_shader(struct draw_context *draw,
  gs->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
gs->max_output_vertices =
  gs->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
+   gs->num_invocations =
+  gs->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
if (!gs->max_output_vertices)
   gs->max_output_vertices = 32;
 
diff --git a/src/gallium/auxiliary/draw/draw_gs.h 
b/src/gallium/auxiliary/draw/draw_gs.h
index 49e93d5..663ba84 100644
--- a/src/gallium/auxiliary/draw/draw_gs.h
+++ b/src/gallium/auxiliary/draw/draw_gs.h
@@ -90,6 +90,8 @@ struct draw_geometry_shader {
unsigned vector_length;
unsigned max_out_prims;
 
+   unsigned num_invocations;
+   unsigned invocation_id;
 #ifdef HAVE_LLVM
struct draw_gs_inputs *gs_input;
struct draw_gs_jit_context *jit_context;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 369f56a..711413c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tg

[Mesa-dev] softpipe and llvmpipe geom shader invocation support

2015-06-21 Thread Dave Airlie

Hey,

repost of patch 1, the second patch adds the corresponding code
to llvmpipe, passes the basic tests.

Dave.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] r600g: ignore sampler views for now.

2015-06-21 Thread Dave Airlie

On 22 June 2015 at 13:37, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This fixes a regression in that r600 stopped working when
> sampler views were pushed.

I pushed this already, regressions are too annoying to wait.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC] Compatibility between old dri modules and new loaders, and vice verse

2015-06-22 Thread Dave Airlie

>
> As kindly hinted by Marek, currently we do have a wide selection of
> supported dri <> loader combinations.
>
> Although we like to think that things never break, we have to admit
> that not many of us test every possible combinations of dri modules
> and loaders. With the chances getting smaller as the time gap (age)
> between the two increases. As such I would like to ask if we're
> interested in gradually depreciating as the gap grows beyond X years.
>
> The rough idea that I have in my mind is:
> - Check for obsolete extensions (requirements for such) - both in the
> dri modules and the loaders (including the xserver).
> - Add some WARN messages ("You're using an old loader/DRI module.
> Update to XXX or later") when such code path is hit.
> - After X mesa releases, we remove the dri extension from the
> module(s) and bump the requirement(s) in the loader(s).
>
> And now the more important question why ?
>  - Very rarely tested and not actively supported - if it works it
> works, we only cover one stable branch.
>  - Having a quick look at the the "if extension && extension.version
>>= y" maze does leave most of us speechless.
>  - Will allow us to start removing a few of the nasty quirks/hacks
> that we currently have laying around.
>
> Worth mentioning:
>  - Depreciation period will be based on the longest time frame set by
> LTS versions of distros. For example if Debian A ships X and mesa 3
> years apart, while Ubuntu does is ~2.5 and RedHat ~2.8, we'll stick
> with 3 years.
>  - libGL dri1 support... it's been almost four years since the removal
> of the dri1 modules. Since then the only activity that I've noticed by
> Connor Behan on the r128 front. Although it seems that he has covered
> the ddx and is just looking at the kernel side of things. Should we
> consider mesa X (10.6 ?) as the last one that supports such old
> modules in it's libGL and give it a much needed cleanup ?
>
>
> How would people feel about this - do we have any strong ack/nack
> about the idea ? Are there many people/companies that support distros
> where the xserver <> mesa gap is over, say 2 years ?

We still ship 7.11 based dri1 drivers in RHEL6, and there is still a
chance of us rebasing to newer Mesa in that depending on schedules.

ajax might have a different opinion, on how likely that is, but
that would be at least another year from now where we'd want DRI1
to work.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC] Compatibility between old dri modules and new loaders, and vice verse

2015-06-22 Thread Dave Airlie

On 23 June 2015 at 08:16, Ian Romanick  wrote:
> On 06/22/2015 11:54 AM, Dave Airlie wrote:
>>>
>>> As kindly hinted by Marek, currently we do have a wide selection of
>>> supported dri <> loader combinations.
>>>
>>> Although we like to think that things never break, we have to admit
>>> that not many of us test every possible combinations of dri modules
>>> and loaders. With the chances getting smaller as the time gap (age)
>>> between the two increases. As such I would like to ask if we're
>>> interested in gradually depreciating as the gap grows beyond X years.
>>>
>>> The rough idea that I have in my mind is:
>>> - Check for obsolete extensions (requirements for such) - both in the
>>> dri modules and the loaders (including the xserver).
>>> - Add some WARN messages ("You're using an old loader/DRI module.
>>> Update to XXX or later") when such code path is hit.
>>> - After X mesa releases, we remove the dri extension from the
>>> module(s) and bump the requirement(s) in the loader(s).
>>>
>>> And now the more important question why ?
>>>  - Very rarely tested and not actively supported - if it works it
>>> works, we only cover one stable branch.
>>>  - Having a quick look at the the "if extension && extension.version
>>>> = y" maze does leave most of us speechless.
>>>  - Will allow us to start removing a few of the nasty quirks/hacks
>>> that we currently have laying around.
>>>
>>> Worth mentioning:
>>>  - Depreciation period will be based on the longest time frame set by
>>> LTS versions of distros. For example if Debian A ships X and mesa 3
>>> years apart, while Ubuntu does is ~2.5 and RedHat ~2.8, we'll stick
>>> with 3 years.
>>>  - libGL dri1 support... it's been almost four years since the removal
>>> of the dri1 modules. Since then the only activity that I've noticed by
>>> Connor Behan on the r128 front. Although it seems that he has covered
>>> the ddx and is just looking at the kernel side of things. Should we
>>> consider mesa X (10.6 ?) as the last one that supports such old
>>> modules in it's libGL and give it a much needed cleanup ?
>>>
>>>
>>> How would people feel about this - do we have any strong ack/nack
>>> about the idea ? Are there many people/companies that support distros
>>> where the xserver <> mesa gap is over, say 2 years ?
>>
>> We still ship 7.11 based dri1 drivers in RHEL6, and there is still a
>> chance of us rebasing to newer Mesa in that depending on schedules.
>>
>> ajax might have a different opinion, on how likely that is, but
>> that would be at least another year from now where we'd want DRI1
>> to work.
>
> A time line would be good.  I think it will take a fair amount of time
> to get a new loader<>driver interface in order.  If we can't change
> anything for two years, then there's not a lot of point to thinking
> about it now.  If it's a year or less away, that's a different story.
>
> The other possibility would be for RHEL to ship more than one libGL...
> one for DRI1 drivers and one for everything else.  I don't know how
> horrible that would be.

That would be worse than impossible; it's bad enough that nvidia overwrites
libGL, I don't want us to do it to ourselves as well :-)

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] tgsi: handle indirect sampler arrays. (v2)

2015-06-22 Thread Dave Airlie

On 22 June 2015 at 21:20, Roland Scheidegger  wrote:
> Should there be some clamping somewhere to prevent crashes due to
> out-of-bound unit index?

The spec says it's undefined; I'm never sure if that means it may explode in
any way whatsoever.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Fix strict-aliasing violations in GLSL shader list implementation

2015-06-23 Thread Dave Airlie

On 24 June 2015 at 11:44, Ian Romanick  wrote:
> On 06/24/2015 03:59 PM, Davin McCall wrote:
>> Hi Ian,
>>
>> On 23/06/15 23:26, Ian Romanick wrote:
>>> On 06/23/2015 02:36 PM, Thomas Helland wrote:
 2015-06-24 23:05 GMT+02:00 Davin McCall :
> Hi - I'm new here.
>
> I've recently started poking the Mesa codebase for little reason other 
> than
> personal interest. In the "help wanted" section of the website it mentions
> aliasing violations as a target for newcomers to fix, so with that in mind
> I've attached a patch (against git head) which resolves a few of them, by
> targeting the linked list implementation (list.h) used in the GLSL
> compiler/optimizers. This change slightly increases the storage 
> requirements
> for a list (adds one word) but resolves the blatant aliasing violation 
> that
> was caused by the trick used to conserve that word in the first place.
>
> (I toyed with another approach - using a single sentinel node for both the
> head and tail of a list - but this was much more invasive, and meant that
> you could no longer check whether a particular node was a sentinel node
> unless you had a reference to the list, so I gave up and went with this
> simpler approach).
>
> The most essential change is in the 'exec_list' structure. Three fields
> 'head', 'tail' and 'tail_pred' are removed, and two separate sentinel 
> nodes
> are inserted in their place. The old 'head' is replaced by
> 'head_sentinel.next', 'tail_pred' by 'tail_sentinel.prev', and tail 
> (always
> NULL) by 'head_sentinel.prev' and 'tail_sentinel.next' (both always NULL).
>>> NAK.  The datastructure is correct as-is.  It has been in common use
>>> since at least 1985.  See the references in the header file.
>>
>> I understand the data structure and how it is supposed to work; the
>> issue is that the trick it employs cannot be implemented in C without
>> breaking the strict aliasing rules (or at least, the current
>> implementation in Mesa breaks the strict aliasing rules). The current
>> code works correctly only with the -fno-strict-aliasing compiler option.
>> The issue is that a pair of 'exec_node *' do not constitute an exec_node
>> in the eyes of the language spec, even though exec_node is declared as
>> holding two such pointers. Consider (from src/glsl/list.h):
>>
>> static inline void
>> exec_list_make_empty(struct exec_list *list)
>> {
>>list->head = (struct exec_node *) & list->tail;
>>list->tail = NULL;
>>list->tail_pred = (struct exec_node *) & list->head;
>> }
>>
>>
>> 'list->head' is of type 'struct exec_node *', and so should point at a
>> 'struct  exec_node'. In the code above it is instead co-erced to point
>> at a 'struct exec_node *' (list->tail). That in itself doesn't break the
>> alias rules, but then:
>>
>> static inline bool
>> exec_node_is_tail_sentinel(const struct exec_node *n)
>> {
>>return n->next == NULL;
>> }
>>
>>
>> In 'exec_node_is_tail_sentinel', the sentinel is not actually an
>> exec_node - it is &list->tail. So, if the parameter n does refer to the
>> sentinel, then it does not point to an actual exec_node structure.
>> However, it is de-referenced (by 'n->next') which breaks the strict
>> aliasing rules. This means that the method above can only ever return
>> false, unless it violates the aliasing rules.
>>
>> (The above method could be fixed by casting n to an 'struct exec_node
>> **' and then comparing '**n' against NULL. However, there are other
>> similar examples throughout the code that I do not think would be so
>> trivial).
>>
>> I can quote the relevant parts of the standard if necessary, but your
>> tone somewhat implies that you wouldn't even consider this patch?
>
> Please quote the spec.  I have a hard time believing that the compiler
> magically decides that an exec_node* is not really an exec_node* and
> just does whatever it wants (which sounds like what you're describing).
>  That sounds pretty rubbish... and I'm surprised that anything works in
> such an environment.
>
> Mesa has also had -fno-strict-aliasing for as long as I can remember,
> and this structure has only been used here for a few years.  The whole
> thing just doesn't smell right.
>

Oh, we've always had aliasing problems; this is just one of them. You can't
expect one person to fix them all at once.

But making headway is a good thing.

You can't have
struct exec_list *p;
struct exec_node *p2 = (struct exec_node *)p;

and do things with p2 and hope that p will get them, because
the compiler wants to store things it's doing to p in registers,
and when you go and do something in p2 it can't work out it's the
same thing, so it has to spill/reload.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Fix strict-aliasing violations in GLSL shader list implementation

2015-06-23 Thread Dave Airlie

>
> Actually, I'm almost 100% certain that there are lots of other strict
> aliasing violations in the Mesa code. That's why we've always disabled it.
>
> More generally, IMO it's unrealistic to rely on strict aliasing for
> optimization, because very few people really understand it (I'm not one
> of them).

I personally think we should get past the "aliasing is hard, let's go shopping" attitude.

I get a 30K code size reduction on r600_dri.so

6780217  367820 1991392 9139429  8b74e5 lib/gallium/r600_dri.so
6746389  367820 1991392 9105601  8af0c1 lib/gallium/r600_dri.so

That isn't a small amount, and I'd think that, at least for Intel targeting
Atoms with small caches, it might matter.

However it might also not be that size once we fixed all the aliasing.

Granted there might be other things that would get us that sort of reduction
but I'm not sure what they are.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Fix strict-aliasing violations in GLSL shader list implementation

2015-06-23 Thread Dave Airlie

>> And do things with p2 and hope that p will get them, because
>> the compiler wants to store things its doing to p in registers,
>> and when you go and do something in p2 it can't work out it's the
>> same thing, so it has to spill/reload.
>
> Which I think is different from what Davin was saying, but I may be
> misunderstanding the whole thing.  That's why I'd like to see spec
> language.  The part that really gets me is that this is across a
> function boundary... that's generally a sacred line, so I'm surprised
> that the compiler is allowed to disregard what it's told in that scenario.

inlining makes function boundaries nothing, and gcc will really
try and inline things.

>
> I'd also like to see assembly dumps with and without
> -fno-strict-aliasing of some place where this goes wrong.  If you,
> Davin, or someone else can point out a specific function that actually
> does the wrong thing, I can generate assembly myself.
>
> For that matter... how the heck is the ralloc (or any memory allocator)
> implementation valid?

There are rules on which types can alias: void * and char * are allowed,
and unions are allowed.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Fix strict-aliasing violations in GLSL shader list implementation

2015-06-23 Thread Dave Airlie

On 24 June 2015 at 12:57, Michel Dänzer  wrote:
> On 24.06.2015 11:39, Dave Airlie wrote:
>>>
>>> Actually, I'm almost 100% certain that there are lots of other strict
>>> aliasing violations in the Mesa code. That's why we've always disabled it.
>>>
>>> More generally, IMO it's unrealistic to rely on strict aliasing for
>>> optimization, because very few people really understand it (I'm not one
>>> of them).
>>
>> I personally think we should get past the, aliasing is hard, lets go 
>> shopping,
>
> I'm not saying that. I'm saying we'll keep getting it wrong, which will
> cause subtle and very hard to debug problems.
>
Well my hope is gcc and clang got better, that said I know the kernel
disables it as well, but they do a lot worse things.


   text    data     bss     dec     hex filename
5489951  187400   25712 5703063  570597 lib/mesa_dri_drivers.so

   text    data     bss     dec     hex filename
5446885  187400   25712 5659997  565d5d lib/mesa_dri_drivers.so

Here is an i965 driver: a 43k code size drop.

Again fixing things may not help that.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] glsls: Modify exec_list to avoid strict-aliasing violations

2015-06-24 Thread Dave Airlie

> -fno-strict-aliasing:with strict aliasing:
> libGL.so  699188  699188(no change)
> *_dri.so 9575876 9563104(-2772)
>

Use the size command to get the actual text segment size,

otherwise debugging symbols can drown changes.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: add support for viewport array

2015-06-24 Thread Dave Airlie

From: Dave Airlie 

This isn't pretty, and I'd suggest the pm4 interface builder
could be tweaked to do this more efficiently, but I'd need
guidance on how that would look.

This seems to pass the few piglit tests I threw at it.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/radeonsi/si_blit.c   |  4 +-
 src/gallium/drivers/radeonsi/si_pipe.c   |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c | 12 +-
 src/gallium/drivers/radeonsi/si_state.c  | 66 
 src/gallium/drivers/radeonsi/si_state.h  |  4 +-
 5 files changed, 56 insertions(+), 32 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 1f2c408..4560bf7 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -64,10 +64,10 @@ static void si_blitter_begin(struct pipe_context *ctx, enum 
si_blitter_op op)
  
sctx->queued.named.sample_mask->sample_mask);
}
if (sctx->queued.named.viewport) {
-   util_blitter_save_viewport(sctx->blitter, 
&sctx->queued.named.viewport->viewport);
+   util_blitter_save_viewport(sctx->blitter, 
&sctx->queued.named.viewport[0]->viewport);
}
if (sctx->queued.named.scissor) {
-   util_blitter_save_scissor(sctx->blitter, 
&sctx->queued.named.scissor->scissor);
+   util_blitter_save_scissor(sctx->blitter, 
&sctx->queued.named.scissor[0]->scissor);
}
util_blitter_save_vertex_buffer_slot(sctx->blitter, 
sctx->vertex_buffer);
util_blitter_save_so_targets(sctx->blitter, 
sctx->b.streamout.num_targets,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 53ae71a..480a301 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -335,7 +335,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
return 8;
 
case PIPE_CAP_MAX_VIEWPORTS:
-   return 1;
+   return 16;
 
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIMESTAMP:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 47e5f96..3cd439c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1128,7 +1128,7 @@ static void si_llvm_export_vs(struct 
lp_build_tgsi_context *bld_base,

&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
LLVMValueRef args[9];
LLVMValueRef pos_args[4][9] = { { 0 } };
-   LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = 
NULL;
+   LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = 
NULL, viewport_index_value = NULL;
unsigned semantic_name, semantic_index;
unsigned target;
unsigned param_count = 0;
@@ -1155,6 +1155,9 @@ handle_semantic:
case TGSI_SEMANTIC_LAYER:
layer_value = outputs[i].values[0];
continue;
+   case TGSI_SEMANTIC_VIEWPORT_INDEX:
+   viewport_index_value = outputs[i].values[0];
+   continue;
case TGSI_SEMANTIC_POSITION:
target = V_008DFC_SQ_EXP_POS;
break;
@@ -1220,11 +1223,13 @@ handle_semantic:
/* Write the misc vector (point size, edgeflag, layer, viewport). */
if (shader->selector->info.writes_psize ||
shader->selector->info.writes_edgeflag ||
+   shader->selector->info.writes_viewport_index ||
shader->selector->info.writes_layer) {
pos_args[1][0] = lp_build_const_int32(base->gallivm, /* 
writemask */
  
shader->selector->info.writes_psize |
  
(shader->selector->info.writes_edgeflag << 1) |
- 
(shader->selector->info.writes_layer << 2));
+ 
(shader->selector->info.writes_layer << 2) |
+ 
(shader->selector->info.writes_viewport_index << 3));
pos_args[1][1] = uint->zero; /* EXEC mask */
pos_args[1][2] = uint->zero; /* last export? */
pos_args[1][3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_POS + 1);
@@ -1255,6 +1260,9 @@ handle_semantic:
 
if (shader->selector->info.writes_layer)
pos_args[1][7] = layer_value;
+
+   if (shader->

[Mesa-dev] [PATCH] radeonsi: add support for viewport array (v2)

2015-06-24 Thread Dave Airlie

From: Dave Airlie 

This isn't pretty, and I'd suggest that the pm4 interface builder
could be tweaked to do this more efficiently, but I'd need
guidance on how that would look.

This seems to pass the few piglit tests I threw at it.

v2: handle passing layer/viewport index to fragment shader.
fix crash in blit changes,
add support to io_get_unique_index for layer/viewport index
update docs.

Signed-off-by: Dave Airlie 
---
 docs/GL3.txt|  4 +-
 docs/relnotes/10.7.0.html   |  3 ++
 src/gallium/drivers/radeonsi/si_blit.c  |  8 +--
 src/gallium/drivers/radeonsi/si_pipe.c  |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c| 26 +++---
 src/gallium/drivers/radeonsi/si_state.c | 66 +++--
 src/gallium/drivers/radeonsi/si_state.h |  4 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c |  2 -
 8 files changed, 73 insertions(+), 42 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 220bcc8..df913bd 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -128,7 +128,7 @@ GL 4.1, GLSL 4.10:
   GL_ARB_separate_shader_objects   DONE (all drivers)
   GL_ARB_shader_precision  started (Micah)
   GL_ARB_vertex_attrib_64bit   DONE (nvc0, softpipe)
-  GL_ARB_viewport_arrayDONE (i965, nv50, nvc0, 
r600, llvmpipe)
+  GL_ARB_viewport_arrayDONE (i965, nv50, nvc0, 
r600, radeonsi, llvmpipe)
 
 
 GL 4.2, GLSL 4.20:
@@ -156,7 +156,7 @@ GL 4.3, GLSL 4.30:
   GL_ARB_copy_imageDONE (i965) (gallium - 
in progress, VMware)
   GL_KHR_debug DONE (all drivers)
   GL_ARB_explicit_uniform_location DONE (all drivers that 
support GLSL)
-  GL_ARB_fragment_layer_viewport   DONE (nv50, nvc0, r600, 
llvmpipe)
+  GL_ARB_fragment_layer_viewport   DONE (nv50, nvc0, r600, 
radeonsi, llvmpipe)
   GL_ARB_framebuffer_no_attachmentsDONE (i965)
   GL_ARB_internalformat_query2 not started
   GL_ARB_invalidate_subdataDONE (all drivers)
diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html
index e089889..fcc5081 100644
--- a/docs/relnotes/10.7.0.html
+++ b/docs/relnotes/10.7.0.html
@@ -44,8 +44,11 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 
+GL_AMD_vertex_shader_viewport_index on radeonsi
 GL_ARB_framebuffer_no_attachments on i965
 GL_ARB_shader_stencil_export on llvmpipe
+GL_ARB_viewport_array on radeonsi
+GL_ARB_fragment_layer_viewport on radeonsi
 
 
 Bug fixes
diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 1f2c408..6c7b383 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -63,11 +63,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum 
si_blitter_op op)
util_blitter_save_sample_mask(sctx->blitter,
  
sctx->queued.named.sample_mask->sample_mask);
}
-   if (sctx->queued.named.viewport) {
-   util_blitter_save_viewport(sctx->blitter, 
&sctx->queued.named.viewport->viewport);
+   if (sctx->queued.named.viewport[0]) {
+   util_blitter_save_viewport(sctx->blitter, 
&sctx->queued.named.viewport[0]->viewport);
}
-   if (sctx->queued.named.scissor) {
-   util_blitter_save_scissor(sctx->blitter, 
&sctx->queued.named.scissor->scissor);
+   if (sctx->queued.named.scissor[0]) {
+   util_blitter_save_scissor(sctx->blitter, 
&sctx->queued.named.scissor[0]->scissor);
}
util_blitter_save_vertex_buffer_slot(sctx->blitter, 
sctx->vertex_buffer);
util_blitter_save_so_targets(sctx->blitter, 
sctx->b.streamout.num_targets,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 53ae71a..480a301 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -335,7 +335,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
return 8;
 
case PIPE_CAP_MAX_VIEWPORTS:
-   return 1;
+   return 16;
 
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIMESTAMP:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 47e5f96..87608a1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -125,12 +125,16 @@ unsigned si_shader_io_get_unique_index(unsigned 
semantic_name, unsigned index)
re

Re: [Mesa-dev] [PATCH] radeonsi: add support for viewport array (v2)

2015-06-25 Thread Dave Airlie

On 26 June 2015 at 00:26, Marek Olšák  wrote:
> Hi Dave,
>
> The change in si_shader_io_get_unique_index can be dropped. The
> function is only used for shaders before GS.
>
Okay, I was hitting the assert in there for the layer/viewport index cases,
but if the patch you pushed to master helps, I'll drop it.

> This looks good, but I've had a different plan for this feature:

Yeah I thought you might, I just wanted to hack something up to see it working
since it seems new Dota 2 uses this feature.

> I'd like the states to be converted into 2 atoms:
>
> 1 r600_atom for all 16 viewports
> 1 r600_atom for all 16 scissors
>
> Each atom should have a bitmask saying which "slots" are dirty. (the same
> idea as resource slots)
>
> The "emit" functions should only emit dirty viewports/scissors.
>
> Also, the "emit" functions shouldn't emit non-zero viewports/scissors
> if the viewport index isn't written by the hardware VS stage
> (si_get_vs_info(sctx)->...). This should keep the same level of
> effectiveness as before. When a shader that writes the viewport index
> is bound *and* there are any dirty viewports or scissors, that's the
> right time to mark the atoms as dirty again, so that non-zero dirty
> viewports/scissors are finally emitted.

I'm not sure if I'll get to doing more of it, I just had a day at home,
and wanted to get up to speed on radeonsi a bit, so I can just leave
that here, in case anyone wants to pick it up,

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: add support for geometry shader invocations.

2015-06-25 Thread Dave Airlie

From: Dave Airlie 

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/radeonsi/si_shader.c| 5 +
 src/gallium/drivers/radeonsi/si_shader.h| 1 +
 src/gallium/drivers/radeonsi/si_state.c | 1 -
 src/gallium/drivers/radeonsi/si_state_shaders.c | 7 +++
 4 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 87608a1..665ce83 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -630,6 +630,11 @@ static void declare_system_value(
 SI_PARAM_BASE_VERTEX);
break;
 
+   case TGSI_SEMANTIC_INVOCATIONID:
+   value = LLVMGetParam(radeon_bld->main_fn,
+SI_PARAM_GS_INSTANCE_ID);
+   break;
+
case TGSI_SEMANTIC_SAMPLEID:
value = get_sample_id(radeon_bld);
break;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 51055af..b4339ae 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -115,6 +115,7 @@ struct si_shader_selector {
 
unsignedgs_output_prim;
unsignedgs_max_out_vertices;
+   unsignedgs_num_invocations;
uint64_tgs_used_inputs; /* mask of "get_unique_index" bits */
 };
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 752467b..0dd08a2 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3078,7 +3078,6 @@ void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0);
si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0);
si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0);
-   si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
 
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 48128fa..eef3baa 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -76,6 +76,7 @@ static void si_shader_gs(struct si_shader *shader)
unsigned gs_vert_itemsize = shader->selector->info.num_outputs * (16 >> 
2);
unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+   unsigned gs_num_invocations = shader->selector->gs_num_invocations;
unsigned cut_mode;
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
@@ -118,6 +119,10 @@ static void si_shader_gs(struct si_shader *shader)
 
si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize);
 
+   si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT,
+  S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
+  S_028B90_ENABLE(gs_num_invocations > 0));
+
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, 
RADEON_PRIO_SHADER_DATA);
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
@@ -490,6 +495,8 @@ static void *si_create_shader_state(struct pipe_context 
*ctx,
sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
sel->gs_max_out_vertices =

sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
+   sel->gs_num_invocations =
+   sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
 
for (i = 0; i < sel->info.num_inputs; i++) {
unsigned name = sel->info.input_semantic_name[i];
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: add support for viewport array (v2)

2015-06-25 Thread Dave Airlie

On 26 June 2015 at 07:11, Marek Olšák  wrote:
> Wait a moment, how did it fail with si_shader_io_get_unique_index? The
> function shouldn't be called for ES with the viewport index, because
> ES can't pass the output to GS. If it was called, ignoring the
> viewport index in si_llvm_emit_es_epilogue should fix it.

It asserted in a couple of ARB_fragment_layer_viewport tests.

It seemed to be due to putting viewport and layer into params. I
don't have a backtrace here
right now, but I can get it tomorrow.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: add support for viewport array (v2)

2015-06-26 Thread Dave Airlie

On 26 June 2015 at 07:43, Dave Airlie  wrote:
> On 26 June 2015 at 07:11, Marek Olšák  wrote:
>> Wait a moment, how did it fail with si_shader_io_get_unique_index? The
>> function shouldn't be called for ES with the viewport index, because
>> ES can't pass the output to GS. If it was called, ignoring the
>> viewport index in si_llvm_emit_es_epilogue should fix it.
>
> It asserted in a couple of ARB_fragment_layer_viewport tests.
>
> It seemed due to the putting viewpor and layer into params, but I
> don't have a backtrace here
> right now, but I can get it tomorrow.
>
> Dave.

Okay I've tested this again, and
tests/spec/arb_fragment_layer_viewport/viewport-vs-write-gs-no-write-simple.shader_test
fails without the change I made.

si_llvm_emit_es_epilogue is what calls it, but with the assert removed
by your patch, the test still fails unless I add my change.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: add support for viewport array (v3)

2015-06-26 Thread Dave Airlie

From: Dave Airlie 

This isn't pretty, and I'd suggest that the pm4 interface builder
could be tweaked to do this more efficiently, but I'd need
guidance on how that would look.

This seems to pass the few piglit tests I threw at it.

v2: handle passing layer/viewport index to fragment shader.
fix crash in blit changes,
add support to io_get_unique_index for layer/viewport index
update docs.
v3: avoid looking up viewport index and layer in es (Marek).

Signed-off-by: Dave Airlie 
---
 docs/GL3.txt|  4 +-
 docs/relnotes/10.7.0.html   |  3 ++
 src/gallium/drivers/radeonsi/si_blit.c  |  8 +--
 src/gallium/drivers/radeonsi/si_pipe.c  |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c| 27 +++---
 src/gallium/drivers/radeonsi/si_state.c | 66 +++--
 src/gallium/drivers/radeonsi/si_state.h |  4 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c |  2 -
 8 files changed, 74 insertions(+), 42 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 220bcc8..df913bd 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -128,7 +128,7 @@ GL 4.1, GLSL 4.10:
   GL_ARB_separate_shader_objects   DONE (all drivers)
   GL_ARB_shader_precision  started (Micah)
   GL_ARB_vertex_attrib_64bit   DONE (nvc0, softpipe)
-  GL_ARB_viewport_arrayDONE (i965, nv50, nvc0, 
r600, llvmpipe)
+  GL_ARB_viewport_arrayDONE (i965, nv50, nvc0, 
r600, radeonsi, llvmpipe)
 
 
 GL 4.2, GLSL 4.20:
@@ -156,7 +156,7 @@ GL 4.3, GLSL 4.30:
   GL_ARB_copy_imageDONE (i965) (gallium - 
in progress, VMware)
   GL_KHR_debug DONE (all drivers)
   GL_ARB_explicit_uniform_location DONE (all drivers that 
support GLSL)
-  GL_ARB_fragment_layer_viewport   DONE (nv50, nvc0, r600, 
llvmpipe)
+  GL_ARB_fragment_layer_viewport   DONE (nv50, nvc0, r600, 
radeonsi, llvmpipe)
   GL_ARB_framebuffer_no_attachmentsDONE (i965)
   GL_ARB_internalformat_query2 not started
   GL_ARB_invalidate_subdataDONE (all drivers)
diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html
index e089889..fcc5081 100644
--- a/docs/relnotes/10.7.0.html
+++ b/docs/relnotes/10.7.0.html
@@ -44,8 +44,11 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 
+GL_AMD_vertex_shader_viewport_index on radeonsi
 GL_ARB_framebuffer_no_attachments on i965
 GL_ARB_shader_stencil_export on llvmpipe
+GL_ARB_viewport_array on radeonsi
+GL_ARB_fragment_layer_viewport on radeonsi
 
 
 Bug fixes
diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 1f2c408..6c7b383 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -63,11 +63,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum 
si_blitter_op op)
util_blitter_save_sample_mask(sctx->blitter,
  
sctx->queued.named.sample_mask->sample_mask);
}
-   if (sctx->queued.named.viewport) {
-   util_blitter_save_viewport(sctx->blitter, 
&sctx->queued.named.viewport->viewport);
+   if (sctx->queued.named.viewport[0]) {
+   util_blitter_save_viewport(sctx->blitter, 
&sctx->queued.named.viewport[0]->viewport);
}
-   if (sctx->queued.named.scissor) {
-   util_blitter_save_scissor(sctx->blitter, 
&sctx->queued.named.scissor->scissor);
+   if (sctx->queued.named.scissor[0]) {
+   util_blitter_save_scissor(sctx->blitter, 
&sctx->queued.named.scissor[0]->scissor);
}
util_blitter_save_vertex_buffer_slot(sctx->blitter, 
sctx->vertex_buffer);
util_blitter_save_so_targets(sctx->blitter, 
sctx->b.streamout.num_targets,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 53ae71a..480a301 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -335,7 +335,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
return 8;
 
case PIPE_CAP_MAX_VIEWPORTS:
-   return 1;
+   return 16;
 
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIMESTAMP:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a293ef3..4ca3172 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1132,7 +1132,7 @@ static void si_llv

Re: [Mesa-dev] [PATCH] radeonsi: add support for viewport array (v2)

2015-06-26 Thread Dave Airlie

On 27 June 2015 at 09:03, Marek Olšák  wrote:
> If you write VIEWPORT_INDEX at location 0, it overwrites POSITION
> which happens to be at location 0 too and that's why the test fails.
>
> The fix is not to call si_shader_io_get_unique_index (or its caller
> get_param_index) for LAYER and VIEWPORT_INDEX.
>
> LAYER and VIEWPORT_INDEX should be ignored in
> si_llvm_emit_es_epilogue, as is required by the spec.

makes sense, sending v3 with that change.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] llvmpipe double support

2015-06-29 Thread Dave Airlie

Before considering radeonsi, I felt llvmpipe support would
be a good stepping stone.

Things are messy with SoA, and I've no idea how to test AoS,
so I left it alone.

but this set does work and does pass the tests and doesn't regress.

Dave.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] tgsi: add infer support for double opcodes.

2015-06-29 Thread Dave Airlie

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/tgsi/tgsi_info.c | 37 ++
 1 file changed, 37 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 9295311..4b16ef3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -374,7 +374,33 @@ tgsi_opcode_infer_type( uint opcode )
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_IBFE:
case TGSI_OPCODE_IMSB:
+   case TGSI_OPCODE_DSEQ:
+   case TGSI_OPCODE_DSGE:
+   case TGSI_OPCODE_DSLT:
+   case TGSI_OPCODE_DSNE:
   return TGSI_TYPE_SIGNED;
+   case TGSI_OPCODE_DADD:
+   case TGSI_OPCODE_DABS:
+   case TGSI_OPCODE_DNEG:
+   case TGSI_OPCODE_DMUL:
+   case TGSI_OPCODE_DMAX:
+   case TGSI_OPCODE_DMIN:
+   case TGSI_OPCODE_DRCP:
+   case TGSI_OPCODE_DSQRT:
+   case TGSI_OPCODE_DMAD:
+   case TGSI_OPCODE_DLDEXP:
+   case TGSI_OPCODE_DFRACEXP:
+   case TGSI_OPCODE_DFRAC:
+   case TGSI_OPCODE_DRSQ:
+   case TGSI_OPCODE_DTRUNC:
+   case TGSI_OPCODE_DCEIL:
+   case TGSI_OPCODE_DFLR:
+   case TGSI_OPCODE_DROUND:
+   case TGSI_OPCODE_DSSG:
+   case TGSI_OPCODE_F2D:
+   case TGSI_OPCODE_I2D:
+   case TGSI_OPCODE_U2D:
+  return TGSI_TYPE_DOUBLE;
default:
   return TGSI_TYPE_FLOAT;
}
@@ -391,6 +417,7 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_TXF:
case TGSI_OPCODE_BREAKC:
case TGSI_OPCODE_U2F:
+   case TGSI_OPCODE_U2D:
case TGSI_OPCODE_UADD:
case TGSI_OPCODE_SWITCH:
case TGSI_OPCODE_CASE:
@@ -400,10 +427,12 @@ tgsi_opcode_infer_src_type( uint opcode )
   return TGSI_TYPE_UNSIGNED;
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_I2D:
   return TGSI_TYPE_SIGNED;
case TGSI_OPCODE_ARL:
case TGSI_OPCODE_ARR:
case TGSI_OPCODE_TXQ_LZ:
+   case TGSI_OPCODE_F2D:
case TGSI_OPCODE_F2I:
case TGSI_OPCODE_F2U:
case TGSI_OPCODE_FSEQ:
@@ -412,6 +441,14 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_FSNE:
case TGSI_OPCODE_UCMP:
   return TGSI_TYPE_FLOAT;
+   case TGSI_OPCODE_D2F:
+   case TGSI_OPCODE_D2U:
+   case TGSI_OPCODE_D2I:
+   case TGSI_OPCODE_DSEQ:
+   case TGSI_OPCODE_DSGE:
+   case TGSI_OPCODE_DSLT:
+   case TGSI_OPCODE_DSNE:
+  return TGSI_TYPE_DOUBLE;
default:
   return tgsi_opcode_infer_type(opcode);
}
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] gallivm: add fp64 support.

2015-06-29 Thread Dave Airlie

This adds support for ARB_gpu_shader_fp64 and ARB_vertex_attrib_64bit to
llvmpipe.

Two things that don't mix well are SoA and doubles, see
emit_fetch_double, and emit_store_double_chan in this.

I've also had to split emit_data.chan, to add src_chan,
which can be different for doubles.

Open issues:
are intrinsics okay for floor/ceil?
should any of these functions have CPU versions?

tested with piglit, no regressions, all the fp64 tests seem to pass.

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c|  12 ++
 src/gallium/auxiliary/gallivm/lp_bld_limits.h  |   1 +
 src/gallium/auxiliary/gallivm/lp_bld_logic.c   |   2 +-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c|  47 +++-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h|   4 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 240 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h |   3 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c| 163 +-
 8 files changed, 458 insertions(+), 14 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 9daa93e..8fba43f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1997,6 +1997,12 @@ lp_build_floor(struct lp_build_context *bld,
   LLVMTypeRef int_vec_type = bld->int_vec_type;
   LLVMTypeRef vec_type = bld->vec_type;
 
+  if (type.width != 32) {
+ char intrinsic[32];
+ util_snprintf(intrinsic, sizeof intrinsic, "llvm.floor.v%uf%u", 
type.length, type.width);
+ return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
+  }
+
   assert(type.width == 32); /* might want to handle doubles at some point 
*/
 
   inttype = type;
@@ -2066,6 +2072,12 @@ lp_build_ceil(struct lp_build_context *bld,
   LLVMTypeRef int_vec_type = bld->int_vec_type;
   LLVMTypeRef vec_type = bld->vec_type;
 
+  if (type.width != 32) {
+ char intrinsic[32];
+ util_snprintf(intrinsic, sizeof intrinsic, "llvm.ceil.v%uf%u", 
type.length, type.width);
+ return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
+  }
+
   assert(type.width == 32); /* might want to handle doubles at some point 
*/
 
   inttype = type;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h 
b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index 2851fd1..3db7261 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -132,6 +132,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
   return 1;
case PIPE_SHADER_CAP_DOUBLES:
+  return 1;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c 
b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 80b53e5..f724cfa 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -81,7 +81,7 @@ lp_build_compare_ext(struct gallivm_state *gallivm,
  boolean ordered)
 {
LLVMBuilderRef builder = gallivm->builder;
-   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
+   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, 
lp_type_int_vec(32, 32 * type.length));
LLVMValueRef zeros = LLVMConstNull(int_vec_type);
LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
LLVMValueRef cond;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index e391d8a..1887956 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -175,13 +175,52 @@ void lp_build_fetch_args(
unsigned src;
for (src = 0; src < emit_data->info->num_src; src++) {
   emit_data->args[src] = lp_build_emit_fetch(bld_base, emit_data->inst, 
src,
-   emit_data->chan);
+ emit_data->src_chan);
}
emit_data->arg_count = emit_data->info->num_src;
lp_build_action_set_dst_type(emit_data, bld_base,
emit_data->inst->Instruction.Opcode);
 }
 
+/**
+ * with doubles src and dst channels aren't 1:1.
+ * check the src/dst types for the opcode,
+ * 1. if neither is double then src == dst;
+ * 2. if dest is double
+ * - don't store to y or w
+ * - if src is double then src == dst.
+ * - else for f2d, d.xy = s.x
+ * - else for f2d, d.zw = s.y
+ * 3. if dst is single, src is double
+ *- map dst x,z to src xy;
+ *- map dst y,w to src zw;
+ */
+static int get_src_chan_idx(unsigned opcode,
+int dst_ch

[Mesa-dev] [PATCH 3/3] docs: update for llvmpipe fp64 support

2015-06-29 Thread Dave Airlie

From: Dave Airlie 

Signed-off-by: Dave Airlie 
---
 docs/GL3.txt  | 4 ++--
 docs/relnotes/10.7.0.html | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 54e4574..ce3b134 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -109,7 +109,7 @@ GL 4.0, GLSL 4.00:
   - Enhanced per-sample shadingDONE (r600, radeonsi)
   - Interpolation functionsDONE (r600)
   - New overload resolution rules  DONE
-  GL_ARB_gpu_shader_fp64   DONE (nvc0, softpipe)
+  GL_ARB_gpu_shader_fp64   DONE (nvc0, llvmpipe, 
softpipe)
   GL_ARB_sample_shadingDONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_shader_subroutine started (Dave)
   GL_ARB_tessellation_shader   started (Chris, Ilia)
@@ -127,7 +127,7 @@ GL 4.1, GLSL 4.10:
   GL_ARB_get_program_binaryDONE (0 binary formats)
   GL_ARB_separate_shader_objects   DONE (all drivers)
   GL_ARB_shader_precision  started (Micah)
-  GL_ARB_vertex_attrib_64bit   DONE (nvc0, softpipe)
+  GL_ARB_vertex_attrib_64bit   DONE (nvc0, llvmpipe, 
softpipe)
   GL_ARB_viewport_arrayDONE (i965, nv50, nvc0, 
r600, radeonsi, llvmpipe)
 
 
diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html
index fcc5081..2484243 100644
--- a/docs/relnotes/10.7.0.html
+++ b/docs/relnotes/10.7.0.html
@@ -49,6 +49,8 @@ Note: some of the new features are only available with 
certain drivers.
 GL_ARB_shader_stencil_export on llvmpipe
 GL_ARB_viewport_array on radeonsi
 GL_ARB_fragment_layer_viewport on radeonsi
+GL_ARB_gpu_shader_fp64 on llvmpipe
+GL_ARB_vertex_attrib_64bit on llvmpipe
 
 
 Bug fixes
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] gallivm: add fp64 support.

2015-06-29 Thread Dave Airlie

On 30 June 2015 at 00:58, Roland Scheidegger  wrote:
> Don't worry about the AoS stuff. Only meant to do simple things.
>
> Looks good overall, I guess it makes sense to not split execution too
> (so you'd have native hw vector size there), llvm should handle that
> pretty well these days (the sse intrinsics won't get used that way
> probably (though there's a helper for that too which makes it possible
> but it might not be hooked up, but I guess there's not really much need
> for them).
>
> Some comments inline.

I've noticed we have no tests for indirect access to fp64 things, so
I'll probably write some first to validate the indirect paths I
haven't fixed up yet.

>> Two things that don't mix well are SoA and doubles, see
>> emit_fetch_double, and emit_store_double_chan in this.
>>
>> I've also had to split emit_data.chan, to add src_chan,
>> which can be different for doubles.
>>
>> Open issues:
>> are intrinsics okay for floor/ceil?
> The question is if they actually work if you don't have sse4.1 and don't
> just crash (at least I assume with sse4.1 it turns into round
> instruction). (Or on non-x86 cpus if there is no direct hw support). If
> they don't you'd have to provide your own implementation (at least as a
> fallback) or make support for the extension conditional. Otherwise llvm
> intrinsics are just fine (traditionally we didn't really use them much
> as most of the things we do with sse intrinsics were missing, and even
> if some intrinsic existed it often didn't work, but that was a long time
> ago - ideally we'd switch to llvm intrinsics where possible).

Okay well I'm okay with limiting fp64 to where they work I suppose
though that needs
testing on older non sse4.1 hw.

>> +
>> +  scalar = LLVMBuildExtractElement(builder, input, si, "");
>> +  res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
>> +  scalar2 = LLVMBuildExtractElement(builder, input2, si, "");
>> +  res = LLVMBuildInsertElement(builder, res, scalar2, ii1, "");
>> +   }
> Did you check what code this generated? Traditionally, we tried to avoid
> the extract/insert stuff where possible and use shuffles instead.
> Because llvm would actually do inserts/extracts (i.e. move from simd
> domain to integer domain and back, which is pretty horrendous, and
> doubly so on some non-intel cpus which have like 15+ cycles latency for
> this). It is possible though this is no longer a problem, llvm 3.6 or
> 3.7 got some majorly improved shuffle optimizer which might also catch this.

No I haven't looked at what it generated, I was pretty sure it was
going to be ugly,

Oh, if I can use shufflevector for this direction I probably will, that
makes sense. I'm not sure it'll work for the other way,
but maybe two shufflevectors will; I hadn't looked into it that much yet.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] gallivm: add fp64 support. (v2)

2015-06-29 Thread Dave Airlie

This adds support for ARB_gpu_shader_fp64 and ARB_vertex_attrib_64bit to
llvmpipe.

Two things that don't mix well are SoA and doubles, see
emit_fetch_double, and emit_store_double_chan in this.

I've also had to split emit_data.chan, to add src_chan,
which can be different for doubles.

It handles indirect double fetches from temps, inputs, constants
and immediates. It doesn't handle double stores to indirects,
however it appears the mesa/st doesn't currently emit these,
it always does UARL/MOV combos, which will work fine.

tested with piglit, no regressions, all the fp64 tests seem to pass.

v2:
switch to using shuffles for fetch/store (Roland)
assert on indirect double stores - mesa/st never emits these (it uses MOV)
fix indirect temp/input/constant/immediates (Roland)
typos/formatting fixes (Roland)

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c|  12 +
 src/gallium/auxiliary/gallivm/lp_bld_limits.h  |   1 +
 src/gallium/auxiliary/gallivm/lp_bld_logic.c   |   2 +-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c|  47 +++-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h|   4 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 246 
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h |   5 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c| 256 ++---
 8 files changed, 541 insertions(+), 32 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 9daa93e..8fba43f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1997,6 +1997,12 @@ lp_build_floor(struct lp_build_context *bld,
   LLVMTypeRef int_vec_type = bld->int_vec_type;
   LLVMTypeRef vec_type = bld->vec_type;
 
+  if (type.width != 32) {
+ char intrinsic[32];
+ util_snprintf(intrinsic, sizeof intrinsic, "llvm.floor.v%uf%u", 
type.length, type.width);
+ return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
+  }
+
   assert(type.width == 32); /* might want to handle doubles at some point 
*/
 
   inttype = type;
@@ -2066,6 +2072,12 @@ lp_build_ceil(struct lp_build_context *bld,
   LLVMTypeRef int_vec_type = bld->int_vec_type;
   LLVMTypeRef vec_type = bld->vec_type;
 
+  if (type.width != 32) {
+ char intrinsic[32];
+ util_snprintf(intrinsic, sizeof intrinsic, "llvm.ceil.v%uf%u", 
type.length, type.width);
+ return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
+  }
+
   assert(type.width == 32); /* might want to handle doubles at some point 
*/
 
   inttype = type;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h 
b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index 2851fd1..3db7261 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -132,6 +132,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
   return 1;
case PIPE_SHADER_CAP_DOUBLES:
+  return 1;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c 
b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 80b53e5..f724cfa 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -81,7 +81,7 @@ lp_build_compare_ext(struct gallivm_state *gallivm,
  boolean ordered)
 {
LLVMBuilderRef builder = gallivm->builder;
-   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
+   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, 
lp_type_int_vec(32, 32 * type.length));
LLVMValueRef zeros = LLVMConstNull(int_vec_type);
LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
LLVMValueRef cond;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index e391d8a..1887956 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -175,13 +175,52 @@ void lp_build_fetch_args(
unsigned src;
for (src = 0; src < emit_data->info->num_src; src++) {
   emit_data->args[src] = lp_build_emit_fetch(bld_base, emit_data->inst, 
src,
-   emit_data->chan);
+ emit_data->src_chan);
}
emit_data->arg_count = emit_data->info->num_src;
lp_build_action_set_dst_type(emit_data, bld_base,
emit_data->inst->Instruction.Opcode);
 }
 
+/**
+ * with doubles src and dst channels aren't 1:1.
+ * check the src/dst types for the opcode,
+ * 1. if neither is double then src == dst;
+ *

Re: [Mesa-dev] [PATCH 2/3] gallivm: add fp64 support.

2015-06-29 Thread Dave Airlie

On 30 June 2015 at 09:36, Roland Scheidegger  wrote:
> Am 29.06.2015 um 22:18 schrieb Dave Airlie:
>> On 30 June 2015 at 00:58, Roland Scheidegger  wrote:
>>> Don't worry about the AoS stuff. Only meant to do simple things.
>>>
>>> Looks good overall, I guess it makes sense to not split execution too
>>> (so you'd have native hw vector size there), llvm should handle that
>>> pretty well these days (the sse intrinsics won't get used that way
>>> probably (though there's a helper for that too which makes it possible
>>> but it might not be hooked up, but I guess there's not really much need
>>> for them).
>>>
>>> Some comments inline.
>>
>> I've noticed we have no tests for indirect access to fp64 things, so
>> I'll probably write some first to validate the indirect paths I
>> haven't fixed up yet.
> Ok, thanks for looking at that.

Okay I've posted a new version of just this patch,

I fixed up the indirect fetchers all fine, the indirect stores don't occur
with mesa/st and I'm not sure I want to fix them up without test cases,
I've put an assert in the new patch in case it ever happens.

It also uses shufflevector instead of insert/extract fun.

Otherwise I should have addressed all the things mentioned.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallivm: add fp64 support. (v2)

2015-06-30 Thread Dave Airlie

>>   LLVMValueRef base_ptr,
>>   LLVMValueRef indexes,
>> - LLVMValueRef overflow_mask)
>> + LLVMValueRef overflow_mask, LLVMValueRef indexes2)
>>  {
>> struct gallivm_state *gallivm = bld_base->base.gallivm;
>> LLVMBuilderRef builder = gallivm->builder;
>> struct lp_build_context *uint_bld = &bld_base->uint_bld;
>> struct lp_build_context *bld = &bld_base->base;
>> -   LLVMValueRef res = bld->undef;
>> +   LLVMValueRef res;
>> unsigned i;
>>
>> +   if (indexes2)
>> +  res = 
>> LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 
>> bld_base->base.type.length * 2));
>> +   else
>> +  res = bld->undef;
>> /*
>>  * overflow_mask is a vector telling us which channels
>>  * in the vector overflowed. We use the overflow behavior for
>> @@ -976,26 +980,47 @@ build_gather(struct lp_build_tgsi_context *bld_base,
>> * control flow.
>> */
>>indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, 
>> indexes);
>> +  if (indexes2)
>> + indexes2 = lp_build_select(uint_bld, overflow_mask, 
>> uint_bld->zero, indexes2);
>> }
>>
>> /*
>>  * Loop over elements of index_vec, load scalar value, insert it into 
>> 'res'.
>>  */
>> -   for (i = 0; i < bld->type.length; i++) {
>> -  LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
>> -  LLVMValueRef index = LLVMBuildExtractElement(builder,
>> -   indexes, ii, "");
>> +   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
>> +  LLVMValueRef si, di;
>> +  LLVMValueRef index;
>>LLVMValueRef scalar_ptr, scalar;
>>
>> +  if (indexes2) {
>> + si = lp_build_const_int32(bld->gallivm, i >> 1);
>> + di = lp_build_const_int32(bld->gallivm, i);
>> +  } else {
>> + si = lp_build_const_int32(bld->gallivm, i);
>> + di = si;
>> +  }
>> +
>> +  if (indexes2 && (i & 1)) {
>> + index = LLVMBuildExtractElement(builder,
>> + indexes2, si, "");
>> +  } else {
>> + index = LLVMBuildExtractElement(builder,
>> + indexes, si, "");
>> +  }
>>scalar_ptr = LLVMBuildGEP(builder, base_ptr,
>>  &index, 1, "gather_ptr");
>>scalar = LLVMBuildLoad(builder, scalar_ptr, "");
>>
>> -  res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
>> +  res = LLVMBuildInsertElement(builder, res, scalar, di, "");
>> }
>>
>> if (overflow_mask) {
>> -  res = lp_build_select(bld, overflow_mask, bld->zero, res);
>> +  if (indexes2) {
>> + res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, 
>> "");
>> + overflow_mask = LLVMBuildSExt(builder, overflow_mask, 
>> bld_base->dbl_bld.int_vec_type, "");
>> + res = lp_build_select(&bld_base->dbl_bld, overflow_mask, 
>> bld_base->dbl_bld.zero, res);
>> +  } else
>> + res = lp_build_select(bld, overflow_mask, bld->zero, res);
>> }
> This function looks pretty complex to me.
> I wonder if it wouldn't make more sense to use the gather as it was and
> just call it twice, with some shuffle for the fetched values afterwards.
> (There is actually some good reason why build_gather should be a
> "simple" function extracting elements, do loads, and insert the loaded
> values in a straightforward manner, this is supposed to be able to turn
> into a avx2 gather at some point, and everything doing something
> different would need to be thrown out.)

It still collapses back to the previous version if indexes2 is NULL,
so if someone does
create a version that is avx2 gather then it should use that path for
the single width vars,
and can use the slower uglier one for fp64s I would think. I don't
think we'll ever get fp64
gathers/stores to be pretty.

>> +  LLVMValueRef temp, temp2;
>> +  LLVMValueRef shuffles[8];
>> +  LLVMValueRef shuffles2[8];
>> +
>> +  for (i = 0; i < bld_base->base.type.length; i++) {
>> + shuffles[i] = lp_build_const_int32(gallivm, i * 2);
>> + shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
>> +  }
>> +
>> +  temp = LLVMBuildShuffleVector(builder, value, 
>> LLVMGetUndef(LLVMTypeOf(value)), LLVMConstVector(shuffles, 
>> bld_base->base.type.length), "");
>> +  temp2 = LLVMBuildShuffleVector(builder, value, 
>> LLVMGetUndef(LLVMTypeOf(value)), LLVMConstVector(shuffles2, 
>> bld_base->base.type.length), "");
> These lines are a bit long...

Indeed, cleaned that up.
>
>
>> +  lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp, chan_ptr);
>> +  lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp2, 
>> chan_ptr2);
>> +   } else {
>> +  lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, chan_ptr);
> Actually I think it would be nicer to keep the ordinary float path o

Re: [Mesa-dev] [PATCH 2/3] gallivm: add fp64 support.

2015-06-30 Thread Dave Airlie

On 1 July 2015 at 00:52, Roland Scheidegger  wrote:
> Am 30.06.2015 um 03:42 schrieb Dave Airlie:
>> On 30 June 2015 at 09:36, Roland Scheidegger  wrote:
>>> Am 29.06.2015 um 22:18 schrieb Dave Airlie:
>>>> On 30 June 2015 at 00:58, Roland Scheidegger  wrote:
>>>>> Don't worry about the AoS stuff. Only meant to do simple things.
>>>>>
>>>>> Looks good overall, I guess it makes sense to not split execution too
>>>>> (so you'd have native hw vector size there), llvm should handle that
>>>>> pretty well these days (the sse intrinsics won't get used that way
>>>>> probably (though there's a helper for that too which makes it possible
>>>>> but it might not be hooked up, but I guess there's not really much need
>>>>> for them).
>>>>>
>>>>> Some comments inline.
>>>>
>>>> I've noticed we have no tests for indirect access to fp64 things, so
>>>> I'll probably write some first to validate the indirect paths I
>>>> haven't fixed up yet.
>>> Ok, thanks for looking at that.
>>
>> Okay I've posted a new version of just this patch,
>>
>> I fixed up the indirect fetchers all fine, the indirect stores don't occur
>> with mesa/st and I'm not sure I want to fix them up without test cases,
>> I've put an assert in the new patch in case it ever happens.
> Sounds like a good idea. So does mesa/st store those to temps then move
> them to indirect via the address reg? I thought we wanted to kill that
> eventually...
>

Yes it does that, my problem is I can't test this without the st being
fixed, so it's kinda chicken/egg, if the st gets fixed then fixing this is a lot
easier, and we hit an assert so we know to fix it.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] r600g: fix sampler/ubo indexing on cayman

2015-07-08 Thread Dave Airlie

From: Dave Airlie 

Cayman needs a different method to upload the CF IDX0/1

This fixes 31 piglits when ARB_gpu_shader5 is forced on
with cayman.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/eg_asm.c | 17 +++--
 src/gallium/drivers/r600/eg_sq.h  | 11 +++
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c 
b/src/gallium/drivers/r600/eg_asm.c
index d04921e..c32d317 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -161,6 +161,9 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned 
id, bool inside_alu_c
alu.op = ALU_OP1_MOVA_INT;
alu.src[0].sel = bc->index_reg[id];
alu.src[0].chan = 0;
+   if (bc->chip_class == CAYMAN)
+   alu.dst.sel = id == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : 
CM_V_SQ_MOVA_DST_CF_IDX1;
+
alu.last = 1;
r = r600_bytecode_add_alu(bc, &alu);
if (r)
@@ -168,12 +171,14 @@ int egcm_load_index_reg(struct r600_bytecode *bc, 
unsigned id, bool inside_alu_c
 
bc->ar_loaded = 0; /* clobbered */
 
-   memset(&alu, 0, sizeof(alu));
-   alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
-   alu.last = 1;
-   r = r600_bytecode_add_alu(bc, &alu);
-   if (r)
-   return r;
+   if (bc->chip_class == EVERGREEN) {
+   memset(&alu, 0, sizeof(alu));
+   alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
+   alu.last = 1;
+   r = r600_bytecode_add_alu(bc, &alu);
+   if (r)
+   return r;
+   }
 
/* Must split ALU group as index only applies to following group */
if (inside_alu_clause) {
diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h
index b534872..10caa07 100644
--- a/src/gallium/drivers/r600/eg_sq.h
+++ b/src/gallium/drivers/r600/eg_sq.h
@@ -521,4 +521,15 @@
 
 #define V_SQ_REL_ABSOLUTE 0
 #define V_SQ_REL_RELATIVE 1
+
+/* CAYMAN has special encoding for MOVA_INT destination */
+#define CM_V_SQ_MOVA_DST_AR_X 0
+#define CM_V_SQ_MOVA_DST_CF_PC 1
+#define CM_V_SQ_MOVA_DST_CF_IDX0 2
+#define CM_V_SQ_MOVA_DST_CF_IDX1 3
+#define CM_V_SQ_MOVA_DST_CF_CLAUSE_GLOBAL_7_0 4
+#define CM_V_SQ_MOVA_DST_CF_CLAUSE_GLOBAL_15_8 5
+#define CM_V_SQ_MOVA_DST_CF_CLAUSE_GLOBAL_23_16 6
+#define CM_V_SQ_MOVA_DST_CF_CLAUSE_GLOBAL_31_24 7
+
 #endif
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] r600g: move sampler/ubo index registers before temp reg

2015-07-08 Thread Dave Airlie

From: Dave Airlie 

temp_reg needs to be last, as we increment things
away from it, otherwise on cayman some tests were overwriting
the index regs.

Fixes 2 piglit with ARB_gpu_shader5 forced on cayman.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index af7622e..1a72bf6 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1931,15 +1931,14 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
+   ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1;
+   ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2;
+
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
-   ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1;
-   ctx.temp_reg = ctx.bc->ar_reg + 2;
-   ctx.bc->index_reg[0] = ctx.bc->ar_reg + 3;
-   ctx.bc->index_reg[1] = ctx.bc->ar_reg + 4;
+   ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 3;
+   ctx.temp_reg = ctx.bc->ar_reg + 4;
} else {
-   ctx.temp_reg = ctx.bc->ar_reg + 1;
-   ctx.bc->index_reg[0] = ctx.bc->ar_reg + 2;
-   ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3;
+   ctx.temp_reg = ctx.bc->ar_reg + 3;
}
 
shader->max_arrays = 0;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/6] r600g/sb: add support for multiple streams to SB backend

2015-07-08 Thread Dave Airlie

From: Glenn Kennard 

This adds a peephole and removes an assert that isn't
actually valid with some of the stream emit instructions.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/sb/sb_bc_finalize.cpp |  2 --
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp   | 18 +++---
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 8c2cd14..48d56ac 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -761,8 +761,6 @@ void bc_finalizer::finalize_cf(cf_node* c) {
mask |= (1 << chan);
}
 
-   assert(reg >= 0 && mask);
-
if (reg >= 0)
update_ngpr(reg);
 
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index 4879c03..25b0f55 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -757,10 +757,22 @@ int bc_parser::prepare_ir() {
c->bc.end_of_program = eop;
 
} else if (flags & CF_EMIT) {
-   c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;
+   /* quick peephole */
+   cf_node *prev = static_cast(c->prev);
+   if (c->bc.op == CF_OP_CUT_VERTEX &&
+   prev && prev->is_valid() &&
+   prev->bc.op == CF_OP_EMIT_VERTEX &&
+   c->bc.count == prev->bc.count) {
+   prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX);
+   prev->bc.end_of_program = c->bc.end_of_program;
+   c->remove();
+   }
+   else {
+   c->flags |= NF_DONT_KILL | NF_DONT_HOIST | 
NF_DONT_MOVE;
 
-   
c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
-   
c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+   
c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+   
c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+   }
}
}
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/6] r600g: enable ARB_gpu_shader5 on evergreen and up

2015-07-08 Thread Dave Airlie

From: Dave Airlie 

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_pipe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 67caa69..0db1c1c 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -299,7 +299,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
 
case PIPE_CAP_GLSL_FEATURE_LEVEL:
if (family >= CHIP_CEDAR)
-  return 330;
+  return 400;
/* pre-evergreen geom shaders need newer kernel */
if (rscreen->b.info.drm_minor >= 37)
   return 330;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [RFC] r600 geometry streams (and ARB_gpu_shader5 support)

2015-07-08 Thread Dave Airlie

This applies on top of the two patches I've sent already,
and enables the geometry streams, which is the final
piece missing for ARB_gpu_shader5 on evergreen and cayman.
(I'll do doc update patches later)

Glenn wrote most of this, I just spent some time making it work
and cleaning up the code. Though I suspect it could do with more
cleaning.

The main thing I don't really like is we try and emit all
outputs into each ring so we waste a bit of space, however
it does work, though the special casing to avoid POSITION
going into any other streams kinda makes me wonder if anything
else needs that special casing.

Dave.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/6] r600g: add support for streams to the assembler.

2015-07-08 Thread Dave Airlie

From: Glenn Kennard 

This just adds support to the assembler dumper and allows
stream instructions to be generated. Also fix up the stream
debugging to add stream info.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/eg_asm.c  | 1 +
 src/gallium/drivers/r600/r600_asm.c| 2 ++
 src/gallium/drivers/r600/r600_asm.h| 1 +
 src/gallium/drivers/r600/r600_shader.c | 6 --
 src/gallium/drivers/r600/sb/sb_bc_dump.cpp | 3 +++
 5 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c 
b/src/gallium/drivers/r600/eg_asm.c
index 42e8b0b..c32d317 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -115,6 +115,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct 
r600_bytecode_cf *cf)
S_SQ_CF_WORD1_BARRIER(1) |
S_SQ_CF_WORD1_COND(cf->cond) |
S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+   S_SQ_CF_WORD1_COUNT(cf->count) |

S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
}
diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 762cc7f..40639d0 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2029,6 +2029,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
fprintf(stderr, "CND:%X ", cf->cond);
if (cf->pop_count)
fprintf(stderr, "POP:%X ", 
cf->pop_count);
+   if (cf->count && (cfop->flags & CF_EMIT))
+   fprintf(stderr, "STREAM%d ", cf->count);
fprintf(stderr, "\n");
}
}
diff --git a/src/gallium/drivers/r600/r600_asm.h 
b/src/gallium/drivers/r600/r600_asm.h
index e37d926..b282907 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -149,6 +149,7 @@ struct r600_bytecode_cf {
unsignedid;
unsignedcond;
unsignedpop_count;
+   unsignedcount;
unsignedcf_addr; /* control flow addr */
struct r600_bytecode_kcache kcache[4];
unsignedr6xx_uses_waterfall;
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 1a72bf6..dbff313 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -93,8 +93,10 @@ static void r600_dump_streamout(struct 
pipe_stream_output_info *so)
for (i = 0; i < so->num_outputs; i++) {
unsigned mask = ((1 << so->output[i].num_components) - 1) <<
so->output[i].start_component;
-   fprintf(stderr, "  %i: MEM_STREAM0_BUF%i[%i..%i] <- 
OUT[%i].%s%s%s%s%s\n",
-   i, so->output[i].output_buffer,
+   fprintf(stderr, "  %i: MEM_STREAM%d_BUF%i[%i..%i] <- 
OUT[%i].%s%s%s%s%s\n",
+   i,
+   so->output[i].stream,
+   so->output[i].output_buffer,
so->output[i].dst_offset, so->output[i].dst_offset + 
so->output[i].num_components - 1,
so->output[i].register_index,
mask & 1 ? "x" : "",
diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp 
b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
index 5232782..631fac2 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
@@ -182,6 +182,9 @@ void bc_dump::dump(cf_node& n) {
 
if (n.bc.pop_count)
s << " POP:" << n.bc.pop_count;
+
+   if (n.bc.count && (n.bc.op_ptr->flags & CF_EMIT))
+   s << " STREAM" << n.bc.count;
}
 
if (!n.bc.barrier)
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/6] radeon: add streamout status 1-3 queries.

2015-07-08 Thread Dave Airlie

From: Glenn Kennard 

This adds support for queries against the non-0 vertex streams.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/radeon/r600_query.c   | 18 --
 src/gallium/drivers/radeon/r600d_common.h |  3 +++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 71f4a15..9ad2452 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -54,6 +54,8 @@ struct r600_query {
uint64_t end_result;
/* Fence for GPU_FINISHED. */
struct pipe_fence_handle *fence;
+   /* For transform feedback: which stream the query is for */
+   unsigned stream;
 };
 
 
@@ -157,6 +159,17 @@ static void r600_update_occlusion_query_state(struct 
r600_common_context *rctx,
}
 }
 
+static unsigned event_type_for_stream(struct r600_query *query)
+{
+   switch (query->stream) {
+   default:
+   case 0: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS;
+   case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1; /* enum values 
snarfed from SI kernel sid.h */
+   case 2: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS2;
+   case 3: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS3;
+   }
+}
+
 static void r600_emit_query_begin(struct r600_common_context *ctx, struct 
r600_query *query)
 {
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
@@ -191,7 +204,7 @@ static void r600_emit_query_begin(struct 
r600_common_context *ctx, struct r600_q
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
-   radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | 
EVENT_INDEX(3));
+   radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | 
EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
@@ -248,7 +261,7 @@ static void r600_emit_query_end(struct r600_common_context 
*ctx, struct r600_que
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
va += query->buffer.results_end + query->result_size/2;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
-   radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | 
EVENT_INDEX(3));
+   radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | 
EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
@@ -369,6 +382,7 @@ static struct pipe_query *r600_create_query(struct 
pipe_context *ctx, unsigned q
/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
query->result_size = 32;
query->num_cs_dw = 6;
+   query->stream = index;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* 11 values on EG, 8 on R600. */
diff --git a/src/gallium/drivers/radeon/r600d_common.h 
b/src/gallium/drivers/radeon/r600d_common.h
index 74c8d87..5a56a54 100644
--- a/src/gallium/drivers/radeon/r600d_common.h
+++ b/src/gallium/drivers/radeon/r600d_common.h
@@ -66,6 +66,9 @@
 #define PKT3_SET_SH_REG0x76 /* SI and later */
 #define PKT3_SET_UCONFIG_REG   0x79 /* CIK and later */
 
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS1  0x1 /* EG and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS2  0x2 /* EG and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS3  0x3 /* EG and later */
 #define EVENT_TYPE_PS_PARTIAL_FLUSH0x10
 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
 #define EVENT_TYPE_ZPASS_DONE  0x15
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/6] r600g: add streamout support

2015-07-08 Thread Dave Airlie

From: Glenn Kennard 

This adds the main chunk of the geometry shader multiple stream
support to the r600 driver.

Glenn wrote the original pass, and I took his code and hacked
it into a working state.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/evergreen_state.c   |  29 ++--
 src/gallium/drivers/r600/r600_pipe.c |   2 +-
 src/gallium/drivers/r600/r600_shader.c   | 200 ---
 src/gallium/drivers/r600/r600_shader.h   |   6 +-
 src/gallium/drivers/r600/r600_state.c|   6 +-
 src/gallium/drivers/r600/r600_state_common.c |   7 +
 6 files changed, 180 insertions(+), 70 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 4ddbc0b..788bf54 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2988,8 +2988,12 @@ void evergreen_update_gs_state(struct pipe_context *ctx, 
struct r600_pipe_shader
struct r600_command_buffer *cb = &shader->command_buffer;
struct r600_shader *rshader = &shader->shader;
struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
-   unsigned gsvs_itemsize =
-   (cp_shader->ring_item_size * 
rshader->gs_max_out_vertices) >> 2;
+   unsigned gsvs_itemsizes[4] = {
+   (cp_shader->ring_item_sizes[0] * 
rshader->gs_max_out_vertices) >> 2,
+   (cp_shader->ring_item_sizes[1] * 
rshader->gs_max_out_vertices) >> 2,
+   (cp_shader->ring_item_sizes[2] * 
rshader->gs_max_out_vertices) >> 2,
+   (cp_shader->ring_item_sizes[3] * 
rshader->gs_max_out_vertices) >> 2
+   };
 
r600_init_command_buffer(cb, 64);
 
@@ -3008,21 +3012,24 @@ void evergreen_update_gs_state(struct pipe_context 
*ctx, struct r600_pipe_shader
S_028B90_ENABLE(rshader->gs_num_invocations > 
0));
}
r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
-   r600_store_value(cb, cp_shader->ring_item_size >> 2);
-   r600_store_value(cb, 0);
-   r600_store_value(cb, 0);
-   r600_store_value(cb, 0);
+   r600_store_value(cb, cp_shader->ring_item_sizes[0] >> 2);
+   r600_store_value(cb, cp_shader->ring_item_sizes[1] >> 2);
+   r600_store_value(cb, cp_shader->ring_item_sizes[2] >> 2);
+   r600_store_value(cb, cp_shader->ring_item_sizes[3] >> 2);
 
r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE,
-  (rshader->ring_item_size) >> 2);
+  (rshader->ring_item_sizes[0]) >> 2);
 
r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE,
-  gsvs_itemsize);
+  gsvs_itemsizes[0] +
+  gsvs_itemsizes[1] +
+  gsvs_itemsizes[2] +
+  gsvs_itemsizes[3]);
 
r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3);
-   r600_store_value(cb, gsvs_itemsize);
-   r600_store_value(cb, gsvs_itemsize);
-   r600_store_value(cb, gsvs_itemsize);
+   r600_store_value(cb, gsvs_itemsizes[0]);
+   r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1]);
+   r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1] + 
gsvs_itemsizes[2]);
 
/* FIXME calculate these values somehow ??? */
r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3);
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 143e98e..67caa69 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -352,7 +352,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
return 16384;
case PIPE_CAP_MAX_VERTEX_STREAMS:
-   return 1;
+   return family >= CHIP_CEDAR ? 4 : 1;
 
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
return 2047;
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index dbff313..ef19706 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -311,7 +311,9 @@ struct r600_shader_ctx {
int gs_out_ring_offset;
int gs_next_vertex;
struct r600_shader  *gs_for_vs;
-   int gs_export_gpr_treg;
+   int gs_export_gpr_tregs[4];
+   const struct pipe_stream_output_info*gs_stream_output_info;
+   unsignedenabled_stream_buffers_mask

[Mesa-dev] [PATCH 3/6] radeon: add support for streams to the common streamout code.

2015-07-08 Thread Dave Airlie

From: Glenn Kennard 

This just adds to the common radeon streamout code, support
for multiple streams.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeon/r600_streamout.c   | 23 +--
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index a471426..22d940e 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -327,6 +327,7 @@ struct r600_streamout {
/* External state which comes from the vertex shader,
 * it must be set explicitly when binding a shader. */
unsigned*stride_in_dw;
+   unsignedenabled_stream_buffers_mask; /* stream0 
buffers0-3 in 4 LSB */
 
/* The state of VGT_STRMOUT_(CONFIG|EN). */
struct r600_atomenable_atom;
diff --git a/src/gallium/drivers/radeon/r600_streamout.c 
b/src/gallium/drivers/radeon/r600_streamout.c
index bc8bf97..a602dac 100644
--- a/src/gallium/drivers/radeon/r600_streamout.c
+++ b/src/gallium/drivers/radeon/r600_streamout.c
@@ -195,7 +195,11 @@ static void r600_emit_streamout_begin(struct 
r600_common_context *rctx, struct r
r600_write_context_reg(cs, rctx->chip_class >= EVERGREEN ?
   R_028B98_VGT_STRMOUT_BUFFER_CONFIG :
   R_028B20_VGT_STRMOUT_BUFFER_EN,
-  rctx->streamout.enabled_mask);
+  (rctx->streamout.enabled_mask |
+   (rctx->streamout.enabled_mask << 4) |
+   (rctx->streamout.enabled_mask << 8) |
+   (rctx->streamout.enabled_mask << 12)) &
+
rctx->streamout.enabled_stream_buffers_mask);
 
for (i = 0; i < rctx->streamout.num_targets; i++) {
if (!t[i])
@@ -326,11 +330,18 @@ static bool r600_get_strmout_en(struct 
r600_common_context *rctx)
 static void r600_emit_streamout_enable(struct r600_common_context *rctx,
   struct r600_atom *atom)
 {
-   r600_write_context_reg(rctx->rings.gfx.cs,
-  rctx->chip_class >= EVERGREEN ?
-  R_028B94_VGT_STRMOUT_CONFIG :
-  R_028AB0_VGT_STRMOUT_EN,
-  
S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx)));
+   unsigned reg = R_028AB0_VGT_STRMOUT_EN;
+   unsigned val = S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx));
+   if (rctx->chip_class >= EVERGREEN) {
+   reg = R_028B94_VGT_STRMOUT_CONFIG;
+   val |=
+   S_028B94_RAST_STREAM(0) |
+   S_028B94_STREAMOUT_1_EN(r600_get_strmout_en(rctx)) |
+   S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
+   S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
+   }
+
+   r600_write_context_reg(rctx->rings.gfx.cs, reg, val);
 }
 
 static void r600_set_streamout_enable(struct r600_common_context *rctx, bool 
enable)
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 07/19] glsl/types: add new subroutine type (v3)

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

This type will be used to store the name of subroutine types

as in subroutine void myfunc(void);
will store myfunc into a subroutine type.

This is required so the parser can identify a subroutine
type in a uniform declaration as a valid type, and also for
looking up the type later.

Also add contains_subroutine method.

v2: handle subroutine to int comparisons, needed
for lowering pass.
v3: do subroutine to int with its own IR
operation to avoid hacking on asserts (Kayden)

Signed-off-by: Dave Airlie 
---
 src/glsl/glsl_types.cpp| 63 ++
 src/glsl/glsl_types.h  | 19 ++
 src/glsl/ir.cpp|  2 ++
 src/glsl/ir.h  |  1 +
 src/glsl/ir_builder.cpp|  6 
 src/glsl/ir_builder.h  |  1 +
 src/glsl/ir_clone.cpp  |  1 +
 src/glsl/ir_validate.cpp   |  4 +++
 src/glsl/link_uniform_initializers.cpp |  1 +
 9 files changed, 98 insertions(+)

diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index 281ff51..1e3ebb2 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -32,6 +32,7 @@ mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP;
 hash_table *glsl_type::array_types = NULL;
 hash_table *glsl_type::record_types = NULL;
 hash_table *glsl_type::interface_types = NULL;
+hash_table *glsl_type::subroutine_types = NULL;
 void *glsl_type::mem_ctx = NULL;
 
 void
@@ -159,6 +160,22 @@ glsl_type::glsl_type(const glsl_struct_field *fields, 
unsigned num_fields,
mtx_unlock(&glsl_type::mutex);
 }
 
+glsl_type::glsl_type(const char *subroutine_name) :
+   gl_type(0),
+   base_type(GLSL_TYPE_SUBROUTINE),
+   sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+   sampler_type(0), interface_packing(0),
+   vector_elements(0), matrix_columns(0),
+   length(0)
+{
+   mtx_lock(&glsl_type::mutex);
+
+   init_ralloc_type_ctx();
+   assert(subroutine_name != NULL);
+   this->name = ralloc_strdup(this->mem_ctx, subroutine_name);
+   this->vector_elements = 1;
+   mtx_unlock(&glsl_type::mutex);
+}
 
 bool
 glsl_type::contains_sampler() const
@@ -229,6 +246,22 @@ glsl_type::contains_opaque() const {
}
 }
 
+bool
+glsl_type::contains_subroutine() const
+{
+   if (this->is_array()) {
+  return this->fields.array->contains_subroutine();
+   } else if (this->is_record()) {
+  for (unsigned int i = 0; i < this->length; i++) {
+if (this->fields.structure[i].type->contains_subroutine())
+   return true;
+  }
+  return false;
+   } else {
+  return this->is_subroutine();
+   }
+}
+
 gl_texture_index
 glsl_type::sampler_index() const
 {
@@ -826,6 +859,34 @@ glsl_type::get_interface_instance(const glsl_struct_field 
*fields,
return t;
 }
 
+const glsl_type *
+glsl_type::get_subroutine_instance(const char *subroutine_name)
+{
+   const glsl_type key(subroutine_name);
+
+   mtx_lock(&glsl_type::mutex);
+
+   if (subroutine_types == NULL) {
+  subroutine_types = hash_table_ctor(64, record_key_hash, 
record_key_compare);
+   }
+
+   const glsl_type *t = (glsl_type *) hash_table_find(subroutine_types, & key);
+   if (t == NULL) {
+  mtx_unlock(&glsl_type::mutex);
+  t = new glsl_type(subroutine_name);
+  mtx_lock(&glsl_type::mutex);
+
+  hash_table_insert(subroutine_types, (void *) t, t);
+   }
+
+   assert(t->base_type == GLSL_TYPE_SUBROUTINE);
+   assert(strcmp(t->name, subroutine_name) == 0);
+
+   mtx_unlock(&glsl_type::mutex);
+
+   return t;
+}
+
 
 const glsl_type *
 glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b)
@@ -958,6 +1019,7 @@ glsl_type::component_slots() const
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_VOID:
+   case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_ERROR:
   break;
}
@@ -1331,6 +1393,7 @@ glsl_type::count_attribute_slots() const
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_VOID:
+   case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_ERROR:
   break;
}
diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index f54a939..0f4dc80 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -59,6 +59,7 @@ enum glsl_base_type {
GLSL_TYPE_INTERFACE,
GLSL_TYPE_ARRAY,
GLSL_TYPE_VOID,
+   GLSL_TYPE_SUBROUTINE,
GLSL_TYPE_ERROR
 };
 
@@ -264,6 +265,11 @@ struct glsl_type {
  const char *block_name);
 
/**
+* Get the instance of a subroutine type
+*/
+   static const glsl_type *get_subroutine_instance(const char 
*subroutine_name);
+
+   /**
 * Get the type resulting from a multiplication of \p type_a * \p type_b
 */
static const glsl_type *get_mul_type(const glsl_type *type_a,
@@ -514,6 +520,13 @@ struct glsl_type {
/**
 * Query if a type is u

[Mesa-dev] ARB_shader_subroutine (again)

2015-07-09 Thread Dave Airlie

I've rebased this series, it's in my arb_shader_subroutine branch.

I've also implemented Ken's idea for a subroutine->int conversion
and put the changes into each patch that it affects.

Otherwise not much different from when I last posted.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 08/19] mesa: add inline conversion functions for ARB_shader_subroutine

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

This handles converting the shader stages to the internal
prefix along with the program resource interfaces.

Signed-off-by: Dave Airlie 
---
 src/mesa/main/shaderobj.h | 84 +++
 1 file changed, 84 insertions(+)

diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h
index 3d696a1..67c717b 100644
--- a/src/mesa/main/shaderobj.h
+++ b/src/mesa/main/shaderobj.h
@@ -120,6 +120,90 @@ _mesa_shader_enum_to_shader_stage(GLenum v)
 }
 
 
+static inline const char *
+_mesa_shader_stage_to_subroutine_prefix(gl_shader_stage stage)
+{
+  switch (stage) {
+  case MESA_SHADER_VERTEX:
+return "__subu_v";
+  case MESA_SHADER_GEOMETRY:
+return "__subu_g";
+  case MESA_SHADER_FRAGMENT:
+return "__subu_f";
+  case MESA_SHADER_COMPUTE:
+return "__subu_c";
+  default:
+return NULL;
+  }
+}
+
+static inline gl_shader_stage
+_mesa_shader_stage_from_subroutine_uniform(GLenum subuniform)
+{
+   switch (subuniform) {
+   default:
+   case GL_VERTEX_SUBROUTINE_UNIFORM:
+  return MESA_SHADER_VERTEX;
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+  return MESA_SHADER_GEOMETRY;
+   case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+  return MESA_SHADER_FRAGMENT;
+   case GL_COMPUTE_SUBROUTINE_UNIFORM:
+  return MESA_SHADER_COMPUTE;
+   /* TODO - TESS */
+   }
+}
+
+static inline gl_shader_stage
+_mesa_shader_stage_from_subroutine(GLenum subroutine)
+{
+   switch (subroutine) {
+   case GL_VERTEX_SUBROUTINE:
+  return MESA_SHADER_VERTEX;
+   case GL_GEOMETRY_SUBROUTINE:
+  return MESA_SHADER_GEOMETRY;
+   case GL_FRAGMENT_SUBROUTINE:
+  return MESA_SHADER_FRAGMENT;
+   case GL_COMPUTE_SUBROUTINE:
+  return MESA_SHADER_COMPUTE;
+   /* TODO - TESS */
+   }
+}
+
+static inline GLenum
+_mesa_shader_stage_to_subroutine(gl_shader_stage stage)
+{
+   switch (stage) {
+   default:
+   case MESA_SHADER_VERTEX:
+  return GL_VERTEX_SUBROUTINE;
+   case MESA_SHADER_GEOMETRY:
+  return GL_GEOMETRY_SUBROUTINE;
+   case MESA_SHADER_FRAGMENT:
+  return GL_FRAGMENT_SUBROUTINE;
+   case MESA_SHADER_COMPUTE:
+  return GL_COMPUTE_SUBROUTINE;
+   /* TODO - TESS */
+   }
+}
+
+static inline GLenum
+_mesa_shader_stage_to_subroutine_uniform(gl_shader_stage stage)
+{
+   switch (stage) {
+   default:
+   case MESA_SHADER_VERTEX:
+  return GL_VERTEX_SUBROUTINE_UNIFORM;
+   case MESA_SHADER_GEOMETRY:
+  return GL_GEOMETRY_SUBROUTINE_UNIFORM;
+   case MESA_SHADER_FRAGMENT:
+  return GL_FRAGMENT_SUBROUTINE_UNIFORM;
+   case MESA_SHADER_COMPUTE:
+  return GL_COMPUTE_SUBROUTINE_UNIFORM;
+   /* TODO - TESS */
+   }
+}
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 04/19] mesa: Add glGet support for ARB_shader_subroutine implementation limits

2015-07-09 Thread Dave Airlie

From: Chris Forbes 

Reviewed-by: Tapani Pälli 
Reviewed-by: Kenneth Graunke 
Signed-off-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/mesa/main/config.h   | 6 ++
 src/mesa/main/get.c  | 1 +
 src/mesa/main/get_hash_params.py | 4 
 src/mesa/main/tests/enum_strings.cpp | 9 +
 4 files changed, 20 insertions(+)

diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 9c3baf4..07c3474 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -272,6 +272,12 @@
 #define MAX_VERTEX_STREAMS  4
 /*@}*/
 
+/** For GL_ARB_shader_subroutine */
+/*@{*/
+#define MAX_SUBROUTINES   256
+#define MAX_SUBROUTINE_UNIFORM_LOCATIONS  1024
+/*@}*/
+
 /** For GL_INTEL_performance_query */
 /*@{*/
 #define MAX_PERFQUERY_QUERY_NAME_LENGTH 256
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 3d6d639..ac9cba3 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -401,6 +401,7 @@ EXTRA_EXT(ARB_explicit_uniform_location);
 EXTRA_EXT(ARB_clip_control);
 EXTRA_EXT(EXT_polygon_offset_clamp);
 EXTRA_EXT(ARB_framebuffer_no_attachments);
+EXTRA_EXT(ARB_shader_subroutine);
 
 static const int
 extra_ARB_color_buffer_float_or_glcore[] = {
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index c25e1b6..842ed6c 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -824,6 +824,10 @@ descriptor=[
   [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", 
"CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5" ],
   [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", 
"CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5" ],
   [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", 
"CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5" ],
+
+# GL_ARB_shader_subroutine
+  [ "MAX_SUBROUTINES", "CONST(MAX_SUBROUTINES), extra_ARB_shader_subroutine" ],
+  [ "MAX_SUBROUTINE_UNIFORM_LOCATIONS", 
"CONST(MAX_SUBROUTINE_UNIFORM_LOCATIONS), extra_ARB_shader_subroutine" ],
 ]}
 
 ]
diff --git a/src/mesa/main/tests/enum_strings.cpp 
b/src/mesa/main/tests/enum_strings.cpp
index dc5fe75..d40b82a 100644
--- a/src/mesa/main/tests/enum_strings.cpp
+++ b/src/mesa/main/tests/enum_strings.cpp
@@ -1731,6 +1731,10 @@ const struct enum_info everything[] = {
{ 0x8DDF, "GL_MAX_GEOMETRY_UNIFORM_COMPONENTS" },
{ 0x8DE0, "GL_MAX_GEOMETRY_OUTPUT_VERTICES" },
{ 0x8DE1, "GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS" },
+   { 0x8DE5, "GL_ACTIVE_SUBROUTINES" },
+   { 0x8DE6, "GL_ACTIVE_SUBROUTINE_UNIFORMS" },
+   { 0x8DE7, "GL_MAX_SUBROUTINES" },
+   { 0x8DE8, "GL_MAX_SUBROUTINE_UNIFORM_LOCATIONS" },
{ 0x8DF0, "GL_LOW_FLOAT" },
{ 0x8DF1, "GL_MEDIUM_FLOAT" },
{ 0x8DF2, "GL_HIGH_FLOAT" },
@@ -1759,6 +1763,11 @@ const struct enum_info everything[] = {
{ 0x8E44, "GL_TEXTURE_SWIZZLE_B" },
{ 0x8E45, "GL_TEXTURE_SWIZZLE_A" },
{ 0x8E46, "GL_TEXTURE_SWIZZLE_RGBA" },
+   { 0x8E47, "GL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS" },
+   { 0x8E48, "GL_ACTIVE_SUBROUTINE_MAX_LENGTH" },
+   { 0x8E49, "GL_ACTIVE_SUBROUTINE_UNIFORM_MAX_LENGTH" },
+   { 0x8E4A, "GL_NUM_COMPATIBLE_SUBROUTINES" },
+   { 0x8E4B, "GL_COMPATIBLE_SUBROUTINES" },
{ 0x8E4C, "GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION" },
{ 0x8E4D, "GL_FIRST_VERTEX_CONVENTION" },
{ 0x8E4E, "GL_LAST_VERTEX_CONVENTION" },
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 02/19] glapi: Add ARB_shader_subroutine functions and enums (v2)

2015-07-09 Thread Dave Airlie

From: Chris Forbes 

v2: fix output="true" and LENGTH typo

Reviewed-by: Tapani Pälli 
Reviewed-by: Kenneth Graunke 
Signed-off-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/mapi/glapi/gen/ARB_shader_subroutine.xml | 84 
 src/mapi/glapi/gen/Makefile.am   |  1 +
 src/mapi/glapi/gen/gl_API.xml|  6 +-
 3 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 src/mapi/glapi/gen/ARB_shader_subroutine.xml

diff --git a/src/mapi/glapi/gen/ARB_shader_subroutine.xml 
b/src/mapi/glapi/gen/ARB_shader_subroutine.xml
new file mode 100644
index 000..04b75cb
--- /dev/null
+++ b/src/mapi/glapi/gen/ARB_shader_subroutine.xml
@@ -0,0 +1,84 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am
index 5b163b0..1922c15 100644
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -151,6 +151,7 @@ API_XML = \
ARB_separate_shader_objects.xml \
ARB_shader_atomic_counters.xml \
ARB_shader_image_load_store.xml \
+   ARB_shader_subroutine.xml \
ARB_sync.xml \
ARB_texture_barrier.xml \
ARB_texture_buffer_object.xml \
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 2f33075..64314cf 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -8072,7 +8072,11 @@
 
 http://www.w3.org/2001/XInclude"/>
 
-
+
+
+http://www.w3.org/2001/XInclude"/>
+
+
 
 http://www.w3.org/2001/XInclude"/>
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 11/19] glsl: add ast/parser support for subroutine parsing storage (v3.1)

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

This is the guts of the GLSL parser and AST support for
shader subroutines.

The code creates a subroutine type in the parser, and
uses that there to validate the identifiers. The parser
also distinguishes between subroutine types/function prototypes
/uniforms and subroutine definitions for functions.

Then in the AST conversion it recreates the types, and
stores the subroutine definition info or subroutine info
into the ir_function along with a side lookup table in
the parser state. It also converts subroutine calls into
the enhanced ir_call.

v2: move to handling method calls in
function handling not in field selection.
v3: merge Chris's previous parser patches in here, to
make it clearer what's changed in one place.
v3.1: add more documentation, drop unused include

Signed-off-by: Dave Airlie 
---
 src/glsl/ast.h   |  15 +
 src/glsl/ast_function.cpp| 120 +--
 src/glsl/ast_to_hir.cpp  |  98 
 src/glsl/ast_type.cpp|   7 ++-
 src/glsl/glsl_lexer.ll   |   8 +++
 src/glsl/glsl_parser.yy  | 114 +
 src/glsl/glsl_parser_extras.cpp  |  22 +++
 src/glsl/glsl_parser_extras.h|  19 +++
 src/glsl/hir_field_selection.cpp |  39 -
 9 files changed, 326 insertions(+), 116 deletions(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index ef74e51..968aad4 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -304,6 +304,16 @@ private:
 * Is this function call actually a constructor?
 */
bool cons;
+   ir_rvalue *
+   handle_method(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state);
+};
+
+class ast_subroutine_list : public ast_node
+{
+public:
+   virtual void print(void) const;
+   exec_list declarations;
 };
 
 class ast_array_specifier : public ast_node {
@@ -514,6 +524,10 @@ struct ast_type_qualifier {
  unsigned stream:1; /**< Has stream value assigned  */
  unsigned explicit_stream:1; /**< stream value assigned explicitly by 
shader code */
  /** \} */
+
+ /** \name Qualifiers for GL_ARB_shader_subroutine */
+ unsigned subroutine:1;  /**< Is this marked 'subroutine' */
+ unsigned subroutine_def:1; /**< Is this marked 'subroutine' with a 
list of types */
   }
   /** \brief Set of flags, accessed by name. */
   q;
@@ -636,6 +650,7 @@ struct ast_type_qualifier {
ast_type_qualifier q,
ast_node* &node);
 
+   ast_subroutine_list *subroutine_list;
 };
 
 class ast_declarator_list;
diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 92e26bf..f32de7c 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -26,6 +26,7 @@
 #include "glsl_types.h"
 #include "ir.h"
 #include "main/core.h" /* for MIN2 */
+#include "main/shaderobj.h"
 
 static ir_rvalue *
 convert_component(ir_rvalue *src, const glsl_type *desired_type);
@@ -355,6 +356,8 @@ fix_parameter(void *mem_ctx, ir_rvalue *actual, const 
glsl_type *formal_type,
 static ir_rvalue *
 generate_call(exec_list *instructions, ir_function_signature *sig,
  exec_list *actual_parameters,
+  ir_variable *sub_var,
+ ir_rvalue *array_idx,
  struct _mesa_glsl_parse_state *state)
 {
void *ctx = state;
@@ -421,7 +424,8 @@ generate_call(exec_list *instructions, 
ir_function_signature *sig,
 
   deref = new(ctx) ir_dereference_variable(var);
}
-   ir_call *call = new(ctx) ir_call(sig, deref, actual_parameters);
+
+   ir_call *call = new(ctx) ir_call(sig, deref, actual_parameters, sub_var, 
array_idx);
instructions->push_tail(call);
 
/* Also emit any necessary out-parameter conversions. */
@@ -489,6 +493,40 @@ done:
return sig;
 }
 
+static ir_function_signature *
+match_subroutine_by_name(const char *name,
+ exec_list *actual_parameters,
+ struct _mesa_glsl_parse_state *state,
+ ir_variable **var_r)
+{
+   void *ctx = state;
+   ir_function_signature *sig = NULL;
+   ir_function *f, *found = NULL;
+   const char *new_name;
+   ir_variable *var;
+   bool is_exact = false;
+
+   new_name = ralloc_asprintf(ctx, "%s_%s", 
_mesa_shader_stage_to_subroutine_prefix(state->stage), name);
+   var = state->symbols->get_variable(new_name);
+   if (!var)
+  return NULL;
+
+   for (int i = 0; i < state->num_subroutine_types; i++) {
+  f = state->subroutine_types[i];
+  if (strcmp(f->name, var->type->without_array()->name))
+ continue;
+  found = f;
+  break;
+   }
+
+   if (!found)
+  return NULL;
+   *var_r = var;
+   sig = found->matching_signature(state, actual_parameters,
+

[Mesa-dev] [PATCH 06/19] glsl: Make `subroutine` a reserved keyword

2015-07-09 Thread Dave Airlie

From: Chris Forbes 

Reviewed-by: Tapani Pälli 
Reviewed-by: Kenneth Graunke 
Signed-off-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/glsl/glsl_lexer.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll
index 10db5b8..5fd22b4 100644
--- a/src/glsl/glsl_lexer.ll
+++ b/src/glsl/glsl_lexer.ll
@@ -577,7 +577,7 @@ usamplerBuffer  KEYWORD(140, 300, 140, 0, 
USAMPLERBUFFER);
 resource   KEYWORD(0, 300, 0, 0, RESOURCE);
 patch  KEYWORD(0, 300, 0, 0, PATCH);
 sample KEYWORD_WITH_ALT(400, 300, 400, 0, 
yyextra->ARB_gpu_shader5_enable, SAMPLE);
-subroutine KEYWORD(0, 300, 0, 0, SUBROUTINE);
+subroutine KEYWORD_WITH_ALT(400, 300, 400, 0, 
yyextra->ARB_shader_subroutine_enable, SUBROUTINE);
 
 
 [_a-zA-Z][_a-zA-Z0-9]* {
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 03/19] mesa: Add extension tracking for arb_shader_subroutine (v2)

2015-07-09 Thread Dave Airlie

From: Chris Forbes 

v2: [airlied]: merge version check update.

Reviewed-by: Tapani Pälli 
Reviewed-by: Kenneth Graunke 
Signed-off-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/mesa/main/extensions.c | 1 +
 src/mesa/main/mtypes.h | 1 +
 src/mesa/main/version.c| 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 4176a69..24ae33e 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -154,6 +154,7 @@ static const struct extension extension_table[] = {
{ "GL_ARB_shader_objects",  o(dummy_true),  
GL, 2002 },
{ "GL_ARB_shader_precision",o(ARB_shader_precision),
GL, 2010 },
{ "GL_ARB_shader_stencil_export",   
o(ARB_shader_stencil_export),   GL, 2009 },
+   { "GL_ARB_shader_subroutine",   o(ARB_shader_subroutine),   
GLC,2010 },
{ "GL_ARB_shader_texture_lod",  o(ARB_shader_texture_lod),  
GL, 2009 },
{ "GL_ARB_shading_language_100",o(dummy_true),  
GLL,2003 },
{ "GL_ARB_shading_language_packing",
o(ARB_shading_language_packing),GL, 2011 },
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 7b55677..a93fe94 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3685,6 +3685,7 @@ struct gl_extensions
GLboolean ARB_shader_image_load_store;
GLboolean ARB_shader_precision;
GLboolean ARB_shader_stencil_export;
+   GLboolean ARB_shader_subroutine;
GLboolean ARB_shader_texture_lod;
GLboolean ARB_shading_language_packing;
GLboolean ARB_shading_language_420pack;
diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index 8bc00ac..fd7ae53 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -309,7 +309,7 @@ compute_version(const struct gl_extensions *extensions,
  extensions->ARB_gpu_shader5 &&
  extensions->ARB_gpu_shader_fp64 &&
  extensions->ARB_sample_shading &&
- false /*extensions->ARB_shader_subroutine*/ &&
+ extensions->ARB_shader_subroutine &&
  extensions->ARB_tessellation_shader &&
  extensions->ARB_texture_buffer_object_rgb32 &&
  extensions->ARB_texture_cube_map_array &&
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 09/19] glsl/ir: add subroutine information storage to ir_function (v1.1)

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

We need to store two sets of info into the ir_function,
if this is a function definition with a subroutine list
(subroutine_def) or if it is a subroutine prototype.

v1.1: add some more documentation.

Signed-off-by: Dave Airlie 
---
 src/glsl/ir.cpp   |  4 
 src/glsl/ir.h | 16 
 src/glsl/ir_clone.cpp |  7 +++
 src/glsl/ir_print_visitor.cpp |  2 +-
 4 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 38a5e2a..2fbc631 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1853,6 +1853,7 @@ static void
 steal_memory(ir_instruction *ir, void *new_ctx)
 {
ir_variable *var = ir->as_variable();
+   ir_function *fn = ir->as_function();
ir_constant *constant = ir->as_constant();
if (var != NULL && var->constant_value != NULL)
   steal_memory(var->constant_value, ir);
@@ -1860,6 +1861,9 @@ steal_memory(ir_instruction *ir, void *new_ctx)
if (var != NULL && var->constant_initializer != NULL)
   steal_memory(var->constant_initializer, ir);
 
+   if (fn != NULL && fn->subroutine_types)
+  ralloc_steal(new_ctx, fn->subroutine_types);
+
/* The components of aggregate constants are not visited by the normal
 * visitor, so steal their values by hand.
 */
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 092c96b..b5a9e99 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1121,6 +1121,22 @@ public:
 * List of ir_function_signature for each overloaded function with this 
name.
 */
struct exec_list signatures;
+
+   /**
+* is this function a subroutine type declaration
+* e.g. subroutine void type1(float arg1);
+*/
+   bool is_subroutine;
+
+   /**
+* is this function associated to a subroutine type
+* e.g. subroutine (type1, type2) function_name { function_body };
+* would have this flag set and num_subroutine_types 2,
+* and pointers to the type1 and type2 types.
+*/
+   bool is_subroutine_def;
+   int num_subroutine_types;
+   const struct glsl_type **subroutine_types;
 };
 
 inline const char *ir_function_signature::function_name() const
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index 49834ff..bf25d6c 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -267,6 +267,13 @@ ir_function::clone(void *mem_ctx, struct hash_table *ht) 
const
 {
ir_function *copy = new(mem_ctx) ir_function(this->name);
 
+   copy->is_subroutine = this->is_subroutine;
+   copy->is_subroutine_def = this->is_subroutine_def;
+   copy->num_subroutine_types = this->num_subroutine_types;
+   copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, 
copy->num_subroutine_types);
+   for (int i = 0; i < copy->num_subroutine_types; i++)
+ copy->subroutine_types[i] = this->subroutine_types[i];
+
foreach_in_list(const ir_function_signature, sig, &this->signatures) {
   ir_function_signature *sig_copy = sig->clone(mem_ctx, ht);
   copy->add_signature(sig_copy);
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index 4cbcad4..f210175 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -229,7 +229,7 @@ void ir_print_visitor::visit(ir_function_signature *ir)
 
 void ir_print_visitor::visit(ir_function *ir)
 {
-   fprintf(f, "(function %s\n", ir->name);
+   fprintf(f, "(%s function %s\n", ir->is_subroutine ? "subroutine" : "", 
ir->name);
indentation++;
foreach_in_list(ir_function_signature, sig, &ir->signatures) {
   indent();
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 01/19] mesa: Add stubs for ARB_shader_subroutine entrypoints

2015-07-09 Thread Dave Airlie

From: Chris Forbes 

Reviewed-by: Tapani Pälli 
Reviewed-by: Kenneth Graunke 
Signed-off-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/mesa/main/shaderapi.c | 63 +++
 src/mesa/main/shaderapi.h | 35 ++
 2 files changed, 98 insertions(+)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index a4296ad..48ab217 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1984,3 +1984,66 @@ _mesa_CreateShaderProgramv(GLenum type, GLsizei count,
 
return _mesa_create_shader_program(ctx, GL_TRUE, type, count, strings);
 }
+
+
+/**
+ * ARB_shader_subroutine
+ */
+GLint GLAPIENTRY
+_mesa_GetSubroutineUniformLocation(GLuint program, GLenum shadertype,
+   const GLchar *name)
+{
+   return -1;
+}
+
+
+GLuint GLAPIENTRY
+_mesa_GetSubroutineIndex(GLuint program, GLenum shadertype,
+ const GLchar *name)
+{
+   return GL_INVALID_INDEX;
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformiv(GLuint program, GLenum shadertype,
+   GLuint index, GLenum pname, GLint *values)
+{
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformName(GLuint program, GLenum shadertype,
+ GLuint index, GLsizei bufsize,
+ GLsizei *length, GLchar *name)
+{
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineName(GLuint program, GLenum shadertype,
+  GLuint index, GLsizei bufsize,
+  GLsizei *length, GLchar *name)
+{
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_UniformSubroutinesuiv(GLenum shadertype, GLsizei count,
+const GLuint *indices)
+{
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetUniformSubroutineuiv(GLenum shadertype, GLint location,
+  GLuint *params)
+{
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetProgramStageiv(GLuint program, GLenum shadertype,
+GLenum pname, GLint *values)
+{
+}
diff --git a/src/mesa/main/shaderapi.h b/src/mesa/main/shaderapi.h
index aba6d5d..eda7170 100644
--- a/src/mesa/main/shaderapi.h
+++ b/src/mesa/main/shaderapi.h
@@ -264,6 +264,41 @@ _mesa_get_program_resourceiv(struct gl_shader_program 
*shProg,
  GLsizei bufSize, GLsizei *length,
  GLint *params);
 
+/* GL_ARB_shader_subroutine */
+extern GLint GLAPIENTRY
+_mesa_GetSubroutineUniformLocation(GLuint program, GLenum shadertype,
+   const GLchar *name);
+
+extern GLuint GLAPIENTRY
+_mesa_GetSubroutineIndex(GLuint program, GLenum shadertype,
+ const GLchar *name);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformiv(GLuint program, GLenum shadertype,
+   GLuint index, GLenum pname, GLint *values);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformName(GLuint program, GLenum shadertype,
+ GLuint index, GLsizei bufsize,
+ GLsizei *length, GLchar *name);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineName(GLuint program, GLenum shadertype,
+  GLuint index, GLsizei bufsize,
+  GLsizei *length, GLchar *name);
+
+extern GLvoid GLAPIENTRY
+_mesa_UniformSubroutinesuiv(GLenum shadertype, GLsizei count,
+const GLuint *indices);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetUniformSubroutineuiv(GLenum shadertype, GLint location,
+  GLuint *params);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetProgramStageiv(GLuint program, GLenum shadertype,
+GLenum pname, GLint *values);
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 05/19] glsl: Add extension plumbing and define for ARB_shader_subroutine

2015-07-09 Thread Dave Airlie

From: Chris Forbes 

Reviewed-by: Tapani Pälli 
Reviewed-by: Kenneth Graunke 
Signed-off-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/glsl/glcpp/glcpp-parse.y| 3 +++
 src/glsl/glsl_parser_extras.cpp | 1 +
 src/glsl/glsl_parser_extras.h   | 2 ++
 src/glsl/standalone_scaffolding.cpp | 1 +
 4 files changed, 7 insertions(+)

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index a11b6b2..99b7cdf 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2483,6 +2483,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
 
   if (extensions->ARB_shader_precision)
  add_builtin_define(parser, "GL_ARB_shader_precision", 1);
+
+  if (extensions->ARB_shader_subroutine)
+ add_builtin_define(parser, "GL_ARB_shader_subroutine", 1);
   }
}
 
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 046d5d7..676489f 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -570,6 +570,7 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
EXT(ARB_shader_image_load_store,true,  false, 
ARB_shader_image_load_store),
EXT(ARB_shader_precision,   true,  false, ARB_shader_precision),
EXT(ARB_shader_stencil_export,  true,  false, 
ARB_shader_stencil_export),
+   EXT(ARB_shader_subroutine,  true,  false, 
ARB_shader_subroutine),
EXT(ARB_shader_texture_lod, true,  false, 
ARB_shader_texture_lod),
EXT(ARB_shading_language_420pack,   true,  false, 
ARB_shading_language_420pack),
EXT(ARB_shading_language_packing,   true,  false, 
ARB_shading_language_packing),
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 02ddbbd..726a427 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -462,6 +462,8 @@ struct _mesa_glsl_parse_state {
bool ARB_shader_precision_warn;
bool ARB_shader_stencil_export_enable;
bool ARB_shader_stencil_export_warn;
+   bool ARB_shader_subroutine_enable;
+   bool ARB_shader_subroutine_warn;
bool ARB_shader_texture_lod_enable;
bool ARB_shader_texture_lod_warn;
bool ARB_shading_language_420pack_enable;
diff --git a/src/glsl/standalone_scaffolding.cpp 
b/src/glsl/standalone_scaffolding.cpp
index 172c6f4..15546c2 100644
--- a/src/glsl/standalone_scaffolding.cpp
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -133,6 +133,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, 
gl_api api)
ctx->Extensions.ARB_sample_shading = true;
ctx->Extensions.ARB_shader_bit_encoding = true;
ctx->Extensions.ARB_shader_stencil_export = true;
+   ctx->Extensions.ARB_shader_subroutine = true;
ctx->Extensions.ARB_shader_texture_lod = true;
ctx->Extensions.ARB_shading_language_420pack = true;
ctx->Extensions.ARB_shading_language_packing = true;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 10/19] glsl/ir: allow ir_call to handle subroutine calling

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

This adds an ir_variable which contains the subroutine uniform
and an array rvalue for the deref of that uniform; these
are stored in the ir_call and lowered later.

Signed-off-by: Dave Airlie 
---
 src/glsl/ir.h | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index b5a9e99..691c8b6 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1708,7 +1708,18 @@ public:
ir_call(ir_function_signature *callee,
   ir_dereference_variable *return_deref,
   exec_list *actual_parameters)
-  : ir_instruction(ir_type_call), return_deref(return_deref), 
callee(callee)
+  : ir_instruction(ir_type_call), return_deref(return_deref), 
callee(callee), sub_var(NULL), array_idx(NULL)
+   {
+  assert(callee->return_type != NULL);
+  actual_parameters->move_nodes_to(& this->actual_parameters);
+  this->use_builtin = callee->is_builtin();
+   }
+
+   ir_call(ir_function_signature *callee,
+  ir_dereference_variable *return_deref,
+  exec_list *actual_parameters,
+  ir_variable *var, ir_rvalue *array_idx)
+  : ir_instruction(ir_type_call), return_deref(return_deref), 
callee(callee), sub_var(var), array_idx(array_idx)
{
   assert(callee->return_type != NULL);
   actual_parameters->move_nodes_to(& this->actual_parameters);
@@ -1756,6 +1767,14 @@ public:
 
/** Should this call only bind to a built-in function? */
bool use_builtin;
+
+   /*
+* ARB_shader_subroutine support -
+* the subroutine uniform variable and array index
+* rvalue to be used in the lowering pass later.
+*/
+   ir_variable *sub_var;
+   ir_rvalue *array_idx;
 };
 
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 16/19] program: add subroutine uniform support (v1.1)

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

Add support for the subroutine uniform type to ir_to_mesa.cpp.

v1.1: add subroutine to int to switch

Signed-off-by: Dave Airlie 
---
 src/mesa/program/ir_to_mesa.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 0b2eb12..6ee6ee8 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -534,6 +534,7 @@ type_size(const struct glsl_type *type)
   return size;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_SUBROUTINE:
   /* Samplers take up one slot in UNIFORMS[], but they're baked in
* at link time.
*/
@@ -1342,6 +1343,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
case ir_unop_dFdx_fine:
case ir_unop_dFdy_coarse:
case ir_unop_dFdy_fine:
+   case ir_unop_subroutine_to_int:
   assert(!"not supported");
   break;
 
@@ -2451,6 +2453,7 @@ _mesa_associate_uniform_storage(struct gl_context *ctx,
break;
 case GLSL_TYPE_SAMPLER:
 case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_SUBROUTINE:
format = uniform_native;
columns = 1;
break;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 12/19] glsl/ir: add subroutine lowering pass (v2.1)

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

This lowers the enhanced ir_call using the lookaside table
of subroutines into an if ladder. This initially was done
at the AST level but it caused some ordering issues so a separate
pass was required.

v2: clone return value derefs.
v2.1: update for subroutine->int convert.

Signed-off-by: Dave Airlie 
---
 src/glsl/Makefile.sources   |   1 +
 src/glsl/glsl_parser_extras.cpp |   1 +
 src/glsl/ir_optimization.h  |   2 +
 src/glsl/lower_subroutine.cpp   | 109 
 4 files changed, 113 insertions(+)
 create mode 100644 src/glsl/lower_subroutine.cpp

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index d784a81..3f113c8 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -154,6 +154,7 @@ LIBGLSL_FILES = \
lower_packed_varyings.cpp \
lower_named_interface_blocks.cpp \
lower_packing_builtins.cpp \
+   lower_subroutine.cpp \
lower_texture_projection.cpp \
lower_variable_index_to_cond_assign.cpp \
lower_vec_index_to_cond_assign.cpp \
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index ba869f9..a2de278 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1558,6 +1558,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct 
gl_shader *shader,
   struct gl_shader_compiler_options *options =
  &ctx->Const.ShaderCompilerOptions[shader->Stage];
 
+  lower_subroutine(shader->ir, state);
   /* Do some optimization at compile time to reduce shader IR size
* and reduce later work if the same shader is linked multiple times
*/
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index e6939f3..fef5a83 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -135,6 +135,8 @@ void optimize_dead_builtin_variables(exec_list 
*instructions,
 
 bool lower_vertex_id(gl_shader *shader);
 
+bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state 
*state);
+
 ir_rvalue *
 compare_index_block(exec_list *instructions, ir_variable *index,
unsigned base, unsigned components, void *mem_ctx);
diff --git a/src/glsl/lower_subroutine.cpp b/src/glsl/lower_subroutine.cpp
new file mode 100644
index 000..e5635a2
--- /dev/null
+++ b/src/glsl/lower_subroutine.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_subroutine.cpp
+ *
+ * lowers subroutines to an if ladder.
+ */
+
+#include "glsl_types.h"
+#include "glsl_parser_extras.h"
+#include "ir.h"
+#include "ir_builder.h"
+
+using namespace ir_builder;
+namespace {
+
+class lower_subroutine_visitor : public ir_hierarchical_visitor {
+public:
+   lower_subroutine_visitor()
+   {
+  this->progress = false;
+   }
+
+   ir_visitor_status visit_leave(ir_call *);
+   bool progress;
+   struct _mesa_glsl_parse_state *state;
+};
+
+}
+
+bool
+lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state)
+{
+   lower_subroutine_visitor v;
+   v.state = state;
+   visit_list_elements(&v, instructions);
+   return v.progress;
+}
+
+ir_visitor_status
+lower_subroutine_visitor::visit_leave(ir_call *ir)
+{
+   if (!ir->sub_var)
+  return visit_continue;
+
+   void *mem_ctx = ralloc_parent(ir);
+   ir_if *last_branch = NULL;
+   ir_dereference_variable *return_deref = ir->return_deref;
+
+   for (int s = this->state->num_subroutines - 1; s >= 0; s--) {
+  ir_rvalue *var;
+  ir_constant *lc = new(mem_ctx)ir_constant(s);
+  ir_function *fn = this->state->subroutines[s];
+  bool is_compat = false;
+
+  for (int i = 0; i < fn->num_subroutine_types; i++) {
+ if (ir-&

[Mesa-dev] [PATCH 18/19] st/mesa: add subroutine bits (v1.1)

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

Just add support for the subroutine type to the
glsl->tgsi convertor.

v1.1: add subroutine to int support.

Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 25e30c7..a1dd70f 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -797,7 +797,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, 
unsigned op,
case TGSI_OPCODE_##c: \
   if (type == GLSL_TYPE_DOUBLE) \
  op = TGSI_OPCODE_##d; \
-  else if (type == GLSL_TYPE_INT)   \
+  else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE)   \
  op = TGSI_OPCODE_##i; \
   else if (type == GLSL_TYPE_UINT) \
  op = TGSI_OPCODE_##u; \
@@ -1090,6 +1090,7 @@ type_size(const struct glsl_type *type)
   return size;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_SUBROUTINE:
   /* Samplers take up one slot in UNIFORMS[], but they're baked in
* at link time.
*/
@@ -1470,6 +1471,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
  result_src = op[0];
   }
   break;
+   case ir_unop_subroutine_to_int:
+  emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+  break;
case ir_unop_abs:
   emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
   break;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 15/19] program_resource: add subroutine support

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

This fleshes out the ARB_program_query support for the
APIs that ARB_shader_subroutine introduces, leaving
some TODOs for later addition.

Signed-off-by: Dave Airlie 
---
 src/mesa/main/shader_query.cpp | 46 ++
 1 file changed, 46 insertions(+)

diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index a6246a3..4fa5913 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -61,6 +61,7 @@ DECL_RESOURCE_FUNC(UBO, gl_uniform_block);
 DECL_RESOURCE_FUNC(UNI, gl_uniform_storage);
 DECL_RESOURCE_FUNC(ATC, gl_active_atomic_buffer);
 DECL_RESOURCE_FUNC(XFB, gl_transform_feedback_varying_info);
+DECL_RESOURCE_FUNC(SUB, gl_subroutine_function);
 
 void GLAPIENTRY
 _mesa_BindAttribLocation(GLhandleARB program, GLuint index,
@@ -497,6 +498,24 @@ _mesa_program_resource_name(struct gl_program_resource 
*res)
   return RESOURCE_VAR(res)->name;
case GL_UNIFORM:
   return RESOURCE_UNI(res)->name;
+   case GL_VERTEX_SUBROUTINE_UNIFORM:
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+   case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+  /* TODO
+ case GL_COMPUTE_SUBROUTINE_UNIFORM:
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+  */
+  return RESOURCE_UNI(res)->name + 9;
+   case GL_VERTEX_SUBROUTINE:
+   case GL_GEOMETRY_SUBROUTINE:
+   case GL_FRAGMENT_SUBROUTINE:
+  /* TODO
+ case GL_COMPUTE_SUBROUTINE:
+ case GL_TESS_CONTROL_SUBROUTINE:
+ case GL_TESS_EVALUATION_SUBROUTINE:
+  */
+  return RESOURCE_SUB(res)->name;
default:
   assert(!"support for resource type not implemented");
}
@@ -515,6 +534,9 @@ _mesa_program_resource_array_size(struct 
gl_program_resource *res)
case GL_PROGRAM_OUTPUT:
   return RESOURCE_VAR(res)->data.max_array_access;
case GL_UNIFORM:
+   case GL_VERTEX_SUBROUTINE_UNIFORM:
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+   case GL_FRAGMENT_SUBROUTINE_UNIFORM:
   return RESOURCE_UNI(res)->array_elements;
case GL_ATOMIC_COUNTER_BUFFER:
case GL_UNIFORM_BLOCK:
@@ -571,6 +593,12 @@ _mesa_program_resource_find_name(struct gl_shader_program 
*shProg,
   case GL_TRANSFORM_FEEDBACK_VARYING:
   case GL_UNIFORM_BLOCK:
   case GL_UNIFORM:
+  case GL_VERTEX_SUBROUTINE_UNIFORM:
+  case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+  case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+  case GL_VERTEX_SUBROUTINE:
+  case GL_GEOMETRY_SUBROUTINE:
+  case GL_FRAGMENT_SUBROUTINE:
  if (strncmp(rname, name, baselen) == 0) {
 /* Basename match, check if array or struct. */
 if (name[baselen] == '\0' ||
@@ -651,6 +679,12 @@ _mesa_program_resource_find_index(struct gl_shader_program 
*shProg,
   case GL_PROGRAM_INPUT:
   case GL_PROGRAM_OUTPUT:
   case GL_UNIFORM:
+  case GL_VERTEX_SUBROUTINE_UNIFORM:
+  case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+  case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+  case GL_VERTEX_SUBROUTINE:
+  case GL_GEOMETRY_SUBROUTINE:
+  case GL_FRAGMENT_SUBROUTINE:
  if (++idx == (int) index)
 return res;
  break;
@@ -740,6 +774,8 @@ program_resource_location(struct gl_shader_program *shProg,
 {
unsigned index, offset;
int array_index = -1;
+   long offset_ret;
+   const GLchar *base_name_end;
 
if (res->Type == GL_PROGRAM_INPUT || res->Type == GL_PROGRAM_OUTPUT) {
   array_index = array_index_of_resource(res, name);
@@ -780,6 +816,16 @@ program_resource_location(struct gl_shader_program *shProg,
   /* location in remap table + array element offset */
   return RESOURCE_UNI(res)->remap_location + offset;
 
+   case GL_VERTEX_SUBROUTINE_UNIFORM:
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+   case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+  /* TODO
+ case GL_COMPUTE_SUBROUTINE_UNIFORM:
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+  */
+  offset_ret = parse_program_resource_name(name, &base_name_end);
+  return 
RESOURCE_UNI(res)->subroutine[_mesa_shader_stage_from_subroutine_uniform(res->Type)].index
 + ((offset_ret != -1) ? offset_ret : 0);
default:
   return -1;
}
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 19/19] st/mesa: enable shader subroutine

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

I'm not sure if we shouldn't enable this everywhere
and rip out the API checks,

discuss,

Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_extensions.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index b1057f3..3b828fa 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -598,6 +598,7 @@ void st_init_extensions(struct pipe_screen *screen,
extensions->ARB_half_float_vertex = GL_TRUE;
extensions->ARB_internalformat_query = GL_TRUE;
extensions->ARB_map_buffer_range = GL_TRUE;
+   extensions->ARB_shader_subroutine = GL_TRUE;
extensions->ARB_texture_border_clamp = GL_TRUE; /* XXX temp */
extensions->ARB_texture_cube_map = GL_TRUE;
extensions->ARB_texture_env_combine = GL_TRUE;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 17/19] mesa: fill out the ARB_shader_subroutine APIs

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

This fleshes out the APIs, using the program resource
APIs where they should match.

It also sets the default values to valid subroutines.

Signed-off-by: Dave Airlie 
---
 src/mesa/main/shaderapi.c | 457 +-
 src/mesa/main/shaderapi.h |   3 +
 2 files changed, 457 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 48ab217..1a46749 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1071,6 +1071,7 @@ _mesa_use_program(struct gl_context *ctx, struct 
gl_shader_program *shProg)
   use_shader_program(ctx, i, shProg, &ctx->Shader);
_mesa_active_program(ctx, shProg, "glUseProgram");
 
+   _mesa_shader_program_init_subroutine_defaults(shProg);
if (ctx->Driver.UseProgram)
   ctx->Driver.UseProgram(ctx, shProg);
 }
@@ -1993,15 +1994,75 @@ GLint GLAPIENTRY
 _mesa_GetSubroutineUniformLocation(GLuint program, GLenum shadertype,
const GLchar *name)
 {
-   return -1;
-}
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetSubroutineUniformLocation";
+   struct gl_shader_program *shProg;
+   GLenum resource_type;
+   gl_shader_stage stage;
+
+   if (!ctx->Extensions.ARB_shader_subroutine) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return -1;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return -1;
+   }
 
+   shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+   if (!shProg)
+  return -1;
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   if (!shProg->_LinkedShaders[stage]) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return -1;
+   }
+
+   resource_type = _mesa_shader_stage_to_subroutine_uniform(stage);
+   return _mesa_program_resource_location(shProg, resource_type, name);
+}
 
 GLuint GLAPIENTRY
 _mesa_GetSubroutineIndex(GLuint program, GLenum shadertype,
  const GLchar *name)
 {
-   return GL_INVALID_INDEX;
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetSubroutineIndex";
+   struct gl_shader_program *shProg;
+   struct gl_program_resource *res;
+   GLenum resource_type;
+   gl_shader_stage stage;
+
+   if (!ctx->Extensions.ARB_shader_subroutine) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return -1;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return -1;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+   if (!shProg)
+  return -1;
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   if (!shProg->_LinkedShaders[stage]) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return -1;
+   }
+
+   resource_type = _mesa_shader_stage_to_subroutine(stage);
+   res = _mesa_program_resource_find_name(shProg, resource_type, name);
+   if (!res) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+ return -1;
+   }
+
+   return _mesa_program_resource_index(shProg, res);
 }
 
 
@@ -2009,6 +2070,91 @@ GLvoid GLAPIENTRY
 _mesa_GetActiveSubroutineUniformiv(GLuint program, GLenum shadertype,
GLuint index, GLenum pname, GLint *values)
 {
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetActiveSubroutineUniformiv";
+   struct gl_shader_program *shProg;
+   struct gl_shader *sh;
+   gl_shader_stage stage;
+   struct gl_program_resource *res;
+   const struct gl_uniform_storage *uni;
+   GLenum resource_type;
+   int count, i, j;
+   if (!ctx->Extensions.ARB_shader_subroutine) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+   if (!shProg)
+  return;
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   resource_type = _mesa_shader_stage_to_subroutine_uniform(stage);
+
+   sh = shProg->_LinkedShaders[stage];
+   if (!sh) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return;
+   }
+
+   switch (pname) {
+   case GL_NUM_COMPATIBLE_SUBROUTINES: {
+  res = _mesa_program_resource_find_index(shProg, resource_type, index);
+  if (res) {
+ uni = res->Data;
+ count = 0;
+ for (i = 0; i < sh->NumSubroutineFunctions; i++) {
+struct gl_subroutine_function *fn = &sh->SubroutineFunctions[i];
+for (j = 0; j < fn->num_compat_types; j++) {
+   if (fn->types[j] == uni->type) {
+  count++;
+  break;
+   }
+}
+ }
+

[Mesa-dev] [PATCH 13/19] mesa/mtypes: add gl_subroutine_function and uniform storage to shader

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

This adds the necessary storage for subroutine info to gl_shader.

Signed-off-by: Dave Airlie 
---
 src/mesa/main/mtypes.h | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index a93fe94..c53bf2d 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2359,6 +2359,15 @@ struct gl_ati_fragment_shader_state
struct ati_fragment_shader *Current;
 };
 
+/**
+ *  Shader subroutine function definition
+ */
+struct gl_subroutine_function
+{
+   char *name;
+   int num_compat_types;
+   const struct glsl_type **types;
+};
 
 /**
  * A GLSL vertex or fragment shader object.
@@ -2509,6 +2518,12 @@ struct gl_shader
*/
   unsigned LocalSize[3];
} Comp;
+
+   GLuint NumSubroutineUniformTypes;
+   GLuint NumSubroutineUniforms;
+   struct gl_uniform_storage **SubroutineUniformRemapTable;
+   GLuint NumSubroutineFunctions;
+   struct gl_subroutine_function *SubroutineFunctions;
 };
 
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 14/19] glsl: add uniform and program resource support

2015-07-09 Thread Dave Airlie

From: Dave Airlie 

This adds linker support for subroutine uniforms, they
have some subtle differences from real uniforms, we also hide
them and they are given internal uniform names.

This also adds the subroutine locations and subroutine uniforms
to the program resource tracking for later use.

Signed-off-by: Dave Airlie 
---
 src/glsl/ir_uniform.h  |  2 +
 src/glsl/link_uniforms.cpp | 56 +--
 src/glsl/linker.cpp| 94 +-
 3 files changed, 146 insertions(+), 6 deletions(-)

diff --git a/src/glsl/ir_uniform.h b/src/glsl/ir_uniform.h
index e1b8014..be1b38d 100644
--- a/src/glsl/ir_uniform.h
+++ b/src/glsl/ir_uniform.h
@@ -114,6 +114,8 @@ struct gl_uniform_storage {
 
struct gl_opaque_uniform_index image[MESA_SHADER_STAGES];
 
+   struct gl_opaque_uniform_index subroutine[MESA_SHADER_STAGES];
+
/**
 * Storage used by the driver for the uniform
 */
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 11ae06f..78a830a 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -47,9 +47,10 @@
 static unsigned
 values_for_type(const glsl_type *type)
 {
-   if (type->is_sampler()) {
+   if (type->is_sampler() || type->is_subroutine()) {
   return 1;
-   } else if (type->is_array() && type->fields.array->is_sampler()) {
+   } else if (type->is_array() && (type->fields.array->is_sampler() ||
+   type->fields.array->is_subroutine())) {
   return type->array_size();
} else {
   return type->component_slots();
@@ -284,6 +285,7 @@ public:
count_uniform_size(struct string_to_uint_map *map)
   : num_active_uniforms(0), num_values(0), num_shader_samplers(0),
 num_shader_images(0), num_shader_uniform_components(0),
+num_shader_subroutines(0),
 is_ubo_var(false), map(map)
{
   /* empty */
@@ -294,6 +296,7 @@ public:
   this->num_shader_samplers = 0;
   this->num_shader_images = 0;
   this->num_shader_uniform_components = 0;
+  this->num_shader_subroutines = 0;
}
 
void process(ir_variable *var)
@@ -331,6 +334,11 @@ public:
 */
unsigned num_shader_uniform_components;
 
+   /**
+* Number of subroutine uniforms used
+*/
+   unsigned num_shader_subroutines;
+
bool is_ubo_var;
 
 private:
@@ -348,7 +356,9 @@ private:
* count it for each shader target.
*/
   const unsigned values = values_for_type(type);
-  if (type->contains_sampler()) {
+  if (type->contains_subroutine()) {
+ this->num_shader_subroutines += values;
+  } else if (type->contains_sampler()) {
  this->num_shader_samplers += values;
   } else if (type->contains_image()) {
  this->num_shader_images += values;
@@ -421,6 +431,7 @@ public:
   this->shader_shadow_samplers = 0;
   this->next_sampler = 0;
   this->next_image = 0;
+  this->next_subroutine = 0;
   memset(this->targets, 0, sizeof(this->targets));
}
 
@@ -535,6 +546,24 @@ private:
   }
}
 
+   void handle_subroutines(const glsl_type *base_type,
+   struct gl_uniform_storage *uniform)
+   {
+  if (base_type->is_subroutine()) {
+ uniform->subroutine[shader_type].index = this->next_subroutine;
+ uniform->subroutine[shader_type].active = true;
+
+ /* Increment the subroutine index by 1 for non-arrays and by the
+  * number of array elements for arrays.
+  */
+ this->next_subroutine += MAX2(1, uniform->array_elements);
+
+  } else {
+ uniform->subroutine[shader_type].index = ~0;
+ uniform->subroutine[shader_type].active = false;
+  }
+   }
+
virtual void visit_field(const glsl_type *type, const char *name,
 bool row_major)
{
@@ -588,6 +617,7 @@ private:
   /* This assigns uniform indices to sampler and image uniforms. */
   handle_samplers(base_type, &this->uniforms[id]);
   handle_images(base_type, &this->uniforms[id]);
+  handle_subroutines(base_type, &this->uniforms[id]);
 
   /* If there is already storage associated with this uniform or if the
* uniform is set as builtin, it means that it was set while processing
@@ -672,6 +702,7 @@ private:
struct gl_uniform_storage *uniforms;
unsigned next_sampler;
unsigned next_image;
+   unsigned next_subroutine;
 
 public:
union gl_constant_value *values;
@@ -952,8 +983,11 @@ link_assign_uniform_locations(struct gl_shader_program 
*prog,
   sh->num_samplers = uniform_size.num_shader_samplers;
   sh->NumImages = uniform_size.num_shader_images;
   sh->num_uniform_components = uniform_size.num_shader_uniform_components;
-
   sh->num_combined_uniform_components = sh-

[Mesa-dev] [PATCH] radeonsi: ARB_gpu_shader_fp64 + ARB_vertex_attrib_64bit support.

2015-07-11 Thread Dave Airlie

From: Dave Airlie 

This adds the translation from TGSI to AMDGPU llvm backend, for the
64-bit opcodes. The backend pretty much handles everything for us
fine. There is one patch required for SI DFRAC support that I know
of.

Signed-off-by: Dave Airlie 
---
 docs/GL3.txt   |   4 +-
 src/gallium/drivers/radeon/radeon_llvm.h   |   7 +-
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 143 -
 src/gallium/drivers/radeonsi/si_pipe.c |   1 +
 src/gallium/drivers/radeonsi/si_shader.c   |  31 -
 5 files changed, 173 insertions(+), 13 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 94bbcd1..d1a42be 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -109,7 +109,7 @@ GL 4.0, GLSL 4.00:
   - Enhanced per-sample shadingDONE (r600, radeonsi)
   - Interpolation functionsDONE (r600)
   - New overload resolution rules  DONE
-  GL_ARB_gpu_shader_fp64   DONE (nvc0, llvmpipe, 
softpipe)
+  GL_ARB_gpu_shader_fp64   DONE (nvc0, radeonsi, 
llvmpipe, softpipe)
   GL_ARB_sample_shadingDONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_shader_subroutine started (Dave)
   GL_ARB_tessellation_shader   started (Chris, Ilia)
@@ -127,7 +127,7 @@ GL 4.1, GLSL 4.10:
   GL_ARB_get_program_binaryDONE (0 binary formats)
   GL_ARB_separate_shader_objects   DONE (all drivers)
   GL_ARB_shader_precision  started (Micah)
-  GL_ARB_vertex_attrib_64bit   DONE (nvc0, llvmpipe, 
softpipe)
+  GL_ARB_vertex_attrib_64bit   DONE (nvc0, radeonsi, 
llvmpipe, softpipe)
   GL_ARB_viewport_arrayDONE (i965, nv50, nvc0, 
r600, radeonsi, llvmpipe)
 
 
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index 6a9557b..591e698 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -146,6 +146,8 @@ static inline LLVMTypeRef tgsi2llvmtype(
case TGSI_TYPE_UNSIGNED:
case TGSI_TYPE_SIGNED:
return LLVMInt32TypeInContext(ctx);
+   case TGSI_TYPE_DOUBLE:
+   return LLVMDoubleTypeInContext(ctx);
case TGSI_TYPE_UNTYPED:
case TGSI_TYPE_FLOAT:
return LLVMFloatTypeInContext(ctx);
@@ -205,6 +207,9 @@ build_tgsi_intrinsic_nomem(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data);
 
-
+LLVMValueRef
+radeon_llvm_emit_fetch_double(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef ptr,
+ LLVMValueRef ptr2);
 
 #endif /* RADEON_LLVM_H */
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index c8c980d..444a41c 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -116,6 +116,28 @@ emit_fetch(
enum tgsi_opcode_type type,
unsigned swizzle);
 
+LLVMValueRef
+radeon_llvm_emit_fetch_double(
+   struct lp_build_tgsi_context *bld_base,
+   LLVMValueRef ptr,
+   LLVMValueRef ptr2)
+{
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef result;
+
+   result = 
LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context,
 32), bld_base->base.type.length * 2));
+
+   result = LLVMBuildInsertElement(builder,
+   result,
+   bitcast(bld_base, TGSI_TYPE_UNSIGNED, 
ptr),
+   bld_base->int_bld.zero, "");
+   result = LLVMBuildInsertElement(builder,
+   result,
+   bitcast(bld_base, TGSI_TYPE_UNSIGNED, 
ptr2),
+   bld_base->int_bld.one, "");
+   return bitcast(bld_base, TGSI_TYPE_DOUBLE, result);
+}
+
 static LLVMValueRef
 emit_array_fetch(
struct lp_build_tgsi_context *bld_base,
@@ -160,7 +182,7 @@ emit_fetch(
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-   LLVMValueRef result = NULL, ptr;
+   LLVMValueRef result = NULL, ptr, ptr2;
 
if (swizzle == ~0) {
LLVMValueRef values[TGSI_NUM_CHANNELS];
@@ -184,11 +206,27 @@ emit_fetch(
switch(reg->Register.File) {
case TGSI_FILE_IMMEDIATE: {
LLVMTypeRef ctype = tgs

Re: [Mesa-dev] [PATCH 09/19] glsl/ir: add subroutine information storage to ir_function (v1.1)

2015-07-11 Thread Dave Airlie

On 10 July 2015 at 07:15, Chris Forbes  wrote:
> Do you really need is_subroutine_def ? It seems redundant with
> num_subroutine_types>0.

I'm not sure, the spec isn't very clear on whether a subroutine
definition with no subroutine types is legal GLSL or not.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 09/19] glsl/ir: add subroutine information storage to ir_function (v1.1)

2015-07-11 Thread Dave Airlie

On 12 July 2015 at 13:02, Dave Airlie  wrote:
> On 10 July 2015 at 07:15, Chris Forbes  wrote:
>> Do you really need is_subroutine_def ? It seems redundant with
>> num_subroutine_types>0.
>
> I'm not sure, the spec isn't very clear on whether a subroutine
> definition with no subroutine types is legal GLSL or not.

actually it's fine to do what you said, I've nuked is_subroutine_def
for the next version.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] radeonsi indirect sampler, llvm backend issue

2015-07-12 Thread Dave Airlie

Hey,

So I tried to get ARB_gpu_shader5 indirect sampler support to work in radeonsi.

The attached patch to mesa adds support, but the llvm backend appears
to do the wrong thing, and I'm not sure how to fix it.

So the image sampling functions need the resource and sampler addresses in SGPRs.

So I generate a load of v8i32 for the resource, and v4i32 for the
sampler; however, the backend translates the v4i32 load into a
VGPR-based load, and then it all chokes when it tries to pass that to
the image sample.

commenting out the following pattern in the backend makes things work
as a workaround.
defm : MUBUFLoad_Pattern ;
I then get an s_load_dwordx4 instead of a buffer_load_dwordx4.

I've exhausted my knowledge of llvm already on this, so if you have any ideas, let me know.

Below is the tgsi/llvm inputs I create.

Dave.

FRAG
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL CONST[4]
DCL TEMP[0], LOCAL
DCL ADDR[0..2]
IMM[0] FLT32 {0.7500, 0.2500, 0., 0.}
  0: MOV TEMP[0].xy, IMM[0].xyyy
  1: UARL ADDR[2].x, CONST[4].
  2: TEX TEMP[0], TEMP[0], SAMP[ADDR[2].x], 2D
  3: MOV OUT[0], TEMP[0]
  4: END
; ModuleID = 'tgsi'

define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x
i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x
<8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2
x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float,
float, float, float, float, float, i32, float, float) #0 {
main_body:
  %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)*
%1, i64 0, i64 0
  %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
  %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
  %25 = bitcast float %24 to i32
  %26 = sext i32 %25 to i64
  %27 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)*
%3, i64 0, i64 %26
  %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0
  %29 = sext i32 %25 to i64
  %30 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)*
%2, i64 0, i64 %29
  %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0
  %32 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> , <8 x i32> %28, <4 x i32> %31, i32 15, i32
0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %33 = extractelement <4 x float> %32, i32 0
  %34 = extractelement <4 x float> %32, i32 1
  %35 = extractelement <4 x float> %32, i32 2
  %36 = extractelement <4 x float> %32, i32 3
  %37 = call i32 @llvm.SI.packf16(float %33, float %34)
  %38 = bitcast i32 %37 to float
  %39 = call i32 @llvm.SI.packf16(float %35, float %36)
  %40 = bitcast i32 %39 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float
%38, float %40, float %38, float %40)
  ret void
}
From 23ee04b806875724ddeb6d9731f9bd42383272e8 Mon Sep 17 00:00:00 2001
From: Dave Airlie 
Date: Mon, 13 Jul 2015 00:07:09 +0100
Subject: [PATCH] radeonsi: add support for indirect samplers

This is the necessary frontend work; the llvm backend
is producing the wrong code for the v4i32 load.
---
 src/gallium/drivers/radeonsi/si_shader.c | 70 
 1 file changed, 62 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 75a29ae..38ad74e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1603,6 +1603,24 @@ static bool tgsi_is_shadow_sampler(unsigned target)
 
 static const struct lp_build_tgsi_action tex_action;
 
+/**
+ * Return the value of tgsi_ind_register for indexing.
+ * This is the indirect index with the constant offset added to it.
+ */
+static LLVMValueRef get_indirect_index(struct si_shader_context *si_shader_ctx,
+   const struct tgsi_ind_register *ind,
+   int rel_index)
+{
+	struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+	LLVMValueRef result;
+
+	result = si_shader_ctx->radeon_bld.soa.addr[ind->Index][ind->Swizzle];
+	result = LLVMBuildLoad(gallivm->builder, result, "");
+	result = LLVMBuildAdd(gallivm->builder, result,
+			  lp_build_const_int32(gallivm, rel_index), "");
+	return result;
+}
+
 static void tex_fetch_args(
 	struct lp_build_tgsi_context * bld_base,
 	struct lp_build_emit_data * emit_data)
@@ -1618,10 +1636,41 @@ static void tex_fetch_args(
 	unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
 	unsigned count = 0;
 	unsigned chan;
-	unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
-	unsigned sampler_index = emit_data->inst->S

Re: [Mesa-dev] [PATCH 19/19] st/mesa: enable shader subroutine

2015-07-20 Thread Dave Airlie

On 21 July 2015 at 08:50, Marek Olšák  wrote:
> If the extension is core only, we can rip out the checks, but the
> checks that test ctx->API == API_OPENGL_CORE should stay (if they are
> missing, they should be added).
>

I've reconsidered this, and I'm sticking with my original plan,
NIR and the i965 driver will need some changes to support this, so I
should really provide a way to turn it off.

this is due to the subroutine type and the subroutine to int
conversion function.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 10/20] glsl/ir: add subroutine information storage to ir_function (v1.1)

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

We need to store two sets of info into the ir_function,
if this is a function definition with a subroutine list
(subroutine_def) or if it is a subroutine prototype.

v1.1: add some more documentation.

Reviewed-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/glsl/ir.cpp   |  4 
 src/glsl/ir.h | 15 +++
 src/glsl/ir_clone.cpp |  6 ++
 src/glsl/ir_print_visitor.cpp |  2 +-
 4 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 77f1736..7fba0b3 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1853,6 +1853,7 @@ static void
 steal_memory(ir_instruction *ir, void *new_ctx)
 {
ir_variable *var = ir->as_variable();
+   ir_function *fn = ir->as_function();
ir_constant *constant = ir->as_constant();
if (var != NULL && var->constant_value != NULL)
   steal_memory(var->constant_value, ir);
@@ -1860,6 +1861,9 @@ steal_memory(ir_instruction *ir, void *new_ctx)
if (var != NULL && var->constant_initializer != NULL)
   steal_memory(var->constant_initializer, ir);
 
+   if (fn != NULL && fn->subroutine_types)
+  ralloc_steal(new_ctx, fn->subroutine_types);
+
/* The components of aggregate constants are not visited by the normal
 * visitor, so steal their values by hand.
 */
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 2d7f3d0..e273239 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1125,6 +1125,21 @@ public:
 * List of ir_function_signature for each overloaded function with this 
name.
 */
struct exec_list signatures;
+
+   /**
+* is this function a subroutine type declaration
+* e.g. subroutine void type1(float arg1);
+*/
+   bool is_subroutine;
+
+   /**
+* is this function associated to a subroutine type
+* e.g. subroutine (type1, type2) function_name { function_body };
+* would have num_subroutine_types 2,
+* and pointers to the type1 and type2 types.
+*/
+   int num_subroutine_types;
+   const struct glsl_type **subroutine_types;
 };
 
 inline const char *ir_function_signature::function_name() const
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index 49834ff..a8fac18 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -267,6 +267,12 @@ ir_function::clone(void *mem_ctx, struct hash_table *ht) 
const
 {
ir_function *copy = new(mem_ctx) ir_function(this->name);
 
+   copy->is_subroutine = this->is_subroutine;
+   copy->num_subroutine_types = this->num_subroutine_types;
+   copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, 
copy->num_subroutine_types);
+   for (int i = 0; i < copy->num_subroutine_types; i++)
+ copy->subroutine_types[i] = this->subroutine_types[i];
+
foreach_in_list(const ir_function_signature, sig, &this->signatures) {
   ir_function_signature *sig_copy = sig->clone(mem_ctx, ht);
   copy->add_signature(sig_copy);
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index 922f98b..0ee03d9 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -230,7 +230,7 @@ void ir_print_visitor::visit(ir_function_signature *ir)
 
 void ir_print_visitor::visit(ir_function *ir)
 {
-   fprintf(f, "(function %s\n", ir->name);
+   fprintf(f, "(%s function %s\n", ir->is_subroutine ? "subroutine" : "", 
ir->name);
indentation++;
foreach_in_list(ir_function_signature, sig, &ir->signatures) {
   indent();
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] ARB_shader_subroutine - now explicit!

2015-07-20 Thread Dave Airlie

So I revisited ARB_shader_subroutine again today, and noticed
it was lacking wrt ARB_explicit_uniform_location thanks to some
piglits from Igalia/Intel.

So I've added support for that and cleaned up some things,
like calculating the compatible shaders for a uniform at link
time, and stopped dead code elimination from removing subroutine
uniforms so the queries continue to work.

I've enabled tessellation bits where it makes sense as well.

This series just enables it for gallium, I have the i965
changes to work on haswell ready as well, they are quite trivial.

Since we have games actively depending on this, it would be
nice to get it merged, so I don't spend more time rebasing 
than fixing.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 07/20] glsl/types: add new subroutine type (v3.1)

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

This type will be used to store the name of subroutine types

as in subroutine void myfunc(void);
will store myfunc into a subroutine type.

This is required so the parser can identify a subroutine
type in a uniform declaration as a valid type, and also for
looking up the type later.

Also add contains_subroutine method.

v2: handle subroutine to int comparisons, needed
for lowering pass.
v3: do subroutine to int with its own IR
operation to avoid hacking on asserts (Kayden)
v3.1: fix warnings in this patch, fix nir,
fix tgsi

Reviewed-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/glsl/ast_to_hir.cpp|  1 +
 src/glsl/glsl_types.cpp| 66 ++
 src/glsl/glsl_types.h  | 19 +
 src/glsl/ir.cpp|  2 +
 src/glsl/ir.h  |  1 +
 src/glsl/ir_builder.cpp|  6 +++
 src/glsl/ir_builder.h  |  1 +
 src/glsl/ir_clone.cpp  |  1 +
 src/glsl/ir_validate.cpp   |  4 ++
 src/glsl/link_uniform_initializers.cpp |  1 +
 src/glsl/nir/nir_lower_io.c|  2 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  4 ++
 12 files changed, 108 insertions(+)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index b5c4ed9..3c920a4 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -971,6 +971,7 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, 
ir_rvalue *op1)
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_SUBROUTINE:
   /* I assume a comparison of a struct containing a sampler just
* ignores the sampler present in the type.
*/
diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index aaf7c7c..5ebafb2 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -32,6 +32,7 @@ mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP;
 hash_table *glsl_type::array_types = NULL;
 hash_table *glsl_type::record_types = NULL;
 hash_table *glsl_type::interface_types = NULL;
+hash_table *glsl_type::subroutine_types = NULL;
 void *glsl_type::mem_ctx = NULL;
 
 void
@@ -159,6 +160,22 @@ glsl_type::glsl_type(const glsl_struct_field *fields, 
unsigned num_fields,
mtx_unlock(&glsl_type::mutex);
 }
 
+glsl_type::glsl_type(const char *subroutine_name) :
+   gl_type(0),
+   base_type(GLSL_TYPE_SUBROUTINE),
+   sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+   sampler_type(0), interface_packing(0),
+   vector_elements(0), matrix_columns(0),
+   length(0)
+{
+   mtx_lock(&glsl_type::mutex);
+
+   init_ralloc_type_ctx();
+   assert(subroutine_name != NULL);
+   this->name = ralloc_strdup(this->mem_ctx, subroutine_name);
+   this->vector_elements = 1;
+   mtx_unlock(&glsl_type::mutex);
+}
 
 bool
 glsl_type::contains_sampler() const
@@ -229,6 +246,22 @@ glsl_type::contains_opaque() const {
}
 }
 
+bool
+glsl_type::contains_subroutine() const
+{
+   if (this->is_array()) {
+  return this->fields.array->contains_subroutine();
+   } else if (this->is_record()) {
+  for (unsigned int i = 0; i < this->length; i++) {
+if (this->fields.structure[i].type->contains_subroutine())
+   return true;
+  }
+  return false;
+   } else {
+  return this->is_subroutine();
+   }
+}
+
 gl_texture_index
 glsl_type::sampler_index() const
 {
@@ -831,6 +864,36 @@ glsl_type::get_interface_instance(const glsl_struct_field 
*fields,
return (glsl_type *) entry->data;
 }
 
+const glsl_type *
+glsl_type::get_subroutine_instance(const char *subroutine_name)
+{
+   const glsl_type key(subroutine_name);
+
+   mtx_lock(&glsl_type::mutex);
+
+   if (subroutine_types == NULL) {
+  subroutine_types = _mesa_hash_table_create(NULL, record_key_hash,
+ record_key_compare);
+   }
+
+   const struct hash_entry *entry = _mesa_hash_table_search(subroutine_types,
+&key);
+   if (entry == NULL) {
+  mtx_unlock(&glsl_type::mutex);
+  const glsl_type *t = new glsl_type(subroutine_name);
+  mtx_lock(&glsl_type::mutex);
+
+  entry = _mesa_hash_table_insert(subroutine_types, t, (void *) t);
+   }
+
+   assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_SUBROUTINE);
+   assert(strcmp(((glsl_type *) entry->data)->name, subroutine_name) == 0);
+
+   mtx_unlock(&glsl_type::mutex);
+
+   return (glsl_type *) entry->data;
+}
+
 
 const glsl_type *
 glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b)
@@ -963,6 +1026,7 @@ glsl_type::component_slots() const
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_VOID:
+   case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_ERROR:
   break;
}

[Mesa-dev] [PATCH 02/20] glapi: Add ARB_shader_subroutine functions and enums (v2)

2015-07-20 Thread Dave Airlie

From: Chris Forbes 

v2: fix output="true" and LENGTH typo

Reviewed-by: Tapani Pälli 
Reviewed-by: Kenneth Graunke 
Signed-off-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/mapi/glapi/gen/ARB_shader_subroutine.xml | 84 
 src/mapi/glapi/gen/Makefile.am   |  1 +
 src/mapi/glapi/gen/gl_API.xml|  6 +-
 3 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 src/mapi/glapi/gen/ARB_shader_subroutine.xml

diff --git a/src/mapi/glapi/gen/ARB_shader_subroutine.xml 
b/src/mapi/glapi/gen/ARB_shader_subroutine.xml
new file mode 100644
index 000..04b75cb
--- /dev/null
+++ b/src/mapi/glapi/gen/ARB_shader_subroutine.xml
@@ -0,0 +1,84 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am
index 5b163b0..1922c15 100644
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -151,6 +151,7 @@ API_XML = \
ARB_separate_shader_objects.xml \
ARB_shader_atomic_counters.xml \
ARB_shader_image_load_store.xml \
+   ARB_shader_subroutine.xml \
ARB_sync.xml \
ARB_texture_barrier.xml \
ARB_texture_buffer_object.xml \
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 2f33075..64314cf 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -8072,7 +8072,11 @@
 
 http://www.w3.org/2001/XInclude"/>
 
-
+
+
+http://www.w3.org/2001/XInclude"/>
+
+
 
 http://www.w3.org/2001/XInclude"/>
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 06/20] glsl: Make `subroutine` a reserved keyword

2015-07-20 Thread Dave Airlie

From: Chris Forbes 

Reviewed-by: Tapani Pälli 
Reviewed-by: Kenneth Graunke 
Signed-off-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/glsl/glsl_lexer.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll
index 845deeb..22055ce 100644
--- a/src/glsl/glsl_lexer.ll
+++ b/src/glsl/glsl_lexer.ll
@@ -578,7 +578,7 @@ usamplerBuffer  KEYWORD(140, 300, 140, 0, 
USAMPLERBUFFER);
 resource   KEYWORD(0, 300, 0, 0, RESOURCE);
 patch  KEYWORD(0, 300, 0, 0, PATCH);
 sample KEYWORD_WITH_ALT(400, 300, 400, 0, 
yyextra->ARB_gpu_shader5_enable, SAMPLE);
-subroutine KEYWORD(0, 300, 0, 0, SUBROUTINE);
+subroutine KEYWORD_WITH_ALT(400, 300, 400, 0, 
yyextra->ARB_shader_subroutine_enable, SUBROUTINE);
 
 
 [_a-zA-Z][_a-zA-Z0-9]* {
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 09/20] mesa: add inline conversion functions for ARB_shader_subroutine

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

This handles converting the shader stages to the internal
prefix along with the program resource interfaces.

Reviewed-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/mesa/main/shaderobj.h | 84 +++
 1 file changed, 84 insertions(+)

diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h
index 3d696a1..67c717b 100644
--- a/src/mesa/main/shaderobj.h
+++ b/src/mesa/main/shaderobj.h
@@ -120,6 +120,90 @@ _mesa_shader_enum_to_shader_stage(GLenum v)
 }
 
 
+static inline const char *
+_mesa_shader_stage_to_subroutine_prefix(gl_shader_stage stage)
+{
+  switch (stage) {
+  case MESA_SHADER_VERTEX:
+return "__subu_v";
+  case MESA_SHADER_GEOMETRY:
+return "__subu_g";
+  case MESA_SHADER_FRAGMENT:
+return "__subu_f";
+  case MESA_SHADER_COMPUTE:
+return "__subu_c";
+  default:
+return NULL;
+  }
+}
+
+static inline gl_shader_stage
+_mesa_shader_stage_from_subroutine_uniform(GLenum subuniform)
+{
+   switch (subuniform) {
+   default:
+   case GL_VERTEX_SUBROUTINE_UNIFORM:
+  return MESA_SHADER_VERTEX;
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+  return MESA_SHADER_GEOMETRY;
+   case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+  return MESA_SHADER_FRAGMENT;
+   case GL_COMPUTE_SUBROUTINE_UNIFORM:
+  return MESA_SHADER_COMPUTE;
+   /* TODO - COMPUTE, TESS */
+   }
+}
+
+static inline gl_shader_stage
+_mesa_shader_stage_from_subroutine(GLenum subroutine)
+{
+   switch (subroutine) {
+   case GL_VERTEX_SUBROUTINE:
+  return MESA_SHADER_VERTEX;
+   case GL_GEOMETRY_SUBROUTINE:
+  return MESA_SHADER_GEOMETRY;
+   case GL_FRAGMENT_SUBROUTINE:
+  return MESA_SHADER_FRAGMENT;
+   case GL_COMPUTE_SUBROUTINE:
+  return MESA_SHADER_COMPUTE;
+   /* TODO - TESS */
+   }
+}
+
+static inline GLenum
+_mesa_shader_stage_to_subroutine(gl_shader_stage stage)
+{
+   switch (stage) {
+   default:
+   case MESA_SHADER_VERTEX:
+  return GL_VERTEX_SUBROUTINE;
+   case MESA_SHADER_GEOMETRY:
+  return GL_GEOMETRY_SUBROUTINE;
+   case MESA_SHADER_FRAGMENT:
+  return GL_FRAGMENT_SUBROUTINE;
+   case MESA_SHADER_COMPUTE:
+  return GL_COMPUTE_SUBROUTINE;
+   /* TODO - TESS */
+   }
+}
+
+static inline GLenum
+_mesa_shader_stage_to_subroutine_uniform(gl_shader_stage stage)
+{
+   switch (stage) {
+   default:
+   case MESA_SHADER_VERTEX:
+  return GL_VERTEX_SUBROUTINE_UNIFORM;
+   case MESA_SHADER_GEOMETRY:
+  return GL_GEOMETRY_SUBROUTINE_UNIFORM;
+   case MESA_SHADER_FRAGMENT:
+  return GL_FRAGMENT_SUBROUTINE_UNIFORM;
+   case MESA_SHADER_COMPUTE:
+  return GL_COMPUTE_SUBROUTINE_UNIFORM;
+   /* TODO - TESS */
+   }
+}
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 08/20] glsl: don't eliminate subroutine types.

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

This stops dead code elimination from removing subroutine types;
we need these for the queries to work properly.

Signed-off-by: Dave Airlie 
---
 src/glsl/opt_dead_code.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/glsl/opt_dead_code.cpp b/src/glsl/opt_dead_code.cpp
index 04e4d56..e4bf874 100644
--- a/src/glsl/opt_dead_code.cpp
+++ b/src/glsl/opt_dead_code.cpp
@@ -126,6 +126,9 @@ do_dead_code(exec_list *instructions, bool 
uniform_locations_assigned)
if (block_type->interface_packing != 
GLSL_INTERFACE_PACKING_PACKED)
   continue;
 }
+
+if (entry->var->type->is_subroutine())
+   continue;
  }
 
 entry->var->remove();
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 03/20] mesa: Add extension tracking for arb_shader_subroutine (v2)

2015-07-20 Thread Dave Airlie

From: Chris Forbes 

v2: [airlied]: merge version check update.

Reviewed-by: Tapani Pälli 
Reviewed-by: Kenneth Graunke 
Signed-off-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/mesa/main/extensions.c | 1 +
 src/mesa/main/mtypes.h | 1 +
 src/mesa/main/version.c| 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index d753e5f..a20693b 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -155,6 +155,7 @@ static const struct extension extension_table[] = {
{ "GL_ARB_shader_precision",o(ARB_shader_precision),
GL, 2010 },
{ "GL_ARB_shader_stencil_export",   
o(ARB_shader_stencil_export),   GL, 2009 },
{ "GL_ARB_shader_storage_buffer_object",
o(ARB_shader_storage_buffer_object),GL, 2012 },
+   { "GL_ARB_shader_subroutine",   o(ARB_shader_subroutine),   
GLC,2010 },
{ "GL_ARB_shader_texture_lod",  o(ARB_shader_texture_lod),  
GL, 2009 },
{ "GL_ARB_shading_language_100",o(dummy_true),  
GLL,2003 },
{ "GL_ARB_shading_language_packing",
o(ARB_shading_language_packing),GL, 2011 },
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 4b0a995..10c3954 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3704,6 +3704,7 @@ struct gl_extensions
GLboolean ARB_shader_precision;
GLboolean ARB_shader_stencil_export;
GLboolean ARB_shader_storage_buffer_object;
+   GLboolean ARB_shader_subroutine;
GLboolean ARB_shader_texture_lod;
GLboolean ARB_shading_language_packing;
GLboolean ARB_shading_language_420pack;
diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index 8bc00ac..fd7ae53 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -309,7 +309,7 @@ compute_version(const struct gl_extensions *extensions,
  extensions->ARB_gpu_shader5 &&
  extensions->ARB_gpu_shader_fp64 &&
  extensions->ARB_sample_shading &&
- false /*extensions->ARB_shader_subroutine*/ &&
+ extensions->ARB_shader_subroutine &&
  extensions->ARB_tessellation_shader &&
  extensions->ARB_texture_buffer_object_rgb32 &&
  extensions->ARB_texture_cube_map_array &&
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 15/20] glsl: add uniform and program resource support (v2)

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

This adds linker support for subroutine uniforms, they
have some subtle differences from real uniforms, we also hide
them and they are given internal uniform names.

This also adds the subroutine locations and subroutine uniforms
to the program resource tracking for later use.

v1.1: drop is_subroutine_def

v2: handle explicit location properly, ARB_explicit_location
has a lot of language for subroutine shaders.
Calculate at link time the number of compatible subroutines
for a uniform, to make program resource easier later.

Signed-off-by: Dave Airlie 
---
 src/glsl/ir_uniform.h  |   8 ++
 src/glsl/link_uniforms.cpp | 103 -
 src/glsl/linker.cpp| 182 -
 3 files changed, 286 insertions(+), 7 deletions(-)

diff --git a/src/glsl/ir_uniform.h b/src/glsl/ir_uniform.h
index e1b8014..0b6f720 100644
--- a/src/glsl/ir_uniform.h
+++ b/src/glsl/ir_uniform.h
@@ -114,6 +114,8 @@ struct gl_uniform_storage {
 
struct gl_opaque_uniform_index image[MESA_SHADER_STAGES];
 
+   struct gl_opaque_uniform_index subroutine[MESA_SHADER_STAGES];
+
/**
 * Storage used by the driver for the uniform
 */
@@ -173,10 +175,16 @@ struct gl_uniform_storage {
/**
 * The 'base location' for this uniform in the uniform remap table. For
 * arrays this is the first element in the array.
+* for subroutines this is in shader subroutine uniform remap table.
 */
unsigned remap_location;
 
/**
+* The number of compatible subroutines with this subroutine uniform.
+*/
+   unsigned num_compatible_subroutines;
+
+   /**
 * This is a compiler-generated uniform that should not be advertised
 * via the API.
 */
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index e786ddc..254086d 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -47,9 +47,10 @@
 static unsigned
 values_for_type(const glsl_type *type)
 {
-   if (type->is_sampler()) {
+   if (type->is_sampler() || type->is_subroutine()) {
   return 1;
-   } else if (type->is_array() && type->fields.array->is_sampler()) {
+   } else if (type->is_array() && (type->fields.array->is_sampler() ||
+   type->fields.array->is_subroutine())) {
   return type->array_size();
} else {
   return type->component_slots();
@@ -284,6 +285,7 @@ public:
count_uniform_size(struct string_to_uint_map *map)
   : num_active_uniforms(0), num_values(0), num_shader_samplers(0),
 num_shader_images(0), num_shader_uniform_components(0),
+num_shader_subroutines(0),
 is_ubo_var(false), map(map)
{
   /* empty */
@@ -294,6 +296,7 @@ public:
   this->num_shader_samplers = 0;
   this->num_shader_images = 0;
   this->num_shader_uniform_components = 0;
+  this->num_shader_subroutines = 0;
}
 
void process(ir_variable *var)
@@ -331,6 +334,11 @@ public:
 */
unsigned num_shader_uniform_components;
 
+   /**
+* Number of subroutine uniforms used
+*/
+   unsigned num_shader_subroutines;
+
bool is_ubo_var;
 
 private:
@@ -348,7 +356,9 @@ private:
* count it for each shader target.
*/
   const unsigned values = values_for_type(type);
-  if (type->contains_sampler()) {
+  if (type->contains_subroutine()) {
+ this->num_shader_subroutines += values;
+  } else if (type->contains_sampler()) {
  this->num_shader_samplers += values;
   } else if (type->contains_image()) {
  this->num_shader_images += values;
@@ -421,6 +431,7 @@ public:
   this->shader_shadow_samplers = 0;
   this->next_sampler = 0;
   this->next_image = 0;
+  this->next_subroutine = 0;
   memset(this->targets, 0, sizeof(this->targets));
}
 
@@ -535,6 +546,24 @@ private:
   }
}
 
+   void handle_subroutines(const glsl_type *base_type,
+   struct gl_uniform_storage *uniform)
+   {
+  if (base_type->is_subroutine()) {
+ uniform->subroutine[shader_type].index = this->next_subroutine;
+ uniform->subroutine[shader_type].active = true;
+
+ /* Increment the subroutine index by 1 for non-arrays and by the
+  * number of array elements for arrays.
+  */
+ this->next_subroutine += MAX2(1, uniform->array_elements);
+
+  } else {
+ uniform->subroutine[shader_type].index = ~0;
+ uniform->subroutine[shader_type].active = false;
+  }
+   }
+
virtual void visit_field(const glsl_type *type, const char *name,
 bool row_major)
{
@@ -588,6 +617,7 @@ private:
   /* This assigns uniform indices to sampler and image uniforms. */
   handle_samplers(base_type, &this->uniforms[id]);
   handle_images(

[Mesa-dev] [PATCH 13/20] glsl/ir: add subroutine lowering pass (v2.3)

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

This lowers the enhanced ir_call using the lookaside table
of subroutines into an if ladder. This initially was done
at the AST level but it caused some ordering issues so a separate
pass was required.

v2: clone return value derefs.
v2.1: update for subroutine->int convert.
v2.2: add a clone for the array index

Reviewed-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/glsl/Makefile.sources   |   1 +
 src/glsl/glsl_parser_extras.cpp |   1 +
 src/glsl/ir_optimization.h  |   2 +
 src/glsl/lower_subroutine.cpp   | 109 
 4 files changed, 113 insertions(+)
 create mode 100644 src/glsl/lower_subroutine.cpp

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index d784a81..3f113c8 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -154,6 +154,7 @@ LIBGLSL_FILES = \
lower_packed_varyings.cpp \
lower_named_interface_blocks.cpp \
lower_packing_builtins.cpp \
+   lower_subroutine.cpp \
lower_texture_projection.cpp \
lower_variable_index_to_cond_assign.cpp \
lower_vec_index_to_cond_assign.cpp \
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 3618424..0891b6f 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -1561,6 +1561,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct 
gl_shader *shader,
   struct gl_shader_compiler_options *options =
  &ctx->Const.ShaderCompilerOptions[shader->Stage];
 
+  lower_subroutine(shader->ir, state);
   /* Do some optimization at compile time to reduce shader IR size
* and reduce later work if the same shader is linked multiple times
*/
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index e6939f3..fef5a83 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -135,6 +135,8 @@ void optimize_dead_builtin_variables(exec_list 
*instructions,
 
 bool lower_vertex_id(gl_shader *shader);
 
+bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state 
*state);
+
 ir_rvalue *
 compare_index_block(exec_list *instructions, ir_variable *index,
unsigned base, unsigned components, void *mem_ctx);
diff --git a/src/glsl/lower_subroutine.cpp b/src/glsl/lower_subroutine.cpp
new file mode 100644
index 000..e45ccfe
--- /dev/null
+++ b/src/glsl/lower_subroutine.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_subroutine.cpp
+ *
+ * lowers subroutines to an if ladder.
+ */
+
+#include "glsl_types.h"
+#include "glsl_parser_extras.h"
+#include "ir.h"
+#include "ir_builder.h"
+
+using namespace ir_builder;
+namespace {
+
+class lower_subroutine_visitor : public ir_hierarchical_visitor {
+public:
+   lower_subroutine_visitor()
+   {
+  this->progress = false;
+   }
+
+   ir_visitor_status visit_leave(ir_call *);
+   bool progress;
+   struct _mesa_glsl_parse_state *state;
+};
+
+}
+
+bool
+lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state)
+{
+   lower_subroutine_visitor v;
+   v.state = state;
+   visit_list_elements(&v, instructions);
+   return v.progress;
+}
+
+ir_visitor_status
+lower_subroutine_visitor::visit_leave(ir_call *ir)
+{
+   if (!ir->sub_var)
+  return visit_continue;
+
+   void *mem_ctx = ralloc_parent(ir);
+   ir_if *last_branch = NULL;
+   ir_dereference_variable *return_deref = ir->return_deref;
+
+   for (int s = this->state->num_subroutines - 1; s >= 0; s--) {
+  ir_rvalue *var;
+  ir_constant *lc = new(mem_ctx)ir_constant(s);
+  ir_function *fn = this->state->subroutines[s];
+  bool is_compat = false;
+
+  for (int i = 0; i <

[Mesa-dev] [PATCH 11/20] glsl/ir: allow ir_call to handle subroutine calling

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

This adds a ir_variable which contains the subroutine uniform
and an array rvalue for the deref of that uniform, these
are stored in the ir_call and lowered later.

Reviewed-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/glsl/ir.h | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index e273239..647a87b 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1711,7 +1711,18 @@ public:
ir_call(ir_function_signature *callee,
   ir_dereference_variable *return_deref,
   exec_list *actual_parameters)
-  : ir_instruction(ir_type_call), return_deref(return_deref), 
callee(callee)
+  : ir_instruction(ir_type_call), return_deref(return_deref), 
callee(callee), sub_var(NULL), array_idx(NULL)
+   {
+  assert(callee->return_type != NULL);
+  actual_parameters->move_nodes_to(& this->actual_parameters);
+  this->use_builtin = callee->is_builtin();
+   }
+
+   ir_call(ir_function_signature *callee,
+  ir_dereference_variable *return_deref,
+  exec_list *actual_parameters,
+  ir_variable *var, ir_rvalue *array_idx)
+  : ir_instruction(ir_type_call), return_deref(return_deref), 
callee(callee), sub_var(var), array_idx(array_idx)
{
   assert(callee->return_type != NULL);
   actual_parameters->move_nodes_to(& this->actual_parameters);
@@ -1759,6 +1770,14 @@ public:
 
/** Should this call only bind to a built-in function? */
bool use_builtin;
+
+   /*
+* ARB_shader_subroutine support -
+* the subroutine uniform variable and array index
+* rvalue to be used in the lowering pass later.
+*/
+   ir_variable *sub_var;
+   ir_rvalue *array_idx;
 };
 
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 12/20] glsl: add ast/parser support for subroutine parsing storage (v3.2)

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

This is the guts of the GLSL parser and AST support for
shader subroutines.

The code creates a subroutine type in the parser, and
uses that there to validate the identifiers. The parser
also distinguishes between subroutine types/function prototypes
/uniforms and subroutine definitions for functions.

Then in the AST conversion it recreates the types, and
stores the subroutine definition info or subroutine info
into the ir_function along with a side lookup table in
the parser state. It also converts subroutine calls into
the enhanced ir_call.

v2: move to handling method calls in
function handling not in field selection.
v3: merge Chris's previous parser patches in here, to
make it clearer what's changed in one place.
v3.1: add more documentation, drop unused include
v3.2: drop is_subroutine_def

Reviewed-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/glsl/ast.h   |  15 +
 src/glsl/ast_function.cpp| 120 +--
 src/glsl/ast_to_hir.cpp  |  96 +++
 src/glsl/ast_type.cpp|   7 ++-
 src/glsl/glsl_lexer.ll   |   8 +++
 src/glsl/glsl_parser.yy  | 114 +
 src/glsl/glsl_parser_extras.cpp  |  22 +++
 src/glsl/glsl_parser_extras.h|  19 +++
 src/glsl/hir_field_selection.cpp |  39 -
 9 files changed, 324 insertions(+), 116 deletions(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index 4921229..ff0a2ee 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -304,6 +304,16 @@ private:
 * Is this function call actually a constructor?
 */
bool cons;
+   ir_rvalue *
+   handle_method(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state);
+};
+
+class ast_subroutine_list : public ast_node
+{
+public:
+   virtual void print(void) const;
+   exec_list declarations;
 };
 
 class ast_array_specifier : public ast_node {
@@ -515,6 +525,10 @@ struct ast_type_qualifier {
  unsigned stream:1; /**< Has stream value assigned  */
  unsigned explicit_stream:1; /**< stream value assigned explicitly by 
shader code */
  /** \} */
+
+ /** \name Qualifiers for GL_ARB_shader_subroutine */
+ unsigned subroutine:1;  /**< Is this marked 'subroutine' */
+ unsigned subroutine_def:1; /**< Is this marked 'subroutine' with a 
list of types */
   }
   /** \brief Set of flags, accessed by name. */
   q;
@@ -637,6 +651,7 @@ struct ast_type_qualifier {
ast_type_qualifier q,
ast_node* &node);
 
+   ast_subroutine_list *subroutine_list;
 };
 
 class ast_declarator_list;
diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 6749e99..803edf5 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -26,6 +26,7 @@
 #include "glsl_types.h"
 #include "ir.h"
 #include "main/core.h" /* for MIN2 */
+#include "main/shaderobj.h"
 
 static ir_rvalue *
 convert_component(ir_rvalue *src, const glsl_type *desired_type);
@@ -355,6 +356,8 @@ fix_parameter(void *mem_ctx, ir_rvalue *actual, const 
glsl_type *formal_type,
 static ir_rvalue *
 generate_call(exec_list *instructions, ir_function_signature *sig,
  exec_list *actual_parameters,
+  ir_variable *sub_var,
+ ir_rvalue *array_idx,
  struct _mesa_glsl_parse_state *state)
 {
void *ctx = state;
@@ -421,7 +424,8 @@ generate_call(exec_list *instructions, 
ir_function_signature *sig,
 
   deref = new(ctx) ir_dereference_variable(var);
}
-   ir_call *call = new(ctx) ir_call(sig, deref, actual_parameters);
+
+   ir_call *call = new(ctx) ir_call(sig, deref, actual_parameters, sub_var, 
array_idx);
instructions->push_tail(call);
 
/* Also emit any necessary out-parameter conversions. */
@@ -489,6 +493,40 @@ done:
return sig;
 }
 
+static ir_function_signature *
+match_subroutine_by_name(const char *name,
+ exec_list *actual_parameters,
+ struct _mesa_glsl_parse_state *state,
+ ir_variable **var_r)
+{
+   void *ctx = state;
+   ir_function_signature *sig = NULL;
+   ir_function *f, *found = NULL;
+   const char *new_name;
+   ir_variable *var;
+   bool is_exact = false;
+
+   new_name = ralloc_asprintf(ctx, "%s_%s", 
_mesa_shader_stage_to_subroutine_prefix(state->stage), name);
+   var = state->symbols->get_variable(new_name);
+   if (!var)
+  return NULL;
+
+   for (int i = 0; i < state->num_subroutine_types; i++) {
+  f = state->subroutine_types[i];
+  if (strcmp(f->name, var->type->without_array()->name))
+ continue;
+  found = f;
+  break;
+   }
+
+   if (!found)
+  return NULL;
+   *var_r = var;
+   sig = found->

[Mesa-dev] [PATCH 05/20] glsl: Add extension plumbing and define for ARB_shader_subroutine

2015-07-20 Thread Dave Airlie

From: Chris Forbes 

Reviewed-by: Tapani Pälli 
Reviewed-by: Kenneth Graunke 
Signed-off-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/glsl/glcpp/glcpp-parse.y| 3 +++
 src/glsl/glsl_parser_extras.cpp | 1 +
 src/glsl/glsl_parser_extras.h   | 2 ++
 src/glsl/standalone_scaffolding.cpp | 1 +
 4 files changed, 7 insertions(+)

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index ed1bffb..5534ff7 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2486,6 +2486,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
 
  if (extensions->ARB_shader_storage_buffer_object)
 add_builtin_define(parser, 
"GL_ARB_shader_storage_buffer_object", 1);
+
+  if (extensions->ARB_shader_subroutine)
+ add_builtin_define(parser, "GL_ARB_shader_subroutine", 1);
   }
}
 
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 5412f0b..a234eef 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -571,6 +571,7 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
EXT(ARB_shader_precision, true,  false, 
ARB_shader_precision),
EXT(ARB_shader_stencil_export,true,  false, 
ARB_shader_stencil_export),
EXT(ARB_shader_storage_buffer_object, true,  false, 
ARB_shader_storage_buffer_object),
+   EXT(ARB_shader_subroutine,true,  false, 
ARB_shader_subroutine),
EXT(ARB_shader_texture_lod,   true,  false, 
ARB_shader_texture_lod),
EXT(ARB_shading_language_420pack, true,  false, 
ARB_shading_language_420pack),
EXT(ARB_shading_language_packing, true,  false, 
ARB_shading_language_packing),
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 4996b84..dd441c1 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -469,6 +469,8 @@ struct _mesa_glsl_parse_state {
bool ARB_shader_stencil_export_warn;
bool ARB_shader_storage_buffer_object_enable;
bool ARB_shader_storage_buffer_object_warn;
+   bool ARB_shader_subroutine_enable;
+   bool ARB_shader_subroutine_warn;
bool ARB_shader_texture_lod_enable;
bool ARB_shader_texture_lod_warn;
bool ARB_shading_language_420pack_enable;
diff --git a/src/glsl/standalone_scaffolding.cpp 
b/src/glsl/standalone_scaffolding.cpp
index 172c6f4..15546c2 100644
--- a/src/glsl/standalone_scaffolding.cpp
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -133,6 +133,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, 
gl_api api)
ctx->Extensions.ARB_sample_shading = true;
ctx->Extensions.ARB_shader_bit_encoding = true;
ctx->Extensions.ARB_shader_stencil_export = true;
+   ctx->Extensions.ARB_shader_subroutine = true;
ctx->Extensions.ARB_shader_texture_lod = true;
ctx->Extensions.ARB_shading_language_420pack = true;
ctx->Extensions.ARB_shading_language_packing = true;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 04/20] mesa: Add glGet support for ARB_shader_subroutine implementation limits

2015-07-20 Thread Dave Airlie

From: Chris Forbes 

Reviewed-by: Tapani Pälli 
Reviewed-by: Kenneth Graunke 
Signed-off-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/mesa/main/config.h   | 6 ++
 src/mesa/main/get.c  | 1 +
 src/mesa/main/get_hash_params.py | 4 
 src/mesa/main/tests/enum_strings.cpp | 9 +
 4 files changed, 20 insertions(+)

diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 177f176..6a04df1 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -274,6 +274,12 @@
 #define MAX_VERTEX_STREAMS  4
 /*@}*/
 
+/** For GL_ARB_shader_subroutine */
+/*@{*/
+#define MAX_SUBROUTINES   256
+#define MAX_SUBROUTINE_UNIFORM_LOCATIONS  1024
+/*@}*/
+
 /** For GL_INTEL_performance_query */
 /*@{*/
 #define MAX_PERFQUERY_QUERY_NAME_LENGTH 256
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index ffafe51..9b16518 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -401,6 +401,7 @@ EXTRA_EXT(ARB_explicit_uniform_location);
 EXTRA_EXT(ARB_clip_control);
 EXTRA_EXT(EXT_polygon_offset_clamp);
 EXTRA_EXT(ARB_framebuffer_no_attachments);
+EXTRA_EXT(ARB_shader_subroutine);
 
 static const int
 extra_ARB_color_buffer_float_or_glcore[] = {
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index c25e1b6..842ed6c 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -824,6 +824,10 @@ descriptor=[
   [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", 
"CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5" ],
   [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", 
"CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5" ],
   [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", 
"CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5" ],
+
+# GL_ARB_shader_subroutine
+  [ "MAX_SUBROUTINES", "CONST(MAX_SUBROUTINES), extra_ARB_shader_subroutine" ],
+  [ "MAX_SUBROUTINE_UNIFORM_LOCATIONS", 
"CONST(MAX_SUBROUTINE_UNIFORM_LOCATIONS), extra_ARB_shader_subroutine" ],
 ]}
 
 ]
diff --git a/src/mesa/main/tests/enum_strings.cpp 
b/src/mesa/main/tests/enum_strings.cpp
index 84c1195..8218cc9 100644
--- a/src/mesa/main/tests/enum_strings.cpp
+++ b/src/mesa/main/tests/enum_strings.cpp
@@ -1731,6 +1731,10 @@ const struct enum_info everything[] = {
{ 0x8DDF, "GL_MAX_GEOMETRY_UNIFORM_COMPONENTS" },
{ 0x8DE0, "GL_MAX_GEOMETRY_OUTPUT_VERTICES" },
{ 0x8DE1, "GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS" },
+   { 0x8DE5, "GL_ACTIVE_SUBROUTINES" },
+   { 0x8DE6, "GL_ACTIVE_SUBROUTINE_UNIFORMS" },
+   { 0x8DE7, "GL_MAX_SUBROUTINES" },
+   { 0x8DE8, "GL_MAX_SUBROUTINE_UNIFORM_LOCATIONS" },
{ 0x8DF0, "GL_LOW_FLOAT" },
{ 0x8DF1, "GL_MEDIUM_FLOAT" },
{ 0x8DF2, "GL_HIGH_FLOAT" },
@@ -1759,6 +1763,11 @@ const struct enum_info everything[] = {
{ 0x8E44, "GL_TEXTURE_SWIZZLE_B" },
{ 0x8E45, "GL_TEXTURE_SWIZZLE_A" },
{ 0x8E46, "GL_TEXTURE_SWIZZLE_RGBA" },
+   { 0x8E47, "GL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS" },
+   { 0x8E48, "GL_ACTIVE_SUBROUTINE_MAX_LENGTH" },
+   { 0x8E49, "GL_ACTIVE_SUBROUTINE_UNIFORM_MAX_LENGTH" },
+   { 0x8E4A, "GL_NUM_COMPATIBLE_SUBROUTINES" },
+   { 0x8E4B, "GL_COMPATIBLE_SUBROUTINES" },
{ 0x8E4C, "GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION" },
{ 0x8E4D, "GL_FIRST_VERTEX_CONVENTION" },
{ 0x8E4E, "GL_LAST_VERTEX_CONVENTION" },
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 01/20] mesa: Add stubs for ARB_shader_subroutine entrypoints

2015-07-20 Thread Dave Airlie

From: Chris Forbes 

Reviewed-by: Tapani Pälli 
Reviewed-by: Kenneth Graunke 
Signed-off-by: Chris Forbes 
Signed-off-by: Dave Airlie 
---
 src/mesa/main/shaderapi.c | 63 +++
 src/mesa/main/shaderapi.h | 35 ++
 2 files changed, 98 insertions(+)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 3365c7a..afca9b1 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1984,3 +1984,66 @@ _mesa_CreateShaderProgramv(GLenum type, GLsizei count,
 
return _mesa_create_shader_program(ctx, GL_TRUE, type, count, strings);
 }
+
+
+/**
+ * ARB_shader_subroutine
+ */
+GLint GLAPIENTRY
+_mesa_GetSubroutineUniformLocation(GLuint program, GLenum shadertype,
+   const GLchar *name)
+{
+   return -1;
+}
+
+
+GLuint GLAPIENTRY
+_mesa_GetSubroutineIndex(GLuint program, GLenum shadertype,
+ const GLchar *name)
+{
+   return GL_INVALID_INDEX;
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformiv(GLuint program, GLenum shadertype,
+   GLuint index, GLenum pname, GLint *values)
+{
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformName(GLuint program, GLenum shadertype,
+ GLuint index, GLsizei bufsize,
+ GLsizei *length, GLchar *name)
+{
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineName(GLuint program, GLenum shadertype,
+  GLuint index, GLsizei bufsize,
+  GLsizei *length, GLchar *name)
+{
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_UniformSubroutinesuiv(GLenum shadertype, GLsizei count,
+const GLuint *indices)
+{
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetUniformSubroutineuiv(GLenum shadertype, GLint location,
+  GLuint *params)
+{
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetProgramStageiv(GLuint program, GLenum shadertype,
+GLenum pname, GLint *values)
+{
+}
diff --git a/src/mesa/main/shaderapi.h b/src/mesa/main/shaderapi.h
index aba6d5d..eda7170 100644
--- a/src/mesa/main/shaderapi.h
+++ b/src/mesa/main/shaderapi.h
@@ -264,6 +264,41 @@ _mesa_get_program_resourceiv(struct gl_shader_program 
*shProg,
  GLsizei bufSize, GLsizei *length,
  GLint *params);
 
+/* GL_ARB_shader_subroutine */
+extern GLint GLAPIENTRY
+_mesa_GetSubroutineUniformLocation(GLuint program, GLenum shadertype,
+   const GLchar *name);
+
+extern GLuint GLAPIENTRY
+_mesa_GetSubroutineIndex(GLuint program, GLenum shadertype,
+ const GLchar *name);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformiv(GLuint program, GLenum shadertype,
+   GLuint index, GLenum pname, GLint *values);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformName(GLuint program, GLenum shadertype,
+ GLuint index, GLsizei bufsize,
+ GLsizei *length, GLchar *name);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineName(GLuint program, GLenum shadertype,
+  GLuint index, GLsizei bufsize,
+  GLsizei *length, GLchar *name);
+
+extern GLvoid GLAPIENTRY
+_mesa_UniformSubroutinesuiv(GLenum shadertype, GLsizei count,
+const GLuint *indices);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetUniformSubroutineuiv(GLenum shadertype, GLint location,
+  GLuint *params);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetProgramStageiv(GLuint program, GLenum shadertype,
+GLenum pname, GLint *values);
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 18/20] mesa: fill out the ARB_shader_subroutine APIs

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

This fleshes out the APIs, using the program resource
APIs where they should match.

It also sets the default values to valid subroutines.

Signed-off-by: Dave Airlie 
---
 src/mesa/main/shaderapi.c | 450 +-
 src/mesa/main/shaderapi.h |   3 +
 2 files changed, 450 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index afca9b1..3d17230 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1071,6 +1071,7 @@ _mesa_use_program(struct gl_context *ctx, struct 
gl_shader_program *shProg)
   use_shader_program(ctx, i, shProg, &ctx->Shader);
_mesa_active_program(ctx, shProg, "glUseProgram");
 
+   _mesa_shader_program_init_subroutine_defaults(shProg);
if (ctx->Driver.UseProgram)
   ctx->Driver.UseProgram(ctx, shProg);
 }
@@ -1993,15 +1994,75 @@ GLint GLAPIENTRY
 _mesa_GetSubroutineUniformLocation(GLuint program, GLenum shadertype,
const GLchar *name)
 {
-   return -1;
-}
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetSubroutineUniformLocation";
+   struct gl_shader_program *shProg;
+   GLenum resource_type;
+   gl_shader_stage stage;
 
+   if (!ctx->Extensions.ARB_shader_subroutine) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return -1;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return -1;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+   if (!shProg)
+  return -1;
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   if (!shProg->_LinkedShaders[stage]) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return -1;
+   }
+
+   resource_type = _mesa_shader_stage_to_subroutine_uniform(stage);
+   return _mesa_program_resource_location(shProg, resource_type, name);
+}
 
 GLuint GLAPIENTRY
 _mesa_GetSubroutineIndex(GLuint program, GLenum shadertype,
  const GLchar *name)
 {
-   return GL_INVALID_INDEX;
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetSubroutineIndex";
+   struct gl_shader_program *shProg;
+   struct gl_program_resource *res;
+   GLenum resource_type;
+   gl_shader_stage stage;
+
+   if (!ctx->Extensions.ARB_shader_subroutine) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return -1;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return -1;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+   if (!shProg)
+  return -1;
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   if (!shProg->_LinkedShaders[stage]) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return -1;
+   }
+
+   resource_type = _mesa_shader_stage_to_subroutine(stage);
+   res = _mesa_program_resource_find_name(shProg, resource_type, name);
+   if (!res) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+ return -1;
+   }
+
+   return _mesa_program_resource_index(shProg, res);
 }
 
 
@@ -2009,6 +2070,81 @@ GLvoid GLAPIENTRY
 _mesa_GetActiveSubroutineUniformiv(GLuint program, GLenum shadertype,
GLuint index, GLenum pname, GLint *values)
 {
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetActiveSubroutineUniformiv";
+   struct gl_shader_program *shProg;
+   struct gl_shader *sh;
+   gl_shader_stage stage;
+   struct gl_program_resource *res;
+   const struct gl_uniform_storage *uni;
+   GLenum resource_type;
+   int count, i, j;
+   if (!ctx->Extensions.ARB_shader_subroutine) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+   if (!shProg)
+  return;
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   resource_type = _mesa_shader_stage_to_subroutine_uniform(stage);
+
+   sh = shProg->_LinkedShaders[stage];
+   if (!sh) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+  return;
+   }
+
+   switch (pname) {
+   case GL_NUM_COMPATIBLE_SUBROUTINES: {
+  res = _mesa_program_resource_find_index(shProg, resource_type, index);
+  if (res) {
+ uni = res->Data;
+ values[0] = uni->num_compatible_subroutines;
+  }
+  break;
+   }
+   case GL_COMPATIBLE_SUBROUTINES: {
+  res = _mesa_program_resource_find_index(shProg, resource_type, index);
+  if (res) {
+ uni = res->Data;
+ count = 0;
+ for (i = 0; i < sh->NumSubroutineFunctions; i++) {
+struct gl_subroutine_function *fn = &sh->SubroutineF

[Mesa-dev] [PATCH 16/20] program_resource: add subroutine support (v2)

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

This fleshes out the ARB_program_interface_query support for the
APIs that ARB_shader_subroutine introduces, leaving
some TODOs for later addition.

v2: reworked for lots of the ARB_program_interface_query
entry points and tests

Signed-off-by: Dave Airlie 
---
 src/mesa/main/program_resource.c | 88 
 src/mesa/main/shader_query.cpp   | 82 -
 2 files changed, 151 insertions(+), 19 deletions(-)

diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c
index e77bb03..b5ef9b9 100644
--- a/src/mesa/main/program_resource.c
+++ b/src/mesa/main/program_resource.c
@@ -28,10 +28,11 @@
 #include "main/mtypes.h"
 #include "main/shaderapi.h"
 #include "main/shaderobj.h"
+#include "main/context.h"
 #include "program_resource.h"
-
+#include "ir_uniform.h"
 static bool
-supported_interface_enum(GLenum iface)
+supported_interface_enum(struct gl_context *ctx, GLenum iface)
 {
switch (iface) {
case GL_UNIFORM:
@@ -41,18 +42,24 @@ supported_interface_enum(GLenum iface)
case GL_TRANSFORM_FEEDBACK_VARYING:
case GL_ATOMIC_COUNTER_BUFFER:
   return true;
+  /* arb shader subroutine is always enabled */
case GL_VERTEX_SUBROUTINE:
-   case GL_TESS_CONTROL_SUBROUTINE:
-   case GL_TESS_EVALUATION_SUBROUTINE:
-   case GL_GEOMETRY_SUBROUTINE:
case GL_FRAGMENT_SUBROUTINE:
-   case GL_COMPUTE_SUBROUTINE:
case GL_VERTEX_SUBROUTINE_UNIFORM:
-   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
-   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
-   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+  return ctx->Extensions.ARB_shader_subroutine;
+   case GL_GEOMETRY_SUBROUTINE:
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+  return _mesa_has_geometry_shaders(ctx) && 
ctx->Extensions.ARB_shader_subroutine;
+   case GL_COMPUTE_SUBROUTINE:
case GL_COMPUTE_SUBROUTINE_UNIFORM:
+  return _mesa_has_compute_shaders(ctx) && 
ctx->Extensions.ARB_shader_subroutine;
+   case GL_TESS_CONTROL_SUBROUTINE:
+   case GL_TESS_EVALUATION_SUBROUTINE:
+   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+  return ctx->Extensions.ARB_tessellation_shader && 
ctx->Extensions.ARB_shader_subroutine;
+  return false;
case GL_BUFFER_VARIABLE:
case GL_SHADER_STORAGE_BLOCK:
default:
@@ -79,7 +86,7 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum 
programInterface,
}
 
/* Validate interface. */
-   if (!supported_interface_enum(programInterface)) {
+   if (!supported_interface_enum(ctx, programInterface)) {
   _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramInterfaceiv(%s)",
   _mesa_enum_to_string(programInterface));
   return;
@@ -143,6 +150,31 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum 
programInterface,
   };
   break;
case GL_MAX_NUM_COMPATIBLE_SUBROUTINES:
+  switch (programInterface) {
+  case GL_VERTEX_SUBROUTINE_UNIFORM:
+  case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+  case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+  case GL_COMPUTE_SUBROUTINE_UNIFORM:
+  case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+  case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: {
+ for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
+if (shProg->ProgramResourceList[i].Type == programInterface) {
+   struct gl_uniform_storage *uni =
+  (struct gl_uniform_storage *)
+  shProg->ProgramResourceList[i].Data;
+   *params = MAX2(*params, uni->num_compatible_subroutines);
+}
+ }
+ break;
+  }
+
+  default:
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glGetProgramInterfaceiv(%s pname %s)",
+ _mesa_enum_to_string(programInterface),
+ _mesa_enum_to_string(pname));
+  }
+  break;
default:
   _mesa_error(ctx, GL_INVALID_OPERATION,
   "glGetProgramInterfaceiv(pname %s)",
@@ -206,6 +238,11 @@ _mesa_GetProgramResourceIndex(GLuint program, GLenum 
programInterface,
if (!shProg || !name)
   return GL_INVALID_INDEX;
 
+   if (!supported_interface_enum(ctx, programInterface)) {
+  _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceIndex(%s)",
+  _mesa_enum_to_string(programInterface));
+  return GL_INVALID_INDEX;
+   }
/*
 * For the interface TRANSFORM_FEEDBACK_VARYING, the value INVALID_INDEX
 * should be returned when querying the index assigned to the special names
@@ -217,6 +254,14 @@ _mesa_GetProgramResourceIndex(GLuint program, GLenum 
programInterface,
   return GL_INVALID_INDEX;
 
switch (programInterface) {
+   case GL_COMPUTE_SUBROUTINE:
+   case GL_COMPUTE_SUBROUTINE_U

[Mesa-dev] [PATCH 17/20] program: add subroutine uniform support (v1.1)

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

Add support for the subroutine uniform type to ir_to_mesa.cpp.

v1.1: add subroutine to int to switch

Signed-off-by: Dave Airlie 
---
 src/mesa/program/ir_to_mesa.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 2bd212e..20c0fad 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -534,6 +534,7 @@ type_size(const struct glsl_type *type)
   return size;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_SUBROUTINE:
   /* Samplers take up one slot in UNIFORMS[], but they're baked in
* at link time.
*/
@@ -1342,6 +1343,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
case ir_unop_dFdx_fine:
case ir_unop_dFdy_coarse:
case ir_unop_dFdy_fine:
+   case ir_unop_subroutine_to_int:
   assert(!"not supported");
   break;
 
@@ -2451,6 +2453,7 @@ _mesa_associate_uniform_storage(struct gl_context *ctx,
break;
 case GLSL_TYPE_SAMPLER:
 case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_SUBROUTINE:
format = uniform_native;
columns = 1;
break;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 14/20] mesa/mtypes: add gl_subroutine_function and uniform storage to shader (v2)

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

This adds the necessary storage for subroutine info to gl_shader.

v2: add comments, rename one member
Signed-off-by: Dave Airlie 
---
 src/mesa/main/mtypes.h | 28 
 1 file changed, 28 insertions(+)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 10c3954..046ba04 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2360,6 +2360,15 @@ struct gl_ati_fragment_shader_state
struct ati_fragment_shader *Current;
 };
 
+/**
+ *  Shader subroutine function definition
+ */
+struct gl_subroutine_function
+{
+   char *name;
+   int num_compat_types;
+   const struct glsl_type **types;
+};
 
 /**
  * A GLSL vertex or fragment shader object.
@@ -2510,6 +2519,25 @@ struct gl_shader
*/
   unsigned LocalSize[3];
} Comp;
+
+   /**
+ * Number of types for subroutine uniforms.
+ */
+   GLuint NumSubroutineUniformTypes;
+
+   /**
+ * Subroutine uniform remap table
+ * based on the program level uniform remap table.
+ */
+   GLuint NumSubroutineUniformRemapTable;
+   struct gl_uniform_storage **SubroutineUniformRemapTable;
+
+   /**
+* Num of subroutine functions for this stage
+* and storage for them.
+*/
+   GLuint NumSubroutineFunctions;
+   struct gl_subroutine_function *SubroutineFunctions;
 };
 
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 19/20] st/mesa: add subroutine bits (v1.1)

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

Just add support for the subroutine type to the
glsl->tgsi convertor.

v1.1: add subroutine to int support.

Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 48d7de6..a1dd70f 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -797,7 +797,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, 
unsigned op,
case TGSI_OPCODE_##c: \
   if (type == GLSL_TYPE_DOUBLE) \
  op = TGSI_OPCODE_##d; \
-  else if (type == GLSL_TYPE_INT)   \
+  else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE)   \
  op = TGSI_OPCODE_##i; \
   else if (type == GLSL_TYPE_UINT) \
  op = TGSI_OPCODE_##u; \
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 20/20] st/mesa: enable shader subroutine

2015-07-20 Thread Dave Airlie

From: Dave Airlie 

since this touches drivers, only enable it on gallium
for now for drivers reporting GLSL 1.30 or above.

Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_extensions.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index b1057f3..e5796f9 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -693,6 +693,7 @@ void st_init_extensions(struct pipe_screen *screen,
   extensions->OES_depth_texture_cube_map = GL_TRUE;
   extensions->ARB_shading_language_420pack = GL_TRUE;
   extensions->ARB_texture_query_levels = GL_TRUE;
+  extensions->ARB_shader_subroutine = GL_TRUE;
 
   if (!options->disable_shader_bit_encoding) {
  extensions->ARB_shader_bit_encoding = GL_TRUE;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/4] radeonsi: split out interpolation input selection

2015-07-21 Thread Dave Airlie

From: Dave Airlie 

This is prep work for using it in the interpolation code
later.

Also add storage for the input interpolation mode so we
can pick it up later.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/radeonsi/si_shader.c | 62 +++-
 src/gallium/drivers/radeonsi/si_shader.h |  2 +-
 2 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 22fb983..f23eaa4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -401,6 +401,35 @@ static LLVMValueRef fetch_input_gs(
tgsi2llvmtype(bld_base, type), "");
 }
 
+static int lookup_interp_param_index(unsigned interpolate, unsigned location)
+{
+   switch (interpolate) {
+   case TGSI_INTERPOLATE_CONSTANT:
+   return 0;
+
+   case TGSI_INTERPOLATE_LINEAR:
+   if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
+   return SI_PARAM_LINEAR_SAMPLE;
+   else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
+   return SI_PARAM_LINEAR_CENTROID;
+   else
+   return SI_PARAM_LINEAR_CENTER;
+   break;
+   case TGSI_INTERPOLATE_COLOR:
+   case TGSI_INTERPOLATE_PERSPECTIVE:
+   if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
+   return SI_PARAM_PERSP_SAMPLE;
+   else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
+   return SI_PARAM_PERSP_CENTROID;
+   else
+   return SI_PARAM_PERSP_CENTER;
+   break;
+   default:
+   fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
+   return -1;
+   }
+}
+
 static void declare_input_fs(
struct radeon_llvm_context *radeon_bld,
unsigned input_index,
@@ -415,7 +444,8 @@ static void declare_input_fs(
LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
LLVMValueRef main_fn = radeon_bld->main_fn;
 
-   LLVMValueRef interp_param;
+   LLVMValueRef interp_param = NULL;
+   int interp_param_idx;
const char * intr_name;
 
/* This value is:
@@ -464,30 +494,14 @@ static void declare_input_fs(
attr_number = lp_build_const_int32(gallivm,
   
shader->ps_input_param_offset[input_index]);
 
-   switch (decl->Interp.Interpolate) {
-   case TGSI_INTERPOLATE_CONSTANT:
-   interp_param = 0;
-   break;
-   case TGSI_INTERPOLATE_LINEAR:
-   if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
-   interp_param = LLVMGetParam(main_fn, 
SI_PARAM_LINEAR_SAMPLE);
-   else if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
-   interp_param = LLVMGetParam(main_fn, 
SI_PARAM_LINEAR_CENTROID);
-   else
-   interp_param = LLVMGetParam(main_fn, 
SI_PARAM_LINEAR_CENTER);
-   break;
-   case TGSI_INTERPOLATE_COLOR:
-   case TGSI_INTERPOLATE_PERSPECTIVE:
-   if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
-   interp_param = LLVMGetParam(main_fn, 
SI_PARAM_PERSP_SAMPLE);
-   else if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
-   interp_param = LLVMGetParam(main_fn, 
SI_PARAM_PERSP_CENTROID);
-   else
-   interp_param = LLVMGetParam(main_fn, 
SI_PARAM_PERSP_CENTER);
-   break;
-   default:
-   fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
+   shader->ps_input_interpolate[input_index] = decl->Interp.Interpolate;
+   interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
+decl->Interp.Location);
+   if (interp_param_idx == -1)
return;
+   else if (interp_param_idx)
+   {
+   interp_param = LLVMGetParam(main_fn, interp_param_idx);
}
 
/* fs.constant returns the param from the middle vertex, so it's not
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 8d309b4..21bae5f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -162,7 +162,7 @@ struct si_shader {
unsignednparam;
unsignedvs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS];
unsignedps_input_param_offset[PIPE_MAX_SHADER_INPUTS];
-
+   unsignedps_input_interpolate[PIPE_MAX_SHADER_INPUTS];
booluses_instanceid;
unsignednr_p

[Mesa-dev] [PATCH 4/4] radeonsi: add support for interpolateAt functions

2015-07-21 Thread Dave Airlie

From: Dave Airlie 

This is part of ARB_gpu_shader5, and this passes
all the piglit tests currently available.

Signed-off-by: Dave Airlie 
---
 docs/GL3.txt |   2 +-
 src/gallium/drivers/radeonsi/si_shader.c | 232 ++-
 2 files changed, 232 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 4f6c415..d74ae63 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -107,7 +107,7 @@ GL 4.0, GLSL 4.00:
   - Geometry shader instancing DONE (r600, radeonsi, 
llvmpipe, softpipe)
   - Geometry shader multiple streams   DONE ()
   - Enhanced per-sample shadingDONE (r600, radeonsi)
-  - Interpolation functionsDONE (r600)
+  - Interpolation functionsDONE (r600, radeonsi)
   - New overload resolution rules  DONE
   GL_ARB_gpu_shader_fp64   DONE (nvc0, radeonsi, 
llvmpipe, softpipe)
   GL_ARB_sample_shadingDONE (i965, nv50, nvc0, 
r600, radeonsi)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index c5d80f0..0c01c90 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2263,6 +2263,225 @@ static void si_llvm_emit_ddxy(
emit_data->output[0] = lp_build_gather_values(gallivm, result, 4);
 }
 
+/* return 4 values - v2i32 DDX, v2i32 DDY */
+static LLVMValueRef si_llvm_emit_ddxy_interp(
+   struct lp_build_tgsi_context * bld_base,
+   LLVMValueRef interp_ij)
+{
+   struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   struct lp_build_context * base = &bld_base->base;
+   LLVMValueRef indices[2];
+   LLVMValueRef store_ptr, load_ptr_x, load_ptr_y, load_ptr_ddx, 
load_ptr_ddy, temp, temp2;
+   LLVMValueRef tl, tr, bl, result[4];
+   LLVMTypeRef i32;
+   unsigned c;
+
+   i32 = LLVMInt32TypeInContext(gallivm->context);
+
+   indices[0] = bld_base->uint_bld.zero;
+   indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
+NULL, 0, LLVMReadNoneAttribute);
+   store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+indices, 2, "");
+
+temp = LLVMBuildAnd(gallivm->builder, indices[1],
+   lp_build_const_int32(gallivm, 0xfffe), "");
+
+   temp2 = LLVMBuildAnd(gallivm->builder, indices[1],
+lp_build_const_int32(gallivm, 0xfffd), "");
+
+   indices[1] = temp;
+   load_ptr_x = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ indices, 2, "");
+
+   indices[1] = temp2;
+   load_ptr_y = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ indices, 2, "");
+
+   indices[1] = LLVMBuildAdd(gallivm->builder, temp,
+ lp_build_const_int32(gallivm,
+  1),
+ "");
+   load_ptr_ddx = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+indices, 2, "");
+
+   indices[1] = LLVMBuildAdd(gallivm->builder, temp2,
+ lp_build_const_int32(gallivm,
+  2),
+ "");
+   load_ptr_ddy = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+indices, 2, "");
+
+   for (c = 0; c < 2; ++c) {
+   LLVMValueRef store_val;
+   LLVMValueRef c_ll = lp_build_const_int32(gallivm, c);
+
+   store_val = LLVMBuildExtractElement(gallivm->builder,
+   interp_ij, c_ll, "");
+   LLVMBuildStore(gallivm->builder,
+  store_val,
+  store_ptr);
+
+   tl = LLVMBuildLoad(gallivm->builder, load_ptr_x, "");
+   tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, 
"");
+
+   tr = LLVMBuildLoad(gallivm->builder, load_ptr_ddx, "");
+   tr = LLVMBuildBitCast(gallivm->builder, tr, base->elem_type, 
"");
+
+   result[c] = LLVMBuildFSub(gallivm->builder, tr, tl, "");
+
+   tl = LLVMBuildLoad(gallivm->builder, load_ptr_y, "");
+   tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, 
"");
+

[Mesa-dev] [PATCH 3/4] radeonsi: add fine derivative control

2015-07-21 Thread Dave Airlie

From: Dave Airlie 

This adds support for fine derivatives and enables
ARB_derivative_control on radeonsi.

(just fell out of my working out interpolation)

Signed-off-by: Dave Airlie 
---
 docs/GL3.txt |  2 +-
 docs/relnotes/10.7.0.html|  1 +
 src/gallium/drivers/radeonsi/si_pipe.c   |  5 +++--
 src/gallium/drivers/radeonsi/si_shader.c | 20 
 4 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 33a282e..4f6c415 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -191,7 +191,7 @@ GL 4.5, GLSL 4.50:
   GL_ARB_clip_control  DONE (i965, nv50, nvc0, 
r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_conditional_render_inverted   DONE (i965, nv50, nvc0, 
llvmpipe, softpipe)
   GL_ARB_cull_distance in progress (Tobias)
-  GL_ARB_derivative_controlDONE (i965, nv50, nvc0, 
r600)
+  GL_ARB_derivative_controlDONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_direct_state_access   DONE (all drivers)
   - Transform Feedback object  DONE
   - Buffer object  DONE
diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html
index 42ea807..a69971f 100644
--- a/docs/relnotes/10.7.0.html
+++ b/docs/relnotes/10.7.0.html
@@ -45,6 +45,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 GL_AMD_vertex_shader_viewport_index on radeonsi
+GL_ARB_derivative_control on radeonsi
 GL_ARB_fragment_layer_viewport on radeonsi
 GL_ARB_framebuffer_no_attachments on i965
 GL_ARB_gpu_shader_fp64 on llvmpipe, radeonsi
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 2b6a6ff..f725677 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -284,7 +284,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
return HAVE_LLVM >= 0x0305;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return HAVE_LLVM >= 0x0305 ? 4 : 0;
-
+   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+   return 1;
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
@@ -293,7 +294,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
-   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index f23eaa4..c5d80f0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2203,7 +2203,8 @@ static void si_llvm_emit_ddxy(
LLVMTypeRef i32;
unsigned swizzle[4];
unsigned c;
-
+   int idx;
+   unsigned mask;
i32 = LLVMInt32TypeInContext(gallivm->context);
 
indices[0] = bld_base->uint_bld.zero;
@@ -2212,14 +2213,21 @@ static void si_llvm_emit_ddxy(
store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
 indices, 2, "");
 
+   if (opcode == TGSI_OPCODE_DDX_FINE)
+   mask = 0xfffe;
+   else if (opcode == TGSI_OPCODE_DDY_FINE)
+   mask = 0xfffd;
+   else
+   mask = 0xfffc;
indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
- lp_build_const_int32(gallivm, 0xfffc), 
"");
+ lp_build_const_int32(gallivm, mask), "");
load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
 indices, 2, "");
 
+   idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 
:2;
indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
  lp_build_const_int32(gallivm,
-  opcode == 
TGSI_OPCODE_DDX ? 1 : 2),
+  idx),
  "");
load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
 indices, 2, "");
@@ -2506,7 +2514,9 @@ static void create_function(struct si_shader_context 
*si_shader_ctx)
 
if (bld_base->info &&
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
-bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0))
+

[Mesa-dev] [PATCH 1/4] radeonsi: separate out load sample position

2015-07-21 Thread Dave Airlie

From: Dave Airlie 

This is prep work for reusing this in the interpolation
code later.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/radeonsi/si_shader.c | 44 +++-
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 75a29ae..22fb983 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -595,6 +595,31 @@ static LLVMValueRef buffer_load_const(LLVMBuilderRef 
builder, LLVMValueRef resou
   LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
 }
 
+static LLVMValueRef load_sample_position(struct radeon_llvm_context 
*radeon_bld, LLVMValueRef sample_id)
+{
+   struct si_shader_context *si_shader_ctx =
+   si_shader_context(&radeon_bld->soa.bld_base);
+   struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
+   struct gallivm_state *gallivm = &radeon_bld->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, 
SI_PARAM_CONST);
+   LLVMValueRef buf_index = lp_build_const_int32(gallivm, 
SI_DRIVER_STATE_CONST_BUF);
+   LLVMValueRef resource = build_indexed_load_const(si_shader_ctx, desc, 
buf_index);
+
+   /* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
+   LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8);
+   LLVMValueRef offset1 = LLVMBuildAdd(builder, offset0, 
lp_build_const_int32(gallivm, 4), "");
+
+   LLVMValueRef pos[4] = {
+   buffer_load_const(builder, resource, offset0, 
radeon_bld->soa.bld_base.base.elem_type),
+   buffer_load_const(builder, resource, offset1, 
radeon_bld->soa.bld_base.base.elem_type),
+   lp_build_const_float(gallivm, 0),
+   lp_build_const_float(gallivm, 0)
+   };
+
+   return lp_build_gather_values(gallivm, pos, 4);
+}
+
 static void declare_system_value(
struct radeon_llvm_context * radeon_bld,
unsigned index,
@@ -640,25 +665,8 @@ static void declare_system_value(
break;
 
case TGSI_SEMANTIC_SAMPLEPOS:
-   {
-   LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef desc = 
LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
-   LLVMValueRef buf_index = lp_build_const_int32(gallivm, 
SI_DRIVER_STATE_CONST_BUF);
-   LLVMValueRef resource = build_indexed_load_const(si_shader_ctx, 
desc, buf_index);
-
-   /* offset = sample_id * 8  (8 = 2 floats containing 
samplepos.xy) */
-   LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, 
get_sample_id(radeon_bld), 8);
-   LLVMValueRef offset1 = LLVMBuildAdd(builder, offset0, 
lp_build_const_int32(gallivm, 4), "");
-
-   LLVMValueRef pos[4] = {
-   buffer_load_const(builder, resource, offset0, 
radeon_bld->soa.bld_base.base.elem_type),
-   buffer_load_const(builder, resource, offset1, 
radeon_bld->soa.bld_base.base.elem_type),
-   lp_build_const_float(gallivm, 0),
-   lp_build_const_float(gallivm, 0)
-   };
-   value = lp_build_gather_values(gallivm, pos, 4);
+   value = load_sample_position(radeon_bld, 
get_sample_id(radeon_bld));
break;
-   }
 
case TGSI_SEMANTIC_SAMPLEMASK:
/* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] radeonsi: interpolateAt and ARB_derivative_control support

2015-07-21 Thread Dave Airlie

These are pretty standalone so I've pulled them out from my radeonsi-dev
tree and cleaned them up.

They pass all the tests, and get one step closer to gpu_shader5.

Dave.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] radeonsi: rename ddxy_lds to lds

2015-07-21 Thread Dave Airlie

From: Dave Airlie 

This is picked from Marek's tess work, and I think
it makes sense to just do it first.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/radeonsi/si_shader.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 0c01c90..a944547 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -75,7 +75,7 @@ struct si_shader_context
LLVMTargetMachineRef tm;
LLVMValueRef const_md;
LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
-   LLVMValueRef ddxy_lds;
+   LLVMValueRef lds;
LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
LLVMValueRef resources[SI_NUM_SAMPLER_VIEWS];
LLVMValueRef samplers[SI_NUM_SAMPLER_STATES];
@@ -2210,7 +2210,7 @@ static void si_llvm_emit_ddxy(
indices[0] = bld_base->uint_bld.zero;
indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
 NULL, 0, LLVMReadNoneAttribute);
-   store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+   store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
 indices, 2, "");
 
if (opcode == TGSI_OPCODE_DDX_FINE)
@@ -2221,7 +2221,7 @@ static void si_llvm_emit_ddxy(
mask = 0xfffc;
indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
  lp_build_const_int32(gallivm, mask), "");
-   load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+   load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
 indices, 2, "");
 
idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 
:2;
@@ -2229,7 +2229,7 @@ static void si_llvm_emit_ddxy(
  lp_build_const_int32(gallivm,
   idx),
  "");
-   load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+   load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
 indices, 2, "");
 
for (c = 0; c < 4; ++c) {
@@ -2282,7 +2282,7 @@ static LLVMValueRef si_llvm_emit_ddxy_interp(
indices[0] = bld_base->uint_bld.zero;
indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
 NULL, 0, LLVMReadNoneAttribute);
-   store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+   store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
 indices, 2, "");
 
 temp = LLVMBuildAnd(gallivm->builder, indices[1],
@@ -2292,25 +2292,25 @@ static LLVMValueRef si_llvm_emit_ddxy_interp(
 lp_build_const_int32(gallivm, 0xfffd), "");
 
indices[1] = temp;
-   load_ptr_x = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+   load_ptr_x = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
  indices, 2, "");
 
indices[1] = temp2;
-   load_ptr_y = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+   load_ptr_y = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
  indices, 2, "");
 
indices[1] = LLVMBuildAdd(gallivm->builder, temp,
  lp_build_const_int32(gallivm,
   1),
  "");
-   load_ptr_ddx = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+   load_ptr_ddx = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
 indices, 2, "");
 
indices[1] = LLVMBuildAdd(gallivm->builder, temp2,
  lp_build_const_int32(gallivm,
   2),
  "");
-   load_ptr_ddy = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+   load_ptr_ddy = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
 indices, 2, "");
 
for (c = 0; c < 2; ++c) {
@@ -2743,7 +2743,7 @@ static void create_function(struct si_shader_context 
*si_shader_ctx)
 bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
 bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
 bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
-   si_shader_ctx->ddxy_lds =
+

[Mesa-dev] [PATCH 2/2] radeonsi: renamed gs_used_inputs to inputs_read

2015-07-21 Thread Dave Airlie

From: Dave Airlie 

This is also picked from Marek's tess work; it reduces
the conflicts with my GS5 work.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/radeonsi/si_shader.c| 2 +-
 src/gallium/drivers/radeonsi/si_shader.h| 4 +++-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 6 +++---
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a944547..2705dcc 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -384,7 +384,7 @@ static LLVMValueRef fetch_input_gs(
args[1] = vtx_offset;
args[2] = lp_build_const_int32(gallivm,
   (get_param_index(semantic_name, 
semantic_index,
-   
shader->selector->gs_used_inputs) * 4 +
+   
shader->selector->inputs_read) * 4 +
swizzle) * 256);
args[3] = uint->zero;
args[4] = uint->one;  /* OFFEN */
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 21bae5f..fbbd747 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -116,7 +116,9 @@ struct si_shader_selector {
unsignedgs_output_prim;
unsignedgs_max_out_vertices;
unsignedgs_num_invocations;
-   uint64_tgs_used_inputs; /* mask of "get_unique_index" bits */
+
+   /* mask of "get_unique_index" bits */
+   uint64_tinputs_read;
 };
 
 union si_shader_key {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 78be4d9..ad398d9 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -112,7 +112,7 @@ static void si_shader_gs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);
 
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-  util_bitcount64(shader->selector->gs_used_inputs) * (16 
>> 2));
+  util_bitcount64(shader->selector->inputs_read) * (16 >> 
2));
si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
 
si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
@@ -353,7 +353,7 @@ static inline void si_shader_selector_key(struct 
pipe_context *ctx,
 
if (sctx->gs_shader) {
key->vs.as_es = 1;
-   key->vs.gs_used_inputs = 
sctx->gs_shader->gs_used_inputs;
+   key->vs.gs_used_inputs = sctx->gs_shader->inputs_read;
}
break;
case PIPE_SHADER_GEOMETRY:
@@ -487,7 +487,7 @@ static void *si_create_shader_state(struct pipe_context 
*ctx,
case TGSI_SEMANTIC_PRIMID:
break;
default:
-   sel->gs_used_inputs |=
+   sel->inputs_read |=
1llu << 
si_shader_io_get_unique_index(name, index);
}
}
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] radeonsi trivial renames from tess branch

2015-07-21 Thread Dave Airlie

While pulling tess into my gs5 branch these things got annoying,
so let's just rename them upfront.

Dave.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 23/46] glsl: allow linking of tessellation shaders.

2015-07-21 Thread Dave Airlie

On 17 June 2015 at 09:01, Marek Olšák  wrote:
> From: Chris Forbes 
>
> Marek: require a tess eval shader if a tess control shader is present

As part of my learning about tess,

Reviewed-by: Dave Airlie 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 24/46] glsl: make lower_clip_distance work with tessellation shaders.

2015-07-21 Thread Dave Airlie

On 17 June 2015 at 09:01, Marek Olšák  wrote:
> From: Fabian Bieler 

Reviewed-by: Dave Airlie 
>
> ---
>  src/glsl/lower_clip_distance.cpp | 185 
> ++-
>  1 file changed, 105 insertions(+), 80 deletions(-)
>
> diff --git a/src/glsl/lower_clip_distance.cpp 
> b/src/glsl/lower_clip_distance.cpp
> index 01f028b..1ada215 100644
> --- a/src/glsl/lower_clip_distance.cpp
> +++ b/src/glsl/lower_clip_distance.cpp
> @@ -55,9 +55,9 @@ namespace {
>  class lower_clip_distance_visitor : public ir_rvalue_visitor {
>  public:
> explicit lower_clip_distance_visitor(gl_shader_stage shader_stage)
> -  : progress(false), old_clip_distance_1d_var(NULL),
> -old_clip_distance_2d_var(NULL), new_clip_distance_1d_var(NULL),
> -new_clip_distance_2d_var(NULL), shader_stage(shader_stage)
> +  : progress(false), old_clip_distance_out_var(NULL),
> +old_clip_distance_in_var(NULL), new_clip_distance_out_var(NULL),
> +new_clip_distance_in_var(NULL), shader_stage(shader_stage)
> {
> }
>
> @@ -80,20 +80,21 @@ public:
>  *
>  * Note:
>  *
> -* - the 2d_var is for geometry shader input only.
> +* - the in_var is for geometry and both tessellation shader inputs only.
>  *
> -* - since gl_ClipDistance is available in geometry shaders as both an
> -*   input and an output, it's possible for both old_clip_distance_1d_var
> -*   and old_clip_distance_2d_var to be non-null.
> +* - since gl_ClipDistance is available in tessellation control,
> +*   tessellation evaluation and geometry shaders as both an input
> +*   and an output, it's possible for both old_clip_distance_out_var
> +*   and old_clip_distance_in_var to be non-null.
>  */
> -   ir_variable *old_clip_distance_1d_var;
> -   ir_variable *old_clip_distance_2d_var;
> +   ir_variable *old_clip_distance_out_var;
> +   ir_variable *old_clip_distance_in_var;
>
> /**
>  * Pointer to the newly-created gl_ClipDistanceMESA variable.
>  */
> -   ir_variable *new_clip_distance_1d_var;
> -   ir_variable *new_clip_distance_2d_var;
> +   ir_variable *new_clip_distance_out_var;
> +   ir_variable *new_clip_distance_in_var;
>
> /**
>  * Type of shader we are compiling (e.g. MESA_SHADER_VERTEX)
> @@ -110,62 +111,81 @@ public:
>  ir_visitor_status
>  lower_clip_distance_visitor::visit(ir_variable *ir)
>  {
> +   ir_variable **old_var;
> +   ir_variable **new_var;
> +
> if (!ir->name || strcmp(ir->name, "gl_ClipDistance") != 0)
>return visit_continue;
> assert (ir->type->is_array());
>
> -   if (!ir->type->fields.array->is_array()) {
> -  /* 1D gl_ClipDistance (used for vertex and geometry output, and 
> fragment
> -   * input).
> -   */
> -  if (this->old_clip_distance_1d_var)
> +   if (ir->data.mode == ir_var_shader_out) {
> +  if (this->old_clip_distance_out_var)
> + return visit_continue;
> +  old_var = &old_clip_distance_out_var;
> +  new_var = &new_clip_distance_out_var;
> +   } else if (ir->data.mode == ir_var_shader_in) {
> +  if (this->old_clip_distance_in_var)
>   return visit_continue;
> +  old_var = &old_clip_distance_in_var;
> +  new_var = &new_clip_distance_in_var;
> +   } else {
> +  unreachable("not reached");
> +   }
>
> -  this->progress = true;
> -  this->old_clip_distance_1d_var = ir;
> +   this->progress = true;
> +
> +   if (!ir->type->fields.array->is_array()) {
> +  /* gl_ClipDistance (used for vertex, tessellation evaluation and
> +   * geometry output, and fragment input).
> +   */
> +  assert((ir->data.mode == ir_var_shader_in &&
> +  this->shader_stage == MESA_SHADER_FRAGMENT) ||
> + (ir->data.mode == ir_var_shader_out &&
> +  (this->shader_stage == MESA_SHADER_VERTEX ||
> +   this->shader_stage == MESA_SHADER_TESS_EVAL ||
> +   this->shader_stage == MESA_SHADER_GEOMETRY)));
> +
> +  *old_var = ir;
>assert (ir->type->fields.array == glsl_type::float_type);
>unsigned new_size = (ir->type->array_size() + 3) / 4;
>
>/* Clone the old var so that we inherit all of its properties */
> -  this->new_clip_distance_1d_var = ir->clone(ralloc_parent(ir), NULL);
> +  *new_var = ir->clone(ralloc_parent(ir), NULL);
>
>/* And change the properties that we need to change */
> -  this->new_clip_distance_1d_var->name
> - = ralloc_s

Re: [Mesa-dev] [PATCH 26/46] glsl: add "in" or "out" prefix to name when flattening interface blocks

2015-07-21 Thread Dave Airlie

On 17 June 2015 at 09:01, Marek Olšák  wrote:
> From: Fabian Bieler 
>
Reviewed-by: Dave Airlie 
> This is to prevent a name conflict in tessellation shaders built-in interface
> blocks.
> ---
>  src/glsl/lower_named_interface_blocks.cpp | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/src/glsl/lower_named_interface_blocks.cpp 
> b/src/glsl/lower_named_interface_blocks.cpp
> index 2f1e3af..29606f7 100644
> --- a/src/glsl/lower_named_interface_blocks.cpp
> +++ b/src/glsl/lower_named_interface_blocks.cpp
> @@ -125,7 +125,8 @@ 
> flatten_named_interface_blocks_declarations::run(exec_list *instructions)
>for (unsigned i = 0; i < iface_t->length; i++) {
>   const char * field_name = iface_t->fields.structure[i].name;
>   char *iface_field_name =
> -ralloc_asprintf(mem_ctx, "%s.%s.%s",
> +ralloc_asprintf(mem_ctx, "%s %s.%s.%s",
> +var->data.mode == ir_var_shader_in ? "in" : 
> "out",
>  iface_t->name, var->name, field_name);
>
>   ir_variable *found_var =
> @@ -218,7 +219,9 @@ 
> flatten_named_interface_blocks_declarations::handle_rvalue(ir_rvalue **rvalue)
>
> if (var->get_interface_type() != NULL) {
>char *iface_field_name =
> - ralloc_asprintf(mem_ctx, "%s.%s.%s", 
> var->get_interface_type()->name,
> + ralloc_asprintf(mem_ctx, "%s %s.%s.%s",
> + var->data.mode == ir_var_shader_in ? "in" : "out",
> + var->get_interface_type()->name,
>   var->name, ir->field);
>/* Find the variable in the set of flattened interface blocks */
>ir_variable *found_var =
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 28/46] glsl: don't lower variable indexing on non-patch tessellation inputs/outputs

2015-07-21 Thread Dave Airlie

On 17 July 2015 at 06:15, Marek Olšák  wrote:
> From: Marek Olšák 
>
> There is no way to lower them, because the array sizes are unknown
> at compile time.
>
> Based on a patch from: Fabian Bieler 
>
> v2: add comments

you added the comment but you typoed it.


> +
>case ir_var_shader_in:
> + /* The input array size is unknown at compiler time for non-patch
> +  * inputs in TCS and TES. The arrays are sized to
> +  * the implementation-dependent limit "gl_MaxPatchVertices", but
> +  * the real size is stored in the "gl_PatchVerticeIn" built-in

gl_PatchVerticeIn?

Other than that,

Reviewed-by: Dave Airlie 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 30/46] glsl: relax unsized input/output block arrays for TCS/TES

2015-07-21 Thread Dave Airlie

This should use

_mesa_shader_stage_to_string

in the _mesa_glsl_error strings.

Change that and it can have
Reviewed-by: Dave Airlie 
>
> ---
>  src/glsl/ast_to_hir.cpp | 33 ++---
>  1 file changed, 26 insertions(+), 7 deletions(-)
>
> diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
> index 837bac7..fb957a1 100644
> --- a/src/glsl/ast_to_hir.cpp
> +++ b/src/glsl/ast_to_hir.cpp
> @@ -5938,16 +5938,35 @@ ast_interface_block::hir(exec_list *instructions,
>* geometry shader inputs. All other input and output block
>* arrays must specify an array size.
>*
> +  * The same applies to tessellation shaders.
> +  *
>* The upshot of this is that the only circumstance where an
>* interface array size *doesn't* need to be specified is on a
> -  * geometry shader input.
> +  * geometry shader input, tessellation control shader input,
> +  * tessellation control shader output, and tessellation evaluation
> +  * shader input.
>*/
> - if (this->array_specifier->is_unsized_array &&
> - (state->stage != MESA_SHADER_GEOMETRY || 
> !this->layout.flags.q.in)) {
> -_mesa_glsl_error(&loc, state,
> - "only geometry shader inputs may be unsized "
> - "instance block arrays");
> -
> + if (this->array_specifier->is_unsized_array) {
> +bool allow_inputs = state->stage == MESA_SHADER_GEOMETRY ||
> +state->stage == MESA_SHADER_TESS_CTRL ||
> +state->stage == MESA_SHADER_TESS_EVAL;
> +bool allow_outputs = state->stage == MESA_SHADER_TESS_CTRL;
> +
> +if (this->layout.flags.q.in) {
> +   if (!allow_inputs)
> +  _mesa_glsl_error(&loc, state,
> +   "unsized input block arrays not allowed 
> in this "
> +   "shader stage");
> +} else if (this->layout.flags.q.out) {
> +   if (!allow_outputs)
> +  _mesa_glsl_error(&loc, state,
> +   "unsized output block arrays not allowed 
> in this "
> +   "shader stage");
> +} else {
> +   /* by elimination, this is a uniform block array */
> +   _mesa_glsl_error(&loc, state,
> +"unsized uniform block arrays not allowed");
> +}
>   }
>
>   const glsl_type *block_array_type =
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 01/46] drirc: drop support for Heaven 3.0, fixes tessellation in 4.0

2015-07-21 Thread Dave Airlie

>> The patches that are missing Rbs are:
>>
>> 23-24, 26-43, 45-46.

With where I've replied, you can add my Reviewed-by to these.

it might be nice to sed/^I// across the patches that add tabs
into GLSL, but it's already scary in there.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 4069 matches

Mail list logo