Re: [Mesa-dev] [PATCH] glsl: use set rather than old hash table for ir_validate

2015-07-10 Thread Chris Forbes
Perf data?

On Fri, Jul 10, 2015 at 6:41 PM, Timothy Arceri  wrote:
> This implementation should be faster and there was no
> need to store a data field.
> ---
>  src/glsl/ir_validate.cpp | 24 
>  1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
> index cfe0df3..684bef2 100644
> --- a/src/glsl/ir_validate.cpp
> +++ b/src/glsl/ir_validate.cpp
> @@ -35,7 +35,8 @@
>
>  #include "ir.h"
>  #include "ir_hierarchical_visitor.h"
> -#include "program/hash_table.h"
> +#include "util/hash_table.h"
> +#include "util/set.h"
>  #include "glsl_types.h"
>
>  namespace {
> @@ -44,18 +45,18 @@ class ir_validate : public ir_hierarchical_visitor {
>  public:
> ir_validate()
> {
> -  this->ht = hash_table_ctor(0, hash_table_pointer_hash,
> -hash_table_pointer_compare);
> +  this->ir_set = _mesa_set_create(NULL, _mesa_hash_pointer,
> +  _mesa_key_pointer_equal);
>
>this->current_function = NULL;
>
>this->callback_enter = ir_validate::validate_ir;
> -  this->data_enter = ht;
> +  this->data_enter = ir_set;
> }
>
> ~ir_validate()
> {
> -  hash_table_dtor(this->ht);
> +  _mesa_set_destroy(this->ir_set, NULL);
> }
>
> virtual ir_visitor_status visit(ir_variable *v);
> @@ -80,7 +81,7 @@ public:
>
> ir_function *current_function;
>
> -   struct hash_table *ht;
> +   struct set *ir_set;
>  };
>
>  } /* anonymous namespace */
> @@ -94,7 +95,7 @@ ir_validate::visit(ir_dereference_variable *ir)
>abort();
> }
>
> -   if (hash_table_find(ht, ir->var) == NULL) {
> +   if (_mesa_set_search(ir_set, ir->var) == NULL) {
>printf("ir_dereference_variable @ %p specifies undeclared variable "
>  "`%s' @ %p\n",
>  (void *) ir, ir->var->name, (void *) ir->var);
> @@ -730,8 +731,7 @@ ir_validate::visit(ir_variable *ir)
> if (ir->name && ir->is_name_ralloced())
>assert(ralloc_parent(ir->name) == ir);
>
> -   hash_table_insert(ht, ir, ir);
> -
> +   _mesa_set_add(ir_set, ir);
>
> /* If a variable is an array, verify that the maximum array index is in
>  * bounds.  There was once an error in AST-to-HIR conversion that set this
> @@ -885,15 +885,15 @@ dump_ir:
>  void
>  ir_validate::validate_ir(ir_instruction *ir, void *data)
>  {
> -   struct hash_table *ht = (struct hash_table *) data;
> +   struct set *ir_set = (struct set *) data;
>
> -   if (hash_table_find(ht, ir)) {
> +   if (_mesa_set_search(ir_set, ir)) {
>printf("Instruction node present twice in ir tree:\n");
>ir->print();
>printf("\n");
>abort();
> }
> -   hash_table_insert(ht, ir, ir);
> +   _mesa_set_add(ir_set, ir);
>  }
>
>  void
> --
> 2.4.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: use set rather than old hash table for ir_validate

2015-07-10 Thread Timothy Arceri
On Fri, 2015-07-10 at 19:07 +1200, Chris Forbes wrote:
> Perf data?

I can create some if you like, but wasn't program/hash_table.c meant to die
a long time ago [1] anyway?

[1] http://lists.freedesktop.org/archives/mesa-dev/2013-December/050524.html

> 
> On Fri, Jul 10, 2015 at 6:41 PM, Timothy Arceri  
> wrote:
> > This implementation should be faster and there was no
> > need to store a data field.
> > ---
> >  src/glsl/ir_validate.cpp | 24 
> >  1 file changed, 12 insertions(+), 12 deletions(-)
> > 
> > diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
> > index cfe0df3..684bef2 100644
> > --- a/src/glsl/ir_validate.cpp
> > +++ b/src/glsl/ir_validate.cpp
> > @@ -35,7 +35,8 @@
> > 
> >  #include "ir.h"
> >  #include "ir_hierarchical_visitor.h"
> > -#include "program/hash_table.h"
> > +#include "util/hash_table.h"
> > +#include "util/set.h"
> >  #include "glsl_types.h"
> > 
> >  namespace {
> > @@ -44,18 +45,18 @@ class ir_validate : public ir_hierarchical_visitor {
> >  public:
> > ir_validate()
> > {
> > -  this->ht = hash_table_ctor(0, hash_table_pointer_hash,
> > -hash_table_pointer_compare);
> > +  this->ir_set = _mesa_set_create(NULL, _mesa_hash_pointer,
> > +  _mesa_key_pointer_equal);
> > 
> >this->current_function = NULL;
> > 
> >this->callback_enter = ir_validate::validate_ir;
> > -  this->data_enter = ht;
> > +  this->data_enter = ir_set;
> > }
> > 
> > ~ir_validate()
> > {
> > -  hash_table_dtor(this->ht);
> > +  _mesa_set_destroy(this->ir_set, NULL);
> > }
> > 
> > virtual ir_visitor_status visit(ir_variable *v);
> > @@ -80,7 +81,7 @@ public:
> > 
> > ir_function *current_function;
> > 
> > -   struct hash_table *ht;
> > +   struct set *ir_set;
> >  };
> > 
> >  } /* anonymous namespace */
> > @@ -94,7 +95,7 @@ ir_validate::visit(ir_dereference_variable *ir)
> >abort();
> > }
> > 
> > -   if (hash_table_find(ht, ir->var) == NULL) {
> > +   if (_mesa_set_search(ir_set, ir->var) == NULL) {
> >printf("ir_dereference_variable @ %p specifies undeclared variable 
> > "
> >  "`%s' @ %p\n",
> >  (void *) ir, ir->var->name, (void *) ir->var);
> > @@ -730,8 +731,7 @@ ir_validate::visit(ir_variable *ir)
> > if (ir->name && ir->is_name_ralloced())
> >assert(ralloc_parent(ir->name) == ir);
> > 
> > -   hash_table_insert(ht, ir, ir);
> > -
> > +   _mesa_set_add(ir_set, ir);
> > 
> > /* If a variable is an array, verify that the maximum array index is 
> > in
> >  * bounds.  There was once an error in AST-to-HIR conversion that set 
> > this
> > @@ -885,15 +885,15 @@ dump_ir:
> >  void
> >  ir_validate::validate_ir(ir_instruction *ir, void *data)
> >  {
> > -   struct hash_table *ht = (struct hash_table *) data;
> > +   struct set *ir_set = (struct set *) data;
> > 
> > -   if (hash_table_find(ht, ir)) {
> > +   if (_mesa_set_search(ir_set, ir)) {
> >printf("Instruction node present twice in ir tree:\n");
> >ir->print();
> >printf("\n");
> >abort();
> > }
> > -   hash_table_insert(ht, ir, ir);
> > +   _mesa_set_add(ir_set, ir);
> >  }
> > 
> >  void
> > --
> > 2.4.3
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: use set rather than old hash table for ir_validate

2015-07-10 Thread Iago Toral
On Fri, 2015-07-10 at 19:07 +1200, Chris Forbes wrote:
> Perf data?

When Eric added the new hash table implementation to Mesa, it was
claimed to be much faster; see commits 35fd61bd99c1 and 72e55bb6888ff.
The set implementation seems to follow the same implementation strategy,
so I suppose it makes sense to claim the same for it.

Timothy: maybe it is a good idea to add some text like that to the
commit log for future reference. With that:

Reviewed-by: Iago Toral Quiroga 

Iago

> On Fri, Jul 10, 2015 at 6:41 PM, Timothy Arceri  wrote:
> > This implementation should be faster and there was no
> > need to store a data field.
> > ---
> >  src/glsl/ir_validate.cpp | 24 
> >  1 file changed, 12 insertions(+), 12 deletions(-)
> >
> > diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
> > index cfe0df3..684bef2 100644
> > --- a/src/glsl/ir_validate.cpp
> > +++ b/src/glsl/ir_validate.cpp
> > @@ -35,7 +35,8 @@
> >
> >  #include "ir.h"
> >  #include "ir_hierarchical_visitor.h"
> > -#include "program/hash_table.h"
> > +#include "util/hash_table.h"
> > +#include "util/set.h"
> >  #include "glsl_types.h"
> >
> >  namespace {
> > @@ -44,18 +45,18 @@ class ir_validate : public ir_hierarchical_visitor {
> >  public:
> > ir_validate()
> > {
> > -  this->ht = hash_table_ctor(0, hash_table_pointer_hash,
> > -hash_table_pointer_compare);
> > +  this->ir_set = _mesa_set_create(NULL, _mesa_hash_pointer,
> > +  _mesa_key_pointer_equal);
> >
> >this->current_function = NULL;
> >
> >this->callback_enter = ir_validate::validate_ir;
> > -  this->data_enter = ht;
> > +  this->data_enter = ir_set;
> > }
> >
> > ~ir_validate()
> > {
> > -  hash_table_dtor(this->ht);
> > +  _mesa_set_destroy(this->ir_set, NULL);
> > }
> >
> > virtual ir_visitor_status visit(ir_variable *v);
> > @@ -80,7 +81,7 @@ public:
> >
> > ir_function *current_function;
> >
> > -   struct hash_table *ht;
> > +   struct set *ir_set;
> >  };
> >
> >  } /* anonymous namespace */
> > @@ -94,7 +95,7 @@ ir_validate::visit(ir_dereference_variable *ir)
> >abort();
> > }
> >
> > -   if (hash_table_find(ht, ir->var) == NULL) {
> > +   if (_mesa_set_search(ir_set, ir->var) == NULL) {
> >printf("ir_dereference_variable @ %p specifies undeclared variable "
> >  "`%s' @ %p\n",
> >  (void *) ir, ir->var->name, (void *) ir->var);
> > @@ -730,8 +731,7 @@ ir_validate::visit(ir_variable *ir)
> > if (ir->name && ir->is_name_ralloced())
> >assert(ralloc_parent(ir->name) == ir);
> >
> > -   hash_table_insert(ht, ir, ir);
> > -
> > +   _mesa_set_add(ir_set, ir);
> >
> > /* If a variable is an array, verify that the maximum array index is in
> >  * bounds.  There was once an error in AST-to-HIR conversion that set 
> > this
> > @@ -885,15 +885,15 @@ dump_ir:
> >  void
> >  ir_validate::validate_ir(ir_instruction *ir, void *data)
> >  {
> > -   struct hash_table *ht = (struct hash_table *) data;
> > +   struct set *ir_set = (struct set *) data;
> >
> > -   if (hash_table_find(ht, ir)) {
> > +   if (_mesa_set_search(ir_set, ir)) {
> >printf("Instruction node present twice in ir tree:\n");
> >ir->print();
> >printf("\n");
> >abort();
> > }
> > -   hash_table_insert(ht, ir, ir);
> > +   _mesa_set_add(ir_set, ir);
> >  }
> >
> >  void
> > --
> > 2.4.3
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: fix Bug 85252 - Segfault in compiler while processing ternary operator with void arguments

2015-07-10 Thread Samuel Iglesias Gonsálvez
Hello Renaud,

I am going to comment on a few things to improve your patch submission.

In the email subject we usually put [PATCH vN] to indicate it is the
Nth version of the patch, so the reviewers don't get confused about
which version is the most recent. You can do that by editing the .patch
file generated by the 'git format-patch' command.

"fix Bug 85252" is not needed in the subject line because you point to
the bug report at the end of the commit log.

On 09/07/15 15:55, Renaud Gaubert wrote:
> This is done by returning an rvalue of type void in the
> ast_function_expression::hir function instead of a void expression.
> 
> This produces (in the case of the ternary) an hir with a call
> to the void returning function and an assignement of a void variable
> which will be optimized out (the assignement) during the optimization
> pass.
> 

s/assignement/assignment in both places.

> This fix results in having a valid subexpression in the many
> different cases where the subexpressions are functions whose
> return values are void.
> 
> Thus preventing to dereference NULL in the following cases:
>   * binary operator
>   * unary operators
>   * ternary operator
>   * comparison operators (except equal and nequal operator)
> 

If the fix is more generic, I think you should change the subject line to:

"glsl: avoid compiler's segfault when processing operators with void
arguments"

Or something similar.

I guess the piglit tests you mention in the commit log are going to
check these cases, right?

> Equal and nequal had to be handled as a special case because
> instead of segfaulting on a forbidden syntax it was now accepting
> expressions with a void return value on either (or both) side of
> the expression.
> 
> Piglist tests are on the way
> 

s/Piglist/Piglit

And there is no need to put this sentence in the commit log, please
remove it. If you want to mention something for the reviewers, write it
down below the "---". For example:

---
 Piglit tests are on the way

 src/glsl/ast_function.cpp |  7 ++-
 src/glsl/ast_to_hir.cpp   | 10 +-
 2 files changed, 15 insertions(+), 2 deletions(-)

It won't be part of the commit log when the patch is applied to the repo
but it is handy to describe differences to other patch versions or to
mention something relevant for the reviewer that doesn't fit inside the
commit log... like that sentence.

> Signed-off-by: Renaud Gaubert 
> Reviewed-by: Gabriel Laskar 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85252
> ---
>  src/glsl/ast_function.cpp |  7 ++-
>  src/glsl/ast_to_hir.cpp   | 10 +-
>  2 files changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
> index 92e26bf..3c2b1ea 100644
> --- a/src/glsl/ast_function.cpp
> +++ b/src/glsl/ast_function.cpp
> @@ -1785,7 +1785,12 @@ ast_function_expression::hir(exec_list *instructions,
>/* an error has already been emitted */
>value = ir_rvalue::error_value(ctx);
>} else {
> -  value = generate_call(instructions, sig, &actual_parameters, state);
> +value = generate_call(instructions, sig, &actual_parameters, state);
> +if (!value) {
> +  ir_variable *const tmp = new(ctx) 
> ir_variable(glsl_type::void_type, "void_var", ir_var_temporary);
> +  value = new(ctx) ir_dereference_variable(tmp);
> +  instructions->push_tail(tmp);
> +}
>}
>  
>return value;
> diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
> index 8cb46be..00cc16c 100644
> --- a/src/glsl/ast_to_hir.cpp
> +++ b/src/glsl/ast_to_hir.cpp
> @@ -1270,7 +1270,15 @@ ast_expression::do_hir(exec_list *instructions,
> *applied to one operand that can make them match, in which
> *case this conversion is done."
> */
> -  if ((!apply_implicit_conversion(op[0]->type, op[1], state)
> +
> +  if (op[0]->type == glsl_type::void_type || op[1]->type == 
> glsl_type::void_type) {
> +

Remove this newline.

> +_mesa_glsl_error(& loc, state, "`%s':  wrong operand types: no 
> operation "
> +  "`%1$s' exists that takes a left-hand operand of type 'void' or a "
> +  "right operand of type 'void'", (this->oper == ast_equal) ? "==" : 
> "!=");

Indentation is wrong: the "`%1$s' exists [...]" and "right operand [...]"
lines should start below '& loc', like the rest of the _mesa_glsl_error()
calls. Look at them as an example of indentation for this case.

Beware of the 80-character line limit (unless there is a good reason to
exceed it).

> +
> + error_emitted = true;

The indentation is wrong; there is an extra whitespace character.

> +  } else if ((!apply_implicit_conversion(op[0]->type, op[1], state)
> && !apply_implicit_conversion(op[1]->type, op[0], state))
>|| (op[0]->type != op[1]->type)) {
>   _mesa_glsl_error(& loc, state, "operands of `%s' must have the same 
> "
> 

In general the patch seems right to 

Re: [Mesa-dev] [PATCH] clover: Implement image attribute getters

2015-07-10 Thread Zoltán Gilián
Sent a new patch with subject [Mesa-dev] [PATCH] clover: Pass image
attributes to the kernel.
2015.07.06. 17:58 ezt írta ("Zoltán Gilián" ):

> > Hint: you'll need new
> > module::argument::semantic enums
>
> I see. Reworked it a bit.
>
> On Mon, Jul 6, 2015 at 1:13 PM, Francisco Jerez 
> wrote:
> > Zoltán Gilián  writes:
> >
> >>> This seems to be doing essentially the same thing as v1?  Is it the
> >>> right patch?
> >>
> >> The llvm pass was invoked in clover in v1. This patch relies on llvm
> >> to perform that task (). What this patch does basically is that it
> >> adds the image attributes to the end of the kernel input vector.
> >> The commit message of this patch is misleading, I'll fix it.
> >>
> > NAK.  Just like in v1, you're implementing the same pipe driver-specific
> > policy in Clover's core layer -- If you don't feel like fixing this
> > properly as I described in my reply to v1, it would be acceptable to
> > implement it for the time being using a workaround similar to
> > llvm/invocation.cpp:433 -- Hint: you'll need new
> > module::argument::semantic enums.
> >
> > Thanks.
> >
> >> On Wed, Jun 24, 2015 at 2:48 PM, Francisco Jerez 
> wrote:
> >>> Zoltan Gilian  writes:
> >>>
>  Image attributes are passed to the kernel as hidden parameters after
> the
>  image attribute itself. An llvm pass replaces the getter builtins to
>  the appropriate parameters.
> >>>
> >>> This seems to be doing essentially the same thing as v1?  Is it the
> >>> right patch?
> >>>
>  ---
>   src/gallium/state_trackers/clover/core/kernel.cpp  | 26 +++
>   src/gallium/state_trackers/clover/core/kernel.hpp  | 13 ++--
>   src/gallium/state_trackers/clover/core/memory.cpp  |  2 +-
>   .../state_trackers/clover/llvm/invocation.cpp  | 81
> +-
>   4 files changed, 116 insertions(+), 6 deletions(-)
> 
>  diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp
> b/src/gallium/state_trackers/clover/core/kernel.cpp
>  index 0756f06..291c799 100644
>  --- a/src/gallium/state_trackers/clover/core/kernel.cpp
>  +++ b/src/gallium/state_trackers/clover/core/kernel.cpp
>  @@ -185,6 +185,13 @@
> kernel::exec_context::bind(intrusive_ptr _q,
> }
>  }
> 
>  +   // Bind image attribute args.
>  +   for (const auto& arg: kern._args) {
>  +  if (auto img_arg = dynamic_cast(arg.get())) {
>  + img_arg->bind_attributes(*this);
>  +  }
>  +   }
>  +
>  // Create a new compute state if anything changed.
>  if (!st || q != _q ||
>  cs.req_local_mem != mem_local ||
>  @@ -465,6 +472,25 @@ kernel::constant_argument::unbind(exec_context
> &ctx) {
>   }
> 
>   void
>  +kernel::image_argument::bind_attributes(exec_context &ctx) {
>  +   cl_image_format format = img->format();
>  +   cl_uint attributes[] = {
>  + static_cast(img->width()),
>  + static_cast(img->height()),
>  + static_cast(img->depth()),
>  + format.image_channel_data_type,
>  + format.image_channel_order};
>  +   for (unsigned i = 0; i < 5; ++i) {
>  +  auto v = bytes(attributes[i]);
>  +
>  +  extend(v, module::argument::zero_ext, sizeof(cl_uint));
>  +  byteswap(v, ctx.q->device().endianness());
>  +  align(ctx.input, sizeof(cl_uint));
>  +  insert(ctx.input, v);
>  +   }
>  +}
>  +
>  +void
>   kernel::image_rd_argument::set(size_t size, const void *value) {
>  if (size != sizeof(cl_mem))
> throw error(CL_INVALID_ARG_SIZE);
>  diff --git a/src/gallium/state_trackers/clover/core/kernel.hpp
> b/src/gallium/state_trackers/clover/core/kernel.hpp
>  index d6432a4..8c15b2f 100644
>  --- a/src/gallium/state_trackers/clover/core/kernel.hpp
>  +++ b/src/gallium/state_trackers/clover/core/kernel.hpp
>  @@ -190,7 +190,14 @@ namespace clover {
>    pipe_surface *st;
> };
> 
>  -  class image_rd_argument : public argument {
>  +  class image_argument : public argument {
>  +  public:
>  + void bind_attributes(exec_context &ctx);
>  +  protected:
>  + image *img;
>  +  };
>  +
>  +  class image_rd_argument : public image_argument {
> public:
>    virtual void set(size_t size, const void *value);
>    virtual void bind(exec_context &ctx,
>  @@ -198,11 +205,10 @@ namespace clover {
>    virtual void unbind(exec_context &ctx);
> 
> private:
>  - image *img;
>    pipe_sampler_view *st;
> };
> 
>  -  class image_wr_argument : public argument {
>  +  class image_wr_argument : public image_argument {
> public:
>    virtual void set(size_t size, const void *value);
>  

[Mesa-dev] [PATCH v3 (part1) 11/26] glsl: shader buffer variables cannot have initializers

2015-07-10 Thread Iago Toral Quiroga
From: Samuel Iglesias Gonsalvez 

Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:

"Buffer variables cannot have initializers."

v2:
- Rewrite error message (Jordan)

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/glsl/ast_to_hir.cpp | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index e887ac2..6299bf0 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2995,6 +2995,15 @@ process_initializer(ir_variable *var, ast_declaration 
*decl,
"cannot initialize uniforms");
}
 
+   /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+*
+*"Buffer variables cannot have initializers."
+*/
+   if (var->data.mode == ir_var_shader_storage) {
+  _mesa_glsl_error(& initializer_loc, state,
+   "SSBO variables cannot have initializers");
+   }
+
/* From section 4.1.7 of the GLSL 4.40 spec:
 *
 *"Opaque variables [...] are initialized only through the
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 02/26] glsl: Add ir_var_shader_storage

2015-07-10 Thread Iago Toral Quiroga
From: Kristian Høgsberg 

This will be used to identify buffer variables inside shader storage
buffer objects, which are very similar to uniforms except for a few
differences, most important of which is that they are writable.

Since buffer variables are so similar to uniforms, we will almost always
want them to go through the same paths as uniforms.

Reviewed-by: Jordan Justen 
---
 src/glsl/builtin_variables.cpp   |  5 +++--
 src/glsl/glsl_symbol_table.cpp   | 16 +++-
 src/glsl/ir.cpp  |  3 +++
 src/glsl/ir.h|  5 -
 src/glsl/ir_function.cpp |  1 +
 src/glsl/ir_print_visitor.cpp|  3 ++-
 src/glsl/ir_reader.cpp   |  2 ++
 src/glsl/loop_unroll.cpp |  1 +
 src/glsl/lower_named_interface_blocks.cpp|  5 +++--
 src/glsl/lower_variable_index_to_cond_assign.cpp |  1 +
 src/glsl/opt_structure_splitting.cpp |  5 +++--
 11 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index a765d35..aba1750 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -436,11 +436,12 @@ builtin_variable_generator::add_variable(const char *name,
   var->data.read_only = true;
   break;
case ir_var_shader_out:
+   case ir_var_shader_storage:
   break;
default:
   /* The only variables that are added using this function should be
-   * uniforms, shader inputs, and shader outputs, constants (which use
-   * ir_var_auto), and system values.
+   * uniforms, shader storage, shader inputs, and shader outputs, constants
+   * (which use ir_var_auto), and system values.
*/
   assert(0);
   break;
diff --git a/src/glsl/glsl_symbol_table.cpp b/src/glsl/glsl_symbol_table.cpp
index 2294dda..536f0a3 100644
--- a/src/glsl/glsl_symbol_table.cpp
+++ b/src/glsl/glsl_symbol_table.cpp
@@ -36,6 +36,9 @@ public:
   case ir_var_uniform:
  dest = &ibu;
  break;
+  case ir_var_shader_storage:
+ dest = &iss;
+ break;
   case ir_var_shader_in:
  dest = &ibi;
  break;
@@ -60,6 +63,8 @@ public:
   switch (mode) {
   case ir_var_uniform:
  return ibu;
+  case ir_var_shader_storage:
+ return iss;
   case ir_var_shader_in:
  return ibi;
   case ir_var_shader_out:
@@ -71,24 +76,25 @@ public:
}
 
symbol_table_entry(ir_variable *v)   :
-  v(v), f(0), t(0), ibu(0), ibi(0), ibo(0), a(0) {}
+  v(v), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) {}
symbol_table_entry(ir_function *f)   :
-  v(0), f(f), t(0), ibu(0), ibi(0), ibo(0), a(0) {}
+  v(0), f(f), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) {}
symbol_table_entry(const glsl_type *t)   :
-  v(0), f(0), t(t), ibu(0), ibi(0), ibo(0), a(0) {}
+  v(0), f(0), t(t), ibu(0), iss(0), ibi(0), ibo(0), a(0) {}
symbol_table_entry(const glsl_type *t, enum ir_variable_mode mode) :
-  v(0), f(0), t(0), ibu(0), ibi(0), ibo(0), a(0)
+  v(0), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0)
{
   assert(t->is_interface());
   add_interface(t, mode);
}
symbol_table_entry(const class ast_type_specifier *a):
-  v(0), f(0), t(0), ibu(0), ibi(0), ibo(0), a(a) {}
+  v(0), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(a) {}
 
ir_variable *v;
ir_function *f;
const glsl_type *t;
const glsl_type *ibu;
+   const glsl_type *iss;
const glsl_type *ibi;
const glsl_type *ibo;
const class ast_type_specifier *a;
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index dbd064f..9a25bf4 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1975,6 +1975,9 @@ mode_string(const ir_variable *var)
case ir_var_uniform:
   return "uniform";
 
+   case ir_var_shader_storage:
+  return "buffer";
+
case ir_var_shader_in:
   return "shader input";
 
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index f904555..2b9533a 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -324,6 +324,7 @@ protected:
 enum ir_variable_mode {
ir_var_auto = 0, /**< Function local variables and globals. */
ir_var_uniform,  /**< Variable declared as a uniform. */
+   ir_var_shader_storage,   /**< Variable declared as an ssbo. */
ir_var_shader_in,
ir_var_shader_out,
ir_var_function_in,
@@ -445,7 +446,9 @@ public:
 */
inline bool is_in_uniform_block() const
{
-  return this->data.mode == ir_var_uniform && this->interface_type != NULL;
+  return (this->data.mode == ir_var_uniform ||
+  this->data.mode == ir_var_shader_storage) &&
+ this->interface_type != NULL;
}
 
/**
diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp
index 1319443..93034be 100644
--- a/src/glsl/ir_func

[Mesa-dev] [PATCH v3 (part1) 04/26] nir: add nir_var_shader_storage

2015-07-10 Thread Iago Toral Quiroga
Reviewed-by: Jordan Justen 
---
 src/glsl/nir/glsl_to_nir.cpp | 4 
 src/glsl/nir/nir.h   | 1 +
 src/glsl/nir/nir_lower_atomics.c | 3 ++-
 src/glsl/nir/nir_lower_io.c  | 9 ++---
 src/glsl/nir/nir_print.c | 5 +++--
 src/glsl/nir/nir_validate.c  | 6 --
 6 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 95531bb..66430f3 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -280,6 +280,9 @@ nir_visitor::visit(ir_variable *ir)
   var->data.mode = nir_var_uniform;
   break;
 
+   case ir_var_shader_storage:
+  var->data.mode = nir_var_shader_storage;
+  break;
 
case ir_var_system_value:
   var->data.mode = nir_var_system_value;
@@ -371,6 +374,7 @@ nir_visitor::visit(ir_variable *ir)
   break;
 
case nir_var_uniform:
+   case nir_var_shader_storage:
   exec_list_push_tail(&shader->uniforms, &var->node);
   break;
 
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 9e2a281..e9a506c 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -87,6 +87,7 @@ typedef enum {
nir_var_global,
nir_var_local,
nir_var_uniform,
+   nir_var_shader_storage,
nir_var_system_value
 } nir_variable_mode;
 
diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c
index 0457de6..ce3615a 100644
--- a/src/glsl/nir/nir_lower_atomics.c
+++ b/src/glsl/nir/nir_lower_atomics.c
@@ -55,7 +55,8 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl 
*impl)
   return;
}
 
-   if (instr->variables[0]->var->data.mode != nir_var_uniform)
+   if (instr->variables[0]->var->data.mode != nir_var_uniform &&
+   instr->variables[0]->var->data.mode != nir_var_shader_storage)
   return; /* atomics passed as function arguments can't be lowered */
 
void *mem_ctx = ralloc_parent(instr);
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 6761d5b..a9dd776 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -86,7 +86,8 @@ nir_assign_var_locations_scalar(struct exec_list *var_list, 
unsigned *size)
* UBO's have their own address spaces, so don't count them towards the
* number of global uniforms
*/
-  if (var->data.mode == nir_var_uniform && var->interface_type != NULL)
+  if ((var->data.mode == nir_var_uniform || var->data.mode == 
nir_var_shader_storage) &&
+  var->interface_type != NULL)
  continue;
 
   var->data.driver_location = location;
@@ -153,7 +154,8 @@ nir_assign_var_locations_scalar_direct_first(nir_shader 
*shader,
unsigned location = 0;
 
foreach_list_typed(nir_variable, var, node, var_list) {
-  if (var->data.mode == nir_var_uniform && var->interface_type != NULL)
+  if ((var->data.mode == nir_var_uniform || var->data.mode == 
nir_var_shader_storage) &&
+  var->interface_type != NULL)
  continue;
 
   if (_mesa_set_search(indirect_set, var))
@@ -166,7 +168,8 @@ nir_assign_var_locations_scalar_direct_first(nir_shader 
*shader,
*direct_size = location;
 
foreach_list_typed(nir_variable, var, node, var_list) {
-  if (var->data.mode == nir_var_uniform && var->interface_type != NULL)
+  if ((var->data.mode == nir_var_uniform || var->data.mode == 
nir_var_shader_storage) &&
+  var->interface_type != NULL)
  continue;
 
   if (!_mesa_set_search(indirect_set, var))
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index eb4045c..f591c4b 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -214,7 +214,7 @@ print_var_decl(nir_variable *var, print_var_state *state, 
FILE *fp)
const char *const samp = (var->data.sample) ? "sample " : "";
const char *const inv = (var->data.invariant) ? "invariant " : "";
const char *const mode[] = { "shader_in ", "shader_out ", "", "",
-"uniform ", "system " };
+"uniform ", "shader_storage", "system " };
const char *const interp[] = { "", "smooth", "flat", "noperspective" };
 
fprintf(fp, "%s%s%s%s%s ",
@@ -239,7 +239,8 @@ print_var_decl(nir_variable *var, print_var_state *state, 
FILE *fp)
 
if (var->data.mode == nir_var_shader_in ||
var->data.mode == nir_var_shader_out ||
-   var->data.mode == nir_var_uniform) {
+   var->data.mode == nir_var_uniform ||
+   var->data.mode == nir_var_shader_storage) {
   fprintf(fp, " (%u, %u)", var->data.location, var->data.driver_location);
}
 
diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index da92ed9..dc79941 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -400,11 +400,13 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, 
validate_state *state)
   break;
case nir_intrinsic_store_var:
   assert(instr->variables[0]->var->data.mode != nir

[Mesa-dev] [PATCH v3 (part1) 03/26] mesa: rename is_in_uniform_block to is_in_buffer_block

2015-07-10 Thread Iago Toral Quiroga
Since this now checks if a variable is inside a uniform or a shader
storage block.

Reviewed-by: Jordan Justen 
---
 src/glsl/ast_to_hir.cpp| 2 +-
 src/glsl/ir.h  | 5 +++--
 src/glsl/link_uniform_block_active_visitor.cpp | 6 +++---
 src/glsl/link_uniform_initializers.cpp | 4 ++--
 src/glsl/link_uniforms.cpp | 6 +++---
 src/glsl/linker.cpp| 2 +-
 src/glsl/lower_ubo_reference.cpp   | 2 +-
 src/glsl/opt_dead_code.cpp | 2 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +-
 src/mesa/program/ir_to_mesa.cpp| 2 +-
 10 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index de6a86d..00f35eb 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2007,7 +2007,7 @@ validate_matrix_layout_for_type(struct 
_mesa_glsl_parse_state *state,
 const glsl_type *type,
 ir_variable *var)
 {
-   if (var && !var->is_in_uniform_block()) {
+   if (var && !var->is_in_buffer_block()) {
   /* Layout qualifiers may only apply to interface blocks and fields in
* them.
*/
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 2b9533a..1c7829b 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -442,9 +442,10 @@ public:
glsl_interp_qualifier determine_interpolation_mode(bool flat_shade);
 
/**
-* Determine whether or not a variable is part of a uniform block.
+* Determine whether or not a variable is part of a uniform or
+* shader storage block.
 */
-   inline bool is_in_uniform_block() const
+   inline bool is_in_buffer_block() const
{
   return (this->data.mode == ir_var_uniform ||
   this->data.mode == ir_var_shader_storage) &&
diff --git a/src/glsl/link_uniform_block_active_visitor.cpp 
b/src/glsl/link_uniform_block_active_visitor.cpp
index 292cde3..ddfd2b2 100644
--- a/src/glsl/link_uniform_block_active_visitor.cpp
+++ b/src/glsl/link_uniform_block_active_visitor.cpp
@@ -73,7 +73,7 @@ process_block(void *mem_ctx, struct hash_table *ht, 
ir_variable *var)
 ir_visitor_status
 link_uniform_block_active_visitor::visit(ir_variable *var)
 {
-   if (!var->is_in_uniform_block())
+   if (!var->is_in_buffer_block())
   return visit_continue;
 
const glsl_type *const block_type = var->is_interface_instance()
@@ -124,7 +124,7 @@ 
link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir)
 * function.
 */
if (var == NULL
-   || !var->is_in_uniform_block()
+   || !var->is_in_buffer_block()
|| !var->is_interface_instance())
   return visit_continue;
 
@@ -194,7 +194,7 @@ 
link_uniform_block_active_visitor::visit(ir_dereference_variable *ir)
 {
ir_variable *var = ir->var;
 
-   if (!var->is_in_uniform_block())
+   if (!var->is_in_buffer_block())
   return visit_continue;
 
assert(!var->is_interface_instance() || !var->type->is_array());
diff --git a/src/glsl/link_uniform_initializers.cpp 
b/src/glsl/link_uniform_initializers.cpp
index 204acfa..d1f904e 100644
--- a/src/glsl/link_uniform_initializers.cpp
+++ b/src/glsl/link_uniform_initializers.cpp
@@ -267,7 +267,7 @@ link_set_uniform_initializers(struct gl_shader_program 
*prog,
 
 if (type->without_array()->is_sampler()) {
linker::set_sampler_binding(prog, var->name, var->data.binding);
-} else if (var->is_in_uniform_block()) {
+} else if (var->is_in_buffer_block()) {
const glsl_type *const iface_type = var->get_interface_type();
 
/* If the variable is an array and it is an interface instance,
@@ -280,7 +280,7 @@ link_set_uniform_initializers(struct gl_shader_program 
*prog,
 * float f[4];
 * };
 *
-* In this case "f" would pass is_in_uniform_block (above) and
+* In this case "f" would pass is_in_buffer_block (above) and
 * type->is_array(), but it will fail is_interface_instance().
 */
if (var->is_interface_instance() && var->type->is_array()) {
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 11ae06f..5fdf25e 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -298,7 +298,7 @@ public:
 
void process(ir_variable *var)
{
-  this->is_ubo_var = var->is_in_uniform_block();
+  this->is_ubo_var = var->is_in_buffer_block();
   if (var->is_interface_instance())
  program_resource_visitor::process(var->get_interface_type(),
var->get_interface_type()->name);
@@ -431,7 +431,7 @@ public:
   field_counter = 0;
 
   ubo_block_index = -1;
-  if (var->is_in_uniform_block()) {
+  if (var->is_in_buffer_block()) {
  if (var->is_in

[Mesa-dev] [PATCH v3 (part1) 00/26] ARB_shader_storage_buffer_object (mesa)

2015-07-10 Thread Iago Toral Quiroga
As discussed with Jordan, this v3-part1 series contains a good part of the
frontend stuff (most of which has already been reviewed). The idea is to get
this landed ahead, since some of the remaining patches (specifically the i965
backend stuff) depend on other patches from Curro that have not landed yet.

For reference, this v3-part1 series does not include all the frontend bits,
specifically it lacks: support for the optional unsized array at the bottom of
SSBO definitions, implementation of layout mode std430, getters and queries
for GL_SHADER_STORAGE_BUFFER targets and glShaderStorageBlockBinding.

Link to the original v2:
http://lists.freedesktop.org/archives/mesa-dev/2015-June/085562.html

Development branch with this series:
git clone -b itoral-ARB_shader_storage_buffer_object-v3-part1 
https://github.com/Igalia/mesa.git

For reference, here is a repository with the full v3 series (together with the
patches from Curro it depends on):
git clone -b itoral-ARB_shader_storage_buffer_object-v3 
https://github.com/Igalia/mesa.git

Piglit repository including SSBO tests:
git clone -b arb_shader_storage_buffer_object-v2 
https://github.com/Igalia/piglit.git

Notice that if you intend to run these with this v3-part1 series you will need
to use:
MESA_EXTENSION_OVERRIDE="GL_ARB_shader_storage_buffer_object"

There are no piglit regressions with this series except for
arb_program_interface_query-getprogramresourceiv, but that is expected since
that test was edited some weeks ago to incorporate an SSBO in one of the shaders, so
it will abort when it hits that (since the NIR and i965 backend bits are not
included with this v3-part1 series).

As for the SSBO specific piglit tests present in the aforementioned repository,
the results are the ones expected and can be observed here:
http://paste.ubuntu.com/11854375/

Some notes on these results:

- crashes: mostly due to the fact that this part1 series does not provide the
NIR implementation of the new intrinsics, so they hit an assert. The
shader-storage-block-different-size crash is fixed with a patch from Antia that
was part of one of our dEQP series and was also included at the end of our
v2 series: http://lists.freedesktop.org/archives/mesa-dev/2015-June/085642.html
- Other than that, compiler/link tests generally pass and failed tests are
related to missing features (like unsized arrays or queries) or missing i965
backend implementation.

With the full v3 series there are no regressions (except for the
one we discussed with the v1 of this series that is actually a bogus UBO test
for which we sent a fix to piglit).

Iago Toral Quiroga (15):
  mesa: rename is_in_uniform_block to is_in_buffer_block
  nir: add nir_var_shader_storage
  glsl: Identify active uniform blocks that are buffer blocks as such.
  mesa: Add shader storage buffer support to struct gl_context
  mesa: Initialize and free shader storage buffers
  mesa: Implement _mesa_DeleteBuffers for target
GL_SHADER_STORAGE_BUFFER
  mesa: Implement _mesa_BindBuffersBase for target
GL_SHADER_STORAGE_BUFFER
  mesa: Implement _mesa_BindBuffersRange for target
GL_SHADER_STORAGE_BUFFER
  mesa: Implement _mesa_BindBufferBase for target
GL_SHADER_STORAGE_BUFFER
  mesa: Implement _mesa_BindBufferRange for target
GL_SHADER_STORAGE_BUFFER
  glsl: Don't do tree grafting on buffer variables
  glsl: Do not kill dead assignments to buffer variables or SSBO
declarations.
  glsl: Don't do constant propagation on buffer variables
  glsl: Don't do constant variable on buffer variables
  glsl: Don't do copy propagation on buffer variables

Kristian Høgsberg (3):
  glsl: Add ir_var_shader_storage
  glsl: Implement parser support for 'buffer' qualifier
  glsl: link buffer variables and shader storage buffer interface blocks

Samuel Iglesias Gonsalvez (8):
  mesa: define ARB_shader_storage_buffer_object extension
  mesa: add MaxShaderStorageBlocks to struct gl_program_constants
  glsl: enable binding layout qualifier usage for shader storage buffer
objects
  glsl: shader buffer variables cannot have initializers
  glsl: buffer variables cannot be defined outside interface blocks
  glsl: fix error messages in invalid declarations of shader storage
blocks
  glsl: Lower shader storage buffer object writes to GLSL IR instrinsics
  glsl: Lower shader storage buffer object loads to GLSL IR instrinsics

 src/glsl/ast.h   |   1 +
 src/glsl/ast_to_hir.cpp  |  81 +++-
 src/glsl/ast_type.cpp|   3 +-
 src/glsl/builtin_variables.cpp   |   5 +-
 src/glsl/glcpp/glcpp-parse.y |   3 +
 src/glsl/glsl_lexer.ll   |   1 +
 src/glsl/glsl_parser.yy  |  33 +-
 src/glsl/glsl_parser_extras.cpp  |  65 +--
 src/glsl/glsl_parser_extras.h|   7 +
 src/glsl/glsl_symbol_table.cpp   |  16 +-
 src/glsl/ir.cpp   

[Mesa-dev] [PATCH v3 (part1) 17/26] mesa: Implement _mesa_BindBuffersRange for target GL_SHADER_STORAGE_BUFFER

2015-07-10 Thread Iago Toral Quiroga
v2:
- Fix error message (Jordan)

Reviewed-by: Jordan Justen 
---
 src/mesa/main/bufferobj.c | 110 ++
 1 file changed, 110 insertions(+)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 27638ca..0a9ffe4 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -3579,6 +3579,112 @@ bind_uniform_buffers_range(struct gl_context *ctx, 
GLuint first, GLsizei count,
_mesa_end_bufferobj_lookups(ctx);
 }
 
+static void
+bind_shader_storage_buffers_range(struct gl_context *ctx, GLuint first,
+  GLsizei count, const GLuint *buffers,
+  const GLintptr *offsets,
+  const GLsizeiptr *sizes)
+{
+   GLint i;
+
+   if (!error_check_bind_shader_storage_buffers(ctx, first, count,
+"glBindBuffersRange"))
+  return;
+
+   /* Assume that at least one binding will be changed */
+   FLUSH_VERTICES(ctx, 0);
+   ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+   if (!buffers) {
+  /* The ARB_multi_bind spec says:
+   *
+   *"If <buffers> is NULL, all bindings from <first> through
+   * <first>+<count>-1 are reset to their unbound (zero) state.
+   * In this case, the offsets and sizes associated with the
+   * binding points are set to default values, ignoring
+   * <offsets> and <sizes>."
+   */
+  unbind_shader_storage_buffers(ctx, first, count);
+  return;
+   }
+
+   /* Note that the error semantics for multi-bind commands differ from
+* those of other GL commands.
+*
+* The Issues section in the ARB_multi_bind spec says:
+*
+*"(11) Typically, OpenGL specifies that if an error is generated by a
+*  command, that command has no effect.  This is somewhat
+*  unfortunate for multi-bind commands, because it would require a
+*  first pass to scan the entire list of bound objects for errors
+*  and then a second pass to actually perform the bindings.
+*  Should we have different error semantics?
+*
+*   RESOLVED:  Yes.  In this specification, when the parameters for
+*   one of the <count> binding points are invalid, that binding point
+*   is not updated and an error will be generated.  However, other
+*   binding points in the same command will be updated if their
+*   parameters are valid and no other error occurs."
+*/
+
+   _mesa_begin_bufferobj_lookups(ctx);
+
+   for (i = 0; i < count; i++) {
+  struct gl_shader_storage_buffer_binding *binding =
+ &ctx->ShaderStorageBufferBindings[first + i];
+  struct gl_buffer_object *bufObj;
+
+  if (!bind_buffers_check_offset_and_size(ctx, i, offsets, sizes))
+ continue;
+
+  /* The ARB_multi_bind spec says:
+   *
+   * "An INVALID_VALUE error is generated by BindBuffersRange if any
+   *  pair of values in <offsets> and <sizes> does not respectively
+   *  satisfy the constraints described for those parameters for the
+   *  specified target, as described in section 6.7.1 (per binding)."
+   *
+   * Section 6.7.1 refers to table 6.5, which says:
+   *
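+   * "┌───────────────────────────────────────────────────────────────┐
+   *  │ Shader storage buffer array bindings (see sec. 7.8)           │
+   *  ├─────────────────────┬─────────────────────────────────────────┤
+   *  │  ...                │  ...                                    │
+   *  │  offset restriction │  multiple of value of SHADER_STORAGE_-  │
+   *  │                     │  BUFFER_OFFSET_ALIGNMENT                │
+   *  │  ...                │  ...                                    │
+   *  │  size restriction   │  none                                   │
+   *  └─────────────────────┴─────────────────────────────────────────┘"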
+   */
+  if (offsets[i] & (ctx->Const.ShaderStorageBufferOffsetAlignment - 1)) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "glBindBuffersRange(offsets[%u]=%" PRId64
+ " is misaligned; it must be a multiple of the value of "
+ "GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT=%u when "
+ "target=GL_SHADER_STORAGE_BUFFER)",
+ i, (int64_t) offsets[i],
+ ctx->Const.ShaderStorageBufferOffsetAlignment);
+ continue;
+  }
+
+  if (binding->BufferObject && binding->BufferObject->Name == buffers[i])
+ bufObj = binding->BufferObject;
+  else
+ bufObj = _mesa_multi_bind_lookup_bufferobj(ctx, buffers, i,
+"glBindBuffersRange");
+
+  if (bufObj) {
+ if (bufObj == ctx->Shared->NullBufferObj)
+set_ssbo_binding(ctx, binding, bufObj, -1, -1, GL_FALSE);
+   

[Mesa-dev] [PATCH v3 (part1) 20/26] glsl: Don't do tree grafting on buffer variables

2015-07-10 Thread Iago Toral Quiroga
Otherwise we can lose writes into the buffers backing the variables.

Reviewed-by: Jordan Justen 
---
 src/glsl/opt_tree_grafting.cpp | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp
index d47613c..7f2ee6c 100644
--- a/src/glsl/opt_tree_grafting.cpp
+++ b/src/glsl/opt_tree_grafting.cpp
@@ -359,10 +359,11 @@ tree_grafting_basic_block(ir_instruction *bb_first,
   if (!lhs_var)
 continue;
 
-  if (lhs_var->data.mode == ir_var_function_out ||
- lhs_var->data.mode == ir_var_function_inout ||
-  lhs_var->data.mode == ir_var_shader_out)
-continue;
+   if (lhs_var->data.mode == ir_var_function_out ||
+   lhs_var->data.mode == ir_var_function_inout ||
+   lhs_var->data.mode == ir_var_shader_out ||
+   lhs_var->data.mode == ir_var_shader_storage)
+  continue;
 
   ir_variable_refcount_entry *entry = 
info->refs->get_variable_entry(lhs_var);
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 01/26] mesa: define ARB_shader_storage_buffer_object extension

2015-07-10 Thread Iago Toral Quiroga
From: Samuel Iglesias Gonsalvez 

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/glsl/glcpp/glcpp-parse.y|  3 ++
 src/glsl/glsl_parser_extras.cpp | 63 +
 src/glsl/glsl_parser_extras.h   |  7 +
 src/mesa/main/extensions.c  |  1 +
 src/mesa/main/mtypes.h  |  1 +
 5 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index a11b6b2..ed1bffb 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2483,6 +2483,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
 
   if (extensions->ARB_shader_precision)
  add_builtin_define(parser, "GL_ARB_shader_precision", 1);
+
+ if (extensions->ARB_shader_storage_buffer_object)
+add_builtin_define(parser, 
"GL_ARB_shader_storage_buffer_object", 1);
   }
}
 
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 046d5d7..0d7e521 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -551,37 +551,38 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
 
/* ARB extensions go here, sorted alphabetically.
 */
-   EXT(ARB_arrays_of_arrays,   true,  false, ARB_arrays_of_arrays),
-   EXT(ARB_compute_shader, true,  false, ARB_compute_shader),
-   EXT(ARB_conservative_depth, true,  false, 
ARB_conservative_depth),
-   EXT(ARB_derivative_control, true,  false, 
ARB_derivative_control),
-   EXT(ARB_draw_buffers,   true,  false, dummy_true),
-   EXT(ARB_draw_instanced, true,  false, ARB_draw_instanced),
-   EXT(ARB_explicit_attrib_location,   true,  false, 
ARB_explicit_attrib_location),
-   EXT(ARB_explicit_uniform_location,  true,  false, 
ARB_explicit_uniform_location),
-   EXT(ARB_fragment_coord_conventions, true,  false, 
ARB_fragment_coord_conventions),
-   EXT(ARB_fragment_layer_viewport,true,  false, 
ARB_fragment_layer_viewport),
-   EXT(ARB_gpu_shader5,true,  false, ARB_gpu_shader5),
-   EXT(ARB_gpu_shader_fp64,true,  false, ARB_gpu_shader_fp64),
-   EXT(ARB_sample_shading, true,  false, ARB_sample_shading),
-   EXT(ARB_separate_shader_objects,true,  false, dummy_true),
-   EXT(ARB_shader_atomic_counters, true,  false, 
ARB_shader_atomic_counters),
-   EXT(ARB_shader_bit_encoding,true,  false, 
ARB_shader_bit_encoding),
-   EXT(ARB_shader_image_load_store,true,  false, 
ARB_shader_image_load_store),
-   EXT(ARB_shader_precision,   true,  false, ARB_shader_precision),
-   EXT(ARB_shader_stencil_export,  true,  false, 
ARB_shader_stencil_export),
-   EXT(ARB_shader_texture_lod, true,  false, 
ARB_shader_texture_lod),
-   EXT(ARB_shading_language_420pack,   true,  false, 
ARB_shading_language_420pack),
-   EXT(ARB_shading_language_packing,   true,  false, 
ARB_shading_language_packing),
-   EXT(ARB_texture_cube_map_array, true,  false, 
ARB_texture_cube_map_array),
-   EXT(ARB_texture_gather, true,  false, ARB_texture_gather),
-   EXT(ARB_texture_multisample,true,  false, 
ARB_texture_multisample),
-   EXT(ARB_texture_query_levels,   true,  false, 
ARB_texture_query_levels),
-   EXT(ARB_texture_query_lod,  true,  false, 
ARB_texture_query_lod),
-   EXT(ARB_texture_rectangle,  true,  false, dummy_true),
-   EXT(ARB_uniform_buffer_object,  true,  false, 
ARB_uniform_buffer_object),
-   EXT(ARB_vertex_attrib_64bit,true,  false, 
ARB_vertex_attrib_64bit),
-   EXT(ARB_viewport_array, true,  false, ARB_viewport_array),
+   EXT(ARB_arrays_of_arrays, true,  false, 
ARB_arrays_of_arrays),
+   EXT(ARB_compute_shader,   true,  false, ARB_compute_shader),
+   EXT(ARB_conservative_depth,   true,  false, 
ARB_conservative_depth),
+   EXT(ARB_derivative_control,   true,  false, 
ARB_derivative_control),
+   EXT(ARB_draw_buffers, true,  false, dummy_true),
+   EXT(ARB_draw_instanced,   true,  false, ARB_draw_instanced),
+   EXT(ARB_explicit_attrib_location, true,  false, 
ARB_explicit_attrib_location),
+   EXT(ARB_explicit_uniform_location,true,  false, 
ARB_explicit_uniform_location),
+   EXT(ARB_fragment_coord_conventions,   true,  false, 
ARB_fragment_coord_conventions),
+   EXT(ARB_fragment_layer_viewport,  true,  false, 
ARB_fragment_layer_viewport),
+   EXT(ARB_gpu_shader5,  true,  false, ARB_gpu_shader5),
+   EXT(ARB_gpu_shader_fp64,  true,  false, 
ARB_gpu_shader_fp64),
+   EXT(ARB_sample_shading,   true,  false,  

[Mesa-dev] [PATCH v3 (part1) 15/26] mesa: Implement _mesa_DeleteBuffers for target GL_SHADER_STORAGE_BUFFER

2015-07-10 Thread Iago Toral Quiroga
v2:
- Remove the extra spaces (Jordan)

Reviewed-by: Jordan Justen 
---
 src/mesa/main/bufferobj.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 2d70f7b..ee920a9 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -1264,6 +1264,17 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
 _mesa_BindBuffer( GL_UNIFORM_BUFFER, 0 );
  }
 
+ /* unbind SSBO binding points */
+ for (j = 0; j < ctx->Const.MaxShaderStorageBufferBindings; j++) {
+if (ctx->ShaderStorageBufferBindings[j].BufferObject == bufObj) {
+   _mesa_BindBufferBase(GL_SHADER_STORAGE_BUFFER, j, 0);
+}
+ }
+
+ if (ctx->ShaderStorageBuffer == bufObj) {
+_mesa_BindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
+ }
+
  /* unbind Atomci Buffer binding points */
  for (j = 0; j < ctx->Const.MaxAtomicBufferBindings; j++) {
 if (ctx->AtomicBufferBindings[j].BufferObject == bufObj) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 13/26] glsl: fix error messages in invalid declarations of shader storage blocks

2015-07-10 Thread Iago Toral Quiroga
From: Samuel Iglesias Gonsalvez 

Due to GL_ARB_shader_storage_buffer_object extension, shader storage blocks
have the same limitations as uniform blocks.

This patch fixes the corresponding error messages.

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/glsl/ast_to_hir.cpp | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 61020cf..ca30dbc 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -5382,7 +5382,7 @@ ast_process_structure_or_interface_block(exec_list 
*instructions,
  if (is_interface && field_type->contains_opaque()) {
 YYLTYPE loc = decl_list->get_location();
 _mesa_glsl_error(&loc, state,
- "uniform in non-default uniform block contains "
+ "uniform/buffer in non-default interface block 
contains "
  "opaque variable");
  }
 
@@ -5393,8 +5393,8 @@ ast_process_structure_or_interface_block(exec_list 
*instructions,
  * FINISHME: structures.
  */
 YYLTYPE loc = decl_list->get_location();
-_mesa_glsl_error(&loc, state, "atomic counter in structure or "
- "uniform block");
+_mesa_glsl_error(&loc, state, "atomic counter in structure, "
+ "shader storage block or uniform block");
  }
 
  if (field_type->contains_image()) {
@@ -5404,7 +5404,8 @@ ast_process_structure_or_interface_block(exec_list 
*instructions,
  */
 YYLTYPE loc = decl_list->get_location();
 _mesa_glsl_error(&loc, state,
- "image in structure or uniform block");
+ "image in structure, shader storage block or "
+ "uniform block");
  }
 
  const struct ast_type_qualifier *const qual =
@@ -5413,9 +5414,9 @@ ast_process_structure_or_interface_block(exec_list 
*instructions,
  qual->flags.q.packed ||
  qual->flags.q.shared) {
 _mesa_glsl_error(&loc, state,
- "uniform block layout qualifiers std140, packed, 
and "
- "shared can only be applied to uniform blocks, 
not "
- "members");
+ "uniform/shader storage block layout qualifiers "
+ "std140, packed, and shared can only be applied "
+ "to uniform/shader storage blocks, not members");
  }
 
  if (qual->flags.q.constant) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 08/26] mesa: Add shader storage buffer support to struct gl_context

2015-07-10 Thread Iago Toral Quiroga
This includes the array of bindings, the current buffer bound to the
GL_SHADER_STORAGE_BUFFER target and a set of general limits and default
values for shader storage buffers.

v2:
- Use spec values for the new defined constants (Jordan)

Reviewed-by: Jordan Justen 
---
 src/mesa/main/bufferobj.c |  5 +
 src/mesa/main/config.h|  2 ++
 src/mesa/main/context.c   |  6 ++
 src/mesa/main/mtypes.h| 38 ++
 4 files changed, 51 insertions(+)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 66dee68..c5d4ada 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -112,6 +112,11 @@ get_buffer_target(struct gl_context *ctx, GLenum target)
  return &ctx->UniformBuffer;
   }
   break;
+   case GL_SHADER_STORAGE_BUFFER:
+  if (ctx->Extensions.ARB_shader_storage_buffer_object) {
+ return &ctx->ShaderStorageBuffer;
+  }
+  break;
case GL_ATOMIC_COUNTER_BUFFER:
   if (ctx->Extensions.ARB_shader_atomic_counters) {
  return &ctx->AtomicBuffer;
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 9c3baf4..177f176 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -171,8 +171,10 @@
 #define MAX_PROGRAM_LOCAL_PARAMS   4096
 #define MAX_UNIFORMS   4096
 #define MAX_UNIFORM_BUFFERS15 /* + 1 default uniform buffer */
+#define MAX_SHADER_STORAGE_BUFFERS 7  /* + 1 default shader storage buffer 
*/
 /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
 #define MAX_COMBINED_UNIFORM_BUFFERS   (MAX_UNIFORM_BUFFERS * 6)
+#define MAX_COMBINED_SHADER_STORAGE_BUFFERS   (MAX_SHADER_STORAGE_BUFFERS * 6)
 #define MAX_ATOMIC_COUNTERS4096
 /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
 #define MAX_COMBINED_ATOMIC_BUFFERS(MAX_UNIFORM_BUFFERS * 6)
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index faa1de7..5470c56 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -597,6 +597,12 @@ _mesa_init_constants(struct gl_constants *consts, gl_api 
api)
consts->MaxUniformBlockSize = 16384;
consts->UniformBufferOffsetAlignment = 1;
 
+   /** GL_ARB_shader_storage_buffer_object */
+   consts->MaxCombinedShaderStorageBlocks = 8;
+   consts->MaxShaderStorageBufferBindings = 8;
+   consts->MaxShaderStorageBlockSize = 16 * 1024 * 1024;
+   consts->ShaderStorageBufferOffsetAlignment = 256;
+
/* GL_ARB_explicit_uniform_location, GL_MAX_UNIFORM_LOCATIONS */
consts->MaxUserAssignableUniformLocations =
   4 * MESA_SHADER_STAGES * MAX_UNIFORMS;
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 86508c3..f1ab4eb 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3403,6 +3403,15 @@ struct gl_constants
GLuint UniformBufferOffsetAlignment;
/** @} */
 
+   /** @{
+* GL_ARB_shader_storage_buffer_object
+*/
+   GLuint MaxCombinedShaderStorageBlocks;
+   GLuint MaxShaderStorageBufferBindings;
+   GLuint MaxShaderStorageBlockSize;
+   GLuint ShaderStorageBufferOffsetAlignment;
+   /** @} */
+
/**
 * GL_ARB_explicit_uniform_location
 */
@@ -4049,6 +4058,20 @@ struct gl_uniform_buffer_binding
GLboolean AutomaticSize;
 };
 
+struct gl_shader_storage_buffer_binding
+{
+   struct gl_buffer_object *BufferObject;
+   /** Start of shader storage block data in the buffer */
+   GLintptr Offset;
+   /** Size of data allowed to be referenced from the buffer (in bytes) */
+   GLsizeiptr Size;
+   /**
+* glBindBufferBase() indicates that the Size should be ignored and only
+* limited by the current size of the BufferObject.
+*/
+   GLboolean AutomaticSize;
+};
+
 /**
  * ARB_shader_image_load_store image unit.
  */
@@ -4296,6 +4319,12 @@ struct gl_context
struct gl_buffer_object *UniformBuffer;
 
/**
+* Current GL_ARB_shader_storage_buffer_object binding referenced by
+* GL_SHADER_STORAGE_BUFFER target for glBufferData, glMapBuffer, etc.
+*/
+   struct gl_buffer_object *ShaderStorageBuffer;
+
+   /**
 * Array of uniform buffers for GL_ARB_uniform_buffer_object and GL 3.1.
 * This is set up using glBindBufferRange() or glBindBufferBase().  They are
 * associated with uniform blocks by glUniformBlockBinding()'s state in the
@@ -4305,6 +4334,15 @@ struct gl_context
   UniformBufferBindings[MAX_COMBINED_UNIFORM_BUFFERS];
 
/**
+* Array of shader storage buffers for ARB_shader_storage_buffer_object
+* and GL 4.3. This is set up using glBindBufferRange() or
+* glBindBufferBase().  They are associated with shader storage blocks by
+* glShaderStorageBlockBinding()'s state in the shader program.
+*/
+   struct gl_shader_storage_buffer_binding
+  ShaderStorageBufferBindings[MAX_COMBINED_SHADER_STORAGE_BUFFERS];
+
+   /**
 * Object currently associated with the GL_ATOMIC_COUNTER_BUFFER
 * target.
 */
-- 
1.9.1

_

[Mesa-dev] [PATCH v3 (part1) 14/26] mesa: Initialize and free shader storage buffers

2015-07-10 Thread Iago Toral Quiroga
v2:
- Fix indention, used tabs instead of whitespaces. (Jordan)

Reviewed-by: Jordan Justen 
---
 src/mesa/main/bufferobj.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index c5d4ada..2d70f7b 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -836,6 +836,9 @@ _mesa_init_buffer_objects( struct gl_context *ctx )
_mesa_reference_buffer_object(ctx, &ctx->UniformBuffer,
 ctx->Shared->NullBufferObj);
 
+   _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer,
+ ctx->Shared->NullBufferObj);
+
_mesa_reference_buffer_object(ctx, &ctx->AtomicBuffer,
 ctx->Shared->NullBufferObj);
 
@@ -850,6 +853,14 @@ _mesa_init_buffer_objects( struct gl_context *ctx )
   ctx->UniformBufferBindings[i].Size = -1;
}
 
+   for (i = 0; i < MAX_COMBINED_SHADER_STORAGE_BUFFERS; i++) {
+  _mesa_reference_buffer_object(ctx,
+
&ctx->ShaderStorageBufferBindings[i].BufferObject,
+ctx->Shared->NullBufferObj);
+  ctx->ShaderStorageBufferBindings[i].Offset = -1;
+  ctx->ShaderStorageBufferBindings[i].Size = -1;
+   }
+
for (i = 0; i < MAX_COMBINED_ATOMIC_BUFFERS; i++) {
   _mesa_reference_buffer_object(ctx,
&ctx->AtomicBufferBindings[i].BufferObject,
@@ -872,6 +883,8 @@ _mesa_free_buffer_objects( struct gl_context *ctx )
 
_mesa_reference_buffer_object(ctx, &ctx->UniformBuffer, NULL);
 
+   _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, NULL);
+
_mesa_reference_buffer_object(ctx, &ctx->AtomicBuffer, NULL);
 
_mesa_reference_buffer_object(ctx, &ctx->DrawIndirectBuffer, NULL);
@@ -882,6 +895,12 @@ _mesa_free_buffer_objects( struct gl_context *ctx )
NULL);
}
 
+   for (i = 0; i < MAX_COMBINED_SHADER_STORAGE_BUFFERS; i++) {
+  _mesa_reference_buffer_object(ctx,
+
&ctx->ShaderStorageBufferBindings[i].BufferObject,
+NULL);
+   }
+
for (i = 0; i < MAX_COMBINED_ATOMIC_BUFFERS; i++) {
   _mesa_reference_buffer_object(ctx,
&ctx->AtomicBufferBindings[i].BufferObject,
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 10/26] glsl: enable binding layout qualifier usage for shader storage buffer objects

2015-07-10 Thread Iago Toral Quiroga
From: Samuel Iglesias Gonsalvez 

See GLSL 4.30 spec, section 4.4.5 "Uniform and Shader Storage Block
Layout Qualifiers".

v2:
- Add whitespace in an error message. Delete period '.' at the end of that
error message (Jordan).

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/glsl/ast_to_hir.cpp | 29 -
 src/glsl/glsl_parser.yy |  3 ++-
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index f9f1c08..e887ac2 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2044,9 +2044,10 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state 
*state,
ir_variable *var,
const ast_type_qualifier *qual)
 {
-   if (var->data.mode != ir_var_uniform) {
+   if (var->data.mode != ir_var_uniform && var->data.mode != 
ir_var_shader_storage) {
   _mesa_glsl_error(loc, state,
-   "the \"binding\" qualifier only applies to uniforms");
+   "the \"binding\" qualifier only applies to uniforms and 
"
+   "shader storage buffer objects");
   return false;
}
 
@@ -2070,13 +2071,31 @@ validate_binding_qualifier(struct 
_mesa_glsl_parse_state *state,
*
* The implementation-dependent maximum is 
GL_MAX_UNIFORM_BUFFER_BINDINGS.
*/
-  if (max_index >= ctx->Const.MaxUniformBufferBindings) {
+  if (var->data.mode == ir_var_uniform &&
+ max_index >= ctx->Const.MaxUniformBufferBindings) {
  _mesa_glsl_error(loc, state, "layout(binding = %d) for %d UBOs 
exceeds "
   "the maximum number of UBO binding points (%d)",
   qual->binding, elements,
   ctx->Const.MaxUniformBufferBindings);
  return false;
   }
+  /* SSBOs. From page 67 of the GLSL 4.30 specification:
+   * "If the binding point for any uniform or shader storage block instance
+   *  is less than zero, or greater than or equal to the
+   *  implementation-dependent maximum number of uniform buffer bindings, a
+   *  compile-time error will occur. When the binding identifier is used
+   *  with a uniform or shader storage block instanced as an array of size
+   *  N, all elements of the array from binding through binding + N – 1 
must
+   *  be within this range."
+   */
+  if (var->data.mode == ir_var_shader_storage &&
+ max_index >= ctx->Const.MaxShaderStorageBufferBindings) {
+ _mesa_glsl_error(loc, state, "layout(binding = %d) for %d SSBOs 
exceeds "
+  "the maximum number of SSBO binding points (%d)",
+  qual->binding, elements,
+  ctx->Const.MaxShaderStorageBufferBindings);
+ return false;
+  }
} else if (var->type->is_sampler() ||
   (var->type->is_array() && 
var->type->fields.array->is_sampler())) {
   /* Samplers.  From page 63 of the GLSL 4.20 specification:
@@ -5955,8 +5974,8 @@ ast_interface_block::hir(exec_list *instructions,
  if (state->symbols->get_variable(var->name) != NULL)
 _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name);
 
- /* Propagate the "binding" keyword into this UBO's fields;
-  * the UBO declaration itself doesn't get an ir_variable unless it
+ /* Propagate the "binding" keyword into this UBO/SSBO's fields.
+  * The UBO declaration itself doesn't get an ir_variable unless it
   * has an instance name.  This is ugly.
   */
  var->data.explicit_binding = this->layout.flags.q.explicit_binding;
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 8564cb9..37c4401 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1425,7 +1425,8 @@ layout_qualifier_id:
   }
 
   if ((state->ARB_shading_language_420pack_enable ||
-   state->has_atomic_counters()) &&
+   state->has_atomic_counters() ||
+   state->ARB_shader_storage_buffer_object_enable) &&
   match_layout_qualifier("binding", $1, state) == 0) {
  $$.flags.q.explicit_binding = 1;
  $$.binding = $3;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 05/26] glsl: Implement parser support for 'buffer' qualifier

2015-07-10 Thread Iago Toral Quiroga
From: Kristian Høgsberg 

This is used to identify shader storage buffer interface blocks where
buffer variables are declared.

Reviewed-by: Jordan Justen 
---
 src/glsl/ast.h  |  1 +
 src/glsl/ast_to_hir.cpp | 14 ++
 src/glsl/ast_type.cpp   |  3 ++-
 src/glsl/glsl_lexer.ll  |  1 +
 src/glsl/glsl_parser.yy | 30 ++
 src/glsl/glsl_parser_extras.cpp |  2 ++
 6 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index ef74e51..4921229 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -435,6 +435,7 @@ struct ast_type_qualifier {
 unsigned centroid:1;
  unsigned sample:1;
 unsigned uniform:1;
+unsigned buffer:1;
 unsigned smooth:1;
 unsigned flat:1;
 unsigned noperspective:1;
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 00f35eb..f9f1c08 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2501,6 +2501,8 @@ apply_type_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
   var->data.mode = ir_var_shader_out;
else if (qual->flags.q.uniform)
   var->data.mode = ir_var_uniform;
+   else if (qual->flags.q.buffer)
+  var->data.mode = ir_var_shader_storage;
 
if (!is_parameter && is_varying_var(var, state->stage)) {
   /* User-defined ins/outs are not permitted in compute shaders. */
@@ -5265,8 +5267,9 @@ ast_type_specifier::hir(exec_list *instructions,
  * \c glsl_struct_field to describe the members.
  *
  * If we're processing an interface block, var_mode should be the type of the
- * interface block (ir_var_shader_in, ir_var_shader_out, or ir_var_uniform).
- * If we're processing a structure, var_mode should be ir_var_auto.
+ * interface block (ir_var_shader_in, ir_var_shader_out, ir_var_uniform or
+ * ir_var_shader_storage).  If we're processing a structure, var_mode should be
+ * ir_var_auto.
  *
  * \return
  * The number of fields processed.  A pointer to the array structure fields is
@@ -5396,10 +5399,10 @@ ast_process_structure_or_interface_block(exec_list 
*instructions,
  fields[i].stream = qual->flags.q.explicit_stream ? qual->stream : -1;
 
  if (qual->flags.q.row_major || qual->flags.q.column_major) {
-if (!qual->flags.q.uniform) {
+if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
_mesa_glsl_error(&loc, state,
 "row_major and column_major can only be "
-"applied to uniform interface blocks");
+"applied to interface blocks");
 } else
validate_matrix_layout_for_type(state, &loc, field_type, NULL);
  }
@@ -5596,6 +5599,9 @@ ast_interface_block::hir(exec_list *instructions,
} else if (this->layout.flags.q.uniform) {
   var_mode = ir_var_uniform;
   iface_type_name = "uniform";
+   } else if (this->layout.flags.q.buffer) {
+  var_mode = ir_var_shader_storage;
+  iface_type_name = "buffer";
} else {
   var_mode = ir_var_auto;
   iface_type_name = "UNKNOWN";
diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index 1bcf6a2..fa4806a 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -78,7 +78,8 @@ ast_type_qualifier::has_storage() const
   || this->flags.q.varying
   || this->flags.q.in
   || this->flags.q.out
-  || this->flags.q.uniform;
+  || this->flags.q.uniform
+  || this->flags.q.buffer;
 }
 
 bool
diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll
index 10db5b8..845deeb 100644
--- a/src/glsl/glsl_lexer.ll
+++ b/src/glsl/glsl_lexer.ll
@@ -308,6 +308,7 @@ in  return IN_TOK;
 outreturn OUT_TOK;
 inout  return INOUT_TOK;
 uniformreturn UNIFORM;
+buffer return BUFFER;
 varyingDEPRECATED_ES_KEYWORD(VARYING);
 centroid   KEYWORD(120, 300, 120, 300, CENTROID);
 invariant  KEYWORD(120, 100, 120, 100, INVARIANT);
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 3ce9e10..8564cb9 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -134,7 +134,7 @@ static bool match_layout_qualifier(const char *s1, const 
char *s2,
 }
 
 %token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK DOUBLE_TOK
-%token BREAK CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT
+%token BREAK BUFFER CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT
 %token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4 
DVEC2 DVEC3 DVEC4
 %token CENTROID IN_TOK OUT_TOK INOUT_TOK UNIFORM VARYING SAMPLE
 %token NOPERSPECTIVE FLAT SMOOTH
@@ -1805,6 +1805,11 @@ storage_qualifier:
   memset(& $$, 0, sizeof($$));
   $$.flags.q.uniform = 1;
}
+   | BUFFER
+   {
+  memset(& $$, 0, sizeof($$));
+  $$.flags.q.buffer = 1;
+   }
;
 
 me

[Mesa-dev] [PATCH v3 (part1) 12/26] glsl: buffer variables cannot be defined outside interface blocks

2015-07-10 Thread Iago Toral Quiroga
From: Samuel Iglesias Gonsalvez 

Section 4.3.7 "Buffer Variables", GLSL 4.30 spec:

"Buffer variables may only be declared inside interface blocks
(section 4.3.9 “Interface Blocks”), which are then referred to as
shader storage blocks. It is a compile-time error to declare buffer
variables at global scope (outside a block)."

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/glsl/ast_to_hir.cpp | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 6299bf0..61020cf 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -3378,6 +3378,18 @@ ast_declarator_list::hir(exec_list *instructions,
 
decl_type = this->type->glsl_type(& type_name, state);
 
+   /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+*"Buffer variables may only be declared inside interface blocks
+*(section 4.3.9 “Interface Blocks”), which are then referred to as
+*shader storage blocks. It is a compile-time error to declare buffer
+*variables at global scope (outside a block)."
+*/
+   if (type->qualifier.flags.q.buffer && !decl_type->is_interface()) {
+  _mesa_glsl_error(&loc, state,
+   "buffer variables cannot be declared outside "
+   "interface blocks");
+   }
+
/* An offset-qualified atomic counter declaration sets the default
 * offset for the next declaration within the same atomic counter
 * buffer.
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 09/26] mesa: add MaxShaderStorageBlocks to struct gl_program_constants

2015-07-10 Thread Iago Toral Quiroga
From: Samuel Iglesias Gonsalvez 

v2:
- Set MaxShaderStorageBlocks to 8.

Signed-off-by: Samuel Iglesias Gonsalvez 
Reviewed-by: Jordan Justen 
---
 src/mesa/main/context.c | 2 ++
 src/mesa/main/mtypes.h  | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 5470c56..f4dc4e3 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -536,6 +536,8 @@ init_program_limits(struct gl_constants *consts, 
gl_shader_stage stage,
 
prog->MaxAtomicBuffers = 0;
prog->MaxAtomicCounters = 0;
+
+   prog->MaxShaderStorageBlocks = 8;
 }
 
 
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index f1ab4eb..8a4ad76 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3306,6 +3306,9 @@ struct gl_program_constants
 
/* GL_ARB_shader_image_load_store */
GLuint MaxImageUniforms;
+
+   /* GL_ARB_shader_storage_buffer_object */
+   GLuint MaxShaderStorageBlocks;
 };
 
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 23/26] glsl: Don't do constant variable on buffer variables

2015-07-10 Thread Iago Toral Quiroga
Since the backing storage for these is shared we cannot ensure that
the value won't change by writes from other threads. Normally SSBO
accesses are not guaranteed to be synchronized with other threads,
except when memoryBarrier is used. So, we might be able to optimize
some SSBO accesses, but for now we always take the safe path and emit
the SSBO access.

Reviewed-by: Jordan Justen 
---
 src/glsl/opt_constant_variable.cpp | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/glsl/opt_constant_variable.cpp 
b/src/glsl/opt_constant_variable.cpp
index 7222eb9..7aaaeed 100644
--- a/src/glsl/opt_constant_variable.cpp
+++ b/src/glsl/opt_constant_variable.cpp
@@ -115,6 +115,13 @@ ir_constant_variable_visitor::visit_enter(ir_assignment 
*ir)
if (!var)
   return visit_continue;
 
+   /* Ignore buffer variables, since the underlying storage is shared
+* and we can't be sure that this variable won't be written by another
+* thread.
+*/
+   if (var->data.mode == ir_var_shader_storage)
+  return visit_continue;
+
constval = ir->rhs->constant_expression_value();
if (!constval)
   return visit_continue;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 24/26] glsl: Don't do copy propagation on buffer variables

2015-07-10 Thread Iago Toral Quiroga
Since the backing storage for these is shared we cannot ensure that
the value won't change by writes from other threads. Normally SSBO
accesses are not guaranteed to be synchronized with other threads,
except when memoryBarrier is used. So, we might be able to optimize
some SSBO accesses, but for now we always take the safe path and emit
the SSBO access.

Reviewed-by: Jordan Justen 
---
 src/glsl/opt_copy_propagation.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/opt_copy_propagation.cpp 
b/src/glsl/opt_copy_propagation.cpp
index 806027b..f206995 100644
--- a/src/glsl/opt_copy_propagation.cpp
+++ b/src/glsl/opt_copy_propagation.cpp
@@ -330,7 +330,7 @@ ir_copy_propagation_visitor::add_copy(ir_assignment *ir)
  */
 ir->condition = new(ralloc_parent(ir)) ir_constant(false);
 this->progress = true;
-  } else {
+  } else if (lhs_var->data.mode != ir_var_shader_storage) {
 entry = new(this->acp) acp_entry(lhs_var, rhs_var);
 this->acp->push_tail(entry);
   }
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 07/26] glsl: Identify active uniform blocks that are buffer blocks as such.

2015-07-10 Thread Iago Toral Quiroga
Reviewed-by: Jordan Justen 
---
 src/glsl/link_uniform_block_active_visitor.cpp | 1 +
 src/glsl/link_uniform_block_active_visitor.h   | 1 +
 src/glsl/link_uniform_blocks.cpp   | 4 
 src/mesa/main/mtypes.h | 5 +
 4 files changed, 11 insertions(+)

diff --git a/src/glsl/link_uniform_block_active_visitor.cpp 
b/src/glsl/link_uniform_block_active_visitor.cpp
index ddfd2b2..5102947 100644
--- a/src/glsl/link_uniform_block_active_visitor.cpp
+++ b/src/glsl/link_uniform_block_active_visitor.cpp
@@ -44,6 +44,7 @@ process_block(void *mem_ctx, struct hash_table *ht, 
ir_variable *var)
 
   b->type = block_type;
   b->has_instance_name = var->is_interface_instance();
+  b->is_shader_storage = var->data.mode == ir_var_shader_storage;
 
   if (var->data.explicit_binding) {
  b->has_binding = true;
diff --git a/src/glsl/link_uniform_block_active_visitor.h 
b/src/glsl/link_uniform_block_active_visitor.h
index e5ea501..b663a88 100644
--- a/src/glsl/link_uniform_block_active_visitor.h
+++ b/src/glsl/link_uniform_block_active_visitor.h
@@ -38,6 +38,7 @@ struct link_uniform_block_active {
 
bool has_instance_name;
bool has_binding;
+   bool is_shader_storage;
 };
 
 class link_uniform_block_active_visitor : public ir_hierarchical_visitor {
diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp
index 898544b..4df39e2 100644
--- a/src/glsl/link_uniform_blocks.cpp
+++ b/src/glsl/link_uniform_blocks.cpp
@@ -293,6 +293,8 @@ link_uniform_blocks(void *mem_ctx,
 blocks[i].NumUniforms =
(unsigned)(ptrdiff_t)(&variables[parcel.index] - 
blocks[i].Uniforms);
 
+blocks[i].IsShaderStorage = b->is_shader_storage;
+
 i++;
  }
   } else {
@@ -311,6 +313,8 @@ link_uniform_blocks(void *mem_ctx,
  blocks[i].NumUniforms =
 (unsigned)(ptrdiff_t)(&variables[parcel.index] - 
blocks[i].Uniforms);
 
+ blocks[i].IsShaderStorage = b->is_shader_storage;
+
  i++;
   }
}
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 9ec342b..86508c3 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2565,6 +2565,11 @@ struct gl_uniform_block
GLuint UniformBufferSize;
 
/**
+* Is this actually an interface block for a shader storage buffer?
+*/
+   bool IsShaderStorage;
+
+   /**
 * Layout specified in the shader
 *
 * This isn't accessible through the API, but it is used while
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 21/26] glsl: Do not kill dead assignments to buffer variables or SSBO declarations.

2015-07-10 Thread Iago Toral Quiroga
If we kill dead assignments we lose the buffer writes.

Also, we never kill UBO declarations even if they are never referenced
by the shader, they are always considered active. Although the spec
does not seem to say this specifically for SSBOs, it is probably implied
since SSBOs are pretty much the same as UBOs, only that you can write
to them.

v2:
- Fix the comment (Jordan)

Reviewed-by: Jordan Justen 
---
 src/glsl/opt_dead_code.cpp | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/glsl/opt_dead_code.cpp b/src/glsl/opt_dead_code.cpp
index 7b4730a..04e4d56 100644
--- a/src/glsl/opt_dead_code.cpp
+++ b/src/glsl/opt_dead_code.cpp
@@ -77,11 +77,13 @@ do_dead_code(exec_list *instructions, bool 
uniform_locations_assigned)
 
   if (entry->assign) {
 /* Remove a single dead assignment to the variable we found.
- * Don't do so if it's a shader or function output, though.
+ * Don't do so if it's a shader or function output or a shader
+ * storage variable though.
  */
 if (entry->var->data.mode != ir_var_function_out &&
 entry->var->data.mode != ir_var_function_inout &&
- entry->var->data.mode != ir_var_shader_out) {
+ entry->var->data.mode != ir_var_shader_out &&
+ entry->var->data.mode != ir_var_shader_storage) {
entry->assign->remove();
progress = true;
 
@@ -99,7 +101,8 @@ do_dead_code(exec_list *instructions, bool 
uniform_locations_assigned)
  * stage.  Also, once uniform locations have been assigned, the
  * declaration cannot be deleted.
  */
- if (entry->var->data.mode == ir_var_uniform) {
+ if (entry->var->data.mode == ir_var_uniform ||
+ entry->var->data.mode == ir_var_shader_storage) {
 if (uniform_locations_assigned || entry->var->constant_value)
continue;
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 18/26] mesa: Implement _mesa_BindBufferBase for target GL_SHADER_STORAGE_BUFFER

2015-07-10 Thread Iago Toral Quiroga
Reviewed-by: Jordan Justen 
---
 src/mesa/main/bufferobj.c | 56 +++
 1 file changed, 56 insertions(+)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 0a9ffe4..c3548b5 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -3092,6 +3092,37 @@ bind_uniform_buffer(struct gl_context *ctx,
 }
 
 /**
+ * Binds a buffer object to a shader storage buffer binding point.
+ *
+ * Unlike set_ssbo_binding(), this function also flushes vertices
+ * and updates NewDriverState.  It also checks if the binding
+ * has actually changed before updating it.
+ */
+static void
+bind_shader_storage_buffer(struct gl_context *ctx,
+   GLuint index,
+   struct gl_buffer_object *bufObj,
+   GLintptr offset,
+   GLsizeiptr size,
+   GLboolean autoSize)
+{
+   struct gl_shader_storage_buffer_binding *binding =
+  &ctx->ShaderStorageBufferBindings[index];
+
+   if (binding->BufferObject == bufObj &&
+   binding->Offset == offset &&
+   binding->Size == size &&
+   binding->AutomaticSize == autoSize) {
+  return;
+   }
+
+   FLUSH_VERTICES(ctx, 0);
+   ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+   set_ssbo_binding(ctx, binding, bufObj, offset, size, autoSize);
+}
+
+/**
  * Bind a region of a buffer object to a uniform block binding point.
  * \param index  the uniform buffer binding point index
  * \param bufObj  the buffer object
@@ -3150,6 +3181,28 @@ bind_buffer_base_uniform_buffer(struct gl_context *ctx,
 }
 
 /**
+ * Bind a buffer object to a shader storage block binding point.
+ * As above, but offset = 0.
+ */
+static void
+bind_buffer_base_shader_storage_buffer(struct gl_context *ctx,
+   GLuint index,
+   struct gl_buffer_object *bufObj)
+{
+   if (index >= ctx->Const.MaxShaderStorageBufferBindings) {
+  _mesa_error(ctx, GL_INVALID_VALUE, "glBindBufferBase(index=%d)", index);
+  return;
+   }
+
+   _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, bufObj);
+
+   if (bufObj == ctx->Shared->NullBufferObj)
+  bind_shader_storage_buffer(ctx, index, bufObj, -1, -1, GL_TRUE);
+   else
+  bind_shader_storage_buffer(ctx, index, bufObj, 0, 0, GL_TRUE);
+}
+
+/**
  * Binds a buffer object to an atomic buffer binding point.
  *
  * The caller is responsible for validating the offset,
@@ -4240,6 +4293,9 @@ _mesa_BindBufferBase(GLenum target, GLuint index, GLuint 
buffer)
case GL_UNIFORM_BUFFER:
   bind_buffer_base_uniform_buffer(ctx, index, bufObj);
   return;
+   case GL_SHADER_STORAGE_BUFFER:
+  bind_buffer_base_shader_storage_buffer(ctx, index, bufObj);
+  return;
case GL_ATOMIC_COUNTER_BUFFER:
   bind_atomic_buffer(ctx, index, bufObj, 0, 0,
  "glBindBufferBase");
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 19/26] mesa: Implement _mesa_BindBufferRange for target GL_SHADER_STORAGE_BUFFER

2015-07-10 Thread Iago Toral Quiroga
Reviewed-by: Jordan Justen 
---
 src/mesa/main/bufferobj.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index c3548b5..4e25a72 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -3157,6 +3157,40 @@ bind_buffer_range_uniform_buffer(struct gl_context *ctx,
bind_uniform_buffer(ctx, index, bufObj, offset, size, GL_FALSE);
 }
 
+/**
+ * Bind a region of a buffer object to a shader storage block binding point.
+ * \param index  the shader storage buffer binding point index
+ * \param bufObj  the buffer object
+ * \param offset  offset to the start of buffer object region
+ * \param size  size of the buffer object region
+ */
+static void
+bind_buffer_range_shader_storage_buffer(struct gl_context *ctx,
+GLuint index,
+struct gl_buffer_object *bufObj,
+GLintptr offset,
+GLsizeiptr size)
+{
+   if (index >= ctx->Const.MaxShaderStorageBufferBindings) {
+  _mesa_error(ctx, GL_INVALID_VALUE, "glBindBufferRange(index=%d)", index);
+  return;
+   }
+
+   if (offset & (ctx->Const.ShaderStorageBufferOffsetAlignment - 1)) {
+  _mesa_error(ctx, GL_INVALID_VALUE,
+  "glBindBufferRange(offset misaligned %d/%d)", (int) offset,
+  ctx->Const.ShaderStorageBufferOffsetAlignment);
+  return;
+   }
+
+   if (bufObj == ctx->Shared->NullBufferObj) {
+  offset = -1;
+  size = -1;
+   }
+
+   _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, bufObj);
+   bind_shader_storage_buffer(ctx, index, bufObj, offset, size, GL_FALSE);
+}
 
 /**
  * Bind a buffer object to a uniform block binding point.
@@ -4227,6 +4261,9 @@ _mesa_BindBufferRange(GLenum target, GLuint index,
case GL_UNIFORM_BUFFER:
   bind_buffer_range_uniform_buffer(ctx, index, bufObj, offset, size);
   return;
+   case GL_SHADER_STORAGE_BUFFER:
+  bind_buffer_range_shader_storage_buffer(ctx, index, bufObj, offset, 
size);
+  return;
case GL_ATOMIC_COUNTER_BUFFER:
   bind_atomic_buffer(ctx, index, bufObj, offset, size,
  "glBindBufferRange");
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 16/26] mesa: Implement _mesa_BindBuffersBase for target GL_SHADER_STORAGE_BUFFER

2015-07-10 Thread Iago Toral Quiroga
v2:
- Add space before const (Jordan)

Reviewed-by: Jordan Justen 
---
 src/mesa/main/bufferobj.c | 142 ++
 src/mesa/main/mtypes.h|   7 +++
 2 files changed, 149 insertions(+)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index ee920a9..27638ca 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -3034,6 +3034,33 @@ set_ubo_binding(struct gl_context *ctx,
 }
 
 /**
+ * Binds a buffer object to a shader storage buffer binding point.
+ *
+ * The caller is responsible for flushing vertices and updating
+ * NewDriverState.
+ */
+static void
+set_ssbo_binding(struct gl_context *ctx,
+ struct gl_shader_storage_buffer_binding *binding,
+ struct gl_buffer_object *bufObj,
+ GLintptr offset,
+ GLsizeiptr size,
+ GLboolean autoSize)
+{
+   _mesa_reference_buffer_object(ctx, &binding->BufferObject, bufObj);
+
+   binding->Offset = offset;
+   binding->Size = size;
+   binding->AutomaticSize = autoSize;
+
+   /* If this is a real buffer object, mark it as having been used
+* at some point as a SSBO.
+*/
+   if (size >= 0)
+  bufObj->UsageHistory |= USAGE_SHADER_STORAGE_BUFFER;
+}
+
+/**
  * Binds a buffer object to a uniform buffer binding point.
  *
  * Unlike set_ubo_binding(), this function also flushes vertices
@@ -3254,6 +3281,35 @@ error_check_bind_uniform_buffers(struct gl_context *ctx,
return true;
 }
 
+static bool
+error_check_bind_shader_storage_buffers(struct gl_context *ctx,
+GLuint first, GLsizei count,
+const char *caller)
+{
+   if (!ctx->Extensions.ARB_shader_storage_buffer_object) {
+  _mesa_error(ctx, GL_INVALID_ENUM,
+  "%s(target=GL_SHADER_STORAGE_BUFFER)", caller);
+  return false;
+   }
+
+   /* The ARB_multi_bind spec says:
+*
+* "An INVALID_OPERATION error is generated if <first> + <count> is
+*  greater than the number of target-specific indexed binding points,
+*  as described in section 6.7.1."
+*/
+   if (first + count > ctx->Const.MaxShaderStorageBufferBindings) {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "%s(first=%u + count=%d > the value of "
+  "GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS=%u)",
+  caller, first, count,
+  ctx->Const.MaxShaderStorageBufferBindings);
+  return false;
+   }
+
+   return true;
+}
+
 /**
  * Unbind all uniform buffers in the range
  * <first> through <first>+<count>-1
@@ -3269,6 +3325,22 @@ unbind_uniform_buffers(struct gl_context *ctx, GLuint 
first, GLsizei count)
   bufObj, -1, -1, GL_TRUE);
 }
 
+/**
+ * Unbind all shader storage buffers in the range
+ * <first> through <first>+<count>-1
+ */
+static void
+unbind_shader_storage_buffers(struct gl_context *ctx, GLuint first,
+  GLsizei count)
+{
+   struct gl_buffer_object *bufObj = ctx->Shared->NullBufferObj;
+   GLint i;
+
+   for (i = 0; i < count; i++)
+  set_ssbo_binding(ctx, &ctx->ShaderStorageBufferBindings[first + i],
+   bufObj, -1, -1, GL_TRUE);
+}
+
 static void
 bind_uniform_buffers_base(struct gl_context *ctx, GLuint first, GLsizei count,
   const GLuint *buffers)
@@ -3336,6 +3408,73 @@ bind_uniform_buffers_base(struct gl_context *ctx, GLuint 
first, GLsizei count,
 }
 
 static void
+bind_shader_storage_buffers_base(struct gl_context *ctx, GLuint first,
+ GLsizei count, const GLuint *buffers)
+{
+   GLint i;
+
+   if (!error_check_bind_shader_storage_buffers(ctx, first, count,
+"glBindBuffersBase"))
+  return;
+
+   /* Assume that at least one binding will be changed */
+   FLUSH_VERTICES(ctx, 0);
+   ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+   if (!buffers) {
+  /* The ARB_multi_bind spec says:
+   *
+   *   "If <buffers> is NULL, all bindings from <first> through
+   *    <first>+<count>-1 are reset to their unbound (zero) state."
+   */
+  unbind_shader_storage_buffers(ctx, first, count);
+  return;
+   }
+
+   /* Note that the error semantics for multi-bind commands differ from
+* those of other GL commands.
+*
+* The Issues section in the ARB_multi_bind spec says:
+*
+*"(11) Typically, OpenGL specifies that if an error is generated by a
+*  command, that command has no effect.  This is somewhat
+*  unfortunate for multi-bind commands, because it would require a
+*  first pass to scan the entire list of bound objects for errors
+*  and then a second pass to actually perform the bindings.
+*  Should we have different error semantics?
+*
+*   RESOLVED:  Yes.  In this specification, when the parameters for
+*   one of the <count> binding points are invalid, that bin

[Mesa-dev] [PATCH v3 (part1) 06/26] glsl: link buffer variables and shader storage buffer interface blocks

2015-07-10 Thread Iago Toral Quiroga
From: Kristian Høgsberg 

Reviewed-by: Jordan Justen 
---
 src/glsl/link_interface_blocks.cpp | 15 ---
 src/glsl/link_uniform_initializers.cpp |  3 ++-
 src/glsl/link_uniforms.cpp |  8 +---
 src/glsl/linker.cpp|  4 ++--
 4 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/src/glsl/link_interface_blocks.cpp 
b/src/glsl/link_interface_blocks.cpp
index 07f5b42..f9ddb13 100644
--- a/src/glsl/link_interface_blocks.cpp
+++ b/src/glsl/link_interface_blocks.cpp
@@ -112,7 +112,8 @@ intrastage_match(interface_block_definition *a,
 * it's not clear from the spec whether they need to match, but
 * Mesa's implementation relies on them matching.
 */
-   if (a->instance_name != NULL && mode != ir_var_uniform &&
+   if (a->instance_name != NULL &&
+   mode != ir_var_uniform && mode != ir_var_shader_storage &&
strcmp(a->instance_name, b->instance_name) != 0) {
   return false;
}
@@ -253,6 +254,7 @@ validate_intrastage_interface_blocks(struct 
gl_shader_program *prog,
interface_block_definitions in_interfaces;
interface_block_definitions out_interfaces;
interface_block_definitions uniform_interfaces;
+   interface_block_definitions buffer_interfaces;
 
for (unsigned int i = 0; i < num_shaders; i++) {
   if (shader_list[i] == NULL)
@@ -279,6 +281,9 @@ validate_intrastage_interface_blocks(struct 
gl_shader_program *prog,
  case ir_var_uniform:
 definitions = &uniform_interfaces;
 break;
+ case ir_var_shader_storage:
+definitions = &buffer_interfaces;
+break;
  default:
 /* Only in, out, and uniform interfaces are legal, so we should
  * never get here.
@@ -361,7 +366,9 @@ validate_interstage_uniform_blocks(struct gl_shader_program 
*prog,
   const gl_shader *stage = stages[i];
   foreach_in_list(ir_instruction, node, stage->ir) {
  ir_variable *var = node->as_variable();
- if (!var || !var->get_interface_type() || var->data.mode != 
ir_var_uniform)
+ if (!var || !var->get_interface_type() ||
+ (var->data.mode != ir_var_uniform &&
+  var->data.mode != ir_var_shader_storage))
 continue;
 
  interface_block_definition *old_def =
@@ -374,7 +381,9 @@ validate_interstage_uniform_blocks(struct gl_shader_program 
*prog,
  * uniform matching rules (for uniforms, it is as though all
  * shaders are in the same shader stage).
  */
-if (!intrastage_match(old_def, &new_def, ir_var_uniform, prog)) {
+if (!intrastage_match(old_def, &new_def,
+  (ir_variable_mode) var->data.mode,
+  prog)) {
linker_error(prog, "definitions of interface block `%s' do not "
 "match\n", var->get_interface_type()->name);
return;
diff --git a/src/glsl/link_uniform_initializers.cpp 
b/src/glsl/link_uniform_initializers.cpp
index d1f904e..6322a2d 100644
--- a/src/glsl/link_uniform_initializers.cpp
+++ b/src/glsl/link_uniform_initializers.cpp
@@ -256,7 +256,8 @@ link_set_uniform_initializers(struct gl_shader_program 
*prog,
   foreach_in_list(ir_instruction, node, shader->ir) {
 ir_variable *const var = node->as_variable();
 
-if (!var || var->data.mode != ir_var_uniform)
+if (!var || (var->data.mode != ir_var_uniform &&
+var->data.mode != ir_var_shader_storage))
continue;
 
 if (!mem_ctx)
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 5fdf25e..e786ddc 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -766,7 +766,8 @@ link_update_uniform_buffer_variables(struct gl_shader 
*shader)
   if ((var == NULL) || !var->is_in_buffer_block())
 continue;
 
-  assert(var->data.mode == ir_var_uniform);
+  assert(var->data.mode == ir_var_uniform ||
+ var->data.mode == ir_var_shader_storage);
 
   if (var->is_interface_instance()) {
  var->data.location = 0;
@@ -943,7 +944,8 @@ link_assign_uniform_locations(struct gl_shader_program 
*prog,
   foreach_in_list(ir_instruction, node, sh->ir) {
 ir_variable *const var = node->as_variable();
 
-if ((var == NULL) || (var->data.mode != ir_var_uniform))
+if ((var == NULL) || (var->data.mode != ir_var_uniform &&
+  var->data.mode != ir_var_shader_storage))
continue;
 
 uniform_size.process(var);
@@ -987,7 +989,7 @@ link_assign_uniform_locations(struct gl_shader_program 
*prog,
   foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) {
 ir_variable *const var = node->as_variable();
 
-if ((var == NULL) || (var->data.mode != ir_var_uniform))
+if ((var == NULL) || (var->data.mode != ir_var_uniform && 
var->data.m

[Mesa-dev] [PATCH v3 (part1) 25/26] glsl: Lower shader storage buffer object writes to GLSL IR intrinsics

2015-07-10 Thread Iago Toral Quiroga
From: Samuel Iglesias Gonsalvez 

Extend the existing lower_ubo_reference pass to also detect SSBO writes
and lower them to __intrinsic_store_ssbo intrinsics.

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/lower_ubo_reference.cpp | 441 +++
 1 file changed, 311 insertions(+), 130 deletions(-)

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index a61ff29..460b490 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -37,6 +37,7 @@
 #include "ir_builder.h"
 #include "ir_rvalue_visitor.h"
 #include "main/macros.h"
+#include "glsl_parser_extras.h"
 
 using namespace ir_builder;
 
@@ -139,12 +140,31 @@ public:
}
 
void handle_rvalue(ir_rvalue **rvalue);
-   void emit_ubo_loads(ir_dereference *deref, ir_variable *base_offset,
-   unsigned int deref_offset, bool row_major,
-   int matrix_columns);
+   ir_visitor_status visit_enter(ir_assignment *ir);
+
+   void setup_for_load_or_write(ir_variable *var,
+ir_dereference *deref,
+ir_rvalue **offset,
+unsigned *const_offset,
+bool *row_major,
+int *matrix_columns);
ir_expression *ubo_load(const struct glsl_type *type,
   ir_rvalue *offset);
 
+
+   void check_for_ssbo_write(ir_assignment *ir);
+   void write_to_memory(ir_dereference *deref,
+ir_variable *var,
+ir_variable *write_var,
+unsigned write_mask);
+   ir_call *ssbo_write(ir_rvalue *deref, ir_rvalue *offset,
+   unsigned write_mask);
+
+   void emit_reads_or_writes(bool is_write, ir_dereference *deref,
+ ir_variable *base_offset, unsigned int 
deref_offset,
+ bool row_major, int matrix_columns,
+ unsigned write_mask);
+
void *mem_ctx;
struct gl_shader *shader;
struct gl_uniform_buffer_variable *ubo_var;
@@ -218,26 +238,20 @@ interface_field_name(void *mem_ctx, char *base_name, 
ir_dereference *d,
 }
 
 void
-lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
+lower_ubo_reference_visitor::setup_for_load_or_write(ir_variable *var,
+ ir_dereference *deref,
+ ir_rvalue **offset,
+ unsigned *const_offset,
+ bool *row_major,
+ int *matrix_columns)
 {
-   if (!*rvalue)
-  return;
-
-   ir_dereference *deref = (*rvalue)->as_dereference();
-   if (!deref)
-  return;
-
-   ir_variable *var = deref->variable_referenced();
-   if (!var || !var->is_in_buffer_block())
-  return;
-
-   mem_ctx = ralloc_parent(*rvalue);
-
+   /* Figure out the name of the interface block */
ir_rvalue *nonconst_block_index;
const char *const field_name =
   interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
deref, &nonconst_block_index);
 
+   /* Locate the ubo block by interface name */
this->uniform_block = NULL;
for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
   if (strcmp(field_name, shader->UniformBlocks[i].Name) == 0) {
@@ -263,10 +277,10 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue 
**rvalue)
 
assert(this->uniform_block);
 
-   ir_rvalue *offset = new(mem_ctx) ir_constant(0u);
-   unsigned const_offset = 0;
-   bool row_major = is_dereferenced_thing_row_major(deref);
-   int matrix_columns = 1;
+   *offset = new(mem_ctx) ir_constant(0u);
+   *const_offset = 0;
+   *row_major = is_dereferenced_thing_row_major(deref);
+   *matrix_columns = 1;
 
/* Calculate the offset to the start of the region of the UBO
 * dereferenced by *rvalue.  This may be a variable offset if an
@@ -275,76 +289,76 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue 
**rvalue)
while (deref) {
   switch (deref->ir_type) {
   case ir_type_dereference_variable: {
-const_offset += ubo_var->Offset;
-deref = NULL;
-break;
+ *const_offset += ubo_var->Offset;
+ deref = NULL;
+ break;
   }
 
   case ir_type_dereference_array: {
-ir_dereference_array *deref_array = (ir_dereference_array *)deref;
-unsigned array_stride;
-if (deref_array->array->type->is_matrix() && row_major) {
-   /* When loading a vector out of a row major matrix, the
-* step between the columns (vectors) is the size of a
-* float, while the step between the rows (elements of a
-* vector) is handled below in emit_ubo_loads.
-*/
-   array_strid

[Mesa-dev] [PATCH v3 (part1) 22/26] glsl: Don't do constant propagation on buffer variables

2015-07-10 Thread Iago Toral Quiroga
Since the backing storage for these is shared we cannot ensure that
the value won't change by writes from other threads. Normally SSBO
accesses are not guaranteed to be synchronized with other threads,
except when memoryBarrier is used. So, we might be able to optimize
some SSBO accesses, but for now we always take the safe path and emit
the SSBO access.

Reviewed-by: Jordan Justen 
---
 src/glsl/opt_constant_propagation.cpp | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/glsl/opt_constant_propagation.cpp 
b/src/glsl/opt_constant_propagation.cpp
index 90cc0c8..10be8e8 100644
--- a/src/glsl/opt_constant_propagation.cpp
+++ b/src/glsl/opt_constant_propagation.cpp
@@ -444,6 +444,14 @@ 
ir_constant_propagation_visitor::add_constant(ir_assignment *ir)
if (!deref->var->type->is_vector() && !deref->var->type->is_scalar())
   return;
 
+   /* We can't do constant propagation on buffer variables, since the underlying
+* memory storage is shared across multiple threads we can't be sure that
+* the variable value isn't modified between this assignment and the next
+* instruction where its value is read.
+*/
+   if (deref->var->data.mode == ir_var_shader_storage)
+  return;
+
entry = new(this->mem_ctx) acp_entry(deref->var, ir->write_mask, constant);
this->acp->push_tail(entry);
 }
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 (part1) 26/26] glsl: Lower shader storage buffer object loads to GLSL IR intrinsics

2015-07-10 Thread Iago Toral Quiroga
From: Samuel Iglesias Gonsalvez 

Extend the existing lower_ubo_reference pass to also detect SSBO loads
and lower them to __intrinsic_load_ssbo intrinsics.

Signed-off-by: Samuel Iglesias Gonsalvez 
---
 src/glsl/lower_ubo_reference.cpp | 73 +++-
 1 file changed, 65 insertions(+), 8 deletions(-)

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 460b490..822b723 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -150,7 +150,8 @@ public:
 int *matrix_columns);
ir_expression *ubo_load(const struct glsl_type *type,
   ir_rvalue *offset);
-
+   ir_call *ssbo_load(const struct glsl_type *type,
+  ir_rvalue *offset);
 
void check_for_ssbo_write(ir_assignment *ir);
void write_to_memory(ir_dereference *deref,
@@ -170,6 +171,7 @@ public:
struct gl_uniform_buffer_variable *ubo_var;
ir_rvalue *uniform_block;
bool progress;
+   bool is_shader_storage;
 };
 
 /**
@@ -266,6 +268,8 @@ 
lower_ubo_reference_visitor::setup_for_load_or_write(ir_variable *var,
 this->uniform_block = index;
  }
 
+ this->is_shader_storage = shader->UniformBlocks[i].IsShaderStorage;
+
  struct gl_uniform_block *block = &shader->UniformBlocks[i];
 
  this->ubo_var = var->is_interface_instance()
@@ -415,7 +419,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue 
**rvalue)
if (!var || !var->is_in_buffer_block())
   return;
 
-   mem_ctx = ralloc_parent(*rvalue);
+   mem_ctx = ralloc_parent(shader->ir);
 
ir_rvalue *offset = NULL;
unsigned const_offset;
@@ -512,6 +516,42 @@ lower_ubo_reference_visitor::ssbo_write(ir_rvalue *deref,
return new(mem_ctx) ir_call(sig, NULL, &call_params);
 }
 
+ir_call *
+lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type,
+   ir_rvalue *offset)
+{
+   exec_list sig_params;
+
+   ir_variable *block_ref = new(mem_ctx)
+  ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
+   sig_params.push_tail(block_ref);
+
+   ir_variable *offset_ref = new(mem_ctx)
+  ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
+   sig_params.push_tail(offset_ref);
+
+   ir_function_signature *sig =
+  new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
+   assert(sig);
+   sig->replace_parameters(&sig_params);
+   sig->is_intrinsic = true;
+
+   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
+   f->add_signature(sig);
+
+   ir_variable *result = new(mem_ctx)
+  ir_variable(type, "ssbo_load_result", ir_var_temporary);
+   base_ir->insert_before(result);
+   ir_dereference_variable *deref_result = new(mem_ctx)
+  ir_dereference_variable(result);
+
+   exec_list call_params;
+   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
+   call_params.push_tail(offset->clone(mem_ctx, NULL));
+
+   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
+}
+
 static inline int
 writemask_for_size(unsigned n)
 {
@@ -610,9 +650,17 @@ lower_ubo_reference_visitor::emit_reads_or_writes(bool 
is_write,
  add(base_offset, new(mem_ctx) ir_constant(deref_offset));
   if (is_write)
  base_ir->insert_after(ssbo_write(deref, offset, write_mask));
-  else
- base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
-   ubo_load(deref->type, offset)));
+  else {
+ if (!this->is_shader_storage) {
+ base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
+   ubo_load(deref->type, offset)));
+ } else {
+ir_call *load_ssbo = ssbo_load(deref->type, offset);
+base_ir->insert_before(load_ssbo);
+ir_rvalue *value = 
load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
+base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), value));
+ }
+  }
} else {
   unsigned N = deref->type->is_double() ? 8 : 4;
 
@@ -640,9 +688,18 @@ lower_ubo_reference_visitor::emit_reads_or_writes(bool 
is_write,
  if (is_write) {
 base_ir->insert_after(ssbo_write(swizzle(deref, i, 1), 
chan_offset, 1));
  } else {
-base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
-  ubo_load(deref_type, chan_offset),
-  (1U << i)));
+if (!this->is_shader_storage) {
+   base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
+ ubo_load(deref_type, chan_offset),
+ (1U << i)));
+} else {
+   ir_call *load_ssbo = ssbo_load(deref_type, chan_offset);
+   base_ir->insert_before(load_ssbo)

Re: [Mesa-dev] [PATCH v3 (part1) 00/26] ARB_shader_storage_buffer_object (mesa)

2015-07-10 Thread Iago Toral
On Fri, 2015-07-10 at 12:13 +0200, Iago Toral Quiroga wrote:
> As discussed with Jordan, this v3-part1 series contains a good part of the
> frontend stuff (most of which has already been reviewed). The idea is to get
> this landed ahead, since some of the remaining patches (specifically the i965
> backend stuff) depend on other patches from Curro that have not landed yet.
> 
> For reference, this v3-part1 series does not include all the frontend bits,
> specifically it lacks: support for the optional unsized array at the bottom of
> SSBO definitions, implementation of layout mode std430, getters and queries
> for GL_SHADER_STORAGE_BUFFER targets and glShaderStorageBlockBinding.

I forgot to make a reference to the changes in v3... the main change is
the fact that now we lower ssbo operations to GLSL IR intrinsics. In v2
we had ssbo loads implemented as ir_expression nodes and we had added a
new IR node for ssbo stores.

Iago

> Link to the original v2:
> http://lists.freedesktop.org/archives/mesa-dev/2015-June/085562.html
> 
> Development branch with this series:
> git clone -b itoral-ARB_shader_storage_buffer_object-v3-part1 
> https://github.com/Igalia/mesa.git
> 
> For reference, here is a repository with the full v3 series (together with the
> patches from Curro it depends on):
> git clone -b itoral-ARB_shader_storage_buffer_object-v3 
> https://github.com/Igalia/mesa.git
> 
> Piglit repository including SSBO tests:
> git clone -b arb_shader_storage_buffer_object-v2 
> https://github.com/Igalia/piglit.git
> 
> Note that if you intend to run these with this v3-part1 series you will need
> to use:
> MESA_EXTENSION_OVERRIDE="GL_ARB_shader_storage_buffer_object"
> 
> There are no piglit regressions with this series except for
> arb_program_interface_query-getprogramresourceiv, but that is expected since
> that test was edited some weeks ago to incorporate an SSBO in one of the shaders, 
> so
> it will abort when it hits that (since the NIR and i965 backend bits are not
> included with this v3-part1 series).
> 
> As for the SSBO specific piglit tests present in the aforementioned 
> repository,
> the results are the ones expected and can be observed here:
> http://paste.ubuntu.com/11854375/
> 
> Some notes on these results:
> 
> - crashes: mostly due to the fact that this part1 series does not provide the
> NIR implementation of the new intrinsics, so they hit an assert. The
> shader-storage-block-different-size crash is fixed with a patch from Antia 
> that
> was part of one of our dEQP series and was also included at the end of our
> v2 series: 
> http://lists.freedesktop.org/archives/mesa-dev/2015-June/085642.html
> - Other than that, compiler/link tests generally pass and failed tests are
> related to missing features (like unsized arrays or queries) or missing i965
> backend implementation.
> 
> With the full v3 series there are no regressions (except for the
> one we discussed with the v1 of this series that is actually a bogus UBO test
> for which we sent a fix to piglit).
> 
> Iago Toral Quiroga (15):
>   mesa: rename is_in_uniform_block to is_in_buffer_block
>   nir: add nir_var_shader_storage
>   glsl: Identify active uniform blocks that are buffer blocks as such.
>   mesa: Add shader storage buffer support to struct gl_context
>   mesa: Initialize and free shader storage buffers
>   mesa: Implement _mesa_DeleteBuffers for target
> GL_SHADER_STORAGE_BUFFER
>   mesa: Implement _mesa_BindBuffersBase for target
> GL_SHADER_STORAGE_BUFFER
>   mesa: Implement _mesa_BindBuffersRange for target
> GL_SHADER_STORAGE_BUFFER
>   mesa: Implement _mesa_BindBufferBase for target
> GL_SHADER_STORAGE_BUFFER
>   mesa: Implement _mesa_BindBufferRange for target
> GL_SHADER_STORAGE_BUFFER
>   glsl: Don't do tree grafting on buffer variables
>   glsl: Do not kill dead assignments to buffer variables or SSBO
> declarations.
>   glsl: Don't do constant propagation on buffer variables
>   glsl: Don't do constant variable on buffer variables
>   glsl: Don't do copy propagation on buffer variables
> 
> Kristian Høgsberg (3):
>   glsl: Add ir_var_shader_storage
>   glsl: Implement parser support for 'buffer' qualifier
>   glsl: link buffer variables and shader storage buffer interface blocks
> 
> Samuel Iglesias Gonsalvez (8):
>   mesa: define ARB_shader_storage_buffer_object extension
>   mesa: add MaxShaderStorageBlocks to struct gl_program_constants
>   glsl: enable binding layout qualifier usage for shader storage buffer
> objects
>   glsl: shader buffer variables cannot have initializers
>   glsl: buffer variables cannot be defined outside interface blocks
>   glsl: fix error messages in invalid declarations of shader storage
> blocks
>   glsl: Lower shader storage buffer object writes to GLSL IR intrinsics
>   glsl: Lower shader storage buffer object loads to GLSL IR intrinsics
> 
>  src/glsl/ast.h   |   1 +
>  src/glsl/ast_to_hir.cpp 

Re: [Mesa-dev] [PATCH 1/4] gallium: clarify reference counting for fence

2015-07-10 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Wed, Jul 8, 2015 at 10:34 PM, Rob Clark  wrote:
> From: Rob Clark 
>
> Nowhere was it spelled out that the state tracker may expect the pipe
> driver to unref the old fence.
>
> Signed-off-by: Rob Clark 
> ---
>  src/gallium/include/pipe/p_context.h | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/include/pipe/p_context.h 
> b/src/gallium/include/pipe/p_context.h
> index c2eedf8..d2c2e4c 100644
> --- a/src/gallium/include/pipe/p_context.h
> +++ b/src/gallium/include/pipe/p_context.h
> @@ -361,8 +361,14 @@ struct pipe_context {
>  const void *clear_value,
>  int clear_value_size);
>
> -   /** Flush draw commands
> +   /**
> +* Flush draw commands
> +*
> +* NOTE: use screen->fence_reference() (or equivalent) to transfer
> +* new fence ref to **fence, to ensure that previous fence is unref'd
>  *
> +* \param fence  if not NULL, an old fence to unref and transfer a
> +*new fence reference to
>  * \param flags  bitfield of enum pipe_flush_flags values.
>  */
> void (*flush)(struct pipe_context *pipe,
> --
> 2.4.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] xa: don't leak fences

2015-07-10 Thread Marek Olšák
I wonder... do we still need XA considering that everybody can just
use glamor instead?

Marek


On Wed, Jul 8, 2015 at 7:39 PM, Rob Clark  wrote:
> From: Rob Clark 
>
> XA was never unref'ing last_fence in the various call paths to
> pipe->flush().  Add this to xa_context_flush() and update the other
> open-coded calls to pipe->flush() to use xa_context_flush() instead.
>
> This fixes a memory leak reported with xf86-video-freedreno.
>
> Reported-by: Nicolas Dechesne 
> Cc: "10.5 10.6" 
> Signed-off-by: Rob Clark 
> ---
>  src/gallium/state_trackers/xa/xa_context.c | 6 +-
>  src/gallium/state_trackers/xa/xa_tracker.c | 2 +-
>  src/gallium/state_trackers/xa/xa_yuv.c | 2 +-
>  3 files changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/state_trackers/xa/xa_context.c 
> b/src/gallium/state_trackers/xa/xa_context.c
> index fd49c82..ebfb290 100644
> --- a/src/gallium/state_trackers/xa/xa_context.c
> +++ b/src/gallium/state_trackers/xa/xa_context.c
> @@ -37,7 +37,11 @@
>  XA_EXPORT void
>  xa_context_flush(struct xa_context *ctx)
>  {
> -   ctx->pipe->flush(ctx->pipe, &ctx->last_fence, 0);
> +if (ctx->last_fence) {
> +struct pipe_screen *screen = ctx->xa->screen;
> +screen->fence_reference(screen, &ctx->last_fence, NULL);
> +}
> +ctx->pipe->flush(ctx->pipe, &ctx->last_fence, 0);
>  }
>
>  XA_EXPORT struct xa_context *
> diff --git a/src/gallium/state_trackers/xa/xa_tracker.c 
> b/src/gallium/state_trackers/xa/xa_tracker.c
> index a384c1c..1df1da7 100644
> --- a/src/gallium/state_trackers/xa/xa_tracker.c
> +++ b/src/gallium/state_trackers/xa/xa_tracker.c
> @@ -464,7 +464,7 @@ xa_surface_redefine(struct xa_surface *srf,
> xa_min(save_height, template->height0), &src_box);
> pipe->resource_copy_region(pipe, texture,
>0, 0, 0, 0, srf->tex, 0, &src_box);
> -   pipe->flush(pipe, &xa->default_ctx->last_fence, 0);
> +   xa_context_flush(xa->default_ctx);
>  }
>
>  pipe_resource_reference(&srf->tex, texture);
> diff --git a/src/gallium/state_trackers/xa/xa_yuv.c 
> b/src/gallium/state_trackers/xa/xa_yuv.c
> index 1519639..97a1833 100644
> --- a/src/gallium/state_trackers/xa/xa_yuv.c
> +++ b/src/gallium/state_trackers/xa/xa_yuv.c
> @@ -154,7 +154,7 @@ xa_yuv_planar_blit(struct xa_context *r,
> box++;
>  }
>
> -r->pipe->flush(r->pipe, &r->last_fence, 0);
> +xa_context_flush(r);
>
>  xa_ctx_sampler_views_destroy(r);
>  xa_ctx_srf_destroy(r);
> --
> 2.4.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Implement faster streaming memcpy

2015-07-10 Thread Marek Olšák
Shouldn't this stuff be in src/util?

Marek

On Wed, Jul 8, 2015 at 11:07 PM, Ben Widawsky
 wrote:
> (WARNING: No perf data, please keep reading though)
>
> This implements the suggestion provided by the paper, "Fast USWC to WB Memory
> Copy"
> (https://software.intel.com/en-us/articles/copying-accelerated-video-decode-frame-buffers).
> This is described throughout the paper, but the sample code lives in Figure 
> 3-3.
> That paper purports a roughly 40% performance gain in Mbyte/second over the
> original implementation done by Matt.
>
> Section 3.1.2 is the summary of why an intermediate cache buffer is used. It
> claims that if you use the naive implementation, fill buffers are contended 
> for.
> To be honest, I can't quite fathom the underlying explanation, but I'll think
> about it some more. Most importantly would be to get the perf data... This 
> patch
> does need performance data. I don't currently have a platform that this would
> benefit (BYT or BSW), so I can't get anything useful. As soon as I get a
> platform to test it on, I will - meanwhile, maybe whoever tested the original
> patch the first time around could run this through?
>
> Cc: Matt Turner 
> Cc: Chad Versace 
> Cc: Kristian Høgsberg 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/main/streaming-load-memcpy.c | 61 
> +++
>  1 file changed, 47 insertions(+), 14 deletions(-)
>
> diff --git a/src/mesa/main/streaming-load-memcpy.c 
> b/src/mesa/main/streaming-load-memcpy.c
> index d7147af..3cd310a 100644
> --- a/src/mesa/main/streaming-load-memcpy.c
> +++ b/src/mesa/main/streaming-load-memcpy.c
> @@ -30,6 +30,8 @@
>  #include "main/streaming-load-memcpy.h"
>  #include 
>
> +static uint8_t rsvd_space[4096];
> +
>  /* Copies memory from src to dst, using SSE 4.1's MOVNTDQA to get streaming
>   * read performance from uncached memory.
>   */
> @@ -59,23 +61,54 @@ _mesa_streaming_load_memcpy(void *restrict dst, void 
> *restrict src, size_t len)
>len -= MIN2(bytes_before_alignment_boundary, len);
> }
>
> -   while (len >= 64) {
> -  __m128i *dst_cacheline = (__m128i *)d;
> -  __m128i *src_cacheline = (__m128i *)s;
> +   while (len > 64) {
> +  __m128i *cached_buffer = (__m128i *)rsvd_space;
> +  size_t streaming_len = len > 4096 ? 4096 : len;
> +
> +  __asm__ volatile("mfence" ::: "memory");
> +
> +  while (streaming_len >= 64) {
> + __m128i *src_cacheline = (__m128i *)s;
> +
> + __m128i temp1 = _mm_stream_load_si128(src_cacheline + 0);
> + __m128i temp2 = _mm_stream_load_si128(src_cacheline + 1);
> + __m128i temp3 = _mm_stream_load_si128(src_cacheline + 2);
> + __m128i temp4 = _mm_stream_load_si128(src_cacheline + 3);
> +
> + _mm_store_si128(cached_buffer + 0, temp1);
> + _mm_store_si128(cached_buffer + 1, temp2);
> + _mm_store_si128(cached_buffer + 2, temp3);
> + _mm_store_si128(cached_buffer + 3, temp4);
> +
> + s += 64;
> + streaming_len -= 64;
> + cached_buffer += 4;
> +  }
> +
> +  cached_buffer = (__m128i *)rsvd_space;
> +  streaming_len = len > 4096 ? 4096 : len;
> +
> +  __asm__ volatile("mfence" ::: "memory");
> +
> +  while (streaming_len >= 64) {
> + __m128i *dst_cacheline = (__m128i *)d;
> +
> + __m128i temp1 = _mm_stream_load_si128(cached_buffer + 0);
> + __m128i temp2 = _mm_stream_load_si128(cached_buffer + 1);
> + __m128i temp3 = _mm_stream_load_si128(cached_buffer + 2);
> + __m128i temp4 = _mm_stream_load_si128(cached_buffer + 3);
>
> -  __m128i temp1 = _mm_stream_load_si128(src_cacheline + 0);
> -  __m128i temp2 = _mm_stream_load_si128(src_cacheline + 1);
> -  __m128i temp3 = _mm_stream_load_si128(src_cacheline + 2);
> -  __m128i temp4 = _mm_stream_load_si128(src_cacheline + 3);
> + _mm_store_si128(dst_cacheline + 0, temp1);
> + _mm_store_si128(dst_cacheline + 1, temp2);
> + _mm_store_si128(dst_cacheline + 2, temp3);
> + _mm_store_si128(dst_cacheline + 3, temp4);
>
> -  _mm_store_si128(dst_cacheline + 0, temp1);
> -  _mm_store_si128(dst_cacheline + 1, temp2);
> -  _mm_store_si128(dst_cacheline + 2, temp3);
> -  _mm_store_si128(dst_cacheline + 3, temp4);
> + d += 64;
> + streaming_len -= 64;
> + cached_buffer += 4;
>
> -  d += 64;
> -  s += 64;
> -  len -= 64;
> + len -= 64;
> +  }
> }
>
> /* memcpy() the tail. */
> --
> 2.4.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] radeon, r200: allow hyperz for radeon DRM module v2

2015-07-10 Thread Roland Scheidegger
Am 10.07.2015 um 05:44 schrieb Michel Dänzer:
> On 10.07.2015 05:13, Emil Velikov wrote:
>> The original code only half considered hyperz as an option. As per
>> previous commit "major != 2 cannot occur" we can simplify things, and
>> allow users to set the option if they choose to do so.
>>
>> Signed-off-by: Emil Velikov 
>> ---
>>  src/mesa/drivers/dri/r200/r200_context.c | 10 ++
>>  src/mesa/drivers/dri/radeon/radeon_context.c |  9 ++---
>>  2 files changed, 4 insertions(+), 15 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/r200/r200_context.c 
>> b/src/mesa/drivers/dri/r200/r200_context.c
>> index 40cc50a..2a42ab3 100644
>> --- a/src/mesa/drivers/dri/r200/r200_context.c
>> +++ b/src/mesa/drivers/dri/r200/r200_context.c
>> @@ -225,14 +225,8 @@ GLboolean r200CreateContext( gl_api api,
>> rmesa->radeon.initialMaxAnisotropy = 
>> driQueryOptionf(&rmesa->radeon.optionCache,
>>  "def_max_anisotropy");
>>  
>> -   if ( sPriv->drm_version.major == 1
>> -   && driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
>> -  if ( sPriv->drm_version.minor < 13 )
>> - fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
>> -  "disabling.\n", sPriv->drm_version.minor );
>> -  else
>> - rmesa->using_hyperz = GL_TRUE;
>> -   }
> 
> This code only set rmesa->using_hyperz = GL_TRUE if
> sPriv->drm_version.major == 1. It was disabled for KMS in commit
> e541845959761e9f47d14ade6b58a32db04ef7e4 ("r200: Fix piglit paths test.").
> 
> 
>> +   if (driQueryOptionb( &rmesa->radeon.optionCache, "hyperz"))
>> +  rmesa->using_hyperz = GL_TRUE;
> 
> This enables it again for KMS. Maybe that's okay though, especially if
> the driconf option is disabled by default.


Oh you're right. The reason given though why it was disabled looks bogus
to me ("Piglit doesn't like HyperZ warning so disable it for kms." ???),
and I can't see why that would have only applied to r200, not r100. So
it should be fine. (Of course, you will get more failures with that
enabled with piglit, some things just plain won't work, but that was
just the case with UMS too, and the reason why it never was enabled by
default.)

Roland



> 
> 
> The rest of the series is
> 
> Reviewed-by: Michel Dänzer 
> 
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [HACK] i965/fs: Fix ordering of src0 alpha and oMask in the framebuffer write payload.

2015-07-10 Thread Francisco Jerez
Jason Ekstrand  writes:

> On Jul 9, 2015 7:57 AM, "Francisco Jerez"  wrote:
>>
>> We were passing src0 alpha and oMask in reverse order.  There seems to
>> be no good way to pass them in the correct order to the new-style
>> LOAD_PAYLOAD (how surprising) because src0 alpha is per-channel while
>> oMask is not.  Just split src0 alpha in fixed-width registers and pass
>> them to LOAD_PAYLOAD as if they were part of the header as work-around
>> for now.
>
> Bah... I came across this when I did the LOAD_PAYLOAD rework but thought it
> was only theoretical.  I wasn't very familiar with what omask actually did
> and, since piglit didn't hit it, I wasn't sure if it was a real problem or
> not.  I probably should have done more digging and written a piglit test at
> the time. My bad.
>
> One solution that I proposed at the time was to turn header_size into
> header_mask in the obvious way. We can still use 8 bits because we should
> never have a header source higher than 8.
>

So your idea is to have one bit per source indicating whether it's
header-like or per-channel?  I don't think that extends to instructions
other than LOAD_PAYLOAD (e.g. FB_WRITE) where the same source is at the
same time header and payload.  One bit per 32B register would extend
easily but it would be rather ugly to deal with if you want to keep your
code SIMD width-invariant.

I think if you go with the per-source flag you'll want it to be in its
own subclass of fs_inst.  With its own subclass you could even have an
array of per-source sizes determining the number of registers read for
each source, which would be rather nice for the visitor (no need to
split vectors into components while passing them to LOAD_PAYLOAD).

Still I think the most elegant solution would be to simply get rid of
the header/payload distinction by using force_writemask_all and, if it
proves to be necessary, fix the optimizer to get rid of redundant
force_writemask_all flags where it doesn't do it already.

> Thoughts?
> --Jason
>
>> I've written a piglit test that demonstrates the problem by using
>> gl_SampleMask from a fragment shader with multiple color outputs [1].
>>
>> [1] http://lists.freedesktop.org/archives/piglit/2015-July/016499.html
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 26
> +-
>>  1 file changed, 17 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> index 94d6a58..304ae74 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> @@ -1535,6 +1535,19 @@ fs_visitor::emit_single_fb_write(const fs_builder
> &bld,
>>length++;
>> }
>>
>> +   if (src0_alpha.file != BAD_FILE && color0.file != BAD_FILE) {
>> +  /* Neat, we need to chop the src0 alpha component and pass it as
> part of
>> +   * the header even though it has per-channel semantics, because
> the next
>> +   * optional field is header-like and LOAD_PAYLOAD requires all such
>> +   * fields to form a contiguous segment at the beginning of the
> message.
>> +   */
>> +  for (unsigned i = 0; i < exec_size / 8; i++) {
>> + setup_color_payload(&sources[length], src0_alpha, 1, 8,
>> + use_2nd_half || i == 1);
>> + length++;
>> +  }
>> +   }
>> +
>> prog_data->uses_omask =
>>prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
>> if (prog_data->uses_omask) {
>> @@ -1561,19 +1574,14 @@ fs_visitor::emit_single_fb_write(const fs_builder
> &bld,
>>   offset(this->outputs[0], bld, 3),
>>   1, exec_size, false);
>>length += 4;
>> -   } else if (color1.file == BAD_FILE) {
>> -  if (src0_alpha.file != BAD_FILE) {
>> - setup_color_payload(&sources[length], src0_alpha, 1, exec_size,
> false);
>> - length++;
>> -  }
>> -
>> -  setup_color_payload(&sources[length], color0, components,
>> -  exec_size, use_2nd_half);
>> -  length += 4;
>> } else {
>>setup_color_payload(&sources[length], color0, components,
>>exec_size, use_2nd_half);
>>length += 4;
>> +
>> +   }
>> +
>> +   if (color1.file != BAD_FILE) {
>>setup_color_payload(&sources[length], color1, components,
>>exec_size, use_2nd_half);
>>length += 4;
>> --
>> 2.4.3
>>


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] xa: don't leak fences

2015-07-10 Thread Rob Clark
well, freedreno and vmware ddx still use XA.. they both could probably
be ported to use glamor instead, but that hasn't been done yet..

At least for freedreno with upstream drm/kms driver, one can just use
-modesetting ddx instead.  But that doesn't work w/ android fbdev
driver.  I need to check again w/ a more recent -modesetting+glamor,
but when I tried it a few months ago, there were some cases of
rendering corruption (but I didn't have time to debug and see whether
that was a freedreno issue or a glamor issue).

I wouldn't recommend new users of XA at this point, but I don't think
we are quite at the point where we can remove it.

BR,
-R

On Fri, Jul 10, 2015 at 7:12 AM, Marek Olšák  wrote:
> I wonder... do we still need XA considering that everybody can just
> use glamor instead?
>
> Marek
>
>
> On Wed, Jul 8, 2015 at 7:39 PM, Rob Clark  wrote:
>> From: Rob Clark 
>>
>> XA was never unref'ing last_fence in the various call paths to
>> pipe->flush().  Add this to xa_context_flush() and update the other
>> open-coded calls to pipe->flush() to use xa_context_flush() instead.
>>
>> This fixes a memory leak reported with xf86-video-freedreno.
>>
>> Reported-by: Nicolas Dechesne 
>> Cc: "10.5 10.6" 
>> Signed-off-by: Rob Clark 
>> ---
>>  src/gallium/state_trackers/xa/xa_context.c | 6 +-
>>  src/gallium/state_trackers/xa/xa_tracker.c | 2 +-
>>  src/gallium/state_trackers/xa/xa_yuv.c | 2 +-
>>  3 files changed, 7 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/gallium/state_trackers/xa/xa_context.c 
>> b/src/gallium/state_trackers/xa/xa_context.c
>> index fd49c82..ebfb290 100644
>> --- a/src/gallium/state_trackers/xa/xa_context.c
>> +++ b/src/gallium/state_trackers/xa/xa_context.c
>> @@ -37,7 +37,11 @@
>>  XA_EXPORT void
>>  xa_context_flush(struct xa_context *ctx)
>>  {
>> -   ctx->pipe->flush(ctx->pipe, &ctx->last_fence, 0);
>> +if (ctx->last_fence) {
>> +struct pipe_screen *screen = ctx->xa->screen;
>> +screen->fence_reference(screen, &ctx->last_fence, NULL);
>> +}
>> +ctx->pipe->flush(ctx->pipe, &ctx->last_fence, 0);
>>  }
>>
>>  XA_EXPORT struct xa_context *
>> diff --git a/src/gallium/state_trackers/xa/xa_tracker.c 
>> b/src/gallium/state_trackers/xa/xa_tracker.c
>> index a384c1c..1df1da7 100644
>> --- a/src/gallium/state_trackers/xa/xa_tracker.c
>> +++ b/src/gallium/state_trackers/xa/xa_tracker.c
>> @@ -464,7 +464,7 @@ xa_surface_redefine(struct xa_surface *srf,
>> xa_min(save_height, template->height0), &src_box);
>> pipe->resource_copy_region(pipe, texture,
>>0, 0, 0, 0, srf->tex, 0, &src_box);
>> -   pipe->flush(pipe, &xa->default_ctx->last_fence, 0);
>> +   xa_context_flush(xa->default_ctx);
>>  }
>>
>>  pipe_resource_reference(&srf->tex, texture);
>> diff --git a/src/gallium/state_trackers/xa/xa_yuv.c 
>> b/src/gallium/state_trackers/xa/xa_yuv.c
>> index 1519639..97a1833 100644
>> --- a/src/gallium/state_trackers/xa/xa_yuv.c
>> +++ b/src/gallium/state_trackers/xa/xa_yuv.c
>> @@ -154,7 +154,7 @@ xa_yuv_planar_blit(struct xa_context *r,
>> box++;
>>  }
>>
>> -r->pipe->flush(r->pipe, &r->last_fence, 0);
>> +xa_context_flush(r);
>>
>>  xa_ctx_sampler_views_destroy(r);
>>  xa_ctx_srf_destroy(r);
>> --
>> 2.4.3
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] gallium: add interface for writable shader buffers

2015-07-10 Thread Roland Scheidegger
This looks all pretty reasonable, though I can't really figure out if we
could translate from d3d11 to that. In particular, it's UAVs for all of
buffers and images, and I'm not entirely sure if it's actually possible
to figure out which UAV entries are a shader buffer and which ones are a
shader image.
But I don't really understand the documentation in that area, I
certainly don't want to hold things up as I'm aware no one else is really
interested in d3d compatibility, at the worst we can still fix that up
somehow later.

Roland


Am 09.07.2015 um 23:46 schrieb Marek Olšák:
> From: Marek Olšák 
> 
> ---
>  src/gallium/include/pipe/p_context.h | 18 ++
>  src/gallium/include/pipe/p_state.h   | 10 ++
>  2 files changed, 28 insertions(+)
> 
> diff --git a/src/gallium/include/pipe/p_context.h 
> b/src/gallium/include/pipe/p_context.h
> index 022ace5..b4512e7 100644
> --- a/src/gallium/include/pipe/p_context.h
> +++ b/src/gallium/include/pipe/p_context.h
> @@ -58,6 +58,7 @@ struct pipe_resource;
>  struct pipe_sampler_state;
>  struct pipe_sampler_view;
>  struct pipe_scissor_state;
> +struct pipe_shader_buffer;
>  struct pipe_shader_state;
>  struct pipe_stencil_ref;
>  struct pipe_stream_output_target;
> @@ -237,6 +238,23 @@ struct pipe_context {
>const float default_inner_level[2]);
>  
> /**
> +* Bind an array of shader buffers that will be used by a shader.
> +* Any buffers that were previously bound to the specified range
> +* will be unbound.
> +*
> +* \param shader selects shader stage
> +* \param start_slot first buffer slot to bind.
> +* \param count  number of consecutive buffers to bind.
> +* \param buffersarray of pointers to the buffers to bind, it
> +*   should contain at least \a count elements
> +*   unless it's NULL, in which case no buffers will
> +*   be bound.
> +*/
> +   void (*set_shader_buffers)(struct pipe_context *, unsigned shader,
> +  unsigned start_slot, unsigned count,
> +  struct pipe_shader_buffer *buffers);
> +
> +   /**
>  * Bind an array of images that will be used by a shader.
>  * Any images that were previously bound to the specified range
>  * will be unbound.
> diff --git a/src/gallium/include/pipe/p_state.h 
> b/src/gallium/include/pipe/p_state.h
> index f655dda..b269a23 100644
> --- a/src/gallium/include/pipe/p_state.h
> +++ b/src/gallium/include/pipe/p_state.h
> @@ -490,6 +490,16 @@ struct pipe_constant_buffer {
>  
>  
>  /**
> + * An untyped shader buffer supporting loads, stores, and atomics.
> + */
> +struct pipe_shader_buffer {
> +   struct pipe_resource *buffer; /**< the actual buffer */
> +   unsigned buffer_offset; /**< offset to start of data in buffer, in bytes 
> */
> +   unsigned buffer_size;   /**< how much data can be read in shader */
> +};
> +
> +
> +/**
>   * A stream output target. The structure specifies the range vertices can
>   * be written to.
>   *
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 91292] glVertexAttribDivisor not working Linux 10.5.8 Intel HD 5500?

2015-07-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=91292

Bug ID: 91292
   Summary: glVertexAttribDivisor not working Linux 10.5.8 Intel
HD 5500?
   Product: Mesa
   Version: 10.5
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: major
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: lara...@ozemail.com.au
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 117029
  --> https://bugs.freedesktop.org/attachment.cgi?id=117029&action=edit
Complete small program to demonstrate problem. Compile instructions in source

glVertexAttribDivisor does not work on a Fedora 21 system with Intel HD
Graphics 5500, Mesa 10.5.8, core profile version 3.3. Attribute with divisor
set to 1 (per instance) appears to be still incrementing per vertex.

Mesa 10.5.8 built from source with floating point textures enabled. Problem is
not mentioned in release notes for 10.5.9, 10.6, or 10.6.1; does not appear in
bug database.

Attached is program that demonstrates problem.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 91292] glVertexAttribDivisor not working Linux 10.5.8 Intel HD 5500?

2015-07-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=91292

--- Comment #1 from Hugh Fisher  ---
Created attachment 117030
  --> https://bugs.freedesktop.org/attachment.cgi?id=117030&action=edit
Expected output from running program

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/6] r600g: add streamout support

2015-07-10 Thread Marek Olšák
On Thu, Jul 9, 2015 at 8:43 AM, Dave Airlie  wrote:
> From: Glenn Kennard 
>
> This adds the main chunk of the geometry shader multiple stream
> support to the r600 driver.
>
> Glenn wrote the original pass, and I took his code and hacked
> it into a working state.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/r600/evergreen_state.c   |  29 ++--
>  src/gallium/drivers/r600/r600_pipe.c |   2 +-
>  src/gallium/drivers/r600/r600_shader.c   | 200 
> ---
>  src/gallium/drivers/r600/r600_shader.h   |   6 +-
>  src/gallium/drivers/r600/r600_state.c|   6 +-
>  src/gallium/drivers/r600/r600_state_common.c |   7 +
>  6 files changed, 180 insertions(+), 70 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> b/src/gallium/drivers/r600/evergreen_state.c
> index 4ddbc0b..788bf54 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -2988,8 +2988,12 @@ void evergreen_update_gs_state(struct pipe_context 
> *ctx, struct r600_pipe_shader
> struct r600_command_buffer *cb = &shader->command_buffer;
> struct r600_shader *rshader = &shader->shader;
> struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
> -   unsigned gsvs_itemsize =
> -   (cp_shader->ring_item_size * 
> rshader->gs_max_out_vertices) >> 2;
> +   unsigned gsvs_itemsizes[4] = {
> +   (cp_shader->ring_item_sizes[0] * 
> rshader->gs_max_out_vertices) >> 2,
> +   (cp_shader->ring_item_sizes[1] * 
> rshader->gs_max_out_vertices) >> 2,
> +   (cp_shader->ring_item_sizes[2] * 
> rshader->gs_max_out_vertices) >> 2,
> +   (cp_shader->ring_item_sizes[3] * 
> rshader->gs_max_out_vertices) >> 2
> +   };
>
> r600_init_command_buffer(cb, 64);
>
> @@ -3008,21 +3012,24 @@ void evergreen_update_gs_state(struct pipe_context 
> *ctx, struct r600_pipe_shader
> S_028B90_ENABLE(rshader->gs_num_invocations > 
> 0));
> }
> r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
> -   r600_store_value(cb, cp_shader->ring_item_size >> 2);
> -   r600_store_value(cb, 0);
> -   r600_store_value(cb, 0);
> -   r600_store_value(cb, 0);
> +   r600_store_value(cb, cp_shader->ring_item_sizes[0] >> 2);
> +   r600_store_value(cb, cp_shader->ring_item_sizes[1] >> 2);
> +   r600_store_value(cb, cp_shader->ring_item_sizes[2] >> 2);
> +   r600_store_value(cb, cp_shader->ring_item_sizes[3] >> 2);
>
> r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE,
> -  (rshader->ring_item_size) >> 2);
> +  (rshader->ring_item_sizes[0]) >> 2);
>
> r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE,
> -  gsvs_itemsize);
> +  gsvs_itemsizes[0] +
> +  gsvs_itemsizes[1] +
> +  gsvs_itemsizes[2] +
> +  gsvs_itemsizes[3]);
>
> r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3);
> -   r600_store_value(cb, gsvs_itemsize);
> -   r600_store_value(cb, gsvs_itemsize);
> -   r600_store_value(cb, gsvs_itemsize);
> +   r600_store_value(cb, gsvs_itemsizes[0]);
> +   r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1]);
> +   r600_store_value(cb, gsvs_itemsizes[0] + gsvs_itemsizes[1] + 
> gsvs_itemsizes[2]);
>
> /* FIXME calculate these values somehow ??? */
> r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3);
> diff --git a/src/gallium/drivers/r600/r600_pipe.c 
> b/src/gallium/drivers/r600/r600_pipe.c
> index 143e98e..67caa69 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -352,7 +352,7 @@ static int r600_get_param(struct pipe_screen* pscreen, 
> enum pipe_cap param)
> case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
> return 16384;
> case PIPE_CAP_MAX_VERTEX_STREAMS:
> -   return 1;
> +   return family >= CHIP_CEDAR ? 4 : 1;
>
> case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
> return 2047;
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index dbff313..ef19706 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -311,7 +311,9 @@ struct r600_shader_ctx {
> int gs_out_ring_offset;
> int gs_next_vertex;
> struct r600_shader  *gs_for_vs;
> -   int gs_export_gpr_treg;
> +   int gs_export_gpr_tregs[4];
> +   const struct pipe_stream_output_info*gs

Re: [Mesa-dev] [PATCH] i965/fs: Reimplement nir_op_uadd_carry and _usub_borrow without accumulator.

2015-07-10 Thread Ilia Mirkin
On Thu, Jul 9, 2015 at 4:11 PM, Francisco Jerez  wrote:
> Ilia Mirkin  writes:
>
>> FYI there's already a lowering pass that does this in the GLSL IR
>> (CARRY_TO_ARITH in lower_instructions). Perhaps the right place to do
>> this is NIR though, just wanted to let you know.
>>
> Ah, I wasn't aware of that flag, that seems even better.  I just tried
> it and it seems to generate one instruction more per op than my assembly
> code (apparently because our implementation of b2i is suboptimal, could
> probably be fixed), but it would also work to get rid of the no16()
> calls, which is all I care about right now.
>
> I'll resend using your approach tomorrow.

Ideally nir would be smart enough to combine ops back into
carry/borrow ops in its algebraic pass (assuming backend support for
GS5), which should hopefully allow CSE to also operate properly here.

Cheers,

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] gallium: add interface for writable shader buffers

2015-07-10 Thread Marek Olšák
For Evergreen (the first DX11 radeon), which only supports 12 writable
resources, we'll only expose 8 images, 8 shader storage buffers and 8
atomic counter buffers. There will be a CAP saying that only a total
of 12 is allowed to be used by a shader, and all binding slots will
thus be considered virtual. The real binding will take place at draw
time when the shader and other states are known, and the shader will
determine the real slot locations.

I suppose there will be some difficulties while implementing DX11 on
top of gallium, but let's face it, DX11 is an old and limiting API, it
was released in 2009 and the first Vulkan(DX12)-capable radeon was
released in 2011 while OpenGL was picking up new features from both.

Marek


On Fri, Jul 10, 2015 at 3:04 PM, Roland Scheidegger  wrote:
> This looks all pretty reasonable, though I can't really figure out if we
> could translate from d3d11 to that. In particular, it's UAVs for all of
> buffers and images, and I'm not entirely sure if it's actually possible
> to figure out which UAV entries are a shader buffer and which ones are a
> shader image.
> But I don't really understand the documentation in that area, I
> certainly don't want to hold things up as I'm aware noone else is really
> interested in d3d compatibility, at the worst we can still fix that up
> somehow later.
>
> Roland
>
>
> Am 09.07.2015 um 23:46 schrieb Marek Olšák:
>> From: Marek Olšák 
>>
>> ---
>>  src/gallium/include/pipe/p_context.h | 18 ++
>>  src/gallium/include/pipe/p_state.h   | 10 ++
>>  2 files changed, 28 insertions(+)
>>
>> diff --git a/src/gallium/include/pipe/p_context.h 
>> b/src/gallium/include/pipe/p_context.h
>> index 022ace5..b4512e7 100644
>> --- a/src/gallium/include/pipe/p_context.h
>> +++ b/src/gallium/include/pipe/p_context.h
>> @@ -58,6 +58,7 @@ struct pipe_resource;
>>  struct pipe_sampler_state;
>>  struct pipe_sampler_view;
>>  struct pipe_scissor_state;
>> +struct pipe_shader_buffer;
>>  struct pipe_shader_state;
>>  struct pipe_stencil_ref;
>>  struct pipe_stream_output_target;
>> @@ -237,6 +238,23 @@ struct pipe_context {
>>const float default_inner_level[2]);
>>
>> /**
>> +* Bind an array of shader buffers that will be used by a shader.
>> +* Any buffers that were previously bound to the specified range
>> +* will be unbound.
>> +*
>> +* \param shader selects shader stage
>> +* \param start_slot first buffer slot to bind.
>> +* \param count  number of consecutive buffers to bind.
>> +* \param buffersarray of pointers to the buffers to bind, it
>> +*   should contain at least \a count elements
>> +*   unless it's NULL, in which case no buffers will
>> +*   be bound.
>> +*/
>> +   void (*set_shader_buffers)(struct pipe_context *, unsigned shader,
>> +  unsigned start_slot, unsigned count,
>> +  struct pipe_shader_buffer *buffers);
>> +
>> +   /**
>>  * Bind an array of images that will be used by a shader.
>>  * Any images that were previously bound to the specified range
>>  * will be unbound.
>> diff --git a/src/gallium/include/pipe/p_state.h 
>> b/src/gallium/include/pipe/p_state.h
>> index f655dda..b269a23 100644
>> --- a/src/gallium/include/pipe/p_state.h
>> +++ b/src/gallium/include/pipe/p_state.h
>> @@ -490,6 +490,16 @@ struct pipe_constant_buffer {
>>
>>
>>  /**
>> + * An untyped shader buffer supporting loads, stores, and atomics.
>> + */
>> +struct pipe_shader_buffer {
>> +   struct pipe_resource *buffer; /**< the actual buffer */
>> +   unsigned buffer_offset; /**< offset to start of data in buffer, in bytes 
>> */
>> +   unsigned buffer_size;   /**< how much data can be read in shader */
>> +};
>> +
>> +
>> +/**
>>   * A stream output target. The structure specifies the range vertices can
>>   * be written to.
>>   *
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 91292] [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode

2015-07-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=91292

Neil Roberts  changed:

   What|Removed |Added

 CC||n...@linux.intel.com
Summary|glVertexAttribDivisor not   |[BDW+]
   |working Linux 10.5.8 Intel  |glVertexAttribDivisor not
   |HD 5500?|working in combination with
   ||glPolygonMode

--- Comment #2 from Neil Roberts  ---
I tested this on git master (f12302b89836a2) on SKL and it's also failing.
However I think it is due to the combination of glPolygonMode and
glVertexAttribDivisor rather than a general failure with glVertexAttribDivisor.
If I comment out the call to glPolygonMode then it works (albeit with filled
triangles).

There is a piglit test called arb_instanced_arrays-instanced_arrays which tests
glVertexAttribDivisor and this does work on SKL.

This problem doesn't exist on HSW.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 91292] [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode

2015-07-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=91292

Neil Roberts  changed:

   What|Removed |Added

 Status|NEW |ASSIGNED
  Component|Mesa core   |Drivers/DRI/i965
   Assignee|mesa-dev@lists.freedesktop. |n...@linux.intel.com
   |org |
 QA Contact|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org

--- Comment #3 from Neil Roberts  ---
I have an idea what's going wrong. I'm working on a patch.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18/78] i965: Take is_scalar_shader_stage() method out to allow reuse

2015-07-10 Thread Eduardo Lima Mitev
On 06/30/2015 06:58 PM, Jason Ekstrand wrote:
> On Fri, Jun 26, 2015 at 1:06 AM, Eduardo Lima Mitev  wrote:
>> This patch makes public the is_scalar_shader_stage() method in brw_shader, 
>> and
>> renames it to brw_compiler_is_scalar_shader_stage(). The plan is to later 
>> reuse it
>> in brw_nir, to enable/disable optimization passes depending on the type
>> of shader stage.
> 
> I'm not so sure that this is a good plan.  It assumes that whether we
> are doing a scalar or vec4 compile is based entirely on the shader
> stage and some static information (possibly based on environment
> variables).  Ken and I were talking around the office and we may want
> to use both SIMD4x2 and SIMD8 mode for geometry shaders depending on
> the number of inputs, etc.  This won't work in the given paradigm.
> 

If I understand correctly, what you propose is having a function to
dynamically choose the type of shader (scalar vs. vec4) when compiling
the shader, using not only gen and stage, but also actual application
data. I think this is a good idea and will allow experimenting with
different combinations of shaders with real input data.

However, I wonder if this should be added later, after more elaborate
thought on what exactly we need and where to plug it in. I have been
experimenting with a function following the use case you mentioned,
choosing the shader backend based on the inputs to a GS. But honestly it feels
like a blind guess on my side as to what you and Ken actually have in mind.

The current patch basically reuses the function we already have to select
the shader, so what I propose is to move forward with it for the moment
(adding the missing MESA_SHADER_COMPUTE) and discuss how to extend it to
factor in dynamic data; or perhaps you can explain your proposal to us in
a bit more detail.

WDYT?

>> The new method accepts a brw_compiler instead of a brw_context. This is done
>> for consistency, since the actual info we need (scalar_vs) is in 
>> brw_compiler,
>> and fetching it through brw_context->intelScreen->compiler seems like too
>> much indirection.
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89580
>> ---
>>  src/mesa/drivers/dri/i965/brw_shader.cpp | 22 ++
>>  src/mesa/drivers/dri/i965/brw_shader.h   | 13 +
>>  2 files changed, 19 insertions(+), 16 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
>> b/src/mesa/drivers/dri/i965/brw_shader.cpp
>> index 0b53647..3b99046 100644
>> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
>> @@ -182,19 +182,6 @@ brw_shader_precompile(struct gl_context *ctx,
>> return true;
>>  }
>>
>> -static inline bool
>> -is_scalar_shader_stage(struct brw_context *brw, int stage)
>> -{
>> -   switch (stage) {
>> -   case MESA_SHADER_FRAGMENT:
>> -  return true;
>> -   case MESA_SHADER_VERTEX:
>> -  return brw->intelScreen->compiler->scalar_vs;
>> -   default:
>> -  return false;
>> -   }
>> -}
>> -
>>  static void
>>  brw_lower_packing_builtins(struct brw_context *brw,
>> gl_shader_stage shader_type,
>> @@ -205,7 +192,8 @@ brw_lower_packing_builtins(struct brw_context *brw,
>> | LOWER_PACK_UNORM_2x16
>> | LOWER_UNPACK_UNORM_2x16;
>>
>> -   if (is_scalar_shader_stage(brw, shader_type)) {
>> +   if (brw_compiler_is_scalar_shader_stage(brw->intelScreen->compiler,
>> +   shader_type)) {
>>ops |= LOWER_UNPACK_UNORM_4x8
>> | LOWER_UNPACK_SNORM_4x8
>> | LOWER_PACK_UNORM_4x8
>> @@ -218,7 +206,8 @@ brw_lower_packing_builtins(struct brw_context *brw,
>> * lowering is needed. For SOA code, the Half2x16 ops must be
>> * scalarized.
>> */
>> -  if (is_scalar_shader_stage(brw, shader_type)) {
>> +  if (brw_compiler_is_scalar_shader_stage(brw->intelScreen->compiler,
>> +  shader_type)) {
>>   ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
>>   |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
>>}
>> @@ -294,7 +283,8 @@ process_glsl_ir(struct brw_context *brw,
>> do {
>>progress = false;
>>
>> -  if (is_scalar_shader_stage(brw, shader->Stage)) {
>> +  if (brw_compiler_is_scalar_shader_stage(brw->intelScreen->compiler,
>> +  shader->Stage)) {
>>   brw_do_channel_expressions(shader->ir);
>>   brw_do_vector_splitting(shader->ir);
>>}
>> diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
>> b/src/mesa/drivers/dri/i965/brw_shader.h
>> index b2c1a0b..cef2226 100644
>> --- a/src/mesa/drivers/dri/i965/brw_shader.h
>> +++ b/src/mesa/drivers/dri/i965/brw_shader.h
>> @@ -302,6 +302,19 @@ bool brw_cs_precompile(struct gl_context *ctx,
>> struct gl_shader_program *shader_prog,
>> struct gl_program *prog);
>>
>> +static inline bool
>> +brw_compiler_i

Re: [Mesa-dev] [HACK] i965/fs: Fix ordering of src0 alpha and oMask in the framebuffer write payload.

2015-07-10 Thread Jason Ekstrand
On Fri, Jul 10, 2015 at 5:25 AM, Francisco Jerez  wrote:
> Jason Ekstrand  writes:
>
>> On Jul 9, 2015 7:57 AM, "Francisco Jerez"  wrote:
>>>
>>> We were passing src0 alpha and oMask in reverse order.  There seems to
>>> be no good way to pass them in the correct order to the new-style
>>> LOAD_PAYLOAD (how surprising) because src0 alpha is per-channel while
>>> oMask is not.  Just split src0 alpha in fixed-width registers and pass
>>> them to LOAD_PAYLOAD as if they were part of the header as work-around
>>> for now.
>>
>> Bah... I came across this when I did the LOAD_PAYLOAD rework but thought it
>> was only theoretical.  I wasn't very familiar with what omask actually did
>> and, since piglit didn't hit it, I wasn't sure if it was a real problem or
>> not.  I probably should have done more digging and written a piglit test at
>> the time. My bad.
>>
>> One solution that I proposed at the time was to turn header_size into
>> header_mask in the obvious way. We can still use 8 bits because we should
>> never have a header source higher than 8.
>>
>
> So your idea is to have one bit per source indicating whether it's
> header-like or per-channel?  I don't think that extends to instructions
> other than LOAD_PAYLOAD (e.g. FB_WRITE) where the same source is at the
> same time header and payload.

You're right, it doesn't.  We really shouldn't be conflating them.  We
should have header_mask and header_present be different fields.  Maybe
use a union to save space, but they should have different semantic
meaning and different names.  We should probably also have a
compr4_mask and get rid of the hackery there.

> One bit per 32B register would extend
> easily but it would be rather ugly to deal with if you want to keep your
> code SIMD width-invariant.
>
> I think if you go with the per-source flag you'll want it to be in its
> own subclass of fs_inst.  With its own subclass you could even have an
> array of per-source sizes determining the number of registers read for
> each source, which would be rather nice for the visitor (no need to
> split vectors into components while passing them to LOAD_PAYLOAD).
>
> Still I think the most elegant solution would be to simply get rid of
> the header/payload distinction by using force_writemask_all and, if it
> proves to be necessary, fix the optimizer to get rid of redundant
> force_writemask_all flags where it doesn't do it already.

I really don't think that's a good long-term or short-term solution.

How badly are you blocking on this?   I don't really have a lot of
extra time to work on this at the moment but can carve some out if
needed.
--jason

>> Thoughts?
>> --Jason
>>
>>> I've written a piglit test that demonstrates the problem by using
>>> gl_SampleMask from a fragment shader with multiple color outputs [1].
>>>
>>> [1] http://lists.freedesktop.org/archives/piglit/2015-July/016499.html
>>> ---
>>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 26
>> +-
>>>  1 file changed, 17 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> index 94d6a58..304ae74 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> @@ -1535,6 +1535,19 @@ fs_visitor::emit_single_fb_write(const fs_builder
>> &bld,
>>>length++;
>>> }
>>>
>>> +   if (src0_alpha.file != BAD_FILE && color0.file != BAD_FILE) {
>>> +  /* Neat, we need to chop the src0 alpha component and pass it as
>> part of
>>> +   * the header even though it has per-channel semantics, because
>> the next
>>> +   * optional field is header-like and LOAD_PAYLOAD requires all such
>>> +   * fields to form a contiguous segment at the beginning of the
>> message.
>>> +   */
>>> +  for (unsigned i = 0; i < exec_size / 8; i++) {
>>> + setup_color_payload(&sources[length], src0_alpha, 1, 8,
>>> + use_2nd_half || i == 1);
>>> + length++;
>>> +  }
>>> +   }
>>> +
>>> prog_data->uses_omask =
>>>prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
>>> if (prog_data->uses_omask) {
>>> @@ -1561,19 +1574,14 @@ fs_visitor::emit_single_fb_write(const fs_builder
>> &bld,
>>>   offset(this->outputs[0], bld, 3),
>>>   1, exec_size, false);
>>>length += 4;
>>> -   } else if (color1.file == BAD_FILE) {
>>> -  if (src0_alpha.file != BAD_FILE) {
>>> - setup_color_payload(&sources[length], src0_alpha, 1, exec_size,
>> false);
>>> - length++;
>>> -  }
>>> -
>>> -  setup_color_payload(&sources[length], color0, components,
>>> -  exec_size, use_2nd_half);
>>> -  length += 4;
>>> } else {
>>>setup_color_payload(&sources[length], color0, components,
>>>exec_size, use_2nd_half);
>>>length += 4;
>>> 

Re: [Mesa-dev] [HACK] i965/fs: Fix ordering of src0 alpha and oMask in the framebuffer write payload.

2015-07-10 Thread Francisco Jerez
Jason Ekstrand  writes:

> On Fri, Jul 10, 2015 at 5:25 AM, Francisco Jerez  
> wrote:
>> Jason Ekstrand  writes:
>>
>>> On Jul 9, 2015 7:57 AM, "Francisco Jerez"  wrote:

>>>> We were passing src0 alpha and oMask in reverse order.  There seems to
>>>> be no good way to pass them in the correct order to the new-style
>>>> LOAD_PAYLOAD (how surprising) because src0 alpha is per-channel while
>>>> oMask is not.  Just split src0 alpha in fixed-width registers and pass
>>>> them to LOAD_PAYLOAD as if they were part of the header as work-around
>>>> for now.
>>>
>>> Bah... I came across this when I did the LOAD_PAYLOAD rework but thought it
>>> was only theoretical.  I wasn't very familiar with what omask actually did
>>> and, since piglit didn't hit it, I wasn't sure if it was a real problem or
>>> not.  I probably should have done more digging and written a piglit test at
>>> the time. My bad.
>>>
>>> One solution that I proposed at the time was to turn header_size into
>>> header_mask in the obvious way. We can still use 8 bits because we should
>>> never have a header source higher than 8.
>>>
>>
>> So your idea is to have one bit per source indicating whether it's
>> header-like or per-channel?  I don't think that extends to instructions
>> other than LOAD_PAYLOAD (e.g. FB_WRITE) where the same source is at the
>> same time header and payload.
>
> You're right, it doesn't.  We really shouldn't be conflating them.  We
> should have header_mask and header_present be different fields.  Maybe
> use a union to save space, but they should have different semantic
> meaning and different names.  We should probably also have a
> compr4_mask and get rid of the hackery there.
>
>> One bit per 32B register would extend
>> easily but it would be rather ugly to deal with if you want to keep your
>> code SIMD width-invariant.
>>
>> I think if you go with the per-source flag you'll want it to be in its
>> own subclass of fs_inst.  With its own subclass you could even have an
>> array of per-source sizes determining the number of registers read for
>> each source, which would be rather nice for the visitor (no need to
>> split vectors into components while passing them to LOAD_PAYLOAD).
>>
>> Still I think the most elegant solution would be to simply get rid of
>> the header/payload distinction by using force_writemask_all and, if it
>> proves to be necessary, fix the optimizer to get rid of redundant
>> force_writemask_all flags where it doesn't do it already.
>
> I really don't think that's a good long-term or short-term solution.
>
> How badly are you blocking on this?   I don't really have a lot of
> extra time to work on this at the moment but can carve some out if
> needed.

I'm not blocking on this at all, feel free to fix it however you like,
or just go with this hack for the moment if you have higher priority
stuff to work on right now, I honestly don't care.

> --jason
>
>>> Thoughts?
>>> --Jason
>>>
 I've written a piglit test that demonstrates the problem by using
 gl_SampleMask from a fragment shader with multiple color outputs [1].

 [1] http://lists.freedesktop.org/archives/piglit/2015-July/016499.html
 ---
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 26
>>> +-
  1 file changed, 17 insertions(+), 9 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 index 94d6a58..304ae74 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 @@ -1535,6 +1535,19 @@ fs_visitor::emit_single_fb_write(const fs_builder
>>> &bld,
length++;
 }

 +   if (src0_alpha.file != BAD_FILE && color0.file != BAD_FILE) {
 +  /* Neat, we need to chop the src0 alpha component and pass it as
>>> part of
 +   * the header even though it has per-channel semantics, because
>>> the next
 +   * optional field is header-like and LOAD_PAYLOAD requires all such
 +   * fields to form a contiguous segment at the beginning of the
>>> message.
 +   */
 +  for (unsigned i = 0; i < exec_size / 8; i++) {
 + setup_color_payload(&sources[length], src0_alpha, 1, 8,
 + use_2nd_half || i == 1);
 + length++;
 +  }
 +   }
 +
 prog_data->uses_omask =
prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
 if (prog_data->uses_omask) {
 @@ -1561,19 +1574,14 @@ fs_visitor::emit_single_fb_write(const fs_builder
>>> &bld,
   offset(this->outputs[0], bld, 3),
   1, exec_size, false);
length += 4;
 -   } else if (color1.file == BAD_FILE) {
 -  if (src0_alpha.file != BAD_FILE) {
 - setup_color_payload(&sources[length], src0_alpha, 1, exec_size,
>>> false);
 - length++;
>

[Mesa-dev] [PATCH] i965/bdw: Fix 3DSTATE_VF_INSTANCING when the edge flag is used

2015-07-10 Thread Neil Roberts
When the edge flag element is enabled then the elements are slightly
reordered so that the edge flag is always the last one. This was
confusing the code to upload the 3DSTATE_VF_INSTANCING state because
that is uploaded with a separate loop which has an instruction for
each element. The indices used in these instructions weren't taking
into account the reordering so the state would be incorrect.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91292
Cc: "10.6 10.5" 
---
 src/mesa/drivers/dri/i965/gen8_draw_upload.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c 
b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
index 1af90ec..65b7625 100644
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -218,13 +218,24 @@ gen8_emit_vertices(struct brw_context *brw)
}
ADVANCE_BATCH();
 
-   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
+   for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
   const struct brw_vertex_element *input = brw->vb.enabled[i];
   const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer];
+  unsigned element_index;
+
+  /* The edge flag element is reordered to be the last one in the code
+   * above so we need to compensate for that in the element indices used
+   * below.
+   */
+  if (input == gen6_edgeflag_input)
+ element_index = brw->vb.nr_enabled - 1;
+  else
+ element_index = j++;
 
   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
-  OUT_BATCH(i | (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
+  OUT_BATCH(element_index |
+(buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
   OUT_BATCH(buffer->step_rate);
   ADVANCE_BATCH();
}
-- 
1.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Implement b2f and b2i using negation.

2015-07-10 Thread Francisco Jerez
Booleans are represented as 0/-1 on modern hardware which means we can
just negate them to convert them into a numeric type.  Negation has
the benefit that it can be implemented using a source modifier which
can easily be propagated into some other instruction.  shader-db
results on HSW:

 total instructions in shared programs: 5264246 -> 5264211 (-0.00%)
 instructions in affected programs: 1464 -> 1429 (-2.39%)
 helped:15
 HURT:  1

No piglit regressions.
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 4 +---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 +--
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 4690d00..64ff24c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -969,10 +969,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
nir_alu_instr *instr)
   break;
 
case nir_op_b2i:
-  bld.AND(result, op[0], fs_reg(1));
-  break;
case nir_op_b2f:
-  bld.AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], 
fs_reg(0x3f80u));
+  bld.MOV(result, negate(op[0]));
   break;
 
case nir_op_f2b:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c9c2661..fd94a70 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1733,16 +1733,11 @@ vec4_visitor::visit(ir_expression *ir)
   emit(MOV(result_dst, op[0]));
   break;
case ir_unop_b2i:
-  emit(AND(result_dst, op[0], src_reg(1)));
-  break;
case ir_unop_b2f:
   if (devinfo->gen <= 5) {
  resolve_bool_comparison(ir->operands[0], &op[0]);
   }
-  op[0].type = BRW_REGISTER_TYPE_D;
-  result_dst.type = BRW_REGISTER_TYPE_D;
-  emit(AND(result_dst, op[0], src_reg(0x3f80u)));
-  result_dst.type = BRW_REGISTER_TYPE_F;
+  emit(MOV(result_dst, negate(op[0])));
   break;
case ir_unop_f2b:
   emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCHv2 2/2] i965: Implement nir_op_uadd_carry and _usub_borrow without accumulator.

2015-07-10 Thread Francisco Jerez
This gets rid of two no16() fall-backs and should allow better
scheduling of the generated IR.  There are no uses of usubBorrow() or
uaddCarry() in shader-db so no changes are expected.  However the
"arb_gpu_shader5/execution/built-in-functions/fs-usubBorrow" and
"arb_gpu_shader5/execution/built-in-functions/fs-uaddCarry" piglit
tests go from 40 to 28 instructions.  The reason is that the plain ADD
instruction can easily be CSE'ed with the original addition, and the
b2i negation can easily be propagated into the source modifier of
another instruction, so effectively both operations are performed with
just one instruction.

No piglit regressions.

v2: Rely on carry_to_arith() and borrow_to_arith() to lower these
(Ilia Mirkin).
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 26 --
 src/mesa/drivers/dri/i965/brw_shader.cpp   |  4 +++-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 17 +
 3 files changed, 12 insertions(+), 35 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 64ff24c..9cccd7f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -836,29 +836,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
nir_alu_instr *instr)
   bld.emit(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]);
   break;
 
-   case nir_op_uadd_carry: {
-  if (devinfo->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
-  struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
-  BRW_REGISTER_TYPE_UD);
-
-  bld.ADDC(bld.null_reg_ud(), op[0], op[1]);
-  bld.MOV(result, fs_reg(acc));
-  break;
-   }
+   case nir_op_uadd_carry:
+  unreachable("Should have been lowered by carry_to_arith().");
 
-   case nir_op_usub_borrow: {
-  if (devinfo->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
-  struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
-  BRW_REGISTER_TYPE_UD);
-
-  bld.SUBB(bld.null_reg_ud(), op[0], op[1]);
-  bld.MOV(result, fs_reg(acc));
-  break;
-   }
+   case nir_op_usub_borrow:
+  unreachable("Should have been lowered by borrow_to_arith().");
 
case nir_op_umod:
   bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 3e3d78b..d66baf3 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -259,7 +259,9 @@ process_glsl_ir(struct brw_context *brw,
   EXP_TO_EXP2 |
   LOG_TO_LOG2 |
   bitfield_insert |
-  LDEXP_TO_ARITH);
+  LDEXP_TO_ARITH |
+  CARRY_TO_ARITH |
+  BORROW_TO_ARITH);
 
/* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
 * if-statements need to be flattened.
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index fd94a70..da7561c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1601,20 +1601,13 @@ vec4_visitor::visit(ir_expression *ir)
   assert(ir->type->is_integer());
   emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
   break;
-   case ir_binop_carry: {
-  struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
 
-  emit(ADDC(dst_null_ud(), op[0], op[1]));
-  emit(MOV(result_dst, src_reg(acc)));
-  break;
-   }
-   case ir_binop_borrow: {
-  struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
+   case ir_binop_carry:
+  unreachable("Should have been lowered by carry_to_arith().");
+
+   case ir_binop_borrow:
+  unreachable("Should have been lowered by borrow_to_arith().");
 
-  emit(SUBB(dst_null_ud(), op[0], op[1]));
-  emit(MOV(result_dst, src_reg(acc)));
-  break;
-   }
case ir_binop_mod:
   /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
   assert(ir->type->is_integer());
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] radeon, r200: allow hyperz for radeon DRM module v2

2015-07-10 Thread Emil Velikov
On 10 July 2015 at 13:18, Roland Scheidegger  wrote:
> Am 10.07.2015 um 05:44 schrieb Michel Dänzer:
>> On 10.07.2015 05:13, Emil Velikov wrote:
>>> The original code only half considered hyperz as an option. As per
>>> previous commit "major != 2 cannot occur" we can simplify things, and
>>> allow users to set the option if they choose to do so.
>>>
>>> Signed-off-by: Emil Velikov 
>>> ---
>>>  src/mesa/drivers/dri/r200/r200_context.c | 10 ++
>>>  src/mesa/drivers/dri/radeon/radeon_context.c |  9 ++---
>>>  2 files changed, 4 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/r200/r200_context.c 
>>> b/src/mesa/drivers/dri/r200/r200_context.c
>>> index 40cc50a..2a42ab3 100644
>>> --- a/src/mesa/drivers/dri/r200/r200_context.c
>>> +++ b/src/mesa/drivers/dri/r200/r200_context.c
>>> @@ -225,14 +225,8 @@ GLboolean r200CreateContext( gl_api api,
>>> rmesa->radeon.initialMaxAnisotropy = 
>>> driQueryOptionf(&rmesa->radeon.optionCache,
>>>  "def_max_anisotropy");
>>>
>>> -   if ( sPriv->drm_version.major == 1
>>> -   && driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
>>> -  if ( sPriv->drm_version.minor < 13 )
>>> - fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
>>> -  "disabling.\n", sPriv->drm_version.minor );
>>> -  else
>>> - rmesa->using_hyperz = GL_TRUE;
>>> -   }
>>
>> This code only set rmesa->using_hyperz = GL_TRUE if
>> sPriv->drm_version.major == 1. It was disabled for KMS in commit
>> e541845959761e9f47d14ade6b58a32db04ef7e4 ("r200: Fix piglit paths test.").
>>
>>
>>> +   if (driQueryOptionb( &rmesa->radeon.optionCache, "hyperz"))
>>> +  rmesa->using_hyperz = GL_TRUE;
>>
>> This enables it again for KMS. Maybe that's okay though, especially if
>> the driconf option is disabled by default.
>
>
> Oh you're right. The reason given though why it was disabled looks bogus
> to me ("Piglit doesn't like HyperZ warning so disable it for kms." ???),
> and I can't see why that would have only applied to r200, not r100. So
> it should be fine. (Of course, you will get more failures with that
> enabled with piglit, some things just plain won't work, but that was
> just the case with UMS too, and the reason why it never was enabled by
> default.)
>
Yes, without Roland's knowledge of whether hyperz is supposed to work for
KMS, the current code is quite ambiguous. If you guys prefer I can simply
rip out the whole thing; then again, hyperz is disabled by default so
no harm should follow from this patch.

I don't mind either way.
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] auxiliary/vl: use the correct screen index

2015-07-10 Thread Emil Velikov
Inspired by (copied from) Marek's commit for egl/x11
commit 0b56e23e7f3(egl/dri2: use the correct screen index)

Cc: 10.6 
Cc: Marek Olšák 
Signed-off-by: Emil Velikov 
---
 src/gallium/auxiliary/vl/vl_winsys_dri.c | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c 
b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index 7e61b88..ac2feec 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -293,6 +293,16 @@ vl_screen_get_private(struct vl_screen *vscreen)
return vscreen;
 }
 
+static xcb_screen_t *
+get_xcb_screen(xcb_screen_iterator_t iter, int screen)
+{
+for (; iter.rem; --screen, xcb_screen_next(&iter))
+if (screen == 0)
+return iter.data;
+
+return NULL;
+}
+
 struct vl_screen*
 vl_screen_create(Display *display, int screen)
 {
@@ -333,9 +343,11 @@ vl_screen_create(Display *display, int screen)
if (dri2_query == NULL || error != NULL || dri2_query->minor_version < 2)
   goto free_query;
 
-   s = xcb_setup_roots_iterator(xcb_get_setup(scrn->conn));
-   while (screen--)
-   xcb_screen_next(&s);
+   s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
+   screen = get_xcb_screen(s, dri2_dpy->screen);
+   if (!screen)
+  goto free_query;
+
driverType = XCB_DRI2_DRIVER_TYPE_DRI;
 #ifdef DRI2DriverPrimeShift
{
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/7] configure.ac: null, android, gdi are not valid egl-platforms

2015-07-10 Thread Emil Velikov
... and update the documentation to reflect reality.
null and gdi are gone, and surfaceless is a recent addition.

Signed-off-by: Emil Velikov 
---
 configure.ac  | 3 ---
 docs/egl.html | 6 +++---
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/configure.ac b/configure.ac
index 9ffd69d..f3a24f2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1763,9 +1763,6 @@ for plat in $egl_platforms; do
AC_MSG_ERROR([EGL platform surfaceless requires libdrm 
>= $LIBDRM_REQUIRED])
;;
 
-   android|gdi|null)
-   ;;
-
*)
AC_MSG_ERROR([EGL platform '$plat' does not exist])
;;
diff --git a/docs/egl.html b/docs/egl.html
index 3ab1a60..30d8687 100644
--- a/docs/egl.html
+++ b/docs/egl.html
@@ -88,10 +88,10 @@ types such as EGLNativeDisplayType or
 EGLNativeWindowType defined for.
 
 The available platforms are x11, drm,
-wayland, null, android,
-haiku, and gdi.  The android platform
+wayland, surfaceless, android,
+and haiku.  The android platform
 can only be built as a system component, part of AOSP, while the
-haiku and gdi platforms can only be built with SCons.
+haiku platforms can only be built with SCons.
 Unless for special needs, the build system should
 select the right platforms automatically.
 
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/7] egl: remove ifdef $(egl_extension) compile guards

2015-07-10 Thread Emil Velikov
All of these are already defined in the headers provided.

Signed-off-by: Emil Velikov 
---
 src/egl/main/eglapi.c   | 36 +---
 src/egl/main/eglapi.h   | 24 
 src/egl/main/eglfallbacks.c |  6 --
 3 files changed, 1 insertion(+), 65 deletions(-)

diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 824e51e..d1018c6 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -1014,8 +1014,6 @@ eglSwapBuffers(EGLDisplay dpy, EGLSurface surface)
 }
 
 
-#ifdef EGL_EXT_swap_buffers_with_damage
-
 static EGLBoolean EGLAPIENTRY
 eglSwapBuffersWithDamageEXT(EGLDisplay dpy, EGLSurface surface,
 EGLint *rects, EGLint n_rects)
@@ -1041,8 +1039,6 @@ eglSwapBuffersWithDamageEXT(EGLDisplay dpy, EGLSurface 
surface,
RETURN_EGL_EVAL(disp, ret);
 }
 
-#endif /* EGL_EXT_swap_buffers_with_damage */
-
 EGLBoolean EGLAPIENTRY
 eglCopyBuffers(EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target)
 {
@@ -1203,8 +1199,6 @@ eglGetError(void)
 }
 
 
-#ifdef EGL_MESA_drm_display
-
 static EGLDisplay EGLAPIENTRY
 eglGetDRMDisplayMESA(int fd)
 {
@@ -1212,8 +1206,6 @@ eglGetDRMDisplayMESA(int fd)
return _eglGetDisplayHandle(dpy);
 }
 
-#endif /* EGL_MESA_drm_display */
-
 /**
  ** EGL 1.2
  **/
@@ -1579,8 +1571,6 @@ eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint 
attribute, EGLint *valu
 }
 
 
-#ifdef EGL_NOK_swap_region
-
 static EGLBoolean EGLAPIENTRY
 eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface,
EGLint numRects, const EGLint *rects)
@@ -1606,10 +1596,6 @@ eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface 
surface,
RETURN_EGL_EVAL(disp, ret);
 }
 
-#endif /* EGL_NOK_swap_region */
-
-
-#ifdef EGL_MESA_drm_image
 
 static EGLImage EGLAPIENTRY
 eglCreateDRMImageMESA(EGLDisplay dpy, const EGLint *attr_list)
@@ -1649,9 +1635,7 @@ eglExportDRMImageMESA(EGLDisplay dpy, EGLImage image,
RETURN_EGL_EVAL(disp, ret);
 }
 
-#endif
 
-#ifdef EGL_WL_bind_wayland_display
 struct wl_display;
 
 static EGLBoolean EGLAPIENTRY
@@ -1708,9 +1692,8 @@ eglQueryWaylandBufferWL(EGLDisplay dpy, struct 
wl_resource *buffer,
 
RETURN_EGL_EVAL(disp, ret);
 }
-#endif
 
-#ifdef EGL_WL_create_wayland_buffer_from_image
+
 static struct wl_buffer * EGLAPIENTRY
 eglCreateWaylandBufferFromImageWL(EGLDisplay dpy, EGLImage image)
 {
@@ -1731,7 +1714,6 @@ eglCreateWaylandBufferFromImageWL(EGLDisplay dpy, 
EGLImage image)
 
RETURN_EGL_EVAL(disp, ret);
 }
-#endif
 
 static EGLBoolean EGLAPIENTRY
 eglPostSubBufferNV(EGLDisplay dpy, EGLSurface surface,
@@ -1774,7 +1756,6 @@ eglGetSyncValuesCHROMIUM(EGLDisplay display, EGLSurface 
surface,
RETURN_EGL_EVAL(disp, ret);
 }
 
-#ifdef EGL_MESA_image_dma_buf_export
 static EGLBoolean EGLAPIENTRY
 eglExportDMABUFImageQueryMESA(EGLDisplay dpy, EGLImage image,
   EGLint *fourcc, EGLint *nplanes,
@@ -1816,7 +1797,6 @@ eglExportDMABUFImageMESA(EGLDisplay dpy, EGLImage image,
 
RETURN_EGL_EVAL(disp, ret);
 }
-#endif
 
 __eglMustCastToProperFunctionPointerType EGLAPIENTRY
 eglGetProcAddress(const char *procname)
@@ -1873,9 +1853,7 @@ eglGetProcAddress(const char *procname)
   { "eglGetPlatformDisplay", (_EGLProc) eglGetPlatformDisplay },
   { "eglCreatePlatformWindowSurface", (_EGLProc) 
eglCreatePlatformWindowSurface },
   { "eglCreatePlatformPixmapSurface", (_EGLProc) 
eglCreatePlatformPixmapSurface },
-#ifdef EGL_MESA_drm_display
   { "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA },
-#endif
   { "eglCreateImageKHR", (_EGLProc) eglCreateImageKHR },
   { "eglDestroyImageKHR", (_EGLProc) eglDestroyImage },
   { "eglCreateSyncKHR", (_EGLProc) eglCreateSyncKHR },
@@ -1885,33 +1863,21 @@ eglGetProcAddress(const char *procname)
   { "eglWaitSyncKHR", (_EGLProc) eglWaitSyncKHR },
   { "eglSignalSyncKHR", (_EGLProc) eglSignalSyncKHR },
   { "eglGetSyncAttribKHR", (_EGLProc) eglGetSyncAttribKHR },
-#ifdef EGL_NOK_swap_region
   { "eglSwapBuffersRegionNOK", (_EGLProc) eglSwapBuffersRegionNOK },
-#endif
-#ifdef EGL_MESA_drm_image
   { "eglCreateDRMImageMESA", (_EGLProc) eglCreateDRMImageMESA },
   { "eglExportDRMImageMESA", (_EGLProc) eglExportDRMImageMESA },
-#endif
-#ifdef EGL_WL_bind_wayland_display
   { "eglBindWaylandDisplayWL", (_EGLProc) eglBindWaylandDisplayWL },
   { "eglUnbindWaylandDisplayWL", (_EGLProc) eglUnbindWaylandDisplayWL },
   { "eglQueryWaylandBufferWL", (_EGLProc) eglQueryWaylandBufferWL },
-#endif
-#ifdef EGL_WL_create_wayland_buffer_from_image
   { "eglCreateWaylandBufferFromImageWL", (_EGLProc) 
eglCreateWaylandBufferFromImageWL },
-#endif
   { "eglPostSubBufferNV", (_EGLProc) eglPostSubBufferNV },
-#ifdef EGL_EXT_swap_buffers_with_damage
   { "eglSwapBuffersWithDamageEXT", (_EGLProc) eglSwapBuffersWithDamageEXT 
},
-#endif
   { "eglGetPlatformDisplayEXT", (_EGLProc) eglGetPlatformDisplayEXT },
   { "eg

[Mesa-dev] [PATCH 6/7] egl/wayland: use drmGetNodeTypeFromFd helper instead of opencoding it

2015-07-10 Thread Emil Velikov
Cc: Axel Davy 
Signed-off-by: Emil Velikov 
---
 configure.ac|  2 +-
 src/egl/drivers/dri2/platform_wayland.c | 20 ++--
 2 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/configure.ac b/configure.ac
index e648a8d..e6efa01 100644
--- a/configure.ac
+++ b/configure.ac
@@ -66,7 +66,7 @@ OSMESA_VERSION=8
 AC_SUBST([OSMESA_VERSION])
 
 dnl Versions for external dependencies
-LIBDRM_REQUIRED=2.4.38
+LIBDRM_REQUIRED=2.4.60
 LIBDRM_RADEON_REQUIRED=2.4.56
 LIBDRM_INTEL_REQUIRED=2.4.61
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
diff --git a/src/egl/drivers/dri2/platform_wayland.c 
b/src/egl/drivers/dri2/platform_wayland.c
index 0b1adf8..9005eb7 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -842,22 +842,6 @@ bad_format:
return NULL;
 }
 
-static char
-is_fd_render_node(int fd)
-{
-   struct stat render;
-
-   if (fstat(fd, &render))
-  return 0;
-
-   if (!S_ISCHR(render.st_mode))
-  return 0;
-
-   if (render.st_rdev & 0x80)
-  return 1;
-   return 0;
-}
-
 static int
 dri2_wl_authenticate(_EGLDisplay *disp, uint32_t id)
 {
@@ -901,7 +885,7 @@ drm_handle_device(void *data, struct wl_drm *drm, const 
char *device)
   return;
}
 
-   if (is_fd_render_node(dri2_dpy->fd)) {
+   if (drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER) {
   dri2_dpy->authenticated = 1;
} else {
   drmGetMagic(dri2_dpy->fd, &magic);
@@ -1111,7 +1095,7 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay 
*disp)
 * will return a render-node when the requested gpu is different
 * to the server, but also if the client asks for the same gpu than
 * the server by requesting its pci-id */
-   dri2_dpy->is_render_node = is_fd_render_node(dri2_dpy->fd);
+   dri2_dpy->is_render_node = drmGetNodeTypeFromFd(dri2_dpy->fd) == 
DRM_NODE_RENDER;
 
dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0);
if (dri2_dpy->driver_name == NULL) {
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/7] egl/wayland: remove dead code

2015-07-10 Thread Emil Velikov
The macro HAVE_MKOSTEMP was never defined.

Cc: Axel Davy 
Signed-off-by: Emil Velikov 
---
 src/egl/drivers/dri2/platform_wayland.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/src/egl/drivers/dri2/platform_wayland.c 
b/src/egl/drivers/dri2/platform_wayland.c
index 9005eb7..bdccee3 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1255,17 +1255,11 @@ create_tmpfile_cloexec(char *tmpname)
 {
int fd;
 
-#ifdef HAVE_MKOSTEMP
-   fd = mkostemp(tmpname, O_CLOEXEC);
-   if (fd >= 0)
-  unlink(tmpname);
-#else
fd = mkstemp(tmpname);
if (fd >= 0) {
   fd = set_cloexec_or_close(fd);
   unlink(tmpname);
}
-#endif
 
return fd;
 }
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/7] egl/wayland: libdrm is a hard requirement, treat it as such

2015-07-10 Thread Emil Velikov
Prompt at configure time if it's missing; otherwise we'll fail later on
in the build. Remove ambiguous HAVE_LIBDRM guard.

Cc: 10.6 
Signed-off-by: Emil Velikov 
---
 configure.ac| 3 +++
 src/egl/drivers/dri2/egl_dri2.c | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/configure.ac b/configure.ac
index f3a24f2..e648a8d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1740,6 +1740,9 @@ egl_platforms=`IFS=', '; echo $with_egl_platforms`
 for plat in $egl_platforms; do
case "$plat" in
wayland)
+   test "x$have_libdrm" != xyes &&
+   AC_MSG_ERROR([EGL platform wayland requires libdrm >= 
$LIBDRM_REQUIRED])
+
PKG_CHECK_MODULES([WAYLAND], [wayland-client >= 
$WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED])
 
if test "x$WAYLAND_SCANNER" = x; then
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index a4f8db9..39a0661 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -2149,13 +2149,11 @@ dri2_bind_wayland_display_wl(_EGLDriver *drv, 
_EGLDisplay *disp,
wl_drm_callbacks.authenticate =
   (int(*)(void *, uint32_t)) dri2_dpy->vtbl->authenticate;
 
-#ifdef HAVE_LIBDRM
if (drmGetCap(dri2_dpy->fd, DRM_CAP_PRIME, &cap) == 0 &&
cap == (DRM_PRIME_CAP_IMPORT | DRM_PRIME_CAP_EXPORT) &&
dri2_dpy->image->base.version >= 7 &&
dri2_dpy->image->createImageFromFds != NULL)
   flags |= WAYLAND_DRM_PRIME;
-#endif
 
dri2_dpy->wl_server_drm =
   wayland_drm_init(wl_dpy, dri2_dpy->device_name,
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/7] egl: consolidate ifdef HAVE_LIBDRM blocks

2015-07-10 Thread Emil Velikov
Move the code around rather than having it scattered. No functional
change.

Signed-off-by: Emil Velikov 
---
 src/egl/drivers/dri2/egl_dri2.c | 210 +++-
 1 file changed, 102 insertions(+), 108 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 65194cb..a4f8db9 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -1384,53 +1384,6 @@ dri2_create_image_khr_renderbuffer(_EGLDisplay *disp, 
_EGLContext *ctx,
return dri2_create_image_from_dri(disp, dri_image);
 }
 
-#ifdef HAVE_LIBDRM
-static _EGLImage *
-dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx,
- EGLClientBuffer buffer, const EGLint 
*attr_list)
-{
-   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   EGLint format, name, pitch, err;
-   _EGLImageAttribs attrs;
-   __DRIimage *dri_image;
-
-   name = (EGLint) (uintptr_t) buffer;
-
-   err = _eglParseImageAttribList(&attrs, disp, attr_list);
-   if (err != EGL_SUCCESS)
-  return NULL;
-
-   if (attrs.Width <= 0 || attrs.Height <= 0 ||
-   attrs.DRMBufferStrideMESA <= 0) {
-  _eglError(EGL_BAD_PARAMETER,
-   "bad width, height or stride");
-  return NULL;
-   }
-
-   switch (attrs.DRMBufferFormatMESA) {
-   case EGL_DRM_BUFFER_FORMAT_ARGB32_MESA:
-  format = __DRI_IMAGE_FORMAT_ARGB;
-  pitch = attrs.DRMBufferStrideMESA;
-  break;
-   default:
-  _eglError(EGL_BAD_PARAMETER,
-   "dri2_create_image_khr: unsupported pixmap depth");
-  return NULL;
-   }
-
-   dri_image =
-  dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen,
-  attrs.Width,
-  attrs.Height,
-  format,
-  name,
-  pitch,
-  NULL);
-
-   return dri2_create_image_from_dri(disp, dri_image);
-}
-#endif
-
 #ifdef HAVE_WAYLAND_PLATFORM
 
 /* This structure describes how a wl_buffer maps to one or more
@@ -1627,6 +1580,51 @@ dri2_create_wayland_buffer_from_image(_EGLDriver *drv, 
_EGLDisplay *dpy,
 }
 
 #ifdef HAVE_LIBDRM
+static _EGLImage *
+dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx,
+ EGLClientBuffer buffer, const EGLint 
*attr_list)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   EGLint format, name, pitch, err;
+   _EGLImageAttribs attrs;
+   __DRIimage *dri_image;
+
+   name = (EGLint) (uintptr_t) buffer;
+
+   err = _eglParseImageAttribList(&attrs, disp, attr_list);
+   if (err != EGL_SUCCESS)
+  return NULL;
+
+   if (attrs.Width <= 0 || attrs.Height <= 0 ||
+   attrs.DRMBufferStrideMESA <= 0) {
+  _eglError(EGL_BAD_PARAMETER,
+   "bad width, height or stride");
+  return NULL;
+   }
+
+   switch (attrs.DRMBufferFormatMESA) {
+   case EGL_DRM_BUFFER_FORMAT_ARGB32_MESA:
+  format = __DRI_IMAGE_FORMAT_ARGB;
+  pitch = attrs.DRMBufferStrideMESA;
+  break;
+   default:
+  _eglError(EGL_BAD_PARAMETER,
+   "dri2_create_image_khr: unsupported pixmap depth");
+  return NULL;
+   }
+
+   dri_image =
+  dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen,
+  attrs.Width,
+  attrs.Height,
+  format,
+  name,
+  pitch,
+  NULL);
+
+   return dri2_create_image_from_dri(disp, dri_image);
+}
+
 static EGLBoolean
 dri2_check_dma_buf_attribs(const _EGLImageAttribs *attrs)
 {
@@ -1856,67 +1854,6 @@ dri2_create_image_dma_buf(_EGLDisplay *disp, _EGLContext 
*ctx,
 
return res;
 }
-#endif
-
-_EGLImage *
-dri2_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
- _EGLContext *ctx, EGLenum target,
- EGLClientBuffer buffer, const EGLint *attr_list)
-{
-   (void) drv;
-
-   switch (target) {
-   case EGL_GL_TEXTURE_2D_KHR:
-   case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR:
-   case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR:
-   case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR:
-   case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_KHR:
-   case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z_KHR:
-   case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_KHR:
-  return dri2_create_image_khr_texture(disp, ctx, target, buffer, 
attr_list);
-   case EGL_GL_TEXTURE_3D_KHR:
-  if (disp->Extensions.KHR_gl_texture_3D_image) {
- return dri2_create_image_khr_texture(disp, ctx, target, buffer, 
attr_list);
-  }
-  else {
- _eglError(EGL_BAD_PARAMETER, "dri2_create_image_khr");
- return EGL_NO_IMAGE_KHR;
-  }
-   case EGL_GL_RENDERBUFFER_KHR:
-  retu

[Mesa-dev] [PATCH 1/7] configure.ac: null, android, gdi are not valid egl-platforms

2015-07-10 Thread Emil Velikov
... and update the documentation to reflect reality.
null and gdi are gone, and surfaceless is a recent addition.

Signed-off-by: Emil Velikov 
---
 configure.ac  | 3 ---
 docs/egl.html | 6 +++---
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/configure.ac b/configure.ac
index 9ffd69d..f3a24f2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1763,9 +1763,6 @@ for plat in $egl_platforms; do
AC_MSG_ERROR([EGL platform surfaceless requires libdrm 
>= $LIBDRM_REQUIRED])
;;
 
-   android|gdi|null)
-   ;;
-
*)
AC_MSG_ERROR([EGL platform '$plat' does not exist])
;;
diff --git a/docs/egl.html b/docs/egl.html
index 3ab1a60..30d8687 100644
--- a/docs/egl.html
+++ b/docs/egl.html
@@ -88,10 +88,10 @@ types such as EGLNativeDisplayType or
 EGLNativeWindowType defined for.
 
 The available platforms are x11, drm,
-wayland, null, android,
-haiku, and gdi.  The android platform
+wayland, surfaceless, android,
+and haiku.  The android platform
 can only be built as a system component, part of AOSP, while the
-haiku and gdi platforms can only be built with SCons.
+haiku platforms can only be built with SCons.
 Unless for special needs, the build system should
 select the right platforms automatically.
 
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/7] egl/wayland: use designated initializers

2015-07-10 Thread Emil Velikov
Signed-off-by: Emil Velikov 
---
 src/egl/drivers/dri2/platform_wayland.c | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/egl/drivers/dri2/platform_wayland.c 
b/src/egl/drivers/dri2/platform_wayland.c
index 1e12760..0b1adf8 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -65,7 +65,7 @@ sync_callback(void *data, struct wl_callback *callback, 
uint32_t serial)
 }
 
 static const struct wl_callback_listener sync_listener = {
-   sync_callback
+   .done = sync_callback
 };
 
 static int
@@ -104,8 +104,8 @@ wl_buffer_release(void *data, struct wl_buffer *buffer)
dri2_surf->color_buffers[i].locked = 0;
 }
 
-static struct wl_buffer_listener wl_buffer_listener = {
-   wl_buffer_release
+static const struct wl_buffer_listener wl_buffer_listener = {
+   .release = wl_buffer_release
 };
 
 static void
@@ -598,7 +598,7 @@ wayland_throttle_callback(void *data,
 }
 
 static const struct wl_callback_listener throttle_listener = {
-   wayland_throttle_callback
+   .done = wayland_throttle_callback
 };
 
 static void
@@ -944,10 +944,10 @@ drm_handle_authenticated(void *data, struct wl_drm *drm)
 }
 
 static const struct wl_drm_listener drm_listener = {
-   drm_handle_device,
-   drm_handle_format,
-   drm_handle_authenticated,
-   drm_handle_capabilities
+   .device = drm_handle_device,
+   .format = drm_handle_format,
+   .authenticated = drm_handle_authenticated,
+   .capabilities = drm_handle_capabilities
 };
 
 static void
@@ -972,8 +972,8 @@ registry_handle_global_remove(void *data, struct 
wl_registry *registry,
 }
 
 static const struct wl_registry_listener registry_listener_drm = {
-   registry_handle_global_drm,
-   registry_handle_global_remove
+   .global = registry_handle_global_drm,
+   .global_remove = registry_handle_global_remove
 };
 
 static EGLBoolean
@@ -1729,7 +1729,7 @@ shm_handle_format(void *data, struct wl_shm *shm, 
uint32_t format)
 }
 
 static const struct wl_shm_listener shm_listener = {
-   shm_handle_format
+   .format = shm_handle_format
 };
 
 static void
@@ -1746,8 +1746,8 @@ registry_handle_global_swrast(void *data, struct 
wl_registry *registry, uint32_t
 }
 
 static const struct wl_registry_listener registry_listener_swrast = {
-   registry_handle_global_swrast,
-   registry_handle_global_remove
+   .global = registry_handle_global_swrast,
+   .global_remove = registry_handle_global_remove
 };
 
 static struct dri2_egl_display_vtbl dri2_wl_swrast_display_vtbl = {
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 66346] shader_query.cpp:49: error: invalid conversion from 'void*' to 'GLuint'

2015-07-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=66346

--- Comment #21 from Emil Velikov  ---
(In reply to Vinson Lee from comment #20)
> (In reply to José Fonseca from comment #19)
> > (In reply to Vinson Lee from comment #18)
> > > mesa: 0166b4c165271bd7525a91049e58e390cb596c60 (master 10.7.0-devel)
> > > 
> > > Still see this build error. BUILDING_MESA is only defined for darwin DRI
> > > enabled builds.
> > 
> > It should be a matter of adding the define on SCons builds too.
> > 
> > But I don't know if there's much point for building Mesa w/ SCons on Mac --
> > we don't actually build an usable libGL there.
> 
> BUILDING_MESA is also not defined on automake dri-disabled build.
It took me a second read to understand what you meant here. Esp. since the code
just does not work, with indirect rendering under MacOS/Darwin.

Please post the complete configure line that you're using. Thanks.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Shader-cache status and transition

2015-07-10 Thread Carl Worth
Hi folks,

I've pushed the latest version of my shader-cache work to a branch named
"shader-cache" at:

git://people.freedesktop.org/~cworth/mesa

I've rebased this against the latest master branch and verified that it
at least compiles and works with at least some trivial test programs.

I'm not attaching the code as patches sent via email, because things
really aren't ready for that yet, but instead will need a bit of
attention from someone else. There are a few FIXME comments in the code,
most of which should be quite trivial to address. Here are a couple of
less-trivial things that still need to be done:

1. Code needs to be added to fall back to recompiling everything from
   source when there's a cache miss.

   The scenario here is that glCompileShader sees a sha1 of some GLSL
   source code that has been seen before so optimistically doesn't do
   any compilation. But then, (due to some intervening state change),
   the shader cache may miss when it actually does a lookup based on the
   composite sha1 that references the program keys, etc.

   In this case, we need code added to do the full recompile.

2. The functions brw_upload_programs() and upload_cached_program(), (in
   brw_state_upload.c and brw_shader_cache.c) need some refactoring.

   As-is, in the patch series, there are two significant problems here:

 i. The upload_cached_program function makes calls to the expensive
functions brw_<stage>_populate_key. These functions are also
called subsequently by the various brw_upload_<stage>_prog()
functions. This should be refactored so that the populate_key
functions are never called more than once for a single call to
brw_upload_programs.

 ii. The upload_cached_program has a really cheesy 64-entry array
 named "been_there" which is an attempt to avoid excessive
 checks of the on-disk cache. This array should be
 eliminated. In its place, the code from brw_upload_programs
 down should be refactored to find compiled binaries in the
 following order:

 1st: The in-memory BO "cache", (which is poorly named
  here---it's more a stash of every program seen before,
  there's no replacement happening so it's really not acting
  like a cache).

 2nd: If not found there, look in the on-disk cache

 The current "been_there" array exists only because the code is
 structured to look at the on-disk cache first, and that will be
 really wasteful if the program happens to be in memory
 already. The right fix is to simply do the expensive checks
 only if the cheap checks fail.

 I had made several preliminary attempts at this particular
 refactoring, but wasn't able to get any to work completely. Ken
 should be up to speed on what I was doing there.

Beyond that, there's obviously a lot of testing that will be needed
before we have assurance that the shader cache is working well, (such as
getting piglit to all pass). I've only been testing with trivial
programs so far. So I anticipate that there are lots of little pieces of
state that will need to be saved and restored that are not currently
happening. The hardest part should be tracking down each of
these. Hopefully the actual code required in each case should be nearly
trivial. Fortunately, the hard part should be very parallelizable,
(everyone grab your favorite piglit test).

I really had wanted to get this code into better shape before now. But
the sad reality is that I don't anticipate being able to spend any more
direct time on this. I will be quite happy to answer any questions that
come up from whoever takes this code on.

Thanks for everything. I've had a lot of fun playing with mesa the last
few years, and I'm sure we'll all keep bumping into each other in
various places.

-Carl


pgpe44380JXmV.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] clover: Pass image attributes to the kernel

2015-07-10 Thread Francisco Jerez
Zoltan Gilian  writes:

> Read-only and write-only image arguments are recognized and
> distinguished.
> Attributes of the image arguments are passed to the kernel as implicit
> arguments.

Thanks, this looks much better.  One thing that still seems kind of
unfortunate is the fact that you've added a single "image_attributes"
argument that lumps image dimensions with format.  I expect the set of
targets that need format metadata to be a strict superset of the targets
that need image dimensions, so it would be nice if the target could
specify them as separate arguments (e.g. semantic::image_size and
::image_format).

Another related point is that you've chosen to pass the metadata for all
images together at the end of the input buffer.  I have the suspicion
that it would simplify both the OpenCL front-end and compiler back-end
code if the image metadata was interleaved with images themselves.
E.g. for each image argument and kernel the target would request an
argument list like

 type::imageNd semantic::general,
 type::scalar semantic::image_format,
 type::scalar semantic::image_size

and assume a struct-like layout for each image argument in the input
buffer:
 
 struct image_argument {
uint32_t index;
uint32_t size[3];
uint32_t format[2];
 };

For the back-end this would imply that the offset between a given image
argument and metadata field would be fixed, independent of how many
other arguments and how many images are being passed to the kernel, and
for the front-end it would mean you could get rid of the first pass of
the argument list you've added to exec_context::bind() (you could just
take the image from the last explicit_arg argument seen).

Some more nit-picks below.

> ---
>  src/gallium/state_trackers/clover/core/kernel.cpp  |  27 ++
>  src/gallium/state_trackers/clover/core/kernel.hpp  |  13 ++-
>  src/gallium/state_trackers/clover/core/memory.cpp  |   2 +-
>  src/gallium/state_trackers/clover/core/module.hpp  |   3 +-
>  .../state_trackers/clover/llvm/invocation.cpp  | 102 
> -
>  5 files changed, 140 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp 
> b/src/gallium/state_trackers/clover/core/kernel.cpp
> index 0756f06..d7d42a6 100644
> --- a/src/gallium/state_trackers/clover/core/kernel.cpp
> +++ b/src/gallium/state_trackers/clover/core/kernel.cpp
> @@ -159,6 +159,14 @@ kernel::exec_context::bind(intrusive_ptr 
> _q,
> auto msec = find(type_equals(module::section::text), m.secs);
> auto explicit_arg = kern._args.begin();
>  
> +   std::vector image_args;
> +   for (const auto& arg: kern._args) {
> +  if (auto img_arg = dynamic_cast(arg.get())) {
> + image_args.push_back(img_arg);
> +  }
> +   }
> +   auto image_arg = image_args.begin();
> +
> for (auto &marg : margs) {
>switch (marg.semantic) {
>case module::argument::general:
> @@ -182,9 +190,28 @@ kernel::exec_context::bind(intrusive_ptr 
> _q,
>   }
>   break;
>}
> +  case module::argument::image_attributes: {
> + auto img = (*image_arg++)->get_image();
> + cl_image_format fmt = img->format();
> + auto attributes = std::vector({
> +   static_cast(img->width()),
> +   static_cast(img->height()),
> +   static_cast(img->depth()),
> +   static_cast(fmt.image_channel_data_type),
> +   static_cast(fmt.image_channel_order)});

How about casting to cl_uint instead?  And you could do:

 std::vector attributes {
   ...
 };

> +
> + for (auto x: attributes) {
> +auto arg = argument::create(marg);
> +
> +arg->set(sizeof(x), &x);
> +arg->bind(*this, marg);
> + }
> + break;
> +  }
>}
> }
>  
> +

Unnecessary whitespace. 

> // Create a new compute state if anything changed.
> if (!st || q != _q ||
> cs.req_local_mem != mem_local ||
> diff --git a/src/gallium/state_trackers/clover/core/kernel.hpp 
> b/src/gallium/state_trackers/clover/core/kernel.hpp
> index d6432a4..be9f783 100644
> --- a/src/gallium/state_trackers/clover/core/kernel.hpp
> +++ b/src/gallium/state_trackers/clover/core/kernel.hpp
> @@ -190,7 +190,14 @@ namespace clover {
>   pipe_surface *st;
>};
>  
> -  class image_rd_argument : public argument {
> +  class image_argument : public argument {
> +  public:
> + const image *get_image() const { return img; }

Can we call this method get() so the duality with set() is more obvious?

> +  protected:
> + image *img;
> +  };
> +
> +  class image_rd_argument : public image_argument {
>public:
>   virtual void set(size_t size, const void *value);
>   virtual void bind(exec_context &ctx,
> @@ -198,11 +205,10 @@ namespace clover {
>   virtual void unbind(exec_context &ctx);
>  
>private:
> - image *img;
> 

Re: [Mesa-dev] [PATCH 1/2] i965: Implement b2f and b2i using negation.

2015-07-10 Thread Matt Turner
On Fri, Jul 10, 2015 at 10:06 AM, Francisco Jerez  wrote:
> Booleans are represented as 0/-1 on modern hardware which means we can
> just negate them to convert them into a numeric type.  Negation has
> the benefit that it can be implemented using a source modifier which
> can easily be propagated into some other instruction.  shader-db
> results on HSW:
>
>  total instructions in shared programs: 5264246 -> 5264211 (-0.00%)
>  instructions in affected programs: 1464 -> 1429 (-2.39%)
>  helped:15
>  HURT:  1

Strange, I get different (better) numbers on Haswell:

total instructions in shared programs: 6279705 -> 6277316 (-0.04%)
instructions in affected programs: 40948 -> 38559 (-5.83%)
helped:123
HURT:  1
GAINED:1
LOST:  0

Certainly more than 15 helped programs in Civilization Beyond Earth alone.

The one hurt program is
rocketbirds-hardboiled-chicken/fp-2.shader_test, which is hurt because
we do not CSE the MOV instructions. I'll send a patch to fix this.

> No piglit regressions.

As a rule, this is implied by sending the patch. Don't put it in the
commit log -- in the worst case the patch is rebased and it's no
longer true (this has happened, embarrassingly enough). Same thing in
2/2.

> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 4 +---
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 +--
>  2 files changed, 2 insertions(+), 9 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 4690d00..64ff24c 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -969,10 +969,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
> nir_alu_instr *instr)
>break;
>
> case nir_op_b2i:
> -  bld.AND(result, op[0], fs_reg(1));
> -  break;
> case nir_op_b2f:
> -  bld.AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], 
> fs_reg(0x3f80u));
> +  bld.MOV(result, negate(op[0]));
>break;
>
> case nir_op_f2b:
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index c9c2661..fd94a70 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -1733,16 +1733,11 @@ vec4_visitor::visit(ir_expression *ir)
>emit(MOV(result_dst, op[0]));
>break;
> case ir_unop_b2i:
> -  emit(AND(result_dst, op[0], src_reg(1)));
> -  break;
> case ir_unop_b2f:
>if (devinfo->gen <= 5) {
>   resolve_bool_comparison(ir->operands[0], &op[0]);
>}
> -  op[0].type = BRW_REGISTER_TYPE_D;
> -  result_dst.type = BRW_REGISTER_TYPE_D;
> -  emit(AND(result_dst, op[0], src_reg(0x3f80u)));
> -  result_dst.type = BRW_REGISTER_TYPE_F;
> +  emit(MOV(result_dst, negate(op[0])));
>break;
> case ir_unop_f2b:
>emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
> --
> 2.4.3
>

Good idea. Not sure why I didn't think of that before.

Both are:

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] egl/wayland: remove dead code

2015-07-10 Thread Matt Turner
On Fri, Jul 10, 2015 at 10:49 AM, Emil Velikov  wrote:
> The macro HAVE_MKOSTEMP was never defined.
>
> Cc: Axel Davy 
> Signed-off-by: Emil Velikov 
> ---
>  src/egl/drivers/dri2/platform_wayland.c | 6 --
>  1 file changed, 6 deletions(-)
>
> diff --git a/src/egl/drivers/dri2/platform_wayland.c 
> b/src/egl/drivers/dri2/platform_wayland.c
> index 9005eb7..bdccee3 100644
> --- a/src/egl/drivers/dri2/platform_wayland.c
> +++ b/src/egl/drivers/dri2/platform_wayland.c
> @@ -1255,17 +1255,11 @@ create_tmpfile_cloexec(char *tmpname)
>  {
> int fd;
>
> -#ifdef HAVE_MKOSTEMP
> -   fd = mkostemp(tmpname, O_CLOEXEC);
> -   if (fd >= 0)
> -  unlink(tmpname);

It is indeed dead-code, but wouldn't you rather simply add

AC_CHECK_FUNC([mkostemp], AC_DEFINE(HAVE_MKOSTEMP, 1, [Use mkostemp(3)]))

to configure.ac and bring it to life?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 91263] R600 Segfault in finalize_textures

2015-07-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=91263

--- Comment #2 from Marek Olšák  ---
Proposed fix:
http://patchwork.freedesktop.org/patch/52969/

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Implement faster streaming memcpy

2015-07-10 Thread Matt Turner
On Fri, Jul 10, 2015 at 4:20 AM, Marek Olšák  wrote:
> Shouldn't this stuff be in src/util?

I guess? I'm not opposed to moving it, if other drivers can make use
of it, but I don't want this patch to do the move.

I wrote it in November 2013 and src/util only came into being in
August 2014 if you're wondering why it's not in src/util.

Also, top quoting.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] auxiliary/vl: use the correct screen index

2015-07-10 Thread Marek Olšák
I'm sure this doesn't build. "dri2_dpy" is not declared and "screen" is int.

Marek

On Fri, Jul 10, 2015 at 7:46 PM, Emil Velikov  wrote:
> Inspired (copied) from Marek's commit for egl/x11
> commit 0b56e23e7f3(egl/dri2: use the correct screen index)
>
> Cc: 10.6 
> Cc: Marek Olšák 
> Signed-off-by: Emil Velikov 
> ---
>  src/gallium/auxiliary/vl/vl_winsys_dri.c | 18 +++---
>  1 file changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c 
> b/src/gallium/auxiliary/vl/vl_winsys_dri.c
> index 7e61b88..ac2feec 100644
> --- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
> +++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
> @@ -293,6 +293,16 @@ vl_screen_get_private(struct vl_screen *vscreen)
> return vscreen;
>  }
>
> +static xcb_screen_t *
> +get_xcb_screen(xcb_screen_iterator_t iter, int screen)
> +{
> +for (; iter.rem; --screen, xcb_screen_next(&iter))
> +if (screen == 0)
> +return iter.data;
> +
> +return NULL;
> +}
> +
>  struct vl_screen*
>  vl_screen_create(Display *display, int screen)
>  {
> @@ -333,9 +343,11 @@ vl_screen_create(Display *display, int screen)
> if (dri2_query == NULL || error != NULL || dri2_query->minor_version < 2)
>goto free_query;
>
> -   s = xcb_setup_roots_iterator(xcb_get_setup(scrn->conn));
> -   while (screen--)
> -   xcb_screen_next(&s);
> +   s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
> +   screen = get_xcb_screen(s, dri2_dpy->screen);
> +   if (!screen)
> +  goto free_query;
> +
> driverType = XCB_DRI2_DRIVER_TYPE_DRI;
>  #ifdef DRI2DriverPrimeShift
> {
> --
> 2.4.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Implement b2f and b2i using negation.

2015-07-10 Thread Francisco Jerez
Matt Turner  writes:

> On Fri, Jul 10, 2015 at 10:06 AM, Francisco Jerez  
> wrote:
>> Booleans are represented as 0/-1 on modern hardware which means we can
>> just negate them to convert them into a numeric type.  Negation has
>> the benefit that it can be implemented using a source modifier which
>> can easily be propagated into some other instruction.  shader-db
>> results on HSW:
>>
>>  total instructions in shared programs: 5264246 -> 5264211 (-0.00%)
>>  instructions in affected programs: 1464 -> 1429 (-2.39%)
>>  helped:15
>>  HURT:  1
>
> Strange, I get different (better) numbers on Haswell:
>
> total instructions in shared programs: 6279705 -> 6277316 (-0.04%)
> instructions in affected programs: 40948 -> 38559 (-5.83%)

Odd.  Apparently you have more instructions than I have overall so you
either have more shaders in your shader-db or some of them are not being
compiled for me for some reason.

> helped:123
> HURT:  1
> GAINED:1
> LOST:  0
>
> Certainly more than 15 helped programs in Civilization Beyond Earth alone.
>
> The one hurt program is
> rocketbirds-hardboiled-chicken/fp-2.shader_test, which is hurt because
> we do not CSE the MOV instructions. I'll send a patch to fix this.
>
>> No piglit regressions.
>
> As a rule, this is implied by sending the patch. Don't put it in the
> commit log -- in the worst case the patch is rebased and it's no
> longer true (this has happened, embarrassingly enough). Same thing in
> 2/2.

Hah, some people (including yourself earlier this week IIRC) have asked
me in the past whether some patch passes piglit after I sent it to the
mailing list, so I can only assume it's not redundant information.  You
also seemed to get angry recently because some commit I sent was missing
(from my point of view) redundant information you considered critical,
so don't be surprised to see all kinds of useless data in my commit
messages from now on.

>
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 4 +---
>>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 +--
>>  2 files changed, 2 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index 4690d00..64ff24c 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -969,10 +969,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
>> nir_alu_instr *instr)
>>break;
>>
>> case nir_op_b2i:
>> -  bld.AND(result, op[0], fs_reg(1));
>> -  break;
>> case nir_op_b2f:
>> -  bld.AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], 
>> fs_reg(0x3f80u));
>> +  bld.MOV(result, negate(op[0]));
>>break;
>>
>> case nir_op_f2b:
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> index c9c2661..fd94a70 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> @@ -1733,16 +1733,11 @@ vec4_visitor::visit(ir_expression *ir)
>>emit(MOV(result_dst, op[0]));
>>break;
>> case ir_unop_b2i:
>> -  emit(AND(result_dst, op[0], src_reg(1)));
>> -  break;
>> case ir_unop_b2f:
>>if (devinfo->gen <= 5) {
>>   resolve_bool_comparison(ir->operands[0], &op[0]);
>>}
>> -  op[0].type = BRW_REGISTER_TYPE_D;
>> -  result_dst.type = BRW_REGISTER_TYPE_D;
>> -  emit(AND(result_dst, op[0], src_reg(0x3f80u)));
>> -  result_dst.type = BRW_REGISTER_TYPE_F;
>> +  emit(MOV(result_dst, negate(op[0])));
>>break;
>> case ir_unop_f2b:
>>emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
>> --
>> 2.4.3
>>
>
> Good idea. Not sure why I didn't think of that before.
>
> Both are:
>
> Reviewed-by: Matt Turner 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] i965: Optimize batchbuffer macros.

2015-07-10 Thread Matt Turner
Previously OUT_BATCH was just a macro around an inline function which
does

   brw->batch.map[brw->batch.used++] = dword;

When making consecutive calls to intel_batchbuffer_emit_dword() the
compiler isn't able to recognize that we're writing consecutive memory
locations or that it doesn't need to write batch.used back to memory
each time.

We can avoid both of these problems by making a local pointer to the
next location in the batch in BEGIN_BATCH(), indexing it with a local
variable, and incrementing batch.used once in ADVANCE_BATCH().

Cuts 18k from the .text size.

   text    data    bss      dec     hex  filename
4946956  195152  26192  5168300  4edcac  i965_dri.so before
4928588  195152  26192  5149932  4e94ec  i965_dri.so after

This series (including commit c0433948) improves performance of Synmark
OglBatch7 by 3.64514% +/- 0.298131% (n=282) on Ivybridge.
---
That -4.19005% +/- 1.15188% (n=30) regression on Ivybridge is now a
performance improvement! Thanks Chris for the help!

 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  8 ++---
 src/mesa/drivers/dri/i965/intel_batchbuffer.h | 52 +++
 2 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index f82958f..93f2872 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -395,13 +395,13 @@ _intel_batchbuffer_flush(struct brw_context *brw,
  */
 uint32_t
 intel_batchbuffer_reloc(struct brw_context *brw,
-drm_intel_bo *buffer,
+drm_intel_bo *buffer, uint32_t offset,
 uint32_t read_domains, uint32_t write_domain,
 uint32_t delta)
 {
int ret;
 
-   ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
+   ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset,
 buffer, delta,
 read_domains, write_domain);
assert(ret == 0);
@@ -416,11 +416,11 @@ intel_batchbuffer_reloc(struct brw_context *brw,
 
 uint64_t
 intel_batchbuffer_reloc64(struct brw_context *brw,
-  drm_intel_bo *buffer,
+  drm_intel_bo *buffer, uint32_t offset,
   uint32_t read_domains, uint32_t write_domain,
   uint32_t delta)
 {
-   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
+   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset,
  buffer, delta,
  read_domains, write_domain);
assert(ret == 0);
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index c0456f3..6342c97 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -59,14 +59,16 @@ void intel_batchbuffer_data(struct brw_context *brw,
 
 uint32_t intel_batchbuffer_reloc(struct brw_context *brw,
  drm_intel_bo *buffer,
+ uint32_t offset,
  uint32_t read_domains,
  uint32_t write_domain,
- uint32_t offset);
+ uint32_t delta);
 uint64_t intel_batchbuffer_reloc64(struct brw_context *brw,
drm_intel_bo *buffer,
+   uint32_t offset,
uint32_t read_domains,
uint32_t write_domain,
-   uint32_t offset);
+   uint32_t delta);
 static inline uint32_t float_as_int(float f)
 {
union {
@@ -160,23 +162,43 @@ intel_batchbuffer_advance(struct brw_context *brw)
 #endif
 }
 
-#define BEGIN_BATCH(n) intel_batchbuffer_begin(brw, n, RENDER_RING)
-#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(brw, n, BLT_RING)
-#define OUT_BATCH(d) intel_batchbuffer_emit_dword(brw, d)
-#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(brw, f)
-#define OUT_RELOC(buf, read_domains, write_domain, delta)  \
-   OUT_BATCH(intel_batchbuffer_reloc(brw, buf, read_domains, write_domain, \
- delta))
+#define BEGIN_BATCH(n) do {\
+   intel_batchbuffer_begin(brw, (n), RENDER_RING); \
+   uint32_t *__map = &brw->batch.map[brw->batch.used]; \
+   int __idx = 0, UNUSED __final_idx = (n)
+
+#define BEGIN_BATCH_BLT(n) do {\
+   intel_batchbuffer_begin(brw, (n), BLT_RING);\
+   uint32_t *__map = &brw->batch.map[brw->batch.used]; \
+   int __idx = 0, UNUSED __final_idx = (n)
+
+#define OUT_BATCH(d) __map[__idx++] = (d)
+#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f)))
+
+#define OUT_RELOC(buf, read_d

[Mesa-dev] [PATCH 1/5] i965: Move BEGIN_BATCH() into same control flow as ADVANCE_BATCH().

2015-07-10 Thread Matt Turner
BEGIN_BATCH() and ADVANCE_BATCH() will contain "do {" and "} while (0)"
respectively to allow declaring local variables used by intervening
OUT_BATCH macros. As such, BEGIN_BATCH() and ADVANCE_BATCH() need to be
in the same control flow.
---
 src/mesa/drivers/dri/i965/brw_draw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 69ad4d4..ec13473 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -261,17 +261,17 @@ static void brw_emit_prim(struct brw_context *brw,
   indirect_flag = 0;
}
 
+   BEGIN_BATCH(brw->gen >= 7 ? 7 : 6);
+
if (brw->gen >= 7) {
   if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
  predicate_enable = GEN7_3DPRIM_PREDICATE_ENABLE;
   else
  predicate_enable = 0;
 
-  BEGIN_BATCH(7);
   OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | 
predicate_enable);
   OUT_BATCH(hw_prim | vertex_access_type);
} else {
-  BEGIN_BATCH(6);
   OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
 hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
 vertex_access_type);
-- 
2.3.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] i965: Turn set_blitter_tiling() into a macro.

2015-07-10 Thread Matt Turner
Its uses of OUT_BATCH will need a local variable defined by BEGIN_BATCH.

Increases .text size by 528 bytes.
---
 src/mesa/drivers/dri/i965/intel_blit.c | 55 --
 1 file changed, 25 insertions(+), 30 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c 
b/src/mesa/drivers/dri/i965/intel_blit.c
index bc39053..2a0f621 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -176,36 +176,31 @@ get_tr_vertical_align(uint32_t tr_mode, uint32_t cpp, 
bool is_src) {
  * tiling state would leak into other unsuspecting applications (like the X
  * server).
  */
-static void
-set_blitter_tiling(struct brw_context *brw,
-   bool dst_y_tiled, bool src_y_tiled)
-{
-   assert(brw->gen >= 6);
-
-   /* Idle the blitter before we update how tiling is interpreted. */
-   OUT_BATCH(MI_FLUSH_DW);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-
-   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-   OUT_BATCH(BCS_SWCTRL);
-   OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 |
- (dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) |
- (src_y_tiled ? BCS_SWCTRL_SRC_Y : 0));
-}
-
-#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) do { \
-  BEGIN_BATCH_BLT(n + ((dst_y_tiled || src_y_tiled) ? 14 : 0)); \
-  if (dst_y_tiled || src_y_tiled)   \
- set_blitter_tiling(brw, dst_y_tiled, src_y_tiled); \
-   } while (0)
-
-#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) do {  \
-  if (dst_y_tiled || src_y_tiled)   \
- set_blitter_tiling(brw, false, false); \
-  ADVANCE_BATCH();  \
-   } while (0)
+#define SET_BLITTER_TILING(dst_y_tiled, src_y_tiled) do { \
+   assert(brw->gen >= 6); \
+  \
+   /* Idle the blitter before we update how tiling is interpreted. */ \
+   OUT_BATCH(MI_FLUSH_DW);\
+   OUT_BATCH(0);  \
+   OUT_BATCH(0);  \
+   OUT_BATCH(0);  \
+  \
+   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); \
+   OUT_BATCH(BCS_SWCTRL); \
+   OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 |\
+ (dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) |   \
+ (src_y_tiled ? BCS_SWCTRL_SRC_Y : 0));   \
+} while (0)
+
+#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled)\
+   BEGIN_BATCH_BLT(n + ((dst_y_tiled || src_y_tiled) ? 14 : 0));  \
+   if (dst_y_tiled || src_y_tiled)\
+  SET_BLITTER_TILING(dst_y_tiled, src_y_tiled)\
+
+#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) \
+   if (dst_y_tiled || src_y_tiled)\
+  SET_BLITTER_TILING(false, false);   \
+   ADVANCE_BATCH()
 
 static int
 blt_pitch(struct intel_mipmap_tree *mt)
-- 
2.3.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] i965: Turn emit_vertex_buffer_state() into a macro.

2015-07-10 Thread Matt Turner
Its uses of OUT_BATCH will need a local variable defined by BEGIN_BATCH.

Increases .text size by 8 bytes.
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 79 +
 1 file changed, 36 insertions(+), 43 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c 
b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 320e40e..0536ac3 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -604,46 +604,40 @@ brw_prepare_shader_draw_parameters(struct brw_context 
*brw)
 /**
  * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS).
  */
-static void
-emit_vertex_buffer_state(struct brw_context *brw,
- unsigned buffer_nr,
- drm_intel_bo *bo,
- unsigned bo_ending_address,
- unsigned bo_offset,
- unsigned stride,
- unsigned step_rate)
-{
-   struct gl_context *ctx = &brw->ctx;
-   uint32_t dw0;
-
-   if (brw->gen >= 6) {
-  dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) |
-(step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA
-   : GEN6_VB0_ACCESS_VERTEXDATA);
-   } else {
-  dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) |
-(step_rate ? BRW_VB0_ACCESS_INSTANCEDATA
-   : BRW_VB0_ACCESS_VERTEXDATA);
-   }
-
-   if (brw->gen >= 7)
-  dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
-
-   if (brw->gen == 7)
-  dw0 |= GEN7_MOCS_L3 << 16;
-
-   WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047),
- "VBO stride %d too large, bad rendering may occur\n",
- stride);
-   OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT));
-   OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_offset);
-   if (brw->gen >= 5) {
-  OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_ending_address);
-   } else {
-  OUT_BATCH(0);
-   }
-   OUT_BATCH(step_rate);
-}
+#define EMIT_VERTEX_BUFFER_STATE(buffer_nr, bo, bo_ending_address, \
+ bo_offset,stride, step_rate)  \
+do {   \
+   struct gl_context *ctx = &brw->ctx; \
+   uint32_t dw0;   \
+   \
+   if (brw->gen >= 6) {\
+  dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) |  \
+(step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA  \
+   : GEN6_VB0_ACCESS_VERTEXDATA);  \
+   } else {\
+  dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) |   \
+(step_rate ? BRW_VB0_ACCESS_INSTANCEDATA   \
+   : BRW_VB0_ACCESS_VERTEXDATA);   \
+   }   \
+   \
+   if (brw->gen >= 7)  \
+  dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;\
+   \
+   if (brw->gen == 7)  \
+  dw0 |= GEN7_MOCS_L3 << 16;   \
+   \
+   WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047),  \
+ "VBO stride %d too large, bad rendering may occur\n", \
+ stride);  \
+   OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT));   \
+   OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_offset);\
+   if (brw->gen >= 5) {\
+  OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_ending_address); \
+   } else {\
+  OUT_BATCH(0);\
+   }   \
+   OUT_BATCH(step_rate);   \
+} while (0)
 
 static void brw_emit_vertices(struct brw_context *brw)
 {
@@ -704,14 +698,13 @@ static void brw_emit_vertices(struct brw_context *brw)
   OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
   for (i = 0; i < brw->vb.nr_buffers; i++) {
 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
- emit_vertex_buffer_state(brw, i, buffer->bo, buffer->bo->size - 1,
+ EMIT_VERTEX_BUFFER_STATE(i, buffer->bo, buffer->bo->size - 1,
   buffer->offset, buffer->stride,
   buffer->step_rate);
-
   }
 
   if (brw->vs.prog_data->uses_vertexid) {
- emit_vertex_buffer_state(brw, brw->vb.nr_buffers

[Mesa-dev] [PATCH 4/5] i965: Split batch emission from relocation functions.

2015-07-10 Thread Matt Turner
So that everything writing to the batch between BEGIN_BATCH() and
ADVANCE_BATCH() goes through OUT_BATCH.
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 30 ++-
 src/mesa/drivers/dri/i965/intel_batchbuffer.h | 34 ++-
 2 files changed, 30 insertions(+), 34 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index d93ee6e..f82958f 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -393,11 +393,11 @@ _intel_batchbuffer_flush(struct brw_context *brw,
 
 /*  This is the only way buffers get added to the validate list.
  */
-bool
-intel_batchbuffer_emit_reloc(struct brw_context *brw,
- drm_intel_bo *buffer,
- uint32_t read_domains, uint32_t write_domain,
-uint32_t delta)
+uint32_t
+intel_batchbuffer_reloc(struct brw_context *brw,
+drm_intel_bo *buffer,
+uint32_t read_domains, uint32_t write_domain,
+uint32_t delta)
 {
int ret;
 
@@ -411,16 +411,14 @@ intel_batchbuffer_emit_reloc(struct brw_context *brw,
 * case the buffer doesn't move and we can short-circuit the relocation
 * processing in the kernel
 */
-   intel_batchbuffer_emit_dword(brw, buffer->offset64 + delta);
-
-   return true;
+   return buffer->offset64 + delta;
 }
 
-bool
-intel_batchbuffer_emit_reloc64(struct brw_context *brw,
-   drm_intel_bo *buffer,
-   uint32_t read_domains, uint32_t write_domain,
-  uint32_t delta)
+uint64_t
+intel_batchbuffer_reloc64(struct brw_context *brw,
+  drm_intel_bo *buffer,
+  uint32_t read_domains, uint32_t write_domain,
+  uint32_t delta)
 {
int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
  buffer, delta,
@@ -432,11 +430,7 @@ intel_batchbuffer_emit_reloc64(struct brw_context *brw,
 * case the buffer doesn't move and we can short-circuit the relocation
 * processing in the kernel
 */
-   uint64_t offset = buffer->offset64 + delta;
-   intel_batchbuffer_emit_dword(brw, offset);
-   intel_batchbuffer_emit_dword(brw, offset >> 32);
-
-   return true;
+   return buffer->offset64 + delta;
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index e58eae4..c0456f3 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -57,16 +57,16 @@ void intel_batchbuffer_data(struct brw_context *brw,
 const void *data, GLuint bytes,
 enum brw_gpu_ring ring);
 
-bool intel_batchbuffer_emit_reloc(struct brw_context *brw,
-   drm_intel_bo *buffer,
-  uint32_t read_domains,
-  uint32_t write_domain,
-  uint32_t offset);
-bool intel_batchbuffer_emit_reloc64(struct brw_context *brw,
-drm_intel_bo *buffer,
-uint32_t read_domains,
-uint32_t write_domain,
-uint32_t offset);
+uint32_t intel_batchbuffer_reloc(struct brw_context *brw,
+ drm_intel_bo *buffer,
+ uint32_t read_domains,
+ uint32_t write_domain,
+ uint32_t offset);
+uint64_t intel_batchbuffer_reloc64(struct brw_context *brw,
+   drm_intel_bo *buffer,
+   uint32_t read_domains,
+   uint32_t write_domain,
+   uint32_t offset);
 static inline uint32_t float_as_int(float f)
 {
union {
@@ -164,14 +164,16 @@ intel_batchbuffer_advance(struct brw_context *brw)
 #define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(brw, n, BLT_RING)
 #define OUT_BATCH(d) intel_batchbuffer_emit_dword(brw, d)
 #define OUT_BATCH_F(f) intel_batchbuffer_emit_float(brw, f)
-#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \
-   intel_batchbuffer_emit_reloc(brw, buf,  \
-   read_domains, write_domain, delta); \
-} while (0)
+#define OUT_RELOC(buf, read_domains, write_domain, delta)  \
+   OUT_BATCH(intel_batchbuffer_reloc(brw, buf, read_domains, write_domain, \
+ delta))
 
 /* Handle 48-bit address relocations for Gen8+ */
-#define OUT_RELOC64(buf, read_domains, write_domain, delta) do { \
-   intel_batchbuffer_emit

Re: [Mesa-dev] [PATCH] [v3] i965: Split out gen8 push constant state upload

2015-07-10 Thread Matt Turner
On Thu, Jul 9, 2015 at 11:00 AM, Ben Widawsky
 wrote:
> While implementing the workaround in the previous patch I noticed things were
> starting to get a bit messy. Since gen8 works differently enough from gen7, I
> thought splitting it out would be good.
>
> While here, get rid of gen8 MOCS which does nothing and was in the wrong place
> anyway.
>
> This patch is totally optional. I'd be willing to just always use buffer #2 on
> gen8+. Pre-HSW this wasn't allowed, but it looks like it's okay for gen8 too.
>
> v2: Move inactive batch generation to the top of the function in order to make
> the rest of the code easier to read.
>
> Jenkins results (still a bunch of spurious failures, I miss Mark):
> http://otc-mesa-ci.jf.intel.com/job/bwidawsk/169/
>
> v3: v2 had a bug in that it both didn't emit the right number of dwords, and
> it didn't do ADVANCE_BATCH(). I'm moderately worried that there were no
> failures as a result.
> http://otc-mesa-ci.jf.intel.com/job/bwidawsk/170/

I don't think putting Intel-internal links in the commit message is a good idea.

Ken's made similar comments to me.

Also, so much off the wall commentary...
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 91290] SIGSEGV glcpp/glcpp-parse.y:1077

2015-07-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=91290

Vinson Lee  changed:

   What|Removed |Added

   Keywords||regression
 CC||anuj.pho...@gmail.com,
   ||cwo...@cworth.org
Version|git |10.3

--- Comment #1 from Vinson Lee  ---
a6e9cd14cad20ad98213adf541a7687a7498 is the first bad commit
commit a6e9cd14cad20ad98213adf541a7687a7498
Author: Anuj Phogat 
Date:   Fri Jun 6 16:56:59 2014 -0700

glsl/glcpp: Fix preprocessor error condition for macro redefinition

This patch specifically fixes redefinition condition for white space
changes. #define and #undef functionality in GLSL follows the standard
for C++ preprocessors for macro definitions.

From
https://gcc.gnu.org/onlinedocs/cpp/Undefining-and-Redefining-Macros.html:

These definitions are effectively the same:

 #define FOUR (2 + 2)
 #define FOUR (2+2)
 #define FOUR (2 /* two */ + 2)

but these are not:

 #define FOUR (2 + 2)
 #define FOUR ( 2+2 )
 #define FOUR (2 * 2)
 #define FOUR(score,and,seven,years,ago) (2 + 2)

Fixes Khronos GLES3 CTS tests;
invalid_object_whitespace_vertex
invalid_object_whitespace_fragment

Signed-off-by: Anuj Phogat 
Reviewed-by: Carl Worth 

:04 04 21e3582eed6555a0e4510982863892e0ead45930
6bb0f8508d19b41b5938b9f812b793ba90c278a0 Msrc
bisect run success

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Replace illegal compacted NOP with valid compact instruction

2015-07-10 Thread Matt Turner
On Wed, Jul 8, 2015 at 10:58 PM, Zhenyu Wang  wrote:
> NOP actually has no compact version, but we use it for instruction
> alignment for compact kernel. Although it seems working on HW, it is
> illegal and might not be valid for any future one.
>
> This tries to get a temporary compact instruction with no effect for
> alignment to replace compacted NOP. G45 spec has note that HW compact
> logic could determine NENOP and drop it right away, so we can still
> keep with that.
>
> v2: rebase to master, we still need this to work with internal tool.
>
> Signed-off-by: Zhenyu Wang 
> ---
>  src/mesa/drivers/dri/i965/brw_eu_compact.c | 41 
> +-
>  1 file changed, 35 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
> b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> index 67f0b45..719667a 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> @@ -1367,6 +1367,39 @@ brw_init_compaction_tables(const struct 
> brw_device_info *devinfo)
> }
>  }
>
> +static void
> +brw_get_noop_compact(struct brw_codegen *p, brw_compact_inst *dst)

I'd rather call this function fill_compaction_padding() or something
similar -- there's no need for the brw_ prefix since it's static, it's
not "get"ting anything, and "noop" in the name is a little confusing
since it's not emitting a NOP. :)

> +{
> +   const struct brw_device_info *devinfo = p->devinfo;
> +   brw_inst *inst, i;
> +   struct brw_reg g0 = brw_vec8_grf(0, 0);
> +
> +   memset(dst, 0, sizeof(*dst));
> +
> +   /* G45 compact logic could recognize NENOP and drop right away. */
> +   if (devinfo->is_g4x) {
> +  brw_compact_inst_set_opcode(dst, BRW_OPCODE_NENOP);
> +  brw_compact_inst_set_cmpt_control(dst, true);
> +  return;
> +   }
> +
> +   /*
> +* As NOP has no legal compact version, try to use a legal compact
> +* instruction for compact instruction alignment.
> +*/
> +   brw_push_insn_state(p);
> +   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
> +   brw_set_default_access_mode(p, BRW_ALIGN_1);
> +   inst = brw_MOV(p, g0, g0);
> +   memcpy(&i, inst, sizeof(brw_inst));
> +   brw_pop_insn_state(p);
> +
> +   if (!brw_try_compact_instruction(devinfo, dst, &i)) {
> +  fprintf(stderr, "Failed to generate compact inst for alignment!\n");
> +  exit(1);

This isn't an error we ever expect a user to hit, so let's make it an assert:

bool UNUSED ret = brw_try_compact_instruction(devinfo, dst, &i);
assert(ret);

With those small changes, this patch is

Reviewed-by: Matt Turner 

I'd be happy to make the changes myself and commit the patch if you'd
like -- just tell me so. :)

Thanks Zhenyu!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [HACK] i965/fs: Fix ordering of src0 alpha and oMask in the framebuffer write payload.

2015-07-10 Thread Jason Ekstrand
On Fri, Jul 10, 2015 at 9:53 AM, Francisco Jerez  wrote:
> Jason Ekstrand  writes:
>
>> On Fri, Jul 10, 2015 at 5:25 AM, Francisco Jerez  
>> wrote:
>>> Jason Ekstrand  writes:
>>>
 On Jul 9, 2015 7:57 AM, "Francisco Jerez"  wrote:
>
> We were passing src0 alpha and oMask in reverse order.  There seems to
> be no good way to pass them in the correct order to the new-style
> LOAD_PAYLOAD (how surprising) because src0 alpha is per-channel while
> oMask is not.  Just split src0 alpha in fixed-width registers and pass
> them to LOAD_PAYLOAD as if they were part of the header as work-around
> for now.

 Bah... I came across this when I did the LOAD_PAYLOAD rework but thought it
 was only theoretical.  I wasn't very familiar with what omask actually did
 and, since piglit didn't hit it, I wasn't sure if it was a real problem or
 not.  I probably should have done more digging and written a piglit test at
 the time. My bad.

 One solution that I proposed at the time was to turn header_size into
 header_mask in the obvious way. We can still use 8 bits because we should
 never have a header source higher than 8.

>>>
>>> So your idea is to have one bit per source indicating whether it's
>>> header-like or per-channel?  I don't think that extends to instructions
>>> other than LOAD_PAYLOAD (e.g. FB_WRITE) where the same source is at the
>>> same time header and payload.
>>
>> You're right, it doesn't.  We really shouldn't be conflating them.  We
>> should have header_mask and header_present be different fields.  Maybe
>> use a union to save space, but they should have different semantic
>> meaning and different names.  We should probably also have a
>> compr4_mask and get rid of the hackery there.
>>
>>> One bit per 32B register would extend
>>> easily but it would be rather ugly to deal with if you want to keep your
>>> code SIMD width-invariant.
>>>
>>> I think if you go with the per-source flag you'll want it to be in its
>>> own subclass of fs_inst.  With its own subclass you could even have an
>>> array of per-source sizes determining the number of registers read for
>>> each source, which would be rather nice for the visitor (no need to
>>> split vectors into components while passing them to LOAD_PAYLOAD).
>>>
>>> Still I think the most elegant solution would be to simply get rid of
>>> the header/payload distinction by using force_writemask_all and, if it
>>> proves to be necessary, fix the optimizer to get rid of redundant
>>> force_writemask_all flags where it doesn't do it already.
>>
>> I really don't think that's a good long-term or short-term solution.
>>
>> How badly are you blocking on this?   I don't really have a lot of
>> extra time to work on this at the moment but can carve some out if
>> needed.
>
> I'm not blocking on this at all, feel free to fix it however you like,
> or just go with this hack for the moment if you have higher priority
> stuff to work on right now, I honestly don't care.

That's good to hear.  I'll try and take a look at this in a couple of
weeks.  Thanks for bringing it up and writing the piglit test!
--Jason

>> --jason
>>
 Thoughts?
 --Jason

> I've written a piglit test that demonstrates the problem by using
> gl_SampleMask from a fragment shader with multiple color outputs [1].
>
> [1] http://lists.freedesktop.org/archives/piglit/2015-July/016499.html
> ---
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 26
 +-
>  1 file changed, 17 insertions(+), 9 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 94d6a58..304ae74 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -1535,6 +1535,19 @@ fs_visitor::emit_single_fb_write(const fs_builder
 &bld,
>length++;
> }
>
> +   if (src0_alpha.file != BAD_FILE && color0.file != BAD_FILE) {
> +  /* Neat, we need to chop the src0 alpha component and pass it as
 part of
> +   * the header even though it has per-channel semantics, because
 the next
> +   * optional field is header-like and LOAD_PAYLOAD requires all such
> +   * fields to form a contiguous segment at the beginning of the
 message.
> +   */
> +  for (unsigned i = 0; i < exec_size / 8; i++) {
> + setup_color_payload(&sources[length], src0_alpha, 1, 8,
> + use_2nd_half || i == 1);
> + length++;
> +  }
> +   }
> +
> prog_data->uses_omask =
>prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
> if (prog_data->uses_omask) {
> @@ -1561,19 +1574,14 @@ fs_visitor::emit_single_fb_write(const fs_builder
 &bld,
>   offset(this->outp

Re: [Mesa-dev] [PATCH 7/7] egl/wayland: remove dead code

2015-07-10 Thread Emil Velikov
On 10/07/15 19:22, Matt Turner wrote:
> On Fri, Jul 10, 2015 at 10:49 AM, Emil Velikov  
> wrote:
>> The macro HAVE_MKOSTEMP was never defined.
>>
>> Cc: Axel Davy 
>> Signed-off-by: Emil Velikov 
>> ---
>>  src/egl/drivers/dri2/platform_wayland.c | 6 --
>>  1 file changed, 6 deletions(-)
>>
>> diff --git a/src/egl/drivers/dri2/platform_wayland.c 
>> b/src/egl/drivers/dri2/platform_wayland.c
>> index 9005eb7..bdccee3 100644
>> --- a/src/egl/drivers/dri2/platform_wayland.c
>> +++ b/src/egl/drivers/dri2/platform_wayland.c
>> @@ -1255,17 +1255,11 @@ create_tmpfile_cloexec(char *tmpname)
>>  {
>> int fd;
>>
>> -#ifdef HAVE_MKOSTEMP
>> -   fd = mkostemp(tmpname, O_CLOEXEC);
>> -   if (fd >= 0)
>> -  unlink(tmpname);
> 
> It is indeed dead-code, but wouldn't you rather simply add
> 
> AC_CHECK_FUNC([mkostemp], AC_DEFINE(HAVE_MKOSTEMP, 1, [Use mkostemp(3)]))
> 
> to configure.ac and bring it to life?
> 
I just wanted to remove some code (sob, sob). But seriously, adding the
configure check will do the job, if you prefer we'll use it.

Thanks for having a look!
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] auxiliary/vl: use the correct screen index

2015-07-10 Thread Emil Velikov
Inspired (copied) from Marek's commit for egl/x11
commit 0b56e23e7f3(egl/dri2: use the correct screen index)

v2: Fix copy/pasta errors.

Cc: 10.6 
Cc: Marek Olšák 
Signed-off-by: Emil Velikov 
---
 src/gallium/auxiliary/vl/vl_winsys_dri.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c 
b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index 7e61b88..1e6613c 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -293,6 +293,16 @@ vl_screen_get_private(struct vl_screen *vscreen)
return vscreen;
 }
 
+static xcb_screen_t *
+get_xcb_screen(xcb_screen_iterator_t iter, int screen)
+{
+for (; iter.rem; --screen, xcb_screen_next(&iter))
+if (screen == 0)
+return iter.data;
+
+return NULL;
+}
+
 struct vl_screen*
 vl_screen_create(Display *display, int screen)
 {
@@ -334,8 +344,7 @@ vl_screen_create(Display *display, int screen)
   goto free_query;
 
s = xcb_setup_roots_iterator(xcb_get_setup(scrn->conn));
-   while (screen--)
-   xcb_screen_next(&s);
+
driverType = XCB_DRI2_DRIVER_TYPE_DRI;
 #ifdef DRI2DriverPrimeShift
{
@@ -351,7 +360,7 @@ vl_screen_create(Display *display, int screen)
}
 #endif
 
-   connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, s.data->root, 
driverType);
+   connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, get_xcb_screen(s, 
screen)->root, driverType);
connect = xcb_dri2_connect_reply(scrn->conn, connect_cookie, NULL);
if (connect == NULL || connect->driver_name_length + 
connect->device_name_length == 0)
   goto free_connect;
@@ -370,7 +379,7 @@ vl_screen_create(Display *display, int screen)
if (drmGetMagic(fd, &magic))
   goto free_connect;
 
-   authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, 
s.data->root, magic);
+   authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, 
get_xcb_screen(s, screen)->root, magic);
authenticate = xcb_dri2_authenticate_reply(scrn->conn, authenticate_cookie, 
NULL);
 
if (authenticate == NULL || !authenticate->authenticated)
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] auxiliary/vl: use the correct screen index

2015-07-10 Thread Emil Velikov
On 10 July 2015 at 19:33, Marek Olšák  wrote:
> I'm sure this doesn't build. "dri2_dpy" is not declared and "screen" is int.
>
In all honesty I "wrote" it at 3am, but that is no excuse for such a
goof-up. A correct version should be out.

Thanks,
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50, nvc0: enable at least one color RT if alphatest is enabled

2015-07-10 Thread Ilia Mirkin
Fixes the following piglits:
  fbo-alphatest-nocolor
  fbo-alphatest-nocolor-ff

Signed-off-by: Ilia Mirkin 
Cc: mesa-sta...@lists.freedesktop.org
---
The nv50 bits need testing, only have a GK208 on-hand. Will be sure to test 
before pushing.

 src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 18 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 18 ++
 2 files changed, 36 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c 
b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 116bf4b..ead4b29 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -296,6 +296,23 @@ nv50_check_program_ucps(struct nv50_context *nv50,
nv50_fp_linkage_validate(nv50);
 }
 
+/* alpha test is disabled if there are no color RTs, so make sure we have at
+ * least one if alpha test is enabled. Note that this must run after
+ * nv50_validate_fb, otherwise that will override the RT count setting.
+ */
+static void
+nv50_validate_derived_2(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+   if (nv50->zsa && nv50->zsa->pipe.alpha.enabled &&
+   nv50->framebuffer.nr_cbufs == 0) {
+  nv50_fb_set_null_rt(push, 0);
+  BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
+  PUSH_DATA (push, (076543210 << 4) | 1);
+   }
+}
+
 static void
 nv50_validate_clip(struct nv50_context *nv50)
 {
@@ -456,6 +473,7 @@ static struct state_validate {
 { nv50_gp_linkage_validate,NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
 { nv50_validate_derived_rs,NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
+{ nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
 { nv50_validate_clip,  NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
 { nv50_constbufs_validate, NV50_NEW_CONSTBUF },
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index c52399a..785e52e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -535,6 +535,23 @@ nvc0_validate_derived_1(struct nvc0_context *nvc0)
}
 }
 
+/* alpha test is disabled if there are no color RTs, so make sure we have at
+ * least one if alpha test is enabled. Note that this must run after
+ * nvc0_validate_fb, otherwise that will override the RT count setting.
+ */
+static void
+nvc0_validate_derived_2(struct nvc0_context *nvc0)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+   if (nvc0->zsa && nvc0->zsa->pipe.alpha.enabled &&
+   nvc0->framebuffer.nr_cbufs == 0) {
+  nvc0_fb_set_null_rt(push, 0);
+  BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+  PUSH_DATA (push, (076543210 << 4) | 1);
+   }
+}
+
 static void
 nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
 {
@@ -597,6 +614,7 @@ static struct state_validate {
 { nvc0_fragprog_validate,  NVC0_NEW_FRAGPROG },
 { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
NVC0_NEW_RASTERIZER },
+{ nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
 { nvc0_validate_clip,  NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER |
NVC0_NEW_VERTPROG |
NVC0_NEW_TEVLPROG |
-- 
2.3.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 (part1) 25/26] glsl: Lower shader storage buffer object writes to GLSL IR instrinsics

2015-07-10 Thread Jordan Justen
On 2015-07-10 03:13:44, Iago Toral Quiroga wrote:
> From: Samuel Iglesias Gonsalvez 
> 
> Extend the existing lower_ubo_reference pass to also detect SSBO writes
> and lower them to __intrinsic_store_ssbo intrinsics.
> 
> Signed-off-by: Samuel Iglesias Gonsalvez 
> ---
>  src/glsl/lower_ubo_reference.cpp | 441 
> +++
>  1 file changed, 311 insertions(+), 130 deletions(-)
> 
> diff --git a/src/glsl/lower_ubo_reference.cpp 
> b/src/glsl/lower_ubo_reference.cpp
> index a61ff29..460b490 100644
> --- a/src/glsl/lower_ubo_reference.cpp
> +++ b/src/glsl/lower_ubo_reference.cpp
> @@ -37,6 +37,7 @@
>  #include "ir_builder.h"
>  #include "ir_rvalue_visitor.h"
>  #include "main/macros.h"
> +#include "glsl_parser_extras.h"
>  
>  using namespace ir_builder;
>  
> @@ -139,12 +140,31 @@ public:
> }
>  
> void handle_rvalue(ir_rvalue **rvalue);
> -   void emit_ubo_loads(ir_dereference *deref, ir_variable *base_offset,
> -   unsigned int deref_offset, bool row_major,
> -   int matrix_columns);
> +   ir_visitor_status visit_enter(ir_assignment *ir);
> +
> +   void setup_for_load_or_write(ir_variable *var,

How about setup_for_load_or_store, or setup_for_buffer_access?

> +ir_dereference *deref,
> +ir_rvalue **offset,
> +unsigned *const_offset,
> +bool *row_major,
> +int *matrix_columns);
> ir_expression *ubo_load(const struct glsl_type *type,
>ir_rvalue *offset);
>  
> +
> +   void check_for_ssbo_write(ir_assignment *ir);

Since we are using load elsewhere, I guess 'store' is better than
'write'.

> +   void write_to_memory(ir_dereference *deref,
> +ir_variable *var,
> +ir_variable *write_var,
> +unsigned write_mask);
> +   ir_call *ssbo_write(ir_rvalue *deref, ir_rvalue *offset,
> +   unsigned write_mask);

store

> +
> +   void emit_reads_or_writes(bool is_write, ir_dereference *deref,
> + ir_variable *base_offset, unsigned int 
> deref_offset,
> + bool row_major, int matrix_columns,
> + unsigned write_mask);

What about emit_access?

> +
> void *mem_ctx;
> struct gl_shader *shader;
> struct gl_uniform_buffer_variable *ubo_var;
> @@ -218,26 +238,20 @@ interface_field_name(void *mem_ctx, char *base_name, 
> ir_dereference *d,
>  }
>  
>  void
> -lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
> +lower_ubo_reference_visitor::setup_for_load_or_write(ir_variable *var,
> + ir_dereference *deref,
> + ir_rvalue **offset,
> + unsigned *const_offset,
> + bool *row_major,
> + int *matrix_columns)
>  {
> -   if (!*rvalue)
> -  return;
> -
> -   ir_dereference *deref = (*rvalue)->as_dereference();
> -   if (!deref)
> -  return;
> -
> -   ir_variable *var = deref->variable_referenced();
> -   if (!var || !var->is_in_buffer_block())
> -  return;
> -
> -   mem_ctx = ralloc_parent(*rvalue);
> -
> +   /* Fix out the name of the interface block */

Maybe 'Fix out' => 'Determine'?

> ir_rvalue *nonconst_block_index;
> const char *const field_name =
>interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
> deref, &nonconst_block_index);
>  
> +   /* Locate the ubo block by interface name */
> this->uniform_block = NULL;
> for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
>if (strcmp(field_name, shader->UniformBlocks[i].Name) == 0) {
> @@ -263,10 +277,10 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue 
> **rvalue)
>  
> assert(this->uniform_block);
>  
> -   ir_rvalue *offset = new(mem_ctx) ir_constant(0u);
> -   unsigned const_offset = 0;
> -   bool row_major = is_dereferenced_thing_row_major(deref);
> -   int matrix_columns = 1;
> +   *offset = new(mem_ctx) ir_constant(0u);
> +   *const_offset = 0;
> +   *row_major = is_dereferenced_thing_row_major(deref);
> +   *matrix_columns = 1;
>  
> /* Calculate the offset to the start of the region of the UBO
>  * dereferenced by *rvalue.  This may be a variable offset if an
> @@ -275,76 +289,76 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue 
> **rvalue)
> while (deref) {
>switch (deref->ir_type) {
>case ir_type_dereference_variable: {
> -const_offset += ubo_var->Offset;
> -deref = NULL;
> -break;
> + *const_offset += ubo_var->Offset;
> + deref = NULL;
> + break;
>}
>  
> 

Re: [Mesa-dev] [PATCH v3 (part1) 26/26] glsl: Lower shader storage buffer object loads to GLSL IR instrinsics

2015-07-10 Thread Jordan Justen
Reviewed-by: Jordan Justen 

On 2015-07-10 03:13:45, Iago Toral Quiroga wrote:
> From: Samuel Iglesias Gonsalvez 
> 
> Extend the existing lower_ubo_reference pass to also detect SSBO loads
> and lower them to __intrinsic_load_ssbo intrinsics.
> 
> Signed-off-by: Samuel Iglesias Gonsalvez 
> ---
>  src/glsl/lower_ubo_reference.cpp | 73 
> +++-
>  1 file changed, 65 insertions(+), 8 deletions(-)
> 
> diff --git a/src/glsl/lower_ubo_reference.cpp 
> b/src/glsl/lower_ubo_reference.cpp
> index 460b490..822b723 100644
> --- a/src/glsl/lower_ubo_reference.cpp
> +++ b/src/glsl/lower_ubo_reference.cpp
> @@ -150,7 +150,8 @@ public:
>  int *matrix_columns);
> ir_expression *ubo_load(const struct glsl_type *type,
>ir_rvalue *offset);
> -
> +   ir_call *ssbo_load(const struct glsl_type *type,
> +  ir_rvalue *offset);
>  
> void check_for_ssbo_write(ir_assignment *ir);
> void write_to_memory(ir_dereference *deref,
> @@ -170,6 +171,7 @@ public:
> struct gl_uniform_buffer_variable *ubo_var;
> ir_rvalue *uniform_block;
> bool progress;
> +   bool is_shader_storage;
>  };
>  
>  /**
> @@ -266,6 +268,8 @@ 
> lower_ubo_reference_visitor::setup_for_load_or_write(ir_variable *var,
>  this->uniform_block = index;
>   }
>  
> + this->is_shader_storage = shader->UniformBlocks[i].IsShaderStorage;
> +
>   struct gl_uniform_block *block = &shader->UniformBlocks[i];
>  
>   this->ubo_var = var->is_interface_instance()
> @@ -415,7 +419,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue 
> **rvalue)
> if (!var || !var->is_in_buffer_block())
>return;
>  
> -   mem_ctx = ralloc_parent(*rvalue);
> +   mem_ctx = ralloc_parent(shader->ir);
>  
> ir_rvalue *offset = NULL;
> unsigned const_offset;
> @@ -512,6 +516,42 @@ lower_ubo_reference_visitor::ssbo_write(ir_rvalue *deref,
> return new(mem_ctx) ir_call(sig, NULL, &call_params);
>  }
>  
> +ir_call *
> +lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type,
> +   ir_rvalue *offset)
> +{
> +   exec_list sig_params;
> +
> +   ir_variable *block_ref = new(mem_ctx)
> +  ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
> +   sig_params.push_tail(block_ref);
> +
> +   ir_variable *offset_ref = new(mem_ctx)
> +  ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
> +   sig_params.push_tail(offset_ref);
> +
> +   ir_function_signature *sig =
> +  new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
> +   assert(sig);
> +   sig->replace_parameters(&sig_params);
> +   sig->is_intrinsic = true;
> +
> +   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
> +   f->add_signature(sig);
> +
> +   ir_variable *result = new(mem_ctx)
> +  ir_variable(type, "ssbo_load_result", ir_var_temporary);
> +   base_ir->insert_before(result);
> +   ir_dereference_variable *deref_result = new(mem_ctx)
> +  ir_dereference_variable(result);
> +
> +   exec_list call_params;
> +   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
> +   call_params.push_tail(offset->clone(mem_ctx, NULL));
> +
> +   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
> +}
> +
>  static inline int
>  writemask_for_size(unsigned n)
>  {
> @@ -610,9 +650,17 @@ lower_ubo_reference_visitor::emit_reads_or_writes(bool 
> is_write,
>   add(base_offset, new(mem_ctx) ir_constant(deref_offset));
>if (is_write)
>   base_ir->insert_after(ssbo_write(deref, offset, write_mask));
> -  else
> - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
> -   ubo_load(deref->type, offset)));
> +  else {
> + if (!this->is_shader_storage) {
> + base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
> +   ubo_load(deref->type, offset)));
> + } else {
> +ir_call *load_ssbo = ssbo_load(deref->type, offset);
> +base_ir->insert_before(load_ssbo);
> +ir_rvalue *value = 
> load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
> +base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), 
> value));
> + }
> +  }
> } else {
>unsigned N = deref->type->is_double() ? 8 : 4;
>  
> @@ -640,9 +688,18 @@ lower_ubo_reference_visitor::emit_reads_or_writes(bool 
> is_write,
>   if (is_write) {
>  base_ir->insert_after(ssbo_write(swizzle(deref, i, 1), 
> chan_offset, 1));
>   } else {
> -base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
> -  ubo_load(deref_type, chan_offset),
> -  (1U << i)));
> +if (!this->is_shader_storage) {
> +

Re: [Mesa-dev] [PATCH 3/4] radeon, r200: allow hyperz for radeon DRM module v2

2015-07-10 Thread Roland Scheidegger
Am 10.07.2015 um 19:41 schrieb Emil Velikov:
> On 10 July 2015 at 13:18, Roland Scheidegger  wrote:
>> Am 10.07.2015 um 05:44 schrieb Michel Dänzer:
>>> On 10.07.2015 05:13, Emil Velikov wrote:
 The original code only half considered hyperz as an option. As per
 previous commit "major != 2 cannot occur" we can simplify things, and
 allow users to set the option if they choose to do so.

 Signed-off-by: Emil Velikov 
 ---
  src/mesa/drivers/dri/r200/r200_context.c | 10 ++
  src/mesa/drivers/dri/radeon/radeon_context.c |  9 ++---
  2 files changed, 4 insertions(+), 15 deletions(-)

 diff --git a/src/mesa/drivers/dri/r200/r200_context.c 
 b/src/mesa/drivers/dri/r200/r200_context.c
 index 40cc50a..2a42ab3 100644
 --- a/src/mesa/drivers/dri/r200/r200_context.c
 +++ b/src/mesa/drivers/dri/r200/r200_context.c
 @@ -225,14 +225,8 @@ GLboolean r200CreateContext( gl_api api,
 rmesa->radeon.initialMaxAnisotropy = 
 driQueryOptionf(&rmesa->radeon.optionCache,
  "def_max_anisotropy");

 -   if ( sPriv->drm_version.major == 1
 -   && driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
 -  if ( sPriv->drm_version.minor < 13 )
 - fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
 -  "disabling.\n", sPriv->drm_version.minor );
 -  else
 - rmesa->using_hyperz = GL_TRUE;
 -   }
>>>
>>> This code only set rmesa->using_hyperz = GL_TRUE if
>>> sPriv->drm_version.major == 1. It was disabled for KMS in commit
>>> e541845959761e9f47d14ade6b58a32db04ef7e4 ("r200: Fix piglit paths test.").
>>>
>>>
 +   if (driQueryOptionb( &rmesa->radeon.optionCache, "hyperz"))
 +  rmesa->using_hyperz = GL_TRUE;
>>>
>>> This enables it again for KMS. Maybe that's okay though, especially if
>>> the driconf option is disabled by default.
>>
>>
>> Oh you're right. The reason given though why it was disabled looks bogus
>> to me ("Piglit doesn't like HyperZ warning so disable it for kms." ???),
>> and I can't see why that would have only applied to r200, not r100. So
>> it should be fine. (Of course, you will get more failures with that
>> enabled with piglit, some things just plain won't work, but that was
>> just the case with UMS too, and the reason why it never was enabled by
>> default.)
>>
> Yes, without Roland's knowledge of whether hyperz is supposed to work for KMS,
> the current code is quite ambiguous. If you guys prefer I can simply
> rip out the whole thing, then again hyperz is disabled by default so
> no harm should follow with this patch.
> 
> I don't mind either way.
> Emil
> 

I'd say keep the option (for both drivers) for now.
I've got some r200 in a dust bin actually, haven't touched it in years...
I think it should work in the same way as it does on r100.
The depth buffer metadata (i.e. the bits saying if a tile is compressed
or not etc.) is really fixed onchip cache, and there's no attempt to
grab or restore that data when the depth buffer is changed, which
obviously isn't quite right... The other limitation is that you cannot
read or write the depth buffer directly (well you can but you get back
garbage - not being able to do a glReadPixel on the depth buffer alone
will cause lots of piglit failures). UMS could also do fast z clear,
this is pretty simple as the command would just set the on-chip tile
bits to the "cleared" state, but this never made it to KMS - the rest of
hyperz should work without this, however, though I'm not entirely sure.
IIRC the r300 has actually pretty much the same hw limitations, however
the driver there fixes these issues (though I think that chip had some
nicer way of fast z clear).

Roland

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/5] i965/fs: fix stride and type for hw_reg's in regs_read()

2015-07-10 Thread Matt Turner
On Wed, Jul 1, 2015 at 11:51 AM, Connor Abbott  wrote:
> sources with file == HW_REG get all their information from the
> fixed_hw_reg field, so we need to get the stride and type from there
> when computing the size.
>
> Signed-off-by: Connor Abbott 
> ---

Patches 1 and 2 are

Reviewed-by: Matt Turner 

I think everything in this series is now reviewed.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] [v3] i965: Split out gen8 push constant state upload

2015-07-10 Thread Ben Widawsky
On Fri, Jul 10, 2015 at 12:03:54PM -0700, Matt Turner wrote:
> On Thu, Jul 9, 2015 at 11:00 AM, Ben Widawsky
>  wrote:
> > While implementing the workaround in the previous patch I noticed things 
> > were
> > starting to get a bit messy. Since gen8 works differently enough from gen7, 
> > I
> > thought splitting it out would be good.
> >
> > While here, get rid of gen8 MOCS which does nothing and was in the wrong 
> > place
> > anyway.
> >
> > This patch is totally optional. I'd be willing to just always use buffer #2 
> > on
> > gen8+. Pre-HSW this wasn't allowed, but it looks like it's okay for gen8 
> > too.
> >
> > v2: Move inactive batch generation to the top of the function in order to 
> > make
> > the rest of the code easier to read.
> >
> > Jenkins results (still a bunch of spurious failures, I miss Mark):
> > http://otc-mesa-ci.jf.intel.com/job/bwidawsk/169/
> >
> > v3: v2 had a bug in that it both didn't emit the right number of dwords, 
> > and it
> > didn't do ADVANCE_BATCH(). I'm moderately worried that there were no 
> > failures as
> > a result.
> > http://otc-mesa-ci.jf.intel.com/job/bwidawsk/170/
> 
> I don't think putting Intel-internal links in the commit message is a good 
> idea.
> 
> Ken's made similar comments to me.
> 
> Also, so much off the wall commentary...

Maybe my definition of "off the wall" is different than yours. The only thing
off the wall to me, was the bit about missing Mark. It was *some* off the wall
commentary.

That aside though, I think the internal links are a good point and a good thing to
discuss... I've had a couple of cases already where I or Neil benefited from
the Jenkins links being there to try to figure out some later regression. I can
sympathize with not having internal links in the history since it isn't
accessible to anyone. Earlier, I would have fought somewhat strongly for the
links, except that when Mark moved servers he didn't preserve the old links, so
that made me feel like it's a lot more transient than I initially felt.

However, I think it's really valuable for us to have them in the patches,
especially for review by some of the internal folks - like isn't it great to see
for yourself that I ran it? I suppose I can discard the URLs before pushing. The
cases I mentioned above would have benefited just as well having the links on
the list and not in the commit history (albeit a bit harder to find). Any
opposition to that?

*I do certainly think posting JIRA tasks is irrelevant and wrong unless the
entire contents of the JIRA entry are also pasted. Why I feel Jenkins results
are different is we at least know approximately what is contained at that link.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965/gs: Move vertex_count != 0 check up a level; skip one caller.

2015-07-10 Thread Matt Turner
On Wed, Jul 1, 2015 at 7:28 PM, Kenneth Graunke  wrote:
> Paul's original code had emit_control_data_bits() skip the URB write if
> vertex_count was 0.  This meant wrapping every control data write in a
> conditional write.
>
> We accumulate control data bits in a single UD (32-bit) register.  For
> simple shaders that don't emit many vertices, the control data header
> will be <= 32-bits long, so we only need to write it once at the end of
> the shader.
>
> For shaders with larger headers, we write out batches of control data
> bits at EmitVertex(), when (vertex_count * bits_per_vertex) % 32 == 0.
> On the first EmitVertex() call, the above expression will evaluate to
> true simply because vertex_count == 0.  But we want to avoid emitting
> the control data bits, because we haven't accumulated 32-bits worth yet.
>
> In other words, the vertex_count != 0 check is really only necessary in
> the EmitVertex() batching case, not the end-of-thread case.
>
> This saves a CMP/IF/ENDIF in every shader that uses EndPrimitive() or
> multiple streams.  The only downside is that a shader which emits no
> vertices at all will execute an additional URB write---but such shaders
> are pointless and not worth optimizing.
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 14 --
>  1 file changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> index 2f948ee..55408eb 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> @@ -348,11 +348,6 @@ vec4_gs_visitor::emit_control_data_bits()
> if (c->control_data_header_size_bits > 128)
>urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET;
>
> -   /* If vertex_count is 0, then no control data bits have been accumulated
> -* yet, so we should do nothing.
> -*/
> -   emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_NEQ));
> -   emit(IF(BRW_PREDICATE_NORMAL));
> {
>/* If we are using either channel masks or a per-slot offset, then we
> * need to figure out which DWORD we are trying to write to, using the
> @@ -431,7 +426,6 @@ vec4_gs_visitor::emit_control_data_bits()
>inst->base_mrf = base_mrf;
>inst->mlen = 2;
> }
> -   emit(BRW_OPCODE_ENDIF);
>  }
>
>  void
> @@ -531,9 +525,17 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir)
>  emit(AND(dst_null_d(), this->vertex_count,
>   (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
>   inst->conditional_mod = BRW_CONDITIONAL_Z;
> +
>   emit(IF(BRW_PREDICATE_NORMAL));
>   {
> +/* If vertex_count is 0, then no control data bits have been
> + * accumulated yet, so we skip emitting them.
> + */
> +emit(CMP(dst_null_d(), this->vertex_count, 0u,
> + BRW_CONDITIONAL_NEQ));

I think you wanted to indent BRW_CONDITIONAL_NEQ to match dst_null_d().

Also, can we s/NEQ/NZ/ while we're here?

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Fix indentation in emit_control_data_bits().

2015-07-10 Thread Matt Turner
Confirmed that git show -w shows only the removal of matching braces.

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 33/82] i965: Upload Shader Storage Buffer Object surfaces

2015-07-10 Thread Jordan Justen
On 2015-06-03 00:01:23, Iago Toral Quiroga wrote:
> Since these are a special kind of UBOs we emit them together reusing the
> same infrastructure, however, we use a RAW surface so we can reuse
> existing untyped read/write/atomic messages which include a pixel mask
> header that we need to set to obtain correct behavior with helper
> invocations of the fragment shader.
> ---
>  src/mesa/drivers/dri/i965/brw_context.h  |  6 +++
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 62 
> +++-
>  2 files changed, 56 insertions(+), 12 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 01c4283..154d7ae 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1718,6 +1718,12 @@ void brw_create_constant_surface(struct brw_context 
> *brw,
>   uint32_t size,
>   uint32_t *out_offset,
>   bool dword_pitch);
> +void brw_create_buffer_surface(struct brw_context *brw,
> +   drm_intel_bo *bo,
> +   uint32_t offset,
> +   uint32_t size,
> +   uint32_t *out_offset,
> +   bool dword_pitch);
>  void brw_update_buffer_texture_surface(struct gl_context *ctx,
> unsigned unit,
> uint32_t *surf_offset);
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 160dd2f..e9ccdd6 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -410,6 +410,29 @@ brw_create_constant_surface(struct brw_context *brw,
>  }
>  
>  /**
> + * Create the buffer surface.  Vertex/fragment shader buffer variables will 
> be

Only vertex/fragment? How about 'Shader buffer variables' ...?

Reviewed-by: Jordan Justen 

> + * read from / write to this buffer with Data Port Read/Write
> + * instructions/messages.
> + */
> +void
> +brw_create_buffer_surface(struct brw_context *brw,
> +  drm_intel_bo *bo,
> +  uint32_t offset,
> +  uint32_t size,
> +  uint32_t *out_offset,
> +  bool dword_pitch)
> +{
> +   /* Use a raw surface so we can reuse existing untyped read/write/atomic
> +* messages. We need these specifically for the fragment shader since they
> +* include a pixel mask header that we need to ensure correct behavior
> +* with helper invocations, which cannot write to the buffer.
> +*/
> +   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
> +   BRW_SURFACEFORMAT_RAW,
> +   size, 1, true);
> +}
> +
> +/**
>   * Set up a binding table entry for use by stream output logic (transform
>   * feedback).
>   *
> @@ -897,24 +920,39 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
>&stage_state->surf_offset[prog_data->binding_table.ubo_start];
>  
> for (int i = 0; i < shader->NumUniformBlocks; i++) {
> -  struct gl_uniform_buffer_binding *binding;
>struct intel_buffer_object *intel_bo;
>  
> -  binding = 
> &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
> -  intel_bo = intel_buffer_object(binding->BufferObject);
> -  drm_intel_bo *bo =
> - intel_bufferobj_buffer(brw, intel_bo,
> -binding->Offset,
> -binding->BufferObject->Size - 
> binding->Offset);
> -
>/* Because behavior for referencing outside of the binding's size in 
> the
> * glBindBufferRange case is undefined, we can just bind the whole 
> buffer
> * glBindBufferBase wants and be a correct implementation.
> */
> -  brw_create_constant_surface(brw, bo, binding->Offset,
> -  bo->size - binding->Offset,
> -  &surf_offsets[i],
> -  dword_pitch);
> +  if (!shader->UniformBlocks[i].IsShaderStorage) {
> + struct gl_uniform_buffer_binding *binding;
> + binding =
> +&ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
> + intel_bo = intel_buffer_object(binding->BufferObject);
> + drm_intel_bo *bo =
> +intel_bufferobj_buffer(brw, intel_bo,
> +   binding->Offset,
> +   binding->BufferObject->Size - 
> binding->Offset);
> + brw_create_constant_surface(brw, bo, binding->Offset,
> + bo->size - binding->Offset,
> + &surf_offsets[i],
> 

Re: [Mesa-dev] [PATCH v2 34/82] i965: handle visiting of ir_var_shader_storage variables

2015-07-10 Thread Jordan Justen
Reviewed-by: Jordan Justen 

On 2015-06-03 00:01:24, Iago Toral Quiroga wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index a2a75a4..13496a3 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -1053,11 +1053,12 @@ vec4_visitor::visit(ir_variable *ir)
>break;
>  
> case ir_var_uniform:
> +   case ir_var_shader_storage:
>reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
>  
>/* Thanks to the lower_ubo_reference pass, we will see only
> -   * ir_binop_ubo_load expressions and not ir_dereference_variable for 
> UBO
> -   * variables, so no need for them to be in variable_ht.
> +   * ir_binop_{ubo,ssbo}_load expressions and not ir_dereference_variable
> +   * for UBO/SSBO variables, so no need for them to be in variable_ht.
> *
> * Some uniforms, such as samplers and atomic counters, have no actual
> * storage, so we should ignore them.
> -- 
> 1.9.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] xa: don't leak fences

2015-07-10 Thread Eric Anholt
Rob Clark  writes:

> well, freedreno and vmware ddx still use XA.. they both could probably
> be ported to use glamor instead, but that hasn't been done yet..
>
> At least for freedreno with upstream drm/kms driver, one can just use
> -modesetting ddx instead.  But that doesn't work w/ android fbdev
> driver.  I need to check again w/ a more recent -modesetting+glamor,
> but when I tried it a few months ago, there were some cases of
> rendering corruption (but I didn't have time to debug and see whether
> that was a freedreno issue or a glamor issue).

I'm confused what "that doesn't work w/ android fbdev driver" means.
Why would you be using an fbdev driver with X?  Also, are you doing X on
Android?


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 35/82] i965/fs: Do not split buffer variables

2015-07-10 Thread Jordan Justen
Reviewed-by: Jordan Justen 

On 2015-06-03 00:01:25, Iago Toral Quiroga wrote:
> Buffer variables are the same as uniforms, except that they are read/write, so we want
> the same treatment.
> ---
>  src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
> index 01d3a56..61b6ebc 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
> @@ -104,6 +104,7 @@ 
> ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
>  
> switch (var->data.mode) {
> case ir_var_uniform:
> +   case ir_var_shader_storage:
> case ir_var_shader_in:
> case ir_var_shader_out:
> case ir_var_system_value:
> -- 
> 1.9.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] xa: don't leak fences

2015-07-10 Thread Rob Clark
On Fri, Jul 10, 2015 at 6:20 PM, Eric Anholt  wrote:
> Rob Clark  writes:
>
>> well, freedreno and vmware ddx still use XA.. they both could probably
>> be ported to use glamor instead, but that hasn't been done yet..
>>
>> At least for freedreno with upstream drm/kms driver, one can just use
>> -modesetting ddx instead.  But that doesn't work w/ android fbdev
>> driver.  I need to check again w/ a more recent -modesetting+glamor,
>> but when I tried it a few months ago, there were some cases of
>> rendering corruption (but I didn't have time to debug and see whether
>> that was a freedreno issue or a glamor issue).
>
> I'm confused what "that doesn't work w/ android fbdev driver" means.
> Why would you be using an fbdev driver with X?  Also, are you doing X on
> Android?

huge hacks to kinda/sorta work on top of android fbdev + downstream
gpu driver, so people have some approximate chance of getting
freedreno/x11 working on $random_android_device.. see:

https://github.com/freedreno/freedreno/wiki/Architecture

mostly it is to try to (best-effort) support devices with dsi panels,
since it was only recently that upstream drm/kms driver grew decent
dsi support.  I'm planning to write panel drivers for a handful of
devices, to give reasonable example for anyone with
$random_android_device to start with, so between that and the fact
that fbdev support barely works (due to differences between different
devices) I'll probably torpedo the fbdev support eventually..

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] i965/bdw: Fix 3DSTATE_VF_INSTANCING when the edge flag is used

2015-07-10 Thread Chris Forbes
Surely the *right* thing would be to have the correct order expressed
in brw->vb.* instead, so you don't have this workaround in multiple
places.

As a minimal fix for stable though, this seems OK, so -

Reviewed-by: Chris Forbes 



On Sat, Jul 11, 2015 at 5:04 AM, Neil Roberts  wrote:
> When the edge flag element is enabled then the elements are slightly
> reordered so that the edge flag is always the last one. This was
> confusing the code to upload the 3DSTATE_VF_INSTANCING state because
> that is uploaded with a separate loop which has an instruction for
> each element. The indices used in these instructions weren't taking
> into account the reordering so the state would be incorrect.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91292
> Cc: "10.6 10.5" 
> ---
>  src/mesa/drivers/dri/i965/gen8_draw_upload.c | 15 +--
>  1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c 
> b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> index 1af90ec..65b7625 100644
> --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
> @@ -218,13 +218,24 @@ gen8_emit_vertices(struct brw_context *brw)
> }
> ADVANCE_BATCH();
>
> -   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
> +   for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
>const struct brw_vertex_element *input = brw->vb.enabled[i];
>const struct brw_vertex_buffer *buffer = 
> &brw->vb.buffers[input->buffer];
> +  unsigned element_index;
> +
> +  /* The edge flag element is reordered to be the last one in the code
> +   * above so we need to compensate for that in the element indices used
> +   * below.
> +   */
> +  if (input == gen6_edgeflag_input)
> + element_index = brw->vb.nr_enabled - 1;
> +  else
> + element_index = j++;
>
>BEGIN_BATCH(3);
>OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
> -  OUT_BATCH(i | (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
> +  OUT_BATCH(element_index |
> +(buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
>OUT_BATCH(buffer->step_rate);
>ADVANCE_BATCH();
> }
> --
> 1.9.3
>
> ___
> mesa-stable mailing list
> mesa-sta...@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-stable
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] i965/fs: Don't use the pixel interpolater for centroid interpolation

2015-07-10 Thread Chris Forbes
s/interpolater/interpolator/g

On Fri, Jul 10, 2015 at 1:31 AM, Neil Roberts  wrote:
> For centroid interpolation we can just directly use the values set up
> in the shader payload instead of querying the pixel interpolator. To
> do this we need to modify brw_compute_barycentric_interp_modes to
> detect when interpolateAtCentroid is called.
>
> v2: Rebase on top of changes to set the pulls bary bit on SKL
> ---
>
> As an aside, I was deliberating over whether to call the function
> set_up_blah instead of setup_blah because I think the former is more
> correct. The rest of Mesa seems to use setup so maybe it's more
> important to be consistent than correct.
>
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 52 +++---
>  src/mesa/drivers/dri/i965/brw_wm.c   | 55 
> 
>  2 files changed, 88 insertions(+), 19 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 5d1ea21..fd7f1b8 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1238,6 +1238,25 @@ fs_visitor::emit_percomp(const fs_builder &bld, const 
> fs_inst &inst,
> }
>  }
>
> +/* For most messages, we need one reg of ignored data; the hardware requires
> + * mlen==1 even when there is no payload. in the per-slot offset case, we'll
> + * replace this with the proper source data.
> + */
> +static void
> +setup_pixel_interpolater_instruction(fs_visitor *v,
> + nir_intrinsic_instr *instr,
> + fs_inst *inst,
> + int mlen = 1)
> +{
> +  inst->mlen = mlen;
> +  inst->regs_written = 2 * v->dispatch_width / 8;
> +  inst->pi_noperspective = instr->variables[0]->var->data.interpolation 
> ==
> +   INTERP_QUALIFIER_NOPERSPECTIVE;
> +
> +  assert(v->stage == MESA_SHADER_FRAGMENT);
> +  ((struct brw_wm_prog_data *) v->prog_data)->pulls_bary = true;
> +}
> +
>  void
>  fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr 
> *instr)
>  {
> @@ -1482,25 +1501,23 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
> case nir_intrinsic_interp_var_at_centroid:
> case nir_intrinsic_interp_var_at_sample:
> case nir_intrinsic_interp_var_at_offset: {
> -  assert(stage == MESA_SHADER_FRAGMENT);
> -
> -  ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
> -
>fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
>
> -  /* For most messages, we need one reg of ignored data; the hardware
> -   * requires mlen==1 even when there is no payload. in the per-slot
> -   * offset case, we'll replace this with the proper source data.
> -   */
>fs_reg src = vgrf(glsl_type::float_type);
> -  int mlen = 1; /* one reg unless overriden */
>fs_inst *inst;
>
>switch (instr->intrinsic) {
> -  case nir_intrinsic_interp_var_at_centroid:
> - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
> - dst_xy, src, fs_reg(0u));
> +  case nir_intrinsic_interp_var_at_centroid: {
> + enum brw_wm_barycentric_interp_mode interp_mode;
> + if (instr->variables[0]->var->data.interpolation ==
> + INTERP_QUALIFIER_NOPERSPECTIVE)
> +interp_mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
> + else
> +interp_mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
> + uint8_t reg = payload.barycentric_coord_reg[interp_mode];
> + dst_xy = fs_reg(brw_vec16_grf(reg, 0));
>   break;
> +  }
>
>case nir_intrinsic_interp_var_at_sample: {
>   /* XXX: We should probably handle non-constant sample id's */
> @@ -1509,6 +1526,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
>   unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
>   inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
>   fs_reg(msg_data));
> + setup_pixel_interpolater_instruction(this, instr, inst);
>   break;
>}
>
> @@ -1521,6 +1539,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
>
>  inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, 
> src,
>  fs_reg(off_x | (off_y << 4)));
> +setup_pixel_interpolater_instruction(this, instr, inst);
>   } else {
>  src = vgrf(glsl_type::ivec2_type);
>  fs_reg offset_src = retype(get_nir_src(instr->src[0]),
> @@ -1550,9 +1569,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
> bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
>  }
>
> -mlen = 2 * dispatch_width / 8;
>   

Re: [Mesa-dev] [PATCH v2] i965/fs: Don't use the pixel interpolater for centroid interpolation

2015-07-10 Thread Chris Forbes
Nitpicks aside, I don't think this is a great idea now that you've got
the SKL PI working.

I also think it's broken -- you need to arrange to have the centroid
barycentric coords delivered to the FS thread, which won't be
happening if this is the *only* use of them. Masked in the tests,
because they compare with a centroid-qualified input. [I'm assuming
you don't always get these delivered to the FS in SKL, but no docs
access...]

- Chris

On Sat, Jul 11, 2015 at 11:18 AM, Chris Forbes  wrote:
> s/interpolater/interpolator/g
>
> On Fri, Jul 10, 2015 at 1:31 AM, Neil Roberts  wrote:
>> For centroid interpolation we can just directly use the values set up
>> in the shader payload instead of querying the pixel interpolator. To
>> do this we need to modify brw_compute_barycentric_interp_modes to
>> detect when interpolateAtCentroid is called.
>>
>> v2: Rebase on top of changes to set the pulls bary bit on SKL
>> ---
>>
>> As an aside, I was deliberating over whether to call the function
>> set_up_blah instead of setup_blah because I think the former is more
>> correct. The rest of Mesa seems to use setup so maybe it's more
>> important to be consistent than correct.
>>
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 52 +++---
>>  src/mesa/drivers/dri/i965/brw_wm.c   | 55 
>> 
>>  2 files changed, 88 insertions(+), 19 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index 5d1ea21..fd7f1b8 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -1238,6 +1238,25 @@ fs_visitor::emit_percomp(const fs_builder &bld, const 
>> fs_inst &inst,
>> }
>>  }
>>
>> +/* For most messages, we need one reg of ignored data; the hardware requires
>> + * mlen==1 even when there is no payload. in the per-slot offset case, we'll
>> + * replace this with the proper source data.
>> + */
>> +static void
>> +setup_pixel_interpolater_instruction(fs_visitor *v,
>> + nir_intrinsic_instr *instr,
>> + fs_inst *inst,
>> + int mlen = 1)
>> +{
>> +  inst->mlen = mlen;
>> +  inst->regs_written = 2 * v->dispatch_width / 8;
>> +  inst->pi_noperspective = instr->variables[0]->var->data.interpolation 
>> ==
>> +   INTERP_QUALIFIER_NOPERSPECTIVE;
>> +
>> +  assert(v->stage == MESA_SHADER_FRAGMENT);
>> +  ((struct brw_wm_prog_data *) v->prog_data)->pulls_bary = true;
>> +}
>> +
>>  void
>>  fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr 
>> *instr)
>>  {
>> @@ -1482,25 +1501,23 @@ fs_visitor::nir_emit_intrinsic(const fs_builder 
>> &bld, nir_intrinsic_instr *instr
>> case nir_intrinsic_interp_var_at_centroid:
>> case nir_intrinsic_interp_var_at_sample:
>> case nir_intrinsic_interp_var_at_offset: {
>> -  assert(stage == MESA_SHADER_FRAGMENT);
>> -
>> -  ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
>> -
>>fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
>>
>> -  /* For most messages, we need one reg of ignored data; the hardware
>> -   * requires mlen==1 even when there is no payload. in the per-slot
>> -   * offset case, we'll replace this with the proper source data.
>> -   */
>>fs_reg src = vgrf(glsl_type::float_type);
>> -  int mlen = 1; /* one reg unless overriden */
>>fs_inst *inst;
>>
>>switch (instr->intrinsic) {
>> -  case nir_intrinsic_interp_var_at_centroid:
>> - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
>> - dst_xy, src, fs_reg(0u));
>> +  case nir_intrinsic_interp_var_at_centroid: {
>> + enum brw_wm_barycentric_interp_mode interp_mode;
>> + if (instr->variables[0]->var->data.interpolation ==
>> + INTERP_QUALIFIER_NOPERSPECTIVE)
>> +interp_mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
>> + else
>> +interp_mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
>> + uint8_t reg = payload.barycentric_coord_reg[interp_mode];
>> + dst_xy = fs_reg(brw_vec16_grf(reg, 0));
>>   break;
>> +  }
>>
>>case nir_intrinsic_interp_var_at_sample: {
>>   /* XXX: We should probably handle non-constant sample id's */
>> @@ -1509,6 +1526,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
>> nir_intrinsic_instr *instr
>>   unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
>>   inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
>>   fs_reg(msg_data));
>> + setup_pixel_interpolater_instruction(this, instr, inst);
>>   break;
>>}
>>
>> @@ -1521,6 +1539,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
>> nir_intrinsic_instr *instr
>>
>>  i

  1   2   >