[Mesa-dev] [PATCH] i965: Return the correct value type from brw_compile_gs()
brw_compile_gs() should return a pointer to unsigned, but it is returning the bool 'false' at some point, hence annoying us with a compiler warning: In function 'const unsigned int* brw::brw_compile_gs(const brw_compiler*, void*, void*, const brw_gs_prog_key*, brw_gs_prog_data*, const nir_shader*, gl_shader_program*, int, unsigned int*, char**)': brw_vec4_gs_visitor.cpp:776:14: warning: converting 'false' to pointer type 'const unsigned int*' [-Wconversion-null] return false; ^ --- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 81353ae..0c49865 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -773,7 +773,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, if (compiler->devinfo->gen == 6) max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; if (output_size_bytes > max_output_size_bytes) - return false; + return NULL; /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and -- 2.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Return the correct value type from brw_compile_gs()
Reviewed-by: Jordan Justen On 2015-11-17 00:55:14, Eduardo Lima Mitev wrote: > brw_compile_gs() should return a pointer to unsigned, but it is returning the > bool 'false' at some point, hence annoying us with a compiler warning: > > In function 'const unsigned int* brw::brw_compile_gs(const brw_compiler*, >void*, void*, const brw_gs_prog_key*, brw_gs_prog_data*, const nir_shader*, >gl_shader_program*, int, unsigned int*, char**)': > > brw_vec4_gs_visitor.cpp:776:14: warning: converting 'false' to pointer type > 'const unsigned int*' [-Wconversion-null] > return false; >^ > --- > src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp > index 81353ae..0c49865 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp > @@ -773,7 +773,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void > *log_data, > if (compiler->devinfo->gen == 6) >max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; > if (output_size_bytes > max_output_size_bytes) > - return false; > + return NULL; > > > /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and > -- > 2.5.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/6] nir: reduce memory footprint of glsl_struct_field's precision
All 6 patches Reviewed-by: Tapani Pälli On 11/17/2015 09:33 AM, Samuel Iglesias Gonsálvez wrote: Signed-off-by: Samuel Iglesias Gonsálvez --- src/glsl/nir/glsl_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h index d841a32..2d44059 100644 --- a/src/glsl/nir/glsl_types.h +++ b/src/glsl/nir/glsl_types.h @@ -837,7 +837,7 @@ struct glsl_struct_field { /** * Precision qualifier */ - unsigned precision; + unsigned precision:2; /** * Image qualifiers, applicable to buffer variables defined in shader ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 32/36] glsl: Translate atomic intrinsic functions on shared variables
On 2015-11-16 06:51:55, Iago Toral wrote: > On Sat, 2015-11-14 at 13:44 -0800, Jordan Justen wrote: > > When an intrinsic atomic operation is used on a shared variable, we > > translate it to a new 'share variable' specific intrinsic function > > call. > > > > For example, add call to __intrinsic_atomic_add when used on a shared > > variable will be translated to a call to > > __intrinsic_atomic_add_shared. > > I suppose we should name these __intrinsic_atomic__shared_internal > for consistency with the ssbo atomic intrinsics... or just remove the > 'internal' suffix from the ssbo atomics. I think I'd prefer the latter, > we can do this as a separate patch after this lands though. I'd also prefer to remove 'internal'. Should I just do that in patch 30 with the rename? -Jorda > > > Signed-off-by: Jordan Justen > > --- > > src/glsl/lower_shared_reference.cpp | 151 > > > > 1 file changed, 151 insertions(+) > > > > diff --git a/src/glsl/lower_shared_reference.cpp > > b/src/glsl/lower_shared_reference.cpp > > index 810c6b6..7ff2c0c 100644 > > --- a/src/glsl/lower_shared_reference.cpp > > +++ b/src/glsl/lower_shared_reference.cpp > > @@ -79,6 +79,10 @@ public: > > ir_visitor_status visit_enter(ir_assignment *ir); > > void handle_assignment(ir_assignment *ir); > > > > + ir_call *lower_shared_atomic_intrinsic(ir_call *ir); > > + ir_call *check_for_shared_atomic_intrinsic(ir_call *ir); > > + ir_visitor_status visit_enter(ir_call *ir); > > + > > unsigned get_shared_offset(const ir_variable *); > > > > ir_call *shared_load(const struct glsl_type *type, ir_rvalue *offset); > > @@ -337,6 +341,153 @@ lower_shared_reference_visitor::shared_load(const > > struct glsl_type *type, > > return new(mem_ctx) ir_call(sig, deref_result, &call_params); > > } > > > > +/* Lowers the intrinsic call to a new internal intrinsic that swaps the > > + * access to the buffer variable in the first parameter by an offset > > + * and block index. 
This involves creating the new internal intrinsic > > + * (i.e. the new function signature). > > + */ > > +ir_call * > > +lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) > > +{ > > + /* Shared atomics usually have 2 parameters, the shared variable and an > > +* integer argument. The exception is CompSwap, that has an additional > > +* integer parameter. > > +*/ > > + int param_count = ir->actual_parameters.length(); > > + assert(param_count == 2 || param_count == 3); > > + > > + /* First argument must be a scalar integer buffer variable */ > > + exec_node *param = ir->actual_parameters.get_head(); > > + ir_instruction *inst = (ir_instruction *) param; > > + assert(inst->ir_type == ir_type_dereference_variable || > > + inst->ir_type == ir_type_dereference_array || > > + inst->ir_type == ir_type_dereference_record || > > + inst->ir_type == ir_type_swizzle); > > + > > + ir_rvalue *deref = (ir_rvalue *) inst; > > + assert(deref->type->is_scalar() && deref->type->is_integer()); > > + > > + ir_variable *var = deref->variable_referenced(); > > + assert(var); > > + > > + /* Compute the offset to the start if the dereference and the > > +* block index > > +*/ > > + mem_ctx = ralloc_parent(shader->ir); > > + > > + ir_rvalue *offset = NULL; > > + unsigned const_offset = get_shared_offset(var); > > + bool row_major; > > + int matrix_columns; > > + const glsl_type *iface = var->get_interface_type(); > > + unsigned packing = > > + iface ? 
iface->interface_packing : GLSL_INTERFACE_PACKING_STD430; > > + buffer_access_type = shared_atomic_access; > > + > > + setup_buffer_access(var, deref, > > + &offset, &const_offset, > > + &row_major, &matrix_columns, packing); > > + > > + assert(offset); > > + assert(!row_major); > > + assert(matrix_columns == 1); > > + > > + ir_rvalue *deref_offset = > > + add(offset, new(mem_ctx) ir_constant(const_offset)); > > + > > + /* Create the new internal function signature that will take a block > > +* index and offset instead of a buffer variable > > +*/ > > + exec_list sig_params; > > + ir_variable *sig_param = new(mem_ctx) > > + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); > > + sig_params.push_tail(sig_param); > > + > > + const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? > > + glsl_type::int_type : glsl_type::uint_type; > > + sig_param = new(mem_ctx) > > + ir_variable(type, "data1", ir_var_function_in); > > + sig_params.push_tail(sig_param); > > + > > + if (param_count == 3) { > > + sig_param = new(mem_ctx) > > +ir_variable(type, "data2", ir_var_function_in); > > + sig_params.push_tail(sig_param); > > + } > > + > > + ir_function_signature *sig = > > + new(mem_ctx) ir_function_signature(deref->type, > > +
Re: [Mesa-dev] [PATCH 32/36] glsl: Translate atomic intrinsic functions on shared variables
On 2015-11-16 07:27:10, Iago Toral wrote: > hOn Sat, 2015-11-14 at 13:44 -0800, Jordan Justen wrote: > > When an intrinsic atomic operation is used on a shared variable, we > > translate it to a new 'share variable' specific intrinsic function > > call. > > > > For example, add call to __intrinsic_atomic_add when used on a shared > > variable will be translated to a call to > > __intrinsic_atomic_add_shared. > > > > Signed-off-by: Jordan Justen > > --- > > src/glsl/lower_shared_reference.cpp | 151 > > > > 1 file changed, 151 insertions(+) > > > > diff --git a/src/glsl/lower_shared_reference.cpp > > b/src/glsl/lower_shared_reference.cpp > > index 810c6b6..7ff2c0c 100644 > > --- a/src/glsl/lower_shared_reference.cpp > > +++ b/src/glsl/lower_shared_reference.cpp > > @@ -79,6 +79,10 @@ public: > > ir_visitor_status visit_enter(ir_assignment *ir); > > void handle_assignment(ir_assignment *ir); > > > > + ir_call *lower_shared_atomic_intrinsic(ir_call *ir); > > + ir_call *check_for_shared_atomic_intrinsic(ir_call *ir); > > + ir_visitor_status visit_enter(ir_call *ir); > > + > > unsigned get_shared_offset(const ir_variable *); > > > > ir_call *shared_load(const struct glsl_type *type, ir_rvalue *offset); > > @@ -337,6 +341,153 @@ lower_shared_reference_visitor::shared_load(const > > struct glsl_type *type, > > return new(mem_ctx) ir_call(sig, deref_result, &call_params); > > } > > > > +/* Lowers the intrinsic call to a new internal intrinsic that swaps the > > + * access to the buffer variable in the first parameter by an offset > > + * and block index. This involves creating the new internal intrinsic > > + * (i.e. the new function signature). > > + */ > > +ir_call * > > +lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) > > +{ > > + /* Shared atomics usually have 2 parameters, the shared variable and an > > +* integer argument. The exception is CompSwap, that has an additional > > +* integer parameter. 
> > +*/ > > + int param_count = ir->actual_parameters.length(); > > + assert(param_count == 2 || param_count == 3); > > + > > + /* First argument must be a scalar integer buffer variable */ > > + exec_node *param = ir->actual_parameters.get_head(); > > + ir_instruction *inst = (ir_instruction *) param; > > + assert(inst->ir_type == ir_type_dereference_variable || > > + inst->ir_type == ir_type_dereference_array || > > + inst->ir_type == ir_type_dereference_record || > > + inst->ir_type == ir_type_swizzle); > > + > > + ir_rvalue *deref = (ir_rvalue *) inst; > > + assert(deref->type->is_scalar() && deref->type->is_integer()); > > + > > + ir_variable *var = deref->variable_referenced(); > > + assert(var); > > + > > + /* Compute the offset to the start if the dereference and the > > +* block index > > +*/ > > + mem_ctx = ralloc_parent(shader->ir); > > + > > + ir_rvalue *offset = NULL; > > + unsigned const_offset = get_shared_offset(var); > > + bool row_major; > > + int matrix_columns; > > + const glsl_type *iface = var->get_interface_type(); > > + unsigned packing = > > + iface ? iface->interface_packing : GLSL_INTERFACE_PACKING_STD430; > > + buffer_access_type = shared_atomic_access; > > + > > + setup_buffer_access(var, deref, > > + &offset, &const_offset, > > + &row_major, &matrix_columns, packing); > > + > > + assert(offset); > > + assert(!row_major); > > + assert(matrix_columns == 1); > > + > > + ir_rvalue *deref_offset = > > + add(offset, new(mem_ctx) ir_constant(const_offset)); > > + > > + /* Create the new internal function signature that will take a block > > +* index and offset instead of a buffer variable > > +*/ > > + exec_list sig_params; > > + ir_variable *sig_param = new(mem_ctx) > > + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); > > + sig_params.push_tail(sig_param); > > + > > + const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? 
> > + glsl_type::int_type : glsl_type::uint_type; > > + sig_param = new(mem_ctx) > > + ir_variable(type, "data1", ir_var_function_in); > > + sig_params.push_tail(sig_param); > > + > > + if (param_count == 3) { > > + sig_param = new(mem_ctx) > > +ir_variable(type, "data2", ir_var_function_in); > > + sig_params.push_tail(sig_param); > > + } > > + > > + ir_function_signature *sig = > > + new(mem_ctx) ir_function_signature(deref->type, > > + compute_shader_enabled); > > + assert(sig); > > + sig->replace_parameters(&sig_params); > > + sig->is_intrinsic = true; > > + > > + char func_name[64]; > > + sprintf(func_name, "%s_shared", ir->callee_name()); > > + ir_function *f = new(mem_ctx) ir_function(func_name); > > + f->add_signature(sig); > > + > > + /* Now, create the call
Re: [Mesa-dev] [PATCH] i965: Add more MAX_*_URB_ENTRY_SIZE_BYTES #defines.
On 11/17/2015 02:38 AM, Kenneth Graunke wrote: > Signed-off-by: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/brw_defines.h | 6 ++ > 1 file changed, 6 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 0b8de63..ade3ede 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1938,8 +1938,14 @@ enum brw_message_target { > > /* Gen7 "GS URB Entry Allocation Size" is a U9-1 field, so the maximum > gs_size > * is 2^9, or 512. It's counted in multiples of 64 bytes. > + * > + * Identical for VS, DS, and HS. > */ > #define GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES (512*64) > +#define GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES (512*64) > +#define GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES (512*64) > +#define GEN7_MAX_VS_URB_ENTRY_SIZE_BYTES (512*64) > + > /* Gen6 "GS URB Entry Allocation Size" is defined as a number of 1024-bit > * (128 bytes) URB rows and the maximum allowed value is 5 rows. > */ > Checked this in spec. Yep, this is correct. Patch is Reviewed-by: Abdiel Janulgue ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 32/36] glsl: Translate atomic intrinsic functions on shared variables
On Tue, 2015-11-17 at 01:30 -0800, Jordan Justen wrote: > On 2015-11-16 06:51:55, Iago Toral wrote: > > On Sat, 2015-11-14 at 13:44 -0800, Jordan Justen wrote: > > > When an intrinsic atomic operation is used on a shared variable, we > > > translate it to a new 'share variable' specific intrinsic function > > > call. > > > > > > For example, add call to __intrinsic_atomic_add when used on a shared > > > variable will be translated to a call to > > > __intrinsic_atomic_add_shared. > > > > I suppose we should name these __intrinsic_atomic__shared_internal > > for consistency with the ssbo atomic intrinsics... or just remove the > > 'internal' suffix from the ssbo atomics. I think I'd prefer the latter, > > we can do this as a separate patch after this lands though. > > I'd also prefer to remove 'internal'. Should I just do that in patch > 30 with the rename? Yes, I think that would ok. Iago > -Jorda > > > > > > Signed-off-by: Jordan Justen > > > --- > > > src/glsl/lower_shared_reference.cpp | 151 > > > > > > 1 file changed, 151 insertions(+) > > > > > > diff --git a/src/glsl/lower_shared_reference.cpp > > > b/src/glsl/lower_shared_reference.cpp > > > index 810c6b6..7ff2c0c 100644 > > > --- a/src/glsl/lower_shared_reference.cpp > > > +++ b/src/glsl/lower_shared_reference.cpp > > > @@ -79,6 +79,10 @@ public: > > > ir_visitor_status visit_enter(ir_assignment *ir); > > > void handle_assignment(ir_assignment *ir); > > > > > > + ir_call *lower_shared_atomic_intrinsic(ir_call *ir); > > > + ir_call *check_for_shared_atomic_intrinsic(ir_call *ir); > > > + ir_visitor_status visit_enter(ir_call *ir); > > > + > > > unsigned get_shared_offset(const ir_variable *); > > > > > > ir_call *shared_load(const struct glsl_type *type, ir_rvalue *offset); > > > @@ -337,6 +341,153 @@ lower_shared_reference_visitor::shared_load(const > > > struct glsl_type *type, > > > return new(mem_ctx) ir_call(sig, deref_result, &call_params); > > > } > > > > > > +/* Lowers the intrinsic call to a new 
internal intrinsic that swaps the > > > + * access to the buffer variable in the first parameter by an offset > > > + * and block index. This involves creating the new internal intrinsic > > > + * (i.e. the new function signature). > > > + */ > > > +ir_call * > > > +lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call > > > *ir) > > > +{ > > > + /* Shared atomics usually have 2 parameters, the shared variable and > > > an > > > +* integer argument. The exception is CompSwap, that has an additional > > > +* integer parameter. > > > +*/ > > > + int param_count = ir->actual_parameters.length(); > > > + assert(param_count == 2 || param_count == 3); > > > + > > > + /* First argument must be a scalar integer buffer variable */ > > > + exec_node *param = ir->actual_parameters.get_head(); > > > + ir_instruction *inst = (ir_instruction *) param; > > > + assert(inst->ir_type == ir_type_dereference_variable || > > > + inst->ir_type == ir_type_dereference_array || > > > + inst->ir_type == ir_type_dereference_record || > > > + inst->ir_type == ir_type_swizzle); > > > + > > > + ir_rvalue *deref = (ir_rvalue *) inst; > > > + assert(deref->type->is_scalar() && deref->type->is_integer()); > > > + > > > + ir_variable *var = deref->variable_referenced(); > > > + assert(var); > > > + > > > + /* Compute the offset to the start if the dereference and the > > > +* block index > > > +*/ > > > + mem_ctx = ralloc_parent(shader->ir); > > > + > > > + ir_rvalue *offset = NULL; > > > + unsigned const_offset = get_shared_offset(var); > > > + bool row_major; > > > + int matrix_columns; > > > + const glsl_type *iface = var->get_interface_type(); > > > + unsigned packing = > > > + iface ? 
iface->interface_packing : GLSL_INTERFACE_PACKING_STD430; > > > + buffer_access_type = shared_atomic_access; > > > + > > > + setup_buffer_access(var, deref, > > > + &offset, &const_offset, > > > + &row_major, &matrix_columns, packing); > > > + > > > + assert(offset); > > > + assert(!row_major); > > > + assert(matrix_columns == 1); > > > + > > > + ir_rvalue *deref_offset = > > > + add(offset, new(mem_ctx) ir_constant(const_offset)); > > > + > > > + /* Create the new internal function signature that will take a block > > > +* index and offset instead of a buffer variable > > > +*/ > > > + exec_list sig_params; > > > + ir_variable *sig_param = new(mem_ctx) > > > + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); > > > + sig_params.push_tail(sig_param); > > > + > > > + const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? > > > + glsl_type::int_type : glsl_type::uint_type; > > > + sig_param = new(mem_ctx) > > > + ir_variable(type, "data1", ir_var_function_in); > > > + sig_params.push_
Re: [Mesa-dev] [PATCH 32/36] glsl: Translate atomic intrinsic functions on shared variables
On Tue, 2015-11-17 at 01:35 -0800, Jordan Justen wrote: > On 2015-11-16 07:27:10, Iago Toral wrote: > > hOn Sat, 2015-11-14 at 13:44 -0800, Jordan Justen wrote: > > > When an intrinsic atomic operation is used on a shared variable, we > > > translate it to a new 'share variable' specific intrinsic function > > > call. > > > > > > For example, add call to __intrinsic_atomic_add when used on a shared > > > variable will be translated to a call to > > > __intrinsic_atomic_add_shared. > > > > > > Signed-off-by: Jordan Justen > > > --- > > > src/glsl/lower_shared_reference.cpp | 151 > > > > > > 1 file changed, 151 insertions(+) > > > > > > diff --git a/src/glsl/lower_shared_reference.cpp > > > b/src/glsl/lower_shared_reference.cpp > > > index 810c6b6..7ff2c0c 100644 > > > --- a/src/glsl/lower_shared_reference.cpp > > > +++ b/src/glsl/lower_shared_reference.cpp > > > @@ -79,6 +79,10 @@ public: > > > ir_visitor_status visit_enter(ir_assignment *ir); > > > void handle_assignment(ir_assignment *ir); > > > > > > + ir_call *lower_shared_atomic_intrinsic(ir_call *ir); > > > + ir_call *check_for_shared_atomic_intrinsic(ir_call *ir); > > > + ir_visitor_status visit_enter(ir_call *ir); > > > + > > > unsigned get_shared_offset(const ir_variable *); > > > > > > ir_call *shared_load(const struct glsl_type *type, ir_rvalue *offset); > > > @@ -337,6 +341,153 @@ lower_shared_reference_visitor::shared_load(const > > > struct glsl_type *type, > > > return new(mem_ctx) ir_call(sig, deref_result, &call_params); > > > } > > > > > > +/* Lowers the intrinsic call to a new internal intrinsic that swaps the > > > + * access to the buffer variable in the first parameter by an offset > > > + * and block index. This involves creating the new internal intrinsic > > > + * (i.e. the new function signature). 
> > > + */ > > > +ir_call * > > > +lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call > > > *ir) > > > +{ > > > + /* Shared atomics usually have 2 parameters, the shared variable and > > > an > > > +* integer argument. The exception is CompSwap, that has an additional > > > +* integer parameter. > > > +*/ > > > + int param_count = ir->actual_parameters.length(); > > > + assert(param_count == 2 || param_count == 3); > > > + > > > + /* First argument must be a scalar integer buffer variable */ > > > + exec_node *param = ir->actual_parameters.get_head(); > > > + ir_instruction *inst = (ir_instruction *) param; > > > + assert(inst->ir_type == ir_type_dereference_variable || > > > + inst->ir_type == ir_type_dereference_array || > > > + inst->ir_type == ir_type_dereference_record || > > > + inst->ir_type == ir_type_swizzle); > > > + > > > + ir_rvalue *deref = (ir_rvalue *) inst; > > > + assert(deref->type->is_scalar() && deref->type->is_integer()); > > > + > > > + ir_variable *var = deref->variable_referenced(); > > > + assert(var); > > > + > > > + /* Compute the offset to the start if the dereference and the > > > +* block index > > > +*/ > > > + mem_ctx = ralloc_parent(shader->ir); > > > + > > > + ir_rvalue *offset = NULL; > > > + unsigned const_offset = get_shared_offset(var); > > > + bool row_major; > > > + int matrix_columns; > > > + const glsl_type *iface = var->get_interface_type(); > > > + unsigned packing = > > > + iface ? 
iface->interface_packing : GLSL_INTERFACE_PACKING_STD430; > > > + buffer_access_type = shared_atomic_access; > > > + > > > + setup_buffer_access(var, deref, > > > + &offset, &const_offset, > > > + &row_major, &matrix_columns, packing); > > > + > > > + assert(offset); > > > + assert(!row_major); > > > + assert(matrix_columns == 1); > > > + > > > + ir_rvalue *deref_offset = > > > + add(offset, new(mem_ctx) ir_constant(const_offset)); > > > + > > > + /* Create the new internal function signature that will take a block > > > +* index and offset instead of a buffer variable > > > +*/ > > > + exec_list sig_params; > > > + ir_variable *sig_param = new(mem_ctx) > > > + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); > > > + sig_params.push_tail(sig_param); > > > + > > > + const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? > > > + glsl_type::int_type : glsl_type::uint_type; > > > + sig_param = new(mem_ctx) > > > + ir_variable(type, "data1", ir_var_function_in); > > > + sig_params.push_tail(sig_param); > > > + > > > + if (param_count == 3) { > > > + sig_param = new(mem_ctx) > > > +ir_variable(type, "data2", ir_var_function_in); > > > + sig_params.push_tail(sig_param); > > > + } > > > + > > > + ir_function_signature *sig = > > > + new(mem_ctx) ir_function_signature(deref->type, > > > + compute_shader_enabled); > > > + assert(sig); > > >
[Mesa-dev] [PATCH] i965: Add missing stdio.h include to brw_compiler.h.
This is needed for the FILE * type in brw_print_vue_map(). Apparently, all files that include brw_compiler.h already pick this up via some include chain, so this isn't actually a build fix. However, I have patches which introduce new consumers of brw_compiler.h that fail to build because of the missing #include. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_compiler.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 3f54616..8f147d3 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -23,6 +23,7 @@ #pragma once +#include <stdio.h> #include "brw_device_info.h" #include "main/mtypes.h" -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeonsi: fix occlusion queries on Fiji
Would somebody with Fiji please test if this fixes occlusion queries? Thanks, Marek On Tue, Nov 10, 2015 at 11:49 PM, Marek Olšák wrote: > From: Marek Olšák > > Untested. I need a new PSU with two 8pin connectors to be able to use Fiji. > --- > src/gallium/drivers/radeon/r600_pipe_common.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c > b/src/gallium/drivers/radeon/r600_pipe_common.c > index 3599692..5aa1609 100644 > --- a/src/gallium/drivers/radeon/r600_pipe_common.c > +++ b/src/gallium/drivers/radeon/r600_pipe_common.c > @@ -234,8 +234,8 @@ bool r600_common_context_init(struct r600_common_context > *rctx, > rctx->family = rscreen->family; > rctx->chip_class = rscreen->chip_class; > > - if (rscreen->family == CHIP_HAWAII) > - rctx->max_db = 16; > + if (rscreen->chip_class >= CIK) > + rctx->max_db = MAX2(8, rscreen->info.r600_num_backends); > else if (rscreen->chip_class >= EVERGREEN) > rctx->max_db = 8; > else > -- > 2.1.4 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Add missing stdio.h include to brw_compiler.h.
Reviewed-by: Iago Toral Quiroga On Tue, 2015-11-17 at 01:38 -0800, Kenneth Graunke wrote: > This is needed for the FILE * type in brw_print_vue_map(). > > Apparently, all files that include brw_compiler.h already pick this up > via some include chain, so this isn't actually a build fix. However, > I have patches which introduce new consumers of brw_compiler.h that > fail to build because of the missing #include. > > Signed-off-by: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/brw_compiler.h | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h > b/src/mesa/drivers/dri/i965/brw_compiler.h > index 3f54616..8f147d3 100644 > --- a/src/mesa/drivers/dri/i965/brw_compiler.h > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h > @@ -23,6 +23,7 @@ > > #pragma once > > +#include <stdio.h> > #include "brw_device_info.h" > #include "main/mtypes.h" > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 92980] Push access to mesa request
https://bugs.freedesktop.org/show_bug.cgi?id=92980 Bug ID: 92980 Summary: Push access to mesa request Product: Mesa Version: unspecified Hardware: Other OS: All Status: NEW Severity: normal Priority: medium Component: Other Assignee: mesa-dev@lists.freedesktop.org Reporter: oded.gab...@gmail.com QA Contact: mesa-dev@lists.freedesktop.org Hi, I am Oded Gabbay and I have been working for the past 3 months on mesa for fixing ppc64/ppc64le code and have submitted several patches already. I am requesting push access to mesa since I plan to continue contributing to mesa for the foreseeable future, working on the above mentioned architectures. I already have an fdo account with push access for pixman so my ssh key and gpg key exists in fdo. My username is "gabbayo" Thanks, Oded -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/11] i965: Move postprocess_nir to codegen time
Hi Jason, On 12 November 2015 at 01:26, Jason Ekstrand wrote: > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +-- > src/mesa/drivers/dri/i965/brw_nir.c | 1 - > src/mesa/drivers/dri/i965/brw_vec4.cpp| 5 - > src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 6 +- > 4 files changed, 18 insertions(+), 5 deletions(-) > Can you please add a couple of lines explaining why we want this. This way it will be beneficial to you and/or others while skimming through git log X weeks down the line. Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/2] main: Don't restrict several KHR_debug enum to desktop GL
On 8 November 2015 at 09:56, Boyan Ding wrote: > In preparation for supporting GL_KHR_debug in OpenGL ES > > v2: add a missing hunk in _mesa_IsEnabled (Emil) > > Signed-off-by: Boyan Ding > Reviewed-by: Emil Velikov Thanks for the update Boyan. All, do we have any objections against this and/or the strings update [1] related to using this extension ? If there are no objections by tomorrow evening I will push the lot to master. This way we can iron out any outstanding issues for 11.1.0. Cheers Emil [1] http://patchwork.freedesktop.org/patch/63920/ ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] llvmpipe: disable vsx in ppc due to LLVM PPC bug
This patch makes sure that if we use altivec (VMX) instructions, we don't use VSX instructions as well, as this causes piglit tests to fail. For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7 With this patch, ppc64le reaches parity with x86-64 as far as piglit test suite is concerned. Signed-off-by: Oded Gabbay Cc: "11.0" --- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 7bda118..8c74cb8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -536,6 +536,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, #if defined(PIPE_ARCH_PPC) MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec"); + MAttrs.push_back("-vsx"); #endif builder.setMAttrs(MAttrs); -- 2.5.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] llvmpipe: disable vsx in ppc due to LLVM PPC bug
Am 17.11.2015 um 15:19 schrieb Oded Gabbay: > This patch makes sure that if we use altivec (VMX) instructions, we don't > use VSX instructions as well, as this cause piglit tests to fail > > For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7 > > With this patch, ppc64le reaches parity with x86-64 as far as piglit test > suite is concerned. > > Signed-off-by: Oded Gabbay > Cc: "11.0" > --- > src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > index 7bda118..8c74cb8 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > @@ -536,6 +536,7 @@ > lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, > > #if defined(PIPE_ARCH_PPC) > MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec"); > + MAttrs.push_back("-vsx"); > #endif > > builder.setMAttrs(MAttrs); > Doesn't that need some llvm version check, otherwise the attribute might be unsupported (not entirely sure what happens in this case)? From a very quick look, vsx seems to be a fairly recent addition (that is, might not be in llvm 3.3 which is the minimum, at least for x86). Otherwise looks ok to me. Albeit if llvm indeed miscompiles this ought to be fixed as that's quite nasty (we had some bugs in the past where we thought it might be miscompilation and it was due to us relying on undefined behavior too). Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/11] i965: Move postprocess_nir to codegen time
On Tue, Nov 17, 2015 at 4:04 AM, Emil Velikov wrote: > Hi Jason, > > On 12 November 2015 at 01:26, Jason Ekstrand wrote: >> --- >> src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +-- >> src/mesa/drivers/dri/i965/brw_nir.c | 1 - >> src/mesa/drivers/dri/i965/brw_vec4.cpp| 5 - >> src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 6 +- >> 4 files changed, 18 insertions(+), 5 deletions(-) >> > Can you please add a couple of lines why we want this. This way it > will be beneficial to you and/or others while skimming through git log > X weeks down the line. Sure. I can do that. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v5 5/7] glsl: Add precision information to ir_variable
On Mon, Nov 16, 2015 at 11:44 AM, Ilia Mirkin wrote: > On Mon, Nov 16, 2015 at 11:42 AM, Samuel Iglesias Gonsálvez > wrote: >> >> >> On 16/11/15 17:34, Ilia Mirkin wrote: >>> On Mon, Nov 16, 2015 at 11:29 AM, Samuel Iglesias Gonsálvez >>> wrote: On 16/11/15 13:07, Tapani Pälli wrote: > > On 11/16/2015 01:35 PM, Tapani Pälli wrote: >> >> >> On 11/16/2015 01:29 PM, Samuel Iglesias Gonsálvez wrote: >>> Hello Ilia, Tapani: >>> >>> I have reproduced the issue with a piglit test but not with the trace >>> uploaded in the bug report :-( >>> >>> The piglit test was: bin/arb_shader_storage_buffer_object-maxblocks >>> >>> I have upload a branch with some fixes at Igalia's mesa repo: >>> >>> Git repo: https://github.com/Igalia/mesa.git >>> Branch: wip/siglesias/precision-fixes >>> >>> But as this error might come from other initializations that I might >>> overlook: >>> * Ilia: Could you test if this issue is still happening to you? As I >>> cannot reproduce it locally, I might be forgetting something. >>> * Tapani: Could you do a quick run on CTS to check I have not broken >>> anything? >> >> Sure thing, I'll run testing. FWIW one of the patches was identical to >> my fix sent for fixing tessellation shader problems: >> >> http://lists.freedesktop.org/archives/mesa-dev/2015-November/100396.html > > No CTS regressions with these patches, I've gone through these and > changes look good to me! > > OK, once Ilia replies that the issue is fixed with those patches, I will send them for review to the mailing list :-) >>> >>> I won't have time to look until tonight. However the repro steps were >>> pretty simple... download the trace and run through valgrind. Probably >>> tons of other ways to trigger it too, of course... I'd esp look for >>> piglits that have uniform structs. >>> >> >> The problem is that I could not reproduce it with the trace. That's why >> I am asking. >> >> I reproduce it with a piglit tests, but maybe precision is uninitialized >> in other cases. 
Tomorrow I will do some more testing, just in case. > > Well, irrespective of other cases, if the things you're fixing are > real fixes, no need to wait on me. I'll be sure to complain again if I > still see problems. FWIW I did see them with nouveau, not i965. I > suspect llvmpipe would take the same paths. With all the latest patches that have landed, valgrind is happy again. [At least in regards to this... it's got complaints about other things, but those aren't your fault.] Thanks for fixing! -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/11] i965: Move postprocess_nir to codegen time
On Tue, Nov 17, 2015 at 7:09 AM, Jason Ekstrand wrote: > On Tue, Nov 17, 2015 at 4:04 AM, Emil Velikov > wrote: >> Hi Jason, >> >> On 12 November 2015 at 01:26, Jason Ekstrand wrote: >>> --- >>> src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +-- >>> src/mesa/drivers/dri/i965/brw_nir.c | 1 - >>> src/mesa/drivers/dri/i965/brw_vec4.cpp| 5 - >>> src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 6 +- >>> 4 files changed, 18 insertions(+), 5 deletions(-) >>> >> Can you please add a couple of lines why we want this. This way it >> will be beneficial to you and/or others while skimming through git log >> X weeks down the line. > > Sure. I can do that. I added: This allows us to insert NIR passes between initial NIR compilation and optimization (link time) and actual backend code-gen. In particular, it will allow us to do shader variants in NIR and share some of that shader variant code between backends. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 92980] Push access to mesa request
https://bugs.freedesktop.org/show_bug.cgi?id=92980 Brian Paul changed: What|Removed |Added Component|Other |Account Modification ||Requests Assignee|mesa-dev@lists.freedesktop. |sitewranglers@lists.freedes |org |ktop.org Product|Mesa|freedesktop.org QA Contact|mesa-dev@lists.freedesktop. | |org | --- Comment #1 from Brian Paul --- Sounds good. Reassigning to fd.o admins. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] llvmpipe: disable vsx in ppc due to LLVM PPC bug
On Tue, Nov 17, 2015 at 4:42 PM, Roland Scheidegger wrote: > Am 17.11.2015 um 15:19 schrieb Oded Gabbay: >> This patch makes sure that if we use altivec (VMX) instructions, we don't >> use VSX instructions as well, as this cause piglit tests to fail >> >> For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7 >> >> With this patch, ppc64le reaches parity with x86-64 as far as piglit test >> suite is concerned. >> >> Signed-off-by: Oded Gabbay >> Cc: "11.0" >> --- >> src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 1 + >> 1 file changed, 1 insertion(+) >> >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp >> b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp >> index 7bda118..8c74cb8 100644 >> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp >> @@ -536,6 +536,7 @@ >> lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, >> >> #if defined(PIPE_ARCH_PPC) >> MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec"); >> + MAttrs.push_back("-vsx"); >> #endif >> >> builder.setMAttrs(MAttrs); >> > > Doesn't that need some llvm version check, otherwise the attribute might > be unsupported (not entirely sure what happens in this case)? From a > very quick look, vsx seems to be a fairly recent addition (that is, > might not be in llvm 3.3 which is the minimum, at least for x86). > Otherwise looks ok to me. Albeit if llvm indeed miscompiles this ought > to be fixed as that's quite nasty (we had some bugs in the past where we > thought it might be miscompilation and it was due to us relying on > undefined behavior too). > > Roland > Hi Roland, vsx was added in release 3.4, according to git log. I believe there is no need for version check because I don't think ppc64le distributions uses llvm lower than that (I know that in Red Hat/Fedora we have at least 3.4). But if you insist I can add that check. 
I'm in contact with LLVM devs from IBM to solve this bug and from my debugging (and I did a LOT of it), I don't believe it's our misuse of the code. Based on different experiments I did (remove optimizations, replace vmx with vsx instructions and more), I truly think this is a bug in LLVM backend. It is not an easy bug - you need to go over each vsx instruction and check whether it's lane-sensitive or not. If so, you need to add a special treatment inside LLVM code for it. Once I get a resolution from IBM's devs, I will of course remove this workaround. Moreover, once that happens, I will probably move some of the vmx calls to vsx, as vsx has a larger register file. Oded ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/7] DRI3 support for EGL (v4)
On 10/11/15 20:26, Axel Davy wrote: Hi, I did take a look, and it looks good to me. I'm happy you implemented DRI_PRIME support as well. About it, do you need testers to check everything works ? A mistake about it I noticed is that you don't disable EGL_KHR_image_pixmap when is_different_gpu is set. It should be disabled, just like GLX_EXT_texture_from_pixmap is for GLX. Thanks for the review, I followed your suggestion! Sorry for the late answer though. I just pushed the patches. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] llvmpipe: disable vsx in ppc due to LLVM PPC bug
On 17/11/15 15:15, Oded Gabbay wrote: On Tue, Nov 17, 2015 at 4:42 PM, Roland Scheidegger wrote: Am 17.11.2015 um 15:19 schrieb Oded Gabbay: This patch makes sure that if we use altivec (VMX) instructions, we don't use VSX instructions as well, as this cause piglit tests to fail For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7 With this patch, ppc64le reaches parity with x86-64 as far as piglit test suite is concerned. Signed-off-by: Oded Gabbay Cc: "11.0" --- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 7bda118..8c74cb8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -536,6 +536,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, #if defined(PIPE_ARCH_PPC) MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec"); + MAttrs.push_back("-vsx"); #endif builder.setMAttrs(MAttrs); Doesn't that need some llvm version check, otherwise the attribute might be unsupported (not entirely sure what happens in this case)? From a very quick look, vsx seems to be a fairly recent addition (that is, might not be in llvm 3.3 which is the minimum, at least for x86). Otherwise looks ok to me. Albeit if llvm indeed miscompiles this ought to be fixed as that's quite nasty (we had some bugs in the past where we thought it might be miscompilation and it was due to us relying on undefined behavior too). Roland Hi Roland, vsx was added in release 3.4, according to git log. I believe there is no need for version check because I don't think ppc64le distributions uses llvm lower than that (I know that in Red Hat/Fedora we have at least 3.4). But if you insist I can add that check. 
I'm in contact with LLVM devs from IBM to solve this bug and from my debugging (and I did a LOT of it), I don't believe it's our misuse of the code. Based on different experiments I did (remove optimizations, replace vmx with vsx instructions and more), I truly think this is a bug in LLVM backend. It is not an easy bug - you need to go over each vsx instruction and check whether it's lane-sensitive or not. If so, you need to add a special treatment inside LLVM code for it. Once I get a resolution from IBM's devs, I will of course remove this workaround. Moreover, once that happens, I will probably move some of the vmx calls to vsx, as vsx has a larger register file. Oded Let's add the LLVM PR URL in a comment above the line for future reference. Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] llvmpipe: disable vsx in ppc due to LLVM PPC bug
This patch makes sure that if we use altivec (VMX) instructions, we don't use VSX instructions as well, as this cause piglit tests to fail For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7 With this patch, ppc64le reaches parity with x86-64 as far as piglit test suite is concerned. v2: - Added check that we have at least LLVM 3.4 - Added the LLVM bug URL as a comment in the code Signed-off-by: Oded Gabbay Cc: "11.0" --- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 4 1 file changed, 4 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 7bda118..152593a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -536,6 +536,10 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, #if defined(PIPE_ARCH_PPC) MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec"); +#if HAVE_LLVM >= 0x0304 + /* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7 */ + MAttrs.push_back("-vsx"); +#endif #endif builder.setMAttrs(MAttrs); -- 2.5.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH v2] llvmpipe: disable vsx in ppc due to LLVM PPC bug
On 17 November 2015 at 16:02, Oded Gabbay wrote: > This patch makes sure that if we use altivec (VMX) instructions, we don't > use VSX instructions as well, as this cause piglit tests to fail > > For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7 > > With this patch, ppc64le reaches parity with x86-64 as far as piglit test > suite is concerned. > > v2: > - Added check that we have at least LLVM 3.4 > - Added the LLVM bug URL as a comment in the code > > Signed-off-by: Oded Gabbay > Cc: "11.0" > --- > src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 4 > 1 file changed, 4 insertions(+) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > index 7bda118..152593a 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > @@ -536,6 +536,10 @@ > lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, > > #if defined(PIPE_ARCH_PPC) > MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec"); > +#if HAVE_LLVM >= 0x0304 > + /* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7 */ > + MAttrs.push_back("-vsx"); The commit message does not reflect what the patch actually does. I cannot object against the patch in any way, although the two should be in sync imho. Base of a very quick look at the llvm bug, I'm leaning that the commit msg is correct and the patch is off ? Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] i965: Prevent fast clears for MSRTs on SKL
Chad Versace writes: > Neil, do you have a bug open for this? What kind of bug do you mean? I don't think it would make sense to open a Freedesktop bug because it doesn't cause any problems as fast clears aren't enabled at all yet for SKL. > Reviewed-by: Chad Versace Thanks for the review. Regards, - Neil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] mesa: Add KBL PCI IDs and platform information.
On Mon, Nov 16, 2015 at 04:40:20PM -0800, Matt Turner wrote: > On Mon, Nov 16, 2015 at 4:24 PM, Sarah Sharp > wrote: > > Add PCI IDs for the Intel Kabylake platforms. The IDs are taken > > directly from the Linux kernel patches, which are under review: > > > > http://lists.freedesktop.org/archives/intel-gfx/2015-October/078967.html > > http://cgit.freedesktop.org/~vivijim/drm-intel/log/?h=kbl-upstream-v2 > > > > Please note that if this patch is backported, the following fixes will > > need to be added before this patch: > > > > commit 28ed1e08e8ba98e "i965/skl: Remove early platform support" > > commit c1e38ad37042b0e "i965/skl: Use larger URB size where available." > > > > Thanks to Ben for fixing a bug around setting urb.size, and being > > patient with my questions about what the various fields mean. > > > > Signed-off-by: Sarah Sharp > > Suggested-by: Ben Widawsky > > Tested-by: Rodrigo Vivi (KBL-GT2) > > --- > > > > include/pci_ids/i965_pci_ids.h | 22 +++ > > src/mesa/drivers/dri/i965/brw_device_info.c | 60 > > + > > 2 files changed, 82 insertions(+) > > > > diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h > > index 8a42599..ea3cc08 100644 > > --- a/include/pci_ids/i965_pci_ids.h > > +++ b/include/pci_ids/i965_pci_ids.h > > @@ -124,6 +124,28 @@ CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F") > > CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3") > > CHIPSET(0x192A, skl_gt3, "Intel(R) Skylake SRV GT3") > > CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3") > > +CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5") > > +CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5") > > +CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5") > > +CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1") > > +CHIPSET(0x590E, kbl_gt1, "Intel(R) Kabylake GT1") > > +CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1") > > +CHIPSET(0x590B, kbl_gt1, "Intel(R) Kabylake GT1") > > +CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1") > > +CHIPSET(0x5916, 
kbl_gt2, "Intel(R) Kabylake GT2") > > +CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F") > > +CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2") > > +CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2") > > +CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2") > > +CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2") > > +CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2") > > +CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3") > > +CHIPSET(0x592B, kbl_gt3, "Intel(R) Kabylake GT3") > > +CHIPSET(0x592A, kbl_gt3, "Intel(R) Kabylake GT3") > > +CHIPSET(0x5932, kbl_gt4, "Intel(R) Kabylake GT4") > > +CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4") > > +CHIPSET(0x593A, kbl_gt4, "Intel(R) Kabylake GT4") > > +CHIPSET(0x593D, kbl_gt4, "Intel(R) Kabylake GT4") > > CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)") > > CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)") > > CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)") > > This doesn't apply, because it hasn't been rebased onto commit dde33fc. Ok, I'll rebase and resend. > I find it odd that GT1.5 comes before GT1 and that there's a GT2F in > the middle of the GT2s. Can we move GT1.5 between 1 and 2? I don't > know where GT2F should go. That was the order the kernel patch listed in them; I just used a sed script to get it into the format needed for Mesa. I'm happy to re-arrange them (and I agree the order doesn't make sense), but it was much easier to visually make sure the kernel and the mesa patch matched when they were both in the same order. Sarah Sharp ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/8] glsl: move block validation outside function that validates members
On 13 November 2015 at 01:13, Timothy Arceri wrote: > From: Timothy Arceri > > --- > src/glsl/ast_to_hir.cpp | 16 > 1 file changed, 8 insertions(+), 8 deletions(-) > > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp > index f4c53b4..60f415d 100644 > --- a/src/glsl/ast_to_hir.cpp > +++ b/src/glsl/ast_to_hir.cpp > @@ -5816,14 +5816,6 @@ ast_process_structure_or_interface_block(exec_list > *instructions, > { > unsigned decl_count = 0; > > - /* For blocks that accept memory qualifiers (i.e. shader storage), verify > -* that we don't have incompatible qualifiers > -*/ > - if (layout && layout->flags.q.read_only && layout->flags.q.write_only) { > - _mesa_glsl_error(&loc, state, > - "Interface block sets both readonly and writeonly"); > - } > - > /* Make an initial pass over the list of fields to determine how > * many there are. Each element in this list is an ast_declarator_list. > * This means that we actually need to count the number of elements in the > @@ -6239,6 +6231,14 @@ ast_interface_block::hir(exec_list *instructions, > */ > state->struct_specifier_depth++; > > + /* For blocks that accept memory qualifiers (i.e. shader storage), verify > +* that we don't have incompatible qualifiers > +*/ > + if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) { > + _mesa_glsl_error(&loc, state, When I was looking in this area I've noticed that loc isn't properly initialised (seems to contain 0s). Obviously not part of this patch yet mildly related. Reviewed-by: Emil Velikov -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/8] glsl: rename function that processes struct and iface members
On 13 November 2015 at 01:13, Timothy Arceri wrote: > From: Timothy Arceri > Perhaps a small message - "As of last commit this function handles only the struct/iface members." or alike. Not a big deal either way: Reviewed-by: Emil Velikov -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] ast clean-up and fixes
On 13 November 2015 at 01:13, Timothy Arceri wrote: > This is a bunch of clean ups and some small fixes I noticed while > getting ready to add arb_enhanced_layouts support. > > No regressions after running on Intel's CI system. > For the series: (1 v2) Reviewed-by: Emil Velikov Not the best person when it comes to the glsl code, although things are pretty trivial. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] xmlconfig: Add support for DragonFly
Hi, Emil has recently convinced me to send DragonFly support patches present in our ports tree, so here's the first one. -- Francois Tigeot >From c4a53d4ea3568f0eb727f3be8d5597371f15339d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Tigeot?= Date: Tue, 17 Nov 2015 18:54:01 +0100 Subject: [PATCH] xmlconfig: Add support for DragonFly --- src/mesa/drivers/dri/common/xmlconfig.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index b8ab480..a8f7c9b 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -59,6 +59,9 @@ extern char *program_invocation_name, *program_invocation_short_name; #elif defined(__NetBSD__) && defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 106000100) #include #define GET_PROGRAM_NAME() getprogname() +#elif defined(__DragonFly__) +#include +#define GET_PROGRAM_NAME() getprogname() #elif defined(__APPLE__) #include #define GET_PROGRAM_NAME() getprogname() -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 04/14] glsl: remove duplicate validation for index layout qualifier
On 14 November 2015 at 13:42, Timothy Arceri wrote: > From: Timothy Arceri > > The minimum value for index is validated in the ast code and Nitpick: One might want to say "apply_explicit_location" instead of "in the ast code". Not a big deal though. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 06/14] glsl: move stream layout qualifier validation
On 14 November 2015 at 13:42, Timothy Arceri wrote: > From: Timothy Arceri > > We are moving this out of the parser in preparation for compile > time constant support. > > The reason a validation function is used rather than an apply > function like what is used with bindings is because glsl allows > streams to be defined on members of blocks even though they must > match the stream thats associated with the current block, this > means we need access to the value after validation to do this > comparision. > --- > src/glsl/ast_to_hir.cpp | 44 +--- > src/glsl/glsl_parser.yy | 11 ++- > 2 files changed, 35 insertions(+), 20 deletions(-) > > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp > index 8705f6e..53faacf 100644 > --- a/src/glsl/ast_to_hir.cpp > +++ b/src/glsl/ast_to_hir.cpp > @@ -3034,7 +3034,11 @@ apply_layout_qualifier_to_variable(const struct > ast_type_qualifier *qual, > > if (state->stage == MESA_SHADER_GEOMETRY && > qual->flags.q.out && qual->flags.q.stream) { > - var->data.stream = qual->stream; > + unsigned qual_stream; > + if (process_qualifier_constant(state, loc, "stream", qual->stream, > + &qual_stream)) { > + var->data.stream = qual_stream; > + } > } > > if (var->type->contains_atomic()) { > @@ -6080,7 +6084,8 @@ ast_process_struct_or_iface_block_members(exec_list > *instructions, >enum glsl_matrix_layout > matrix_layout, >bool allow_reserved_names, >ir_variable_mode var_mode, > - ast_type_qualifier *layout) > + ast_type_qualifier *layout, > + unsigned block_stream) > { > unsigned decl_count = 0; > > @@ -6188,11 +6193,16 @@ ast_process_struct_or_iface_block_members(exec_list > *instructions, > * the specified stream must match the stream associated with the > * containing block." 
> */ > - if (qual->flags.q.explicit_stream && > - qual->stream != layout->stream) { > - _mesa_glsl_error(&loc, state, "stream layout qualifier on interface > " > - "block member does not match the interface block " > - "(%d vs %d)", qual->stream, layout->stream); > + if (qual->flags.q.explicit_stream) { > + unsigned qual_stream; > + if (process_qualifier_constant(state, &loc, "stream", > +qual->stream, &qual_stream) && > + qual_stream != block_stream) { > +_mesa_glsl_error(&loc, state, "stream layout qualifier on " > + "interface block member does not match " > + "the interface block (%d vs %d)", qual->stream, > + block_stream); > + } >} > >if (qual->flags.q.uniform && qual->has_interpolation()) { > @@ -6350,7 +6360,8 @@ ast_struct_specifier::hir(exec_list *instructions, > GLSL_MATRIX_LAYOUT_INHERITED, > false /* > allow_reserved_names */, > ir_var_auto, > -NULL); > +NULL, > +0 /* for interface only */); > > validate_identifier(this->name, loc, state); > > @@ -6504,6 +6515,16 @@ ast_interface_block::hir(exec_list *instructions, > "Interface block sets both readonly and writeonly"); > } > > + unsigned qual_stream; > + if (!process_qualifier_constant(state, &loc, "stream", > this->layout.stream, > + &qual_stream)) { > + /* If the stream qualifier is invalid it doesn't make sense to continue > + * on and try to compare stream layouts on member variables agaist it > so typo "against". -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH v2] llvmpipe: disable vsx in ppc due to LLVM PPC bug
On Tue, Nov 17, 2015 at 6:15 PM, Emil Velikov wrote: > On 17 November 2015 at 16:02, Oded Gabbay wrote: >> This patch makes sure that if we use altivec (VMX) instructions, we don't >> use VSX instructions as well, as this cause piglit tests to fail >> >> For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7 >> >> With this patch, ppc64le reaches parity with x86-64 as far as piglit test >> suite is concerned. >> >> v2: >> - Added check that we have at least LLVM 3.4 >> - Added the LLVM bug URL as a comment in the code >> >> Signed-off-by: Oded Gabbay >> Cc: "11.0" >> --- >> src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 4 >> 1 file changed, 4 insertions(+) >> >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp >> b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp >> index 7bda118..152593a 100644 >> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp >> @@ -536,6 +536,10 @@ >> lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, >> >> #if defined(PIPE_ARCH_PPC) >> MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec"); >> +#if HAVE_LLVM >= 0x0304 >> + /* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7 */ >> + MAttrs.push_back("-vsx"); > The commit message does not reflect what the patch actually does. I > cannot object against the patch in any way, although the two should be > in sync imho. > > Base of a very quick look at the llvm bug, I'm leaning that the commit > msg is correct and the patch is off ? > > Thanks > Emil Hmm, I'm not sure I understand what you mean. The commit message says: "This patch makes sure that if we use altivec (VMX) instructions, we don't use VSX instructions as well, as this cause piglit tests to fail" And the patch itself disables the VSX attribute in LLVM backend in case we use Altivec (VMX) - meaning that no VSX instructions will be generated alongside Altivec instructions. 
So unless I completely misunderstood something, the commit message and the patch match. The llvm bug description maybe a little misleading, because I started it last week and then I had a different impression. But that's why I pointed to comment #7 which is where I wrote the updated description, which matches this patch (workaround). Oded ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 12/14] glsl: add support for compile-time constant expressions
On 14 November 2015 at 13:42, Timothy Arceri wrote: > From: Timothy Arceri > > This patch replaces the old interger constant qualifiers with either > the new ast_layout_expression type if the qualifier requires merging > or ast_expression if the qualifier can't have mulitple declarations > or if all but the newest qualifier is simply ignored. > > We also update the process_qualifier_constant() helper to be > similar to the one in the ast_layout_expression class, but in > this case it will be used to process the ast_expression qualifiers. > > Global shader layout qualifier validation is moved out of the parser > in this change as we now need to evaluate any constant expression > before doing the validation. > --- > src/glsl/ast.h | 33 +-- > src/glsl/ast_to_hir.cpp | 126 > > src/glsl/ast_type.cpp | 69 -- > src/glsl/glsl_parser.yy | 87 +-- > src/glsl/glsl_parser_extras.cpp | 44 -- > 5 files changed, 195 insertions(+), 164 deletions(-) > > --- a/src/glsl/ast_to_hir.cpp > +++ b/src/glsl/ast_to_hir.cpp > @@ -4079,9 +4113,18 @@ ast_declarator_list::hir(exec_list *instructions, > */ > if (decl_type && decl_type->contains_atomic()) { >if (type->qualifier.flags.q.explicit_binding && > - type->qualifier.flags.q.explicit_offset) > - state->atomic_counter_offsets[type->qualifier.binding] = > -type->qualifier.offset; > + type->qualifier.flags.q.explicit_offset) { > + unsigned qual_binding; > + unsigned qual_offset; > + if (process_qualifier_constant(state, &loc, "binding", > +type->qualifier.binding, > +&qual_binding) > + && process_qualifier_constant(state, &loc, "offset", Nitpick: Please leave the && trailing on the previous line. > --- a/src/glsl/glsl_parser.yy > +++ b/src/glsl/glsl_parser.yy >/* Layout qualifiers for tessellation control shaders. 
*/ >if (match_layout_qualifier("vertices", $1, state) == 0) { > $$.flags.q.vertices = 1; > - > - if ($3 <= 0) { > -_mesa_glsl_error(& @3, state, > - "invalid vertices (%d) specified", $3); > -YYERROR; > - } else if ($3 > (int)state->Const.MaxPatchVertices) { > -_mesa_glsl_error(& @3, state, > - "vertices (%d) exceeds " > - "GL_MAX_PATCH_VERTICES", $3); > -YYERROR; > - } else { > -$$.vertices = $3; > -if (!state->ARB_tessellation_shader_enable && > -!state->is_version(400, 0)) { > - _mesa_glsl_error(& @1, state, > -"vertices qualifier requires GLSL 4.00 or " > -"ARB_tessellation_shader"); > -} > + $$.vertices = new(ctx) ast_layout_expression(@1, $3); > + if (!state->ARB_tessellation_shader_enable && > + !state->is_version(400, 0)) { > +_mesa_glsl_error(& @1, state, > + "vertices qualifier requires GLSL 4.00 or " > + "ARB_tessellation_shader"); > } >} > > diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp > index 1678d89..2f870fc 100644 > --- a/src/glsl/glsl_parser_extras.cpp > +++ b/src/glsl/glsl_parser_extras.cpp > @@ -1644,8 +1644,20 @@ set_shader_inout_layout(struct gl_shader *shader, > switch (shader->Stage) { > case MESA_SHADER_TESS_CTRL: >shader->TessCtrl.VerticesOut = 0; > - if (state->tcs_output_vertices_specified) > - shader->TessCtrl.VerticesOut = state->out_qualifier->vertices; > + if (state->tcs_output_vertices_specified) { > + unsigned vertices; > + if (state->out_qualifier->vertices-> > + process_qualifier_constant(state, "vertices", &vertices, > + true)) { The existing code considering 0 an invalid amount, which afaict that is incorrect. Splitting that into a separate patch is an overkill, although mentioning it in the commit message is a (almost) must have imho. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] ARB_enhanced_layout compile-time-constants V3
Hi Tim, On 14 November 2015 at 13:42, Timothy Arceri wrote: > This series adds support for compile time constants and also adds > subroutine index qualifier support which was missing for > ARB_explicit_uniform_location. > > This series applies on top of a clean-up series[3] > > V3: > - Some refactoring and a bug fix based on Emil's feedback on V2. > - Series overhauled to reduce code churn while still keeping patches > as small as possible for easy review. > - Improvements to handling compute shader local_size layout qualifiers > - Rebased on top of clean-up series [3] > > V2: Validation of minimum qualifier value moved to the helper functions, all > qualifiers will now always have the minimum value checked. Split the patches > that move validation out of the parser and add the compile time constant > support into smaller patches where possible. > > Piglit tests have been reviewed and pushed to master, there is one outstanding > that tests querying of the subroutine index [1]. > > The extension is disabled by default until the remaining features are added. > > MESA_EXTENSION_OVERRIDE=GL_ARB_enhanced_layouts can be used for testing. > > You can get the series from my arb_enhanced_layouts6 branch [2] > > [1] https://patchwork.freedesktop.org/patch/63795/ > [2] https://github.com/tarceri/Mesa_arrays_of_arrays.git > [3] https://patchwork.freedesktop.org/series/728/ > Hats off for splitting this into smaller hunks - it does make a world of difference. There are a couple of "please add a bit of X in the commit message" style of suggestions and a "keep && trailing in previous line". Feel free to ignore the latter, but please don't do so with the former. No need to resend imho. For the series, minus 13/14 as I'm not familiar with ARB_explicit_uniform_location Reviewed-by: Emil Velikov Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/11] i965: Move postprocess_nir to codegen time
On 17 November 2015 at 15:12, Jason Ekstrand wrote: > On Tue, Nov 17, 2015 at 7:09 AM, Jason Ekstrand wrote: >> On Tue, Nov 17, 2015 at 4:04 AM, Emil Velikov >> wrote: >>> Hi Jason, >>> >>> On 12 November 2015 at 01:26, Jason Ekstrand wrote: --- src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +-- src/mesa/drivers/dri/i965/brw_nir.c | 1 - src/mesa/drivers/dri/i965/brw_vec4.cpp| 5 - src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 6 +- 4 files changed, 18 insertions(+), 5 deletions(-) >>> Can you please add a couple of lines explaining why we want this? This way it >>> will be beneficial to you and/or others while skimming through git log >>> X weeks down the line. >> >> Sure. I can do that. > > I added: > > This allows us to insert NIR passes between initial NIR compilation and > optimization (link time) and actual backend code-gen. In particular, it > will allow us to do shader variants in NIR and share some of that shader > variant code between backends. As a person not deep enough into the i965 code-base to provide a meaningful review - thank you. It looks great. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Add enums for 3DSTATE_TE field values.
3DSTATE_TE has partitioning, output topology, and domain fields, each of which has several enumerated values. We'll also need to switch on the domain, so enums (rather than #defines) seem like a natural fit. I chose to put these in brw_compiler.h because they'll be stored in struct brw_tes_prog_data, which will live there. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_compiler.h | 28 1 file changed, 28 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 8f147d3..1ee01eb 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -490,6 +490,34 @@ enum shader_dispatch_mode { DISPATCH_MODE_SIMD8 = 3, }; +/** + * @defgroup Tessellator parameter enumerations. + * + * These correspond to the hardware values in 3DSTATE_TE, and are provided + * as part of the tessellation evaluation shader. + * + * @{ + */ +enum brw_tess_partitioning { + BRW_TESS_PARTITIONING_INTEGER = 0, + BRW_TESS_PARTITIONING_ODD_FRACTIONAL = 1, + BRW_TESS_PARTITIONING_EVEN_FRACTIONAL = 2, +}; + +enum brw_tess_output_topology { + BRW_TESS_OUTPUT_TOPOLOGY_POINT = 0, + BRW_TESS_OUTPUT_TOPOLOGY_LINE= 1, + BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW = 2, + BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3, +}; + +enum brw_tess_domain { + BRW_TESS_DOMAIN_QUAD= 0, + BRW_TESS_DOMAIN_TRI = 1, + BRW_TESS_DOMAIN_ISOLINE = 2, +}; +/** @} */ + struct brw_vue_prog_data { struct brw_stage_prog_data base; struct brw_vue_map vue_map; -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Create new files for HS/DS/TE state upload code.
For now, this just splits the existing code to disable these stages into separate atoms/files. We can then replace it with real code. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 6 +- src/mesa/drivers/dri/i965/brw_state.h| 6 +- src/mesa/drivers/dri/i965/brw_state_upload.c | 7 +- src/mesa/drivers/dri/i965/gen7_disable.c | 98 src/mesa/drivers/dri/i965/gen7_ds_state.c| 64 ++ src/mesa/drivers/dri/i965/gen7_hs_state.c| 65 ++ src/mesa/drivers/dri/i965/gen7_te_state.c| 47 + src/mesa/drivers/dri/i965/gen8_disable.c | 67 --- src/mesa/drivers/dri/i965/gen8_ds_state.c| 54 +++ src/mesa/drivers/dri/i965/gen8_hs_state.c| 71 10 files changed, 317 insertions(+), 168 deletions(-) delete mode 100644 src/mesa/drivers/dri/i965/gen7_disable.c create mode 100644 src/mesa/drivers/dri/i965/gen7_ds_state.c create mode 100644 src/mesa/drivers/dri/i965/gen7_hs_state.c create mode 100644 src/mesa/drivers/dri/i965/gen7_te_state.c create mode 100644 src/mesa/drivers/dri/i965/gen8_ds_state.c create mode 100644 src/mesa/drivers/dri/i965/gen8_hs_state.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 5a88d66..e85c793 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -182,11 +182,13 @@ i965_FILES = \ gen7_blorp.cpp \ gen7_blorp.h \ gen7_cs_state.c \ - gen7_disable.c \ + gen7_ds_state.c \ gen7_gs_state.c \ + gen7_hs_state.c \ gen7_misc_state.c \ gen7_sf_state.c \ gen7_sol_state.c \ + gen7_te_state.c \ gen7_urb.c \ gen7_viewport_state.c \ gen7_vs_state.c \ @@ -196,7 +198,9 @@ i965_FILES = \ gen8_depth_state.c \ gen8_disable.c \ gen8_draw_upload.c \ + gen8_ds_state.c \ gen8_gs_state.c \ + gen8_hs_state.c \ gen8_misc_state.c \ gen8_multisample_state.c \ gen8_ps_state.c \ diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 94734ba..74f513a 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ 
b/src/mesa/drivers/dri/i965/brw_state.h @@ -127,14 +127,16 @@ extern const struct brw_tracked_state gen6_wm_push_constants; extern const struct brw_tracked_state gen6_wm_state; extern const struct brw_tracked_state gen7_depthbuffer; extern const struct brw_tracked_state gen7_clip_state; -extern const struct brw_tracked_state gen7_disable_stages; +extern const struct brw_tracked_state gen7_ds_state; extern const struct brw_tracked_state gen7_gs_state; +extern const struct brw_tracked_state gen7_hs_state; extern const struct brw_tracked_state gen7_ps_state; extern const struct brw_tracked_state gen7_push_constant_space; extern const struct brw_tracked_state gen7_sbe_state; extern const struct brw_tracked_state gen7_sf_clip_viewport; extern const struct brw_tracked_state gen7_sf_state; extern const struct brw_tracked_state gen7_sol_state; +extern const struct brw_tracked_state gen7_te_state; extern const struct brw_tracked_state gen7_urb; extern const struct brw_tracked_state gen7_vs_state; extern const struct brw_tracked_state gen7_wm_state; @@ -142,7 +144,9 @@ extern const struct brw_tracked_state gen7_hw_binding_tables; extern const struct brw_tracked_state haswell_cut_index; extern const struct brw_tracked_state gen8_blend_state; extern const struct brw_tracked_state gen8_disable_stages; +extern const struct brw_tracked_state gen8_ds_state; extern const struct brw_tracked_state gen8_gs_state; +extern const struct brw_tracked_state gen8_hs_state; extern const struct brw_tracked_state gen8_index_buffer; extern const struct brw_tracked_state gen8_multisample_state; extern const struct brw_tracked_state gen8_pma_fix; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index aae51a0..96c247a 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -225,8 +225,10 @@ static const struct brw_tracked_state *gen7_render_atoms[] = &brw_gs_samplers, 
&gen6_multisample_state, - &gen7_disable_stages, &gen7_vs_state, + &gen7_hs_state, + &gen7_te_state, + &gen7_ds_state, &gen7_gs_state, &gen7_sol_state, &gen7_clip_state, @@ -315,6 +317,9 @@ static const struct brw_tracked_state *gen8_render_atoms[] = &gen8_disable_stages, &gen8_vs_state, + &gen8_hs_state, + &gen7_te_state, + &gen8_ds_state, &gen8_gs_state, &gen8_sol_state, &gen6_clip_state, diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c b/src/mesa/drivers/dri/i965/gen7_disable.c deleted file mode 100644 index bb50969..000 --- a/src/mesa/drivers/dri/i965/gen7_disable.c +++ /d
Re: [Mesa-dev] [PATCH] i965: Add enums for 3DSTATE_TE field values.
On Tue, Nov 17, 2015 at 11:16 AM, Kenneth Graunke wrote: > 3DSTATE_TE has partitioning, output topology, and domain fields, > each of which has several enumerated values. We'll also need to > switch on the domain, so enums (rather than #defines) seem like a > natural fit. > > I chose to put these in brw_compiler.h because they'll be stored > in struct brw_tes_prog_data, which will live there. > > Signed-off-by: Kenneth Graunke Reviewed-by: Kristian Høgsberg > --- > src/mesa/drivers/dri/i965/brw_compiler.h | 28 > 1 file changed, 28 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h > b/src/mesa/drivers/dri/i965/brw_compiler.h > index 8f147d3..1ee01eb 100644 > --- a/src/mesa/drivers/dri/i965/brw_compiler.h > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h > @@ -490,6 +490,34 @@ enum shader_dispatch_mode { > DISPATCH_MODE_SIMD8 = 3, > }; > > +/** > + * @defgroup Tessellator parameter enumerations. > + * > + * These correspond to the hardware values in 3DSTATE_TE, and are provided > + * as part of the tessellation evaluation shader. > + * > + * @{ > + */ > +enum brw_tess_partitioning { > + BRW_TESS_PARTITIONING_INTEGER = 0, > + BRW_TESS_PARTITIONING_ODD_FRACTIONAL = 1, > + BRW_TESS_PARTITIONING_EVEN_FRACTIONAL = 2, > +}; > + > +enum brw_tess_output_topology { > + BRW_TESS_OUTPUT_TOPOLOGY_POINT = 0, > + BRW_TESS_OUTPUT_TOPOLOGY_LINE= 1, > + BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW = 2, > + BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3, > +}; > + > +enum brw_tess_domain { > + BRW_TESS_DOMAIN_QUAD= 0, > + BRW_TESS_DOMAIN_TRI = 1, > + BRW_TESS_DOMAIN_ISOLINE = 2, > +}; > +/** @} */ > + > struct brw_vue_prog_data { > struct brw_stage_prog_data base; > struct brw_vue_map vue_map; > -- > 2.6.2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2] i965: Implement ARB_pipeline_statistics_query tessellation counters.
We basically just need to uncomment Ben's code. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen6_queryobj.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) Still totally untested, but now less blatantly broken... diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c index 9f4a5db..d508c4c 100644 --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c @@ -136,8 +136,8 @@ emit_pipeline_stat(struct brw_context *brw, drm_intel_bo *bo, IA_VERTICES_COUNT, /* VERTICES_SUBMITTED */ IA_PRIMITIVES_COUNT, /* PRIMITIVES_SUBMITTED */ VS_INVOCATION_COUNT, /* VERTEX_SHADER_INVOCATIONS */ - 0, /* HS_INVOCATION_COUNT,*/ /* TESS_CONTROL_SHADER_PATCHES */ - 0, /* DS_INVOCATION_COUNT,*/ /* TESS_EVALUATION_SHADER_INVOCATIONS */ + HS_INVOCATION_COUNT, /* TESS_CONTROL_SHADER_PATCHES */ + DS_INVOCATION_COUNT, /* TESS_EVALUATION_SHADER_INVOCATIONS */ GS_PRIMITIVES_COUNT, /* GEOMETRY_SHADER_PRIMITIVES_EMITTED */ PS_INVOCATION_COUNT, /* FRAGMENT_SHADER_INVOCATIONS */ CS_INVOCATION_COUNT, /* COMPUTE_SHADER_INVOCATIONS */ @@ -231,6 +231,8 @@ gen6_queryobj_get_results(struct gl_context *ctx, case GL_CLIPPING_INPUT_PRIMITIVES_ARB: case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: query->Base.Result = results[1] - results[0]; break; @@ -250,8 +252,6 @@ gen6_queryobj_get_results(struct gl_context *ctx, query->Base.Result /= 4; break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: default: unreachable("Unrecognized query target in brw_queryobj_get_results()"); } @@ -329,11 +329,11 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q) case GL_CLIPPING_INPUT_PRIMITIVES_ARB: case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + case 
GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, 0); break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: default: unreachable("Unrecognized query target in brw_begin_query()"); } @@ -381,12 +381,12 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q) case GL_CLIPPING_INPUT_PRIMITIVES_ARB: case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: case GL_GEOMETRY_SHADER_INVOCATIONS: + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, 1); break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: default: unreachable("Unrecognized query target in brw_end_query()"); } -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Create new files for HS/DS/TE state upload code.
On Tue, Nov 17, 2015 at 11:20 AM, Kenneth Graunke wrote: > For now, this just splits the existing code to disable these stages into > separate atoms/files. We can then replace it with real code. Nice, bye bye gen7/8_disable.c. This commit is a milestone. Reviewed-by: Kristian Høgsberg > Signed-off-by: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/Makefile.sources | 6 +- > src/mesa/drivers/dri/i965/brw_state.h| 6 +- > src/mesa/drivers/dri/i965/brw_state_upload.c | 7 +- > src/mesa/drivers/dri/i965/gen7_disable.c | 98 > > src/mesa/drivers/dri/i965/gen7_ds_state.c| 64 ++ > src/mesa/drivers/dri/i965/gen7_hs_state.c| 65 ++ > src/mesa/drivers/dri/i965/gen7_te_state.c| 47 + > src/mesa/drivers/dri/i965/gen8_disable.c | 67 --- > src/mesa/drivers/dri/i965/gen8_ds_state.c| 54 +++ > src/mesa/drivers/dri/i965/gen8_hs_state.c| 71 > 10 files changed, 317 insertions(+), 168 deletions(-) > delete mode 100644 src/mesa/drivers/dri/i965/gen7_disable.c > create mode 100644 src/mesa/drivers/dri/i965/gen7_ds_state.c > create mode 100644 src/mesa/drivers/dri/i965/gen7_hs_state.c > create mode 100644 src/mesa/drivers/dri/i965/gen7_te_state.c > create mode 100644 src/mesa/drivers/dri/i965/gen8_ds_state.c > create mode 100644 src/mesa/drivers/dri/i965/gen8_hs_state.c > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > b/src/mesa/drivers/dri/i965/Makefile.sources > index 5a88d66..e85c793 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.sources > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > @@ -182,11 +182,13 @@ i965_FILES = \ > gen7_blorp.cpp \ > gen7_blorp.h \ > gen7_cs_state.c \ > - gen7_disable.c \ > + gen7_ds_state.c \ > gen7_gs_state.c \ > + gen7_hs_state.c \ > gen7_misc_state.c \ > gen7_sf_state.c \ > gen7_sol_state.c \ > + gen7_te_state.c \ > gen7_urb.c \ > gen7_viewport_state.c \ > gen7_vs_state.c \ > @@ -196,7 +198,9 @@ i965_FILES = \ > gen8_depth_state.c \ > gen8_disable.c \ > gen8_draw_upload.c \ > + gen8_ds_state.c \ > gen8_gs_state.c \ > + gen8_hs_state.c \ > 
gen8_misc_state.c \ > gen8_multisample_state.c \ > gen8_ps_state.c \ > diff --git a/src/mesa/drivers/dri/i965/brw_state.h > b/src/mesa/drivers/dri/i965/brw_state.h > index 94734ba..74f513a 100644 > --- a/src/mesa/drivers/dri/i965/brw_state.h > +++ b/src/mesa/drivers/dri/i965/brw_state.h > @@ -127,14 +127,16 @@ extern const struct brw_tracked_state > gen6_wm_push_constants; > extern const struct brw_tracked_state gen6_wm_state; > extern const struct brw_tracked_state gen7_depthbuffer; > extern const struct brw_tracked_state gen7_clip_state; > -extern const struct brw_tracked_state gen7_disable_stages; > +extern const struct brw_tracked_state gen7_ds_state; > extern const struct brw_tracked_state gen7_gs_state; > +extern const struct brw_tracked_state gen7_hs_state; > extern const struct brw_tracked_state gen7_ps_state; > extern const struct brw_tracked_state gen7_push_constant_space; > extern const struct brw_tracked_state gen7_sbe_state; > extern const struct brw_tracked_state gen7_sf_clip_viewport; > extern const struct brw_tracked_state gen7_sf_state; > extern const struct brw_tracked_state gen7_sol_state; > +extern const struct brw_tracked_state gen7_te_state; > extern const struct brw_tracked_state gen7_urb; > extern const struct brw_tracked_state gen7_vs_state; > extern const struct brw_tracked_state gen7_wm_state; > @@ -142,7 +144,9 @@ extern const struct brw_tracked_state > gen7_hw_binding_tables; > extern const struct brw_tracked_state haswell_cut_index; > extern const struct brw_tracked_state gen8_blend_state; > extern const struct brw_tracked_state gen8_disable_stages; > +extern const struct brw_tracked_state gen8_ds_state; > extern const struct brw_tracked_state gen8_gs_state; > +extern const struct brw_tracked_state gen8_hs_state; > extern const struct brw_tracked_state gen8_index_buffer; > extern const struct brw_tracked_state gen8_multisample_state; > extern const struct brw_tracked_state gen8_pma_fix; > diff --git 
a/src/mesa/drivers/dri/i965/brw_state_upload.c > b/src/mesa/drivers/dri/i965/brw_state_upload.c > index aae51a0..96c247a 100644 > --- a/src/mesa/drivers/dri/i965/brw_state_upload.c > +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c > @@ -225,8 +225,10 @@ static const struct brw_tracked_state > *gen7_render_atoms[] = > &brw_gs_samplers, > &gen6_multisample_state, > > - &gen7_disable_stages, > &gen7_vs_state, > + &gen7_hs_state, > + &gen7_te_state, > + &gen7_ds_state, > &gen7_gs_state, > &gen7_sol_state, > &gen7_clip_state, > @@ -315,6 +317,9 @@ static const struct brw_tracked_state > *gen8_render_atoms[] = > >
Re: [Mesa-dev] [PATCH v2] i965: Implement ARB_pipeline_statistics_query tessellation counters.
On Tue, Nov 17, 2015 at 11:25 AM, Kenneth Graunke wrote: > We basically just need to uncomment Ben's code. Reviewed-by: Kristian Høgsberg > Signed-off-by: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/gen6_queryobj.c | 16 > 1 file changed, 8 insertions(+), 8 deletions(-) > > Still totally untested, but now less blatantly broken... > > diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c > b/src/mesa/drivers/dri/i965/gen6_queryobj.c > index 9f4a5db..d508c4c 100644 > --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c > +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c > @@ -136,8 +136,8 @@ emit_pipeline_stat(struct brw_context *brw, drm_intel_bo > *bo, >IA_VERTICES_COUNT, /* VERTICES_SUBMITTED */ >IA_PRIMITIVES_COUNT, /* PRIMITIVES_SUBMITTED */ >VS_INVOCATION_COUNT, /* VERTEX_SHADER_INVOCATIONS */ > - 0, /* HS_INVOCATION_COUNT,*/ /* TESS_CONTROL_SHADER_PATCHES */ > - 0, /* DS_INVOCATION_COUNT,*/ /* TESS_EVALUATION_SHADER_INVOCATIONS */ > + HS_INVOCATION_COUNT, /* TESS_CONTROL_SHADER_PATCHES */ > + DS_INVOCATION_COUNT, /* TESS_EVALUATION_SHADER_INVOCATIONS */ >GS_PRIMITIVES_COUNT, /* GEOMETRY_SHADER_PRIMITIVES_EMITTED */ >PS_INVOCATION_COUNT, /* FRAGMENT_SHADER_INVOCATIONS */ >CS_INVOCATION_COUNT, /* COMPUTE_SHADER_INVOCATIONS */ > @@ -231,6 +231,8 @@ gen6_queryobj_get_results(struct gl_context *ctx, > case GL_CLIPPING_INPUT_PRIMITIVES_ARB: > case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: > case GL_COMPUTE_SHADER_INVOCATIONS_ARB: > + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: > + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: >query->Base.Result = results[1] - results[0]; >break; > > @@ -250,8 +252,6 @@ gen6_queryobj_get_results(struct gl_context *ctx, > query->Base.Result /= 4; >break; > > - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: > - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: > default: >unreachable("Unrecognized query target in brw_queryobj_get_results()"); > } > @@ -329,11 +329,11 @@ gen6_begin_query(struct gl_context *ctx, struct > gl_query_object *q) > case 
GL_CLIPPING_INPUT_PRIMITIVES_ARB: > case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: > case GL_COMPUTE_SHADER_INVOCATIONS_ARB: > + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: > + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: >emit_pipeline_stat(brw, query->bo, query->Base.Stream, > query->Base.Target, 0); >break; > > - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: > - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: > default: >unreachable("Unrecognized query target in brw_begin_query()"); > } > @@ -381,12 +381,12 @@ gen6_end_query(struct gl_context *ctx, struct > gl_query_object *q) > case GL_CLIPPING_INPUT_PRIMITIVES_ARB: > case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: > case GL_GEOMETRY_SHADER_INVOCATIONS: > + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: > + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: >emit_pipeline_stat(brw, query->bo, > query->Base.Stream, query->Base.Target, 1); >break; > > - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: > - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: > default: >unreachable("Unrecognized query target in brw_end_query()"); > } > -- > 2.6.2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH v2] llvmpipe: disable vsx in ppc due to LLVM PPC bug
On Tue, Nov 17, 2015 at 12:37 PM, Oded Gabbay wrote: > On Tue, Nov 17, 2015 at 6:15 PM, Emil Velikov > wrote: > > On 17 November 2015 at 16:02, Oded Gabbay wrote: > >> This patch makes sure that if we use altivec (VMX) instructions, we > don't > >> use VSX instructions as well, as this cause piglit tests to fail > >> > >> For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7 > >> > >> With this patch, ppc64le reaches parity with x86-64 as far as piglit > test > >> suite is concerned. > >> > >> v2: > >> - Added check that we have at least LLVM 3.4 > >> - Added the LLVM bug URL as a comment in the code > >> > >> Signed-off-by: Oded Gabbay > >> Cc: "11.0" > >> --- > >> src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 4 > >> 1 file changed, 4 insertions(+) > >> > >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > >> index 7bda118..152593a 100644 > >> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > >> @@ -536,6 +536,10 @@ > lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, > >> > >> #if defined(PIPE_ARCH_PPC) > >> MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : > "-altivec"); > >> +#if HAVE_LLVM >= 0x0304 > >> + /* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7 */ > >> + MAttrs.push_back("-vsx"); > > The commit message does not reflect what the patch actually does. I > > cannot object against the patch in any way, although the two should be > > in sync imho. > > > > Base of a very quick look at the llvm bug, I'm leaning that the commit > > msg is correct and the patch is off ? > > > > Thanks > > Emil > > Hmm, I'm not sure I understand what you mean. 
> > The commit message says: "This patch makes sure that if we use altivec > (VMX) instructions, we don't > use VSX instructions as well, as this cause piglit tests to fail" > > And the patch itself disables the VSX attribute in LLVM backend in > case we use Altivec (VMX) - meaning that no VSX instructions will be > generated alongside Altivec instructions. > > So unless I completely misunderstood something, the commit message and > the patch match. > Sorry to meddle. The commit message suggests that you only want to disable vsx if altivec is enabled. However, the patch adds -vsx unconditionally. Do you ever want to have "-altivec -vsx"? Jan > > The llvm bug description may be a little misleading, because I started > it last week and then I had a different impression. But that's why I > pointed to comment #7 which is where I wrote the updated description, > which matches this patch (workaround). > > Oded > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 1/2] mesa: Add KBL PCI IDs and platform information.
Add PCI IDs for the Intel Kabylake platforms. The IDs are taken directly from the Linux kernel patches, which are under review: http://lists.freedesktop.org/archives/intel-gfx/2015-October/078967.html http://cgit.freedesktop.org/~vivijim/drm-intel/log/?h=kbl-upstream-v2 The Kabylake PCI IDs taken from the kernel are rearranged to be in order of GT type, then PCI ID. Please note that if this patch is backported, the following fixes will need to be added before this patch: commit 28ed1e08e8ba98e "i965/skl: Remove early platform support" commit c1e38ad37042b0e "i965/skl: Use larger URB size where available." Thanks to Ben for fixing a bug around setting urb.size, and being patient with my questions about what the various fields mean. Signed-off-by: Sarah Sharp Suggested-by: Ben Widawsky Tested-by: Rodrigo Vivi (KBL-GT2) --- v2: - reorder the PCI IDs - rebase on latest mesa master include/pci_ids/i965_pci_ids.h | 22 +++ src/mesa/drivers/dri/i965/brw_device_info.c | 60 + 2 files changed, 82 insertions(+) diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h index 5891ba6..5139e27 100644 --- a/include/pci_ids/i965_pci_ids.h +++ b/include/pci_ids/i965_pci_ids.h @@ -132,6 +132,28 @@ CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4") CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4") CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4") CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4") +CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1") +CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1") +CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1") +CHIPSET(0x590B, kbl_gt1, "Intel(R) Kabylake GT1") +CHIPSET(0x590E, kbl_gt1, "Intel(R) Kabylake GT1") +CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5") +CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5") +CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5") +CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2") +CHIPSET(0x5916, kbl_gt2, "Intel(R) Kabylake GT2") +CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2") 
+CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2") +CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2") +CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2") +CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F") +CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3") +CHIPSET(0x592A, kbl_gt3, "Intel(R) Kabylake GT3") +CHIPSET(0x592B, kbl_gt3, "Intel(R) Kabylake GT3") +CHIPSET(0x5932, kbl_gt4, "Intel(R) Kabylake GT4") +CHIPSET(0x593A, kbl_gt4, "Intel(R) Kabylake GT4") +CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4") +CHIPSET(0x593D, kbl_gt4, "Intel(R) Kabylake GT4") CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)") CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)") CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)") diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index 4550550..c58e4a5 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -371,6 +371,66 @@ static const struct brw_device_info brw_device_info_bxt = { } }; +/* + * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+. + * There's no KBL entry. Using the default SKL (GEN9) GS entries value. + */ + +/* + * Both SKL and KBL support a maximum of 64 threads per + * Pixel Shader Dispatch (PSD) unit. 
+ */ +#define KBL_MAX_THREADS_PER_PSD 64 + +static const struct brw_device_info brw_device_info_kbl_gt1 = { + GEN9_FEATURES, + .gt = 1, + + .max_cs_threads = 7 * 6, + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 2, + .urb.size = 192, +}; + +static const struct brw_device_info brw_device_info_kbl_gt1_5 = { + GEN9_FEATURES, + .gt = 1, + + .max_cs_threads = 7 * 6, + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3, +}; + +static const struct brw_device_info brw_device_info_kbl_gt2 = { + GEN9_FEATURES, + .gt = 2, + + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3, +}; + +static const struct brw_device_info brw_device_info_kbl_gt3 = { + GEN9_FEATURES, + .gt = 3, + + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 6, +}; + +static const struct brw_device_info brw_device_info_kbl_gt4 = { + GEN9_FEATURES, + .gt = 4, + + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 9, + /* +* From the "L3 Allocation and Programming" documentation: +* +* "URB is limited to 1008KB due to programming restrictions. This +* is not a restriction of the L3 implementation, but of the FF and +* other clients. Therefore, in a GT4 implementation it is +* possible for the programmed allocation of the L3 data array to +* provide 3*384KB=1152KB for URB, but only 1008KB of this +* will be used." +*/ + .urb.size = 1008 / 3, +}; + const struct brw_device_info * brw_get_device_info(int devid) { -- 2.3.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/me
Re: [Mesa-dev] [PATCH v3 12/14] glsl: add support for complie-time constant expressions
On Tue, 2015-11-17 at 19:02 +, Emil Velikov wrote: > On 14 November 2015 at 13:42, Timothy Arceri wrote: > > From: Timothy Arceri > > > > This patch replaces the old interger constant qualifiers with either > > the new ast_layout_expression type if the qualifier requires merging > > or ast_expression if the qualifier can't have mulitple declarations > > or if all but the newest qualifier is simply ignored. > > > > We also update the process_qualifier_constant() helper to be > > similar to the one in the ast_layout_expression class, but in > > this case it will be used to process the ast_expression qualifiers. > > > > Global shader layout qualifier validation is moved out of the parser > > in this change as we now need to evaluate any constant expression > > before doing the validation. > > --- > > src/glsl/ast.h | 33 +-- > > src/glsl/ast_to_hir.cpp | 126 --- > > - > > src/glsl/ast_type.cpp | 69 -- > > src/glsl/glsl_parser.yy | 87 +-- > > src/glsl/glsl_parser_extras.cpp | 44 -- > > 5 files changed, 195 insertions(+), 164 deletions(-) > > > > > --- a/src/glsl/ast_to_hir.cpp > > +++ b/src/glsl/ast_to_hir.cpp > > > @@ -4079,9 +4113,18 @@ ast_declarator_list::hir(exec_list *instructions, > > */ > > if (decl_type && decl_type->contains_atomic()) { > >if (type->qualifier.flags.q.explicit_binding && > > - type->qualifier.flags.q.explicit_offset) > > - state->atomic_counter_offsets[type->qualifier.binding] = > > -type->qualifier.offset; > > + type->qualifier.flags.q.explicit_offset) { > > + unsigned qual_binding; > > + unsigned qual_offset; > > + if (process_qualifier_constant(state, &loc, "binding", > > +type->qualifier.binding, > > +&qual_binding) > > + && process_qualifier_constant(state, &loc, "offset", > Nitpick: Please leave the && trailing on the previous line. > > > --- a/src/glsl/glsl_parser.yy > > +++ b/src/glsl/glsl_parser.yy > > >/* Layout qualifiers for tessellation control shaders. 
*/ > >if (match_layout_qualifier("vertices", $1, state) == 0) { > > $$.flags.q.vertices = 1; > > - > > - if ($3 <= 0) { > > -_mesa_glsl_error(& @3, state, > > - "invalid vertices (%d) specified", $3); > > -YYERROR; > > - } else if ($3 > (int)state->Const.MaxPatchVertices) { > > -_mesa_glsl_error(& @3, state, > > - "vertices (%d) exceeds " > > - "GL_MAX_PATCH_VERTICES", $3); > > -YYERROR; > > - } else { > > -$$.vertices = $3; > > -if (!state->ARB_tessellation_shader_enable && > > -!state->is_version(400, 0)) { > > - _mesa_glsl_error(& @1, state, > > -"vertices qualifier requires GLSL 4.00 or > > " > > -"ARB_tessellation_shader"); > > -} > > + $$.vertices = new(ctx) ast_layout_expression(@1, $3); > > + if (!state->ARB_tessellation_shader_enable && > > + !state->is_version(400, 0)) { > > +_mesa_glsl_error(& @1, state, > > + "vertices qualifier requires GLSL 4.00 or " > > + "ARB_tessellation_shader"); > > } > >} > > > > diff --git a/src/glsl/glsl_parser_extras.cpp > > b/src/glsl/glsl_parser_extras.cpp > > index 1678d89..2f870fc 100644 > > --- a/src/glsl/glsl_parser_extras.cpp > > +++ b/src/glsl/glsl_parser_extras.cpp > > @@ -1644,8 +1644,20 @@ set_shader_inout_layout(struct gl_shader *shader, > > switch (shader->Stage) { > > case MESA_SHADER_TESS_CTRL: > >shader->TessCtrl.VerticesOut = 0; > > - if (state->tcs_output_vertices_specified) > > - shader->TessCtrl.VerticesOut = state->out_qualifier->vertices; > > + if (state->tcs_output_vertices_specified) { > > + unsigned vertices; > > + if (state->out_qualifier->vertices-> > > + process_qualifier_constant(state, "vertices", &vertices, > > + true)) { > The existing code considering 0 an invalid amount, which afaict that > is incorrect. Splitting that into a separate patch is an overkill, > although mentioning it in the commit message is a (almost) must have > imho. Nope this is my bug thanks for spotting :) I'll fix this locally. I'm sure I double checked all these before sending out. 
In this case I'm sure I changed this value, maybe I changed it to the wrong value. > > -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH v2] llvmpipe: disable vsx in ppc due to LLVM PPC bug
On Tue, Nov 17, 2015 at 9:40 PM, Jan Vesely wrote: > > > On Tue, Nov 17, 2015 at 12:37 PM, Oded Gabbay wrote: >> >> On Tue, Nov 17, 2015 at 6:15 PM, Emil Velikov >> wrote: >> > On 17 November 2015 at 16:02, Oded Gabbay wrote: >> >> This patch makes sure that if we use altivec (VMX) instructions, we >> >> don't >> >> use VSX instructions as well, as this cause piglit tests to fail >> >> >> >> For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7 >> >> >> >> With this patch, ppc64le reaches parity with x86-64 as far as piglit >> >> test >> >> suite is concerned. >> >> >> >> v2: >> >> - Added check that we have at least LLVM 3.4 >> >> - Added the LLVM bug URL as a comment in the code >> >> >> >> Signed-off-by: Oded Gabbay >> >> Cc: "11.0" >> >> --- >> >> src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 4 >> >> 1 file changed, 4 insertions(+) >> >> >> >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp >> >> b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp >> >> index 7bda118..152593a 100644 >> >> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp >> >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp >> >> @@ -536,6 +536,10 @@ >> >> lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, >> >> >> >> #if defined(PIPE_ARCH_PPC) >> >> MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : >> >> "-altivec"); >> >> +#if HAVE_LLVM >= 0x0304 >> >> + /* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7 */ >> >> + MAttrs.push_back("-vsx"); >> > The commit message does not reflect what the patch actually does. I >> > cannot object against the patch in any way, although the two should be >> > in sync imho. >> > >> > Base of a very quick look at the llvm bug, I'm leaning that the commit >> > msg is correct and the patch is off ? >> > >> > Thanks >> > Emil >> >> Hmm, I'm not sure I understand what you mean. 
>> >> The commit message says: "This patch makes sure that if we use altivec >> (VMX) instructions, we don't >> use VSX instructions as well, as this cause piglit tests to fail" >> >> And the patch itself disables the VSX attribute in LLVM backend in >> case we use Altivec (VMX) - meaning that no VSX instructions will be >> generated alongside Altivec instructions. >> >> So unless I completely misunderstood something, the commit message and >> the patch match. > > > sorry to meddle. the commit message suggests that you only want to disable > vsx if altivec is enabled. However, the patch adds -vsx unconditionally. Do > you want to ever have "-altivec -vsx"? > > Jan There is no option, AFAIK, that altivec support is missing, but vsx support exists. So, if we write "-altivec", then "-vsx" has no meaning, cause there isn't vsx support anyway. However, just to make it logically correct, I will send another version that only disables vsx if altivec is enabled Oded > >> >> >> The llvm bug description maybe a little misleading, because I started >> it last week and then I had a different impression. But that's why I >> pointed to comment #7 which is where I wrote the updated description, >> which matches this patch (workaround). >> >> Oded >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH v2] llvmpipe: disable vsx in ppc due to LLVM PPC bug
Am 17.11.2015 um 21:27 schrieb Oded Gabbay: > On Tue, Nov 17, 2015 at 9:40 PM, Jan Vesely wrote: >> >> >> On Tue, Nov 17, 2015 at 12:37 PM, Oded Gabbay wrote: >>> >>> On Tue, Nov 17, 2015 at 6:15 PM, Emil Velikov >>> wrote: On 17 November 2015 at 16:02, Oded Gabbay wrote: > This patch makes sure that if we use altivec (VMX) instructions, we > don't > use VSX instructions as well, as this cause piglit tests to fail > > For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7 > > With this patch, ppc64le reaches parity with x86-64 as far as piglit > test > suite is concerned. > > v2: > - Added check that we have at least LLVM 3.4 > - Added the LLVM bug URL as a comment in the code > > Signed-off-by: Oded Gabbay > Cc: "11.0" > --- > src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 4 > 1 file changed, 4 insertions(+) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > index 7bda118..152593a 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp > @@ -536,6 +536,10 @@ > lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, > > #if defined(PIPE_ARCH_PPC) > MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : > "-altivec"); > +#if HAVE_LLVM >= 0x0304 > + /* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7 */ > + MAttrs.push_back("-vsx"); The commit message does not reflect what the patch actually does. I cannot object against the patch in any way, although the two should be in sync imho. Base of a very quick look at the llvm bug, I'm leaning that the commit msg is correct and the patch is off ? Thanks Emil >>> >>> Hmm, I'm not sure I understand what you mean. 
>>> >>> The commit message says: "This patch makes sure that if we use altivec >>> (VMX) instructions, we don't >>> use VSX instructions as well, as this cause piglit tests to fail" >>> >>> And the patch itself disables the VSX attribute in LLVM backend in >>> case we use Altivec (VMX) - meaning that no VSX instructions will be >>> generated alongside Altivec instructions. >>> >>> So unless I completely misunderstood something, the commit message and >>> the patch match. >> >> >> sorry to meddle. the commit message suggests that you only want to disable >> vsx if altivec is enabled. However, the patch adds -vsx unconditionally. Do >> you want to ever have "-altivec -vsx"? >> >> Jan > > There is no option, AFAIK, that altivec support is missing, but vsx > support exists. So, if we write "-altivec", then "-vsx" has no > meaning, cause there isn't vsx support anyway. > However, just to make it logically correct, I will send another > version that only disables vsx if altivec is enabled > > My guess is you could just adjust the comment instead. After all maybe it's really vsx which causes the bug, not just together with altivec. Albeit I guess vsx is more of an extension to altivec, so it might not even be possible, but in any case switching it off always looks like a good idea to me. Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3] llvmpipe: disable VSX in ppc due to LLVM PPC bug
This patch disables the use of VSX instructions, as they cause some piglit tests to fail For more details, see: https://llvm.org/bugs/show_bug.cgi?id=25503#c7 With this patch, ppc64le reaches parity with x86-64 as far as piglit test suite is concerned. v2: - Added check that we have at least LLVM 3.4 - Added the LLVM bug URL as a comment in the code v3: - Only disable VSX if Altivec is supported, because if Altivec support is missing, then VSX support doesn't exist anyway. - Change original patch description. Signed-off-by: Oded Gabbay Cc: "11.0" --- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 9 + 1 file changed, 9 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 7bda118..3ee708f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -536,6 +536,15 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, #if defined(PIPE_ARCH_PPC) MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec"); +#if HAVE_LLVM >= 0x0304 + /* +* Make sure VSX instructions are disabled +* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7 +*/ + if (util_cpu_caps.has_altivec) { + MAttrs.push_back("-vsx"); + } +#endif #endif builder.setMAttrs(MAttrs); -- 2.5.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa/uniform_query: use IROUND for doubles as well as floats
From: Dave Airlie For the case where we convert a double to an int, we should round the same as we do for floats. This fixes GL41-CTS.gpu_shader_fp64.state_query Signed-off-by: Dave Airlie --- src/mesa/main/uniform_query.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index 083087d..cbf7062 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -437,7 +437,7 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location, dst[didx].i = src[sidx].i ? 1 : 0; break; case GLSL_TYPE_DOUBLE: - dst[didx].i = *(double *)&src[sidx].f; + dst[didx].i = IROUND(*(double *)&src[sidx].f); break; default: assert(!"Should not get here."); -- 2.5.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa/uniform_query: use IROUND for doubles as well as floats
On Tue, Nov 17, 2015 at 4:00 PM, Dave Airlie wrote: > From: Dave Airlie > > For the case where we convert a double to an int, we should > round the same as we do for floats. > > This fixes GL41-CTS.gpu_shader_fp64.state_query > > Signed-off-by: Dave Airlie > --- > src/mesa/main/uniform_query.cpp | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp > index 083087d..cbf7062 100644 > --- a/src/mesa/main/uniform_query.cpp > +++ b/src/mesa/main/uniform_query.cpp > @@ -437,7 +437,7 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, > GLint location, > dst[didx].i = src[sidx].i ? 1 : 0; > break; >case GLSL_TYPE_DOUBLE: > - dst[didx].i = *(double *)&src[sidx].f; > + dst[didx].i = IROUND(*(double *)&src[sidx].f); I think you want a IROUNDD... this one takes a float from what I can see, which will end up giving you incorrect integers > 2^23. I guess that CTS test isn't *that* sensitive :) > break; >default: > assert(!"Should not get here."); > -- > 2.5.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] i965: Implement ARB_pipeline_statistics_query tessellation counters.
On Tue, Nov 17, 2015 at 11:28:25AM -0800, Kristian Høgsberg wrote: > On Tue, Nov 17, 2015 at 11:25 AM, Kenneth Graunke > wrote: > > We basically just need to uncomment Ben's code. > > Reviewed-by: Kristian Høgsberg > Reviewed-by: Ben Widawsky ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/2] mesa: Add KBL PCI IDs and platform information.
On Tue, Nov 17, 2015 at 11:40:53AM -0800, Sarah Sharp wrote: > Add PCI IDs for the Intel Kabylake platforms. The IDs are taken > directly from the Linux kernel patches, which are under review: > > http://lists.freedesktop.org/archives/intel-gfx/2015-October/078967.html > http://cgit.freedesktop.org/~vivijim/drm-intel/log/?h=kbl-upstream-v2 > > The Kabylake PCI IDs taken from the kernel are rearranged to be in order > of GT type, then PCI ID. > > Please note that if this patch is backported, the following fixes will > need to be added before this patch: > > commit 28ed1e08e8ba98e "i965/skl: Remove early platform support" > commit c1e38ad37042b0e "i965/skl: Use larger URB size where available." > > Thanks to Ben for fixing a bug around setting urb.size, and being > patient with my questions about what the various fields mean. > > Signed-off-by: Sarah Sharp > Suggested-by: Ben Widawsky > Tested-by: Rodrigo Vivi (KBL-GT2) > --- > > v2: > - reorder the PCI IDs > - rebase on latest mesa master There's not really a consensus I guess, but most people do leave the version information in the final commit message. Also FWIW, I too was tempted to ask for the PCI IDs to be in order, but, that makes the patch harder to review :-( - so I am assuming it's just the sorted list from the v1, and I am not going through it again. 
> > include/pci_ids/i965_pci_ids.h | 22 +++ > src/mesa/drivers/dri/i965/brw_device_info.c | 60 > + > 2 files changed, 82 insertions(+) > > diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h > index 5891ba6..5139e27 100644 > --- a/include/pci_ids/i965_pci_ids.h > +++ b/include/pci_ids/i965_pci_ids.h > @@ -132,6 +132,28 @@ CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4") > CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4") > CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4") > CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4") > +CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1") > +CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1") > +CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1") > +CHIPSET(0x590B, kbl_gt1, "Intel(R) Kabylake GT1") > +CHIPSET(0x590E, kbl_gt1, "Intel(R) Kabylake GT1") > +CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5") > +CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5") > +CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5") > +CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2") > +CHIPSET(0x5916, kbl_gt2, "Intel(R) Kabylake GT2") > +CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2") > +CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2") > +CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2") > +CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2") > +CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F") > +CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3") > +CHIPSET(0x592A, kbl_gt3, "Intel(R) Kabylake GT3") > +CHIPSET(0x592B, kbl_gt3, "Intel(R) Kabylake GT3") > +CHIPSET(0x5932, kbl_gt4, "Intel(R) Kabylake GT4") > +CHIPSET(0x593A, kbl_gt4, "Intel(R) Kabylake GT4") > +CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4") > +CHIPSET(0x593D, kbl_gt4, "Intel(R) Kabylake GT4") > CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)") > CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)") > CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)") > diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c > 
b/src/mesa/drivers/dri/i965/brw_device_info.c > index 4550550..c58e4a5 100644 > --- a/src/mesa/drivers/dri/i965/brw_device_info.c > +++ b/src/mesa/drivers/dri/i965/brw_device_info.c > @@ -371,6 +371,66 @@ static const struct brw_device_info brw_device_info_bxt > = { > } > }; > > +/* > + * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+. > + * There's no KBL entry. Using the default SKL (GEN9) GS entries value. > + */ > + > +/* > + * Both SKL and KBL support a maximum of 64 threads per > + * Pixel Shader Dispatch (PSD) unit. > + */ > +#define KBL_MAX_THREADS_PER_PSD 64 > + > +static const struct brw_device_info brw_device_info_kbl_gt1 = { > + GEN9_FEATURES, > + .gt = 1, > + > + .max_cs_threads = 7 * 6, > + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 2, > + .urb.size = 192, > +}; > + > +static const struct brw_device_info brw_device_info_kbl_gt1_5 = { > + GEN9_FEATURES, > + .gt = 1, > + > + .max_cs_threads = 7 * 6, > + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3, > +}; > + > +static const struct brw_device_info brw_device_info_kbl_gt2 = { > + GEN9_FEATURES, > + .gt = 2, > + > + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3, > +}; > + > +static const struct brw_device_info brw_device_info_kbl_gt3 = { > + GEN9_FEATURES, > + .gt = 3, > + > + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 6, > +}; > + > +static const struct brw_device_info brw_device_info_kbl_gt4 = { > + GEN9_FEATURES, > + .gt = 4, > + > + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 9, > + /* > +* From the "L3 Allocation and Programming" documentation:
[Mesa-dev] [Bug 92983] [vmwgfx] SIGABRT vmw_screen_ioctl.c:461
https://bugs.freedesktop.org/show_bug.cgi?id=92983 Bug ID: 92983 Summary: [vmwgfx] SIGABRT vmw_screen_ioctl.c:461 Product: Mesa Version: unspecified Hardware: x86-64 (AMD64) OS: Linux (All) Status: NEW Keywords: have-backtrace Severity: normal Priority: medium Component: Mesa core Assignee: mesa-dev@lists.freedesktop.org Reporter: v...@freedesktop.org QA Contact: mesa-dev@lists.freedesktop.org CC: bri...@vmware.com [vmwgfx] SIGABRT vmw_screen_ioctl.c:461 mesa: 5b596f38785a11ad429e30b2237de2c8c59a451f (master 11.1.0-devel) vmwgfx crashed while running WebGL Conformance Tests on Firefox. Program received signal SIGSEGV, Segmentation fault. 0x560a46dce464 in mozalloc_abort(char const*) () (gdb) bt full #0 0x560a46dce464 in mozalloc_abort(char const*) () #1 0x560a46dce447 in mozalloc_abort(char const*) () #2 0x7f0cf3b73b14 in vmw_ioctl_command (vws=0x7f0cf7fe3990, cid=, throttle_us=throttle_us@entry=0, commands=commands@entry=0x7f0cc68e30f0, size=, pfence=pfence@entry=0x7ffe30bc9c08) at vmw_screen_ioctl.c:461 arg = {commands = 139692847542512, command_size = 5040, throttle_us = 0, fence_rep = 140729716087664, version = 2, flags = 0, context_handle = 536825321, pad64 = 0} rep = {handle = 0, mask = 0, seqno = 0, passed_seqno = 0, pad64 = 0, error = -14} ret = __func__ = "vmw_ioctl_command" #3 0x7f0cf3b71409 in vmw_swc_flush (swc=0x7f0cc68e3000, pfence=0x7ffe30bc9c48) at vmw_context.c:213 fence = 0x0 i = ret = __PRETTY_FUNCTION__ = "vmw_swc_flush" #4 0x7f0cf3b7994b in svga_context_flush (svga=svga@entry=0x7f0cb4673000, pfence=pfence@entry=0x0) at svga_context.c:313 svgascreen = 0x7f0d03d02000 fence = 0x0 #5 0x7f0cf3b8034c in svga_flush (pipe=0x7f0cb4673000, fence=0x0, flags=) at svga_pipe_flush.c:46 __func__ = "svga_flush" #6 0x7f0cf369cad4 in st_glFlush (ctx=) at state_tracker/st_cb_flush.c:121 #7 0x7f0d21c3c0c2 in mozilla::gl::GLContext::FlushIfHeavyGLCallsSinceLastFlush() () at /usr/lib64/firefox/libxul.so #8 0x7f0d2291a09f in nsRefreshDriver::Tick(long, 
mozilla::TimeStamp) () at /usr/lib64/firefox/libxul.so #9 0x7f0d2291a3ac in mozilla::RefreshDriverTimer::Tick(long, mozilla::TimeStamp) () at /usr/lib64/firefox/libxul.so #10 0x7f0d2291a4d8 in mozilla::VsyncRefreshDriverTimer::RefreshDriverVsyncObserver::TickRefreshDriver(mozilla::TimeStamp) () at /usr/lib64/firefox/libxul.so #11 0x7f0d22916c2e in nsRunnableMethodImpl::Run() () at /usr/lib64/firefox/libxul.so #12 0x7f0d216e4db6 in nsThread::ProcessNextEvent(bool, bool*) () at /usr/lib64/firefox/libxul.so #13 0x7f0d216ffb4f in NS_ProcessNextEvent(nsIThread*, bool) () at /usr/lib64/firefox/libxul.so #14 0x7f0d218cbaf2 in mozilla::ipc::MessagePump::Run(base::MessagePump::Delegate*) () at /usr/lib64/firefox/libxul.so #15 0x7f0d218bbc0e in MessageLoop::Run() () at /usr/lib64/firefox/libxul.so #16 0x7f0d227db3cb in nsBaseAppShell::Run() () at /usr/lib64/firefox/libxul.so #17 0x7f0d22d79b31 in nsAppStartup::Run() () at /usr/lib64/firefox/libxul.so #18 0x7f0d22db2ea2 in XREMain::XRE_mainRun() () at /usr/lib64/firefox/libxul.so #19 0x7f0d22db314d in XREMain::XRE_main(int, char**, nsXREAppData const*) () at /usr/lib64/firefox/libxul.so #20 0x7f0d22db3398 in XRE_main () at /usr/lib64/firefox/libxul.so #21 0x560a46dce371 in do_main(int, char**, nsIFile*) [clone .constprop.0] () #22 0x560a46dcda72 in main () (gdb) frame 2 #2 0x7f0cf3b73b14 in vmw_ioctl_command (vws=0x7f0cf7fe3990, cid=, throttle_us=throttle_us@entry=0, commands=commands@entry=0x7f0cc68e30f0, size=, pfence=pfence@entry=0x7ffe30bc9c08) at vmw_screen_ioctl.c:461 461 abort(); (gdb) l 456 do { 457 ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_EXECBUF, &arg, argsize); 458 } while(ret == -ERESTART); 459 if (ret) { 460 vmw_error("%s error %s.\n", __FUNCTION__, strerror(-ret)); 461 abort(); 462 } 463 464 if (rep.error) { 465 (gdb) print *vws $3 = {base = {destroy = 0x7f0cf3b749a0 , get_hw_version = 0x7f0cf3b74730 , get_cap = 0x7f0cf3b749b0 , context_create = 0x7f0cf3b72040 , surface_create = 0x7f0cf3b74a30 , 
surface_from_handle = 0x7f0cf3b729d0 , surface_get_handle = 0x7f0cf3b72920 , surface_is_flushed = 0x7f0cf3b74720 , surface_reference = 0x7f0cf3b749f0 , surface_can_create = 0x7f0cf3b74f80 , buffer_create = 0x7f0cf3b74750 , buffer_map = 0x7f0cf3b70fa0 , buffer_unmap = 0x7f0cf3b71030 , buffer_destroy = 0x7f0cf3b70eb0 , fence_reference = 0x7f0cf3b74990 , fence_signalled = 0x7f0cf3b74980 , fence_finish = 0x7f0cf3b74970 , have_gb_objects = true, have_gb_dma = true, shader_create = 0x7f0cf3b74840 ,
Re: [Mesa-dev] [PATCH 2/2] intel/kbl: Add Kabylake PCI ids
I don't know what the accepted solution is, but generally libdrm patches should go to dri-de...@lists.freedesktop.org. Since not everyone reads dri-devel regularly I usually send to both lists. I don't know if it matters much anymore. On Mon, Nov 16, 2015 at 04:25:12PM -0800, Sarah Sharp wrote: > From: Rodrigo Vivi > > Also, following kernel definition Kabylake is skylake. > > Signed-off-by: Rodrigo Vivi > Signed-off-by: Sarah Sharp > --- > > intel/intel_chipset.h | 57 > ++- > 1 file changed, 56 insertions(+), 1 deletion(-) > > diff --git a/intel/intel_chipset.h b/intel/intel_chipset.h > index 253ea71..4bbad5c 100644 > --- a/intel/intel_chipset.h > +++ b/intel/intel_chipset.h > @@ -181,6 +181,29 @@ > #define PCI_CHIP_SKYLAKE_SRV_GT1 0x190A > #define PCI_CHIP_SKYLAKE_WKS_GT2 0x191D > > +#define PCI_CHIP_KABYLAKE_ULT_GT20x5916 > +#define PCI_CHIP_KABYLAKE_ULT_GT1_5 0x5913 > +#define PCI_CHIP_KABYLAKE_ULT_GT10x5906 > +#define PCI_CHIP_KABYLAKE_ULT_GT30x5926 > +#define PCI_CHIP_KABYLAKE_ULT_GT2F 0x5921 > +#define PCI_CHIP_KABYLAKE_ULX_GT1_5 0x5915 > +#define PCI_CHIP_KABYLAKE_ULX_GT10x590E > +#define PCI_CHIP_KABYLAKE_ULX_GT20x591E > +#define PCI_CHIP_KABYLAKE_DT_GT2 0x5912 Sweet, the docs have 2 different definitions for 5912. Fantastic. > +#define PCI_CHIP_KABYLAKE_DT_GT1_5 0x5917 > +#define PCI_CHIP_KABYLAKE_DT_GT1 0x5902 > +#define PCI_CHIP_KABYLAKE_DT_GT4 0x5932 > +#define PCI_CHIP_KABYLAKE_HALO_GT2 0x591B Again. My goodness, brand strings are going to be a pain. > +#define PCI_CHIP_KABYLAKE_HALO_GT4 0x593B > +#define PCI_CHIP_KABYLAKE_HALO_GT3 0x592B > +#define PCI_CHIP_KABYLAKE_HALO_GT1 0x590B > +#define PCI_CHIP_KABYLAKE_SRV_GT20x591A Another one. 
> +#define PCI_CHIP_KABYLAKE_SRV_GT30x592A > +#define PCI_CHIP_KABYLAKE_SRV_GT10x590A > +#define PCI_CHIP_KABYLAKE_SRV_GT40x593A > +#define PCI_CHIP_KABYLAKE_WKS_GT20x591D > +#define PCI_CHIP_KABYLAKE_WKS_GT40x593D > + I suppose given all the duplicates, it might be nice to add comments, but honestly, nobody really cares about this level of detail in libdrm - and I missed this when reviewing the mesa patch. > #define PCI_CHIP_BROXTON_0 0x0A84 > #define PCI_CHIP_BROXTON_1 0x1A84 > #define PCI_CHIP_BROXTON_2 0x5A84 > @@ -362,6 +385,37 @@ >(devid) == PCI_CHIP_SKYLAKE_HALO_GT3 || \ >(devid) == PCI_CHIP_SKYLAKE_SRV_GT3) > > +#define IS_KBL_GT1(devid)((devid) == PCI_CHIP_KABYLAKE_ULT_GT1_5 || \ > + (devid) == PCI_CHIP_KABYLAKE_ULX_GT1_5 || \ > + (devid) == PCI_CHIP_KABYLAKE_DT_GT1_5 || \ > + (devid) == PCI_CHIP_KABYLAKE_ULT_GT1 || \ > + (devid) == PCI_CHIP_KABYLAKE_ULX_GT1 || \ > + (devid) == PCI_CHIP_KABYLAKE_DT_GT1|| \ > + (devid) == PCI_CHIP_KABYLAKE_HALO_GT1 || \ > + (devid) == PCI_CHIP_KABYLAKE_SRV_GT1) > + > +#define IS_KBL_GT2(devid)((devid) == PCI_CHIP_KABYLAKE_ULT_GT2 || \ > + (devid) == PCI_CHIP_KABYLAKE_ULT_GT2F || \ > + (devid) == PCI_CHIP_KABYLAKE_ULX_GT2 || \ > + (devid) == PCI_CHIP_KABYLAKE_DT_GT2|| \ > + (devid) == PCI_CHIP_KABYLAKE_HALO_GT2 || \ > + (devid) == PCI_CHIP_KABYLAKE_SRV_GT2 || \ > + (devid) == PCI_CHIP_KABYLAKE_WKS_GT2) > + > +#define IS_KBL_GT3(devid)((devid) == PCI_CHIP_KABYLAKE_ULT_GT3 || \ > + (devid) == PCI_CHIP_KABYLAKE_HALO_GT3 || \ > + (devid) == PCI_CHIP_KABYLAKE_SRV_GT3) > + > +#define IS_KBL_GT4(devid)((devid) == PCI_CHIP_KABYLAKE_DT_GT4|| \ > + (devid) == PCI_CHIP_KABYLAKE_HALO_GT4 || \ > + (devid) == PCI_CHIP_KABYLAKE_SRV_GT4 || \ > + (devid) == PCI_CHIP_KABYLAKE_WKS_GT4) > + > +#define IS_KABYLAKE(devid) (IS_KBL_GT1(devid) || \ > + IS_KBL_GT2(devid) || \ > + IS_KBL_GT3(devid) || \ > + IS_KBL_GT4(devid)) > + The actual correct breakdown IMO (and the encoding going back to bdw) is nibble #1 represents the GT#. 
ie 590x = gt1 591x = gt2 592x = gt3 593x = gt4 This doesn't match how it's done here (the fuse parts are rounded down instead of up, like gt 1.5), but it's up to you. Reviewed-by: Ben Widawsky > #define IS_SKYLAKE(devid)(IS_SKL_GT1(devid) || \ >IS_SKL_GT2(devid) || \ >IS_SKL_GT3(devid)) > @@ -371,7 +425,8 @@ >(devid) == PC
[Mesa-dev] [PATCH] nir: Store the size of the TCS output patch in nir_shader_info.
Signed-off-by: Kenneth Graunke --- src/glsl/nir/glsl_to_nir.cpp | 4 src/glsl/nir/nir.h | 5 + 2 files changed, 9 insertions(+) diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 6d24341..c4b53f3 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -167,6 +167,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, shader_prog->TransformFeedback.NumVarying > 0; switch (stage) { + case MESA_SHADER_TESS_CTRL: + shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut; + break; + case MESA_SHADER_GEOMETRY: shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn; shader->info.gs.output_primitive = sh->Geom.OutputType; diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index beabcaf..46add22 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1544,6 +1544,11 @@ typedef struct nir_shader_info { struct { unsigned local_size[3]; } cs; + + struct { + /** The number of vertices in the TCS output patch. */ + unsigned vertices_out; + } tcs; }; } nir_shader_info; -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: error out in indirect draw when vertex bindings mismatch
On Tuesday 17 November 2015, Tapani Pälli wrote: > > On 11/16/2015 08:55 AM, Tapani Pälli wrote: > > > > > > On 11/13/2015 07:18 PM, Fredrik Höglund wrote: > >> On Friday 13 November 2015, Tapani Pälli wrote: > >>> Patch adds additional mask for tracking which vertex buffer bindings > >>> are set. This array can be directly compared to which vertex arrays > >>> are enabled and should match when drawing. > >>> > >>> Fixes following CTS tests: > >>> > >>> ES31-CTS.draw_indirect.negative-noVBO-arrays > >>> ES31-CTS.draw_indirect.negative-noVBO-elements > >>> > >>> Signed-off-by: Tapani Pälli > >>> --- > >>> src/mesa/main/api_validate.c | 13 + > >>> src/mesa/main/mtypes.h | 3 +++ > >>> src/mesa/main/varray.c | 5 + > >>> 3 files changed, 21 insertions(+) > >>> > >>> diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c > >>> index a490189..e82e89a 100644 > >>> --- a/src/mesa/main/api_validate.c > >>> +++ b/src/mesa/main/api_validate.c > >>> @@ -710,6 +710,19 @@ valid_draw_indirect(struct gl_context *ctx, > >>> return GL_FALSE; > >>> } > >>> > >>> + /* From OpenGL ES 3.1 spec. section 10.5: > >>> +* "An INVALID_OPERATION error is generated if zero is bound to > >>> +* VERTEX_ARRAY_BINDING, DRAW_INDIRECT_BUFFER or to any enabled > >>> +* vertex array." > >>> +* > >>> +* Here we check that vertex buffer bindings match with enabled > >>> +* vertex arrays. > >>> +*/ > >>> + if (ctx->Array.VAO->_Enabled != ctx->Array.VAO->VertexBindingMask) { > >> > >> This test only works when the enabled vertex arrays are associated with > >> their default vertex buffer binding points. > > > > Could you open up this more, is there some existing test or app that > > would do this? Would be great for testing purposes, all the indirect > > draw rendering CTS tests pass with this change. > > Sorry, the question does not make sense. What I meant is that do you > know some app that would fail this test to help debugging/fixing the issue? 
No, but the following example should trigger the problem: /* Enable arrays 0 and 1 */ glEnableVertexAttribArray(0); glEnableVertexAttribArray(1); /* Make both arrays use VBO binding point #0 */ glVertexAttribBinding(0, 0); glVertexAttribBinding(1, 0); /* Bind a buffer object to VBO binding point #0 */ glBindVertexBuffer(0, ...); /* Bind a draw indirect buffer */ glBindBuffer(GL_DRAW_INDIRECT_BUFFER, ...); /* This call will now generate an INVALID_OPERATION error since * no buffer is bound to VBO binding point #1, even though none * of the enabled arrays use it. */ glDrawArraysIndirect(...); > > > >>> + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name); > >>> + return GL_FALSE; > >>> + } > >>> + > >>> if (!_mesa_valid_prim_mode(ctx, mode, name)) > >>> return GL_FALSE; > >>> > >>> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h > >>> index 4efdf1e..6c6187f 100644 > >>> --- a/src/mesa/main/mtypes.h > >>> +++ b/src/mesa/main/mtypes.h > >>> @@ -1419,6 +1419,9 @@ struct gl_vertex_array_object > >>> /** Vertex buffer bindings */ > >>> struct gl_vertex_buffer_binding VertexBinding[VERT_ATTRIB_MAX]; > >>> > >>> + /** Mask indicating which binding points are set. 
*/ > >>> + GLbitfield64 VertexBindingMask; > >>> + > >>> /** Mask of VERT_BIT_* values indicating which arrays are > >>> enabled */ > >>> GLbitfield64 _Enabled; > >>> > >>> diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c > >>> index 887d0c0..0a94c5a 100644 > >>> --- a/src/mesa/main/varray.c > >>> +++ b/src/mesa/main/varray.c > >>> @@ -174,6 +174,11 @@ bind_vertex_buffer(struct gl_context *ctx, > >>> binding->Offset = offset; > >>> binding->Stride = stride; > >>> > >>> + if (vbo == ctx->Shared->NullBufferObj) > >>> + vao->VertexBindingMask &= ~VERT_BIT(index); > >>> + else > >>> + vao->VertexBindingMask |= VERT_BIT(index); > >>> + > >>> vao->NewArrays |= binding->_BoundArrays; > >>> } > >>> } > >>> > >> > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/7] DRI3 support for EGL (v4)
Hi Out of interest have any of you tested this on Plasma5? When I set OpenGL & EGL in kwin and I'm using DRI3, compositing is disabled (it wasn't before) This is on Kabini using the latest mesa, xorg and radeon drivers from git Cheers Mike On Tue, 17 Nov 2015, 3:31 p.m. Martin Peres wrote: > > On 10/11/15 20:26, Axel Davy wrote: > > Hi, > > > > I did take a look, and it looks good to me. > > > > I'm happy you implemented DRI_PRIME support as well. > > About it, do you need testers to check everything works ? > > > > A mistake about it I noticed is that you don't disable > > EGL_KHR_image_pixmap > > when is_different_gpu is set. > > It should be disabled, just like GLX_EXT_texture_from_pixmap is for GLX. > > Thanks for the review, I followed your suggestion! Sorry for the late > answer though. > > I just pushed the patches. > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nir: Store the size of the TCS output patch in nir_shader_info.
Seems reasonable, Reviewed-by: Jason Ekstrand On Tue, Nov 17, 2015 at 3:16 PM, Kenneth Graunke wrote: > Signed-off-by: Kenneth Graunke > --- > src/glsl/nir/glsl_to_nir.cpp | 4 > src/glsl/nir/nir.h | 5 + > 2 files changed, 9 insertions(+) > > diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp > index 6d24341..c4b53f3 100644 > --- a/src/glsl/nir/glsl_to_nir.cpp > +++ b/src/glsl/nir/glsl_to_nir.cpp > @@ -167,6 +167,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, >shader_prog->TransformFeedback.NumVarying > 0; > > switch (stage) { > + case MESA_SHADER_TESS_CTRL: > + shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut; > + break; > + > case MESA_SHADER_GEOMETRY: >shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn; >shader->info.gs.output_primitive = sh->Geom.OutputType; > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index beabcaf..46add22 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -1544,6 +1544,11 @@ typedef struct nir_shader_info { >struct { > unsigned local_size[3]; >} cs; > + > + struct { > + /** The number of vertices in the TCS output patch. */ > + unsigned vertices_out; > + } tcs; > }; > } nir_shader_info; > > -- > 2.6.2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/7] DRI3 support for EGL (v4)
On 18/11/15 01:37, Mike Lothian wrote: Hi Out of interest have any of you tested this on Plasma5? When I set OpenGL & EGL in kwin and I'm using DRI3 compositing is disabled (it wasn't before) This is on Kabini using the latest mesa, xorg and radeon drivers from got Cheers Mike Hey Mike, I do use plasma 5, but with glx. Anyway, I can reproduce the issue so I will have a look at it tomorrow! For the record, I get the following errors: QXcbConnection: XCB error: 3 (BadWindow), sequence: 171, resource id: 73400326, major code: 20 (GetProperty), minor code: 0 kwin_core: Failed to initialize compositing, compositing disabled QXcbConnection: XCB error: 3 (BadWindow), sequence: 2171, resource id: 108, major code: 3 (GetWindowAttributes), minor code: 0 Thanks for testing and bug report, Martin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 05/36] i965: Import tables enumerating the set of validated L3 configurations.
On Sat, Nov 14, 2015 at 01:43:41PM -0800, Jordan Justen wrote: > From: Francisco Jerez > > It should be possible to use additional L3 configurations other than > the ones listed in the tables of validated allocations ("BSpec » > 3D-Media-GPGPU Engine » L3 Cache and URB [IVB+] » L3 Cache and URB [*] > » L3 Allocation and Programming"), but it seems sensible for now to > hard-code the tables in order to stick to the hardware docs. Instead > of setting up the arbitrary L3 partitioning given as input, the > closest validated L3 configuration will be looked up in these tables > and used to program the hardware. > > The included tables should work for Gen7-9. Note that the quantities > are specified in ways rather than in KB, this is because the L3 > control registers expect the value in ways, and because by doing that > we can re-use a single table for all GT variants of the same > generation (and in the case of IVB/HSW and CHV/SKL across different > generations) which generally have different L3 way sizes but allow the > same combinations of way allocations. 
> --- > src/mesa/drivers/dri/i965/Makefile.sources | 1 + > src/mesa/drivers/dri/i965/gen7_l3_state.c | 163 > + > 2 files changed, 164 insertions(+) > create mode 100644 src/mesa/drivers/dri/i965/gen7_l3_state.c > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > b/src/mesa/drivers/dri/i965/Makefile.sources > index 5a88d66..91901ad 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.sources > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > @@ -184,6 +184,7 @@ i965_FILES = \ > gen7_cs_state.c \ > gen7_disable.c \ > gen7_gs_state.c \ > + gen7_l3_state.c \ > gen7_misc_state.c \ > gen7_sf_state.c \ > gen7_sol_state.c \ > diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c > b/src/mesa/drivers/dri/i965/gen7_l3_state.c > new file mode 100644 > index 000..8f9ba5b > --- /dev/null > +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c > @@ -0,0 +1,163 @@ > +/* > + * Copyright (c) 2015 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include "brw_context.h" > +#include "brw_defines.h" > +#include "brw_state.h" > +#include "intel_batchbuffer.h" > + > +/** > + * Chunk of L3 cache reserved for some specific purpose. > + */ > +enum brw_l3_partition { > + /** Shared local memory. */ > + L3P_SLM = 0, > + /** Unified return buffer. */ > + L3P_URB, > + /** Union of DC and RO. */ > + L3P_ALL, > + /** Data cluster RW partition. */ > + L3P_DC, > + /** Union of IS, C and T. */ > + L3P_RO, > + /** Instruction and state cache. */ > + L3P_IS, > + /** Constant cache. */ > + L3P_C, > + /** Texture cache. */ > + L3P_T, > + /** Number of supported L3 partitions. */ > + NUM_L3P > +}; > + > +/** > + * L3 configuration represented as the number of ways allocated for each > + * partition. \sa get_l3_way_size(). > + */ > +struct brw_l3_config { > + unsigned n[NUM_L3P]; > +}; > + > +/** > + * IVB/HSW validated L3 configurations. > + */ > +static const struct brw_l3_config ivb_l3_configs[] = { > + {{ 0, 32, 0, 0, 32, 0, 0, 0 }}, > + {{ 0, 32, 0, 16, 16, 0, 0, 0 }}, > + {{ 0, 32, 0, 4, 0, 8, 4, 16 }}, > + {{ 0, 28, 0, 8, 0, 8, 4, 16 }}, > + {{ 0, 28, 0, 16, 0, 8, 4, 8 }}, > + {{ 0, 28, 0, 8, 0, 16, 4, 8 }}, > + {{ 0, 28, 0, 0, 0, 16, 4, 16 }}, > + {{ 0, 32, 0, 0, 0, 16, 0, 16 }}, > + {{ 0, 28, 0, 4, 32, 0, 0, 0 }}, > + {{ 16, 16, 0, 16, 16, 0, 0, 0 }}, > + {{ 16, 16, 0, 8, 0, 8, 8, 8 }}, > + {{ 16, 16, 0, 4, 0, 8, 4, 16 }}, > + {{ 16, 16, 0, 4, 0, 16, 4, 8 }}, > + {{ 16, 16, 0, 0, 32, 0, 0, 0 }}, > + {{ 0 }} > +}; > + > +/** > + * VLV validated L3 configurations. > + */ > +static const struct brw_l3_config vlv_l3_configs[] =
[Mesa-dev] [PATCH] [v2] i965: Add lossless compression to surface format table
Background: Prior to Skylake and since Ivybridge Intel hardware has had the ability to use an MCS (Multisample Control Surface) as auxiliary data in "compression" operations on the surface. This reduces memory bandwidth. This hardware was used either for MSAA compression or for fast clear operations. On Gen8, a similar mechanism exists to allow the hiz buffer to be sampled from, and therefore this feature is sometimes referred to more generally as "AUX buffers". Skylake adds the ability to have the display engine directly source compressed surfaces on top of the ability to sample from them. Inference dictates that enabling this display feature adds a restriction to the formats which could actually be compressed. The current set of surfaces seems to be a subset as compared to previous gens (see the next patch). Also, if I had to guess I would guess that future gens add support for more surface formats. To make handling this a bit easier to read, and more future proof, the support for this is moved into the surface formats table. Along with the modifications to the table, a helper function is also provided to determine if a surface is CCS compatible. Because fast clears are currently disabled on SKL, we can plumb the helper all the way through here, and not actually have anything break. The logic in the table works a bit differently than the other columns in the table and therefore deserves a small mention. For most other features, the GEN which began implementing it is set, and it is assumed future gens also support this. For this feature, GEN9 actually eliminates support for certain formats. We could use this column to determine support for the similar feature on older generation hardware. Aside from that being an error-prone task which is unrelated to enabling this on GEN9, it becomes somewhat tricky to implement because of the fact that surface format support diminishes. You'd probably want another column to cleanly implement it. 
v2: - rename ccs to ccs_e; Requested-by: Chad - rename lossless_compression to lossless_compression Requested-by: Chad - change meaning of brw_losslessly_compressible_format Requested-by: Chad - related changes to the code to reflect this. - remove excess ccs (Chad) Requested-by: Chad Versace Requested-by: Neil Roberts Signed-off-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_context.h | 2 + src/mesa/drivers/dri/i965/brw_surface_formats.c | 525 +--- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 +- 3 files changed, 282 insertions(+), 252 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8d6bc19..fe45edb 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1467,6 +1467,8 @@ void brw_upload_image_surfaces(struct brw_context *brw, /* brw_surface_formats.c */ bool brw_render_target_supported(struct brw_context *brw, struct gl_renderbuffer *rb); +bool brw_losslessly_compressible_format(struct brw_context *brw, +uint32_t brw_format); uint32_t brw_depth_format(struct brw_context *brw, mesa_format format); mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo, mesa_format format); diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 97fff60..16f7fec 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -39,14 +39,15 @@ struct surface_format_info { int input_vb; int streamed_output_vb; int color_processing; + int lossless_compression; const char *name; }; /* This macro allows us to write the table almost as it appears in the PRM, * while restructuring it to turn it into the C code we want. 
*/ -#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \ - [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, #sf}, +#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e, sf) \ + [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, ccs_e, #sf}, #define Y 0 #define x 999 @@ -74,6 +75,7 @@ struct surface_format_info { * VB- Input Vertex Buffer * SO- Steamed Output Vertex Buffers (transform feedback) * color - Color Processing + * ccs_e - Lossless Compression Support (gen9+ only) * sf- Surface Format * * See page 88 of the Sandybridge PRM VOL4_Part1 PDF. @@ -84,257 +86,258 @@ struct surface_format_info { * - VOL2_Part1 section 2.5.11 Format Conversion (vertex fetch). * - VOL4_Part1 section 2.12.2.1.2 Sampler Output Channel Mapping. * - VOL4_Part1 section 3.9.11 Render Target Write. + * - Render Target Surface Types [SKL+] */ const struct surface_format_info surface_formats[] = { -/* smpl filt shad CK RT AB VB SO color
[Mesa-dev] [PATCH 7/7] [v2] i965/gen9: Support fast clears for 32b float
SKL supports the ability to do fast clears and resolves of 32b RGBA as both integer and floats. This patch only enables float color clears because we haven't yet enabled integer color clears, (HW support for that was added in BDW). Two formats are explicitly disabled because they fail piglit tests, LUMINANCE16F and INTENSITY16F. There is some question about the validity of sampling from these surfaces for all gens, however, there seem to be no other failures, so I'd prefer to leave tackling that for a separate series. v2: Remove LUMINANCE16F and INTENSITY16F special cases since they are now handled by Neil's patch to disable MSAA fast clears. Cc: Neil Roberts Signed-off-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 8 ++-- src/mesa/drivers/dri/i965/gen8_surface_state.c | 8 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index f3c256d..499daba 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -359,8 +359,12 @@ is_color_fast_clear_compatible(struct brw_context *brw, } for (int i = 0; i < 4; i++) { - if (color->f[i] != 0.0f && color->f[i] != 1.0f && - _mesa_format_has_color_component(format, i)) { + if (!_mesa_format_has_color_component(format, i)) { + continue; + } + + if (brw->gen < 9 && + color->f[i] != 0.0f && color->f[i] != 1.0f) { return false; } } diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index e81b646..9cdd1c7 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -188,14 +188,6 @@ gen8_emit_fast_clear_color(struct brw_context *brw, uint32_t *surf) { if (brw->gen >= 9) { -#define check_fast_clear_val(x) \ - assert(mt->gen9_fast_clear_color.f[x] == 0.0 || \ - mt->gen9_fast_clear_color.f[x] == 1.0) - check_fast_clear_val(0); - 
check_fast_clear_val(1); - check_fast_clear_val(2); - check_fast_clear_val(3); -#undef check_fast_clear_val surf[12] = mt->gen9_fast_clear_color.ui[0]; surf[13] = mt->gen9_fast_clear_color.ui[1]; surf[14] = mt->gen9_fast_clear_color.ui[2]; -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] [v3] i965/skl: skip fast clears for certain surface formats
Some of the information originally in this commit message is now in the patch before this. SKL adds compressible render targets and as a result mutates some of the programming for fast clears and resolves. There is a new internal surface type called the CCS. The old AUX_MCS bit becomes AUX_CCS_D. "The Auxiliary surface is a CCS (Color Control Surface) with compression disabled or an MCS with compression enabled, depending on number of multisamples. MCS (Multisample Control Surface) is a special type of CCS." The formats which are supported are defined in the table titled "Render Target Surface Types [SKL+]". There is no PRM yet to reference. The previously implemented helper function already does the right thing provided the table is correct. v2: Use better English in commit message (Matt) s/compressable/compressible/ (Matt) Don't compare bools to true (Matt) Use the helper function and don't increase the context size - this is mostly implemented in the patch just before this (Chad, Neil) Remove an "invalid" assert (Chad) Fix assertion to check num_samples > 1, instead of num_samples (Chad) v3: Use Matt's code as Requested-by: Chad. I didn't even look at it since Chad said he was fine with that, and presumably Matt is fine with it. 
Cc: Chad Versace Signed-off-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_surface_formats.c | 52 - src/mesa/drivers/dri/i965/gen8_surface_state.c | 8 +++- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 16f7fec..eb981d6 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -90,9 +90,9 @@ struct surface_format_info { */ const struct surface_format_info surface_formats[] = { /* smpl filt shad CK RT AB VB SO color ccs_e */ - SF( Y, 50, x, x, Y, Y, Y, Y, x,x, R32G32B32A32_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x,x, R32G32B32A32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x,x, R32G32B32A32_UINT) + SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32B32A32_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_UINT) SF( x, x, x, x, x, x, Y, x, x,x, R32G32B32A32_UNORM) SF( x, x, x, x, x, x, Y, x, x,x, R32G32B32A32_SNORM) SF( x, x, x, x, x, x, Y, x, x,x, R64G64_FLOAT) @@ -109,15 +109,15 @@ const struct surface_format_info surface_formats[] = { SF( x, x, x, x, x, x, Y, x, x,x, R32G32B32_SSCALED) SF( x, x, x, x, x, x, Y, x, x,x, R32G32B32_USCALED) SF( x, x, x, x, x, x, x, x, x,x, R32G32B32_SFIXED) - SF( Y, Y, x, x, Y, 45, Y, x, 60,x, R16G16B16A16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x,x, R16G16B16A16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x,x, R16G16B16A16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x,x, R16G16B16A16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x,x, R16G16B16A16_FLOAT) - SF( Y, 50, x, x, Y, Y, Y, Y, x,x, R32G32_FLOAT) + SF( Y, Y, x, x, Y, 45, Y, x, 60, 90, R16G16B16A16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16B16A16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16B16A16_FLOAT) + SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32_FLOAT) SF( 
Y, 70, x, x, Y, Y, Y, Y, x,x, R32G32_FLOAT_LD) - SF( Y, x, x, x, Y, x, Y, Y, x,x, R32G32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x,x, R32G32_UINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_UINT) SF( Y, 50, Y, x, x, x, x, x, x,x, R32_FLOAT_X8X24_TYPELESS) SF( Y, x, x, x, x, x, x, x, x,x, X32_TYPELESS_G8X24_UINT) SF( Y, 50, x, x, x, x, x, x, x,x, L32A32_FLOAT) @@ -125,7 +125,7 @@ const struct surface_format_info surface_formats[] = { SF( x, x, x, x, x, x, Y, x, x,x, R32G32_SNORM) SF( x, x, x, x, x, x, Y, x, x,x, R64_FLOAT) SF( Y, Y, x, x, x, x, x, x, x,x, R16G16B16X16_UNORM) - SF( Y, Y, x, x, x, x, x, x, x,x, R16G16B16X16_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, 90, R16G16B16X16_FLOAT) SF( Y, 50, x, x, x, x, x, x, x,x, A32X32_FLOAT) SF( Y, 50, x, x, x, x, x, x, x,x, L32X32_FLOAT) SF( Y, 50, x, x, x, x, x, x, x,x, I32X32_FLOAT) @@ -135,29 +135,29 @@ const struct surface_format_info surface_formats[] = { SF( x, x, x, x, x, x, Y, x, x,x, R32G32_USCALED) SF( x, x, x, x, x, x, x, x,
Re: [Mesa-dev] [PATCH] i965: Add assertion for src_stencil payload size
On Monday, November 16, 2015 06:20:57 PM Ben Widawsky wrote: > This helps address a coverity warning and prevents future questions about this > code. > > Reported-by: Coverity (via Ilia) > Cc: Matt Turner > Cc: Ilia Mirkin > Signed-off-by: Ben Widawsky > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++ > 1 file changed, 6 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 84b5920..995ab22 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -3603,6 +3603,12 @@ lower_fb_write_logical_send(const fs_builder &bld, > fs_inst *inst, >assert(devinfo->gen >= 9); >assert(bld.dispatch_width() != 16); > > + /* XXX: src_stencil is only available on gen9+. dst_depth is never > + * available on gen9+. As such it's impossible to have both enabled at > the > + * same time and therefore length cannot overrun the array. > + */ > + assert(length < 15); > + >sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD); >bld.exec_all().annotate("FB write OS") > .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length], > I'd drop the XXX, personally, as this is an explanation of why the code is okay, not a comment meaning "I need to come back and fix this later". Up to you though. signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/7] DRI3 support for EGL (v4)
2015-11-18 8:04 GMT+08:00 Martin Peres : > > > On 18/11/15 01:37, Mike Lothian wrote: >> >> >> Hi >> >> Out of interest have any of you tested this on Plasma5? When I set OpenGL >> & EGL in kwin and I'm using DRI3 compositing is disabled (it wasn't before) >> >> This is on Kabini using the latest mesa, xorg and radeon drivers from got >> >> Cheers >> >> Mike > > > Hey Mike, > > I do use plasma 5, but with glx. Anyway, I can reproduce the issue so I will > have a look at it tomorrow! For the record, I get the following errors: > > QXcbConnection: XCB error: 3 (BadWindow), sequence: 171, resource id: > 73400326, major code: 20 (GetProperty), minor code: 0 > kwin_core: Failed to initialize compositing, compositing disabled > QXcbConnection: XCB error: 3 (BadWindow), sequence: 2171, resource id: 108, > major code: 3 (GetWindowAttributes), minor code: 0 > > Thanks for testing and bug report, > Martin > > Hi, I do remember that Plasma 5 was running okay with compositing on when I was writing v2 of the patch series. But I also reproduced the same failure with current git. Seems weird. I'll try to test and see what's happening when I'm free. Regards, Boyan Ding ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/5] util/set: don't compare against deleted entries
On Mon, Nov 16, 2015 at 6:56 PM, Timothy Arceri wrote: > On Sat, 2015-11-14 at 21:59 -0500, Connor Abbott wrote: >> Not sure how this wasn't already caught by valgrind, but it fixes an >> issue with the vectorizer. > > Can you give a more detailed description of the problem that is fixed? I'm > assuming its something to do with the key_equals_function having issues > comparing to the deleted_key value? Yes, that's correct. The pass I was adding tried to dereference the data contained in deleted_key and segfaulted. I'm surprised that other things aren't reading past deleted_key and getting uninitialized memory, or at least getting valgrind errors, but this pass seems to be the only one that does something sophisticated enough with the key to trigger this. I'll add that bit, and some more explanation of the problem to the message. > >> >> Signed-off-by: Connor Abbott >> --- >> src/util/set.c | 3 ++- >> 1 file changed, 2 insertions(+), 1 deletion(-) >> >> diff --git a/src/util/set.c b/src/util/set.c >> index f01f869..331ff58 100644 >> --- a/src/util/set.c >> +++ b/src/util/set.c >> @@ -282,7 +282,8 @@ set_add(struct set *ht, uint32_t hash, const void *key) >> * If freeing of old keys is required to avoid memory leaks, >> * perform a search before inserting. >> */ >> - if (entry->hash == hash && >> + if (entry_is_present(entry) && > > You can use !entry_is_deleted(entry) here as free entries will have already > cased the loop the break. Ok, fine. The choice is pretty arbitrary, but I guess entry_is_deleted() will generate slightly better code. > > With these two comments addressed this and patch 2 are: > > Reviewed-by: Timothy Arceri > >> + entry->hash == hash && >>ht->key_equals_function(key, entry->key)) { >> entry->key = key; >> return entry; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 00/42] Compute shader shared variables
git://people.freedesktop.org/~jljusten/mesa cs-shared-variables-v2 http://patchwork.freedesktop.org/bundle/jljusten/cs-shared-variables-v2 11 of the 42 patches have a Reviewed-by Patches 1 - 14: * curro's "i965: L3 cache partitioning." (sent Sept 6) I split one patch and changed a comment. * 1 - 5 have Reviewed-by Patches 15 - 21: * Rework lower_ubo_reference to allow code sharing with lower_shared_reference * Added a new patch since v1 to remove mem_ctx as a member of the lower_ubo_reference_helper class. * 16, 18 & 19 have Reviewed-by Patches 22 - 30: * Add shared variable support for i965. Add lower_shared_reference, which works similar to lower_ubo_reference for SSBOs, except it merges all shared variable into one shared variable region. (Rather than separate BOs like SSBOs allows.) Patches 31 - 39: * Adds atomic support for shared variable on i965, which is implemented similar to SSBOs. * 31 - 33 have Reviewed-by Patches 40 - 42: * Shared variables are the last major feature, so mark the extension as done Francisco Jerez (14): i965: Define symbolic constants for some useful L3 cache control registers. i965: Keep track of whether LRI is allowed in the context struct. i965: Adjust gen check in can_do_pipelined_register_writes i965: Define state flag to signal that the URB size has been altered. i965/gen8: Don't add workaround bits to PIPE_CONTROL stalls if DC flush is set. i965: Import tables enumerating the set of validated L3 configurations. i965: Implement programming of the L3 configuration. i965/hsw: Enable L3 atomics. i965: Implement selection of the closest L3 configuration based on a vector of weights. i965: Calculate appropriate L3 partition weights for the current pipeline state. i965: Implement L3 state atom. i965: Add debug flag to print out the new L3 state during transitions. i965: Work around L3 state leaks during context switches. i965: Hook up L3 partitioning state atom. 
Jordan Justen (28): glsl ubo/ssbo: Use enum to track current buffer access type glsl ubo/ssbo: Split buffer access to insert_buffer_access glsl ubo/ssbo: Add lower_buffer_access class glsl ubo/ssbo: Move is_dereferenced_thing_row_major into lower_buffer_access glsl ubo/ssbo: Move common code into lower_buffer_access::setup_buffer_access glsl: Remove mem_ctx as member variable in lower_ubo_reference_visitor glsl: Add default matrix ordering in lower_buffer_access glsl: Don't lower_variable_index_to_cond_assign for shared variables glsl: Add lowering pass for shared variable references nir: Translate glsl shared var load intrinsic to nir intrinsic nir: Translate glsl shared var store intrinsic to nir intrinsic i965: Disable vector splitting on shared variables i965/fs: Handle nir shared variable load intrinsic i965/fs: Handle nir shared variable store intrinsic function i965: Enable shared local memory for CS shared variables i965: Lower shared variable references to intrinsic calls glsl: Allow atomic functions to be used with shared variables glsl: Replace atomic_ssbo and ssbo_atomic with atomic glsl: Check for SSBO variable in SSBO atomic lowering glsl: Check for SSBO variable in check_for_ssbo_store glsl: Translate atomic intrinsic functions on shared variables glsl: Buffer atomics are supported for compute shaders glsl: Disable several optimizations on shared variables nir: Add nir intrinsics for shared variable atomic operations i965/nir: Implement shared variable atomic operations i965: Enable ARB_compute_shader extension on supported hardware docs: Mark ARB_compute_shader as done for i965 docs: Add ARB_compute_shader to 11.1.0 release notes docs/GL3.txt | 4 +- docs/relnotes/11.1.0.html | 1 + src/glsl/Makefile.sources | 3 + src/glsl/ast_function.cpp | 18 +- src/glsl/builtin_functions.cpp | 236 src/glsl/ir_optimization.h | 1 + src/glsl/linker.cpp| 4 + src/glsl/lower_buffer_access.cpp | 486 + src/glsl/lower_buffer_access.h | 72 +++ 
src/glsl/lower_shared_reference.cpp| 516 ++ src/glsl/lower_ubo_reference.cpp | 599 - src/glsl/lower_variable_index_to_cond_assign.cpp | 3 + src/glsl/nir/glsl_to_nir.cpp | 131 - src/glsl/nir/nir_intrinsics.h | 29 +- src/glsl/opt_constant_propagation.cpp | 3 +- src/glsl/opt_constant_variable.cpp | 3 +- src/glsl/opt_copy_propagation.cpp | 3 +- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_compiler.h | 1 + src/mesa/drivers/dri/i965
[Mesa-dev] [PATCH v2 30/42] i965: Lower shared variable references to intrinsic calls
Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index c4a567f..1365609 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -137,6 +137,9 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; } + compiler->glsl_compiler_options[MESA_SHADER_COMPUTE] + .LowerShaderSharedVariables = true; + return compiler; } -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 39/42] i965/nir: Implement shared variable atomic operations
Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs.h | 2 ++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 60 2 files changed, 62 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index cbfc07f..1c9b4c3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -276,6 +276,8 @@ public: nir_intrinsic_instr *instr); void nir_emit_ssbo_atomic(const brw::fs_builder &bld, int op, nir_intrinsic_instr *instr); + void nir_emit_shared_atomic(const brw::fs_builder &bld, + int op, nir_intrinsic_instr *instr); void nir_emit_texture(const brw::fs_builder &bld, nir_tex_instr *instr); void nir_emit_jump(const brw::fs_builder &bld, diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index c8c6370..792fda7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1967,6 +1967,37 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, break; } + case nir_intrinsic_shared_atomic_add: + nir_emit_shared_atomic(bld, BRW_AOP_ADD, instr); + break; + case nir_intrinsic_shared_atomic_min: + if (dest.type == BRW_REGISTER_TYPE_D) + nir_emit_shared_atomic(bld, BRW_AOP_IMIN, instr); + else + nir_emit_shared_atomic(bld, BRW_AOP_UMIN, instr); + break; + case nir_intrinsic_shared_atomic_max: + if (dest.type == BRW_REGISTER_TYPE_D) + nir_emit_shared_atomic(bld, BRW_AOP_IMAX, instr); + else + nir_emit_shared_atomic(bld, BRW_AOP_UMAX, instr); + break; + case nir_intrinsic_shared_atomic_and: + nir_emit_shared_atomic(bld, BRW_AOP_AND, instr); + break; + case nir_intrinsic_shared_atomic_or: + nir_emit_shared_atomic(bld, BRW_AOP_OR, instr); + break; + case nir_intrinsic_shared_atomic_xor: + nir_emit_shared_atomic(bld, BRW_AOP_XOR, instr); + break; + case nir_intrinsic_shared_atomic_exchange: + nir_emit_shared_atomic(bld, BRW_AOP_MOV, instr); + break; + case nir_intrinsic_shared_atomic_comp_swap: + 
nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr); + break; + default: nir_emit_intrinsic(bld, instr); break; @@ -2607,6 +2638,35 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, } void +fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, + int op, nir_intrinsic_instr *instr) +{ + fs_reg dest; + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + dest = get_nir_dest(instr->dest); + + unsigned index = BRW_SLM_SURFACE_INDEX; + fs_reg surface = fs_reg(index); + + fs_reg offset = get_nir_src(instr->src[0]); + fs_reg data1 = get_nir_src(instr->src[1]); + fs_reg data2; + if (op == BRW_AOP_CMPWR) + data2 = get_nir_src(instr->src[2]); + + /* Emit the actual atomic operation */ + + fs_reg atomic_result = + surface_access::emit_untyped_atomic(bld, surface, offset, + data1, data2, + 1 /* dims */, 1 /* rsize */, + op, + BRW_PREDICATE_NONE); + dest.type = atomic_result.type; + bld.MOV(dest, atomic_result); +} + +void fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { unsigned sampler = instr->sampler_index; -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 17/42] glsl ubo/ssbo: Add lower_buffer_access class
This class has code that will be shared by lower_ubo_reference and lower_shared_reference. (lower_shared_reference will be used to support compute shader shared variables.) v2: * Add lower_buffer_access.h to makefile (Emil) * Remove static is_dereferenced_thing_row_major from lower_buffer_access.cpp. This will become a lower_buffer_access method in the next commit. * Pass mem_ctx as parameter rather than using a member variable (Iago) Signed-off-by: Jordan Justen Cc: Samuel Iglesias Gonsalvez Cc: Iago Toral Quiroga --- src/glsl/Makefile.sources| 2 + src/glsl/lower_buffer_access.cpp | 218 +++ src/glsl/lower_buffer_access.h | 55 ++ src/glsl/lower_ubo_reference.cpp | 191 ++ 4 files changed, 285 insertions(+), 181 deletions(-) create mode 100644 src/glsl/lower_buffer_access.cpp create mode 100644 src/glsl/lower_buffer_access.h diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index d4b02c1..1b51116 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -155,6 +155,8 @@ LIBGLSL_FILES = \ loop_analysis.h \ loop_controls.cpp \ loop_unroll.cpp \ + lower_buffer_access.cpp \ + lower_buffer_access.h \ lower_clip_distance.cpp \ lower_const_arrays_to_uniforms.cpp \ lower_discard.cpp \ diff --git a/src/glsl/lower_buffer_access.cpp b/src/glsl/lower_buffer_access.cpp new file mode 100644 index 000..ffc995f --- /dev/null +++ b/src/glsl/lower_buffer_access.cpp @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including 
the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_buffer_access.cpp + * + * Helper for IR lowering pass to replace dereferences of buffer object based + * shader variables with intrinsic function calls. + * + * This helper is used by lowering passes for UBOs, SSBOs and compute shader + * shared variables. + */ + +#include "ir.h" +#include "ir_builder.h" +#include "ir_rvalue_visitor.h" +#include "main/macros.h" +#include "util/list.h" +#include "glsl_parser_extras.h" +#include "lower_buffer_access.h" + +using namespace ir_builder; + +namespace lower_buffer_access { + +static inline int +writemask_for_size(unsigned n) +{ + return ((1 << n) - 1); +} + +/** + * Takes a deref and recursively calls itself to break the deref down to the + * point that the reads or writes generated are contiguous scalars or vectors. 
+ */ +void +lower_buffer_access::emit_access(void *mem_ctx, + bool is_write, + ir_dereference *deref, + ir_variable *base_offset, + unsigned int deref_offset, + bool row_major, + int matrix_columns, + unsigned int packing, + unsigned int write_mask) +{ + if (deref->type->is_record()) { + unsigned int field_offset = 0; + + for (unsigned i = 0; i < deref->type->length; i++) { + const struct glsl_struct_field *field = +&deref->type->fields.structure[i]; + ir_dereference *field_deref = +new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL), + field->name); + + field_offset = +glsl_align(field_offset, + field->type->std140_base_alignment(row_major)); + + emit_access(mem_ctx, is_write, field_deref, base_offset, + deref_offset + field_offset, + row_major, 1, packing, + writemask_for_size(field_deref->type->vector_elements)); + + field_offset += field->type->std140_size(row_major); + } + return; + } + + if (deref->type->is_array()
[Mesa-dev] [PATCH v2 07/42] i965: Implement programming of the L3 configuration.
From: Francisco Jerez --- src/mesa/drivers/dri/i965/gen7_l3_state.c | 95 +++ 1 file changed, 95 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c index 8f9ba5b..48bca29 100644 --- a/src/mesa/drivers/dri/i965/gen7_l3_state.c +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c @@ -161,3 +161,98 @@ get_l3_way_size(const struct brw_device_info *devinfo) else return 2 << devinfo->gt; } + +/** + * Program the hardware to use the specified L3 configuration. + */ +static void +setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg) +{ + const bool has_dc = cfg->n[L3P_DC] || cfg->n[L3P_ALL]; + const bool has_is = cfg->n[L3P_IS] || cfg->n[L3P_RO] || cfg->n[L3P_ALL]; + const bool has_c = cfg->n[L3P_C] || cfg->n[L3P_RO] || cfg->n[L3P_ALL]; + const bool has_t = cfg->n[L3P_T] || cfg->n[L3P_RO] || cfg->n[L3P_ALL]; + const bool has_slm = cfg->n[L3P_SLM]; + + /* According to the hardware docs, the L3 partitioning can only be changed +* while the pipeline is completely drained and the caches are flushed, +* which involves a first PIPE_CONTROL flush which stalls the pipeline and +* initiates invalidation of the relevant caches... +*/ + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | + PIPE_CONTROL_CONST_CACHE_INVALIDATE | + PIPE_CONTROL_INSTRUCTION_INVALIDATE | + PIPE_CONTROL_DATA_CACHE_INVALIDATE | + PIPE_CONTROL_NO_WRITE | + PIPE_CONTROL_CS_STALL); + + /* ...followed by a second stalling flush which guarantees that +* invalidation is complete when the L3 configuration registers are +* modified. +*/ + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_DATA_CACHE_INVALIDATE | + PIPE_CONTROL_NO_WRITE | + PIPE_CONTROL_CS_STALL); + + if (brw->gen >= 8) { + assert(!cfg->n[L3P_IS] && !cfg->n[L3P_C] && !cfg->n[L3P_T]); + + BEGIN_BATCH(3); + OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); + + /* Set up the L3 partitioning. */ + OUT_BATCH(GEN8_L3CNTLREG); + OUT_BATCH((has_slm ? 
GEN8_L3CNTLREG_SLM_ENABLE : 0) | +SET_FIELD(cfg->n[L3P_URB], GEN8_L3CNTLREG_URB_ALLOC) | +SET_FIELD(cfg->n[L3P_RO], GEN8_L3CNTLREG_RO_ALLOC) | +SET_FIELD(cfg->n[L3P_DC], GEN8_L3CNTLREG_DC_ALLOC) | +SET_FIELD(cfg->n[L3P_ALL], GEN8_L3CNTLREG_ALL_ALLOC)); + + ADVANCE_BATCH(); + + } else { + assert(!cfg->n[L3P_ALL]); + + /* When enabled SLM only uses a portion of the L3 on half of the banks, + * the matching space on the remaining banks has to be allocated to a + * client (URB for all validated configurations) set to the + * lower-bandwidth 2-bank address hashing mode. + */ + const bool urb_low_bw = has_slm && !brw->is_baytrail; + assert(!urb_low_bw || cfg->n[L3P_URB] == cfg->n[L3P_SLM]); + + /* Minimum number of ways that can be allocated to the URB. */ + const unsigned n0_urb = (brw->is_baytrail ? 32 : 0); + assert(cfg->n[L3P_URB] >= n0_urb); + + BEGIN_BATCH(7); + OUT_BATCH(MI_LOAD_REGISTER_IMM | (7 - 2)); + + /* Demote any clients with no ways assigned to LLC. */ + OUT_BATCH(GEN7_L3SQCREG1); + OUT_BATCH((brw->is_haswell ? HSW_L3SQCREG1_SQGHPCI_DEFAULT : + brw->is_baytrail ? VLV_L3SQCREG1_SQGHPCI_DEFAULT : + IVB_L3SQCREG1_SQGHPCI_DEFAULT) | +(has_dc ? 0 : GEN7_L3SQCREG1_CONV_DC_UC) | +(has_is ? 0 : GEN7_L3SQCREG1_CONV_IS_UC) | +(has_c ? 0 : GEN7_L3SQCREG1_CONV_C_UC) | +(has_t ? 0 : GEN7_L3SQCREG1_CONV_T_UC)); + + /* Set up the L3 partitioning. */ + OUT_BATCH(GEN7_L3CNTLREG2); + OUT_BATCH((has_slm ? GEN7_L3CNTLREG2_SLM_ENABLE : 0) | +SET_FIELD(cfg->n[L3P_URB] - n0_urb, GEN7_L3CNTLREG2_URB_ALLOC) | +(urb_low_bw ? 
GEN7_L3CNTLREG2_URB_LOW_BW : 0) | +SET_FIELD(cfg->n[L3P_ALL], GEN7_L3CNTLREG2_ALL_ALLOC) | +SET_FIELD(cfg->n[L3P_RO], GEN7_L3CNTLREG2_RO_ALLOC) | +SET_FIELD(cfg->n[L3P_DC], GEN7_L3CNTLREG2_DC_ALLOC)); + OUT_BATCH(GEN7_L3CNTLREG3); + OUT_BATCH(SET_FIELD(cfg->n[L3P_IS], GEN7_L3CNTLREG3_IS_ALLOC) | +SET_FIELD(cfg->n[L3P_C], GEN7_L3CNTLREG3_C_ALLOC) | +SET_FIELD(cfg->n[L3P_T], GEN7_L3CNTLREG3_T_ALLOC)); + + ADVANCE_BATCH(); + } +} -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 06/42] i965: Import tables enumerating the set of validated L3 configurations.
From: Francisco Jerez It should be possible to use additional L3 configurations other than the ones listed in the tables of validated allocations ("BSpec » 3D-Media-GPGPU Engine » L3 Cache and URB [IVB+] » L3 Cache and URB [*] » L3 Allocation and Programming"), but it seems sensible for now to hard-code the tables in order to stick to the hardware docs. Instead of setting up the arbitrary L3 partitioning given as input, the closest validated L3 configuration will be looked up in these tables and used to program the hardware. The included tables should work for Gen7-9. Note that the quantities are specified in ways rather than in KB; this is because the L3 control registers expect the value in ways, and because by doing that we can re-use a single table for all GT variants of the same generation (and in the case of IVB/HSW and CHV/SKL across different generations) which generally have different L3 way sizes but allow the same combinations of way allocations. --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/gen7_l3_state.c | 163 + 2 files changed, 164 insertions(+) create mode 100644 src/mesa/drivers/dri/i965/gen7_l3_state.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 5a88d66..91901ad 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -184,6 +184,7 @@ i965_FILES = \ gen7_cs_state.c \ gen7_disable.c \ gen7_gs_state.c \ + gen7_l3_state.c \ gen7_misc_state.c \ gen7_sf_state.c \ gen7_sol_state.c \ diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c new file mode 100644 index 000..8f9ba5b --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal 
in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "intel_batchbuffer.h" + +/** + * Chunk of L3 cache reserved for some specific purpose. + */ +enum brw_l3_partition { + /** Shared local memory. */ + L3P_SLM = 0, + /** Unified return buffer. */ + L3P_URB, + /** Union of DC and RO. */ + L3P_ALL, + /** Data cluster RW partition. */ + L3P_DC, + /** Union of IS, C and T. */ + L3P_RO, + /** Instruction and state cache. */ + L3P_IS, + /** Constant cache. */ + L3P_C, + /** Texture cache. */ + L3P_T, + /** Number of supported L3 partitions. */ + NUM_L3P +}; + +/** + * L3 configuration represented as the number of ways allocated for each + * partition. \sa get_l3_way_size(). + */ +struct brw_l3_config { + unsigned n[NUM_L3P]; +}; + +/** + * IVB/HSW validated L3 configurations. 
+ */ +static const struct brw_l3_config ivb_l3_configs[] = { + {{ 0, 32, 0, 0, 32, 0, 0, 0 }}, + {{ 0, 32, 0, 16, 16, 0, 0, 0 }}, + {{ 0, 32, 0, 4, 0, 8, 4, 16 }}, + {{ 0, 28, 0, 8, 0, 8, 4, 16 }}, + {{ 0, 28, 0, 16, 0, 8, 4, 8 }}, + {{ 0, 28, 0, 8, 0, 16, 4, 8 }}, + {{ 0, 28, 0, 0, 0, 16, 4, 16 }}, + {{ 0, 32, 0, 0, 0, 16, 0, 16 }}, + {{ 0, 28, 0, 4, 32, 0, 0, 0 }}, + {{ 16, 16, 0, 16, 16, 0, 0, 0 }}, + {{ 16, 16, 0, 8, 0, 8, 8, 8 }}, + {{ 16, 16, 0, 4, 0, 8, 4, 16 }}, + {{ 16, 16, 0, 4, 0, 16, 4, 8 }}, + {{ 16, 16, 0, 0, 32, 0, 0, 0 }}, + {{ 0 }} +}; + +/** + * VLV validated L3 configurations. + */ +static const struct brw_l3_config vlv_l3_configs[] = { + {{ 0, 80, 0, 0, 16, 0, 0, 0 }}, + {{ 0, 80, 0, 8, 8, 0, 0, 0 }}, + {{ 0, 64, 0, 16, 16, 0, 0, 0 }}, + {{ 0, 64, 0, 0, 32, 0, 0, 0 }}, + {{ 0, 60, 0, 4, 32, 0, 0, 0 }}, + {{ 32, 32, 0, 16, 16, 0, 0, 0 }}, + {{ 32, 40, 0, 8, 16, 0, 0, 0 }}, + {{ 32, 40,
[Mesa-dev] [PATCH v2 05/42] i965/gen8: Don't add workaround bits to PIPE_CONTROL stalls if DC flush is set.
From: Francisco Jerez According to the hardware docs, a DC flush is sufficient to make CS_STALL happy, so there's no need to add STALL_AT_SCOREBOARD whenever it's present. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_pipe_control.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c index a2aef8a..ae3d818 100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -36,6 +36,7 @@ * - Stall at Pixel Scoreboard * - Post-Sync Operation * - Depth Stall + * - DC Flush Enable * * I chose "Stall at Pixel Scoreboard" since we've used it effectively * in the past, but the choice is fairly arbitrary. @@ -49,7 +50,8 @@ gen8_add_cs_stall_workaround_bits(uint32_t *flags) PIPE_CONTROL_WRITE_DEPTH_COUNT | PIPE_CONTROL_WRITE_TIMESTAMP | PIPE_CONTROL_STALL_AT_SCOREBOARD | - PIPE_CONTROL_DEPTH_STALL; + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DATA_CACHE_INVALIDATE; /* If we're doing a CS stall, and don't already have one of the * workaround bits set, add "Stall at Pixel Scoreboard." -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 18/42] glsl ubo/ssbo: Move is_dereferenced_thing_row_major into lower_buffer_access
Signed-off-by: Jordan Justen Cc: Samuel Iglesias Gonsalvez Cc: Iago Toral Quiroga Reviewed-by: Iago Toral Quiroga --- src/glsl/lower_buffer_access.cpp | 90 src/glsl/lower_buffer_access.h | 2 + src/glsl/lower_ubo_reference.cpp | 90 3 files changed, 92 insertions(+), 90 deletions(-) diff --git a/src/glsl/lower_buffer_access.cpp b/src/glsl/lower_buffer_access.cpp index ffc995f..b5fe6e3 100644 --- a/src/glsl/lower_buffer_access.cpp +++ b/src/glsl/lower_buffer_access.cpp @@ -215,4 +215,94 @@ lower_buffer_access::emit_access(void *mem_ctx, } } +/** + * Determine if a thing being dereferenced is row-major + * + * There is some trickery here. + * + * If the thing being dereferenced is a member of uniform block \b without an + * instance name, then the name of the \c ir_variable is the field name of an + * interface type. If this field is row-major, then the thing referenced is + * row-major. + * + * If the thing being dereferenced is a member of uniform block \b with an + * instance name, then the last dereference in the tree will be an + * \c ir_dereference_record. If that record field is row-major, then the + * thing referenced is row-major. 
+ */ +bool +lower_buffer_access::is_dereferenced_thing_row_major(const ir_rvalue *deref) +{ + bool matrix = false; + const ir_rvalue *ir = deref; + + while (true) { + matrix = matrix || ir->type->without_array()->is_matrix(); + + switch (ir->ir_type) { + case ir_type_dereference_array: { + const ir_dereference_array *const array_deref = +(const ir_dereference_array *) ir; + + ir = array_deref->array; + break; + } + + case ir_type_dereference_record: { + const ir_dereference_record *const record_deref = +(const ir_dereference_record *) ir; + + ir = record_deref->record; + + const int idx = ir->type->field_index(record_deref->field); + assert(idx >= 0); + + const enum glsl_matrix_layout matrix_layout = +glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout); + + switch (matrix_layout) { + case GLSL_MATRIX_LAYOUT_INHERITED: +break; + case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: +return false; + case GLSL_MATRIX_LAYOUT_ROW_MAJOR: +return matrix || deref->type->without_array()->is_record(); + } + + break; + } + + case ir_type_dereference_variable: { + const ir_dereference_variable *const var_deref = +(const ir_dereference_variable *) ir; + + const enum glsl_matrix_layout matrix_layout = +glsl_matrix_layout(var_deref->var->data.matrix_layout); + + switch (matrix_layout) { + case GLSL_MATRIX_LAYOUT_INHERITED: +assert(!matrix); +return false; + case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: +return false; + case GLSL_MATRIX_LAYOUT_ROW_MAJOR: +return matrix || deref->type->without_array()->is_record(); + } + + unreachable("invalid matrix layout"); + break; + } + + default: + return false; + } + } + + /* The tree must have ended with a dereference that wasn't an +* ir_dereference_variable. That is invalid, and it should be impossible. 
+*/ + unreachable("invalid dereference tree"); + return false; +} + } /* namespace lower_buffer_access */ diff --git a/src/glsl/lower_buffer_access.h b/src/glsl/lower_buffer_access.h index 3d6e93f..b21ea28 100644 --- a/src/glsl/lower_buffer_access.h +++ b/src/glsl/lower_buffer_access.h @@ -48,6 +48,8 @@ public: ir_variable *base_offset, unsigned int deref_offset, bool row_major, int matrix_columns, unsigned int packing, unsigned int write_mask); + + bool is_dereferenced_thing_row_major(const ir_rvalue *deref); }; } /* namespace lower_buffer_access */ diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index f9cd056..ad7a522 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -42,96 +42,6 @@ using namespace ir_builder; -/** - * Determine if a thing being dereferenced is row-major - * - * There is some trickery here. - * - * If the thing being dereferenced is a member of uniform block \b without an - * instance name, then the name of the \c ir_variable is the field name of an - * interface type. If this field is row-major, then the thing referenced is - * row-major. - * - * If the thing being dereferenced is a member of uniform block \b with an - * instance name, then the last dereference in the tree will be an - * \c ir_dereference_record. If that record field is row-major, then the - * thing referenced is row-major. - */ -static bool -is_dereferenced_thing_row_major(const ir_rvalue *deref) -{ - bool matrix = false;
[Mesa-dev] [PATCH v2 23/42] glsl: Add lowering pass for shared variable references
In this lowering pass, shared variables are decomposed into intrinsic calls. v2: * Send mem_ctx as a parameter (Iago) Signed-off-by: Jordan Justen --- src/glsl/Makefile.sources | 1 + src/glsl/ir_optimization.h | 1 + src/glsl/linker.cpp | 4 + src/glsl/lower_shared_reference.cpp | 365 src/mesa/main/mtypes.h | 7 + 5 files changed, 378 insertions(+) create mode 100644 src/glsl/lower_shared_reference.cpp diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 1b51116..d73cfc5 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -181,6 +181,7 @@ LIBGLSL_FILES = \ lower_vector_insert.cpp \ lower_vertex_id.cpp \ lower_output_reads.cpp \ + lower_shared_reference.cpp \ lower_ubo_reference.cpp \ opt_algebraic.cpp \ opt_array_splitting.cpp \ diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 2fee81c..dabd80a 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -124,6 +124,7 @@ bool lower_const_arrays_to_uniforms(exec_list *instructions); bool lower_clip_distance(gl_shader *shader); void lower_output_reads(unsigned stage, exec_list *instructions); bool lower_packing_builtins(exec_list *instructions, int op_mask); +void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size); void lower_ubo_reference(struct gl_shader *shader); void lower_packed_varyings(void *mem_ctx, unsigned locations_used, ir_variable_mode mode, diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index db00f8f..dedcf77 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -4452,6 +4452,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks) lower_ubo_reference(prog->_LinkedShaders[i]); + if (ctx->Const.ShaderCompilerOptions[i].LowerShaderSharedVariables) + lower_shared_reference(prog->_LinkedShaders[i], +&prog->Comp.SharedSize); + lower_vector_derefs(prog->_LinkedShaders[i]); } diff --git 
a/src/glsl/lower_shared_reference.cpp b/src/glsl/lower_shared_reference.cpp new file mode 100644 index 000..21bc5d5 --- /dev/null +++ b/src/glsl/lower_shared_reference.cpp @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_shared_reference.cpp + * + * IR lower pass to replace dereferences of compute shader shared variables + * with intrinsic function calls. + * + * This relieves drivers of the responsibility of allocating space for the + * shared variables in the shared memory region. 
+ */ + +#include "ir.h" +#include "ir_builder.h" +#include "ir_rvalue_visitor.h" +#include "main/macros.h" +#include "util/list.h" +#include "glsl_parser_extras.h" +#include "lower_buffer_access.h" + +using namespace ir_builder; + +namespace { + +struct var_offset { + struct list_head node; + const ir_variable *var; + unsigned offset; +}; + +class lower_shared_reference_visitor : + public lower_buffer_access::lower_buffer_access { +public: + + lower_shared_reference_visitor(struct gl_shader *shader) + : lower_buffer_access::lower_buffer_access(GLSL_MATRIX_LAYOUT_COLUMN_MAJOR), +list_ctx(ralloc_context(NULL)), shader(shader), +shared_size(0u) + { + list_inithead(&var_offsets); + } + + ~lower_shared_reference_visitor() + { + ralloc_free(list_ctx); + } + + enum { + shared_load_access, + shared_store_access, + shared_atomic_access, + } buffer_access_type; + + void insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, +
[Mesa-dev] [PATCH v2 42/42] docs: Add ARB_compute_shader to 11.1.0 release notes
Signed-off-by: Jordan Justen --- docs/relnotes/11.1.0.html | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html index 6654311..c89b822 100644 --- a/docs/relnotes/11.1.0.html +++ b/docs/relnotes/11.1.0.html @@ -47,6 +47,7 @@ Note: some of the new features are only available with certain drivers. GL_ARB_arrays_of_arrays on i965 GL_ARB_blend_func_extended on freedreno (a3xx) GL_ARB_clear_texture on nv50, nvc0 +GL_ARB_compute_shader on i965 GL_ARB_copy_image on nv50, nvc0, radeonsi GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips GL_ARB_gpu_shader5 on r600 for Evergreen and later chips -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 20/42] glsl: Remove mem_ctx as member variable in lower_ubo_reference_visitor
Signed-off-by: Jordan Justen Cc: Iago Toral Quiroga --- src/glsl/lower_ubo_reference.cpp | 64 +--- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index 5082da8..2808ac1 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -54,24 +54,23 @@ public: void handle_rvalue(ir_rvalue **rvalue); ir_visitor_status visit_enter(ir_assignment *ir); - void setup_for_load_or_store(ir_variable *var, + void setup_for_load_or_store(void *mem_ctx, +ir_variable *var, ir_rvalue *deref, ir_rvalue **offset, unsigned *const_offset, bool *row_major, int *matrix_columns, unsigned packing); - ir_expression *ubo_load(const struct glsl_type *type, + ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type, ir_rvalue *offset); - ir_call *ssbo_load(const struct glsl_type *type, + ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type, ir_rvalue *offset); void check_for_ssbo_store(ir_assignment *ir); - void write_to_memory(ir_dereference *deref, -ir_variable *var, -ir_variable *write_var, -unsigned write_mask); - ir_call *ssbo_store(ir_rvalue *deref, ir_rvalue *offset, + void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var, +ir_variable *write_var, unsigned write_mask); + ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, unsigned write_mask); enum { @@ -94,7 +93,7 @@ public: ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, ir_dereference *, ir_variable *); - ir_expression *emit_ssbo_get_buffer_size(); + ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx); unsigned calculate_unsized_array_stride(ir_dereference *deref, unsigned packing); @@ -103,7 +102,6 @@ public: ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir); ir_visitor_status visit_enter(ir_call *ir); - void *mem_ctx; struct gl_shader *shader; struct gl_uniform_buffer_variable *ubo_var; ir_rvalue *uniform_block; @@ -242,7 +240,8 @@ 
interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, } void -lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, +lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, + ir_variable *var, ir_rvalue *deref, ir_rvalue **offset, unsigned *const_offset, @@ -307,7 +306,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) if (!var || !var->is_in_buffer_block()) return; - mem_ctx = ralloc_parent(shader->ir); + void *mem_ctx = ralloc_parent(shader->ir); ir_rvalue *offset = NULL; unsigned const_offset; @@ -322,7 +321,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) /* Compute the offset to the start if the dereference as well as other * information we need to configure the write */ - setup_for_load_or_store(var, deref, + setup_for_load_or_store(mem_ctx, var, deref, &offset, &const_offset, &row_major, &matrix_columns, packing); @@ -352,7 +351,8 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) } ir_expression * -lower_ubo_reference_visitor::ubo_load(const glsl_type *type, +lower_ubo_reference_visitor::ubo_load(void *mem_ctx, + const glsl_type *type, ir_rvalue *offset) { ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); @@ -371,7 +371,8 @@ shader_storage_buffer_object(const _mesa_glsl_parse_state *state) } ir_call * -lower_ubo_reference_visitor::ssbo_store(ir_rvalue *deref, +lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, +ir_rvalue *deref, ir_rvalue *offset, unsigned write_mask) { @@ -411,7 +412,8 @@ lower_ubo_reference_visitor::ssbo_store(ir_rvalue *deref, } ir_call * -lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type, +lower_ubo_reference_visitor::ssbo_load(void *mem_ctx, + co
[Mesa-dev] [PATCH v2 01/42] i965: Define symbolic constants for some useful L3 cache control registers.
From: Francisco Jerez Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/intel_reg.h | 53 +++ 1 file changed, 53 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h index a261c2b..0b167d5 100644 --- a/src/mesa/drivers/dri/i965/intel_reg.h +++ b/src/mesa/drivers/dri/i965/intel_reg.h @@ -192,3 +192,56 @@ #define MI_PREDICATE_RESULT 0x2418 #define MI_PREDICATE_RESULT_1 0x241C #define MI_PREDICATE_RESULT_2 0x2214 + +/* L3 cache control registers. */ +#define GEN7_L3SQCREG1 0xb010 +/* L3SQ general and high priority credit initialization. */ +# define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x0073 +# define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d3 +# define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x0061 +# define GEN7_L3SQCREG1_CONV_DC_UC (1 << 24) +# define GEN7_L3SQCREG1_CONV_IS_UC (1 << 25) +# define GEN7_L3SQCREG1_CONV_C_UC (1 << 26) +# define GEN7_L3SQCREG1_CONV_T_UC (1 << 27) + +#define GEN7_L3CNTLREG2 0xb020 +# define GEN7_L3CNTLREG2_SLM_ENABLE (1 << 0) +# define GEN7_L3CNTLREG2_URB_ALLOC_SHIFT 1 +# define GEN7_L3CNTLREG2_URB_ALLOC_MASK INTEL_MASK(6, 1) +# define GEN7_L3CNTLREG2_URB_LOW_BW (1 << 7) +# define GEN7_L3CNTLREG2_ALL_ALLOC_SHIFT 8 +# define GEN7_L3CNTLREG2_ALL_ALLOC_MASK INTEL_MASK(13, 8) +# define GEN7_L3CNTLREG2_RO_ALLOC_SHIFT 14 +# define GEN7_L3CNTLREG2_RO_ALLOC_MASK INTEL_MASK(19, 14) +# define GEN7_L3CNTLREG2_RO_LOW_BW (1 << 20) +# define GEN7_L3CNTLREG2_DC_ALLOC_SHIFT 21 +# define GEN7_L3CNTLREG2_DC_ALLOC_MASK INTEL_MASK(26, 21) +# define GEN7_L3CNTLREG2_DC_LOW_BW (1 << 27) + +#define GEN7_L3CNTLREG3 0xb024 +# define GEN7_L3CNTLREG3_IS_ALLOC_SHIFT 1 +# define GEN7_L3CNTLREG3_IS_ALLOC_MASK INTEL_MASK(6, 1) +# define GEN7_L3CNTLREG3_IS_LOW_BW (1 << 7) +# define GEN7_L3CNTLREG3_C_ALLOC_SHIFT 8 +# define GEN7_L3CNTLREG3_C_ALLOC_MASK INTEL_MASK(13, 8) +# define GEN7_L3CNTLREG3_C_LOW_BW (1 << 14) +# define GEN7_L3CNTLREG3_T_ALLOC_SHIFT 15 +# define GEN7_L3CNTLREG3_T_ALLOC_MASK INTEL_MASK(20, 15) +# define 
GEN7_L3CNTLREG3_T_LOW_BW (1 << 21) + +#define HSW_SCRATCH1 0xb038 +#define HSW_SCRATCH1_L3_ATOMIC_DISABLE (1 << 27) + +#define HSW_ROW_CHICKEN3 0xe49c +#define HSW_ROW_CHICKEN3_L3_ATOMIC_DISABLE (1 << 6) + +#define GEN8_L3CNTLREG 0x7034 +# define GEN8_L3CNTLREG_SLM_ENABLE (1 << 0) +# define GEN8_L3CNTLREG_URB_ALLOC_SHIFT 1 +# define GEN8_L3CNTLREG_URB_ALLOC_MASK INTEL_MASK(7, 1) +# define GEN8_L3CNTLREG_RO_ALLOC_SHIFT 11 +# define GEN8_L3CNTLREG_RO_ALLOC_MASK INTEL_MASK(17, 11) +# define GEN8_L3CNTLREG_DC_ALLOC_SHIFT 18 +# define GEN8_L3CNTLREG_DC_ALLOC_MASK INTEL_MASK(24, 18) +# define GEN8_L3CNTLREG_ALL_ALLOC_SHIFT 25 +# define GEN8_L3CNTLREG_ALL_ALLOC_MASK INTEL_MASK(31, 25) -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 33/42] glsl: Check for SSBO variable in SSBO atomic lowering
When an atomic function is called, we need to check to see if it is for an SSBO variable before lowering it to the SSBO specific intrinsic function. v2: * is_in_buffer_block => is_in_shader_storage_block (Iago) Signed-off-by: Jordan Justen Cc: Samuel Iglesias Gonsalvez Cc: Iago Toral Quiroga Reviewed-by: Iago Toral Quiroga --- src/glsl/lower_ubo_reference.cpp | 14 ++ 1 file changed, 14 insertions(+) diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index 915db6c..667a80e 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -862,6 +862,20 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) ir_call * lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir) { + exec_list& params = ir->actual_parameters; + + if (params.length() < 2) + return ir; + + ir_rvalue *rvalue = + ((ir_instruction *) params.get_head())->as_rvalue(); + if (!rvalue) + return ir; + + ir_variable *var = rvalue->variable_referenced(); + if (!var || !var->is_in_shader_storage_block()) + return ir; + const char *callee = ir->callee_name(); if (!strcmp("__intrinsic_atomic_add", callee) || !strcmp("__intrinsic_atomic_min", callee) || -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 13/42] i965: Work around L3 state leaks during context switches.
From: Francisco Jerez This is going to require some rather intrusive kernel changes to fix properly, in the meantime (and forever on at least pre-v4.1 kernels) we'll have to restore the hardware defaults at the end of every batch in which the L3 configuration was changed to avoid interfering with the DDX and GL clients that use an older non-L3-aware version of Mesa. --- src/mesa/drivers/dri/i965/brw_state.h | 4 +++ src/mesa/drivers/dri/i965/gen7_l3_state.c | 48 +++ src/mesa/drivers/dri/i965/intel_batchbuffer.c | 7 src/mesa/drivers/dri/i965/intel_batchbuffer.h | 6 +++- 4 files changed, 64 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 49f301a..b7c0039 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -380,6 +380,10 @@ void gen7_update_binding_table_from_array(struct brw_context *brw, void gen7_disable_hw_binding_tables(struct brw_context *brw); void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw); +/* gen7_l3_state.c */ +void +gen7_restore_default_l3_config(struct brw_context *brw); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c index 45bad02..84ab118 100644 --- a/src/mesa/drivers/dri/i965/gen7_l3_state.c +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c @@ -495,3 +495,51 @@ const struct brw_tracked_state gen7_l3_state = { }, .emit = emit_l3_state }; + +/** + * Hack to restore the default L3 configuration. + * + * This will be called at the end of every batch in order to reset the L3 + * configuration to the default values for the time being until the kernel is + * fixed. 
Until kernel commit 6702cf16e0ba8b0129f5aa1b6609d4e9c70bc13b + * (included in v4.1) we would set the MI_RESTORE_INHIBIT bit when submitting + * batch buffers for the default context used by the DDX, which meant that any + * context state changed by the GL would leak into the DDX, the assumption + * being that the DDX would initialize any state it cares about manually. The + * DDX is however not careful enough to program an L3 configuration + * explicitly, and it makes assumptions about it (URB size) which won't hold + * and cause it to misrender if we let our L3 set-up to leak into the DDX. + * + * Since v4.1 of the Linux kernel the default context is saved and restored + * normally, so it's far less likely for our L3 programming to interfere with + * other contexts -- In fact restoring the default L3 configuration at the end + * of the batch will be redundant most of the time. A kind of state leak is + * still possible though if the context making assumptions about L3 state is + * created immediately after our context was active (e.g. without the DDX + * default context being scheduled in between) because at present the DRM + * doesn't fully initialize the contents of newly created contexts and instead + * sets the MI_RESTORE_INHIBIT flag causing it to inherit the state from the + * last active context. + * + * It's possible to realize such a scenario if, say, an X server (or a GL + * application using an outdated non-L3-aware Mesa version) is started while + * another GL application is running and happens to have modified the L3 + * configuration, or if no X server is running at all and a GL application + * using a non-L3-aware Mesa version is started after another GL application + * ran and modified the L3 configuration -- The latter situation can actually + * be reproduced easily on IVB in our CI system. 
+ */ +void +gen7_restore_default_l3_config(struct brw_context *brw) +{ + const struct brw_l3_weights w = + get_default_l3_weights(brw->intelScreen->devinfo, false, false); + const struct brw_l3_config *const cfg = + get_l3_config(brw->intelScreen->devinfo, w); + + if (cfg != brw->l3.config && brw->can_do_pipelined_register_writes) { + setup_l3_config(brw, cfg); + update_urb_size(brw, cfg); + brw->l3.config = cfg; + } +} diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 0363bd3..f778074 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -208,6 +208,13 @@ brw_finish_batch(struct brw_context *brw) brw_emit_query_end(brw); if (brw->batch.ring == RENDER_RING) { + /* Work around L3 state leaks into contexts set MI_RESTORE_INHIBIT which + * assume that the L3 cache is configured according to the hardware + * defaults. + */ + if (brw->gen >= 7) + gen7_restore_default_l3_config(brw); + /* We may also need to snapshot and disable OA counters. */ brw_perf_monitor_finish_batch(brw); diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index 2b177d3..f473690 100644 --- a/src/mesa/drivers/dri/i9
[Mesa-dev] [PATCH v2 35/42] glsl: Translate atomic intrinsic functions on shared variables
When an intrinsic atomic operation is used on a shared variable, we translate it to a new 'shared variable' specific intrinsic function call. For example, a call to __intrinsic_atomic_add when used on a shared variable will be translated to a call to __intrinsic_atomic_add_shared. Signed-off-by: Jordan Justen --- src/glsl/lower_shared_reference.cpp | 151 1 file changed, 151 insertions(+) diff --git a/src/glsl/lower_shared_reference.cpp b/src/glsl/lower_shared_reference.cpp index 21bc5d5..7da32a5 100644 --- a/src/glsl/lower_shared_reference.cpp +++ b/src/glsl/lower_shared_reference.cpp @@ -80,6 +80,10 @@ public: ir_visitor_status visit_enter(ir_assignment *ir); void handle_assignment(ir_assignment *ir); + ir_call *lower_shared_atomic_intrinsic(ir_call *ir); + ir_call *check_for_shared_atomic_intrinsic(ir_call *ir); + ir_visitor_status visit_enter(ir_call *ir); + unsigned get_shared_offset(const ir_variable *); ir_call *shared_load(void *mem_ctx, const struct glsl_type *type, @@ -342,6 +346,153 @@ lower_shared_reference_visitor::shared_load(void *mem_ctx, return new(mem_ctx) ir_call(sig, deref_result, &call_params); } +/* Lowers the intrinsic call to a new internal intrinsic that swaps the + * access to the buffer variable in the first parameter by an offset + * and block index. This involves creating the new internal intrinsic + * (i.e. the new function signature). + */ +ir_call * +lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) +{ + /* Shared atomics usually have 2 parameters, the shared variable and an +* integer argument. The exception is CompSwap, that has an additional +* integer parameter. 
+*/ + int param_count = ir->actual_parameters.length(); + assert(param_count == 2 || param_count == 3); + + /* First argument must be a scalar integer buffer variable */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + assert(inst->ir_type == ir_type_dereference_variable || + inst->ir_type == ir_type_dereference_array || + inst->ir_type == ir_type_dereference_record || + inst->ir_type == ir_type_swizzle); + + ir_rvalue *deref = (ir_rvalue *) inst; + assert(deref->type->is_scalar() && deref->type->is_integer()); + + ir_variable *var = deref->variable_referenced(); + assert(var); + + /* Compute the offset to the start of the dereference and the +* block index +*/ + void *mem_ctx = ralloc_parent(shader->ir); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + const glsl_type *iface = var->get_interface_type(); + unsigned packing = + iface ? iface->interface_packing : GLSL_INTERFACE_PACKING_STD430; + buffer_access_type = shared_atomic_access; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + assert(offset); + assert(!row_major); + assert(matrix_columns == 1); + + ir_rvalue *deref_offset = + add(offset, new(mem_ctx) ir_constant(const_offset)); + + /* Create the new internal function signature that will take a block +* index and offset instead of a buffer variable +*/ + exec_list sig_params; + ir_variable *sig_param = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); + sig_params.push_tail(sig_param); + + const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? 
+ glsl_type::int_type : glsl_type::uint_type; + sig_param = new(mem_ctx) + ir_variable(type, "data1", ir_var_function_in); + sig_params.push_tail(sig_param); + + if (param_count == 3) { + sig_param = new(mem_ctx) +ir_variable(type, "data2", ir_var_function_in); + sig_params.push_tail(sig_param); + } + + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(deref->type, + compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + char func_name[64]; + sprintf(func_name, "%s_shared", ir->callee_name()); + ir_function *f = new(mem_ctx) ir_function(func_name); + f->add_signature(sig); + + /* Now, create the call to the internal intrinsic */ + exec_list call_params; + call_params.push_tail(deref_offset); + param = ir->actual_parameters.get_head()->get_next(); + ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); + call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); + if (param_count == 3) { + param = param->get_next(); + param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); + call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); + } + ir_dereference_variable *return_deref = + ir->return_deref->clone(mem_ctx, NULL); + ret
[Mesa-dev] [PATCH v2 31/42] glsl: Allow atomic functions to be used with shared variables
Signed-off-by: Jordan Justen Reviewed-by: Timothy Arceri --- src/glsl/ast_function.cpp | 18 ++ 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 466ece6..da1167a 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -143,19 +143,21 @@ verify_image_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, } static bool -verify_first_atomic_ssbo_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, +verify_first_atomic_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, ir_variable *var) { - if (!var || !var->is_in_shader_storage_block()) { + if (!var || + (!var->is_in_shader_storage_block() && +var->data.mode != ir_var_shader_shared)) { _mesa_glsl_error(loc, state, "First argument to atomic function " - "must be a buffer variable"); + "must be a buffer or shared variable"); return false; } return true; } static bool -is_atomic_ssbo_function(const char *func_name) +is_atomic_function(const char *func_name) { return !strcmp(func_name, "atomicAdd") || !strcmp(func_name, "atomicMin") || @@ -276,16 +278,16 @@ verify_parameter_modes(_mesa_glsl_parse_state *state, /* The first parameter of atomic functions must be a buffer variable */ const char *func_name = sig->function_name(); - bool is_atomic_ssbo = is_atomic_ssbo_function(func_name); - if (is_atomic_ssbo) { + bool is_atomic = is_atomic_function(func_name); + if (is_atomic) { const ir_rvalue *const actual = (ir_rvalue *) actual_ir_parameters.head; const ast_expression *const actual_ast = exec_node_data(ast_expression, actual_ast_parameters.head, link); YYLTYPE loc = actual_ast->get_location(); - if (!verify_first_atomic_ssbo_parameter(&loc, state, - actual->variable_referenced())) { + if (!verify_first_atomic_parameter(&loc, state, + actual->variable_referenced())) { return false; } } -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 02/42] i965: Keep track of whether LRI is allowed in the context struct.
From: Francisco Jerez This stores the result of can_do_pipelined_register_writes() in the context struct so we can find out later whether LRI can be used to program the L3 configuration. v2: * Split change of gen check in can_do_pipelined_register_writes (jljusten) Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_context.h | 5 + src/mesa/drivers/dri/i965/intel_extensions.c | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8d6bc19..c1ce4ff 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -842,6 +842,11 @@ struct brw_context bool use_resource_streamer; /** +* Whether LRI can be used to write register values from the batch buffer. +*/ + bool can_do_pipelined_register_writes; + + /** * Some versions of Gen hardware don't do centroid interpolation correctly * on unlit pixels, causing incorrect values for derivatives near triangle * edges. 
Enabling this flag causes the fragment shader to use diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 386b63c..f70f403 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -319,6 +319,8 @@ intelInitExtensions(struct gl_context *ctx) } brw->predicate.supported = false; + brw->can_do_pipelined_register_writes = + can_do_pipelined_register_writes(brw); if (brw->gen >= 7) { ctx->Extensions.ARB_conservative_depth = true; @@ -334,7 +336,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_texture_view = true; ctx->Extensions.ARB_shader_storage_buffer_object = true; - if (can_do_pipelined_register_writes(brw)) { + if (brw->can_do_pipelined_register_writes) { ctx->Extensions.ARB_draw_indirect = true; ctx->Extensions.ARB_transform_feedback2 = true; ctx->Extensions.ARB_transform_feedback3 = true; -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 04/42] i965: Define state flag to signal that the URB size has been altered.
From: Francisco Jerez This will make sure that we recalculate the URB layout anytime the URB size is modified by the L3 partitioning code. Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_context.h | 2 ++ src/mesa/drivers/dri/i965/brw_state_upload.c | 1 + src/mesa/drivers/dri/i965/gen7_urb.c | 3 +++ 3 files changed, 6 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c1ce4ff..55be020 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -213,6 +213,7 @@ enum brw_state_id { BRW_STATE_VS_ATTRIB_WORKAROUNDS, BRW_STATE_COMPUTE_PROGRAM, BRW_STATE_CS_WORK_GROUPS, + BRW_STATE_URB_SIZE, BRW_NUM_STATE_BITS }; @@ -293,6 +294,7 @@ enum brw_state_id { #define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS) #define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM) #define BRW_NEW_CS_WORK_GROUPS (1ull << BRW_STATE_CS_WORK_GROUPS) +#define BRW_NEW_URB_SIZE (1ull << BRW_STATE_URB_SIZE) struct brw_state_flags { /** State update flags signalled by mesa internals */ diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 6f8daf6..aab5c91 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -618,6 +618,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS), DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM), DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS), + DEFINE_BIT(BRW_NEW_URB_SIZE), {0, 0, 0} }; diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 161de77..99a9d3c 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -153,6 +153,7 @@ gen7_upload_urb(struct brw_context *brw) * skip the rest of the logic. 
*/ if (!(brw->ctx.NewDriverState & BRW_NEW_CONTEXT) && + !(brw->ctx.NewDriverState & BRW_NEW_URB_SIZE) && brw->urb.vsize == vs_size && brw->urb.gs_present == gs_present && brw->urb.gsize == gs_size) { @@ -176,6 +177,7 @@ gen7_upload_urb(struct brw_context *brw) unsigned chunk_size_bytes = 8192; /* Determine the size of the URB in chunks. +* BRW_NEW_URB_SIZE */ unsigned urb_chunks = brw->urb.size * 1024 / chunk_size_bytes; @@ -314,6 +316,7 @@ const struct brw_tracked_state gen7_urb = { .dirty = { .mesa = 0, .brw = BRW_NEW_CONTEXT | + BRW_NEW_URB_SIZE | BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_GS_PROG_DATA | BRW_NEW_VS_PROG_DATA, -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 38/42] nir: Add nir intrinsics for shared variable atomic operations
Signed-off-by: Jordan Justen --- src/glsl/nir/glsl_to_nir.cpp | 53 +++ src/glsl/nir/nir_intrinsics.h | 25 2 files changed, 78 insertions(+) diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 83724d3..a7ee4be 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -731,6 +731,22 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_load_shared; } else if (strcmp(ir->callee_name(), "__intrinsic_store_shared") == 0) { op = nir_intrinsic_store_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_shared") == 0) { + op = nir_intrinsic_shared_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_shared") == 0) { + op = nir_intrinsic_shared_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_shared") == 0) { + op = nir_intrinsic_shared_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_shared") == 0) { + op = nir_intrinsic_shared_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_shared") == 0) { + op = nir_intrinsic_shared_atomic_min; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_shared") == 0) { + op = nir_intrinsic_shared_atomic_max; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_shared") == 0) { + op = nir_intrinsic_shared_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_shared") == 0) { + op = nir_intrinsic_shared_atomic_comp_swap; } else { unreachable("not reached"); } @@ -1036,6 +1052,43 @@ nir_visitor::visit(ir_call *ir) nir_builder_instr_insert(&b, &instr->instr); break; } + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_min: + case nir_intrinsic_shared_atomic_max: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: { + int param_count = 
ir->actual_parameters.length(); + assert(param_count == 2 || param_count == 3); + + /* Offset */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data1 parameter (this is always present) */ + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data2 parameter (only with atomic_comp_swap) */ + if (param_count == 3) { +assert(op == nir_intrinsic_shared_atomic_comp_swap); +param = param->get_next(); +inst = (ir_instruction *) param; +instr->src[2] = + nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + } + + /* Atomic result */ + assert(ir->return_deref); + nir_ssa_dest_init(&instr->instr, &instr->dest, + ir->return_deref->type->vector_elements, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + } default: unreachable("not reached"); } diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 6912258..31b83fe 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -203,6 +203,31 @@ INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0) +/* + * CS shared variable atomic intrinsics + * + * All of the shared variable atomic memory operations read a value from + * memory, compute a new value using one of the operations below, write the + * new value to memory, and return the original value read. + * + * All operations take 2 sources except CompSwap that takes 3. These + * sources represent: + * + * 0: The offset into the shared variable storage region that the atomic + *operation will operate on. + * 1: The data parameter to the atomic function (i.e. the value to add + *in shared_atomic_add, etc). 
+ * 2: For CompSwap only: the second data parameter. + */ +INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_min, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_max, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0
[Mesa-dev] [PATCH v2 26/42] i965: Disable vector splitting on shared variables
Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp index cab5af3..2c7e0dc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -107,6 +107,7 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var) switch (var->data.mode) { case ir_var_uniform: case ir_var_shader_storage: + case ir_var_shader_shared: case ir_var_shader_in: case ir_var_shader_out: case ir_var_system_value: -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 37/42] glsl: Disable several optimizations on shared variables
Shared variables can be accessed by other threads within the same local workgroup. This prevents us from performing certain optimizations with shared variables. Signed-off-by: Jordan Justen --- src/glsl/opt_constant_propagation.cpp | 3 ++- src/glsl/opt_constant_variable.cpp| 3 ++- src/glsl/opt_copy_propagation.cpp | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/glsl/opt_constant_propagation.cpp b/src/glsl/opt_constant_propagation.cpp index 184aaa1..fb24a4f 100644 --- a/src/glsl/opt_constant_propagation.cpp +++ b/src/glsl/opt_constant_propagation.cpp @@ -500,7 +500,8 @@ ir_constant_propagation_visitor::add_constant(ir_assignment *ir) * the variable value isn't modified between this assignment and the next * instruction where its value is read. */ - if (deref->var->data.mode == ir_var_shader_storage) + if (deref->var->data.mode == ir_var_shader_storage || + deref->var->data.mode == ir_var_shader_shared) return; entry = new(this->mem_ctx) acp_entry(deref->var, ir->write_mask, constant); diff --git a/src/glsl/opt_constant_variable.cpp b/src/glsl/opt_constant_variable.cpp index cdfbc34..56f6a81 100644 --- a/src/glsl/opt_constant_variable.cpp +++ b/src/glsl/opt_constant_variable.cpp @@ -120,7 +120,8 @@ ir_constant_variable_visitor::visit_enter(ir_assignment *ir) * and we can't be sure that this variable won't be written by another * thread. 
*/ - if (var->data.mode == ir_var_shader_storage) + if (var->data.mode == ir_var_shader_storage || + var->data.mode == ir_var_shader_shared) return visit_continue; constval = ir->rhs->constant_expression_value(); diff --git a/src/glsl/opt_copy_propagation.cpp b/src/glsl/opt_copy_propagation.cpp index f206995..5d4cb4f 100644 --- a/src/glsl/opt_copy_propagation.cpp +++ b/src/glsl/opt_copy_propagation.cpp @@ -330,7 +330,8 @@ ir_copy_propagation_visitor::add_copy(ir_assignment *ir) */ ir->condition = new(ralloc_parent(ir)) ir_constant(false); this->progress = true; - } else if (lhs_var->data.mode != ir_var_shader_storage) { + } else if (lhs_var->data.mode != ir_var_shader_storage && + lhs_var->data.mode != ir_var_shader_shared) { entry = new(this->acp) acp_entry(lhs_var, rhs_var); this->acp->push_tail(entry); } -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 12/42] i965: Add debug flag to print out the new L3 state during transitions.
From: Francisco Jerez --- src/mesa/drivers/dri/i965/gen7_l3_state.c | 17 + src/mesa/drivers/dri/i965/intel_debug.c | 1 + src/mesa/drivers/dri/i965/intel_debug.h | 1 + 3 files changed, 19 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c index 58eb07b..45bad02 100644 --- a/src/mesa/drivers/dri/i965/gen7_l3_state.c +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c @@ -435,6 +435,18 @@ update_urb_size(struct brw_context *brw, const struct brw_l3_config *cfg) } } +/** + * Print out the specified L3 configuration. + */ +static void +dump_l3_config(const struct brw_l3_config *cfg) +{ + fprintf(stderr, "SLM=%d URB=%d ALL=%d DC=%d RO=%d IS=%d C=%d T=%d\n", + cfg->n[L3P_SLM], cfg->n[L3P_URB], cfg->n[L3P_ALL], + cfg->n[L3P_DC], cfg->n[L3P_RO], + cfg->n[L3P_IS], cfg->n[L3P_C], cfg->n[L3P_T]); +} + static void emit_l3_state(struct brw_context *brw) { @@ -464,6 +476,11 @@ emit_l3_state(struct brw_context *brw) setup_l3_config(brw, cfg); update_urb_size(brw, cfg); brw->l3.config = cfg; + + if (unlikely(INTEL_DEBUG & DEBUG_L3)) { + fprintf(stderr, "L3 config transition (%f > %f): ", dw, dw_threshold); + dump_l3_config(cfg); + } } } diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index f53c4ab..3fe941c 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -79,6 +79,7 @@ static const struct debug_control debug_control[] = { { "tcs", DEBUG_TCS }, { "ds", DEBUG_TES }, { "tes", DEBUG_TES }, + { "l3", DEBUG_L3 }, { NULL,0 } }; diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h index 9c6030a..5d3f97a 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.h +++ b/src/mesa/drivers/dri/i965/intel_debug.h @@ -71,6 +71,7 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_NO_COMPACTION (1ull << 35) #define DEBUG_TCS (1ull << 36) #define DEBUG_TES (1ull << 37) +#define DEBUG_L3 (1ull << 38) #ifdef 
HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 28/42] i965/fs: Handle nir shared variable store intrinsic function
Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 54 1 file changed, 54 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index e9336fd..c8c6370 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2330,6 +2330,60 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } + case nir_intrinsic_store_shared_indirect: + has_indirect = true; + /* fallthrough */ + case nir_intrinsic_store_shared: { + assert(devinfo->gen >= 7); + + /* Block index */ + fs_reg surf_index; + unsigned index = BRW_SLM_SURFACE_INDEX; + surf_index = fs_reg(index); + + /* Offset */ + fs_reg offset_reg = vgrf(glsl_type::uint_type); + unsigned const_offset_bytes = 0; + if (has_indirect) { + bld.MOV(offset_reg, get_nir_src(instr->src[1])); + } else { + const_offset_bytes = instr->const_index[0]; + bld.MOV(offset_reg, fs_reg(const_offset_bytes)); + } + + /* Value */ + fs_reg val_reg = get_nir_src(instr->src[0]); + + /* Writemask */ + unsigned writemask = instr->const_index[1]; + + /* Write each component present in the writemask */ + unsigned skipped_channels = 0; + for (int i = 0; i < instr->num_components; i++) { + int component_mask = 1 << i; + if (writemask & component_mask) { +if (skipped_channels) { + if (!has_indirect) { + const_offset_bytes += 4 * skipped_channels; + bld.MOV(offset_reg, fs_reg(const_offset_bytes)); + } else { + bld.ADD(offset_reg, offset_reg, + brw_imm_ud(4 * skipped_channels)); + } + skipped_channels = 0; +} + +emit_untyped_write(bld, surf_index, offset_reg, + offset(val_reg, bld, i), + 1 /* dims */, 1 /* size */, + BRW_PREDICATE_NONE); + } + + skipped_channels++; + } + break; + } + case nir_intrinsic_load_input_indirect: has_indirect = true; /* fallthrough */ -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 11/42] i965: Implement L3 state atom.
From: Francisco Jerez The L3 state atom calculates the target L3 partition weights when the program bound to some shader stage is modified, and in case they are far enough from the current partitioning it makes sure that the L3 state is re-emitted. --- src/mesa/drivers/dri/i965/brw_context.h | 6 src/mesa/drivers/dri/i965/brw_state.h | 1 + src/mesa/drivers/dri/i965/gen7_l3_state.c | 60 +++ 3 files changed, 67 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 55be020..cc011f7 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -678,6 +678,8 @@ enum brw_predicate_state { struct shader_times; +struct brw_l3_config; + /** * brw_context is derived from gl_context. */ @@ -1220,6 +1222,10 @@ struct brw_context int basevertex; struct { + const struct brw_l3_config *config; + } l3; + + struct { drm_intel_bo *bo; const char **names; int *ids; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 94734ba..49f301a 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -129,6 +129,7 @@ extern const struct brw_tracked_state gen7_depthbuffer; extern const struct brw_tracked_state gen7_clip_state; extern const struct brw_tracked_state gen7_disable_stages; extern const struct brw_tracked_state gen7_gs_state; +extern const struct brw_tracked_state gen7_l3_state; extern const struct brw_tracked_state gen7_ps_state; extern const struct brw_tracked_state gen7_push_constant_space; extern const struct brw_tracked_state gen7_sbe_state; diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c index 1a88261..58eb07b 100644 --- a/src/mesa/drivers/dri/i965/gen7_l3_state.c +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c @@ -418,3 +418,63 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg) } } } + +/** + * Update the URB size in the 
context state for the specified L3 + * configuration. + */ +static void +update_urb_size(struct brw_context *brw, const struct brw_l3_config *cfg) +{ + const unsigned sz = cfg->n[L3P_URB] * + get_l3_way_size(brw->intelScreen->devinfo); + + if (brw->urb.size != sz) { + brw->urb.size = sz; + brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE; + } +} + +static void +emit_l3_state(struct brw_context *brw) +{ + const struct brw_l3_weights w = get_pipeline_state_l3_weights(brw); + const float dw = diff_l3_weights(w, get_config_l3_weights(brw->l3.config)); + /* The distance between any two compatible weight vectors cannot exceed two +* due to the triangle inequality. +*/ + const float large_dw_threshold = 2.0; + /* Somewhat arbitrary, simply makes sure that there will be no repeated +* transitions to the same L3 configuration, could probably do better here. +*/ + const float small_dw_threshold = 0.5; + /* If we're emitting a new batch the caches should already be clean and the +* transition should be relatively cheap, so it shouldn't hurt much to use +* the smaller threshold. Otherwise use the larger threshold so that we +* only reprogram the L3 mid-batch if the most recently programmed +* configuration is incompatible with the current pipeline state. +*/ + const float dw_threshold = (brw->ctx.NewDriverState & BRW_NEW_BATCH ? + small_dw_threshold : large_dw_threshold); + + if (dw > dw_threshold && brw->can_do_pipelined_register_writes) { + const struct brw_l3_config *const cfg = + get_l3_config(brw->intelScreen->devinfo, w); + + setup_l3_config(brw, cfg); + update_urb_size(brw, cfg); + brw->l3.config = cfg; + } +} + +const struct brw_tracked_state gen7_l3_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | + BRW_NEW_VS_PROG_DATA | + BRW_NEW_GS_PROG_DATA | + BRW_NEW_FS_PROG_DATA | + BRW_NEW_CS_PROG_DATA, + }, + .emit = emit_l3_state +}; -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 16/42] glsl ubo/ssbo: Split buffer access to insert_buffer_access
This allows the code in emit_access to be generic enough to also be used for lowering shared variables. Signed-off-by: Jordan Justen Cc: Samuel Iglesias Gonsalvez Cc: Iago Toral Quiroga Reviewed-by: Iago Toral Quiroga --- src/glsl/lower_ubo_reference.cpp | 78 ++-- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index b2a1f25..545df33 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -170,6 +170,9 @@ public: ssbo_atomic_access, } buffer_access_type; + void insert_buffer_access(ir_dereference *deref, const glsl_type *type, + ir_rvalue *offset, unsigned mask, int channel); + void emit_access(bool is_write, ir_dereference *deref, ir_variable *base_offset, unsigned int deref_offset, bool row_major, int matrix_columns, @@ -689,6 +692,41 @@ lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type, return new(mem_ctx) ir_call(sig, deref_result, &call_params); } +void +lower_ubo_reference_visitor::insert_buffer_access(ir_dereference *deref, + const glsl_type *type, + ir_rvalue *offset, + unsigned mask, + int channel) +{ + switch (this->buffer_access_type) { + case ubo_load_access: + base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), +ubo_load(type, offset), +mask)); + break; + case ssbo_load_access: { + ir_call *load_ssbo = ssbo_load(type, offset); + base_ir->insert_before(load_ssbo); + ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); + ir_assignment *assignment = + assign(deref->clone(mem_ctx, NULL), value, mask); + base_ir->insert_before(assignment); + break; + } + case ssbo_store_access: + if (channel >= 0) { + base_ir->insert_after(ssbo_store(swizzle(deref, channel, 1), + offset, 1)); + } else { + base_ir->insert_after(ssbo_store(deref, offset, mask)); + } + break; + default: + unreachable("invalid buffer_access_type in insert_buffer_access"); + } +} + static inline int writemask_for_size(unsigned n) { @@ 
-802,19 +840,9 @@ lower_ubo_reference_visitor::emit_access(bool is_write, if (!row_major) { ir_rvalue *offset = add(base_offset, new(mem_ctx) ir_constant(deref_offset)); - if (is_write) - base_ir->insert_after(ssbo_store(deref, offset, write_mask)); - else { - if (this->buffer_access_type == ubo_load_access) { - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), - ubo_load(deref->type, offset))); - } else { -ir_call *load_ssbo = ssbo_load(deref->type, offset); -base_ir->insert_before(load_ssbo); -ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); -base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), value)); - } - } + unsigned mask = + is_write ? write_mask : (1 << deref->type->vector_elements) - 1; + insert_buffer_access(deref, deref->type, offset, mask, -1); } else { unsigned N = deref->type->is_double() ? 8 : 4; @@ -863,28 +891,8 @@ lower_ubo_reference_visitor::emit_access(bool is_write, ir_rvalue *chan_offset = add(base_offset, new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); - if (is_write) { -/* If the component is not in the writemask, then don't - * store any value. - */ -if (!((1 << i) & write_mask)) - continue; - -base_ir->insert_after(ssbo_store(swizzle(deref, i, 1), chan_offset, 1)); - } else { -if (this->buffer_access_type == ubo_load_access) { - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), - ubo_load(deref_type, chan_offset), - (1U << i))); -} else { - ir_call *load_ssbo = ssbo_load(deref_type, chan_offset); - base_ir->insert_before(load_ssbo); - ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), - value, - (1U << i))); -} - } + if (!is_write || ((1U << i) & write_mask)) +insert_buffer_access(deref, der
[Mesa-dev] [PATCH v2 32/42] glsl: Replace atomic_ssbo and ssbo_atomic with atomic
The atomic functions can also be used with shared variables in compute shaders. When lowering the intrinsic in lower_ubo_reference, we still create an SSBO-specific intrinsic since SSBO accesses can be indirectly addressed, whereas all compute shader shared variables live in a single shared variable area. v2: * Also remove the _internal suffix from ssbo atomic intrinsic names (Iago) Signed-off-by: Jordan Justen Cc: Samuel Iglesias Gonsalvez Cc: Iago Toral Quiroga Reviewed-by: Iago Toral Quiroga --- src/glsl/builtin_functions.cpp | 230 +++ src/glsl/lower_ubo_reference.cpp | 18 +-- src/glsl/nir/glsl_to_nir.cpp | 16 +-- 3 files changed, 132 insertions(+), 132 deletions(-) diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index 1349444..3e767e8 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -759,16 +759,16 @@ private: ir_function_signature *_atomic_counter_op(const char *intrinsic, builtin_available_predicate avail); - ir_function_signature *_atomic_ssbo_intrinsic2(builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_atomic_ssbo_op2(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_atomic_ssbo_intrinsic3(builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_atomic_ssbo_op3(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type); + ir_function_signature *_atomic_intrinsic2(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_op2(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_intrinsic3(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_op3(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type); B1(min3) B1(max3) @@ -915,53 +915,53 @@ builtin_builder::create_intrinsics() 
_atomic_counter_intrinsic(shader_atomic_counters), NULL); - add_function("__intrinsic_ssbo_atomic_add", -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::uint_type), -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::int_type), -NULL); - add_function("__intrinsic_ssbo_atomic_min", -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::uint_type), -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::int_type), -NULL); - add_function("__intrinsic_ssbo_atomic_max", -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::uint_type), -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::int_type), -NULL); - add_function("__intrinsic_ssbo_atomic_and", -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::uint_type), -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::int_type), -NULL); - add_function("__intrinsic_ssbo_atomic_or", -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::uint_type), -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::int_type), -NULL); - add_function("__intrinsic_ssbo_atomic_xor", -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::uint_type), -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::int_type), -NULL); - add_function("__intrinsic_ssbo_atomic_exchange", -_atomic_ssbo_intrinsic2(shader_storage_buffer_object, -glsl_type::uint_type), -_atomic_ssbo_int
[Mesa-dev] [PATCH v2 25/42] nir: Translate glsl shared var store intrinsic to nir intrinsic
Signed-off-by: Jordan Justen --- src/glsl/nir/glsl_to_nir.cpp | 33 + src/glsl/nir/nir_intrinsics.h | 3 ++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index a59d09c..a832570 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -729,6 +729,8 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_memory_barrier_shared; } else if (strcmp(ir->callee_name(), "__intrinsic_load_shared") == 0) { op = nir_intrinsic_load_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_store_shared") == 0) { + op = nir_intrinsic_store_shared; } else { unreachable("not reached"); } @@ -1003,6 +1005,37 @@ nir_visitor::visit(ir_call *ir) nir_builder_instr_insert(&b, &instr->instr); break; } + case nir_intrinsic_store_shared: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + + /* Check if we need the indirect version */ + ir_constant *const_offset = offset->as_constant(); + if (!const_offset) { +op = nir_intrinsic_store_shared_indirect; +ralloc_free(instr); +instr = nir_intrinsic_instr_create(shader, op); +instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); +instr->const_index[0] = 0; + } else { +instr->const_index[0] = const_offset->value.u[0]; + } + + instr->const_index[1] = write_mask->value.u[0]; + + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); + instr->num_components = val->type->vector_elements; + + nir_builder_instr_insert(&b, &instr->instr); + break; + } default: unreachable("not reached"); } diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index de15128..6912258 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -277,5 
+277,6 @@ LOAD(shared, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE) STORE(output, 0, 0, 0, 0) STORE(per_vertex_output, 1, 1, 0, 0) STORE(ssbo, 1, 1, 1, 0) +STORE(shared, 0, 0, 0, 0) -LAST_INTRINSIC(store_ssbo_indirect) +LAST_INTRINSIC(store_shared_indirect) -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 21/42] glsl: Add default matrix ordering in lower_buffer_access
For compute shader shared variables, we will set a default of column major. Signed-off-by: Jordan Justen --- src/glsl/lower_buffer_access.cpp | 5 +++-- src/glsl/lower_buffer_access.h | 10 ++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/glsl/lower_buffer_access.cpp b/src/glsl/lower_buffer_access.cpp index 297ed69..66e7abe 100644 --- a/src/glsl/lower_buffer_access.cpp +++ b/src/glsl/lower_buffer_access.cpp @@ -281,8 +281,9 @@ lower_buffer_access::is_dereferenced_thing_row_major(const ir_rvalue *deref) switch (matrix_layout) { case GLSL_MATRIX_LAYOUT_INHERITED: -assert(!matrix); -return false; +assert(default_matrix_layout != GLSL_MATRIX_LAYOUT_INHERITED || + !matrix); +return default_matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: return false; case GLSL_MATRIX_LAYOUT_ROW_MAJOR: diff --git a/src/glsl/lower_buffer_access.h b/src/glsl/lower_buffer_access.h index f8e1070..82b35ed 100644 --- a/src/glsl/lower_buffer_access.h +++ b/src/glsl/lower_buffer_access.h @@ -39,6 +39,14 @@ namespace lower_buffer_access { class lower_buffer_access : public ir_rvalue_enter_visitor { public: + lower_buffer_access() : + default_matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED) + {} + + lower_buffer_access(enum glsl_matrix_layout default_matrix_layout) : + default_matrix_layout(default_matrix_layout) + {} + virtual void insert_buffer_access(void *mem_ctx, ir_dereference *deref, const glsl_type *type, ir_rvalue *offset, @@ -55,6 +63,8 @@ public: ir_rvalue **offset, unsigned *const_offset, bool *row_major, int *matrix_columns, unsigned packing); + + enum glsl_matrix_layout default_matrix_layout; }; } /* namespace lower_buffer_access */ -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 40/42] i965: Enable ARB_compute_shader extension on supported hardware
Enable ARB_compute_shader on gen7+, on hardware that supports the OpenGL 4.3 requirements of a local group size of 1024. With SIMD16 support, this is limited to Ivy Bridge and Haswell. Broadwell will work with a local group size up to 896 on SIMD16 meaning programs that use this size or lower should run when setting MESA_EXTENSION_OVERRIDE=GL_ARB_compute_shader. Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_context.c | 11 ++- src/mesa/drivers/dri/i965/intel_extensions.c | 2 ++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 2ea0a9e..fc32a2f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -330,7 +330,9 @@ brw_initialize_context_constants(struct brw_context *brw) [MESA_SHADER_TESS_EVAL] = false, [MESA_SHADER_GEOMETRY] = brw->gen >= 6, [MESA_SHADER_FRAGMENT] = true, - [MESA_SHADER_COMPUTE] = _mesa_extension_override_enables.ARB_compute_shader, + [MESA_SHADER_COMPUTE] = + (ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) || + _mesa_extension_override_enables.ARB_compute_shader, }; unsigned num_stages = 0; @@ -613,7 +615,7 @@ brw_initialize_context_constants(struct brw_context *brw) } static void -brw_adjust_cs_context_constants(struct brw_context *brw) +brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads) { struct gl_context *ctx = &brw->ctx; @@ -627,7 +629,7 @@ brw_adjust_cs_context_constants(struct brw_context *brw) */ const int simd_size = ctx->API == API_OPENGL_CORE ? 
16 : 8; - const uint32_t max_invocations = simd_size * brw->max_cs_threads; + const uint32_t max_invocations = simd_size * max_threads; ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations; ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations; ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations; @@ -817,6 +819,7 @@ brwCreateContext(gl_api api, if (INTEL_DEBUG & DEBUG_PERF) brw->perf_debug = true; + brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads); brw_initialize_context_constants(brw); ctx->Const.ResetStrategy = notify_reset @@ -871,8 +874,6 @@ brwCreateContext(gl_api api, brw->urb.max_ds_entries = devinfo->urb.max_ds_entries; brw->urb.max_gs_entries = devinfo->urb.max_gs_entries; - brw_adjust_cs_context_constants(brw); - /* Estimate the size of the mappable aperture into the GTT. There's an * ioctl to get the whole GTT size, but not one to get the mappable subset. * It turns out it's basically always 256MB, though some ancient hardware diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 81215db..6326cfc 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -356,6 +356,8 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_viewport_array = true; ctx->Extensions.AMD_vertex_shader_viewport_index = true; ctx->Extensions.ARB_shader_subroutine = true; + if (ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) +ctx->Extensions.ARB_compute_shader = true; } } -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 15/42] glsl ubo/ssbo: Use enum to track current buffer access type
v2: * Rename ssbo_get_array_length to ssbo_unsized_array_length_access (Iago) * Always use this-> when referencing buffer_access_type (Iago) Signed-off-by: Jordan Justen Cc: Samuel Iglesias Gonsalvez Cc: Iago Toral Quiroga --- src/glsl/lower_ubo_reference.cpp | 26 +- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index b74aa3d..b2a1f25 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -162,6 +162,14 @@ public: ir_call *ssbo_store(ir_rvalue *deref, ir_rvalue *offset, unsigned write_mask); + enum { + ubo_load_access, + ssbo_load_access, + ssbo_store_access, + ssbo_unsized_array_length_access, + ssbo_atomic_access, + } buffer_access_type; + void emit_access(bool is_write, ir_dereference *deref, ir_variable *base_offset, unsigned int deref_offset, bool row_major, int matrix_columns, @@ -189,7 +197,6 @@ public: struct gl_uniform_buffer_variable *ubo_var; ir_rvalue *uniform_block; bool progress; - bool is_shader_storage; }; /** @@ -339,10 +346,9 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, deref, &nonconst_block_index); /* Locate the block by interface name */ - this->is_shader_storage = var->is_in_shader_storage_block(); unsigned num_blocks; struct gl_uniform_block **blocks; - if (this->is_shader_storage) { + if (this->buffer_access_type != ubo_load_access) { num_blocks = shader->NumShaderStorageBlocks; blocks = shader->ShaderStorageBlocks; } else { @@ -552,6 +558,10 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) int matrix_columns; unsigned packing = var->get_interface_type()->interface_packing; + this->buffer_access_type = + var->is_in_shader_storage_block() ? 
+ ssbo_load_access : ubo_load_access; + /* Compute the offset to the start if the dereference as well as other * information we need to configure the write */ @@ -795,7 +805,7 @@ lower_ubo_reference_visitor::emit_access(bool is_write, if (is_write) base_ir->insert_after(ssbo_store(deref, offset, write_mask)); else { - if (!this->is_shader_storage) { + if (this->buffer_access_type == ubo_load_access) { base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), ubo_load(deref->type, offset))); } else { @@ -862,7 +872,7 @@ lower_ubo_reference_visitor::emit_access(bool is_write, base_ir->insert_after(ssbo_store(swizzle(deref, i, 1), chan_offset, 1)); } else { -if (!this->is_shader_storage) { +if (this->buffer_access_type == ubo_load_access) { base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), ubo_load(deref_type, chan_offset), (1U << i))); @@ -891,6 +901,8 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref, int matrix_columns; unsigned packing = var->get_interface_type()->interface_packing; + this->buffer_access_type = ssbo_store_access; + /* Compute the offset to the start if the dereference as well as other * information we need to configure the write */ @@ -1068,6 +1080,8 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu unsigned packing = var->get_interface_type()->interface_packing; int unsized_array_stride = calculate_unsized_array_stride(deref, packing); + this->buffer_access_type = ssbo_unsized_array_length_access; + /* Compute the offset to the start if the dereference as well as other * information we need to calculate the length. 
*/ @@ -1181,6 +1195,8 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) int matrix_columns; unsigned packing = var->get_interface_type()->interface_packing; + this->buffer_access_type = ssbo_atomic_access; + setup_for_load_or_store(var, deref, &offset, &const_offset, &row_major, &matrix_columns, -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 34/42] glsl: Check for SSBO variable in check_for_ssbo_store
The compiler probably already blocks this earlier on, but we should be checking for an SSBO here. Signed-off-by: Jordan Justen Cc: Samuel Iglesias Gonsalvez Cc: Iago Toral Quiroga --- src/glsl/lower_ubo_reference.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index 667a80e..cf55a2e 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -723,7 +723,7 @@ lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir) return; ir_variable *var = ir->lhs->variable_referenced(); - if (!var || !var->is_in_buffer_block()) + if (!var || !var->is_in_shader_storage_block()) return; /* We have a write to a buffer variable, so declare a temporary and rewrite -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 22/42] glsl: Don't lower_variable_index_to_cond_assign for shared variables
Signed-off-by: Jordan Justen --- src/glsl/lower_variable_index_to_cond_assign.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp index 1ab3afe..a1ba934 100644 --- a/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -378,6 +378,9 @@ public: case ir_var_shader_storage: return this->lower_uniforms; + case ir_var_shader_shared: +return false; + case ir_var_function_in: case ir_var_const_in: return this->lower_temps; -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 09/42] i965: Implement selection of the closest L3 configuration based on a vector of weights.
From: Francisco Jerez The input of the L3 set-up code is a vector giving the approximate desired relative size of each partition. This implements logic to compare the input vector against the table of validated configurations for the device and pick the closest compatible one. --- src/mesa/drivers/dri/i965/gen7_l3_state.c | 95 +++ 1 file changed, 95 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c index c863b7f..4d0cfcd 100644 --- a/src/mesa/drivers/dri/i965/gen7_l3_state.c +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c @@ -163,6 +163,101 @@ get_l3_way_size(const struct brw_device_info *devinfo) } /** + * L3 configuration represented as a vector of weights giving the desired + * relative size of each partition. The scale is arbitrary, only the ratios + * between weights will have an influence on the selection of the closest L3 + * configuration. + */ +struct brw_l3_weights { + float w[NUM_L3P]; +}; + +/** + * L1-normalize a vector of L3 partition weights. + */ +static struct brw_l3_weights +norm_l3_weights(struct brw_l3_weights w) +{ + float sz = 0; + + for (unsigned i = 0; i < NUM_L3P; i++) + sz += w.w[i]; + + for (unsigned i = 0; i < NUM_L3P; i++) + w.w[i] /= sz; + + return w; +} + +/** + * Get the relative partition weights of the specified L3 configuration. + */ +static struct brw_l3_weights +get_config_l3_weights(const struct brw_l3_config *cfg) +{ + if (cfg) { + struct brw_l3_weights w; + + for (unsigned i = 0; i < NUM_L3P; i++) + w.w[i] = cfg->n[i]; + + return norm_l3_weights(w); + } else { + const struct brw_l3_weights w = { { 0 } }; + return w; + } +} + +/** + * Distance between two L3 configurations represented as vectors of weights. + * Usually just the L1 metric except when the two configurations are + * considered incompatible in which case the distance will be infinite. 
Note + * that the compatibility condition is asymmetric -- They will be considered + * incompatible whenever the reference configuration \p w0 requires SLM, DC, + * or URB but \p w1 doesn't provide it. + */ +static float +diff_l3_weights(struct brw_l3_weights w0, struct brw_l3_weights w1) +{ + if ((w0.w[L3P_SLM] && !w1.w[L3P_SLM]) || + (w0.w[L3P_DC] && !w1.w[L3P_DC] && !w1.w[L3P_ALL]) || + (w0.w[L3P_URB] && !w1.w[L3P_URB])) { + return HUGE_VALF; + + } else { + float dw = 0; + + for (unsigned i = 0; i < NUM_L3P; i++) + dw += fabs(w0.w[i] - w1.w[i]); + + return dw; + } +} + +/** + * Return the closest validated L3 configuration for the specified device and + * weight vector. + */ +static const struct brw_l3_config * +get_l3_config(const struct brw_device_info *devinfo, struct brw_l3_weights w0) +{ + const struct brw_l3_config *const cfgs = get_l3_configs(devinfo); + const struct brw_l3_config *cfg_best = NULL; + float dw_best = HUGE_VALF; + + for (const struct brw_l3_config *cfg = cfgs; cfg->n[L3P_URB]; cfg++) { + const float dw = diff_l3_weights(w0, get_config_l3_weights(cfg)); + + if (dw < dw_best) { + cfg_best = cfg; + dw_best = dw; + } + } + + return cfg_best; +} + +/** * Program the hardware to use the specified L3 configuration. */ static void -- 2.6.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev