On Thu, Nov 3, 2016 at 8:47 PM, Francisco Jerez <curroje...@riseup.net> wrote:
> Ian Romanick <i...@freedesktop.org> writes:
>
>> On 10/28/2016 04:13 PM, Marek Olšák wrote:
>>> From: Marek Olšák <marek.ol...@amd.com>
>>>
>>> ---
>>>  src/compiler/glsl/ir_optimization.h           |  3 ++-
>>>  src/compiler/glsl/lower_if_to_cond_assign.cpp | 23 ++++++++++++++++++++---
>>>  src/compiler/glsl/test_optpass.cpp            |  2 +-
>>>  src/mesa/drivers/dri/i965/brw_link.cpp        |  2 +-
>>>  src/mesa/program/ir_to_mesa.cpp               |  3 ++-
>>>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp    |  3 ++-
>>>  6 files changed, 28 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
>>> index 6f2bc32..c033f6b 100644
>>> --- a/src/compiler/glsl/ir_optimization.h
>>> +++ b/src/compiler/glsl/ir_optimization.h
>>> @@ -101,21 +101,22 @@ bool do_dead_code(exec_list *instructions, bool uniform_locations_assigned);
>>>  bool do_dead_code_local(exec_list *instructions);
>>>  bool do_dead_code_unlinked(exec_list *instructions);
>>>  bool do_dead_functions(exec_list *instructions);
>>>  bool opt_flip_matrices(exec_list *instructions);
>>>  bool do_function_inlining(exec_list *instructions);
>>>  bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false);
>>>  bool do_lower_texture_projection(exec_list *instructions);
>>>  bool do_if_simplification(exec_list *instructions);
>>>  bool opt_flatten_nested_if_blocks(exec_list *instructions);
>>>  bool do_discard_simplification(exec_list *instructions);
>>> -bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0);
>>> +bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
>>> +                             unsigned max_depth = 0);
>>>  bool do_mat_op_to_vec(exec_list *instructions);
>>>  bool do_minmax_prune(exec_list *instructions);
>>>  bool do_noop_swizzle(exec_list *instructions);
>>>  bool do_structure_splitting(exec_list *instructions);
>>>  bool do_swizzle_swizzle(exec_list *instructions);
>>>  bool do_vectorize(exec_list *instructions);
>>>  bool do_tree_grafting(exec_list *instructions);
>>>  bool do_vec_index_to_cond_assign(exec_list *instructions);
>>>  bool do_vec_index_to_swizzle(exec_list *instructions);
>>>  bool lower_discard(exec_list *instructions);
>>> diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp
>>> index 01a7335..a413306 100644
>>> --- a/src/compiler/glsl/lower_if_to_cond_assign.cpp
>>> +++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp
>>> @@ -47,56 +47,60 @@
>>>
>>>  #include "compiler/glsl_types.h"
>>>  #include "ir.h"
>>>  #include "util/set.h"
>>>  #include "util/hash_table.h" /* Needed for the hashing functions */
>>>
>>>  namespace {
>>>
>>>  class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
>>>  public:
>>> -   ir_if_to_cond_assign_visitor(unsigned max_depth)
>>> +   ir_if_to_cond_assign_visitor(gl_shader_stage stage,
>>> +                                unsigned max_depth)
>>>     {
>>>        this->progress = false;
>>> +      this->stage = stage;
>>>        this->max_depth = max_depth;
>>>        this->depth = 0;
>>>
>>>        this->condition_variables =
>>>           _mesa_set_create(NULL, _mesa_hash_pointer,
>>>                            _mesa_key_pointer_equal);
>>>     }
>>>
>>>     ~ir_if_to_cond_assign_visitor()
>>>     {
>>>        _mesa_set_destroy(this->condition_variables, NULL);
>>>     }
>>>
>>>     ir_visitor_status visit_enter(ir_if *);
>>>     ir_visitor_status visit_leave(ir_if *);
>>>
>>>     bool found_unsupported_op;
>>>     bool progress;
>>> +   gl_shader_stage stage;
>>>     unsigned max_depth;
>>>     unsigned depth;
>>>
>>>     struct set *condition_variables;
>>>  };
>>>
>>>  } /* anonymous namespace */
>>>
>>>  bool
>>> -lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth)
>>> +lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
>>> +                        unsigned max_depth)
>>>  {
>>>     if (max_depth == UINT_MAX)
>>>        return false;
>>>
>>> -   ir_if_to_cond_assign_visitor v(max_depth);
>>> +   ir_if_to_cond_assign_visitor v(stage, max_depth);
>>>
>>>     visit_list_elements(&v, instructions);
>>>
>>>     return v.progress;
>>>  }
>>>
>>>  void
>>>  check_ir_node(ir_instruction *ir, void *data)
>>>  {
>>>     ir_if_to_cond_assign_visitor *v = (ir_if_to_cond_assign_visitor *)data;
>>> @@ -105,20 +109,33 @@ check_ir_node(ir_instruction *ir, void *data)
>>>     case ir_type_call:
>>>     case ir_type_discard:
>>>     case ir_type_loop:
>>>     case ir_type_loop_jump:
>>>     case ir_type_return:
>>>     case ir_type_emit_vertex:
>>>     case ir_type_end_primitive:
>>>     case ir_type_barrier:
>>>        v->found_unsupported_op = true;
>>>        break;
>>> +
>>> +   case ir_type_dereference_variable: {
>>> +      ir_variable *var = ir->as_dereference_variable()->variable_referenced();
>>> +
>>> +      /* Tess control shader outputs are like shared memory with complex
>>> +       * side effects, so treat it that way.
>>> +       */
>>> +      if (v->stage == MESA_SHADER_TESS_CTRL &&
>>> +          var->data.mode == ir_var_shader_out)
>>> +         v->found_unsupported_op = true;
>>
>> Hmm... it seems like anything that modifies shared state (shared
>> tessellation data, shared compute, atomic, image, and SSBOs) should
>> probably disable this. Reads should be fine.
>>
>
> Yeah, you're right, but I believe that at least atomic counters and
> images will already cause the optimization pass to bail because they
> can only be modified using GLSL IR intrinsics; other kinds of shared
> data probably need special handling.
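
For illustration, the broader check Ian describes might look something like
the sketch below, sitting next to the new ir_type_dereference_variable case
in check_ir_node. It is not part of this patch: the write detection keys on
the assignment's left-hand side, and the ir_var_shader_storage /
ir_var_shader_shared checks assume those are the variable modes backing SSBO
and compute shared storage.

   /* Sketch only, not from the patch: flag writes to externally visible
    * or shared storage as unsupported, while reads of the same variables
    * keep getting lowered.
    */
   case ir_type_assignment: {
      ir_variable *var = ir->as_assignment()->lhs->variable_referenced();

      if (var->data.mode == ir_var_shader_storage ||   /* SSBO backing (assumed) */
          var->data.mode == ir_var_shader_shared ||    /* compute shared (assumed) */
          (v->stage == MESA_SHADER_TESS_CTRL &&
           var->data.mode == ir_var_shader_out))       /* TCS outputs, as in the patch */
         v->found_unsupported_op = true;
      break;
   }

Atomic counter and image writes only exist as intrinsic calls at this point,
so they already reach check_ir_node as ir_call nodes and hit the existing
ir_type_call case; that leaves the question of shared variables.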

All drivers lower shared variables to intrinsics.

Marek
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev