On Sun, Jan 4, 2015 at 8:56 PM, Connor Abbott <cwabbo...@gmail.com> wrote:
> Reviewed-by: Connor Abbott <cwabbo...@gmail.com> > > Nice to see that this idea worked out well! > > On Tue, Dec 16, 2014 at 1:11 AM, Jason Ekstrand <ja...@jlekstrand.net> > wrote: > >> We used to have the number of components built into the intrinsic. This >> meant that all of our load/store intrinsics had vec1, vec2, vec3, and vec4 >> variants. This led to piles of switch statements to generate the correct >> texture names, and introspection to figure out the number of components. >> > > This doesn't touch textures, I think you can just delete "texture" and > it'll make more sense. > Yeah, I think s/texture/intrinsic/ would be the thing to do > > >> We can make things much nicer by allowing "vectorized" intrinsics. >> --- >> src/glsl/nir/glsl_to_nir.cpp | 60 ++++------------ >> src/glsl/nir/nir.h | 15 +++- >> src/glsl/nir/nir_intrinsics.h | 79 +++++++-------------- >> src/glsl/nir/nir_lower_io.c | 115 >> +++++++------------------------ >> src/glsl/nir/nir_lower_locals_to_regs.c | 18 ++--- >> src/glsl/nir/nir_lower_system_values.c | 3 +- >> src/glsl/nir/nir_lower_variables.c | 74 +++++++------------- >> src/glsl/nir/nir_validate.c | 10 +-- >> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 64 +++++------------ >> 9 files changed, 123 insertions(+), 315 deletions(-) >> >> diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp >> index f85b50e..088a8e9 100644 >> --- a/src/glsl/nir/glsl_to_nir.cpp >> +++ b/src/glsl/nir/glsl_to_nir.cpp >> @@ -629,7 +629,8 @@ nir_visitor::visit(ir_call *ir) >> nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); >> >> nir_intrinsic_instr *store_instr = >> - nir_intrinsic_instr_create(shader, >> nir_intrinsic_store_var_vec1); >> + nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); >> + store_instr->num_components = 1; >> >> ir->return_deref->accept(this); >> store_instr->variables[0] = this->deref_head; >> @@ -704,17 +705,9 @@ nir_visitor::visit(ir_assignment *ir) >> * back into the LHS. 
Copy propagation should get rid of the mess. >> */ >> >> - nir_intrinsic_op load_op; >> - switch (ir->lhs->type->vector_elements) { >> - case 1: load_op = nir_intrinsic_load_var_vec1; break; >> - case 2: load_op = nir_intrinsic_load_var_vec2; break; >> - case 3: load_op = nir_intrinsic_load_var_vec3; break; >> - case 4: load_op = nir_intrinsic_load_var_vec4; break; >> - default: unreachable("Invalid number of components"); break; >> - } >> - >> - nir_intrinsic_instr *load = >> nir_intrinsic_instr_create(this->shader, >> - load_op); >> + nir_intrinsic_instr *load = >> + nir_intrinsic_instr_create(this->shader, >> nir_intrinsic_load_var); >> + load->num_components = ir->lhs->type->vector_elements; >> load->dest.is_ssa = true; >> nir_ssa_def_init(&load->instr, &load->dest.ssa, >> num_components, NULL); >> @@ -759,17 +752,9 @@ nir_visitor::visit(ir_assignment *ir) >> src.ssa = &vec->dest.dest.ssa; >> } >> >> - nir_intrinsic_op store_op; >> - switch (ir->lhs->type->vector_elements) { >> - case 1: store_op = nir_intrinsic_store_var_vec1; break; >> - case 2: store_op = nir_intrinsic_store_var_vec2; break; >> - case 3: store_op = nir_intrinsic_store_var_vec3; break; >> - case 4: store_op = nir_intrinsic_store_var_vec4; break; >> - default: unreachable("Invalid number of components"); break; >> - } >> - >> - nir_intrinsic_instr *store = nir_intrinsic_instr_create(this->shader, >> - store_op); >> + nir_intrinsic_instr *store = >> + nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var); >> + store->num_components = ir->lhs->type->vector_elements; >> nir_deref *store_deref = nir_copy_deref(this->shader, >> &lhs_deref->deref); >> store->variables[0] = nir_deref_as_var(store_deref); >> store->src[0] = src; >> @@ -848,17 +833,9 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir) >> * must emit a variable load. 
>> */ >> >> - nir_intrinsic_op load_op; >> - switch (ir->type->vector_elements) { >> - case 1: load_op = nir_intrinsic_load_var_vec1; break; >> - case 2: load_op = nir_intrinsic_load_var_vec2; break; >> - case 3: load_op = nir_intrinsic_load_var_vec3; break; >> - case 4: load_op = nir_intrinsic_load_var_vec4; break; >> - default: unreachable("Invalid number of components"); >> - } >> - >> nir_intrinsic_instr *load_instr = >> - nir_intrinsic_instr_create(this->shader, load_op); >> + nir_intrinsic_instr_create(this->shader, >> nir_intrinsic_load_var); >> + load_instr->num_components = ir->type->vector_elements; >> load_instr->variables[0] = this->deref_head; >> add_instr(&load_instr->instr, ir->type->vector_elements); >> } >> @@ -917,23 +894,12 @@ nir_visitor::visit(ir_expression *ir) >> >> nir_intrinsic_op op; >> if (const_index) { >> - switch (ir->type->vector_elements) { >> - case 1: op = nir_intrinsic_load_ubo_vec1; break; >> - case 2: op = nir_intrinsic_load_ubo_vec2; break; >> - case 3: op = nir_intrinsic_load_ubo_vec3; break; >> - case 4: op = nir_intrinsic_load_ubo_vec4; break; >> - default: assert(0); break; >> - } >> + op = nir_intrinsic_load_ubo; >> } else { >> - switch (ir->type->vector_elements) { >> - case 1: op = nir_intrinsic_load_ubo_vec1_indirect; break; >> - case 2: op = nir_intrinsic_load_ubo_vec2_indirect; break; >> - case 3: op = nir_intrinsic_load_ubo_vec3_indirect; break; >> - case 4: op = nir_intrinsic_load_ubo_vec4_indirect; break; >> - default: assert(0); break; >> - } >> + op = nir_intrinsic_load_ubo_indirect; >> } >> nir_intrinsic_instr *load = >> nir_intrinsic_instr_create(this->shader, op); >> + load->num_components = ir->type->vector_elements; >> load->const_index[0] = ir->operands[0]->as_constant()->value.u[0]; >> load->const_index[1] = const_index ? 
const_index->value.u[0] : 0; >> /* base offset */ >> load->const_index[2] = 1; /* number of vec4's */ >> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h >> index 30146d6..412ceea 100644 >> --- a/src/glsl/nir/nir.h >> +++ b/src/glsl/nir/nir.h >> @@ -693,6 +693,9 @@ typedef struct { >> >> nir_dest dest; >> >> + /** number of components if this is a vectorized intrinsic */ >> + uint8_t num_components; >> + >> int const_index[3]; >> >> nir_deref_var *variables[2]; >> @@ -732,12 +735,20 @@ typedef struct { >> >> unsigned num_srcs; /** < number of register/SSA inputs */ >> >> - /** number of components of each input register */ >> + /** number of components of each input register >> + * >> + * If this value is 0, the number of components is given by the >> + * num_components field of nir_intrinsic_instr. >> + */ >> unsigned src_components[NIR_INTRINSIC_MAX_INPUTS]; >> >> bool has_dest; >> >> - /** number of components of each output register */ >> + /** number of components of the output register >> + * >> + * If this value is 0, the number of components is given by the >> + * num_components field of nir_intrinsic_instr. >> + */ >> unsigned dest_components; >> >> /** the number of inputs/outputs that are variables */ >> diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h >> index e4ad8cd..75bd12f 100644 >> --- a/src/glsl/nir/nir_intrinsics.h >> +++ b/src/glsl/nir/nir_intrinsics.h >> @@ -42,19 +42,9 @@ >> #define ARR(...) 
{ __VA_ARGS__ } >> >> >> -INTRINSIC(load_var_vec1, 0, ARR(), true, 1, 1, 0, >> - NIR_INTRINSIC_CAN_ELIMINATE) >> -INTRINSIC(load_var_vec2, 0, ARR(), true, 2, 1, 0, >> - NIR_INTRINSIC_CAN_ELIMINATE) >> -INTRINSIC(load_var_vec3, 0, ARR(), true, 3, 1, 0, >> - NIR_INTRINSIC_CAN_ELIMINATE) >> -INTRINSIC(load_var_vec4, 0, ARR(), true, 4, 1, 0, >> - NIR_INTRINSIC_CAN_ELIMINATE) >> -INTRINSIC(store_var_vec1, 1, ARR(1), false, 0, 1, 0, 0) >> -INTRINSIC(store_var_vec2, 1, ARR(2), false, 0, 1, 0, 0) >> -INTRINSIC(store_var_vec3, 1, ARR(3), false, 0, 1, 0, 0) >> -INTRINSIC(store_var_vec4, 1, ARR(4), false, 0, 1, 0, 0) >> -INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) >> +INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE) >> +INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0) >> +INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) >> >> /* >> * a barrier is an intrinsic with no inputs/outputs but which can't be >> moved >> @@ -94,27 +84,6 @@ SYSTEM_VALUE(sample_pos, 2) >> SYSTEM_VALUE(sample_mask_in, 1) >> SYSTEM_VALUE(invocation_id, 1) >> >> -#define LOAD_OR_INTERP(name, num_srcs, src_comps, num_indices, flags) \ >> - INTRINSIC(name##_vec1, num_srcs, ARR(src_comps), true, 1, \ >> - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ >> - INTRINSIC(name##_vec2, num_srcs, ARR(src_comps), true, 2, \ >> - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ >> - INTRINSIC(name##_vec3, num_srcs, ARR(src_comps), true, 3, \ >> - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ >> - INTRINSIC(name##_vec4, num_srcs, ARR(src_comps), true, 4, \ >> - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ >> - INTRINSIC(name##_vec1_indirect, 1 + num_srcs, ARR(1, src_comps), >> true, 1, \ >> - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ >> - INTRINSIC(name##_vec2_indirect, 1 + num_srcs, ARR(1, src_comps), >> true, 2, \ >> - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ >> - INTRINSIC(name##_vec3_indirect, 1 + num_srcs, ARR(1, src_comps), 
>> true, 3, \ >> - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \ >> - INTRINSIC(name##_vec4_indirect, 1 + num_srcs, ARR(1, src_comps), >> true, 4, \ >> - 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) >> - >> -#define LOAD(name, num_indices, flags) \ >> - LOAD_OR_INTERP(load_##name, 0, 0, num_indices, flags) >> - >> /* >> * The first index is the address to load from, and the second index is >> the >> * number of array elements to load. For UBO's (and SSBO's), the first >> index >> @@ -129,6 +98,12 @@ SYSTEM_VALUE(invocation_id, 1) >> * elements begin immediately after the previous array element. >> */ >> >> +#define LOAD(name, num_indices, flags) \ >> + INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \ >> + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \ >> + INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, >> \ >> + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \ >> + >> LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER) >> LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER) >> LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER) >> @@ -140,29 +115,16 @@ LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER) >> * interp_at_offset* intrinsics take a second source that is either a >> * sample id or a vec2 position offset. 
>> */ >> -#define INTERP(name, flags) \ >> - LOAD_OR_INTERP(interp_##name, 0, 0, 2, flags) >> - >> -#define INTERP_WITH_ARG(name, src_comps, flags) \ >> - LOAD_OR_INTERP(interp_##name, 1, src_comps, 2, flags) >> >> -INTERP(at_centroid, NIR_INTRINSIC_CAN_REORDER) >> -INTERP_WITH_ARG(at_sample, 1, NIR_INTRINSIC_CAN_REORDER) >> -INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER) >> +#define INTERP(name, num_srcs, src_comps) \ >> + INTRINSIC(interp_##name, num_srcs, ARR(src_comps), true, \ >> + 0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | >> NIR_INTRINSIC_CAN_REORDER) \ >> + INTRINSIC(interp_##name##_indirect, 1 + num_srcs, ARR(1, src_comps), >> true, \ >> + 0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | >> NIR_INTRINSIC_CAN_REORDER) >> >> -#define STORE(name, num_indices, flags) \ >> - INTRINSIC(store_##name##_vec1, 1, ARR(1), false, 0, 0, num_indices, >> flags) \ >> - INTRINSIC(store_##name##_vec2, 1, ARR(2), false, 0, 0, num_indices, >> flags) \ >> - INTRINSIC(store_##name##_vec3, 1, ARR(3), false, 0, 0, num_indices, >> flags) \ >> - INTRINSIC(store_##name##_vec4, 1, ARR(4), false, 0, 0, num_indices, >> flags) \ >> - INTRINSIC(store_##name##_vec1_indirect, 2, ARR(1, 1), false, 0, 0, \ >> - num_indices, flags) \ >> - INTRINSIC(store_##name##_vec2_indirect, 2, ARR(2, 1), false, 0, 0, \ >> - num_indices, flags) \ >> - INTRINSIC(store_##name##_vec3_indirect, 2, ARR(3, 1), false, 0, 0, \ >> - num_indices, flags) \ >> - INTRINSIC(store_##name##_vec4_indirect, 2, ARR(4, 1), false, 0, 0, \ >> - num_indices, flags) \ >> +INTERP(at_centroid, 0, 0) >> +INTERP(at_sample, 1, 1) >> +INTERP(at_offset, 1, 1) >> >> /* >> * Stores work the same way as loads, except now the first register >> input is >> @@ -170,7 +132,12 @@ INTERP_WITH_ARG(at_offset, 1, >> NIR_INTRINSIC_CAN_REORDER) >> * offset. 
>> */ >> >> +#define STORE(name, num_indices, flags) \ >> + INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \ >> + INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \ >> + num_indices, flags) \ >> + >> STORE(output, 2, 0) >> /* STORE(ssbo, 3, 0) */ >> >> -LAST_INTRINSIC(store_output_vec4_indirect) >> +LAST_INTRINSIC(store_output_indirect) >> diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c >> index a3b8186..ed3ce81 100644 >> --- a/src/glsl/nir/nir_lower_io.c >> +++ b/src/glsl/nir/nir_lower_io.c >> @@ -186,66 +186,6 @@ get_io_offset(nir_deref_var *deref, nir_instr >> *instr, nir_src *indirect, >> return base_offset; >> } >> >> -static nir_intrinsic_op >> -get_load_op(nir_variable_mode mode, bool indirect, unsigned >> num_components) >> -{ >> - if (indirect) { >> - switch (mode) { >> - case nir_var_shader_in: >> - switch (num_components) { >> - case 1: return nir_intrinsic_load_input_vec1_indirect; >> - case 2: return nir_intrinsic_load_input_vec2_indirect; >> - case 3: return nir_intrinsic_load_input_vec3_indirect; >> - case 4: return nir_intrinsic_load_input_vec4_indirect; >> - default: unreachable("Invalid number of components"); break; >> - } >> - break; >> - >> - case nir_var_uniform: >> - switch (num_components) { >> - case 1: return nir_intrinsic_load_uniform_vec1_indirect; >> - case 2: return nir_intrinsic_load_uniform_vec2_indirect; >> - case 3: return nir_intrinsic_load_uniform_vec3_indirect; >> - case 4: return nir_intrinsic_load_uniform_vec4_indirect; >> - default: unreachable("Invalid number of components"); break; >> - } >> - break; >> - >> - default: >> - unreachable("Invalid input type"); >> - break; >> - } >> - } else { >> - switch (mode) { >> - case nir_var_shader_in: >> - switch (num_components) { >> - case 1: return nir_intrinsic_load_input_vec1; >> - case 2: return nir_intrinsic_load_input_vec2; >> - case 3: return nir_intrinsic_load_input_vec3; >> - case 4: return 
nir_intrinsic_load_input_vec4; >> - default: unreachable("Invalid number of components"); break; >> - } >> - break; >> - >> - case nir_var_uniform: >> - switch (num_components) { >> - case 1: return nir_intrinsic_load_uniform_vec1; >> - case 2: return nir_intrinsic_load_uniform_vec2; >> - case 3: return nir_intrinsic_load_uniform_vec3; >> - case 4: return nir_intrinsic_load_uniform_vec4; >> - default: unreachable("Invalid number of components"); break; >> - } >> - break; >> - >> - default: >> - unreachable("Invalid input type"); >> - break; >> - } >> - } >> - >> - return nir_intrinsic_load_input_vec1; >> -} >> - >> static bool >> nir_lower_io_block(nir_block *block, void *void_state) >> { >> @@ -258,22 +198,35 @@ nir_lower_io_block(nir_block *block, void >> *void_state) >> nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); >> >> switch (intrin->intrinsic) { >> - case nir_intrinsic_load_var_vec1: >> - case nir_intrinsic_load_var_vec2: >> - case nir_intrinsic_load_var_vec3: >> - case nir_intrinsic_load_var_vec4: { >> + case nir_intrinsic_load_var: { >> nir_variable_mode mode = intrin->variables[0]->var->data.mode; >> if (mode != nir_var_shader_in && mode != nir_var_uniform) >> continue; >> >> bool has_indirect = deref_has_indirect(intrin->variables[0]); >> - unsigned num_components = >> - nir_intrinsic_infos[intrin->intrinsic].dest_components; >> >> - nir_intrinsic_op load_op = get_load_op(mode, has_indirect, >> - num_components); >> + nir_intrinsic_op load_op; >> + switch (mode) { >> + case nir_var_shader_in: >> + if (has_indirect) { >> + load_op = nir_intrinsic_load_input_indirect; >> + } else { >> + load_op = nir_intrinsic_load_input; >> + } >> + break; >> + case nir_var_uniform: >> + if (has_indirect) { >> + load_op = nir_intrinsic_load_uniform_indirect; >> + } else { >> + load_op = nir_intrinsic_load_uniform; >> + } >> + break; >> + default: >> + unreachable("Unknown variable mode"); >> + } >> nir_intrinsic_instr *load = >> 
nir_intrinsic_instr_create(state->mem_ctx, >> load_op); >> + load->num_components = intrin->num_components; >> >> nir_src indirect; >> unsigned offset = get_io_offset(intrin->variables[0], >> @@ -289,7 +242,7 @@ nir_lower_io_block(nir_block *block, void *void_state) >> if (intrin->dest.is_ssa) { >> load->dest.is_ssa = true; >> nir_ssa_def_init(&load->instr, &load->dest.ssa, >> - num_components, NULL); >> + intrin->num_components, NULL); >> >> nir_src new_src = { >> .is_ssa = true, >> @@ -307,38 +260,22 @@ nir_lower_io_block(nir_block *block, void >> *void_state) >> break; >> } >> >> - case nir_intrinsic_store_var_vec1: >> - case nir_intrinsic_store_var_vec2: >> - case nir_intrinsic_store_var_vec3: >> - case nir_intrinsic_store_var_vec4: { >> + case nir_intrinsic_store_var: { >> if (intrin->variables[0]->var->data.mode != nir_var_shader_out) >> continue; >> >> bool has_indirect = deref_has_indirect(intrin->variables[0]); >> - unsigned num_components = >> - nir_intrinsic_infos[intrin->intrinsic].src_components[0]; >> >> nir_intrinsic_op store_op; >> if (has_indirect) { >> - switch (num_components) { >> - case 1: store_op = nir_intrinsic_store_output_vec1_indirect; >> break; >> - case 2: store_op = nir_intrinsic_store_output_vec2_indirect; >> break; >> - case 3: store_op = nir_intrinsic_store_output_vec3_indirect; >> break; >> - case 4: store_op = nir_intrinsic_store_output_vec4_indirect; >> break; >> - default: unreachable("Invalid number of components"); break; >> - } >> + store_op = nir_intrinsic_store_output_indirect; >> } else { >> - switch (num_components) { >> - case 1: store_op = nir_intrinsic_store_output_vec1; break; >> - case 2: store_op = nir_intrinsic_store_output_vec2; break; >> - case 3: store_op = nir_intrinsic_store_output_vec3; break; >> - case 4: store_op = nir_intrinsic_store_output_vec4; break; >> - default: unreachable("Invalid number of components"); break; >> - } >> + store_op = nir_intrinsic_store_output; >> } >> >> nir_intrinsic_instr *store 
= >> nir_intrinsic_instr_create(state->mem_ctx, >> >> store_op); >> + store->num_components = intrin->num_components; >> >> nir_src indirect; >> unsigned offset = get_io_offset(intrin->variables[0], >> diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c >> b/src/glsl/nir/nir_lower_locals_to_regs.c >> index caf1c29..081ed6b 100644 >> --- a/src/glsl/nir/nir_lower_locals_to_regs.c >> +++ b/src/glsl/nir/nir_lower_locals_to_regs.c >> @@ -219,22 +219,18 @@ lower_locals_to_regs_block(nir_block *block, void >> *void_state) >> nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); >> >> switch (intrin->intrinsic) { >> - case nir_intrinsic_load_var_vec1: >> - case nir_intrinsic_load_var_vec2: >> - case nir_intrinsic_load_var_vec3: >> - case nir_intrinsic_load_var_vec4: { >> + case nir_intrinsic_load_var: { >> if (intrin->variables[0]->var->data.mode != nir_var_local) >> continue; >> >> nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, >> nir_op_imov); >> mov->src[0].src = get_deref_reg_src(intrin->variables[0], >> &intrin->instr, state); >> - unsigned num_components = >> mov->src[0].src.reg.reg->num_components; >> - mov->dest.write_mask = (1 << num_components) - 1; >> + mov->dest.write_mask = (1 << intrin->num_components) - 1; >> if (intrin->dest.is_ssa) { >> mov->dest.dest.is_ssa = true; >> nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, >> - num_components, NULL); >> + intrin->num_components, NULL); >> >> nir_src new_src = { >> .is_ssa = true, >> @@ -252,20 +248,16 @@ lower_locals_to_regs_block(nir_block *block, void >> *void_state) >> break; >> } >> >> - case nir_intrinsic_store_var_vec1: >> - case nir_intrinsic_store_var_vec2: >> - case nir_intrinsic_store_var_vec3: >> - case nir_intrinsic_store_var_vec4: { >> + case nir_intrinsic_store_var: { >> if (intrin->variables[0]->var->data.mode != nir_var_local) >> continue; >> >> nir_src reg_src = get_deref_reg_src(intrin->variables[0], >> &intrin->instr, state); >> - unsigned num_components = 
reg_src.reg.reg->num_components; >> >> nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, >> nir_op_imov); >> mov->src[0].src = nir_src_copy(intrin->src[0], state->mem_ctx); >> - mov->dest.write_mask = (1 << num_components) - 1; >> + mov->dest.write_mask = (1 << intrin->num_components) - 1; >> mov->dest.dest.is_ssa = false; >> mov->dest.dest.reg.reg = reg_src.reg.reg; >> mov->dest.dest.reg.base_offset = reg_src.reg.base_offset; >> diff --git a/src/glsl/nir/nir_lower_system_values.c >> b/src/glsl/nir/nir_lower_system_values.c >> index cbd1dac..e700df4 100644 >> --- a/src/glsl/nir/nir_lower_system_values.c >> +++ b/src/glsl/nir/nir_lower_system_values.c >> @@ -30,8 +30,7 @@ >> static void >> convert_instr(nir_intrinsic_instr *instr) >> { >> - if (instr->intrinsic != nir_intrinsic_load_var_vec1 && >> - instr->intrinsic != nir_intrinsic_load_var_vec2) >> + if (instr->intrinsic != nir_intrinsic_load_var) >> return; >> >> nir_variable *var = instr->variables[0]->var; >> diff --git a/src/glsl/nir/nir_lower_variables.c >> b/src/glsl/nir/nir_lower_variables.c >> index 052b021..dab3639 100644 >> --- a/src/glsl/nir/nir_lower_variables.c >> +++ b/src/glsl/nir/nir_lower_variables.c >> @@ -449,17 +449,11 @@ fill_deref_tables_block(nir_block *block, void >> *void_state) >> nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); >> >> switch (intrin->intrinsic) { >> - case nir_intrinsic_load_var_vec1: >> - case nir_intrinsic_load_var_vec2: >> - case nir_intrinsic_load_var_vec3: >> - case nir_intrinsic_load_var_vec4: >> + case nir_intrinsic_load_var: >> register_load_instr(intrin, true, state); >> break; >> >> - case nir_intrinsic_store_var_vec1: >> - case nir_intrinsic_store_var_vec2: >> - case nir_intrinsic_store_var_vec3: >> - case nir_intrinsic_store_var_vec4: >> + case nir_intrinsic_store_var: >> register_store_instr(intrin, true, state); >> break; >> >> @@ -541,17 +535,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, >> nir_deref *src_deref = 
nir_copy_deref(state->mem_ctx, >> &src_head->deref); >> nir_deref *dest_deref = nir_copy_deref(state->mem_ctx, >> &dest_head->deref); >> >> - nir_intrinsic_op load_op; >> - switch (num_components) { >> - case 1: load_op = nir_intrinsic_load_var_vec1; break; >> - case 2: load_op = nir_intrinsic_load_var_vec2; break; >> - case 3: load_op = nir_intrinsic_load_var_vec3; break; >> - case 4: load_op = nir_intrinsic_load_var_vec4; break; >> - default: unreachable("Invalid number of components"); break; >> - } >> - >> - nir_intrinsic_instr *load = >> nir_intrinsic_instr_create(state->mem_ctx, >> - load_op); >> + nir_intrinsic_instr *load = >> + nir_intrinsic_instr_create(state->mem_ctx, >> nir_intrinsic_load_var); >> + load->num_components = num_components; >> load->variables[0] = nir_deref_as_var(src_deref); >> load->dest.is_ssa = true; >> nir_ssa_def_init(&load->instr, &load->dest.ssa, num_components, >> NULL); >> @@ -559,17 +545,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, >> nir_instr_insert_before(&copy_instr->instr, &load->instr); >> register_load_instr(load, false, state); >> >> - nir_intrinsic_op store_op; >> - switch (num_components) { >> - case 1: store_op = nir_intrinsic_store_var_vec1; break; >> - case 2: store_op = nir_intrinsic_store_var_vec2; break; >> - case 3: store_op = nir_intrinsic_store_var_vec3; break; >> - case 4: store_op = nir_intrinsic_store_var_vec4; break; >> - default: unreachable("Invalid number of components"); break; >> - } >> - >> - nir_intrinsic_instr *store = >> nir_intrinsic_instr_create(state->mem_ctx, >> - store_op); >> + nir_intrinsic_instr *store = >> + nir_intrinsic_instr_create(state->mem_ctx, >> nir_intrinsic_store_var); >> + store->num_components = num_components; >> store->variables[0] = nir_deref_as_var(dest_deref); >> store->src[0].is_ssa = true; >> store->src[0].ssa = &load->dest.ssa; >> @@ -782,14 +760,9 @@ lower_deref_to_ssa_block(nir_block *block, void >> *void_state) >> nir_intrinsic_instr *intrin = 
nir_instr_as_intrinsic(instr); >> >> switch (intrin->intrinsic) { >> - case nir_intrinsic_load_var_vec1: >> - case nir_intrinsic_load_var_vec2: >> - case nir_intrinsic_load_var_vec3: >> - case nir_intrinsic_load_var_vec4: { >> + case nir_intrinsic_load_var: { >> struct deref_node *node = >> get_deref_node(intrin->variables[0], >> false, state); >> - unsigned num_chans = >> - nir_intrinsic_infos[intrin->intrinsic].dest_components; >> >> if (node == NULL) { >> /* If we hit this path then we are referencing an invalid >> @@ -799,7 +772,8 @@ lower_deref_to_ssa_block(nir_block *block, void >> *void_state) >> */ >> nir_ssa_undef_instr *undef = >> nir_ssa_undef_instr_create(state->mem_ctx); >> - nir_ssa_def_init(&undef->instr, &undef->def, num_chans, >> NULL); >> + nir_ssa_def_init(&undef->instr, &undef->def, >> + intrin->num_components, NULL); >> >> nir_instr_insert_before(&intrin->instr, &undef->instr); >> nir_instr_remove(&intrin->instr); >> @@ -821,14 +795,15 @@ lower_deref_to_ssa_block(nir_block *block, void >> *void_state) >> nir_op_imov); >> mov->src[0].src.is_ssa = true; >> mov->src[0].src.ssa = get_ssa_def_for_block(node, block, >> state); >> - for (unsigned i = num_chans; i < 4; i++) >> + for (unsigned i = intrin->num_components; i < 4; i++) >> mov->src[0].swizzle[i] = 0; >> >> assert(intrin->dest.is_ssa); >> >> - mov->dest.write_mask = (1 << num_chans) - 1; >> + mov->dest.write_mask = (1 << intrin->num_components) - 1; >> mov->dest.dest.is_ssa = true; >> - nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, >> num_chans, NULL); >> + nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, >> + intrin->num_components, NULL); >> >> nir_instr_insert_before(&intrin->instr, &mov->instr); >> nir_instr_remove(&intrin->instr); >> @@ -843,10 +818,7 @@ lower_deref_to_ssa_block(nir_block *block, void >> *void_state) >> break; >> } >> >> - case nir_intrinsic_store_var_vec1: >> - case nir_intrinsic_store_var_vec2: >> - case nir_intrinsic_store_var_vec3: >> - case 
nir_intrinsic_store_var_vec4: { >> + case nir_intrinsic_store_var: { >> struct deref_node *node = >> get_deref_node(intrin->variables[0], >> false, state); >> >> @@ -860,7 +832,8 @@ lower_deref_to_ssa_block(nir_block *block, void >> *void_state) >> if (!node->lower_to_ssa) >> continue; >> >> - unsigned num_chans = glsl_get_vector_elements(node->type); >> + assert(intrin->num_components == >> + glsl_get_vector_elements(node->type)); >> >> assert(intrin->src[0].is_ssa); >> >> @@ -873,12 +846,12 @@ lower_deref_to_ssa_block(nir_block *block, void >> *void_state) >> >> mov->src[1].src.is_ssa = true; >> mov->src[1].src.ssa = intrin->src[0].ssa; >> - for (unsigned i = num_chans; i < 4; i++) >> + for (unsigned i = intrin->num_components; i < 4; i++) >> mov->src[1].swizzle[i] = 0; >> >> mov->src[2].src.is_ssa = true; >> mov->src[2].src.ssa = get_ssa_def_for_block(node, block, >> state); >> - for (unsigned i = num_chans; i < 4; i++) >> + for (unsigned i = intrin->num_components; i < 4; i++) >> mov->src[2].swizzle[i] = 0; >> >> } else { >> @@ -886,13 +859,14 @@ lower_deref_to_ssa_block(nir_block *block, void >> *void_state) >> >> mov->src[0].src.is_ssa = true; >> mov->src[0].src.ssa = intrin->src[0].ssa; >> - for (unsigned i = num_chans; i < 4; i++) >> + for (unsigned i = intrin->num_components; i < 4; i++) >> mov->src[0].swizzle[i] = 0; >> } >> >> - mov->dest.write_mask = (1 << num_chans) - 1; >> + mov->dest.write_mask = (1 << intrin->num_components) - 1; >> mov->dest.dest.is_ssa = true; >> - nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, >> num_chans, NULL); >> + nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, >> + intrin->num_components, NULL); >> >> nir_instr_insert_before(&intrin->instr, &mov->instr); >> nir_instr_remove(&intrin->instr); >> diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c >> index b8ef802..ee29fc3 100644 >> --- a/src/glsl/nir/nir_validate.c >> +++ b/src/glsl/nir/nir_validate.c >> @@ -338,16 +338,10 @@ 
validate_intrinsic_instr(nir_intrinsic_instr >> *instr, validate_state *state) >> } >> >> switch (instr->intrinsic) { >> - case nir_intrinsic_load_var_vec1: >> - case nir_intrinsic_load_var_vec2: >> - case nir_intrinsic_load_var_vec3: >> - case nir_intrinsic_load_var_vec4: >> + case nir_intrinsic_load_var: >> assert(instr->variables[0]->var->data.mode != nir_var_shader_out); >> break; >> - case nir_intrinsic_store_var_vec1: >> - case nir_intrinsic_store_var_vec2: >> - case nir_intrinsic_store_var_vec3: >> - case nir_intrinsic_store_var_vec4: >> + case nir_intrinsic_store_var: >> assert(instr->variables[0]->var->data.mode != nir_var_shader_in && >> instr->variables[0]->var->data.mode != nir_var_uniform); >> break; >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> index dbb2470..4c1805d 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> @@ -1312,14 +1312,10 @@ >> fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) >> break; >> } >> >> - case nir_intrinsic_load_uniform_vec1: >> - case nir_intrinsic_load_uniform_vec2: >> - case nir_intrinsic_load_uniform_vec3: >> - case nir_intrinsic_load_uniform_vec4: { >> + case nir_intrinsic_load_uniform: { >> unsigned index = 0; >> for (int i = 0; i < instr->const_index[1]; i++) { >> - for (unsigned j = 0; >> - j < nir_intrinsic_infos[instr->intrinsic].dest_components; >> j++) { >> + for (unsigned j = 0; j < instr->num_components; j++) { >> fs_reg src = nir_uniforms; >> src.reg_offset = instr->const_index[0] + index; >> src.type = dest.type; >> @@ -1335,14 +1331,10 @@ >> fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) >> break; >> } >> >> - case nir_intrinsic_load_uniform_vec1_indirect: >> - case nir_intrinsic_load_uniform_vec2_indirect: >> - case nir_intrinsic_load_uniform_vec3_indirect: >> - case nir_intrinsic_load_uniform_vec4_indirect: { >> + case nir_intrinsic_load_uniform_indirect: { >> unsigned 
index = 0; >> for (int i = 0; i < instr->const_index[1]; i++) { >> - for (unsigned j = 0; >> - j < nir_intrinsic_infos[instr->intrinsic].dest_components; >> j++) { >> + for (unsigned j = 0; j < instr->num_components; j++) { >> fs_reg src = nir_uniforms; >> src.reg_offset = instr->const_index[0] + index; >> src.reladdr = new(mem_ctx) >> fs_reg(get_nir_src(instr->src[0])); >> @@ -1360,10 +1352,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr >> *instr) >> break; >> } >> >> - case nir_intrinsic_load_ubo_vec1: >> - case nir_intrinsic_load_ubo_vec2: >> - case nir_intrinsic_load_ubo_vec3: >> - case nir_intrinsic_load_ubo_vec4: { >> + case nir_intrinsic_load_ubo: { >> fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + >> (unsigned) instr->const_index[0]); >> fs_reg packed_consts = fs_reg(this, glsl_type::float_type); >> @@ -1373,8 +1362,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr >> *instr) >> emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, >> packed_consts, surf_index, >> const_offset_reg)); >> >> - for (unsigned i = 0; >> - i < nir_intrinsic_infos[instr->intrinsic].dest_components; >> i++) { >> + for (unsigned i = 0; i < instr->num_components; i++) { >> packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i); >> >> /* The std140 packing rules don't allow vectors to cross 16-byte >> @@ -1392,10 +1380,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr >> *instr) >> break; >> } >> >> - case nir_intrinsic_load_ubo_vec1_indirect: >> - case nir_intrinsic_load_ubo_vec2_indirect: >> - case nir_intrinsic_load_ubo_vec3_indirect: >> - case nir_intrinsic_load_ubo_vec4_indirect: { >> + case nir_intrinsic_load_ubo_indirect: { >> fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + >> instr->const_index[0]); >> /* Turn the byte offset into a dword offset. 
*/ >> @@ -1404,8 +1389,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr >> *instr) >> emit(SHR(offset, retype(get_nir_src(instr->src[0]), >> BRW_REGISTER_TYPE_D), >> fs_reg(2))); >> >> - for (unsigned i = 0; >> - i < nir_intrinsic_infos[instr->intrinsic].dest_components; >> i++) { >> + for (unsigned i = 0; i < instr->num_components; i++) { >> exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index, >> offset, base_offset >> + i); >> fs_inst *last_inst = (fs_inst *) list.get_tail(); >> @@ -1418,14 +1402,10 @@ >> fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) >> break; >> } >> >> - case nir_intrinsic_load_input_vec1: >> - case nir_intrinsic_load_input_vec2: >> - case nir_intrinsic_load_input_vec3: >> - case nir_intrinsic_load_input_vec4: { >> + case nir_intrinsic_load_input: { >> unsigned index = 0; >> for (int i = 0; i < instr->const_index[1]; i++) { >> - for (unsigned j = 0; >> - j < nir_intrinsic_infos[instr->intrinsic].dest_components; >> j++) { >> + for (unsigned j = 0; j < instr->num_components; j++) { >> fs_reg src = nir_inputs; >> src.reg_offset = instr->const_index[0] + index; >> src.type = dest.type; >> @@ -1441,14 +1421,10 @@ >> fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) >> break; >> } >> >> - case nir_intrinsic_load_input_vec1_indirect: >> - case nir_intrinsic_load_input_vec2_indirect: >> - case nir_intrinsic_load_input_vec3_indirect: >> - case nir_intrinsic_load_input_vec4_indirect: { >> + case nir_intrinsic_load_input_indirect: { >> unsigned index = 0; >> for (int i = 0; i < instr->const_index[1]; i++) { >> - for (unsigned j = 0; >> - j < nir_intrinsic_infos[instr->intrinsic].dest_components; >> j++) { >> + for (unsigned j = 0; j < instr->num_components; j++) { >> fs_reg src = nir_inputs; >> src.reg_offset = instr->const_index[0] + index; >> src.reladdr = new(mem_ctx) >> fs_reg(get_nir_src(instr->src[0])); >> @@ -1466,15 +1442,11 @@ >> fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) >> break; >> } >> >> 
- case nir_intrinsic_store_output_vec1: >> - case nir_intrinsic_store_output_vec2: >> - case nir_intrinsic_store_output_vec3: >> - case nir_intrinsic_store_output_vec4: { >> + case nir_intrinsic_store_output: { >> fs_reg src = get_nir_src(instr->src[0]); >> unsigned index = 0; >> for (int i = 0; i < instr->const_index[1]; i++) { >> - for (unsigned j = 0; >> - j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; >> j++) { >> + for (unsigned j = 0; j < instr->num_components; j++) { >> fs_reg new_dest = nir_outputs; >> new_dest.reg_offset = instr->const_index[0] + index; >> new_dest.type = src.type; >> @@ -1489,16 +1461,12 @@ >> fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) >> break; >> } >> >> - case nir_intrinsic_store_output_vec1_indirect: >> - case nir_intrinsic_store_output_vec2_indirect: >> - case nir_intrinsic_store_output_vec3_indirect: >> - case nir_intrinsic_store_output_vec4_indirect: { >> + case nir_intrinsic_store_output_indirect: { >> fs_reg src = get_nir_src(instr->src[0]); >> fs_reg indirect = get_nir_src(instr->src[1]); >> unsigned index = 0; >> for (int i = 0; i < instr->const_index[1]; i++) { >> - for (unsigned j = 0; >> - j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; >> j++) { >> + for (unsigned j = 0; j < instr->num_components; j++) { >> fs_reg new_dest = nir_outputs; >> new_dest.reg_offset = instr->const_index[0] + index; >> new_dest.reladdr = new(mem_ctx) fs_reg(indirect); >> -- >> 2.2.0 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev