Add a num_packed_components parameter to the type_size callbacks taken by nir_assign_var_locations() and nir_lower_io(), so that the size of a varying can be computed once packing is taken into account. Passing 0 means no packing information is available and the natural size of the type is used; callbacks that have no notion of packing simply ignore the new parameter. --- src/compiler/nir/nir.h | 6 +++-- src/compiler/nir/nir_lower_io.c | 37 +++++++++++++++++--------- src/mesa/drivers/dri/i965/brw_blorp.c | 6 +++-- src/mesa/drivers/dri/i965/brw_fs.cpp | 26 ++++++++++++++---- src/mesa/drivers/dri/i965/brw_nir.c | 16 +++++------ src/mesa/drivers/dri/i965/brw_nir.h | 6 +++-- src/mesa/drivers/dri/i965/brw_shader.h | 6 +++-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 19 ++++++++++--- src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- src/mesa/state_tracker/st_glsl_types.cpp | 3 ++- src/mesa/state_tracker/st_glsl_types.h | 2 +- 12 files changed, 90 insertions(+), 41 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 514b455..5789f67 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2317,11 +2317,13 @@ void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, unsigned base_offset, - int (*type_size)(const struct glsl_type *)); + int (*type_size)(const struct glsl_type *, + unsigned num_packed_components)); void nir_lower_io(nir_shader *shader, nir_variable_mode modes, - int (*type_size)(const struct glsl_type *)); + int (*type_size)(const struct glsl_type *, + unsigned num_packed_components)); nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 0a6e1a8..941aa2d 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -37,14 +37,16 @@ struct lower_io_state { nir_builder builder; void *mem_ctx; - int (*type_size)(const struct glsl_type *type); + int (*type_size)(const struct glsl_type *type, + unsigned num_packed_components); nir_variable_mode modes; }; void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, unsigned base_offset, - int (*type_size)(const struct glsl_type *)) + int (*type_size)(const struct glsl_type *, + unsigned num_packed_components)) { unsigned location = 0; @@ -74,13 +76,13 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size, if (locations[idx][var->data.index] == -1) { var->data.driver_location = location; locations[idx][var->data.index] = location; - location += type_size(var->type); + location += type_size(var->type, var->data.num_packed_components); } else { var->data.driver_location = locations[idx][var->data.index]; } } else { var->data.driver_location = location; - location += type_size(var->type); + location += type_size(var->type, 
var->data.num_packed_components); } } @@ -113,7 +115,9 @@ is_per_vertex_output(struct lower_io_state *state, nir_variable *var) static nir_ssa_def * get_io_offset(nir_builder *b, nir_deref_var *deref, nir_ssa_def **vertex_index, - int (*type_size)(const struct glsl_type *)) + int (*type_size)(const struct glsl_type *, + unsigned num_packed_components), + unsigned num_packed_components) { nir_deref *tail = &deref->deref; @@ -141,7 +145,7 @@ get_io_offset(nir_builder *b, nir_deref_var *deref, if (tail->deref_type == nir_deref_type_array) { nir_deref_array *deref_array = nir_deref_as_array(tail); - unsigned size = type_size(tail->type); + unsigned size = type_size(tail->type, num_packed_components); offset = nir_iadd(b, offset, nir_imm_int(b, size * deref_array->base_offset)); @@ -158,7 +162,8 @@ get_io_offset(nir_builder *b, nir_deref_var *deref, unsigned field_offset = 0; for (unsigned i = 0; i < deref_struct->index; i++) { - field_offset += type_size(glsl_get_struct_field(parent_type, i)); + field_offset += + type_size(glsl_get_struct_field(parent_type, i), 0); } offset = nir_iadd(b, offset, nir_imm_int(b, field_offset)); } @@ -289,7 +294,9 @@ nir_lower_io_block(nir_block *block, offset = get_io_offset(b, intrin->variables[0], per_vertex ? &vertex_index : NULL, - state->type_size); + state->type_size, + intrin->variables[0]->var-> + data.num_packed_components); nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, @@ -305,7 +312,7 @@ nir_lower_io_block(nir_block *block, if (load->intrinsic == nir_intrinsic_load_uniform) { nir_intrinsic_set_range(load, - state->type_size(intrin->variables[0]->var->type)); + state->type_size(intrin->variables[0]->var->type, 0)); } if (per_vertex) @@ -339,7 +346,9 @@ nir_lower_io_block(nir_block *block, offset = get_io_offset(b, intrin->variables[0], per_vertex ? 
&vertex_index : NULL, - state->type_size); + state->type_size, + intrin->variables[0]->var-> + data.num_packed_components); nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, @@ -381,7 +390,7 @@ nir_lower_io_block(nir_block *block, nir_ssa_def *offset; offset = get_io_offset(b, intrin->variables[0], - NULL, state->type_size); + NULL, state->type_size, 0); nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(state->mem_ctx, @@ -424,7 +433,8 @@ nir_lower_io_block(nir_block *block, static void nir_lower_io_impl(nir_function_impl *impl, nir_variable_mode modes, - int (*type_size)(const struct glsl_type *)) + int (*type_size)(const struct glsl_type *, + unsigned num_packed_components)) { struct lower_io_state state; @@ -443,7 +453,8 @@ nir_lower_io_impl(nir_function_impl *impl, void nir_lower_io(nir_shader *shader, nir_variable_mode modes, - int (*type_size)(const struct glsl_type *)) + int (*type_size)(const struct glsl_type *, + unsigned num_packed_components)) { nir_foreach_function(function, shader) { if (function->impl) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 9590968..97ddfa9 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -157,8 +157,10 @@ brw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key) } static int -nir_uniform_type_size(const struct glsl_type *type) +nir_uniform_type_size(const struct glsl_type *type, unsigned x) { + (void) x; + /* Only very basic types are allowed */ assert(glsl_type_is_vector_or_scalar(type)); assert(glsl_get_bit_size(type) == 32); @@ -216,7 +218,7 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir, nir->num_uniforms = 0; nir_foreach_variable(var, &nir->uniforms) { var->data.driver_location = var->data.location; - unsigned end = var->data.location + nir_uniform_type_size(var->type); + unsigned end = var->data.location + nir_uniform_type_size(var->type, 0); nir->num_uniforms 
= MAX2(nir->num_uniforms, end); } nir_lower_io(nir, nir_var_uniform, nir_uniform_type_size); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 8997e1a..22a48bc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -464,7 +464,8 @@ fs_reg::component_size(unsigned width) const } extern "C" int -type_size_scalar(const struct glsl_type *type) +type_size_scalar_packed(const struct glsl_type *type, + unsigned num_packed_components) { unsigned int size, i; @@ -473,11 +474,18 @@ type_size_scalar(const struct glsl_type *type) case GLSL_TYPE_INT: case GLSL_TYPE_FLOAT: case GLSL_TYPE_BOOL: - return type->components(); + if (num_packed_components) + return num_packed_components; + else + return type->components(); case GLSL_TYPE_DOUBLE: + if (num_packed_components) + return num_packed_components * 2; + else return type->components() * 2; case GLSL_TYPE_ARRAY: - return type_size_scalar(type->fields.array) * type->length; + return type_size_scalar_packed(type->fields.array, + num_packed_components) * type->length; case GLSL_TYPE_STRUCT: size = 0; for (i = 0; i < type->length; i++) { @@ -505,6 +513,12 @@ type_size_scalar(const struct glsl_type *type) return 0; } +extern int +type_size_scalar(const struct glsl_type *type) +{ + return type_size_scalar_packed(type, 0); +} + /** * Returns the number of scalar components needed to store type, assuming * that vectors are padded out to vec4. @@ -513,8 +527,9 @@ type_size_scalar(const struct glsl_type *type) * similar to type_size_scalar(). */ extern "C" int -type_size_vec4_times_4(const struct glsl_type *type) +type_size_vec4_times_4(const struct glsl_type *type, unsigned x) { + (void) x; return 4 * type_size_vec4(type); } @@ -522,8 +537,9 @@ type_size_vec4_times_4(const struct glsl_type *type) * except for double-precision types, which are loaded as one dvec4. 
*/ extern "C" int -type_size_vs_input(const struct glsl_type *type) +type_size_vs_input(const struct glsl_type *type, unsigned x) { + (void) x; if (type->is_double()) { return type_size_dvec4(type); } else { diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 5aac0d7..ef81722 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -236,7 +236,7 @@ brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar, } /* Inputs are stored in vec4 slots, so use type_size_vec4(). */ - nir_lower_io(nir, nir_var_shader_in, type_size_vec4); + nir_lower_io(nir, nir_var_shader_in, type_size_vec4_packed); if (is_scalar || nir->stage != MESA_SHADER_GEOMETRY) { /* This pass needs actual constants */ @@ -261,7 +261,7 @@ brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map) var->data.driver_location = var->data.location; } - nir_lower_io(nir, nir_var_shader_in, type_size_vec4); + nir_lower_io(nir, nir_var_shader_in, type_size_vec4_packed); /* This pass needs actual constants */ nir_opt_constant_folding(nir); @@ -283,8 +283,8 @@ void brw_nir_lower_fs_inputs(nir_shader *nir) { nir_assign_var_locations(&nir->inputs, &nir->num_inputs, VARYING_SLOT_VAR0, - type_size_scalar); - nir_lower_io(nir, nir_var_shader_in, type_size_scalar); + type_size_scalar_packed); + nir_lower_io(nir, nir_var_shader_in, type_size_scalar_packed); } void @@ -299,7 +299,7 @@ brw_nir_lower_vue_outputs(nir_shader *nir, } else { nir_foreach_variable(var, &nir->outputs) var->data.driver_location = var->data.location; - nir_lower_io(nir, nir_var_shader_out, type_size_vec4); + nir_lower_io(nir, nir_var_shader_out, type_size_vec4_packed); } } @@ -310,7 +310,7 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map) var->data.driver_location = var->data.location; } - nir_lower_io(nir, nir_var_shader_out, type_size_vec4); + nir_lower_io(nir, nir_var_shader_out, type_size_vec4_packed); /* This pass needs 
actual constants */ nir_opt_constant_folding(nir); @@ -332,8 +332,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir) { nir_assign_var_locations(&nir->outputs, &nir->num_outputs, - FRAG_RESULT_DATA0, type_size_scalar); - nir_lower_io(nir, nir_var_shader_out, type_size_scalar); + FRAG_RESULT_DATA0, type_size_scalar_packed); + nir_lower_io(nir, nir_var_shader_out, type_size_scalar_packed); } void diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 74c354f..780a9da 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -33,14 +33,16 @@ extern "C" { #endif static inline int -type_size_scalar_bytes(const struct glsl_type *type) +type_size_scalar_bytes(const struct glsl_type *type, unsigned x) { + (void) x; return type_size_scalar(type) * 4; } static inline int -type_size_vec4_bytes(const struct glsl_type *type) +type_size_vec4_bytes(const struct glsl_type *type, unsigned x) { + (void) x; return type_size_vec4(type) * 16; } diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 656dc89..2bb6838 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -294,8 +294,10 @@ struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint typ int type_size_scalar(const struct glsl_type *type); int type_size_vec4(const struct glsl_type *type); int type_size_dvec4(const struct glsl_type *type); -int type_size_vec4_times_4(const struct glsl_type *type); -int type_size_vs_input(const struct glsl_type *type); +int type_size_vs_input(const struct glsl_type *type, unsigned x); +int type_size_scalar_packed(const struct glsl_type *type, unsigned x); +int type_size_vec4_packed(const struct glsl_type *type, unsigned x); +int type_size_vec4_times_4(const struct glsl_type *type, unsigned x); unsigned tesslevel_outer_components(GLenum tes_primitive_mode); unsigned tesslevel_inner_components(GLenum 
tes_primitive_mode); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index b392919..536f0ca 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -566,9 +566,15 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0) emit(VEC4_OPCODE_PACK_BYTES, dst, bytes); } -/* - * Returns the minimum number of vec4 (as_vec4 == true) or dvec4 (as_vec4 == - * false) elements needed to pack a type. +/** + * Returns the minimum number of vec4 elements needed to pack a type. + * + * For simple types, it will return 1 (a single vec4); for matrices, the + * number of columns; for array and struct, the sum of the vec4_size of + * each of its elements; and for sampler and atomic, zero. + * + * This method is useful to calculate how much register space is needed to + * store a particular type. */ static int type_size_xvec4(const struct glsl_type *type, bool as_vec4) @@ -667,6 +673,13 @@ type_size_dvec4(const struct glsl_type *type) return type_size_xvec4(type, false); } +extern "C" int +type_size_vec4_packed(const struct glsl_type *type, unsigned x) +{ + (void) x; + return type_size_vec4(type); +} + src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) { init(); diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 3008411..73b7e8a 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -188,7 +188,7 @@ st_nir_assign_uniform_locations(struct gl_program *prog, uniform->data.driver_location = loc; - max = MAX2(max, loc + st_glsl_type_size(uniform->type)); + max = MAX2(max, loc + st_glsl_type_size(uniform->type, 0)); } *size = max; } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index b7eaa13..e6bf467 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1171,7 +1171,7 @@ attrib_type_size(const struct glsl_type *type, bool is_vs_input) static int type_size(const struct glsl_type *type) { - return st_glsl_type_size(type); + return st_glsl_type_size(type, 0); } /** diff --git a/src/mesa/state_tracker/st_glsl_types.cpp b/src/mesa/state_tracker/st_glsl_types.cpp index 857e143..14ec5d8 100644 --- a/src/mesa/state_tracker/st_glsl_types.cpp +++ b/src/mesa/state_tracker/st_glsl_types.cpp @@ -95,7 +95,8 @@ st_glsl_attrib_type_size(const struct glsl_type *type, bool is_vs_input) } int -st_glsl_type_size(const struct glsl_type *type) +st_glsl_type_size(const struct glsl_type *type, unsigned x) { + (void) x; return st_glsl_attrib_type_size(type, false); } diff --git a/src/mesa/state_tracker/st_glsl_types.h b/src/mesa/state_tracker/st_glsl_types.h index 3a39cee..86d155a 100644 --- a/src/mesa/state_tracker/st_glsl_types.h +++ b/src/mesa/state_tracker/st_glsl_types.h @@ -34,7 +34,7 @@ extern "C" { #endif int st_glsl_attrib_type_size(const struct glsl_type *type, bool is_vs_input); -int st_glsl_type_size(const struct glsl_type *type); +int st_glsl_type_size(const struct glsl_type *type, unsigned x); #ifdef __cplusplus -- 2.5.5 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev