On Mon, Mar 14, 2016 at 7:48 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > > > On Mon, Mar 7, 2016 at 12:46 AM, Samuel Iglesias Gonsálvez > <sigles...@igalia.com> wrote: >> >> From: Connor Abbott <connor.w.abb...@intel.com> >> >> v2: Use the bit-size information from the opcode information if defined >> (Iago) >> >> Signed-off-by: Iago Toral Quiroga <ito...@igalia.com> >> >> FIXME: This should be squashed into the previous commit so we don't break >> the build. The break happens because the python script that generates the >> constant folding pass does not know how to handle the sized types >> introduced >> by the previous commit until this patch, so it ends up generating code >> with >> invalid types. Keep it separated for review purposes. >> --- >> src/compiler/nir/nir_constant_expressions.h | 2 +- >> src/compiler/nir/nir_constant_expressions.py | 246 >> +++++++++++++++++---------- >> src/compiler/nir/nir_opt_constant_folding.c | 24 ++- >> 3 files changed, 182 insertions(+), 90 deletions(-) >> >> diff --git a/src/compiler/nir/nir_constant_expressions.h >> b/src/compiler/nir/nir_constant_expressions.h >> index 97997f2..201f278 100644 >> --- a/src/compiler/nir/nir_constant_expressions.h >> +++ b/src/compiler/nir/nir_constant_expressions.h >> @@ -28,4 +28,4 @@ >> #include "nir.h" >> >> nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components, >> - nir_const_value *src); >> + unsigned bit_size, nir_const_value >> *src); >> diff --git a/src/compiler/nir/nir_constant_expressions.py >> b/src/compiler/nir/nir_constant_expressions.py >> index 32784f6..972d281 100644 >> --- a/src/compiler/nir/nir_constant_expressions.py >> +++ b/src/compiler/nir/nir_constant_expressions.py >> @@ -1,4 +1,43 @@ >> #! /usr/bin/python2 >> + >> +def type_has_size(type_): >> + return type_[-1:].isdigit() >> + >> +def type_sizes(type_): >> + if type_.endswith("8"): >> + return [8] >> + elif type_.endswith("16"): >> + return [16] >> + elif type_.endswith("32"): >> + return [32] >> + elif type_.endswith("64"): >> + return [64] >> + else: >> + return [32, 64] >> + >> +def type_add_size(type_, size): >> + if type_has_size(type_): >> + return type_ >> + return type_ + str(size) >> + >> +def get_const_field(type_): >> + if type_ == "int32": >> + return "i" >> + if type_ == "uint32": >> + return "u" >> + if type_ == "int64": >> + return "l" >> + if type_ == "uint64": >> + return "ul" >> + if type_ == "bool32": >> + return "b" >> + if type_ == "float32": >> + return "f" >> + if type_ == "float64": >> + return "d" >> + raise Exception(str(type_)) >> + assert(0) >> + >> template = """\ >> /* >> * Copyright (C) 2014 Intel Corporation >> @@ -205,110 +244,140 @@ unpack_half_1x16(uint16_t u) >> } >> >> /* Some typed vector structures to make things like src0.y work */ >> -% for type in ["float", "int", "uint", "bool"]: >> -struct ${type}_vec { >> - ${type} x; >> - ${type} y; >> - ${type} z; >> - ${type} w; >> +typedef float float32_t; >> +typedef double float64_t; >> +typedef bool bool32_t; >> +% for type in ["float", "int", "uint"]: >> +% for width in [32, 64]: >> +struct ${type}${width}_vec { >> + ${type}${width}_t x; >> + ${type}${width}_t y; >> + ${type}${width}_t z; >> + ${type}${width}_t w; >> }; >> % endfor >> +% endfor >> + >> +struct bool32_vec { >> + bool x; >> + bool y; >> + bool z; >> + bool w; >> +}; >> >> % for name, op in sorted(opcodes.iteritems()): >> static nir_const_value >> -evaluate_${name}(unsigned num_components, nir_const_value *_src) >> +evaluate_${name}(unsigned num_components, unsigned bit_size, >> + nir_const_value *_src) >> { >> nir_const_value _dst_val = { { {0, 0, 0, 0} } }; >> >> - ## For each non-per-component input, create a variable srcN that >> - ## contains x, y, z, and w elements which are filled in with the >> - ## appropriately-typed values. >> - % for j in range(op.num_inputs): >> - % if op.input_sizes[j] == 0: >> - <% continue %> >> - % elif "src" + str(j) not in op.const_expr: >> - ## Avoid unused variable warnings >> - <% continue %> >> - %endif >> - >> - struct ${op.input_types[j]}_vec src${j} = { >> - % for k in range(op.input_sizes[j]): >> - % if op.input_types[j] == "bool": >> - _src[${j}].u[${k}] != 0, >> - % else: >> - _src[${j}].${op.input_types[j][:1]}[${k}], >> - % endif >> - % endfor >> - }; >> - % endfor >> + switch (bit_size) { >> + % for bit_size in [32, 64]: >> + case ${bit_size}: { >> + <% >> + output_type = type_add_size(op.output_type, bit_size) >> + input_types = [type_add_size(type_, bit_size) for type_ in >> op.input_types] >> + %> >> + >> + ## For each non-per-component input, create a variable srcN that >> + ## contains x, y, z, and w elements which are filled in with the >> + ## appropriately-typed values. >> + % for j in range(op.num_inputs): >> + % if op.input_sizes[j] == 0: >> + <% continue %> >> + % elif "src" + str(j) not in op.const_expr: >> + ## Avoid unused variable warnings >> + <% continue %> >> + %endif >> >> - % if op.output_size == 0: >> - ## For per-component instructions, we need to iterate over the >> - ## components and apply the constant expression one component >> - ## at a time. >> - for (unsigned _i = 0; _i < num_components; _i++) { >> - ## For each per-component input, create a variable srcN that >> - ## contains the value of the current (_i'th) component. >> - % for j in range(op.num_inputs): >> - % if op.input_sizes[j] != 0: >> - <% continue %> >> - % elif "src" + str(j) not in op.const_expr: >> - ## Avoid unused variable warnings >> - <% continue %> >> - % elif op.input_types[j] == "bool": >> - bool src${j} = _src[${j}].u[_i] != 0; >> + struct ${input_types[j]}_vec src${j} = { >> + % for k in range(op.input_sizes[j]): >> + % if input_types[j] == "bool32": >> + _src[${j}].u[${k}] != 0, >> % else: >> - ${op.input_types[j]} src${j} = >> _src[${j}].${op.input_types[j][:1]}[_i]; >> + _src[${j}].${get_const_field(input_types[j])}[${k}], >> % endif >> % endfor >> + }; >> + % endfor >> + >> + % if op.output_size == 0: >> + ## For per-component instructions, we need to iterate over the >> + ## components and apply the constant expression one component >> + ## at a time. >> + for (unsigned _i = 0; _i < num_components; _i++) { >> + ## For each per-component input, create a variable srcN that >> + ## contains the value of the current (_i'th) component. >> + % for j in range(op.num_inputs): >> + % if op.input_sizes[j] != 0: >> + <% continue %> >> + % elif "src" + str(j) not in op.const_expr: >> + ## Avoid unused variable warnings >> + <% continue %> >> + % elif input_types[j] == "bool32": >> + bool src${j} = _src[${j}].u[_i] != 0; >> + % else: >> + ${input_types[j]}_t src${j} = >> + _src[${j}].${get_const_field(input_types[j])}[_i]; >> + % endif >> + % endfor >> + >> + ## Create an appropriately-typed variable dst and assign the >> + ## result of the const_expr to it. If const_expr already >> contains >> + ## writes to dst, just include const_expr directly. >> + % if "dst" in op.const_expr: >> + ${output_type}_t dst; >> + ${op.const_expr} >> + % else: >> + ${output_type}_t dst = ${op.const_expr}; >> + % endif >> + >> + ## Store the current component of the actual destination to >> the >> + ## value of dst. >> + % if output_type == "bool32": >> + ## Sanitize the C value to a proper NIR bool >> + _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE; >> + % else: >> + _dst_val.${get_const_field(output_type)}[_i] = dst; >> + % endif >> + } >> + % else: >> + ## In the non-per-component case, create a struct dst with >> + ## appropriately-typed elements x, y, z, and w and assign the >> result >> + ## of the const_expr to all components of dst, or include the >> + ## const_expr directly if it writes to dst already. >> + struct ${output_type}_vec dst; >> >> - ## Create an appropriately-typed variable dst and assign the >> - ## result of the const_expr to it. If const_expr already >> contains >> - ## writes to dst, just include const_expr directly. >> % if "dst" in op.const_expr: >> - ${op.output_type} dst; >> ${op.const_expr} >> % else: >> - ${op.output_type} dst = ${op.const_expr}; >> + ## Splat the value to all components. This way expressions >> which >> + ## write the same value to all components don't need to >> explicitly >> + ## write to dest. One such example is fnoise which has a >> + ## const_expr of 0.0f. >> + dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; >> % endif >> >> - ## Store the current component of the actual destination to the >> - ## value of dst. >> - % if op.output_type == "bool": >> - ## Sanitize the C value to a proper NIR bool >> - _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE; >> - % else: >> - _dst_val.${op.output_type[:1]}[_i] = dst; >> - % endif >> - } >> - % else: >> - ## In the non-per-component case, create a struct dst with >> - ## appropriately-typed elements x, y, z, and w and assign the >> result >> - ## of the const_expr to all components of dst, or include the >> - ## const_expr directly if it writes to dst already. >> - struct ${op.output_type}_vec dst; >> - >> - % if "dst" in op.const_expr: >> - ${op.const_expr} >> - % else: >> - ## Splat the value to all components. This way expressions >> which >> - ## write the same value to all components don't need to >> explicitly >> - ## write to dest. One such example is fnoise which has a >> - ## const_expr of 0.0f. >> - dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; >> + ## For each component in the destination, copy the value of dst >> to >> + ## the actual destination. >> + % for k in range(op.output_size): >> + % if output_type == "bool32": >> + ## Sanitize the C value to a proper NIR bool >> + _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : >> NIR_FALSE; >> + % else: >> + _dst_val.${get_const_field(output_type)}[${k}] = >> dst.${"xyzw"[k]}; >> + % endif >> + % endfor >> % endif >> >> - ## For each component in the destination, copy the value of dst to >> - ## the actual destination. >> - % for k in range(op.output_size): >> - % if op.output_type == "bool": >> - ## Sanitize the C value to a proper NIR bool >> - _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE; >> - % else: >> - _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]}; >> - % endif >> - % endfor >> - % endif >> + break; >> + } >> + % endfor >> + >> + default: >> + unreachable("unknown bit width"); >> + } >> >> return _dst_val; >> } >> @@ -316,12 +385,12 @@ evaluate_${name}(unsigned num_components, >> nir_const_value *_src) >> >> nir_const_value >> nir_eval_const_opcode(nir_op op, unsigned num_components, >> - nir_const_value *src) >> + unsigned bit_width, nir_const_value *src) >> { >> switch (op) { >> % for name in sorted(opcodes.iterkeys()): >> case nir_op_${name}: { >> - return evaluate_${name}(num_components, src); >> + return evaluate_${name}(num_components, bit_width, src); >> break; >> } >> % endfor >> @@ -333,4 +402,7 @@ nir_eval_const_opcode(nir_op op, unsigned >> num_components, >> from nir_opcodes import opcodes >> from mako.template import Template >> >> -print Template(template).render(opcodes=opcodes) >> +print Template(template).render(opcodes=opcodes, type_sizes=type_sizes, >> + type_has_size=type_has_size, >> + type_add_size=type_add_size, >> + get_const_field=get_const_field) >> diff --git a/src/compiler/nir/nir_opt_constant_folding.c >> b/src/compiler/nir/nir_opt_constant_folding.c >> index 04876a4..29905a0 100644 >> --- a/src/compiler/nir/nir_opt_constant_folding.c >> +++ b/src/compiler/nir/nir_opt_constant_folding.c >> @@ -46,10 +46,23 @@ constant_fold_alu_instr(nir_alu_instr *instr, void >> *mem_ctx) >> if (!instr->dest.dest.is_ssa) >> return false; >> >> + unsigned bit_size = 0; >> + if (!(nir_op_infos[instr->op].output_type & NIR_ALU_TYPE_SIZE_MASK)) >> + bit_size = instr->dest.dest.ssa.bit_size; >> + else >> + bit_size = nir_op_infos[instr->op].output_type & >> NIR_ALU_TYPE_SIZE_MASK; > > > This isn't right. We need to look at all the unsized types and try to pull > it from one of those. We shouldn't fall back to grabbing from the sized > type. > >> >> + >> for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { >> if (!instr->src[i].src.is_ssa) >> return false; >> >> + if (bit_size == 0) { >> + if (!(nir_op_infos[instr->op].input_sizes[i] & >> NIR_ALU_TYPE_SIZE_MASK)) >> + bit_size = instr->src[i].src.ssa->bit_size; >> + else >> + bit_size = nir_op_infos[instr->op].input_sizes[i] & >> NIR_ALU_TYPE_SIZE_MASK; > > > Same here. If they don't have any unsized sources or destinations to grab > from, we should let bit_size be zero. > > Also, if we have multiple sources with the same unsized type, we should > assert that the sizes match. > >> >> + } >> + >> nir_instr *src_instr = instr->src[i].src.ssa->parent_instr; >> >> if (src_instr->type != nir_instr_type_load_const) >> @@ -58,24 +71,31 @@ constant_fold_alu_instr(nir_alu_instr *instr, void >> *mem_ctx) >> >> for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, >> i); >> j++) { >> - src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]]; >> + if (load_const->def.bit_size == 64) >> + src[i].ul[j] = >> load_const->value.ul[instr->src[i].swizzle[j]]; >> + else >> + src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]]; >> } >> >> /* We shouldn't have any source modifiers in the optimization loop. >> */ >> assert(!instr->src[i].abs && !instr->src[i].negate); >> } >> >> + if (bit_size == 0) >> + bit_size = 32; > > > Then it'll get set here.
Yeah, idk what I was thinking here. I think the validator already asserts that the unsized sources/dests have equal bitsizes, but we could assert here too. > >> >> + >> /* We shouldn't have any saturate modifiers in the optimization loop. >> */ >> assert(!instr->dest.saturate); >> >> nir_const_value dest = >> nir_eval_const_opcode(instr->op, >> instr->dest.dest.ssa.num_components, >> - src); >> + bit_size, src); >> >> nir_load_const_instr *new_instr = >> nir_load_const_instr_create(mem_ctx, >> instr->dest.dest.ssa.num_components); >> >> + new_instr->def.bit_size = instr->dest.dest.ssa.bit_size; >> new_instr->value = dest; >> >> nir_instr_insert_before(&instr->instr, &new_instr->instr); >> -- >> 2.7.0 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev