On Mon, 2016-03-14 at 16:48 -0700, Jason Ekstrand wrote: > > > On Mon, Mar 7, 2016 at 12:46 AM, Samuel Iglesias Gonsálvez > <sigles...@igalia.com> wrote: > From: Connor Abbott <connor.w.abb...@intel.com> > > v2: Use the bit-size information from the opcode information > if defined (Iago) > > Signed-off-by: Iago Toral Quiroga <ito...@igalia.com> > > FIXME: This should be squashed into the previous commit so we > don't break > the build. The break happens because the python script that > generates the > constant folding pass does not know how to handle the sized > types introduced > by the previous commit until this patch, so it ends up > generating code with > invalid types. Keep it separated for review purposes. > --- > src/compiler/nir/nir_constant_expressions.h | 2 +- > src/compiler/nir/nir_constant_expressions.py | 246 > +++++++++++++++++---------- > src/compiler/nir/nir_opt_constant_folding.c | 24 ++- > 3 files changed, 182 insertions(+), 90 deletions(-) > > diff --git a/src/compiler/nir/nir_constant_expressions.h > b/src/compiler/nir/nir_constant_expressions.h > index 97997f2..201f278 100644 > --- a/src/compiler/nir/nir_constant_expressions.h > +++ b/src/compiler/nir/nir_constant_expressions.h > @@ -28,4 +28,4 @@ > #include "nir.h" > > nir_const_value nir_eval_const_opcode(nir_op op, unsigned > num_components, > - nir_const_value *src); > + unsigned bit_size, > nir_const_value *src); > diff --git a/src/compiler/nir/nir_constant_expressions.py > b/src/compiler/nir/nir_constant_expressions.py > index 32784f6..972d281 100644 > --- a/src/compiler/nir/nir_constant_expressions.py > +++ b/src/compiler/nir/nir_constant_expressions.py > @@ -1,4 +1,43 @@ > #! /usr/bin/python2 > + > +def type_has_size(type_): > + return type_[-1:].isdigit() > + > +def type_sizes(type_): > + if type_.endswith("8"): > + return [8] > + elif type_.endswith("16"): > + return [16] > + elif type_.endswith("32"): > + return [32] > + elif type_.endswith("64"): > + return [64] > + else: > + return [32, 64] > + > +def type_add_size(type_, size): > + if type_has_size(type_): > + return type_ > + return type_ + str(size) > + > +def get_const_field(type_): > + if type_ == "int32": > + return "i" > + if type_ == "uint32": > + return "u" > + if type_ == "int64": > + return "l" > + if type_ == "uint64": > + return "ul" > + if type_ == "bool32": > + return "b" > + if type_ == "float32": > + return "f" > + if type_ == "float64": > + return "d" > + raise Exception(str(type_)) > + assert(0) > + > template = """\ > /* > * Copyright (C) 2014 Intel Corporation > @@ -205,110 +244,140 @@ unpack_half_1x16(uint16_t u) > } > > /* Some typed vector structures to make things like src0.y > work */ > -% for type in ["float", "int", "uint", "bool"]: > -struct ${type}_vec { > - ${type} x; > - ${type} y; > - ${type} z; > - ${type} w; > +typedef float float32_t; > +typedef double float64_t; > +typedef bool bool32_t; > +% for type in ["float", "int", "uint"]: > +% for width in [32, 64]: > +struct ${type}${width}_vec { > + ${type}${width}_t x; > + ${type}${width}_t y; > + ${type}${width}_t z; > + ${type}${width}_t w; > }; > % endfor > +% endfor > + > +struct bool32_vec { > + bool x; > + bool y; > + bool z; > + bool w; > +}; > > % for name, op in sorted(opcodes.iteritems()): > static nir_const_value > -evaluate_${name}(unsigned num_components, nir_const_value > *_src) > +evaluate_${name}(unsigned num_components, unsigned bit_size, > + nir_const_value *_src) > { > nir_const_value _dst_val = { { {0, 0, 0, 0} } }; > > - ## For each non-per-component input, create a variable > srcN that > - ## contains x, y, z, and w elements which are filled in > with the > - ## appropriately-typed values. > - % for j in range(op.num_inputs): > - % if op.input_sizes[j] == 0: > - <% continue %> > - % elif "src" + str(j) not in op.const_expr: > - ## Avoid unused variable warnings > - <% continue %> > - %endif > - > - struct ${op.input_types[j]}_vec src${j} = { > - % for k in range(op.input_sizes[j]): > - % if op.input_types[j] == "bool": > - _src[${j}].u[${k}] != 0, > - % else: > - _src[${j}].${op.input_types[j][:1]}[${k}], > - % endif > - % endfor > - }; > - % endfor > + switch (bit_size) { > + % for bit_size in [32, 64]: > + case ${bit_size}: { > + <% > + output_type = type_add_size(op.output_type, bit_size) > + input_types = [type_add_size(type_, bit_size) for type_ > in op.input_types] > + %> > + > + ## For each non-per-component input, create a variable > srcN that > + ## contains x, y, z, and w elements which are filled in > with the > + ## appropriately-typed values. > + % for j in range(op.num_inputs): > + % if op.input_sizes[j] == 0: > + <% continue %> > + % elif "src" + str(j) not in op.const_expr: > + ## Avoid unused variable warnings > + <% continue %> > + %endif > > - % if op.output_size == 0: > - ## For per-component instructions, we need to iterate > over the > - ## components and apply the constant expression one > component > - ## at a time. > - for (unsigned _i = 0; _i < num_components; _i++) { > - ## For each per-component input, create a variable > srcN that > - ## contains the value of the current (_i'th) > component. > - % for j in range(op.num_inputs): > - % if op.input_sizes[j] != 0: > - <% continue %> > - % elif "src" + str(j) not in op.const_expr: > - ## Avoid unused variable warnings > - <% continue %> > - % elif op.input_types[j] == "bool": > - bool src${j} = _src[${j}].u[_i] != 0; > + struct ${input_types[j]}_vec src${j} = { > + % for k in range(op.input_sizes[j]): > + % if input_types[j] == "bool32": > + _src[${j}].u[${k}] != 0, > % else: > - ${op.input_types[j]} src${j} = > _src[${j}].${op.input_types[j][:1]}[_i]; > + > _src[${j}].${get_const_field(input_types[j])}[${k}], > % endif > % endfor > + }; > + % endfor > + > + % if op.output_size == 0: > + ## For per-component instructions, we need to > iterate over the > + ## components and apply the constant expression one > component > + ## at a time. > + for (unsigned _i = 0; _i < num_components; _i++) { > + ## For each per-component input, create a > variable srcN that > + ## contains the value of the current (_i'th) > component. > + % for j in range(op.num_inputs): > + % if op.input_sizes[j] != 0: > + <% continue %> > + % elif "src" + str(j) not in op.const_expr: > + ## Avoid unused variable warnings > + <% continue %> > + % elif input_types[j] == "bool32": > + bool src${j} = _src[${j}].u[_i] != 0; > + % else: > + ${input_types[j]}_t src${j} = > + > _src[${j}].${get_const_field(input_types[j])}[_i]; > + % endif > + % endfor > + > + ## Create an appropriately-typed variable dst and > assign the > + ## result of the const_expr to it. If const_expr > already contains > + ## writes to dst, just include const_expr > directly. > + % if "dst" in op.const_expr: > + ${output_type}_t dst; > + ${op.const_expr} > + % else: > + ${output_type}_t dst = ${op.const_expr}; > + % endif > + > + ## Store the current component of the actual > destination to the > + ## value of dst. > + % if output_type == "bool32": > + ## Sanitize the C value to a proper NIR bool > + _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE; > + % else: > + _dst_val.${get_const_field(output_type)}[_i] = > dst; > + % endif > + } > + % else: > + ## In the non-per-component case, create a struct > dst with > + ## appropriately-typed elements x, y, z, and w and > assign the result > + ## of the const_expr to all components of dst, or > include the > + ## const_expr directly if it writes to dst already. > + struct ${output_type}_vec dst; > > - ## Create an appropriately-typed variable dst and > assign the > - ## result of the const_expr to it. If const_expr > already contains > - ## writes to dst, just include const_expr directly. > % if "dst" in op.const_expr: > - ${op.output_type} dst; > ${op.const_expr} > % else: > - ${op.output_type} dst = ${op.const_expr}; > + ## Splat the value to all components. This way > expressions which > + ## write the same value to all components don't > need to explicitly > + ## write to dest. One such example is fnoise > which has a > + ## const_expr of 0.0f. > + dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; > % endif > > - ## Store the current component of the actual > destination to the > - ## value of dst. > - % if op.output_type == "bool": > - ## Sanitize the C value to a proper NIR bool > - _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE; > - % else: > - _dst_val.${op.output_type[:1]}[_i] = dst; > - % endif > - } > - % else: > - ## In the non-per-component case, create a struct dst > with > - ## appropriately-typed elements x, y, z, and w and > assign the result > - ## of the const_expr to all components of dst, or > include the > - ## const_expr directly if it writes to dst already. > - struct ${op.output_type}_vec dst; > - > - % if "dst" in op.const_expr: > - ${op.const_expr} > - % else: > - ## Splat the value to all components. This way > expressions which > - ## write the same value to all components don't need > to explicitly > - ## write to dest. One such example is fnoise which > has a > - ## const_expr of 0.0f. > - dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; > + ## For each component in the destination, copy the > value of dst to > + ## the actual destination. > + % for k in range(op.output_size): > + % if output_type == "bool32": > + ## Sanitize the C value to a proper NIR bool > + _dst_val.u[${k}] = dst.${"xyzw"[k]} ? > NIR_TRUE : NIR_FALSE; > + % else: > + _dst_val.${get_const_field(output_type)}[${k}] > = dst.${"xyzw"[k]}; > + % endif > + % endfor > % endif > > - ## For each component in the destination, copy the > value of dst to > - ## the actual destination. > - % for k in range(op.output_size): > - % if op.output_type == "bool": > - ## Sanitize the C value to a proper NIR bool > - _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : > NIR_FALSE; > - % else: > - _dst_val.${op.output_type[:1]}[${k}] = > dst.${"xyzw"[k]}; > - % endif > - % endfor > - % endif > + break; > + } > + % endfor > + > + default: > + unreachable("unknown bit width"); > + } > > return _dst_val; > } > @@ -316,12 +385,12 @@ evaluate_${name}(unsigned > num_components, nir_const_value *_src) > > nir_const_value > nir_eval_const_opcode(nir_op op, unsigned num_components, > - nir_const_value *src) > + unsigned bit_width, nir_const_value > *src) > { > switch (op) { > % for name in sorted(opcodes.iterkeys()): > case nir_op_${name}: { > - return evaluate_${name}(num_components, src); > + return evaluate_${name}(num_components, bit_width, > src); > break; > } > % endfor > @@ -333,4 +402,7 @@ nir_eval_const_opcode(nir_op op, unsigned > num_components, > from nir_opcodes import opcodes > from mako.template import Template > > -print Template(template).render(opcodes=opcodes) > +print Template(template).render(opcodes=opcodes, > type_sizes=type_sizes, > + type_has_size=type_has_size, > + type_add_size=type_add_size, > + > get_const_field=get_const_field) > diff --git a/src/compiler/nir/nir_opt_constant_folding.c > b/src/compiler/nir/nir_opt_constant_folding.c > index 04876a4..29905a0 100644 > --- a/src/compiler/nir/nir_opt_constant_folding.c > +++ b/src/compiler/nir/nir_opt_constant_folding.c > @@ -46,10 +46,23 @@ constant_fold_alu_instr(nir_alu_instr > *instr, void *mem_ctx) > if (!instr->dest.dest.is_ssa) > return false; > > + unsigned bit_size = 0; > + if (!(nir_op_infos[instr->op].output_type & > NIR_ALU_TYPE_SIZE_MASK)) > + bit_size = instr->dest.dest.ssa.bit_size; > + else > + bit_size = nir_op_infos[instr->op].output_type & > NIR_ALU_TYPE_SIZE_MASK; > > > This isn't right. We need to look at all the unsized types and try to > pull it from one of those. We shouldn't fall back to grabbing from > the sized type.
Ok, so you don't like that in the case that the alu operation has a sized destination we grab the bit-size from the opcode definition? I am not sure I see the problem with that... isn't the opcode mandating a specific bit-size in that case? How can the bit-size we want be different from that? > > + > for (unsigned i = 0; i < > nir_op_infos[instr->op].num_inputs; i++) { > if (!instr->src[i].src.is_ssa) > return false; > > + if (bit_size == 0) { > + if (!(nir_op_infos[instr->op].input_sizes[i] & > NIR_ALU_TYPE_SIZE_MASK)) > + bit_size = instr->src[i].src.ssa->bit_size; > + else > + bit_size = nir_op_infos[instr->op].input_sizes[i] > & NIR_ALU_TYPE_SIZE_MASK; > > > Same here. If they don't have any unsized sources or destinations to > grab from, we should let bit_size be zero. But if we have an opcode with all sized 64-bit types then... > Also, if we have multiple sources with the same unsized type, we > should assert that the sizes match. > > > + } > + > nir_instr *src_instr = > instr->src[i].src.ssa->parent_instr; > > if (src_instr->type != nir_instr_type_load_const) > @@ -58,24 +71,31 @@ constant_fold_alu_instr(nir_alu_instr > *instr, void *mem_ctx) > > for (unsigned j = 0; j < > nir_ssa_alu_instr_src_components(instr, i); > j++) { > - src[i].u[j] = > load_const->value.u[instr->src[i].swizzle[j]]; > + if (load_const->def.bit_size == 64) > + src[i].ul[j] = > load_const->value.ul[instr->src[i].swizzle[j]]; > + else > + src[i].u[j] = > load_const->value.u[instr->src[i].swizzle[j]]; > } > > /* We shouldn't have any source modifiers in the > optimization loop. */ > assert(!instr->src[i].abs && !instr->src[i].negate); > } > > + if (bit_size == 0) > + bit_size = 32; ... this default to 32 here would not be correct any more. If at this point the bit-size is 0 (meaning that all inputs and output are sized) then we should take the bit-size from the opcode's output type, which is known to be sized, right? > > Then it'll get set here. > > > + > /* We shouldn't have any saturate modifiers in the > optimization loop. */ > assert(!instr->dest.saturate); > > nir_const_value dest = > nir_eval_const_opcode(instr->op, > instr->dest.dest.ssa.num_components, > - src); > + bit_size, src); > > nir_load_const_instr *new_instr = > nir_load_const_instr_create(mem_ctx, > > instr->dest.dest.ssa.num_components); > > + new_instr->def.bit_size = instr->dest.dest.ssa.bit_size; > new_instr->value = dest; > > nir_instr_insert_before(&instr->instr, &new_instr->instr); > -- > 2.7.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev