On Tue, Mar 15, 2016 at 5:53 AM, Iago Toral <ito...@igalia.com> wrote: > On Mon, 2016-03-14 at 16:48 -0700, Jason Ekstrand wrote: >> >> >> On Mon, Mar 7, 2016 at 12:46 AM, Samuel Iglesias Gonsálvez >> <sigles...@igalia.com> wrote: >> From: Connor Abbott <connor.w.abb...@intel.com> >> >> v2: Use the bit-size information from the opcode information >> if defined (Iago) >> >> Signed-off-by: Iago Toral Quiroga <ito...@igalia.com> >> >> FIXME: This should be squashed into the previous commit so we >> don't break >> the build. The break happens because the python script that >> generates the >> constant folding pass does not know how to handle the sized >> types introduced >> by the previous commit until this patch, so it ends up >> generating code with >> invalid types. Keep it separated for review purposes. >> --- >> src/compiler/nir/nir_constant_expressions.h | 2 +- >> src/compiler/nir/nir_constant_expressions.py | 246 >> +++++++++++++++++---------- >> src/compiler/nir/nir_opt_constant_folding.c | 24 ++- >> 3 files changed, 182 insertions(+), 90 deletions(-) >> >> diff --git a/src/compiler/nir/nir_constant_expressions.h >> b/src/compiler/nir/nir_constant_expressions.h >> index 97997f2..201f278 100644 >> --- a/src/compiler/nir/nir_constant_expressions.h >> +++ b/src/compiler/nir/nir_constant_expressions.h >> @@ -28,4 +28,4 @@ >> #include "nir.h" >> >> nir_const_value nir_eval_const_opcode(nir_op op, unsigned >> num_components, >> - nir_const_value *src); >> + unsigned bit_size, >> nir_const_value *src); >> diff --git a/src/compiler/nir/nir_constant_expressions.py >> b/src/compiler/nir/nir_constant_expressions.py >> index 32784f6..972d281 100644 >> --- a/src/compiler/nir/nir_constant_expressions.py >> +++ b/src/compiler/nir/nir_constant_expressions.py >> @@ -1,4 +1,43 @@ >> #! /usr/bin/python2 >> + >> +def type_has_size(type_): >> + return type_[-1:].isdigit() >> + >> +def type_sizes(type_): >> + if type_.endswith("8"): >> + return [8] >> + elif type_.endswith("16"): >> + return [16] >> + elif type_.endswith("32"): >> + return [32] >> + elif type_.endswith("64"): >> + return [64] >> + else: >> + return [32, 64] >> + >> +def type_add_size(type_, size): >> + if type_has_size(type_): >> + return type_ >> + return type_ + str(size) >> + >> +def get_const_field(type_): >> + if type_ == "int32": >> + return "i" >> + if type_ == "uint32": >> + return "u" >> + if type_ == "int64": >> + return "l" >> + if type_ == "uint64": >> + return "ul" >> + if type_ == "bool32": >> + return "b" >> + if type_ == "float32": >> + return "f" >> + if type_ == "float64": >> + return "d" >> + raise Exception(str(type_)) >> + assert(0) >> + >> template = """\ >> /* >> * Copyright (C) 2014 Intel Corporation >> @@ -205,110 +244,140 @@ unpack_half_1x16(uint16_t u) >> } >> >> /* Some typed vector structures to make things like src0.y >> work */ >> -% for type in ["float", "int", "uint", "bool"]: >> -struct ${type}_vec { >> - ${type} x; >> - ${type} y; >> - ${type} z; >> - ${type} w; >> +typedef float float32_t; >> +typedef double float64_t; >> +typedef bool bool32_t; >> +% for type in ["float", "int", "uint"]: >> +% for width in [32, 64]: >> +struct ${type}${width}_vec { >> + ${type}${width}_t x; >> + ${type}${width}_t y; >> + ${type}${width}_t z; >> + ${type}${width}_t w; >> }; >> % endfor >> +% endfor >> + >> +struct bool32_vec { >> + bool x; >> + bool y; >> + bool z; >> + bool w; >> +}; >> >> % for name, op in sorted(opcodes.iteritems()): >> static nir_const_value >> -evaluate_${name}(unsigned num_components, nir_const_value >> *_src) >> +evaluate_${name}(unsigned num_components, unsigned bit_size, >> + nir_const_value *_src) >> { >> nir_const_value _dst_val = { { {0, 0, 0, 0} } }; >> >> - ## For each non-per-component input, create a variable >> srcN that >> - ## contains x, y, z, and w elements which are filled in >> with the >> - ## appropriately-typed values. >> - % for j in range(op.num_inputs): >> - % if op.input_sizes[j] == 0: >> - <% continue %> >> - % elif "src" + str(j) not in op.const_expr: >> - ## Avoid unused variable warnings >> - <% continue %> >> - %endif >> - >> - struct ${op.input_types[j]}_vec src${j} = { >> - % for k in range(op.input_sizes[j]): >> - % if op.input_types[j] == "bool": >> - _src[${j}].u[${k}] != 0, >> - % else: >> - _src[${j}].${op.input_types[j][:1]}[${k}], >> - % endif >> - % endfor >> - }; >> - % endfor >> + switch (bit_size) { >> + % for bit_size in [32, 64]: >> + case ${bit_size}: { >> + <% >> + output_type = type_add_size(op.output_type, bit_size) >> + input_types = [type_add_size(type_, bit_size) for type_ >> in op.input_types] >> + %> >> + >> + ## For each non-per-component input, create a variable >> srcN that >> + ## contains x, y, z, and w elements which are filled in >> with the >> + ## appropriately-typed values. >> + % for j in range(op.num_inputs): >> + % if op.input_sizes[j] == 0: >> + <% continue %> >> + % elif "src" + str(j) not in op.const_expr: >> + ## Avoid unused variable warnings >> + <% continue %> >> + %endif >> >> - % if op.output_size == 0: >> - ## For per-component instructions, we need to iterate >> over the >> - ## components and apply the constant expression one >> component >> - ## at a time. >> - for (unsigned _i = 0; _i < num_components; _i++) { >> - ## For each per-component input, create a variable >> srcN that >> - ## contains the value of the current (_i'th) >> component. >> - % for j in range(op.num_inputs): >> - % if op.input_sizes[j] != 0: >> - <% continue %> >> - % elif "src" + str(j) not in op.const_expr: >> - ## Avoid unused variable warnings >> - <% continue %> >> - % elif op.input_types[j] == "bool": >> - bool src${j} = _src[${j}].u[_i] != 0; >> + struct ${input_types[j]}_vec src${j} = { >> + % for k in range(op.input_sizes[j]): >> + % if input_types[j] == "bool32": >> + _src[${j}].u[${k}] != 0, >> % else: >> - ${op.input_types[j]} src${j} = >> _src[${j}].${op.input_types[j][:1]}[_i]; >> + >> _src[${j}].${get_const_field(input_types[j])}[${k}], >> % endif >> % endfor >> + }; >> + % endfor >> + >> + % if op.output_size == 0: >> + ## For per-component instructions, we need to >> iterate over the >> + ## components and apply the constant expression one >> component >> + ## at a time. >> + for (unsigned _i = 0; _i < num_components; _i++) { >> + ## For each per-component input, create a >> variable srcN that >> + ## contains the value of the current (_i'th) >> component. >> + % for j in range(op.num_inputs): >> + % if op.input_sizes[j] != 0: >> + <% continue %> >> + % elif "src" + str(j) not in op.const_expr: >> + ## Avoid unused variable warnings >> + <% continue %> >> + % elif input_types[j] == "bool32": >> + bool src${j} = _src[${j}].u[_i] != 0; >> + % else: >> + ${input_types[j]}_t src${j} = >> + >> _src[${j}].${get_const_field(input_types[j])}[_i]; >> + % endif >> + % endfor >> + >> + ## Create an appropriately-typed variable dst and >> assign the >> + ## result of the const_expr to it. If const_expr >> already contains >> + ## writes to dst, just include const_expr >> directly. >> + % if "dst" in op.const_expr: >> + ${output_type}_t dst; >> + ${op.const_expr} >> + % else: >> + ${output_type}_t dst = ${op.const_expr}; >> + % endif >> + >> + ## Store the current component of the actual >> destination to the >> + ## value of dst. >> + % if output_type == "bool32": >> + ## Sanitize the C value to a proper NIR bool >> + _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE; >> + % else: >> + _dst_val.${get_const_field(output_type)}[_i] = >> dst; >> + % endif >> + } >> + % else: >> + ## In the non-per-component case, create a struct >> dst with >> + ## appropriately-typed elements x, y, z, and w and >> assign the result >> + ## of the const_expr to all components of dst, or >> include the >> + ## const_expr directly if it writes to dst already. >> + struct ${output_type}_vec dst; >> >> - ## Create an appropriately-typed variable dst and >> assign the >> - ## result of the const_expr to it. If const_expr >> already contains >> - ## writes to dst, just include const_expr directly. >> % if "dst" in op.const_expr: >> - ${op.output_type} dst; >> ${op.const_expr} >> % else: >> - ${op.output_type} dst = ${op.const_expr}; >> + ## Splat the value to all components. This way >> expressions which >> + ## write the same value to all components don't >> need to explicitly >> + ## write to dest. One such example is fnoise >> which has a >> + ## const_expr of 0.0f. >> + dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; >> % endif >> >> - ## Store the current component of the actual >> destination to the >> - ## value of dst. >> - % if op.output_type == "bool": >> - ## Sanitize the C value to a proper NIR bool >> - _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE; >> - % else: >> - _dst_val.${op.output_type[:1]}[_i] = dst; >> - % endif >> - } >> - % else: >> - ## In the non-per-component case, create a struct dst >> with >> - ## appropriately-typed elements x, y, z, and w and >> assign the result >> - ## of the const_expr to all components of dst, or >> include the >> - ## const_expr directly if it writes to dst already. >> - struct ${op.output_type}_vec dst; >> - >> - % if "dst" in op.const_expr: >> - ${op.const_expr} >> - % else: >> - ## Splat the value to all components. This way >> expressions which >> - ## write the same value to all components don't need >> to explicitly >> - ## write to dest. One such example is fnoise which >> has a >> - ## const_expr of 0.0f. >> - dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; >> + ## For each component in the destination, copy the >> value of dst to >> + ## the actual destination. >> + % for k in range(op.output_size): >> + % if output_type == "bool32": >> + ## Sanitize the C value to a proper NIR bool >> + _dst_val.u[${k}] = dst.${"xyzw"[k]} ? >> NIR_TRUE : NIR_FALSE; >> + % else: >> + _dst_val.${get_const_field(output_type)}[${k}] >> = dst.${"xyzw"[k]}; >> + % endif >> + % endfor >> % endif >> >> - ## For each component in the destination, copy the >> value of dst to >> - ## the actual destination. >> - % for k in range(op.output_size): >> - % if op.output_type == "bool": >> - ## Sanitize the C value to a proper NIR bool >> - _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : >> NIR_FALSE; >> - % else: >> - _dst_val.${op.output_type[:1]}[${k}] = >> dst.${"xyzw"[k]}; >> - % endif >> - % endfor >> - % endif >> + break; >> + } >> + % endfor >> + >> + default: >> + unreachable("unknown bit width"); >> + } >> >> return _dst_val; >> } >> @@ -316,12 +385,12 @@ evaluate_${name}(unsigned >> num_components, nir_const_value *_src) >> >> nir_const_value >> nir_eval_const_opcode(nir_op op, unsigned num_components, >> - nir_const_value *src) >> + unsigned bit_width, nir_const_value >> *src) >> { >> switch (op) { >> % for name in sorted(opcodes.iterkeys()): >> case nir_op_${name}: { >> - return evaluate_${name}(num_components, src); >> + return evaluate_${name}(num_components, bit_width, >> src); >> break; >> } >> % endfor >> @@ -333,4 +402,7 @@ nir_eval_const_opcode(nir_op op, unsigned >> num_components, >> from nir_opcodes import opcodes >> from mako.template import Template >> >> -print Template(template).render(opcodes=opcodes) >> +print Template(template).render(opcodes=opcodes, >> type_sizes=type_sizes, >> + type_has_size=type_has_size, >> + type_add_size=type_add_size, >> + >> get_const_field=get_const_field) >> diff --git a/src/compiler/nir/nir_opt_constant_folding.c >> b/src/compiler/nir/nir_opt_constant_folding.c >> index 04876a4..29905a0 100644 >> --- a/src/compiler/nir/nir_opt_constant_folding.c >> +++ b/src/compiler/nir/nir_opt_constant_folding.c >> @@ -46,10 +46,23 @@ constant_fold_alu_instr(nir_alu_instr >> *instr, void *mem_ctx) >> if (!instr->dest.dest.is_ssa) >> return false; >> >> + unsigned bit_size = 0; >> + if (!(nir_op_infos[instr->op].output_type & >> NIR_ALU_TYPE_SIZE_MASK)) >> + bit_size = instr->dest.dest.ssa.bit_size; >> + else >> + bit_size = nir_op_infos[instr->op].output_type & >> NIR_ALU_TYPE_SIZE_MASK; >> >> >> This isn't right. We need to look at all the unsized types and try to >> pull it from one of those. We shouldn't fall back to grabbing from >> the sized type. > > Ok, so you don't like that in the case that the alu operation has a > sized destination we grab the bit-size from the opcode definition? I am > not sure I see the problem with that... isn't the opcode mandating a > specific bit-size in that case? How can the bit-size we want be > different from that? > >> >> + >> for (unsigned i = 0; i < >> nir_op_infos[instr->op].num_inputs; i++) { >> if (!instr->src[i].src.is_ssa) >> return false; >> >> + if (bit_size == 0) { >> + if (!(nir_op_infos[instr->op].input_sizes[i] & >> NIR_ALU_TYPE_SIZE_MASK)) >> + bit_size = instr->src[i].src.ssa->bit_size; >> + else >> + bit_size = nir_op_infos[instr->op].input_sizes[i] >> & NIR_ALU_TYPE_SIZE_MASK; >> >> >> Same here. If they don't have any unsized sources or destinations to >> grab from, we should let bit_size be zero. > > But if we have an opcode with all sized 64-bit types then... > >> Also, if we have multiple sources with the same unsized type, we >> should assert that the sizes match. >> >> >> + } >> + >> nir_instr *src_instr = >> instr->src[i].src.ssa->parent_instr; >> >> if (src_instr->type != nir_instr_type_load_const) >> @@ -58,24 +71,31 @@ constant_fold_alu_instr(nir_alu_instr >> *instr, void *mem_ctx) >> >> for (unsigned j = 0; j < >> nir_ssa_alu_instr_src_components(instr, i); >> j++) { >> - src[i].u[j] = >> load_const->value.u[instr->src[i].swizzle[j]]; >> + if (load_const->def.bit_size == 64) >> + src[i].ul[j] = >> load_const->value.ul[instr->src[i].swizzle[j]]; >> + else >> + src[i].u[j] = >> load_const->value.u[instr->src[i].swizzle[j]]; >> } >> >> /* We shouldn't have any source modifiers in the >> optimization loop. */ >> assert(!instr->src[i].abs && !instr->src[i].negate); >> } >> >> + if (bit_size == 0) >> + bit_size = 32; > > ... this default to 32 here would not be correct any more. If at this > point the bit-size is 0 (meaning that all inputs and output are sized) > then we should take the bit-size from the opcode's output type, which is > known to be sized, right?
Not quite. xd > >> >> Then it'll get set here. >> >> >> + >> /* We shouldn't have any saturate modifiers in the >> optimization loop. */ >> assert(!instr->dest.saturate); >> >> nir_const_value dest = >> nir_eval_const_opcode(instr->op, >> instr->dest.dest.ssa.num_components, >> - src); >> + bit_size, src); >> >> nir_load_const_instr *new_instr = >> nir_load_const_instr_create(mem_ctx, >> >> instr->dest.dest.ssa.num_components); >> >> + new_instr->def.bit_size = instr->dest.dest.ssa.bit_size; >> new_instr->value = dest; >> >> nir_instr_insert_before(&instr->instr, &new_instr->instr); >> -- >> 2.7.0 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev