Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/glsl/nir/nir.h | 2 + src/glsl/nir/nir_constant_expressions.py | 8 +++- src/glsl/nir/nir_opcodes.py | 78 +++++++++++++++++++++++++++++++- 3 files changed, 86 insertions(+), 2 deletions(-)
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 697d37e..3c9d5ba 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -642,6 +642,7 @@ void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, typedef enum { nir_type_invalid = 0, /* Not a valid type */ nir_type_float, + nir_type_hfloat, nir_type_int, nir_type_unsigned, nir_type_bool @@ -1064,6 +1065,7 @@ nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type) typedef struct { union { float f[4]; + float h[4]; int32_t i[4]; uint32_t u[4]; }; diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py index bf82fe5..6e570d1 100644 --- a/src/glsl/nir/nir_constant_expressions.py +++ b/src/glsl/nir/nir_constant_expressions.py @@ -31,6 +31,12 @@ template = """\ #include "util/rounding.h" /* for _mesa_roundeven */ #include "nir_constant_expressions.h" +/** + * Constant values for half floats are treated as normal single precision + * floats in compile time. + */ +#define hfloat float + #if defined(_MSC_VER) && (_MSC_VER < 1800) static int isnormal(double x) { @@ -224,7 +230,7 @@ unpack_half_1x16(uint16_t u) } /* Some typed vector structures to make things like src0.y work */ -% for type in ["float", "int", "unsigned", "bool"]: +% for type in ["float", "hfloat", "int", "unsigned", "bool"]: struct ${type}_vec { ${type} x; ${type} y; diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index 56e96d9..766bfd9 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -89,6 +89,7 @@ class Opcode(object): # helper variables for strings tfloat = "float" +thalf = "hfloat" tint = "int" tbool = "bool" tunsigned = "unsigned" @@ -136,70 +137,106 @@ def unop_reduce(name, output_size, output_type, input_type, prereduce_expr, final(reduce_(reduce_(src0, src1), reduce_(src2, src3)))) -# These two move instructions differ in what modifiers they support and what +# These three move instructions differ in what modifiers they 
support and what # the negate modifier means. Otherwise, they are identical. unop("fmov", tfloat, "src0") +unop("hmov", thalf, "src0") unop("imov", tint, "src0") unop("ineg", tint, "-src0") unop("fneg", tfloat, "-src0") +unop("hneg", thalf, "-src0") unop("inot", tint, "~src0") # invert every bit of the integer unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f") +unop("hnot", thalf, "(src0 == 0.0f) ? 1.0f : 0.0f") unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)") +unop("hsign", thalf, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)") unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)") unop("iabs", tint, "(src0 < 0) ? -src0 : src0") unop("fabs", tfloat, "fabsf(src0)") +unop("habs", thalf, "fabsf(src0)") unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)") +unop("hsat", thalf, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)") unop("frcp", tfloat, "1.0f / src0") +unop("hrcp", thalf, "1.0f / src0") unop("frsq", tfloat, "1.0f / sqrtf(src0)") +unop("hrsq", thalf, "1.0f / sqrtf(src0)") unop("fsqrt", tfloat, "sqrtf(src0)") +unop("hsqrt", thalf, "sqrtf(src0)") unop("fexp2", tfloat, "exp2f(src0)") +unop("hexp2", thalf, "exp2f(src0)") unop("flog2", tfloat, "log2f(src0)") +unop("hlog2", thalf, "log2f(src0)") unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. +unop_convert("h2i", thalf, tint, "src0") # Half-to-integer conversion. unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion +unop_convert("h2u", thalf, tunsigned, "src0") # Half-to-unsigned conversion unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion. +unop_convert("i2h", tint, thalf, "src0") # Integer-to-half conversion. +unop_convert("h2f", thalf, tfloat, "src0") # Half-to-float conversion. +unop_convert("f2h", tfloat, thalf, "src0") # Float-to-half conversion. 
# Float-to-boolean conversion unop_convert("f2b", tfloat, tbool, "src0 != 0.0f") +unop_convert("h2b", thalf, tbool, "src0 != 0.0f") # Boolean-to-float conversion unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f") +unop_convert("b2h", tbool, thalf, "src0 ? 1.0f : 0.0f") # Int-to-boolean conversion unop_convert("i2b", tint, tbool, "src0 != 0") unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion. +unop_convert("u2h", tunsigned, thalf, "src0") #Unsigned-to-half conversion. unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}") unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}") unop_reduce("fany", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} || {src1}", "{src} ? 1.0f : 0.0f") +unop_reduce("hany", 1, thalf, thalf, "{src} != 0.0f", "{src0} || {src1}", + "{src} ? 1.0f : 0.0f") unop_reduce("fall", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} && {src1}", "{src} ? 1.0f : 0.0f") +unop_reduce("hall", 1, thalf, thalf, "{src} != 0.0f", "{src0} && {src1}", + "{src} ? 1.0f : 0.0f") # Unary floating-point rounding operations. unop("ftrunc", tfloat, "truncf(src0)") +unop("htrunc", thalf, "truncf(src0)") unop("fceil", tfloat, "ceilf(src0)") +unop("hceil", thalf, "ceilf(src0)") unop("ffloor", tfloat, "floorf(src0)") +unop("hfloor", thalf, "floorf(src0)") unop("ffract", tfloat, "src0 - floorf(src0)") +unop("hfract", thalf, "src0 - floorf(src0)") unop("fround_even", tfloat, "_mesa_roundevenf(src0)") +unop("hround_even", thalf, "_mesa_roundevenf(src0)") # Trigonometric operations. unop("fsin", tfloat, "sinf(src0)") +unop("hsin", thalf, "sinf(src0)") unop("fcos", tfloat, "cosf(src0)") +unop("hcos", thalf, "cosf(src0)") # Partial derivatives. unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0. 
+unop("hddx", thalf, "0.0f") unop("fddy", tfloat, "0.0f") +unop("hddy", thalf, "0.0f") unop("fddx_fine", tfloat, "0.0f") +unop("hddx_fine", thalf, "0.0f") unop("fddy_fine", tfloat, "0.0f") +unop("hddy_fine", thalf, "0.0f") unop("fddx_coarse", tfloat, "0.0f") +unop("hddx_coarse", thalf, "0.0f") unop("fddy_coarse", tfloat, "0.0f") +unop("hddy_coarse", thalf, "0.0f") # Floating point pack and unpack operations. @@ -310,6 +347,10 @@ for i in xrange(1, 5): for j in xrange(1, 5): unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f") +for i in xrange(1, 5): + for j in xrange(1, 5): + unop_horiz("hnoise{0}_{1}".format(i, j), i, thalf, j, thalf, "0.0f") + def binop_convert(name, out_type, in_type, alg_props, const_expr): opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr) @@ -347,11 +388,14 @@ def binop_reduce(name, output_size, output_type, src_type, prereduce_expr, final(reduce_(reduce_(src0, src1), reduce_(src2, src3)))) binop("fadd", tfloat, commutative + associative, "src0 + src1") +binop("hadd", thalf, commutative + associative, "src0 + src1") binop("iadd", tint, commutative + associative, "src0 + src1") binop("fsub", tfloat, "", "src0 - src1") +binop("hsub", thalf, "", "src0 - src1") binop("isub", tint, "", "src0 - src1") binop("fmul", tfloat, commutative + associative, "src0 * src1") +binop("hmul", thalf, commutative + associative, "src0 * src1") # low 32-bits of signed/unsigned integer multiply binop("imul", tint, commutative + associative, "src0 * src1") # high 32-bits of signed integer multiply @@ -362,6 +406,7 @@ binop("umul_high", tunsigned, commutative, "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)") binop("fdiv", tfloat, "", "src0 / src1") +binop("hdiv", thalf, "", "src0 / src1") binop("idiv", tint, "", "src0 / src1") binop("udiv", tunsigned, "", "src0 / src1") @@ -376,6 +421,7 @@ binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0") binop_convert("usub_borrow", tbool, tunsigned, 
"", "src1 < src0") binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") +binop("hmod", thalf, "", "src0 - src1 * floorf(src0 / src1)") binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1") # @@ -386,9 +432,13 @@ binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1") # these integer-aware comparisons return a boolean (0 or ~0) binop_compare("flt", tfloat, "", "src0 < src1") +binop_compare("hlt", thalf, "", "src0 < src1") binop_compare("fge", tfloat, "", "src0 >= src1") +binop_compare("hge", thalf, "", "src0 >= src1") binop_compare("feq", tfloat, commutative, "src0 == src1") +binop_compare("heq", thalf, commutative, "src0 == src1") binop_compare("fne", tfloat, commutative, "src0 != src1") +binop_compare("hne", thalf, commutative, "src0 != src1") binop_compare("ilt", tint, "", "src0 < src1") binop_compare("ige", tint, "", "src0 >= src1") binop_compare("ieq", tint, commutative, "src0 == src1") @@ -400,8 +450,12 @@ binop_compare("uge", tunsigned, "", "src0 >= src1") binop_reduce("ball_fequal", 1, tbool, tfloat, "{src0} == {src1}", "{src0} && {src1}", "{src}") +binop_reduce("ball_hequal", 1, tbool, thalf, "{src0} == {src1}", + "{src0} && {src1}", "{src}") binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}", "{src0} || {src1}", "{src}") +binop_reduce("bany_hnequal", 1, tbool, thalf, "{src0} != {src1}", + "{src0} || {src1}", "{src}") binop_reduce("ball_iequal", 1, tbool, tint, "{src0} == {src1}", "{src0} && {src1}", "{src}") binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}", @@ -411,16 +465,24 @@ binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}", binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}", "{src0} && {src1}", "{src} ? 1.0f : 0.0f") +binop_reduce("hall_equal", 1, thalf, thalf, "{src0} == {src1}", + "{src0} && {src1}", "{src} ? 1.0f : 0.0f") binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}", "{src0} || {src1}", "{src} ? 
1.0f : 0.0f") +binop_reduce("hany_nequal", 1, thalf, thalf, "{src0} != {src1}", + "{src0} || {src1}", "{src} ? 1.0f : 0.0f") # These comparisons for integer-less hardware return 1.0 and 0.0 for true # and false respectively binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than +binop("hslt", thalf, "", "(src0 < src1) ? 1.0f : 0.0f") binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal +binop("hsge", thalf, "", "(src0 >= src1) ? 1.0f : 0.0f") binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal +binop("hseq", thalf, commutative, "(src0 == src1) ? 1.0f : 0.0f") binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal +binop("hsne", thalf, commutative, "(src0 != src1) ? 1.0f : 0.0f") binop("ishl", tint, "", "src0 << src1") @@ -445,22 +507,33 @@ binop("ixor", tunsigned, commutative + associative, "src0 ^ src1") binop("fand", tfloat, commutative, "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f") +binop("hand", thalf, commutative, + "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f") binop("for", tfloat, commutative, "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f") +binop("hor", thalf, commutative, + "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f") binop("fxor", tfloat, commutative, "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f") +binop("hxor", thalf, commutative, + "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f") binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}", "{src}") +binop_reduce("hdot", 1, thalf, thalf, "{src0} * {src1}", "{src0} + {src1}", + "{src}") binop("fmin", tfloat, "", "fminf(src0, src1)") +binop("hmin", thalf, "", "fminf(src0, src1)") binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1") binop("umin", tunsigned, commutative + associative, "src1 > src0 ? 
src0 : src1") binop("fmax", tfloat, "", "fmaxf(src0, src1)") +binop("hmax", thalf, "", "fmaxf(src0, src1)") binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0") binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0") binop("fpow", tfloat, "", "powf(src0, src1)") +binop("hpow", thalf, "", "powf(src0, src1)") binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat, "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)") @@ -495,8 +568,10 @@ def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr): [tunsigned, tunsigned, tunsigned], "", const_expr) triop("ffma", tfloat, "src0 * src1 + src2") +triop("hfma", thalf, "src0 * src1 + src2") triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2") +triop("hlrp", thalf, "src0 * (1 - src2) + src1 * src2") # Conditional Select # @@ -506,6 +581,7 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2") triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2") +triop("hcsel", thalf, "(src0 != 0.0f) ? src1 : src2") opcode("bcsel", 0, tunsigned, [0, 0, 0], [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2") -- 1.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev