[Mesa-dev] [PATCH 15/50] glsl: Add "built-in" functions to do sqrt(fp64)

Dave Airlie Mon, 12 Mar 2018 21:27:07 -0700

From: Elie Tournier <tournier.e...@gmail.com>

This currently uses fp64->fp32, sqrt(fp32), fp32->fp64.


[airlied: The code is included from soft float for doing a proper sqrt64,
but it needs to be decided whether we need to pursue this and
how to optimise it better.]

Signed-off-by: Elie Tournier <elie.tourn...@collabora.com>
---
 src/compiler/glsl/builtin_float64.h     | 393 ++++++++++++++++++++++++++++++++
 src/compiler/glsl/builtin_functions.cpp |   4 +
 src/compiler/glsl/builtin_functions.h   |   3 +
 src/compiler/glsl/float64.glsl          | 275 ++++++++++++++++++++++
 src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
 5 files changed, 676 insertions(+)

diff --git a/src/compiler/glsl/builtin_float64.h 
b/src/compiler/glsl/builtin_float64.h
index 034d2d0..6fbe12d 100644
--- a/src/compiler/glsl/builtin_float64.h
+++ b/src/compiler/glsl/builtin_float64.h
@@ -6242,3 +6242,396 @@ fp32_to_fp64(void *mem_ctx, builtin_available_predicate 
avail)
    sig->replace_parameters(&sig_parameters);
    return sig;
 }
+ir_function_signature *
+fsqrt64(void *mem_ctx, builtin_available_predicate avail)
+{
+   ir_function_signature *const sig =
+      new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail);
+   ir_factory body(&sig->body, mem_ctx);
+   sig->is_defined = true;
+
+   exec_list sig_parameters;
+
+   ir_variable *const r09A9 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, 
"a", ir_var_function_in);
+   sig_parameters.push_tail(r09A9);
+   ir_variable *const r09AA = body.make_temp(glsl_type::uvec2_type, "a");
+   body.emit(assign(r09AA, r09A9, 0x03));
+
+   ir_variable *const r09AB = body.make_temp(glsl_type::float_type, 
"return_value");
+   ir_variable *const r09AC = body.make_temp(glsl_type::uint_type, 
"extractFloat64FracHi_retval");
+   body.emit(assign(r09AC, bit_and(swizzle_y(r09A9), body.constant(1048575u)), 
0x01));
+
+   ir_variable *const r09AD = body.make_temp(glsl_type::int_type, 
"extractFloat64Exp_retval");
+   ir_expression *const r09AE = rshift(swizzle_y(r09A9), 
body.constant(int(20)));
+   ir_expression *const r09AF = bit_and(r09AE, body.constant(2047u));
+   body.emit(assign(r09AD, expr(ir_unop_u2i, r09AF), 0x01));
+
+   ir_variable *const r09B0 = body.make_temp(glsl_type::uint_type, 
"extractFloat64Sign_retval");
+   body.emit(assign(r09B0, rshift(swizzle_y(r09A9), body.constant(int(31))), 
0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r09B2 = equal(r09AD, body.constant(int(2047)));
+   ir_if *f09B1 = new(mem_ctx) ir_if(operand(r09B2).val);
+   exec_list *const f09B1_parent_instructions = body.instructions;
+
+      /* THEN INSTRUCTIONS */
+      body.instructions = &f09B1->then_instructions;
+
+      ir_variable *const r09B3 = new(mem_ctx) 
ir_variable(glsl_type::float_type, "rval", ir_var_auto);
+      body.emit(r09B3);
+      ir_expression *const r09B4 = lshift(swizzle_y(r09A9), 
body.constant(int(12)));
+      ir_expression *const r09B5 = rshift(swizzle_x(r09A9), 
body.constant(int(20)));
+      body.emit(assign(r09AA, bit_or(r09B4, r09B5), 0x02));
+
+      body.emit(assign(r09AA, lshift(swizzle_x(r09A9), 
body.constant(int(12))), 0x01));
+
+      ir_expression *const r09B6 = lshift(r09B0, body.constant(int(31)));
+      ir_expression *const r09B7 = bit_or(r09B6, body.constant(2143289344u));
+      ir_expression *const r09B8 = rshift(swizzle_y(r09AA), 
body.constant(int(9)));
+      ir_expression *const r09B9 = bit_or(r09B7, r09B8);
+      body.emit(assign(r09B3, expr(ir_unop_bitcast_u2f, r09B9), 0x01));
+
+      ir_variable *const r09BA = body.make_temp(glsl_type::float_type, 
"mix_retval");
+      ir_expression *const r09BB = bit_or(r09AC, swizzle_x(r09A9));
+      ir_expression *const r09BC = nequal(r09BB, body.constant(0u));
+      ir_expression *const r09BD = lshift(r09B0, body.constant(int(31)));
+      ir_expression *const r09BE = add(r09BD, body.constant(2139095040u));
+      ir_expression *const r09BF = expr(ir_unop_bitcast_u2f, r09BE);
+      body.emit(assign(r09BA, expr(ir_triop_csel, r09BC, r09B3, r09BF), 0x01));
+
+      body.emit(assign(r09B3, r09BA, 0x01));
+
+      body.emit(assign(r09AB, r09BA, 0x01));
+
+
+      /* ELSE INSTRUCTIONS */
+      body.instructions = &f09B1->else_instructions;
+
+      ir_variable *const r09C0 = body.make_temp(glsl_type::uint_type, 
"mix_retval");
+      ir_expression *const r09C1 = lshift(r09AC, body.constant(int(10)));
+      ir_expression *const r09C2 = rshift(swizzle_x(r09A9), 
body.constant(int(22)));
+      ir_expression *const r09C3 = bit_or(r09C1, r09C2);
+      ir_expression *const r09C4 = lshift(swizzle_x(r09A9), 
body.constant(int(10)));
+      ir_expression *const r09C5 = nequal(r09C4, body.constant(0u));
+      ir_expression *const r09C6 = expr(ir_unop_b2i, r09C5);
+      ir_expression *const r09C7 = expr(ir_unop_i2u, r09C6);
+      body.emit(assign(r09C0, bit_or(r09C3, r09C7), 0x01));
+
+      ir_variable *const r09C8 = body.make_temp(glsl_type::uint_type, 
"mix_retval");
+      ir_expression *const r09C9 = nequal(r09AD, body.constant(int(0)));
+      ir_expression *const r09CA = bit_or(r09C0, body.constant(1073741824u));
+      body.emit(assign(r09C8, expr(ir_triop_csel, r09C9, r09CA, r09C0), 0x01));
+
+      ir_variable *const r09CB = body.make_temp(glsl_type::int_type, "zExp");
+      body.emit(assign(r09CB, add(r09AD, body.constant(int(-897))), 0x01));
+
+      ir_variable *const r09CC = body.make_temp(glsl_type::uint_type, "zFrac");
+      body.emit(assign(r09CC, r09C8, 0x01));
+
+      ir_variable *const r09CD = body.make_temp(glsl_type::bool_type, 
"execute_flag");
+      body.emit(assign(r09CD, body.constant(true), 0x01));
+
+      ir_variable *const r09CE = body.make_temp(glsl_type::float_type, 
"return_value");
+      ir_variable *const r09CF = new(mem_ctx) ir_variable(glsl_type::int_type, 
"roundBits", ir_var_auto);
+      body.emit(r09CF);
+      ir_expression *const r09D0 = bit_and(r09C8, body.constant(127u));
+      body.emit(assign(r09CF, expr(ir_unop_u2i, r09D0), 0x01));
+
+      /* IF CONDITION */
+      ir_expression *const r09D2 = expr(ir_unop_i2u, r09CB);
+      ir_expression *const r09D3 = gequal(r09D2, body.constant(253u));
+      ir_if *f09D1 = new(mem_ctx) ir_if(operand(r09D3).val);
+      exec_list *const f09D1_parent_instructions = body.instructions;
+
+         /* THEN INSTRUCTIONS */
+         body.instructions = &f09D1->then_instructions;
+
+         /* IF CONDITION */
+         ir_expression *const r09D5 = less(body.constant(int(253)), r09CB);
+         ir_expression *const r09D6 = equal(r09CB, body.constant(int(253)));
+         ir_expression *const r09D7 = expr(ir_unop_u2i, r09C8);
+         ir_expression *const r09D8 = less(r09D7, body.constant(int(-64)));
+         ir_expression *const r09D9 = logic_and(r09D6, r09D8);
+         ir_expression *const r09DA = logic_or(r09D5, r09D9);
+         ir_if *f09D4 = new(mem_ctx) ir_if(operand(r09DA).val);
+         exec_list *const f09D4_parent_instructions = body.instructions;
+
+            /* THEN INSTRUCTIONS */
+            body.instructions = &f09D4->then_instructions;
+
+            ir_expression *const r09DB = lshift(r09B0, body.constant(int(31)));
+            ir_expression *const r09DC = add(r09DB, 
body.constant(2139095040u));
+            body.emit(assign(r09CE, expr(ir_unop_bitcast_u2f, r09DC), 0x01));
+
+            body.emit(assign(r09CD, body.constant(false), 0x01));
+
+
+            /* ELSE INSTRUCTIONS */
+            body.instructions = &f09D4->else_instructions;
+
+            ir_variable *const r09DD = body.make_temp(glsl_type::int_type, 
"assignment_tmp");
+            body.emit(assign(r09DD, neg(r09CB), 0x01));
+
+            ir_variable *const r09DE = body.make_temp(glsl_type::bool_type, 
"assignment_tmp");
+            body.emit(assign(r09DE, less(r09CB, body.constant(int(0))), 0x01));
+
+            ir_variable *const r09DF = body.make_temp(glsl_type::uint_type, 
"mix_retval");
+            ir_expression *const r09E0 = neg(r09CB);
+            ir_expression *const r09E1 = less(r09E0, body.constant(int(32)));
+            ir_expression *const r09E2 = rshift(r09C8, r09DD);
+            ir_expression *const r09E3 = neg(r09DD);
+            ir_expression *const r09E4 = bit_and(r09E3, 
body.constant(int(31)));
+            ir_expression *const r09E5 = lshift(r09C8, r09E4);
+            ir_expression *const r09E6 = nequal(r09E5, body.constant(0u));
+            ir_expression *const r09E7 = expr(ir_unop_b2i, r09E6);
+            ir_expression *const r09E8 = expr(ir_unop_i2u, r09E7);
+            ir_expression *const r09E9 = bit_or(r09E2, r09E8);
+            ir_expression *const r09EA = nequal(r09C8, body.constant(0u));
+            ir_expression *const r09EB = expr(ir_unop_b2i, r09EA);
+            ir_expression *const r09EC = expr(ir_unop_i2u, r09EB);
+            ir_expression *const r09ED = expr(ir_triop_csel, r09E1, r09E9, 
r09EC);
+            body.emit(assign(r09DF, expr(ir_triop_csel, r09DE, r09ED, r09C8), 
0x01));
+
+            body.emit(assign(r09CC, r09DF, 0x01));
+
+            ir_expression *const r09EE = expr(ir_unop_u2i, r09DF);
+            ir_expression *const r09EF = bit_and(r09EE, 
body.constant(int(127)));
+            body.emit(assign(r09CF, expr(ir_triop_csel, r09DE, r09EF, r09CF), 
0x01));
+
+            body.emit(assign(r09CB, expr(ir_triop_csel, r09DE, 
body.constant(int(0)), r09CB), 0x01));
+
+
+         body.instructions = f09D4_parent_instructions;
+         body.emit(f09D4);
+
+         /* END IF */
+
+
+      body.instructions = f09D1_parent_instructions;
+      body.emit(f09D1);
+
+      /* END IF */
+
+      /* IF CONDITION */
+      ir_if *f09F0 = new(mem_ctx) ir_if(operand(r09CD).val);
+      exec_list *const f09F0_parent_instructions = body.instructions;
+
+         /* THEN INSTRUCTIONS */
+         body.instructions = &f09F0->then_instructions;
+
+         ir_expression *const r09F1 = add(r09CC, body.constant(64u));
+         body.emit(assign(r09CC, rshift(r09F1, body.constant(int(7))), 0x01));
+
+         ir_expression *const r09F2 = bit_xor(r09CF, body.constant(int(64)));
+         ir_expression *const r09F3 = equal(r09F2, body.constant(int(0)));
+         ir_expression *const r09F4 = expr(ir_unop_b2i, r09F3);
+         ir_expression *const r09F5 = expr(ir_unop_i2u, r09F4);
+         ir_expression *const r09F6 = expr(ir_unop_bit_not, r09F5);
+         body.emit(assign(r09CC, bit_and(r09CC, r09F6), 0x01));
+
+         ir_expression *const r09F7 = lshift(r09B0, body.constant(int(31)));
+         ir_expression *const r09F8 = equal(r09CC, body.constant(0u));
+         ir_expression *const r09F9 = expr(ir_triop_csel, r09F8, 
body.constant(int(0)), r09CB);
+         ir_expression *const r09FA = expr(ir_unop_i2u, r09F9);
+         ir_expression *const r09FB = lshift(r09FA, body.constant(int(23)));
+         ir_expression *const r09FC = add(r09F7, r09FB);
+         ir_expression *const r09FD = add(r09FC, r09CC);
+         body.emit(assign(r09CE, expr(ir_unop_bitcast_u2f, r09FD), 0x01));
+
+         body.emit(assign(r09CD, body.constant(false), 0x01));
+
+
+      body.instructions = f09F0_parent_instructions;
+      body.emit(f09F0);
+
+      /* END IF */
+
+      body.emit(assign(r09AB, r09CE, 0x01));
+
+
+   body.instructions = f09B1_parent_instructions;
+   body.emit(f09B1);
+
+   /* END IF */
+
+   ir_variable *const r09FE = body.make_temp(glsl_type::bool_type, 
"execute_flag");
+   body.emit(assign(r09FE, body.constant(true), 0x01));
+
+   ir_variable *const r09FF = body.make_temp(glsl_type::uvec2_type, 
"return_value");
+   ir_variable *const r0A00 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aSign", ir_var_auto);
+   body.emit(r0A00);
+   ir_variable *const r0A01 = new(mem_ctx) ir_variable(glsl_type::int_type, 
"aExp", ir_var_auto);
+   body.emit(r0A01);
+   ir_variable *const r0A02 = new(mem_ctx) ir_variable(glsl_type::uint_type, 
"aFrac", ir_var_auto);
+   body.emit(r0A02);
+   ir_variable *const r0A03 = body.make_temp(glsl_type::uint_type, 
"floatBitsToUint_retval");
+   ir_expression *const r0A04 = expr(ir_unop_sqrt, r09AB);
+   body.emit(assign(r0A03, expr(ir_unop_bitcast_f2u, r0A04), 0x01));
+
+   ir_variable *const r0A05 = body.make_temp(glsl_type::uint_type, 
"assignment_tmp");
+   body.emit(assign(r0A05, bit_and(r0A03, body.constant(8388607u)), 0x01));
+
+   body.emit(assign(r0A02, r0A05, 0x01));
+
+   ir_variable *const r0A06 = body.make_temp(glsl_type::int_type, 
"assignment_tmp");
+   ir_expression *const r0A07 = rshift(r0A03, body.constant(int(23)));
+   ir_expression *const r0A08 = bit_and(r0A07, body.constant(255u));
+   body.emit(assign(r0A06, expr(ir_unop_u2i, r0A08), 0x01));
+
+   body.emit(assign(r0A01, r0A06, 0x01));
+
+   body.emit(assign(r0A00, rshift(r0A03, body.constant(int(31))), 0x01));
+
+   /* IF CONDITION */
+   ir_expression *const r0A0A = equal(r0A06, body.constant(int(255)));
+   ir_if *f0A09 = new(mem_ctx) ir_if(operand(r0A0A).val);
+   exec_list *const f0A09_parent_instructions = body.instructions;
+
+      /* THEN INSTRUCTIONS */
+      body.instructions = &f0A09->then_instructions;
+
+      /* IF CONDITION */
+      ir_expression *const r0A0C = nequal(r0A05, body.constant(0u));
+      ir_if *f0A0B = new(mem_ctx) ir_if(operand(r0A0C).val);
+      exec_list *const f0A0B_parent_instructions = body.instructions;
+
+         /* THEN INSTRUCTIONS */
+         body.instructions = &f0A0B->then_instructions;
+
+         ir_variable *const r0A0D = body.make_temp(glsl_type::uint_type, 
"assignment_tmp");
+         body.emit(assign(r0A0D, lshift(r0A03, body.constant(int(9))), 0x01));
+
+         ir_variable *const r0A0E = body.make_temp(glsl_type::uvec2_type, 
"vec_ctor");
+         ir_expression *const r0A0F = lshift(r0A0D, body.constant(int(20)));
+         body.emit(assign(r0A0E, bit_or(r0A0F, body.constant(0u)), 0x01));
+
+         ir_expression *const r0A10 = rshift(r0A0D, body.constant(int(12)));
+         ir_expression *const r0A11 = lshift(r0A00, body.constant(int(31)));
+         ir_expression *const r0A12 = bit_or(r0A11, 
body.constant(2146959360u));
+         body.emit(assign(r0A0E, bit_or(r0A10, r0A12), 0x02));
+
+         body.emit(assign(r09FF, r0A0E, 0x03));
+
+         body.emit(assign(r09FE, body.constant(false), 0x01));
+
+
+         /* ELSE INSTRUCTIONS */
+         body.instructions = &f0A0B->else_instructions;
+
+         ir_variable *const r0A13 = new(mem_ctx) 
ir_variable(glsl_type::uvec2_type, "z", ir_var_auto);
+         body.emit(r0A13);
+         ir_expression *const r0A14 = lshift(r0A00, body.constant(int(31)));
+         body.emit(assign(r0A13, add(r0A14, body.constant(2146435072u)), 
0x02));
+
+         body.emit(assign(r0A13, body.constant(0u), 0x01));
+
+         body.emit(assign(r09FF, r0A13, 0x03));
+
+         body.emit(assign(r09FE, body.constant(false), 0x01));
+
+
+      body.instructions = f0A0B_parent_instructions;
+      body.emit(f0A0B);
+
+      /* END IF */
+
+
+      /* ELSE INSTRUCTIONS */
+      body.instructions = &f0A09->else_instructions;
+
+      /* IF CONDITION */
+      ir_expression *const r0A16 = equal(r0A06, body.constant(int(0)));
+      ir_if *f0A15 = new(mem_ctx) ir_if(operand(r0A16).val);
+      exec_list *const f0A15_parent_instructions = body.instructions;
+
+         /* THEN INSTRUCTIONS */
+         body.instructions = &f0A15->then_instructions;
+
+         /* IF CONDITION */
+         ir_expression *const r0A18 = equal(r0A05, body.constant(0u));
+         ir_if *f0A17 = new(mem_ctx) ir_if(operand(r0A18).val);
+         exec_list *const f0A17_parent_instructions = body.instructions;
+
+            /* THEN INSTRUCTIONS */
+            body.instructions = &f0A17->then_instructions;
+
+            ir_variable *const r0A19 = new(mem_ctx) 
ir_variable(glsl_type::uvec2_type, "z", ir_var_auto);
+            body.emit(r0A19);
+            body.emit(assign(r0A19, lshift(r0A00, body.constant(int(31))), 
0x02));
+
+            body.emit(assign(r0A19, body.constant(0u), 0x01));
+
+            body.emit(assign(r09FF, r0A19, 0x03));
+
+            body.emit(assign(r09FE, body.constant(false), 0x01));
+
+
+            /* ELSE INSTRUCTIONS */
+            body.instructions = &f0A17->else_instructions;
+
+            ir_variable *const r0A1A = body.make_temp(glsl_type::int_type, 
"assignment_tmp");
+            ir_expression *const r0A1B = equal(r0A05, body.constant(0u));
+            ir_expression *const r0A1C = expr(ir_unop_find_msb, r0A05);
+            ir_expression *const r0A1D = sub(body.constant(int(31)), r0A1C);
+            ir_expression *const r0A1E = expr(ir_triop_csel, r0A1B, 
body.constant(int(32)), r0A1D);
+            body.emit(assign(r0A1A, add(r0A1E, body.constant(int(-8))), 0x01));
+
+            body.emit(assign(r0A02, lshift(r0A05, r0A1A), 0x01));
+
+            body.emit(assign(r0A01, sub(body.constant(int(1)), r0A1A), 0x01));
+
+            body.emit(assign(r0A01, add(r0A01, body.constant(int(-1))), 0x01));
+
+
+         body.instructions = f0A17_parent_instructions;
+         body.emit(f0A17);
+
+         /* END IF */
+
+
+      body.instructions = f0A15_parent_instructions;
+      body.emit(f0A15);
+
+      /* END IF */
+
+      /* IF CONDITION */
+      ir_if *f0A1F = new(mem_ctx) ir_if(operand(r09FE).val);
+      exec_list *const f0A1F_parent_instructions = body.instructions;
+
+         /* THEN INSTRUCTIONS */
+         body.instructions = &f0A1F->then_instructions;
+
+         ir_variable *const r0A20 = new(mem_ctx) 
ir_variable(glsl_type::uvec2_type, "z", ir_var_auto);
+         body.emit(r0A20);
+         ir_expression *const r0A21 = lshift(r0A00, body.constant(int(31)));
+         ir_expression *const r0A22 = add(r0A01, body.constant(int(896)));
+         ir_expression *const r0A23 = expr(ir_unop_i2u, r0A22);
+         ir_expression *const r0A24 = lshift(r0A23, body.constant(int(20)));
+         ir_expression *const r0A25 = add(r0A21, r0A24);
+         ir_expression *const r0A26 = rshift(r0A02, body.constant(int(3)));
+         body.emit(assign(r0A20, add(r0A25, r0A26), 0x02));
+
+         ir_expression *const r0A27 = lshift(r0A02, body.constant(int(29)));
+         body.emit(assign(r0A20, bit_or(r0A27, body.constant(0u)), 0x01));
+
+         body.emit(assign(r09FF, r0A20, 0x03));
+
+         body.emit(assign(r09FE, body.constant(false), 0x01));
+
+
+      body.instructions = f0A1F_parent_instructions;
+      body.emit(f0A1F);
+
+      /* END IF */
+
+
+   body.instructions = f0A09_parent_instructions;
+   body.emit(f0A09);
+
+   /* END IF */
+
+   body.emit(ret(r09FF));
+
+   sig->replace_parameters(&sig_parameters);
+   return sig;
+}
diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index 48e0b20..d919873 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -3394,6 +3394,10 @@ builtin_builder::create_builtins()
                 generate_ir::int_to_fp64(mem_ctx, integer_functions_supported),
                 NULL);
 
+   add_function("__builtin_fsqrt64",
+                generate_ir::fsqrt64(mem_ctx, integer_functions_supported),
+                NULL);
+
 #undef F
 #undef FI
 #undef FIUD_VEC
diff --git a/src/compiler/glsl/builtin_functions.h 
b/src/compiler/glsl/builtin_functions.h
index f9cc0ad..2f72f51 100644
--- a/src/compiler/glsl/builtin_functions.h
+++ b/src/compiler/glsl/builtin_functions.h
@@ -106,6 +106,9 @@ fp64_to_fp32(void *mem_ctx, builtin_available_predicate 
avail);
 ir_function_signature *
 fp32_to_fp64(void *mem_ctx, builtin_available_predicate avail);
 
+ir_function_signature *
+fsqrt64(void *mem_ctx, builtin_available_predicate avail);
+
 }
 
 #endif /* BULITIN_FUNCTIONS_H */
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
index 748e4af..c03f0f6 100644
--- a/src/compiler/glsl/float64.glsl
+++ b/src/compiler/glsl/float64.glsl
@@ -1014,3 +1014,278 @@ fp32_to_fp64(float f)
    shift64Right(aFrac, 0u, 3, zFrac0, zFrac1);
    return packFloat64(aSign, aExp + 0x380, zFrac0, zFrac1);
 }
+
+/* Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
+ * 96-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+ * modulo 2^96, so any carry out is lost.  The result is broken into three
+ * 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
+ * `z1Ptr', and `z2Ptr'.
+ */
+/*void
+add96(uint a0, uint a1, uint a2,
+      uint b0, uint b1, uint b2,
+      inout uint z0Ptr,
+      inout uint z1Ptr,
+      inout uint z2Ptr)
+{
+   uint z2 = a2 + b2;
+   uint carry1 = uint(z2 < a2);
+   uint z1 = a1 + b1;
+   uint carry0 = uint(z1 < a1);
+   uint z0 = a0 + b0;
+   z1 += carry1;
+   z0 += uint(z1 < carry1);
+   z0 += carry0;
+   z2Ptr = z2;
+   z1Ptr = z1;
+   z0Ptr = z0;
+}*/
+
+/* Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
+ * the 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction
+ * is modulo 2^96, so any borrow out (carry out) is lost.  The result is broken
+ * into three 32-bit pieces which are stored at the locations pointed to by
+ * `z0Ptr', `z1Ptr', and `z2Ptr'.
+ */
+/*void
+sub96(uint a0, uint a1, uint a2,
+      uint b0, uint b1, uint b2,
+      inout uint z0Ptr,
+      inout uint z1Ptr,
+      inout uint z2Ptr)
+{
+   uint z2 = a2 - b2;
+   uint borrow1 = uint(a2 < b2);
+   uint z1 = a1 - b1;
+   uint borrow0 = uint(a1 < b1);
+   uint z0 = a0 - b0;
+   z0 -= uint(z1 < borrow1);
+   z1 -= borrow1;
+   z0 -= borrow0;
+   z2Ptr = z2;
+   z1Ptr = z1;
+   z0Ptr = z0;
+}*/
+
+/* Returns an approximation to the 32-bit integer quotient obtained by dividing
+ * `b' into the 64-bit value formed by concatenating `a0' and `a1'.  The
+ * divisor `b' must be at least 2^31.  If q is the exact quotient truncated
+ * toward zero, the approximation returned lies between q and q + 2 inclusive.
+ * If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
+ * unsigned integer is returned.
+ */
+/*uint
+estimateDiv64To32(uint a0, uint a1, uint b)
+{
+   uint b0;
+   uint b1;
+   uint rem0 = 0u;
+   uint rem1 = 0u;
+   uint term0 = 0u;
+   uint term1 = 0u;
+   uint z;
+
+   if (b <= a0)
+      return 0xFFFFFFFFu;
+   b0 = b>>16;
+   z = (b0<<16 <= a0) ? 0xFFFF0000u : (a0 / b0)<<16;
+   mul32To64(b, z, term0, term1);
+   sub64(a0, a1, term0, term1, rem0, rem1);
+   while (int(rem0) < 0) {
+      z -= 0x10000u;
+      b1 = b<<16;
+      add64(rem0, rem1, b0, b1, rem0, rem1);
+   }
+   rem0 = (rem0<<16) | (rem1>>16);
+   z |= (b0<<16 <= rem0) ? 0xFFFFu : rem0 / b0;
+   return z;
+}*/
+
+/*uint
+sqrtOddAdjustments(int index)
+{
+   uint res = 0u;
+   if (index == 0)
+      res = 0x0004u;
+   if (index == 1)
+      res = 0x0022u;
+   if (index == 2)
+      res = 0x005Du;
+   if (index == 3)
+      res = 0x00B1u;
+   if (index == 4)
+      res = 0x011Du;
+   if (index == 5)
+      res = 0x019Fu;
+   if (index == 6)
+      res = 0x0236u;
+   if (index == 7)
+      res = 0x02E0u;
+   if (index == 8)
+      res = 0x039Cu;
+   if (index == 9)
+      res = 0x0468u;
+   if (index == 10)
+      res = 0x0545u;
+   if (index == 11)
+      res = 0x631u;
+   if (index == 12)
+      res = 0x072Bu;
+   if (index == 13)
+      res = 0x0832u;
+   if (index == 14)
+      res = 0x0946u;
+   if (index == 15)
+      res = 0x0A67u;
+
+   return res;
+}
+
+uint
+sqrtEvenAdjustments(int index)
+{
+   uint res = 0u;
+   if (index == 0)
+      res = 0x0A2Du;
+   if (index == 1)
+      res = 0x08AFu;
+   if (index == 2)
+      res = 0x075Au;
+   if (index == 3)
+      res = 0x0629u;
+   if (index == 4)
+      res = 0x051Au;
+   if (index == 5)
+      res = 0x0429u;
+   if (index == 6)
+      res = 0x0356u;
+   if (index == 7)
+      res = 0x029Eu;
+   if (index == 8)
+      res = 0x0200u;
+   if (index == 9)
+      res = 0x0179u;
+   if (index == 10)
+      res = 0x0109u;
+   if (index == 11)
+      res = 0x00AFu;
+   if (index == 12)
+      res = 0x0068u;
+   if (index == 13)
+      res = 0x0034u;
+   if (index == 14)
+      res = 0x0012u;
+   if (index == 15)
+      res = 0x0002u;
+
+   return res;
+}*/
+
+/* Returns an approximation to the square root of the 32-bit significand given
+ * by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+ * `aExp' (the least significant bit) is 1, the integer returned approximates
+ * 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+ * is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+ * case, the approximation returned lies strictly within +/-2 of the exact
+ * value.
+ */
+/*uint estimateSqrt32(int aExp, uint a)
+{
+   uint z;
+
+   int index = int(a>>27 & 15u);
+   if ((aExp & 1) != 0) {
+      z = 0x4000u + (a>>17) - sqrtOddAdjustments(index);
+      z = ((a / z)<<14) + (z<<15);
+      a >>= 1;
+   } else {
+      z = 0x8000u + (a>>17) - sqrtEvenAdjustments(index);
+      z = a / z + z;
+      z = (0x20000u <= z) ? 0xFFFF8000u : (z<<15);
+      if (z <= a)
+         return uint(int(a)>>1);
+   }
+   return ((estimateDiv64To32(a, 0u, z))>>1) + (z>>1);
+}*/
+
+/* Returns the square root of the double-precision floating-point value `a'.
+ * The commented-out body performs the operation according to the IEEE Standard
+ * for Floating-Point Arithmetic; the active code instead goes via fp32.
+ */
+uvec2
+fsqrt64(uvec2 a)
+{
+/*   uint zFrac0 = 0u;
+   uint zFrac1 = 0u;
+   uint zFrac2 = 0u;
+   uint doubleZFrac0 = 0u;
+   uint rem0 = 0u;
+   uint rem1 = 0u;
+   uint rem2 = 0u;
+   uint rem3 = 0u;
+   uint term0 = 0u;
+   uint term1 = 0u;
+   uint term2 = 0u;
+   uint term3 = 0u;
+   uvec2 default_nan;
+   default_nan.y = 0xFFFFFFFFu;
+   default_nan.x = 0xFFFFFFFFu;
+
+   uint aFracLo = extractFloat64FracLo(a);
+   uint aFracHi = extractFloat64FracHi(a);
+   int aExp = extractFloat64Exp(a);
+   uint aSign = extractFloat64Sign(a);
+   if (aExp == 0x7FF) {
+      if ((aFracHi | aFracLo) != 0u)
+         return propagateFloat64NaN(a, a);
+      if (aSign == 0u)
+         return a;
+      return default_nan;
+   }
+   if (aSign != 0u) {
+      if ((uint(aExp) | aFracHi | aFracLo) == 0u)
+         return a;
+      return default_nan;
+   }
+   if (aExp == 0) {
+      if ((aFracHi | aFracLo) == 0u)
+         return packFloat64(0u, 0, 0u, 0u);
+      normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);
+   }
+   int zExp = ((aExp - 0x3FF)>>1) + 0x3FE;
+   aFracHi |= 0x00100000u;
+   shortShift64Left(aFracHi, aFracLo, 11, term0, term1);
+   zFrac0 = (estimateSqrt32(aExp, term0)>>1) + 1u;
+   if (zFrac0 == 0u)
+      zFrac0 = 0x7FFFFFFFu;
+   doubleZFrac0 = zFrac0 + zFrac0;
+   shortShift64Left(aFracHi, aFracLo, 9 - (aExp & 1), aFracHi, aFracLo);
+   mul32To64(zFrac0, zFrac0, term0, term1);
+   sub64(aFracHi, aFracLo, term0, term1, rem0, rem1);
+   while (int(rem0) < 0) {
+      --zFrac0;
+      doubleZFrac0 -= 2u;
+      add64(rem0, rem1, 0u, doubleZFrac0 | 1u, rem0, rem1);
+   }
+   zFrac1 = estimateDiv64To32(rem1, 0u, doubleZFrac0);
+   if ((zFrac1 & 0x1FFu) <= 5u) {
+      if (zFrac1 == 0u)
+         zFrac1 = 1u;
+      mul32To64(doubleZFrac0, zFrac1, term1, term2);
+      sub64(rem1, 0u, term1, term2, rem1, rem2);
+      mul32To64(zFrac1, zFrac1, term2, term3);
+      sub96(rem1, rem2, 0u, 0u, term2, term3, rem1, rem2, rem3);
+      while (int(rem1) < 0) {
+         --zFrac1;
+         shortShift64Left(0u, zFrac1, 1, term2, term3);
+         term3 |= 1u;
+         term2 |= doubleZFrac0;
+         add96(rem1, rem2, rem3, 0u, term2, term3, rem1, rem2, rem3);
+      }
+      zFrac1 |= uint((rem1 | rem2 | rem3) != 0u);
+   }
+   shift64ExtraRightJamming(zFrac0, zFrac1, 0u, 10, zFrac0, zFrac1, zFrac2);
+   return roundAndPackFloat64(0u, zExp, zFrac0, zFrac1, zFrac2);*/
+
+   return fp32_to_fp64(sqrt(fp64_to_fp32(a)));
+}
diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
b/src/compiler/glsl/glcpp/glcpp-parse.y
index 3fcdcb0..d2411c5 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -2381,6 +2381,7 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
          add_builtin_define(parser, "__have_builtin_builtin_int_to_fp64", 1);
          add_builtin_define(parser, "__have_builtin_builtin_fp64_to_fp32", 1);
          add_builtin_define(parser, "__have_builtin_builtin_fp32_to_fp64", 1);
+         add_builtin_define(parser, "__have_builtin_builtin_fsqrt64", 1);
       }
    }
 
-- 
2.9.5

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 15/50] glsl: Add "built-in" functions to do sqrt(fp64)

Reply via email to