From: Elie Tournier <tournier.e...@gmail.com> This currently implements sqrt for fp64 as fp64->fp32, sqrt(fp32), fp32->fp64, so the result is limited to single-precision accuracy and range.
[airlied: The code from softfloat for doing a proper sqrt64 is included (commented out), but it needs to be decided whether we need to pursue this and how to optimise it better.] Signed-off-by: Elie Tournier <elie.tourn...@collabora.com> --- src/compiler/glsl/builtin_float64.h | 393 ++++++++++++++++++++++++++++++++ src/compiler/glsl/builtin_functions.cpp | 4 + src/compiler/glsl/builtin_functions.h | 3 + src/compiler/glsl/float64.glsl | 275 ++++++++++++++++++++++ src/compiler/glsl/glcpp/glcpp-parse.y | 1 + 5 files changed, 676 insertions(+) diff --git a/src/compiler/glsl/builtin_float64.h b/src/compiler/glsl/builtin_float64.h index 034d2d0..6fbe12d 100644 --- a/src/compiler/glsl/builtin_float64.h +++ b/src/compiler/glsl/builtin_float64.h @@ -6242,3 +6242,396 @@ fp32_to_fp64(void *mem_ctx, builtin_available_predicate avail) sig->replace_parameters(&sig_parameters); return sig; } +ir_function_signature * +fsqrt64(void *mem_ctx, builtin_available_predicate avail) +{ + ir_function_signature *const sig = + new(mem_ctx) ir_function_signature(glsl_type::uvec2_type, avail); + ir_factory body(&sig->body, mem_ctx); + sig->is_defined = true; + + exec_list sig_parameters; + + ir_variable *const r09A9 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "a", ir_var_function_in); + sig_parameters.push_tail(r09A9); + ir_variable *const r09AA = body.make_temp(glsl_type::uvec2_type, "a"); + body.emit(assign(r09AA, r09A9, 0x03)); + + ir_variable *const r09AB = body.make_temp(glsl_type::float_type, "return_value"); + ir_variable *const r09AC = body.make_temp(glsl_type::uint_type, "extractFloat64FracHi_retval"); + body.emit(assign(r09AC, bit_and(swizzle_y(r09A9), body.constant(1048575u)), 0x01)); + + ir_variable *const r09AD = body.make_temp(glsl_type::int_type, "extractFloat64Exp_retval"); + ir_expression *const r09AE = rshift(swizzle_y(r09A9), body.constant(int(20))); + ir_expression *const r09AF = bit_and(r09AE, body.constant(2047u)); + body.emit(assign(r09AD, expr(ir_unop_u2i, r09AF), 0x01)); + + ir_variable
*const r09B0 = body.make_temp(glsl_type::uint_type, "extractFloat64Sign_retval"); + body.emit(assign(r09B0, rshift(swizzle_y(r09A9), body.constant(int(31))), 0x01)); + + /* IF CONDITION */ + ir_expression *const r09B2 = equal(r09AD, body.constant(int(2047))); + ir_if *f09B1 = new(mem_ctx) ir_if(operand(r09B2).val); + exec_list *const f09B1_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f09B1->then_instructions; + + ir_variable *const r09B3 = new(mem_ctx) ir_variable(glsl_type::float_type, "rval", ir_var_auto); + body.emit(r09B3); + ir_expression *const r09B4 = lshift(swizzle_y(r09A9), body.constant(int(12))); + ir_expression *const r09B5 = rshift(swizzle_x(r09A9), body.constant(int(20))); + body.emit(assign(r09AA, bit_or(r09B4, r09B5), 0x02)); + + body.emit(assign(r09AA, lshift(swizzle_x(r09A9), body.constant(int(12))), 0x01)); + + ir_expression *const r09B6 = lshift(r09B0, body.constant(int(31))); + ir_expression *const r09B7 = bit_or(r09B6, body.constant(2143289344u)); + ir_expression *const r09B8 = rshift(swizzle_y(r09AA), body.constant(int(9))); + ir_expression *const r09B9 = bit_or(r09B7, r09B8); + body.emit(assign(r09B3, expr(ir_unop_bitcast_u2f, r09B9), 0x01)); + + ir_variable *const r09BA = body.make_temp(glsl_type::float_type, "mix_retval"); + ir_expression *const r09BB = bit_or(r09AC, swizzle_x(r09A9)); + ir_expression *const r09BC = nequal(r09BB, body.constant(0u)); + ir_expression *const r09BD = lshift(r09B0, body.constant(int(31))); + ir_expression *const r09BE = add(r09BD, body.constant(2139095040u)); + ir_expression *const r09BF = expr(ir_unop_bitcast_u2f, r09BE); + body.emit(assign(r09BA, expr(ir_triop_csel, r09BC, r09B3, r09BF), 0x01)); + + body.emit(assign(r09B3, r09BA, 0x01)); + + body.emit(assign(r09AB, r09BA, 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f09B1->else_instructions; + + ir_variable *const r09C0 = body.make_temp(glsl_type::uint_type, "mix_retval"); + ir_expression *const 
r09C1 = lshift(r09AC, body.constant(int(10))); + ir_expression *const r09C2 = rshift(swizzle_x(r09A9), body.constant(int(22))); + ir_expression *const r09C3 = bit_or(r09C1, r09C2); + ir_expression *const r09C4 = lshift(swizzle_x(r09A9), body.constant(int(10))); + ir_expression *const r09C5 = nequal(r09C4, body.constant(0u)); + ir_expression *const r09C6 = expr(ir_unop_b2i, r09C5); + ir_expression *const r09C7 = expr(ir_unop_i2u, r09C6); + body.emit(assign(r09C0, bit_or(r09C3, r09C7), 0x01)); + + ir_variable *const r09C8 = body.make_temp(glsl_type::uint_type, "mix_retval"); + ir_expression *const r09C9 = nequal(r09AD, body.constant(int(0))); + ir_expression *const r09CA = bit_or(r09C0, body.constant(1073741824u)); + body.emit(assign(r09C8, expr(ir_triop_csel, r09C9, r09CA, r09C0), 0x01)); + + ir_variable *const r09CB = body.make_temp(glsl_type::int_type, "zExp"); + body.emit(assign(r09CB, add(r09AD, body.constant(int(-897))), 0x01)); + + ir_variable *const r09CC = body.make_temp(glsl_type::uint_type, "zFrac"); + body.emit(assign(r09CC, r09C8, 0x01)); + + ir_variable *const r09CD = body.make_temp(glsl_type::bool_type, "execute_flag"); + body.emit(assign(r09CD, body.constant(true), 0x01)); + + ir_variable *const r09CE = body.make_temp(glsl_type::float_type, "return_value"); + ir_variable *const r09CF = new(mem_ctx) ir_variable(glsl_type::int_type, "roundBits", ir_var_auto); + body.emit(r09CF); + ir_expression *const r09D0 = bit_and(r09C8, body.constant(127u)); + body.emit(assign(r09CF, expr(ir_unop_u2i, r09D0), 0x01)); + + /* IF CONDITION */ + ir_expression *const r09D2 = expr(ir_unop_i2u, r09CB); + ir_expression *const r09D3 = gequal(r09D2, body.constant(253u)); + ir_if *f09D1 = new(mem_ctx) ir_if(operand(r09D3).val); + exec_list *const f09D1_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f09D1->then_instructions; + + /* IF CONDITION */ + ir_expression *const r09D5 = less(body.constant(int(253)), r09CB); + ir_expression 
*const r09D6 = equal(r09CB, body.constant(int(253))); + ir_expression *const r09D7 = expr(ir_unop_u2i, r09C8); + ir_expression *const r09D8 = less(r09D7, body.constant(int(-64))); + ir_expression *const r09D9 = logic_and(r09D6, r09D8); + ir_expression *const r09DA = logic_or(r09D5, r09D9); + ir_if *f09D4 = new(mem_ctx) ir_if(operand(r09DA).val); + exec_list *const f09D4_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f09D4->then_instructions; + + ir_expression *const r09DB = lshift(r09B0, body.constant(int(31))); + ir_expression *const r09DC = add(r09DB, body.constant(2139095040u)); + body.emit(assign(r09CE, expr(ir_unop_bitcast_u2f, r09DC), 0x01)); + + body.emit(assign(r09CD, body.constant(false), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f09D4->else_instructions; + + ir_variable *const r09DD = body.make_temp(glsl_type::int_type, "assignment_tmp"); + body.emit(assign(r09DD, neg(r09CB), 0x01)); + + ir_variable *const r09DE = body.make_temp(glsl_type::bool_type, "assignment_tmp"); + body.emit(assign(r09DE, less(r09CB, body.constant(int(0))), 0x01)); + + ir_variable *const r09DF = body.make_temp(glsl_type::uint_type, "mix_retval"); + ir_expression *const r09E0 = neg(r09CB); + ir_expression *const r09E1 = less(r09E0, body.constant(int(32))); + ir_expression *const r09E2 = rshift(r09C8, r09DD); + ir_expression *const r09E3 = neg(r09DD); + ir_expression *const r09E4 = bit_and(r09E3, body.constant(int(31))); + ir_expression *const r09E5 = lshift(r09C8, r09E4); + ir_expression *const r09E6 = nequal(r09E5, body.constant(0u)); + ir_expression *const r09E7 = expr(ir_unop_b2i, r09E6); + ir_expression *const r09E8 = expr(ir_unop_i2u, r09E7); + ir_expression *const r09E9 = bit_or(r09E2, r09E8); + ir_expression *const r09EA = nequal(r09C8, body.constant(0u)); + ir_expression *const r09EB = expr(ir_unop_b2i, r09EA); + ir_expression *const r09EC = expr(ir_unop_i2u, r09EB); + ir_expression *const r09ED = 
expr(ir_triop_csel, r09E1, r09E9, r09EC); + body.emit(assign(r09DF, expr(ir_triop_csel, r09DE, r09ED, r09C8), 0x01)); + + body.emit(assign(r09CC, r09DF, 0x01)); + + ir_expression *const r09EE = expr(ir_unop_u2i, r09DF); + ir_expression *const r09EF = bit_and(r09EE, body.constant(int(127))); + body.emit(assign(r09CF, expr(ir_triop_csel, r09DE, r09EF, r09CF), 0x01)); + + body.emit(assign(r09CB, expr(ir_triop_csel, r09DE, body.constant(int(0)), r09CB), 0x01)); + + + body.instructions = f09D4_parent_instructions; + body.emit(f09D4); + + /* END IF */ + + + body.instructions = f09D1_parent_instructions; + body.emit(f09D1); + + /* END IF */ + + /* IF CONDITION */ + ir_if *f09F0 = new(mem_ctx) ir_if(operand(r09CD).val); + exec_list *const f09F0_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f09F0->then_instructions; + + ir_expression *const r09F1 = add(r09CC, body.constant(64u)); + body.emit(assign(r09CC, rshift(r09F1, body.constant(int(7))), 0x01)); + + ir_expression *const r09F2 = bit_xor(r09CF, body.constant(int(64))); + ir_expression *const r09F3 = equal(r09F2, body.constant(int(0))); + ir_expression *const r09F4 = expr(ir_unop_b2i, r09F3); + ir_expression *const r09F5 = expr(ir_unop_i2u, r09F4); + ir_expression *const r09F6 = expr(ir_unop_bit_not, r09F5); + body.emit(assign(r09CC, bit_and(r09CC, r09F6), 0x01)); + + ir_expression *const r09F7 = lshift(r09B0, body.constant(int(31))); + ir_expression *const r09F8 = equal(r09CC, body.constant(0u)); + ir_expression *const r09F9 = expr(ir_triop_csel, r09F8, body.constant(int(0)), r09CB); + ir_expression *const r09FA = expr(ir_unop_i2u, r09F9); + ir_expression *const r09FB = lshift(r09FA, body.constant(int(23))); + ir_expression *const r09FC = add(r09F7, r09FB); + ir_expression *const r09FD = add(r09FC, r09CC); + body.emit(assign(r09CE, expr(ir_unop_bitcast_u2f, r09FD), 0x01)); + + body.emit(assign(r09CD, body.constant(false), 0x01)); + + + body.instructions = 
f09F0_parent_instructions; + body.emit(f09F0); + + /* END IF */ + + body.emit(assign(r09AB, r09CE, 0x01)); + + + body.instructions = f09B1_parent_instructions; + body.emit(f09B1); + + /* END IF */ + + ir_variable *const r09FE = body.make_temp(glsl_type::bool_type, "execute_flag"); + body.emit(assign(r09FE, body.constant(true), 0x01)); + + ir_variable *const r09FF = body.make_temp(glsl_type::uvec2_type, "return_value"); + ir_variable *const r0A00 = new(mem_ctx) ir_variable(glsl_type::uint_type, "aSign", ir_var_auto); + body.emit(r0A00); + ir_variable *const r0A01 = new(mem_ctx) ir_variable(glsl_type::int_type, "aExp", ir_var_auto); + body.emit(r0A01); + ir_variable *const r0A02 = new(mem_ctx) ir_variable(glsl_type::uint_type, "aFrac", ir_var_auto); + body.emit(r0A02); + ir_variable *const r0A03 = body.make_temp(glsl_type::uint_type, "floatBitsToUint_retval"); + ir_expression *const r0A04 = expr(ir_unop_sqrt, r09AB); + body.emit(assign(r0A03, expr(ir_unop_bitcast_f2u, r0A04), 0x01)); + + ir_variable *const r0A05 = body.make_temp(glsl_type::uint_type, "assignment_tmp"); + body.emit(assign(r0A05, bit_and(r0A03, body.constant(8388607u)), 0x01)); + + body.emit(assign(r0A02, r0A05, 0x01)); + + ir_variable *const r0A06 = body.make_temp(glsl_type::int_type, "assignment_tmp"); + ir_expression *const r0A07 = rshift(r0A03, body.constant(int(23))); + ir_expression *const r0A08 = bit_and(r0A07, body.constant(255u)); + body.emit(assign(r0A06, expr(ir_unop_u2i, r0A08), 0x01)); + + body.emit(assign(r0A01, r0A06, 0x01)); + + body.emit(assign(r0A00, rshift(r0A03, body.constant(int(31))), 0x01)); + + /* IF CONDITION */ + ir_expression *const r0A0A = equal(r0A06, body.constant(int(255))); + ir_if *f0A09 = new(mem_ctx) ir_if(operand(r0A0A).val); + exec_list *const f0A09_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0A09->then_instructions; + + /* IF CONDITION */ + ir_expression *const r0A0C = nequal(r0A05, body.constant(0u)); + ir_if *f0A0B 
= new(mem_ctx) ir_if(operand(r0A0C).val); + exec_list *const f0A0B_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0A0B->then_instructions; + + ir_variable *const r0A0D = body.make_temp(glsl_type::uint_type, "assignment_tmp"); + body.emit(assign(r0A0D, lshift(r0A03, body.constant(int(9))), 0x01)); + + ir_variable *const r0A0E = body.make_temp(glsl_type::uvec2_type, "vec_ctor"); + ir_expression *const r0A0F = lshift(r0A0D, body.constant(int(20))); + body.emit(assign(r0A0E, bit_or(r0A0F, body.constant(0u)), 0x01)); + + ir_expression *const r0A10 = rshift(r0A0D, body.constant(int(12))); + ir_expression *const r0A11 = lshift(r0A00, body.constant(int(31))); + ir_expression *const r0A12 = bit_or(r0A11, body.constant(2146959360u)); + body.emit(assign(r0A0E, bit_or(r0A10, r0A12), 0x02)); + + body.emit(assign(r09FF, r0A0E, 0x03)); + + body.emit(assign(r09FE, body.constant(false), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0A0B->else_instructions; + + ir_variable *const r0A13 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "z", ir_var_auto); + body.emit(r0A13); + ir_expression *const r0A14 = lshift(r0A00, body.constant(int(31))); + body.emit(assign(r0A13, add(r0A14, body.constant(2146435072u)), 0x02)); + + body.emit(assign(r0A13, body.constant(0u), 0x01)); + + body.emit(assign(r09FF, r0A13, 0x03)); + + body.emit(assign(r09FE, body.constant(false), 0x01)); + + + body.instructions = f0A0B_parent_instructions; + body.emit(f0A0B); + + /* END IF */ + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0A09->else_instructions; + + /* IF CONDITION */ + ir_expression *const r0A16 = equal(r0A06, body.constant(int(0))); + ir_if *f0A15 = new(mem_ctx) ir_if(operand(r0A16).val); + exec_list *const f0A15_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0A15->then_instructions; + + /* IF CONDITION */ + ir_expression *const r0A18 = equal(r0A05, body.constant(0u)); + ir_if *f0A17 = 
new(mem_ctx) ir_if(operand(r0A18).val); + exec_list *const f0A17_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0A17->then_instructions; + + ir_variable *const r0A19 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "z", ir_var_auto); + body.emit(r0A19); + body.emit(assign(r0A19, lshift(r0A00, body.constant(int(31))), 0x02)); + + body.emit(assign(r0A19, body.constant(0u), 0x01)); + + body.emit(assign(r09FF, r0A19, 0x03)); + + body.emit(assign(r09FE, body.constant(false), 0x01)); + + + /* ELSE INSTRUCTIONS */ + body.instructions = &f0A17->else_instructions; + + ir_variable *const r0A1A = body.make_temp(glsl_type::int_type, "assignment_tmp"); + ir_expression *const r0A1B = equal(r0A05, body.constant(0u)); + ir_expression *const r0A1C = expr(ir_unop_find_msb, r0A05); + ir_expression *const r0A1D = sub(body.constant(int(31)), r0A1C); + ir_expression *const r0A1E = expr(ir_triop_csel, r0A1B, body.constant(int(32)), r0A1D); + body.emit(assign(r0A1A, add(r0A1E, body.constant(int(-8))), 0x01)); + + body.emit(assign(r0A02, lshift(r0A05, r0A1A), 0x01)); + + body.emit(assign(r0A01, sub(body.constant(int(1)), r0A1A), 0x01)); + + body.emit(assign(r0A01, add(r0A01, body.constant(int(-1))), 0x01)); + + + body.instructions = f0A17_parent_instructions; + body.emit(f0A17); + + /* END IF */ + + + body.instructions = f0A15_parent_instructions; + body.emit(f0A15); + + /* END IF */ + + /* IF CONDITION */ + ir_if *f0A1F = new(mem_ctx) ir_if(operand(r09FE).val); + exec_list *const f0A1F_parent_instructions = body.instructions; + + /* THEN INSTRUCTIONS */ + body.instructions = &f0A1F->then_instructions; + + ir_variable *const r0A20 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "z", ir_var_auto); + body.emit(r0A20); + ir_expression *const r0A21 = lshift(r0A00, body.constant(int(31))); + ir_expression *const r0A22 = add(r0A01, body.constant(int(896))); + ir_expression *const r0A23 = expr(ir_unop_i2u, r0A22); + ir_expression *const r0A24 = 
lshift(r0A23, body.constant(int(20))); + ir_expression *const r0A25 = add(r0A21, r0A24); + ir_expression *const r0A26 = rshift(r0A02, body.constant(int(3))); + body.emit(assign(r0A20, add(r0A25, r0A26), 0x02)); + + ir_expression *const r0A27 = lshift(r0A02, body.constant(int(29))); + body.emit(assign(r0A20, bit_or(r0A27, body.constant(0u)), 0x01)); + + body.emit(assign(r09FF, r0A20, 0x03)); + + body.emit(assign(r09FE, body.constant(false), 0x01)); + + + body.instructions = f0A1F_parent_instructions; + body.emit(f0A1F); + + /* END IF */ + + + body.instructions = f0A09_parent_instructions; + body.emit(f0A09); + + /* END IF */ + + body.emit(ret(r09FF)); + + sig->replace_parameters(&sig_parameters); + return sig; +} diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 48e0b20..d919873 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -3394,6 +3394,10 @@ builtin_builder::create_builtins() generate_ir::int_to_fp64(mem_ctx, integer_functions_supported), NULL); + add_function("__builtin_fsqrt64", + generate_ir::fsqrt64(mem_ctx, integer_functions_supported), + NULL); + #undef F #undef FI #undef FIUD_VEC diff --git a/src/compiler/glsl/builtin_functions.h b/src/compiler/glsl/builtin_functions.h index f9cc0ad..2f72f51 100644 --- a/src/compiler/glsl/builtin_functions.h +++ b/src/compiler/glsl/builtin_functions.h @@ -106,6 +106,9 @@ fp64_to_fp32(void *mem_ctx, builtin_available_predicate avail); ir_function_signature * fp32_to_fp64(void *mem_ctx, builtin_available_predicate avail); +ir_function_signature * +fsqrt64(void *mem_ctx, builtin_available_predicate avail); + } #endif /* BULITIN_FUNCTIONS_H */ diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl index 748e4af..c03f0f6 100644 --- a/src/compiler/glsl/float64.glsl +++ b/src/compiler/glsl/float64.glsl @@ -1014,3 +1014,278 @@ fp32_to_fp64(float f) shift64Right(aFrac, 0u, 3, zFrac0, zFrac1); return 
packFloat64(aSign, aExp + 0x380, zFrac0, zFrac1); } + +/* Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the + * 96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is + * modulo 2^96, so any carry out is lost. The result is broken into three + * 32-bit pieces which are stored at the locations pointed to by `z0Ptr', + * `z1Ptr', and `z2Ptr'. + */ +/*void +add96(uint a0, uint a1, uint a2, + uint b0, uint b1, uint b2, + inout uint z0Ptr, + inout uint z1Ptr, + inout uint z2Ptr) +{ + uint z2 = a2 + b2; + uint carry1 = uint(z2 < a2); + uint z1 = a1 + b1; + uint carry0 = uint(z1 < a1); + uint z0 = a0 + b0; + z1 += carry1; + z0 += uint(z1 < carry1); + z0 += carry0; + z2Ptr = z2; + z1Ptr = z1; + z0Ptr = z0; +}*/ + +/* Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from + * the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction + * is modulo 2^96, so any borrow out (carry out) is lost. The result is broken + * into three 32-bit pieces which are stored at the locations pointed to by + * `z0Ptr', `z1Ptr', and `z2Ptr'. + */ +/*void +sub96(uint a0, uint a1, uint a2, + uint b0, uint b1, uint b2, + inout uint z0Ptr, + inout uint z1Ptr, + inout uint z2Ptr) +{ + uint z2 = a2 - b2; + uint borrow1 = uint(a2 < b2); + uint z1 = a1 - b1; + uint borrow0 = uint(a1 < b1); + uint z0 = a0 - b0; + z0 -= uint(z1 < borrow1); + z1 -= borrow1; + z0 -= borrow0; + z2Ptr = z2; + z1Ptr = z1; + z0Ptr = z0; +}*/ + +/* Returns an approximation to the 32-bit integer quotient obtained by dividing + * `b' into the 64-bit value formed by concatenating `a0' and `a1'. The + * divisor `b' must be at least 2^31. If q is the exact quotient truncated + * toward zero, the approximation returned lies between q and q + 2 inclusive. + * If the exact quotient q is larger than 32 bits, the maximum positive 32-bit + * unsigned integer is returned. 
+ */ +/*uint +estimateDiv64To32(uint a0, uint a1, uint b) +{ + uint b0; + uint b1; + uint rem0 = 0u; + uint rem1 = 0u; + uint term0 = 0u; + uint term1 = 0u; + uint z; + + if (b <= a0) + return 0xFFFFFFFFu; + b0 = b>>16; + z = (b0<<16 <= a0) ? 0xFFFF0000u : (a0 / b0)<<16; + mul32To64(b, z, term0, term1); + sub64(a0, a1, term0, term1, rem0, rem1); + while (int(rem0) < 0) { + z -= 0x10000u; + b1 = b<<16; + add64(rem0, rem1, b0, b1, rem0, rem1); + } + rem0 = (rem0<<16) | (rem1>>16); + z |= (b0<<16 <= rem0) ? 0xFFFFu : rem0 / b0; + return z; +}*/ + +/*uint +sqrtOddAdjustments(int index) +{ + uint res = 0u; + if (index == 0) + res = 0x0004u; + if (index == 1) + res = 0x0022u; + if (index == 2) + res = 0x005Du; + if (index == 3) + res = 0x00B1u; + if (index == 4) + res = 0x011Du; + if (index == 5) + res = 0x019Fu; + if (index == 6) + res = 0x0236u; + if (index == 7) + res = 0x02E0u; + if (index == 8) + res = 0x039Cu; + if (index == 9) + res = 0x0468u; + if (index == 10) + res = 0x0545u; + if (index == 11) + res = 0x631u; + if (index == 12) + res = 0x072Bu; + if (index == 13) + res = 0x0832u; + if (index == 14) + res = 0x0946u; + if (index == 15) + res = 0x0A67u; + + return res; +} + +uint +sqrtEvenAdjustments(int index) +{ + uint res = 0u; + if (index == 0) + res = 0x0A2Du; + if (index == 1) + res = 0x08AFu; + if (index == 2) + res = 0x075Au; + if (index == 3) + res = 0x0629u; + if (index == 4) + res = 0x051Au; + if (index == 5) + res = 0x0429u; + if (index == 6) + res = 0x0356u; + if (index == 7) + res = 0x029Eu; + if (index == 8) + res = 0x0200u; + if (index == 9) + res = 0x0179u; + if (index == 10) + res = 0x0109u; + if (index == 11) + res = 0x00AFu; + if (index == 12) + res = 0x0068u; + if (index == 13) + res = 0x0034u; + if (index == 14) + res = 0x0012u; + if (index == 15) + res = 0x0002u; + + return res; +}*/ + +/* Returns an approximation to the square root of the 32-bit significand given + * by `a'. Considered as an integer, `a' must be at least 2^31. 
If bit 0 of + * `aExp' (the least significant bit) is 1, the integer returned approximates + * 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' + * is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either + * case, the approximation returned lies strictly within +/-2 of the exact + * value. + */ +/*uint estimateSqrt32(int aExp, uint a) +{ + uint z; + + int index = int(a>>27 & 15u); + if ((aExp & 1) != 0) { + z = 0x4000u + (a>>17) - sqrtOddAdjustments(index); + z = ((a / z)<<14) + (z<<15); + a >>= 1; + } else { + z = 0x8000u + (a>>17) - sqrtEvenAdjustments(index); + z = a / z + z; + z = (0x20000u <= z) ? 0xFFFF8000u : (z<<15); + if (z <= a) + return uint(int(a)>>1); + } + return ((estimateDiv64To32(a, 0u, z))>>1) + (z>>1); +}*/ + +/* Returns the square root of the double-precision floating-point value `a'. + * The operation is performed according to the IEEE Standard for Floating-Point + * Arithmetic. + */ +uvec2 +fsqrt64(uvec2 a) +{ +/* uint zFrac0 = 0u; + uint zFrac1 = 0u; + uint zFrac2 = 0u; + uint doubleZFrac0 = 0u; + uint rem0 = 0u; + uint rem1 = 0u; + uint rem2 = 0u; + uint rem3 = 0u; + uint term0 = 0u; + uint term1 = 0u; + uint term2 = 0u; + uint term3 = 0u; + uvec2 default_nan; + default_nan.y = 0xFFFFFFFFu; + default_nan.x = 0xFFFFFFFFu; + + uint aFracLo = extractFloat64FracLo(a); + uint aFracHi = extractFloat64FracHi(a); + int aExp = extractFloat64Exp(a); + uint aSign = extractFloat64Sign(a); + if (aExp == 0x7FF) { + if ((aFracHi | aFracLo) != 0u) + return propagateFloat64NaN(a, a); + if (aSign == 0u) + return a; + return default_nan; + } + if (aSign != 0u) { + if ((uint(aExp) | aFracHi | aFracLo) == 0u) + return a; + return default_nan; + } + if (aExp == 0) { + if ((aFracHi | aFracLo) == 0u) + return packFloat64(0u, 0, 0u, 0u); + normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo); + } + int zExp = ((aExp - 0x3FF)>>1) + 0x3FE; + aFracHi |= 0x00100000u; + shortShift64Left(aFracHi, aFracLo, 11, term0, 
term1); + zFrac0 = (estimateSqrt32(aExp, term0)>>1) + 1u; + if (zFrac0 == 0u) + zFrac0 = 0x7FFFFFFFu; + doubleZFrac0 = zFrac0 + zFrac0; + shortShift64Left(aFracHi, aFracLo, 9 - (aExp & 1), aFracHi, aFracLo); + mul32To64(zFrac0, zFrac0, term0, term1); + sub64(aFracHi, aFracLo, term0, term1, rem0, rem1); + while (int(rem0) < 0) { + --zFrac0; + doubleZFrac0 -= 2u; + add64(rem0, rem1, 0u, doubleZFrac0 | 1u, rem0, rem1); + } + zFrac1 = estimateDiv64To32(rem1, 0u, doubleZFrac0); + if ((zFrac1 & 0x1FFu) <= 5u) { + if (zFrac1 == 0u) + zFrac1 = 1u; + mul32To64(doubleZFrac0, zFrac1, term1, term2); + sub64(rem1, 0u, term1, term2, rem1, rem2); + mul32To64(zFrac1, zFrac1, term2, term3); + sub96(rem1, rem2, 0u, 0u, term2, term3, rem1, rem2, rem3); + while (int(rem1) < 0) { + --zFrac1; + shortShift64Left(0u, zFrac1, 1, term2, term3); + term3 |= 1u; + term2 |= doubleZFrac0; + add96(rem1, rem2, rem3, 0u, term2, term3, rem1, rem2, rem3); + } + zFrac1 |= uint((rem1 | rem2 | rem3) != 0u); + } + shift64ExtraRightJamming(zFrac0, zFrac1, 0u, 10, zFrac0, zFrac1, zFrac2); + return roundAndPackFloat64(0u, zExp, zFrac0, zFrac1, zFrac2);*/ + + return fp32_to_fp64(sqrt(fp64_to_fp32(a))); +} diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 3fcdcb0..d2411c5 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -2381,6 +2381,7 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, "__have_builtin_builtin_int_to_fp64", 1); add_builtin_define(parser, "__have_builtin_builtin_fp64_to_fp32", 1); add_builtin_define(parser, "__have_builtin_builtin_fp32_to_fp64", 1); + add_builtin_define(parser, "__have_builtin_builtin_fsqrt64", 1); } } -- 2.9.5 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev