pratlucas created this revision. Herald added subscribers: cfe-commits, kristof.beyls. Herald added a project: clang.
Sign information was being lost for the arguments of Neon's vdot_lane intrinsics when they were explicitly bitcast to uint32 vector types. This patch introduces a new cast option that casts the operand to the equivalent vector type of 32-bit integers while keeping the sign information. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D74617 Files: clang/include/clang/Basic/arm_neon.td clang/include/clang/Basic/arm_neon_incl.td clang/utils/TableGen/NeonEmitter.cpp Index: clang/utils/TableGen/NeonEmitter.cpp =================================================================== --- clang/utils/TableGen/NeonEmitter.cpp +++ clang/utils/TableGen/NeonEmitter.cpp @@ -238,6 +238,11 @@ NumVectors = 1; } + void make32BitElement() { + assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!"); + ElementBitwidth = 32; + } + void doubleLanes() { assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); Bitwidth = 128; @@ -1486,6 +1491,8 @@ castToType.doubleLanes(); } else if (SI->getAsUnquotedString() == "8") { castToType.makeInteger(8, true); + } else if (SI->getAsUnquotedString() == "32") { + castToType.make32BitElement(); } else { castToType = Type::fromTypedefName(SI->getAsUnquotedString()); assert_with_loc(!castToType.isVoid(), "Unknown typedef"); Index: clang/include/clang/Basic/arm_neon_incl.td =================================================================== --- clang/include/clang/Basic/arm_neon_incl.td +++ clang/include/clang/Basic/arm_neon_incl.td @@ -79,6 +79,7 @@ // - "D" - Double the number of lanes in the type. // - "8" - Convert type to an equivalent vector of 8-bit signed // integers. +// - "32" - Convert type to an equivalent vector of 32-bit integers. // example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return // value is of type "int32x4_t". 
// (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0 Index: clang/include/clang/Basic/arm_neon.td =================================================================== --- clang/include/clang/Basic/arm_neon.td +++ clang/include/clang/Basic/arm_neon.td @@ -207,10 +207,10 @@ def OP_DOT_LN : Op<(call "vdot", $p0, $p1, - (bitcast $p1, (splat(bitcast "uint32x2_t", $p2), $p3)))>; + (bitcast $p1, (splat(bitcast "32", $p2), $p3)))>; def OP_DOT_LNQ : Op<(call "vdot", $p0, $p1, - (bitcast $p1, (splat(bitcast "uint32x4_t", $p2), $p3)))>; + (bitcast $p1, (splat(bitcast "32", $p2), $p3)))>; def OP_FMLAL_LN : Op<(call "vfmlal_low", $p0, $p1, (dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
Index: clang/utils/TableGen/NeonEmitter.cpp =================================================================== --- clang/utils/TableGen/NeonEmitter.cpp +++ clang/utils/TableGen/NeonEmitter.cpp @@ -238,6 +238,11 @@ NumVectors = 1; } + void make32BitElement() { + assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!"); + ElementBitwidth = 32; + } + void doubleLanes() { assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); Bitwidth = 128; @@ -1486,6 +1491,8 @@ castToType.doubleLanes(); } else if (SI->getAsUnquotedString() == "8") { castToType.makeInteger(8, true); + } else if (SI->getAsUnquotedString() == "32") { + castToType.make32BitElement(); } else { castToType = Type::fromTypedefName(SI->getAsUnquotedString()); assert_with_loc(!castToType.isVoid(), "Unknown typedef"); Index: clang/include/clang/Basic/arm_neon_incl.td =================================================================== --- clang/include/clang/Basic/arm_neon_incl.td +++ clang/include/clang/Basic/arm_neon_incl.td @@ -79,6 +79,7 @@ // - "D" - Double the number of lanes in the type. // - "8" - Convert type to an equivalent vector of 8-bit signed // integers. +// - "32" - Convert type to an equivalent vector of 32-bit integers. // example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return // value is of type "int32x4_t". 
// (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0 Index: clang/include/clang/Basic/arm_neon.td =================================================================== --- clang/include/clang/Basic/arm_neon.td +++ clang/include/clang/Basic/arm_neon.td @@ -207,10 +207,10 @@ def OP_DOT_LN : Op<(call "vdot", $p0, $p1, - (bitcast $p1, (splat(bitcast "uint32x2_t", $p2), $p3)))>; + (bitcast $p1, (splat(bitcast "32", $p2), $p3)))>; def OP_DOT_LNQ : Op<(call "vdot", $p0, $p1, - (bitcast $p1, (splat(bitcast "uint32x4_t", $p2), $p3)))>; + (bitcast $p1, (splat(bitcast "32", $p2), $p3)))>; def OP_FMLAL_LN : Op<(call "vfmlal_low", $p0, $p1, (dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits