pratlucas created this revision.
Herald added subscribers: cfe-commits, kristof.beyls.
Herald added a project: clang.

The arguments of Neon's vdot_lane intrinsics were losing their sign
information because they were explicitly bitcast to uint32 vector types.
This patch introduces a new cast option, "32", which casts the operand to
the equivalent vector type with 32-bit integer elements while keeping the
operand's sign information.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D74617

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/include/clang/Basic/arm_neon_incl.td
  clang/utils/TableGen/NeonEmitter.cpp


Index: clang/utils/TableGen/NeonEmitter.cpp
===================================================================
--- clang/utils/TableGen/NeonEmitter.cpp
+++ clang/utils/TableGen/NeonEmitter.cpp
@@ -238,6 +238,11 @@
     NumVectors = 1;
   }
 
+  void make32BitElement() {
+    assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");
+    ElementBitwidth = 32;
+  }
+
   void doubleLanes() {
     assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
     Bitwidth = 128;
@@ -1486,6 +1491,8 @@
         castToType.doubleLanes();
       } else if (SI->getAsUnquotedString() == "8") {
         castToType.makeInteger(8, true);
+      } else if (SI->getAsUnquotedString() == "32") {
+        castToType.make32BitElement();
       } else {
         castToType = Type::fromTypedefName(SI->getAsUnquotedString());
         assert_with_loc(!castToType.isVoid(), "Unknown typedef");
Index: clang/include/clang/Basic/arm_neon_incl.td
===================================================================
--- clang/include/clang/Basic/arm_neon_incl.td
+++ clang/include/clang/Basic/arm_neon_incl.td
@@ -79,6 +79,7 @@
 //          - "D" - Double the number of lanes in the type.
 //          - "8" - Convert type to an equivalent vector of 8-bit signed
 //                  integers.
+//          - "32" - Convert type to an equivalent vector of 32-bit integers.
 // example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
 //           value is of type "int32x4_t".
 //          (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
Index: clang/include/clang/Basic/arm_neon.td
===================================================================
--- clang/include/clang/Basic/arm_neon.td
+++ clang/include/clang/Basic/arm_neon.td
@@ -207,10 +207,10 @@
 
 def OP_DOT_LN
     : Op<(call "vdot", $p0, $p1,
-          (bitcast $p1, (splat(bitcast "uint32x2_t", $p2), $p3)))>;
+          (bitcast $p1, (splat(bitcast "32", $p2), $p3)))>;
 def OP_DOT_LNQ
     : Op<(call "vdot", $p0, $p1,
-          (bitcast $p1, (splat(bitcast "uint32x4_t", $p2), $p3)))>;
+          (bitcast $p1, (splat(bitcast "32", $p2), $p3)))>;
 
 def OP_FMLAL_LN     : Op<(call "vfmlal_low", $p0, $p1,
                            (dup_typed $p1, (call "vget_lane", $p2, $p3)))>;


Index: clang/utils/TableGen/NeonEmitter.cpp
===================================================================
--- clang/utils/TableGen/NeonEmitter.cpp
+++ clang/utils/TableGen/NeonEmitter.cpp
@@ -238,6 +238,11 @@
     NumVectors = 1;
   }
 
+  void make32BitElement() {
+    assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");
+    ElementBitwidth = 32;
+  }
+
   void doubleLanes() {
     assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
     Bitwidth = 128;
@@ -1486,6 +1491,8 @@
         castToType.doubleLanes();
       } else if (SI->getAsUnquotedString() == "8") {
         castToType.makeInteger(8, true);
+      } else if (SI->getAsUnquotedString() == "32") {
+        castToType.make32BitElement();
       } else {
         castToType = Type::fromTypedefName(SI->getAsUnquotedString());
         assert_with_loc(!castToType.isVoid(), "Unknown typedef");
Index: clang/include/clang/Basic/arm_neon_incl.td
===================================================================
--- clang/include/clang/Basic/arm_neon_incl.td
+++ clang/include/clang/Basic/arm_neon_incl.td
@@ -79,6 +79,7 @@
 //          - "D" - Double the number of lanes in the type.
 //          - "8" - Convert type to an equivalent vector of 8-bit signed
 //                  integers.
+//          - "32" - Convert type to an equivalent vector of 32-bit integers.
 // example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
 //           value is of type "int32x4_t".
 //          (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
Index: clang/include/clang/Basic/arm_neon.td
===================================================================
--- clang/include/clang/Basic/arm_neon.td
+++ clang/include/clang/Basic/arm_neon.td
@@ -207,10 +207,10 @@
 
 def OP_DOT_LN
     : Op<(call "vdot", $p0, $p1,
-          (bitcast $p1, (splat(bitcast "uint32x2_t", $p2), $p3)))>;
+          (bitcast $p1, (splat(bitcast "32", $p2), $p3)))>;
 def OP_DOT_LNQ
     : Op<(call "vdot", $p0, $p1,
-          (bitcast $p1, (splat(bitcast "uint32x4_t", $p2), $p3)))>;
+          (bitcast $p1, (splat(bitcast "32", $p2), $p3)))>;
 
 def OP_FMLAL_LN     : Op<(call "vfmlal_low", $p0, $p1,
                            (dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to