When the vector template for copysign was implemented, it was written
in terms of vector floating-point AND and IOR operations.  As a result,
the operands of AND and IOR may now have vector floating-point modes.
However, the constraint YC does not handle vector floating-point
constants, which results in an ICE (internal compiler error).
PR target/122097
gcc/ChangeLog:
* config/loongarch/loongarch.cc
(loongarch_const_vector_bitimm_set_p): Add support for vector float.
(loongarch_const_vector_bitimm_clr_p): Likewise.
(loongarch_print_operand): Likewise.
* config/loongarch/simd.md (and<mode>3): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/pr122097.c: New test.
---
gcc/config/loongarch/loongarch.cc | 78 ++++-
gcc/config/loongarch/simd.md | 22 +-
gcc/testsuite/gcc.target/loongarch/pr122097.c | 271 ++++++++++++++++++
3 files changed, 362 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr122097.c
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 3ccc75cc978..34d1a9f9a59 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1719,14 +1719,36 @@ loongarch_symbol_binds_local_p (const_rtx x)
bool
loongarch_const_vector_bitimm_set_p (rtx op, machine_mode mode)
{
- if (GET_CODE (op) == CONST_VECTOR && op != CONST0_RTX (mode))
+ if (GET_CODE (op) == CONST_VECTOR
+ && (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT))
{
- unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
+ unsigned HOST_WIDE_INT val;
+
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ {
+ rtx val_s = CONST_VECTOR_ELT (op, 0);
+ const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (val_s);
+ if (GET_MODE (val_s) == DFmode)
+ {
+ long tmp[2];
+ REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp);
+ val = (unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0];
+ }
+ else
+ {
+ long tmp;
+ REAL_VALUE_TO_TARGET_SINGLE (*x, tmp);
+ val = (unsigned HOST_WIDE_INT) tmp;
+ }
+ }
+ else
+ val = UINTVAL (CONST_VECTOR_ELT (op, 0));
+
int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode)));
if (vlog2 != -1)
{
- gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1);
return loongarch_const_vector_same_val_p (op, mode);
}
@@ -1741,14 +1763,35 @@ loongarch_const_vector_bitimm_set_p (rtx op, machine_mode mode)
bool
loongarch_const_vector_bitimm_clr_p (rtx op, machine_mode mode)
{
- if (GET_CODE (op) == CONST_VECTOR && op != CONSTM1_RTX (mode))
+ if (GET_CODE (op) == CONST_VECTOR
+ && (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT))
{
- unsigned HOST_WIDE_INT val = ~UINTVAL (CONST_VECTOR_ELT (op, 0));
+ unsigned HOST_WIDE_INT val;
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ {
+ rtx val_s = CONST_VECTOR_ELT (op, 0);
+ const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (val_s);
+ if (GET_MODE (val_s) == DFmode)
+ {
+ long tmp[2];
+ REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp);
+ val = ~((unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0]);
+ }
+ else
+ {
+ long tmp;
+ REAL_VALUE_TO_TARGET_SINGLE (*x, tmp);
+ val = ~((unsigned HOST_WIDE_INT) tmp);
+ }
+ }
+ else
+ val = ~UINTVAL (CONST_VECTOR_ELT (op, 0));
+
int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode)));
if (vlog2 != -1)
{
- gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1);
return loongarch_const_vector_same_val_p (op, mode);
}
@@ -6396,7 +6439,28 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
if (CONST_VECTOR_P (op))
{
machine_mode mode = GET_MODE_INNER (GET_MODE (op));
- unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
+ rtx val_s = CONST_VECTOR_ELT (op, 0);
+ unsigned HOST_WIDE_INT val;
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT) /* FP element: use its bit image.  */
+ {
+ const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (val_s);
+ if (GET_MODE (val_s) == DFmode)
+ {
+ long tmp[2]; /* tmp[0] is used as the low half, tmp[1] as the high half.  */
+ REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp);
+ val = (unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0]; /* Widen, then shift.  */
+ }
+ else
+ {
+ long tmp;
+ REAL_VALUE_TO_TARGET_SINGLE (*x, tmp);
+ val = (unsigned HOST_WIDE_INT) tmp;
+ }
+ }
+ else /* Integer element: take the constant's value directly.  */
+ val = UINTVAL (val_s);
+
int vlog2 = exact_log2 (val & GET_MODE_MASK (mode));
if (vlog2 != -1)
fprintf (file, "%d", vlog2);
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 3334a59da80..9f4525a25f0 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -1012,8 +1012,26 @@ (define_insn "and<mode>3"
case 1:
{
rtx elt0 = CONST_VECTOR_ELT (operands[2], 0);
- unsigned HOST_WIDE_INT val = ~UINTVAL (elt0);
- operands[2] = loongarch_gen_const_int_vector (<MODE>mode, val & (-val));
+ unsigned HOST_WIDE_INT val;
+ if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT)
+ {
+ const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (elt0);
+ if (GET_MODE (elt0) == DFmode)
+ {
+ long tmp[2];
+ REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp);
+ val = ~((unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0]);
+ }
+ else
+ {
+ long tmp;
+ REAL_VALUE_TO_TARGET_SINGLE (*x, tmp);
+ val = ~((unsigned HOST_WIDE_INT) tmp);
+ }
+ }
+ else
+ val = ~UINTVAL (elt0);
+ operands[2] = loongarch_gen_const_int_vector (<VIMODE>mode, val &
(-val));
return "<x>vbitclri.%v0\t%<wu>0,%<wu>1,%V2";
}
case 2:
diff --git a/gcc/testsuite/gcc.target/loongarch/pr122097.c b/gcc/testsuite/gcc.target/loongarch/pr122097.c
new file mode 100644
index 00000000000..5d32b191baf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr122097.c
@@ -0,0 +1,271 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mabi=lp64d -mlsx" } */
+/* { dg-final { scan-assembler "vbitseti\.d\t\\\$vr\[0-9\]+,\\\$vr\[0-9\]+,63" } } */
+
+typedef long unsigned int size_t;
+typedef unsigned char simde__mmask8;
+typedef long simde__m128i __attribute__ ((__aligned__ ((16))))
+__attribute__ ((__vector_size__ (16))) __attribute__ ((__may_alias__));
+typedef union
+{
+
+ __attribute__ ((__aligned__ ((16)))) long i64
+ __attribute__ ((__vector_size__ (16))) __attribute__ ((__may_alias__));
+} simde__m128i_private;
+typedef double simde_float64;
+typedef simde_float64 simde__m128d __attribute__ ((__aligned__ ((16))))
+__attribute__ ((__vector_size__ (16))) __attribute__ ((__may_alias__));
+typedef long int int_fast32_t;
+typedef union
+{
+
+ __attribute__ ((__aligned__ ((16)))) int_fast32_t i32f
+ __attribute__ ((__vector_size__ (16))) __attribute__ ((__may_alias__));
+ __attribute__ ((__aligned__ ((16)))) long i64
+ __attribute__ ((__vector_size__ (16))) __attribute__ ((__may_alias__));
+ __attribute__ ((__aligned__ ((16)))) simde_float64 f64
+ __attribute__ ((__vector_size__ (16))) __attribute__ ((__may_alias__));
+} simde__m128d_private;
+__attribute__ ((__always_inline__)) inline static simde__m128d
+simde__m128d_from_private (simde__m128d_private v)
+{
+ simde__m128d r;
+ __builtin_memcpy (&r, &v, sizeof (r));
+ return r;
+}
+
+__attribute__ ((__always_inline__)) inline static simde__m128d
+simde_mm_set_pd (simde_float64 e1, simde_float64 e0)
+{
+
+ simde__m128d_private r_;
+ r_.f64[0] = e0;
+ r_.f64[1] = e1;
+
+ return simde__m128d_from_private (r_);
+}
+__attribute__ ((__always_inline__)) inline static simde__m128i
+simde_mm_castpd_si128 (simde__m128d a)
+{
+ simde__m128i r;
+ __builtin_memcpy (&r, &a, sizeof (a));
+ return r;
+}
+
+__attribute__ ((__always_inline__)) inline static simde__m128i
+simde__m128i_from_private (simde__m128i_private v)
+{
+ simde__m128i r;
+ __builtin_memcpy (&r, &v, sizeof (r));
+ return r;
+}
+
+__attribute__ ((__always_inline__)) inline static simde__m128i_private
+simde__m128i_to_private (simde__m128i v)
+{
+ simde__m128i_private r;
+ __builtin_memcpy (&r, &v, sizeof (r));
+ return r;
+}
+__attribute__ ((__always_inline__)) inline static simde__m128d
+simde_mm_castsi128_pd (simde__m128i a)
+{
+ simde__m128d r;
+ __builtin_memcpy (&r, &a, sizeof (a));
+ return r;
+}
+
+__attribute__ ((__always_inline__)) inline static simde__m128i
+simde_mm_mask_mov_epi64 (simde__m128i src, simde__mmask8 k, simde__m128i a)
+{
+
+ simde__m128i_private src_ = simde__m128i_to_private (src),
+ a_ = simde__m128i_to_private (a), r_;
+
+ for (size_t i = 0; i < (sizeof (r_.i64) / sizeof (r_.i64[0])); i++)
+ {
+ r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : src_.i64[i];
+ }
+
+ return simde__m128i_from_private (r_);
+}
+
+__attribute__ ((__always_inline__)) inline static simde__m128d
+simde_mm_mask_mov_pd (simde__m128d src, simde__mmask8 k, simde__m128d a)
+{
+ return simde_mm_castsi128_pd (simde_mm_mask_mov_epi64 (
+ simde_mm_castpd_si128 (src), k, simde_mm_castpd_si128 (a)));
+}
+
+static double
+simde_test_f64_precision_to_slop (int precision)
+{
+ return __builtin_expect (!!(precision == 0x7fffffff), 0)
+ ? 0.0
+ : __builtin_pow (10.0, -((double)(precision)));
+}
+__attribute__ ((__always_inline__)) inline static void
+simde_mm_storeu_pd (simde_float64 *mem_addr, simde__m128d a)
+{
+
+ __builtin_memcpy (mem_addr, &a, sizeof (a));
+}
+int simde_test_equal_f64 (simde_float64 a, simde_float64 b,
+ simde_float64 slop);
+void simde_test_debug_printf_ (const char *format, ...);
+static int
+simde_assert_equal_vf64_ (size_t vec_len, simde_float64 const a[(vec_len)],
+ simde_float64 const b[(vec_len)], simde_float64 slop,
+ const char *filename, int line, const char *astr,
+ const char *bstr)
+{
+ for (size_t i = 0; i < vec_len; i++)
+ {
+ if (__builtin_expect (!!(!simde_test_equal_f64 (a[i], b[i], slop)), 0))
+ {
+ simde_test_debug_printf_ (
+ "%s:%d: assertion failed: %s[%zu] ~= %s[%zu] (%f ~= %f)\n",
+ filename, line, astr, i, bstr, i, ((double)(a[i])),
+ ((double)(b[i])));
+ return 1;
+ }
+ }
+ return 0;
+}
+static int
+simde_test_x86_assert_equal_f64x2_ (simde__m128d a, simde__m128d b,
+ simde_float64 slop, const char *filename,
+ int line, const char *astr,
+ const char *bstr)
+{
+ simde_float64 a_[sizeof (a) / sizeof (simde_float64)],
+ b_[sizeof (a) / sizeof (simde_float64)];
+ simde_mm_storeu_pd (a_, a);
+ simde_mm_storeu_pd (b_, b);
+ return simde_assert_equal_vf64_ (sizeof (a_) / sizeof (a_[0]), a_, b_, slop,
+ filename, line, astr, bstr);
+}
+__attribute__ ((__always_inline__)) inline static simde__m128d_private
+simde__m128d_to_private (simde__m128d v)
+{
+ simde__m128d_private r;
+ __builtin_memcpy (&r, &v, sizeof (r));
+ return r;
+}
+__attribute__ ((__always_inline__)) inline static simde__m128d
+simde_mm_min_pd (simde__m128d a, simde__m128d b)
+{
+
+ simde__m128d_private r_, a_ = simde__m128d_to_private (a),
+ b_ = simde__m128d_to_private (b);
+
+ for (size_t i = 0; i < (sizeof (r_.f64) / sizeof (r_.f64[0])); i++)
+ {
+ r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i];
+ }
+
+ return simde__m128d_from_private (r_);
+}
+
+__attribute__ ((__always_inline__)) inline static simde__m128d
+simde_mm_max_pd (simde__m128d a, simde__m128d b)
+{
+
+ simde__m128d_private r_, a_ = simde__m128d_to_private (a),
+ b_ = simde__m128d_to_private (b);
+
+ for (size_t i = 0; i < (sizeof (r_.f64) / sizeof (r_.f64[0])); i++)
+ {
+ r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i];
+ }
+
+ return simde__m128d_from_private (r_);
+}
+
+__attribute__ ((__always_inline__)) inline static simde__m128d
+simde_x_mm_abs_pd (simde__m128d a)
+{
+
+ simde__m128d_private r_, a_ = simde__m128d_to_private (a);
+ for (size_t i = 0; i < (sizeof (r_.f64) / sizeof (r_.f64[0])); i++)
+ {
+ r_.f64[i] = __builtin_fabs (a_.f64[i]);
+ }
+
+ return simde__m128d_from_private (r_);
+}
+__attribute__ ((__always_inline__)) inline static simde__m128d
+simde_mm_cmple_pd (simde__m128d a, simde__m128d b)
+{
+
+ simde__m128d_private r_, a_ = simde__m128d_to_private (a),
+ b_ = simde__m128d_to_private (b);
+
+ r_.i64 = ((__typeof__ (r_.i64))((a_.f64 <= b_.f64)));
+ return simde__m128d_from_private (r_);
+}
+
+__attribute__ ((__always_inline__)) inline static simde__m128d
+simde_x_mm_select_pd (simde__m128d a, simde__m128d b, simde__m128d mask)
+{
+ simde__m128d_private r_, a_ = simde__m128d_to_private (a),
+ b_ = simde__m128d_to_private (b),
+ mask_ = simde__m128d_to_private (mask);
+
+ r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64);
+ return simde__m128d_from_private (r_);
+}
+simde__m128d simde_mm_cmpge_pd (simde__m128d a, simde__m128d b);
+
+simde__m128d
+simde_x_mm_copysign_pd (simde__m128d dest, simde__m128d src)
+{
+ simde__m128d_private r_, dest_ = simde__m128d_to_private (dest),
+ src_ = simde__m128d_to_private (src);
+ for (size_t i = 0; i < (sizeof (r_.f64) / sizeof (r_.f64[0])); i++)
+ {
+ r_.f64[i] = __builtin_copysign (dest_.f64[i], src_.f64[i]);
+ }
+
+ return simde__m128d_from_private (r_);
+}
+simde__m128d simde_mm_or_pd (simde__m128d a, simde__m128d b);
+
+simde__m128d simde_mm_set1_pd (simde_float64 a);
+
+__attribute__ ((__always_inline__)) inline static simde__m128d
+simde_mm_range_pd (simde__m128d a, simde__m128d b, int imm8)
+{
+ simde__m128d r;
+
+ r = simde_x_mm_select_pd (
+ b, a, simde_mm_cmple_pd (simde_x_mm_abs_pd (a), simde_x_mm_abs_pd (b)));
+
+ r = simde_x_mm_copysign_pd (r, a);
+
+ return r;
+}
+int
+test_simde_mm_mask_range_pd (void)
+{
+
+ simde__m128d src, a, b, e, r;
+
+ src = simde_mm_set_pd (-2.92, -85.39);
+ a = simde_mm_set_pd (-47.59, -122.31);
+ b = simde_mm_set_pd (877.42, 69.15);
+ e = simde_mm_set_pd (-47.59, -69.15);
+ r = simde_mm_mask_mov_pd (src, 143, simde_mm_range_pd (a, b, 2));
+ do
+ {
+ if (simde_test_x86_assert_equal_f64x2_ (
+ r, e, simde_test_f64_precision_to_slop (1),
+ "../test/x86/avx512/range.c", 1454, "r", "e"))
+ {
+ return 1;
+ }
+ }
+ while (0);
+
+ return 0;
+}
--
2.34.1