Hi:
  The rtx cost of sse_to_integer is used by pass_stv as a
measurement for the scalar-to-vector transformation. As
https://gcc.gnu.org/pipermail/gcc-patches/2019-August/528839.html
indicates, movement between SSE regs and GPRs should be much more
expensive than movement inside GPRs (which is 2 by default). This patch
also fixes "pr96861".

  Bootstrap is ok, and regression tests are ok for both "i386.exp=*
--target_board='unix{-m32,}'" and "i386.exp=*
--target_board='unix{-m32\ -march=cascadelake,-m64\
-march=cascadelake}'".
  No big impact on SPEC2017.
  Ok for trunk?

gcc/ChangeLog

        PR target/96861
        * config/i386/x86-tune-costs.h (skylake_cost): Increase rtx
        cost of sse_to_integer from 2 to 6.

gcc/testsuite

        * gcc.target/i386/pr95021-3.c: Add -mtune=generic.


--
BR,
Hongtao
From 2a9a943ec56ca3ea1ba1a2447a32b103c2a1c790 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao....@intel.com>
Date: Wed, 16 Sep 2020 10:53:52 +0800
Subject: [PATCH] Increase rtx cost of sse_to_integer in skylake_cost.

As https://gcc.gnu.org/pipermail/gcc-patches/2019-August/528839.html
indicates, movement between SSE regs and GPRs should be much more
expensive than movement inside GPRs (which is 2 by default).

gcc/ChangeLog

	PR target/96861
	* config/i386/x86-tune-costs.h (skylake_cost): Increase rtx
	cost of sse_to_integer from 2 to 6.

gcc/testsuite

	* gcc.target/i386/pr95021-3.c: Add -mtune=generic.
---
 gcc/config/i386/x86-tune-costs.h          | 2 +-
 gcc/testsuite/gcc.target/i386/pr95021-3.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index a782a9dd9e3..c289b6ba454 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1769,7 +1769,7 @@ struct processor_costs skylake_cost = {
   {6, 6, 6, 10, 20},			/* cost of unaligned loads.  */
   {8, 8, 8, 8, 16},			/* cost of unaligned stores.  */
   2, 2, 4,				/* cost of moving XMM,YMM,ZMM register */
-  2,					/* cost of moving SSE register to integer.  */
+  6,					/* cost of moving SSE register to integer.  */
   20, 8,				/* Gather load static, per_elt.  */
   22, 10,				/* Gather store static, per_elt.  */
   64,					/* size of l1 cache.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr95021-3.c b/gcc/testsuite/gcc.target/i386/pr95021-3.c
index 1748161a77c..52f9e4569b3 100644
--- a/gcc/testsuite/gcc.target/i386/pr95021-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr95021-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target ia32 } } */
-/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W" } */
+/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W -mtune=generic" } */
 /* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */
 
 #include "pr95021-1.c"
-- 
2.18.1

Reply via email to