gcc/ChangeLog:
* config/loongarch/loongarch.h (LARCH_MAX_MOVE_PER_INSN):
Define to the maximum number of bytes that can be loaded or
stored with one machine instruction.
* config/loongarch/loongarch.cc (loongarch_mode_for_move_size):
New static function.
(loongarch_block_move_straight): Call
loongarch_mode_for_move_size to get the machine_mode to be moved.
(loongarch_expand_block_move): Use LARCH_MAX_MOVE_PER_INSN
instead of UNITS_PER_WORD.
---
Bootstrapped and regtested on loongarch64-linux-gnu with
BOOT_CFLAGS="-O3 -mlasx", with the PR110939 patch applied and the
"lib_build_self_spec = %<..." line in t-linux commented out (it silently
makes -mlasx in BOOT_CFLAGS ineffective; Yujie is working on a proper
fix). Ok for trunk?
gcc/config/loongarch/loongarch.cc | 22 ++++++++++++++++++----
gcc/config/loongarch/loongarch.h | 3 +++
2 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 6698414281e..509ef2b97f1 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -5191,6 +5191,20 @@ loongarch_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
return true;
}
+static machine_mode
+loongarch_mode_for_move_size (HOST_WIDE_INT size)
+{
+ switch (size)
+ {
+ case 32:
+ return V32QImode;
+ case 16:
+ return V16QImode;
+ }
+
+ return int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
+}
+
/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
Assume that the areas do not overlap. */
@@ -5220,7 +5234,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
{
- mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require ();
+ mode = loongarch_mode_for_move_size (delta_cur);
for (; offs + delta_cur <= length; offs += delta_cur, i++)
{
@@ -5231,7 +5245,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
{
- mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require ();
+ mode = loongarch_mode_for_move_size (delta_cur);
for (; offs + delta_cur <= length; offs += delta_cur, i++)
loongarch_emit_move (adjust_address (dest, mode, offs), regs[i]);
@@ -5326,8 +5340,8 @@ loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align)
HOST_WIDE_INT align = INTVAL (r_align);
- if (!TARGET_STRICT_ALIGN || align > UNITS_PER_WORD)
- align = UNITS_PER_WORD;
+ if (!TARGET_STRICT_ALIGN || align > LARCH_MAX_MOVE_PER_INSN)
+ align = LARCH_MAX_MOVE_PER_INSN;
if (length <= align * LARCH_MAX_MOVE_OPS_STRAIGHT)
{
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index 3fc9dc43ab1..7e391205583 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -1181,6 +1181,9 @@ typedef struct {
least twice. */
#define LARCH_MAX_MOVE_OPS_STRAIGHT (LARCH_MAX_MOVE_OPS_PER_LOOP_ITER * 2)
+#define LARCH_MAX_MOVE_PER_INSN \
+ (ISA_HAS_LASX ? 32 : (ISA_HAS_LSX ? 16 : UNITS_PER_WORD))
+
/* The base cost of a memcpy call, for MOVE_RATIO and friends. These
values were determined experimentally by benchmarking with CSiBE.
*/