Sorry, there is a problem in this patch. I will resend it after fixing it.

在 2022/10/29 下午3:05, Lulu Cheng 写道:
Co-Authored-By: xujiahao <xujia...@loongson.cn>

gcc/ChangeLog:

        * config/loongarch/loongarch-def.c: Initialize the number of parallel prefetches.
        * config/loongarch/loongarch-protos.h (loongarch_prefetch_cookie):
        Function declaration.
        * config/loongarch/loongarch-tune.h (struct loongarch_cache):
        Define the number of parallel prefetches.
        * config/loongarch/loongarch.cc (loongarch_option_override_internal):
        Set up parameters to be used in prefetching algorithm.
        (loongarch_prefetch_cookie): Select load or store based on the value
        of write.
        * config/loongarch/loongarch.md (prefetch): New template.
        (*prefetch_indexed_<mode>): New template.
---
  gcc/config/loongarch/loongarch-def.c    |  2 ++
  gcc/config/loongarch/loongarch-protos.h |  1 +
  gcc/config/loongarch/loongarch-tune.h   |  1 +
  gcc/config/loongarch/loongarch.cc       | 48 +++++++++++++++++++++++++
  gcc/config/loongarch/loongarch.md       | 23 ++++++++++++
  5 files changed, 75 insertions(+)

diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
index cbf995d81b5..80ab10a52a8 100644
--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
@@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
        .l1d_line_size = 64,
        .l1d_size = 64,
        .l2d_size = 256,
+      .simultaneous_prefetches = 4,
    },
    [CPU_LA464] = {
        .l1d_line_size = 64,
        .l1d_size = 64,
        .l2d_size = 256,
+      .simultaneous_prefetches = 4,
    },
  };
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index 77b2217247d..489525b520e 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -179,5 +179,6 @@ extern tree loongarch_builtin_decl (unsigned int, bool);
  extern rtx loongarch_expand_builtin (tree, rtx, rtx subtarget ATTRIBUTE_UNUSED,
                                     machine_mode, int);
  extern tree loongarch_build_builtin_va_list (void);
+extern rtx loongarch_prefetch_cookie (rtx, rtx);
#endif /* ! GCC_LOONGARCH_PROTOS_H */
diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
index 6f3530f5c02..8e3eb29472b 100644
--- a/gcc/config/loongarch/loongarch-tune.h
+++ b/gcc/config/loongarch/loongarch-tune.h
@@ -45,6 +45,7 @@ struct loongarch_cache {
      int l1d_line_size;  /* bytes */
      int l1d_size;       /* KiB */
      int l2d_size;       /* kiB */
+    int simultaneous_prefetches; /* number of parallel prefetch */
  };
#endif /* LOONGARCH_TUNE_H */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 5e8cd293645..d663afe434d 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "context.h"
  #include "builtins.h"
  #include "rtl-iter.h"
+#include "params.h"
/* This file should be included last. */
  #include "target-def.h"
@@ -6126,6 +6127,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
    if (loongarch_branch_cost == 0)
      loongarch_branch_cost = loongarch_cost->branch_cost;
+ /* Set up parameters to be used in prefetching algorithm. */
+  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
+                        loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches,
+                        opts->x_param_values,
+                        opts_set->x_param_values);
+
+  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
+                        loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size,
+                        opts->x_param_values,
+                        opts_set->x_param_values);
+
+  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
+                        loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size,
+                        opts->x_param_values,
+                        opts_set->x_param_values);
+
+  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
+                        loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size,
+                        opts->x_param_values,
+                        opts_set->x_param_values);
+
+  /* Enable sw prefetching at -O3 and higher. */
+  if (opts->x_flag_prefetch_loop_arrays < 0
+      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
+      && !opts->x_optimize_size)
+    opts->x_flag_prefetch_loop_arrays = 1;
+
    if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
      error ("%qs cannot be used for compiling a shared library",
           "-mdirect-extern-access");
@@ -6506,6 +6534,26 @@ loongarch_asan_shadow_offset (void)
    return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
  }
+/* Return the hint operand for a preld/preldx instruction as a CONST_INT.
+   LoongArch only implements preld hint=0 (prefetch for load) and hint=8
+   (prefetch for store), other hints just scale to hint = 0 and hint = 8.
+   WRITE and LOCALITY are the CONST_INT operands of the prefetch pattern.
+   NOTE(review): the first test below yields hint 2 for a load with
+   LOCALITY 1, which the statement above does not cover -- confirm.  */
+
+rtx
+loongarch_prefetch_cookie (rtx write, rtx locality)
+{
+  if (INTVAL (locality) == 1 && INTVAL (write) == 0)
+    return GEN_INT (INTVAL (write) + 2);
+
+  /* Store: WRITE is 1, so 1 + 7 gives hint 8.  */
+  if (INTVAL (write) == 1)
+    return GEN_INT (INTVAL (write) + 7);
+
+  /* Load: WRITE is 0, so the hint is 0.  */
+  if (INTVAL (write) == 0)
+    return GEN_INT (INTVAL (write));
+
+  gcc_unreachable ();
+}
+
  /* Initialize the GCC target structure.  */
  #undef TARGET_ASM_ALIGNED_HI_OP
  #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 7eaa9ab66e3..be247164eb4 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -3201,6 +3201,29 @@ (define_expand "untyped_call"
  ;;  ....................
  ;;
+;; Prefetch the address in operand 0.  Operands 1 and 2 are constant
+;; integers; the output code passes them to loongarch_prefetch_cookie and
+;; prints the returned hint as the first operand of "preld".
+(define_insn "prefetch"
+  [(prefetch (match_operand 0 "address_operand" "p")
+            (match_operand 1 "const_int_operand" "n")
+            (match_operand 2 "const_int_operand" "n"))]
+  ""
+{
+  operands[1] = loongarch_prefetch_cookie (operands[1], operands[2]);
+  return "preld\t%1,%a0";
+}
+  [(set_attr "type" "prefetch")])
+
+;; Variant of the prefetch pattern for an address that is the sum of two
+;; registers (operands 0 and 1).  Operands 2 and 3 are constant integers
+;; passed to loongarch_prefetch_cookie; the returned hint is printed as the
+;; first operand of "preldx".
+(define_insn "*prefetch_indexed_<mode>"
+  [(prefetch (plus:P (match_operand 0 "register_operand" "r")
+                    (match_operand 1 "register_operand" "r"))
+            (match_operand 2 "const_int_operand" "n")
+            (match_operand 3 "const_int_operand" "n"))]
+  ""
+{
+  operands[2] = loongarch_prefetch_cookie (operands[2], operands[3]);
+  return "preldx\t%2,%1,%0";
+}
+  [(set_attr "type" "prefetchx")])
+
  (define_insn "nop"
    [(const_int 0)]
    ""

Reply via email to