On Tue, 15 Jan 2019 08:06:47 PST (-0800), bjorn.to...@gmail.com wrote:
Den tis 15 jan. 2019 kl 16:39 skrev Christoph Hellwig <h...@infradead.org>:

Hmm, while the RISC-V spec requires misaligned load/store support,
who says they are efficient?  Maybe add a little comment that says
on which cpus they are efficient.

Good point! :-) I need to check how other architectures does this.
Enabling it for *all* RV64 is probably not correct.

RISC-V mandates that misaligned memory accesses execute correctly in S-mode, but allow them to be trapped and emulated in M-mode. As a result they can be quite slow. Every microarchitecture I know of traps misaligned accesses into M-mode, so for now we're probably safe just unconditionally saying they're slow.

GCC does have a tuning parameter that says "are misaligned accesses fast?" that we set depending on -mtune, but it doesn't appear to be exposed as a preprocessor macro. I think it's probably best to just expose the tuning parameter as a macro so software that needs to know this has one standard way of doing it.

Jim, would you be opposed to something like this?

   diff --git a/riscv-c-api.md b/riscv-c-api.md
   index 0b0236c38826..a790f5cc23ee 100644
   --- a/riscv-c-api.md
   +++ b/riscv-c-api.md
   @@ -52,6 +52,10 @@ https://creativecommons.org/licenses/by/4.0/.
    * `__riscv_cmodel_medlow`
    * `__riscv_cmodel_medany`
    * `__riscv_cmodel_pic`
   +* `__riscv_tune_misaligned_load_cost`: The number of cycles a word-sized
   +  misaligned load will take.
   +* `__riscv_tune_misaligned_store_cost`: The number of cycles a word-sized
   +  misaligned store will take.
## Function Attributes

Which I think shouldn't be too much of a headache to implement in GCC -- I haven't compiled this yet, though...

   diff --git a/gcc/config/riscv/riscv-c.c b/gcc/config/riscv/riscv-c.c
   index ca72de74a7b4..fa71a4a22104 100644
   --- a/gcc/config/riscv/riscv-c.c
   +++ b/gcc/config/riscv/riscv-c.c
   @@ -98,4 +98,9 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
          builtin_define ("__riscv_cmodel_pic");
          break;
        }
   +
   +    builtin_define_with_int_value ("__riscv_tune_misaligned_load_cost",
   +                                   riscv_tune_info->slow_unaligned_access ? 
1024 : 1);
   +    builtin_define_with_int_value ("__riscv_tune_misaligned_store_cost",
   +                                   riscv_tune_info->slow_unaligned_access ? 
1024 : 1);
    }
   diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
   index a3ab6cec33b4..d58a307d27b4 100644
   --- a/gcc/config/riscv/riscv-opts.h
   +++ b/gcc/config/riscv/riscv-opts.h
   @@ -39,4 +39,6 @@ enum riscv_code_model {
    };
    extern enum riscv_code_model riscv_cmodel;
+extern struct riscv_tune_info riscv_tune_info;
   +
    #endif /* ! GCC_RISCV_OPTS_H */
   diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
   index bf4571d91b8c..671c2ddaaa0f 100644
   --- a/gcc/config/riscv/riscv.c
   +++ b/gcc/config/riscv/riscv.c
   @@ -226,7 +226,7 @@ struct riscv_cpu_info {
      const char *name;
/* Tuning parameters for this CPU. */
   -  const struct riscv_tune_info *tune_info;
   +  const struct riscv_tune_info *riscv_tune_info;
    };
/* Global variables for machine-dependent things. */
   @@ -243,7 +243,7 @@ unsigned riscv_stack_boundary;
    static int epilogue_cfa_sp_offset;
/* Which tuning parameters to use. */
   -static const struct riscv_tune_info *tune_info;
   +const struct riscv_tune_info *riscv_tune_info;
/* Index R is the smallest register class that contains register R. */
    const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
   @@ -1528,7 +1528,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
         instructions it needs.  */
          if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
        {
   -      *total = COSTS_N_INSNS (cost + tune_info->memory_cost);
   +      *total = COSTS_N_INSNS (cost + riscv_tune_info->memory_cost);
          return true;
        }
          /* Otherwise use the default handling.  */
   @@ -1592,7 +1592,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
         mode instead.  */
          mode = GET_MODE (XEXP (x, 0));
          if (float_mode_p)
   -    *total = tune_info->fp_add[mode == DFmode];
   +    *total = riscv_tune_info->fp_add[mode == DFmode];
          else
        *total = riscv_binary_cost (x, 1, 3);
          return false;
   @@ -1601,14 +1601,14 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
        case ORDERED:
          /* (FEQ(A, A) & FEQ(B, B)) compared against 0.  */
          mode = GET_MODE (XEXP (x, 0));
   -      *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
   +      *total = riscv_tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
          return false;
case UNEQ:
        case LTGT:
          /* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B).  */
          mode = GET_MODE (XEXP (x, 0));
   -      *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
   +      *total = riscv_tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
          return false;
case UNGE:
   @@ -1617,13 +1617,13 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
        case UNLT:
          /* FLT or FLE, but guarded by an FFLAGS read and write.  */
          mode = GET_MODE (XEXP (x, 0));
   -      *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
   +      *total = riscv_tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
          return false;
case MINUS:
        case PLUS:
          if (float_mode_p)
   -    *total = tune_info->fp_add[mode == DFmode];
   +    *total = riscv_tune_info->fp_add[mode == DFmode];
          else
        *total = riscv_binary_cost (x, 1, 4);
          return false;
   @@ -1633,7 +1633,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
        rtx op = XEXP (x, 0);
        if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode))
          {
   -        *total = (tune_info->fp_mul[mode == DFmode]
   +        *total = (riscv_tune_info->fp_mul[mode == DFmode]
                      + set_src_cost (XEXP (op, 0), mode, speed)
                      + set_src_cost (XEXP (op, 1), mode, speed)
                      + set_src_cost (XEXP (op, 2), mode, speed));
   @@ -1642,23 +1642,23 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
          }
if (float_mode_p)
   -    *total = tune_info->fp_add[mode == DFmode];
   +    *total = riscv_tune_info->fp_add[mode == DFmode];
          else
        *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1);
          return false;
case MULT:
          if (float_mode_p)
   -    *total = tune_info->fp_mul[mode == DFmode];
   +    *total = riscv_tune_info->fp_mul[mode == DFmode];
          else if (!TARGET_MUL)
        /* Estimate the cost of a library call.  */
        *total = COSTS_N_INSNS (speed ? 32 : 6);
          else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   -    *total = 3 * tune_info->int_mul[0] + COSTS_N_INSNS (2);
   +    *total = 3 * riscv_tune_info->int_mul[0] + COSTS_N_INSNS (2);
          else if (!speed)
        *total = COSTS_N_INSNS (1);
          else
   -    *total = tune_info->int_mul[mode == DImode];
   +    *total = riscv_tune_info->int_mul[mode == DImode];
          return false;
case DIV:
   @@ -1666,7 +1666,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
        case MOD:
          if (float_mode_p)
        {
   -      *total = tune_info->fp_div[mode == DFmode];
   +      *total = riscv_tune_info->fp_div[mode == DFmode];
          return false;
        }
          /* Fall through.  */
   @@ -1677,7 +1677,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
        /* Estimate the cost of a library call.  */
        *total = COSTS_N_INSNS (speed ? 32 : 6);
          else if (speed)
   -    *total = tune_info->int_div[mode == DImode];
   +    *total = riscv_tune_info->int_div[mode == DImode];
          else
        *total = COSTS_N_INSNS (1);
          return false;
   @@ -1699,11 +1699,11 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
        case FIX:
        case FLOAT_EXTEND:
        case FLOAT_TRUNCATE:
   -      *total = tune_info->fp_add[mode == DFmode];
   +      *total = riscv_tune_info->fp_add[mode == DFmode];
          return false;
case FMA:
   -      *total = (tune_info->fp_mul[mode == DFmode]
   +      *total = (riscv_tune_info->fp_mul[mode == DFmode]
                + set_src_cost (XEXP (x, 0), mode, speed)
                + set_src_cost (XEXP (x, 1), mode, speed)
                + set_src_cost (XEXP (x, 2), mode, speed));
   @@ -4165,7 +4165,7 @@ riscv_class_max_nregs (reg_class_t rclass, 
machine_mode mode)
    static int
    riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
    {
   -  return (tune_info->memory_cost
   +  return (riscv_tune_info->memory_cost
          + memory_move_secondary_cost (mode, rclass, in));
    }
@@ -4174,7 +4174,7 @@ riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
    static int
    riscv_issue_rate (void)
    {
   -  return tune_info->issue_rate;
   +  return riscv_tune_info->issue_rate;
    }
/* Implement TARGET_ASM_FILE_START. */
   @@ -4307,22 +4307,22 @@ riscv_option_override (void)
      /* Handle -mtune.  */
      cpu = riscv_parse_cpu (riscv_tune_string ? riscv_tune_string :
                         RISCV_TUNE_STRING_DEFAULT);
   -  tune_info = optimize_size ? &optimize_size_tune_info : cpu->tune_info;
   +  riscv_tune_info = optimize_size ? &optimize_size_tune_info : 
cpu->riscv_tune_info;
/* Use -mtune's setting for slow_unaligned_access, even when optimizing
         for size.  For architectures that trap and emulate unaligned accesses,
         the performance cost is too great, even for -Os.  Similarly, if
         -m[no-]strict-align is left unspecified, heed -mtune's advice.  */
   -  riscv_slow_unaligned_access_p = (cpu->tune_info->slow_unaligned_access
   +  riscv_slow_unaligned_access_p = 
(cpu->riscv_tune_info->slow_unaligned_access
                                   || TARGET_STRICT_ALIGN);
      if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0
   -      && cpu->tune_info->slow_unaligned_access)
   +      && cpu->riscv_tune_info->slow_unaligned_access)
        target_flags |= MASK_STRICT_ALIGN;
/* If the user hasn't specified a branch cost, use the processor's
         default.  */
      if (riscv_branch_cost == 0)
   -    riscv_branch_cost = tune_info->branch_cost;
   +    riscv_branch_cost = riscv_tune_info->branch_cost;
/* Function to allocate machine-dependent function status. */
      init_machine_status = &riscv_init_machine_status;

Reply via email to