On 05/05/15 16:17, James Greenhalgh wrote: > On Sat, Apr 25, 2015 at 12:26:16AM +0100, Kugan wrote: >> >> Thanks for the review. I have updated the patch based on the comments >> with some other minor changes. Bootstrapped and regression tested on >> aarch64-none-linux-gnu with no new regressions. Is this OK for trunk? >> >> >> Thanks, >> Kugan >> >> >> gcc/ChangeLog: >> >> 2015-04-24 Kugan Vivekanandarajah <kug...@linaro.org> >> Jim Wilson <jim.wil...@linaro.org> >> >> * config/arm/aarch-common-protos.h (struct mem_cost_table): Added >> new fields loadv and storev. >> * config/aarch64/aarch64-cost-tables.h (thunderx_extra_costs): >> Initialize loadv and storev. >> * config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise. >> (cortexa53_extra_costs): Likewise. >> (cortexa57_extra_costs): Likewise. >> (xgene1_extra_costs): Likewise. >> * config/aarch64/aarch64.c (aarch64_rtx_costs): Update vector >> rtx_costs. > > Hi Kugan, > > Just a few style comments, regarding the placements of comments in single-line > if statements. I know the current code does not necessarily always follow the > comments below, I'll write a patch cleaning that up at some point when I'm > back > at my desk. > > Thanks, > James > >> @@ -5667,6 +5668,14 @@ aarch64_rtx_costs (rtx x, int code, int outer >> ATTRIBUTE_UNUSED, >> case NEG: >> op0 = XEXP (x, 0); >> >> + if (VECTOR_MODE_P (mode)) >> + { >> + if (speed) >> + /* FNEG. */ >> + *cost += extra_cost->vect.alu; >> + return false; >> + } >> + >> if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) >> { >> if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE > > Personally, I find commented if statements without braces hard to > quickly parse. Something like this is much faster for me: > > if (speed) > { > /* FNEG. */ > *cost += extra_cost->vect.alu; > } > >> @@ -5844,7 +5872,10 @@ cost_minus: >> >> if (speed) >> { >> - if (GET_MODE_CLASS (mode) == MODE_INT) >> + if (VECTOR_MODE_P (mode)) >> + /* Vector SUB. 
*/ >> + *cost += extra_cost->vect.alu; >> + else if (GET_MODE_CLASS (mode) == MODE_INT) >> /* SUB(S). */ >> *cost += extra_cost->alu.arith; >> else if (GET_MODE_CLASS (mode) == MODE_FLOAT) > > As above. > >> @@ -5888,7 +5919,6 @@ cost_plus: >> { >> if (speed) >> *cost += extra_cost->alu.arith_shift; >> - >> *cost += rtx_cost (XEXP (XEXP (op0, 0), 0), >> (enum rtx_code) GET_CODE (op0), >> 0, speed); > > Drop this whitespace change. > >> @@ -5913,7 +5943,10 @@ cost_plus: >> >> if (speed) >> { >> - if (GET_MODE_CLASS (mode) == MODE_INT) >> + if (VECTOR_MODE_P (mode)) >> + /* Vector ADD. */ >> + *cost += extra_cost->vect.alu; >> + else if (GET_MODE_CLASS (mode) == MODE_INT) >> /* ADD. */ >> *cost += extra_cost->alu.arith; >> else if (GET_MODE_CLASS (mode) == MODE_FLOAT) > > As above. > >> @@ -6013,10 +6061,15 @@ cost_plus: >> return false; >> >> case NOT: >> - /* MVN. */ >> if (speed) >> - *cost += extra_cost->alu.logical; >> - >> + { >> + /* Vector NOT. */ >> + if (VECTOR_MODE_P (mode)) >> + *cost += extra_cost->vect.alu; >> + /* MVN. */ >> + else >> + *cost += extra_cost->alu.logical; >> + } >> /* The logical instruction could have the shifted register form, >> but the cost is the same if the shift is processed as a separate >> instruction, so we don't bother with it here. */ > > As above. > >> @@ -6055,10 +6108,15 @@ cost_plus: >> return true; >> } >> >> - /* UXTB/UXTH. */ >> if (speed) >> - *cost += extra_cost->alu.extend; >> - >> + { >> + if (VECTOR_MODE_P (mode)) >> + /* UMOV. */ >> + *cost += extra_cost->vect.alu; >> + else >> + /* UXTB/UXTH. */ >> + *cost += extra_cost->alu.extend; >> + } >> return false; >> >> case SIGN_EXTEND: > > And again :) > >> @@ -6087,10 +6150,16 @@ cost_plus: >> >> if (CONST_INT_P (op1)) >> { >> - /* LSL (immediate), UBMF, UBFIZ and friends. These are all >> - aliases. */ >> if (speed) >> - *cost += extra_cost->alu.shift; >> + { >> + /* Vector shift (immediate). 
*/ >> + if (VECTOR_MODE_P (mode)) >> + *cost += extra_cost->vect.alu; >> + /* LSL (immediate), UBMF, UBFIZ and friends. These are all >> + aliases. */ >> + else >> + *cost += extra_cost->alu.shift; >> + } >> >> /* We can incorporate zero/sign extend for free. */ >> if (GET_CODE (op0) == ZERO_EXTEND > > Again, the comment here makes it very difficult to spot the form of > the if/else statement. > >> @@ -6102,10 +6171,15 @@ cost_plus: >> } >> else >> { >> - /* LSLV. */ >> if (speed) >> - *cost += extra_cost->alu.shift_reg; >> - >> + { >> + /* Vector shift (register). */ >> + if (VECTOR_MODE_P (mode)) >> + *cost += extra_cost->vect.alu; >> + /* LSLV. */ >> + else >> + *cost += extra_cost->alu.shift_reg; >> + } >> return false; /* All arguments need to be in registers. */ >> } >> > > Likewise here. > >
Thanks, James, for the review. The attached patch addresses these comments. Is this OK? Thanks, Kugan
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h index ae2b547..939125c 100644 --- a/gcc/config/aarch64/aarch64-cost-tables.h +++ b/gcc/config/aarch64/aarch64-cost-tables.h @@ -83,7 +83,9 @@ const struct cpu_cost_table thunderx_extra_costs = 0, /* N/A: Stm_regs_per_insn_subsequent. */ 0, /* Storef. */ 0, /* Stored. */ - COSTS_N_INSNS (1) /* Store_unaligned. */ + COSTS_N_INSNS (1), /* Store_unaligned. */ + COSTS_N_INSNS (1), /* Loadv. */ + COSTS_N_INSNS (1) /* Storev. */ }, { /* FP SFmode */ diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index cba3c1a..586caaf 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -5499,16 +5499,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, above this default. */ *cost = COSTS_N_INSNS (1); - /* TODO: The cost infrastructure currently does not handle - vector operations. Assume that all vector operations - are equally expensive. */ - if (VECTOR_MODE_P (mode)) - { - if (speed) - *cost += extra_cost->vect.alu; - return true; - } - switch (code) { case SET: @@ -5523,7 +5513,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, if (speed) { rtx address = XEXP (op0, 0); - if (GET_MODE_CLASS (mode) == MODE_INT) + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->ldst.storev; + else if (GET_MODE_CLASS (mode) == MODE_INT) *cost += extra_cost->ldst.store; else if (mode == SFmode) *cost += extra_cost->ldst.storef; @@ -5544,15 +5536,22 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, /* Fall through. */ case REG: + /* The cost is one per vector-register copied. */ + if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1)) + { + int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) + / GET_MODE_SIZE (V4SImode); + *cost = COSTS_N_INSNS (n_minus_1 + 1); + } /* const0_rtx is in general free, but we will use an instruction to set a register to 0. 
*/ - if (REG_P (op1) || op1 == const0_rtx) - { - /* The cost is 1 per register copied. */ - int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) + else if (REG_P (op1) || op1 == const0_rtx) + { + /* The cost is 1 per register copied. */ + int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) / UNITS_PER_WORD; - *cost = COSTS_N_INSNS (n_minus_1 + 1); - } + *cost = COSTS_N_INSNS (n_minus_1 + 1); + } else /* Cost is just the cost of the RHS of the set. */ *cost += rtx_cost (op1, SET, 1, speed); @@ -5650,7 +5649,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, approximation for the additional cost of the addressing mode. */ rtx address = XEXP (x, 0); - if (GET_MODE_CLASS (mode) == MODE_INT) + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->ldst.loadv; + else if (GET_MODE_CLASS (mode) == MODE_INT) *cost += extra_cost->ldst.load; else if (mode == SFmode) *cost += extra_cost->ldst.loadf; @@ -5667,6 +5668,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, case NEG: op0 = XEXP (x, 0); + if (VECTOR_MODE_P (mode)) + { + if (speed) + { + /* FNEG. */ + *cost += extra_cost->vect.alu; + } + return false; + } + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) { if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE @@ -5705,7 +5716,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, case CLRSB: case CLZ: if (speed) - *cost += extra_cost->alu.clz; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.clz; + } return false; @@ -5790,6 +5806,20 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, return false; } + if (VECTOR_MODE_P (mode)) + { + /* Vector compare. */ + if (speed) + *cost += extra_cost->vect.alu; + + if (aarch64_float_const_zero_rtx_p (op1)) + { + /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra + cost. 
*/ + return true; + } + return false; + } return false; case MINUS: @@ -5844,12 +5874,21 @@ cost_minus: if (speed) { - if (GET_MODE_CLASS (mode) == MODE_INT) - /* SUB(S). */ - *cost += extra_cost->alu.arith; + if (VECTOR_MODE_P (mode)) + { + /* Vector SUB. */ + *cost += extra_cost->vect.alu; + } + else if (GET_MODE_CLASS (mode) == MODE_INT) + { + /* SUB(S). */ + *cost += extra_cost->alu.arith; + } else if (GET_MODE_CLASS (mode) == MODE_FLOAT) - /* FSUB. */ - *cost += extra_cost->fp[mode == DFmode].addsub; + { + /* FSUB. */ + *cost += extra_cost->fp[mode == DFmode].addsub; + } } return true; } @@ -5913,12 +5952,21 @@ cost_plus: if (speed) { - if (GET_MODE_CLASS (mode) == MODE_INT) - /* ADD. */ - *cost += extra_cost->alu.arith; + if (VECTOR_MODE_P (mode)) + { + /* Vector ADD. */ + *cost += extra_cost->vect.alu; + } + else if (GET_MODE_CLASS (mode) == MODE_INT) + { + /* ADD. */ + *cost += extra_cost->alu.arith; + } else if (GET_MODE_CLASS (mode) == MODE_FLOAT) - /* FADD. */ - *cost += extra_cost->fp[mode == DFmode].addsub; + { + /* FADD. */ + *cost += extra_cost->fp[mode == DFmode].addsub; + } } return true; } @@ -5927,8 +5975,12 @@ cost_plus: *cost = COSTS_N_INSNS (1); if (speed) - *cost += extra_cost->alu.rev; - + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.rev; + } return false; case IOR: @@ -5936,10 +5988,14 @@ cost_plus: { *cost = COSTS_N_INSNS (1); - if (speed) - *cost += extra_cost->alu.rev; - - return true; + if (speed) + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.rev; + } + return true; } /* Fall through. */ case XOR: @@ -5948,6 +6004,13 @@ cost_plus: op0 = XEXP (x, 0); op1 = XEXP (x, 1); + if (VECTOR_MODE_P (mode)) + { + if (speed) + *cost += extra_cost->vect.alu; + return true; + } + if (code == AND && GET_CODE (op0) == MULT && CONST_INT_P (XEXP (op0, 1)) @@ -6013,10 +6076,19 @@ cost_plus: return false; case NOT: - /* MVN. 
*/ if (speed) - *cost += extra_cost->alu.logical; - + { + if (VECTOR_MODE_P (mode)) + { + /* Vector NOT. */ + *cost += extra_cost->vect.alu; + } + else + { + /* MVN. */ + *cost += extra_cost->alu.logical; + } + } /* The logical instruction could have the shifted register form, but the cost is the same if the shift is processed as a separate instruction, so we don't bother with it here. */ @@ -6055,10 +6127,19 @@ cost_plus: return true; } - /* UXTB/UXTH. */ if (speed) - *cost += extra_cost->alu.extend; - + { + if (VECTOR_MODE_P (mode)) + { + /* UMOV. */ + *cost += extra_cost->vect.alu; + } + else + { + /* UXTB/UXTH. */ + *cost += extra_cost->alu.extend; + } + } return false; case SIGN_EXTEND: @@ -6078,7 +6159,12 @@ cost_plus: } if (speed) - *cost += extra_cost->alu.extend; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.extend; + } return false; case ASHIFT: @@ -6087,10 +6173,20 @@ cost_plus: if (CONST_INT_P (op1)) { - /* LSL (immediate), UBMF, UBFIZ and friends. These are all - aliases. */ if (speed) - *cost += extra_cost->alu.shift; + { + if (VECTOR_MODE_P (mode)) + { + /* Vector shift (immediate). */ + *cost += extra_cost->vect.alu; + } + else + { + /* LSL (immediate), UBMF, UBFIZ and friends. These are all + aliases. */ + *cost += extra_cost->alu.shift; + } + } /* We can incorporate zero/sign extend for free. */ if (GET_CODE (op0) == ZERO_EXTEND @@ -6102,10 +6198,19 @@ cost_plus: } else { - /* LSLV. */ if (speed) - *cost += extra_cost->alu.shift_reg; - + { + if (VECTOR_MODE_P (mode)) + { + /* Vector shift (register). */ + *cost += extra_cost->vect.alu; + } + else + { + /* LSLV. */ + *cost += extra_cost->alu.shift_reg; + } + } return false; /* All arguments need to be in registers. */ } @@ -6120,7 +6225,12 @@ cost_plus: { /* ASR (immediate) and friends. 
*/ if (speed) - *cost += extra_cost->alu.shift; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.shift; + } *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); return true; @@ -6130,8 +6240,12 @@ cost_plus: /* ASR (register) and friends. */ if (speed) - *cost += extra_cost->alu.shift_reg; - + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.shift_reg; + } return false; /* All arguments need to be in registers. */ } @@ -6179,7 +6293,12 @@ cost_plus: case SIGN_EXTRACT: /* UBFX/SBFX. */ if (speed) - *cost += extra_cost->alu.bfx; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.bfx; + } /* We can trust that the immediates used will be correct (there are no by-register forms), so we need only cost op0. */ @@ -6196,7 +6315,9 @@ cost_plus: case UMOD: if (speed) { - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) *cost += (extra_cost->mult[GET_MODE (x) == DImode].add + extra_cost->mult[GET_MODE (x) == DImode].idiv); else if (GET_MODE (x) == DFmode) @@ -6213,7 +6334,9 @@ cost_plus: case SQRT: if (speed) { - if (GET_MODE_CLASS (mode) == MODE_INT) + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else if (GET_MODE_CLASS (mode) == MODE_INT) /* There is no integer SQRT, so only DIV and UDIV can get here. */ *cost += extra_cost->mult[mode == DImode].idiv; @@ -6245,7 +6368,12 @@ cost_plus: op2 = XEXP (x, 2); if (speed) - *cost += extra_cost->fp[mode == DFmode].fma; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->fp[mode == DFmode].fma; + } /* FMSUB, FNMADD, and FNMSUB are free. 
*/ if (GET_CODE (op0) == NEG) @@ -6285,12 +6413,28 @@ cost_plus: case FLOAT_EXTEND: if (speed) - *cost += extra_cost->fp[mode == DFmode].widen; + { + if (VECTOR_MODE_P (mode)) + { + /*Vector truncate. */ + *cost += extra_cost->vect.alu; + } + else + *cost += extra_cost->fp[mode == DFmode].widen; + } return false; case FLOAT_TRUNCATE: if (speed) - *cost += extra_cost->fp[mode == DFmode].narrow; + { + if (VECTOR_MODE_P (mode)) + { + /*Vector conversion. */ + *cost += extra_cost->vect.alu; + } + else + *cost += extra_cost->fp[mode == DFmode].narrow; + } return false; case FIX: @@ -6311,13 +6455,23 @@ cost_plus: } if (speed) - *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint; - + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint; + } *cost += rtx_cost (x, (enum rtx_code) code, 0, speed); return true; case ABS: - if (GET_MODE_CLASS (mode) == MODE_FLOAT) + if (VECTOR_MODE_P (mode)) + { + /* ABS (vector). */ + if (speed) + *cost += extra_cost->vect.alu; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) { /* FABS and FNEG are analogous. */ if (speed) @@ -6338,10 +6492,15 @@ cost_plus: case SMIN: if (speed) { - /* FMAXNM/FMINNM/FMAX/FMIN. - TODO: This may not be accurate for all implementations, but - we do not model this in the cost tables. */ - *cost += extra_cost->fp[mode == DFmode].addsub; + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + { + /* FMAXNM/FMINNM/FMAX/FMIN. + TODO: This may not be accurate for all implementations, but + we do not model this in the cost tables. */ + *cost += extra_cost->fp[mode == DFmode].addsub; + } } return false; diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 3ee7ebf..29f7c99 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -102,6 +102,8 @@ struct mem_cost_table const int storef; /* SFmode. */ const int stored; /* DFmode. 
*/ const int store_unaligned; /* Extra for unaligned stores. */ + const int loadv; /* Vector load. */ + const int storev; /* Vector store. */ }; struct fp_cost_table diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h index 05e96a9..809feb8 100644 --- a/gcc/config/arm/aarch-cost-tables.h +++ b/gcc/config/arm/aarch-cost-tables.h @@ -81,7 +81,9 @@ const struct cpu_cost_table generic_extra_costs = 1, /* stm_regs_per_insn_subsequent. */ COSTS_N_INSNS (2), /* storef. */ COSTS_N_INSNS (3), /* stored. */ - COSTS_N_INSNS (1) /* store_unaligned. */ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ @@ -182,7 +184,9 @@ const struct cpu_cost_table cortexa53_extra_costs = 2, /* stm_regs_per_insn_subsequent. */ 0, /* storef. */ 0, /* stored. */ - COSTS_N_INSNS (1) /* store_unaligned. */ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ @@ -283,7 +287,9 @@ const struct cpu_cost_table cortexa57_extra_costs = 2, /* stm_regs_per_insn_subsequent. */ 0, /* storef. */ 0, /* stored. */ - COSTS_N_INSNS (1) /* store_unaligned. */ + COSTS_N_INSNS (1), /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */ @@ -385,6 +391,8 @@ const struct cpu_cost_table xgene1_extra_costs = 0, /* storef. */ 0, /* stored. */ 0, /* store_unaligned. */ + COSTS_N_INSNS (1), /* loadv. */ + COSTS_N_INSNS (1) /* storev. */ }, { /* FP SFmode */