https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89628

            Bug ID: 89628
           Summary: aarch64_vector_pcs does not use v24-v31 as temp regs
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: nsz at gcc dot gnu.org
  Target Milestone: ---

Consider the following test case:

typedef __Float32x4_t vec;

/*
 * Reproducer: a function declared with the aarch64_vector_pcs attribute that
 * takes eight vector arguments and needs only a handful of temporaries.
 *
 * Every argument a0..a7 is used again in the final return expression, so all
 * eight arguments stay live while the temporaries t0..t7 and s0..s3 are
 * computed.
 *
 * Returns the element-wise product of s0..s3 and a0..a7.
 */
__attribute__((aarch64_vector_pcs))
vec f(vec a0, vec a1, vec a2, vec a3, vec a4, vec a5, vec a6, vec a7)
{
        vec t0, t1, t2, t3, t4, t5, t6, t7, s0, s1, s2, s3;
        /* Differences of "mirrored" argument pairs (a0/a7, a1/a6, ...). */
        t0 = a0 - a7;
        t1 = a1 - a6;
        t2 = a2 - a5;
        t3 = a3 - a4;
        /* The same pairs again with the operands swapped. */
        t4 = a4 - a3;
        t5 = a5 - a2;
        t6 = a6 - a1;
        t7 = a7 - a0;
        /* Pairwise products of the differences. */
        s0 = t0 * t1;
        s1 = t2 * t3;
        s2 = t4 * t5;
        s3 = t6 * t7;
        /* Multiplying by a0..a7 here keeps all eight arguments live to the end. */
        return s0 * s1 * s2 * s3 * a0 * a1 * a2 * a3 * a4 * a5 * a6 * a7;
}

The AArch64 vector PCS (vpcs) has 8 argument + 8 temporary registers to use, so
I think such code should not need to spill; however, current GCC seems to compile it as

// GCC output for f(). The annotations below assume a0..a7 arrive in v0..v7,
// which matches the operand pattern of the C source -- TODO confirm against the PCS.
// Only v0-v7 and v16-v21 appear in this listing; v24-v31 are never referenced.
f:
        // Pre-indexed store: move sp down by 96 bytes and save q16/q17 at the new sp.
        stp     q16, q17, [sp, -96]!
        // v16 = a2 - a5 (t2)
        fsub    v16.4s, v2.4s, v5.4s
        // Save q18/q19 at sp+32.
        stp     q18, q19, [sp, 32]
        // v17 = a0 - a7 (t0)
        fsub    v17.4s, v0.4s, v7.4s
        // Save q20/q21 at sp+64.
        stp     q20, q21, [sp, 64]
        // v18 = t1, v20 = t3, v21 = t5, v19 = t4
        fsub    v18.4s, v1.4s, v6.4s
        fsub    v20.4s, v3.4s, v4.4s
        fsub    v21.4s, v5.4s, v2.4s
        fsub    v19.4s, v4.4s, v3.4s
        // v17 = t0 * t1 (s0), v16 = t2 * t3 (s1)
        fmul    v17.4s, v17.4s, v18.4s
        fmul    v16.4s, v16.4s, v20.4s
        // v18 = a6 - a1 (t6), v20 = a7 - a0 (t7); v18 and v20 are reused here.
        fsub    v18.4s, v6.4s, v1.4s
        fsub    v20.4s, v7.4s, v0.4s
        // v19 = t4 * t5 (s2)
        fmul    v19.4s, v19.4s, v21.4s
        // v16 = s0 * s1
        fmul    v16.4s, v17.4s, v16.4s
        // v17 = t6 * t7 (s3)
        fmul    v17.4s, v18.4s, v20.4s
        // Restore q20/q21 from sp+64.
        ldp     q20, q21, [sp, 64]
        // v16 = s0 * s1 * s2
        fmul    v16.4s, v16.4s, v19.4s
        // Restore q18/q19 from sp+32.
        ldp     q18, q19, [sp, 32]
        // v16 = s0 * s1 * s2 * s3, then multiplied by a0
        fmul    v16.4s, v16.4s, v17.4s
        fmul    v16.4s, v16.4s, v0.4s
        // The running product moves into v1 (multiplied by a1), freeing v16.
        fmul    v1.4s, v16.4s, v1.4s
        // Post-indexed load: restore q16/q17 from sp, then move sp back up by 96 bytes.
        ldp     q16, q17, [sp], 96
        // Multiply the running product by a2..a7 in turn; the final result lands in v0.
        fmul    v2.4s, v1.4s, v2.4s
        fmul    v3.4s, v2.4s, v3.4s
        fmul    v4.4s, v3.4s, v4.4s
        fmul    v5.4s, v4.4s, v5.4s
        fmul    v6.4s, v5.4s, v6.4s
        fmul    v0.4s, v6.4s, v7.4s
        ret

Note that the v24..v31 registers are not used, but there are 6 spills: q16-q21 are saved to the stack and restored.

Reply via email to