Sorry again for the slow review. :(

I only really looked at the unreversed version earlier, on the basis
that the comments would apply to both versions.  But I've got a couple
of comments about the reversed version below:

Mariam Arutunian <mariamarutun...@gmail.com> writes:
> [...]
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index ee12d8897a8..546a379fd74 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -30265,6 +30265,126 @@ aarch64_retrieve_sysreg (const char *regname, bool 
> write_p, bool is128op)
>    return sysreg->encoding;
>  }
>  
> +/* Generate assembly to calculate CRC
> +   using carry-less multiplication instruction.
> +   OPERANDS[1] is input CRC,
> +   OPERANDS[2] is data (message),
> +   OPERANDS[3] is the polynomial without the leading 1.  */
> +
> +void
> +aarch64_expand_crc_using_pmull (scalar_mode crc_mode,
> +                             scalar_mode data_mode,
> +                             rtx *operands)
> +{
> +  /* Check and keep arguments.  */
> +  gcc_assert (!CONST_INT_P (operands[0]));
> +  gcc_assert (CONST_INT_P (operands[3]));
> +  rtx crc = operands[1];
> +  rtx data = operands[2];
> +  rtx polynomial = operands[3];
> +
> +  unsigned HOST_WIDE_INT crc_size = GET_MODE_BITSIZE (crc_mode);
> +  unsigned HOST_WIDE_INT data_size = GET_MODE_BITSIZE (data_mode);
> +  gcc_assert (crc_size <= 32);
> +  gcc_assert (data_size <= crc_size);
> +
> +  /* Calculate the quotient.  */
> +  unsigned HOST_WIDE_INT
> +      q = gf2n_poly_long_div_quotient (UINTVAL (polynomial), crc_size);
> +  /* CRC calculation's main part.  */
> +  if (crc_size > data_size)
> +    crc = expand_shift (RSHIFT_EXPR, DImode, crc, crc_size - data_size,
> +                     NULL_RTX, 1);
> +
> +  rtx t0 = force_reg (DImode, gen_int_mode (q, DImode));
> +  polynomial = simplify_gen_unary (ZERO_EXTEND, DImode, polynomial,
> +                                GET_MODE (polynomial));
> +  rtx t1 = force_reg (DImode, polynomial);
> +
> +  rtx a0 = expand_binop (DImode, xor_optab, crc, data, NULL_RTX, 1,
> +                      OPTAB_WIDEN);
> +
> +  rtx clmul_res = gen_reg_rtx (TImode);
> +  emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t0));
> +  a0 = gen_lowpart (DImode, clmul_res);
> +
> +  a0 = expand_shift (RSHIFT_EXPR, DImode, a0, crc_size, NULL_RTX, 1);
> +
> +  emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t1));
> +  a0 = gen_lowpart (DImode, clmul_res);
> +
> +  if (crc_size > data_size)
> +    {
> +      rtx crc_part = expand_shift (LSHIFT_EXPR, DImode, operands[1], 
> data_size,
> +                                NULL_RTX, 0);
> +      a0 = expand_binop (DImode, xor_optab, a0, crc_part, NULL_RTX, 1,
> +                      OPTAB_DIRECT);
> +    }
> +
> +  /* Zero upper bits beyond crc_size.  */

The comment no longer applies.  Otherwise this function looks good to me.

> +  aarch64_emit_move (operands[0], gen_lowpart (crc_mode, a0));
> +}
> +
> +/* Generate assembly to calculate reversed CRC
> +   using carry-less multiplication instruction.
> +   OPERANDS[1] is input CRC,
> +   OPERANDS[2] is data,
> +   OPERANDS[3] is the polynomial without the leading 1.  */
> +
> +void
> +aarch64_expand_reversed_crc_using_pmull (scalar_mode crc_mode,
> +                                      scalar_mode data_mode,
> +                                      rtx *operands)
> +{
> +  /* Check and keep arguments.  */
> +  gcc_assert (!CONST_INT_P (operands[0]));
> +  gcc_assert (CONST_INT_P (operands[3]));
> +  rtx crc = operands[1];
> +  rtx data = operands[2];
> +  rtx polynomial = operands[3];
> +
> +  unsigned HOST_WIDE_INT crc_size = GET_MODE_BITSIZE (crc_mode);
> +  unsigned HOST_WIDE_INT data_size = GET_MODE_BITSIZE (data_mode);
> +  gcc_assert (crc_size <= 32);
> +  gcc_assert (data_size <= crc_size);
> +
> +  /* Calculate the quotient.  */
> +  unsigned HOST_WIDE_INT
> +      q = gf2n_poly_long_div_quotient (UINTVAL (polynomial), crc_size);
> +  /* Reflect the calculated quotient.  */
> +  q = reflect (q);
> +  rtx t0 = force_reg (DImode, gen_int_mode (q >> (data_size - 4), DImode));
> +
> +  /* Reflect the polynomial.  */
> +  unsigned HOST_WIDE_INT ref_polynomial = reflect (UINTVAL (polynomial));

It looks like reflect() autodetects the bitwidth based on the assumption
that the upper half will be nonzero.  But that might not be true for all
possible polynomials (when the implicit leading coefficient is absent).
E.g. it looks like the 64-bit HDLC CRC polynomial is 0x1b (just the
lowest byte nonzero), and although we don't support 64-bit polynomials
here, the approach wouldn't work for it.

I think it'd be safer to pass the bitwidth as an explicit parameter.
Also, maybe it could go in hwint.* instead of expr.* and be called
something like reflect_hwi.

> +  rtx t1 = force_reg (DImode, gen_int_mode (ref_polynomial << 1, DImode));
> +
> +  /* CRC calculation's main part.  */
> +  rtx a0 = expand_binop (DImode, xor_optab, crc, data, NULL_RTX, 1,
> +                      OPTAB_WIDEN);
> +
> +  /* Perform carry-less multiplication and get low part.  */
> +  rtx clmul_res = gen_reg_rtx (TImode);
> +  emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t0));
> +  a0 = gen_lowpart (DImode, clmul_res);
> +
> +  a0 = expand_shift (LSHIFT_EXPR, DImode, a0, 64 - crc_size - 3, NULL_RTX, 
> 0);

I haven't really looked at this implementation strategy before, so this
is probably a silly question sorry :-) but is there a specific reason
for selecting the bias of 4 in the right shift above, cancelled out by
subtracting the same amount from this left shift?  It looks like we could
use any value in the range [1, crc_size - 1], is that right?

Just asking out of curiosity though.  I agree it works.

> +
> +  /* Perform carry-less multiplication and get high part.  */
> +  emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t1));
> +  a0 = gen_highpart (DImode, clmul_res);

Although this works, it's taking a DImode highpart of a single TImode
register, which requires a spill from the register allocator.  So we get
something like:

        pmull   v31.1q, v31.1d, v14.1d
        str     q31, [sp, 48]
        ldr     d31, [sp, 48]
        shl     d31, d31, 53
        pmull   v31.1q, v31.1d, v15.1d
        str     q31, [sp, 48]
        ldrb    w0, [sp, 56]

(I think the first STR and LDR come from reusing the same pseudo register
for the temporary results; it's the STR+LDRB that causes the spill.)

One way around that would be:

  emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t1));
  a0 = gen_reg_rtx (DImode);
  rtx v2di_res = gen_lowpart (V2DImode, clmul_res);
  rtx high_index = BYTES_BIG_ENDIAN ? const0_rtx : const1_rtx;
  emit_insn (gen_aarch64_get_lanev2di (a0, v2di_res, high_index));

That generates:

        pmull   v30.1q, v29.1d, v30.1d
        shl     d30, d30, 53
        pmull   v30.1q, v30.1d, v31.1d
        umov    x0, v30.d[1]

which is pretty nice.

There again, it might not be too bad if we instead used the flipped
version of the unreversed approach, such as the following minor adaptation:

  /* Calculate the quotient.  */
  unsigned HOST_WIDE_INT
      q = gf2n_poly_long_div_quotient (UINTVAL (polynomial), crc_size);
  /* Reflect the calculated quotient.  */
  q = reflect_hwi (q, crc_size + 1);
  rtx t0 = force_reg (DImode, gen_int_mode (q, DImode));
 
  /* Reflect the polynomial.  */
  unsigned HOST_WIDE_INT ref_polynomial = reflect_hwi (UINTVAL (polynomial),
                                                       crc_size);
  /* An unshifted multiplier would require the final result to be extracted
     using a shift right by DATA_SIZE - 1 bits.  Shift the multiplier left
     so that the shift right can be by CRC_SIZE bits instead.  */
  ref_polynomial <<= crc_size - data_size + 1;
  rtx t1 = force_reg (DImode, gen_int_mode (ref_polynomial, DImode));

  /* CRC calculation's main part.  */
  rtx a0 = expand_binop (DImode, xor_optab, crc, data, NULL_RTX, 1,
                         OPTAB_WIDEN);

  /* Perform carry-less multiplication and get low part.  */
  rtx clmul_res = gen_reg_rtx (TImode);
  emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t0));
  a0 = gen_lowpart (DImode, clmul_res);

  a0 = expand_binop (DImode, and_optab, a0,
                     gen_int_mode (GET_MODE_MASK (data_mode), DImode),
                     NULL_RTX, 1, OPTAB_WIDEN);

  /* Perform carry-less multiplication.  */
  emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t1));

  /* Perform a shift right by CRC_SIZE as an extraction of lane 1.  */
  machine_mode crc_vmode = aarch64_vq_mode (crc_mode).require ();
  a0 = (crc_size > data_size ? gen_reg_rtx (crc_mode) : operands[0]);
  emit_insn (gen_aarch64_get_lane (crc_vmode, a0,
                                   gen_lowpart (crc_vmode, clmul_res),
                                   aarch64_endian_lane_rtx (crc_vmode, 1)));

  if (crc_size > data_size)
    {
      rtx crc_part = expand_shift (RSHIFT_EXPR, crc_mode, crc, data_size,
                                   NULL_RTX, 1);
      a0 = expand_binop (crc_mode, xor_optab, a0, crc_part, operands[0], 1,
                         OPTAB_WIDEN);
      aarch64_emit_move (operands[0], a0);
    }

This gives:

        pmull   v30.1q, v30.1d, v31.1d
        movi    v31.2d, 0xff
        and     v30.8b, v30.8b, v31.8b
        pmull   v30.1q, v30.1d, v31.1d
        umov    w0, v30.b[1]

or, with SVE enabled:

        pmull   v30.1q, v31.1d, v30.1d
        and     z30.d, z30.d, #255
        pmull   v30.1q, v30.1d, v31.1d
        umov    w0, v30.b[1]

This is preferable since ANDs are generally cheaper than shifts.

That's just a suggestion though; the original version is ok too.

Minor comment: it'd be good to use "pmull_res" instead of "clmul_res"
for the variables. :)

LGTM otherwise.

Thanks,
Richard

> +
> +  if (crc_size > data_size)
> +    {
> +      rtx crc_part = expand_shift (RSHIFT_EXPR, DImode, crc, data_size,
> +                                NULL_RTX, 1);
> +      a0 = expand_binop (DImode, xor_optab, a0, crc_part, NULL_RTX, 1,
> +                      OPTAB_DIRECT);
> +    }
> +
> +  aarch64_emit_move (operands[0], gen_lowpart (crc_mode, a0));
> +}
> +
>  /* Target-specific selftests.  */
>  
>  #if CHECKING_P
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 9dff2d7a2b0..08c588bc475 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -4543,6 +4543,63 @@
>    [(set_attr "type" "crc")]
>  )
>  
> +;; Reversed CRC
> +(define_expand "crc_rev<ALLI:mode><ALLX:mode>4"
> +  [;; return value (calculated CRC)
> +   (match_operand:ALLX 0 "register_operand" "=r")
> +   ;; initial CRC
> +   (match_operand:ALLX 1 "register_operand" "r")
> +   ;; data
> +   (match_operand:ALLI 2 "register_operand" "r")
> +   ;; polynomial without leading 1
> +   (match_operand:ALLX 3)]
> +  ""
> +  {
> +    /* If the polynomial is the same as the polynomial of crc32c* 
> instruction,
> +       put that instruction.  crc32c uses iSCSI polynomial.  */
> +    if (TARGET_CRC32 && INTVAL (operands[3]) == 0x1EDC6F41
> +     && <ALLX:MODE>mode == SImode)
> +      emit_insn (gen_aarch64_crc32c<ALLI:crc_data_type> (operands[0],
> +                                                      operands[1],
> +                                                      operands[2]));
> +    /* If the polynomial is the same as the polynomial of crc32* instruction,
> +     put that instruction.  crc32 uses HDLC etc.  polynomial.  */
> +    else if (TARGET_CRC32 && INTVAL (operands[3]) == 0x04C11DB7
> +          && <ALLX:MODE>mode == SImode)
> +      emit_insn (gen_aarch64_crc32<ALLI:crc_data_type> (operands[0],
> +                                                     operands[1],
> +                                                     operands[2]));
> +    else if (TARGET_AES && <ALLI:sizen> <= <ALLX:sizen>)
> +      aarch64_expand_reversed_crc_using_pmull (<ALLX:MODE>mode,
> +                                            <ALLI:MODE>mode,
> +                                            operands);
> +    else
> +      /* Otherwise, generate table-based CRC.  */
> +      expand_reversed_crc_table_based (operands[0], operands[1], operands[2],
> +                                    operands[3], <ALLI:MODE>mode,
> +                                    generate_reflecting_code_standard);
> +    DONE;
> +  }
> +)
> +
> +;; Bit-forward CRC
> +(define_expand "crc<ALLI:mode><ALLX:mode>4"
> +  [;; return value (calculated CRC)
> +   (match_operand:ALLX 0 "register_operand" "=r")
> +   ;; initial CRC
> +   (match_operand:ALLX 1 "register_operand" "r")
> +   ;; data
> +   (match_operand:ALLI 2 "register_operand" "r")
> +   ;; polynomial without leading 1
> +   (match_operand:ALLX 3)]
> +  "TARGET_AES && <ALLI:sizen> <= <ALLX:sizen>"
> +  {
> +    aarch64_expand_crc_using_pmull (<ALLX:MODE>mode, <ALLI:MODE>mode,
> +                                 operands);
> +    DONE;
> +  }
> +)
> +
>  (define_insn "*csinc2<mode>_insn"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>          (plus:GPI (match_operand 2 "aarch64_comparison_operation" "")
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 99cde46f1ba..86e4863d684 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1276,6 +1276,10 @@
>  ;; Map a mode to a specific constraint character.
>  (define_mode_attr cmode [(QI "q") (HI "h") (SI "s") (DI "d")])
>  
> +;; Map a mode to a specific constraint character for calling
> +;; appropriate version of crc.
> +(define_mode_attr crc_data_type [(QI "b") (HI "h") (SI "w") (DI "x")])
> +
>  ;; Map modes to Usg and Usj constraints for SISD right shifts
>  (define_mode_attr cmode_simd [(SI "g") (DI "j")])
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-1-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-1-pmul.c
> new file mode 100644
> index 00000000000..4043251dbd8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-1-pmul.c
> @@ -0,0 +1,8 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc -fdisable-tree-phiopt2 -fdisable-tree-phiopt3" } */
> +
> +#include "../../gcc.dg/torture/crc-1.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-10-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-10-pmul.c
> new file mode 100644
> index 00000000000..dd866b38e83
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-10-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-10.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-12-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-12-pmul.c
> new file mode 100644
> index 00000000000..16d901eeaef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-12-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc -fdisable-tree-phiopt2 -fdisable-tree-phiopt3" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-12.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-13-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-13-pmul.c
> new file mode 100644
> index 00000000000..5f7741fad0f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-13-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-13.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-14-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-14-pmul.c
> new file mode 100644
> index 00000000000..cdedbbd3db1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-14-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-14.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-17-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-17-pmul.c
> new file mode 100644
> index 00000000000..c219e49a2b1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-17-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-17.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-18-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-18-pmul.c
> new file mode 100644
> index 00000000000..124900a979b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-18-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-18.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-21-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-21-pmul.c
> new file mode 100644
> index 00000000000..3cae1a7f57b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-21-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-21.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-22-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-22-pmul.c
> new file mode 100644
> index 00000000000..0ec2e312f8f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-22-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-22.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-23-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-23-pmul.c
> new file mode 100644
> index 00000000000..0c4542adb40
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-23-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-23.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-4-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-4-pmul.c
> new file mode 100644
> index 00000000000..08f1d3b69d7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-4-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-4.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-5-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-5-pmul.c
> new file mode 100644
> index 00000000000..91bf5e6353d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-5-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -w -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-5.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-6-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-6-pmul.c
> new file mode 100644
> index 00000000000..4680eafe758
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-6-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-6.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-7-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-7-pmul.c
> new file mode 100644
> index 00000000000..655484d10d4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-7-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-7.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-8-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-8-pmul.c
> new file mode 100644
> index 00000000000..6c2acc84c32
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-8-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-8.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-9-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-9-pmul.c
> new file mode 100644
> index 00000000000..e76f3c77b59
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-9-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-9.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-CCIT-data16-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-CCIT-data16-pmul.c
> new file mode 100644
> index 00000000000..21520474564
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-CCIT-data16-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-w -march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-CCIT-data16.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-CCIT-data8-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-CCIT-data8-pmul.c
> new file mode 100644
> index 00000000000..3dcc92320f3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-CCIT-data8-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-w -march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto" } } */
> +
> +#include "../../gcc.dg/torture/crc-CCIT-data8.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-coremark-16bitdata-pmul.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-coremark-16bitdata-pmul.c
> new file mode 100644
> index 00000000000..e5196aaafef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-coremark-16bitdata-pmul.c
> @@ -0,0 +1,9 @@
> +/* { dg-do run } */
> +/* { dg-options "-w -march=armv8-a+crypto -O2 -fdump-rtl-dfinish 
> -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include "../../gcc.dg/torture/crc-coremark16-data16.c"
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-crc32-data16.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-crc32-data16.c
> new file mode 100644
> index 00000000000..e82cb04fcc3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-crc32-data16.c
> @@ -0,0 +1,53 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crc -O2 -fdump-rtl-dfinish -fdump-tree-crc" 
> } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include <stdint.h>
> +#include <stdlib.h>
> +
> +__attribute__ ((noinline,optimize(0)))
> +uint32_t _crc32_O0 (uint32_t crc, uint16_t data) {
> +  int i;
> +  crc = crc ^ data;
> +
> +  for (i = 0; i < 8; i++) {
> +      if (crc & 1)
> +     crc = (crc >> 1) ^ 0xEDB88320;
> +      else
> +     crc = (crc >> 1);
> +    }
> +
> +  return crc;
> +}
> +
> +uint32_t _crc32 (uint32_t crc, uint16_t data) {
> +  int i;
> +  crc = crc ^ data;
> +
> +  for (i = 0; i < 8; i++) {
> +      if (crc & 1)
> +     crc = (crc >> 1) ^ 0xEDB88320;
> +      else
> +     crc = (crc >> 1);
> +    }
> +
> +  return crc;
> +}
> +
> +int main ()
> +{
> +  uint32_t crc = 0x0D800D80;
> +  for (uint16_t i = 0; i < 0xffff; i++)
> +    {
> +      uint32_t res1 = _crc32_O0 (crc, i);
> +      uint32_t res2 = _crc32 (crc, i);
> +      if (res1 != res2)
> +      abort ();
> +      crc = res1;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */
> +/* { dg-final { scan-rtl-dump-times "pmull" 0 "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-crc32-data32.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-crc32-data32.c
> new file mode 100644
> index 00000000000..a7564a7e28a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-crc32-data32.c
> @@ -0,0 +1,52 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crc -O2 -fdump-rtl-dfinish -fdump-tree-crc" 
> } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include <stdint.h>
> +#include <stdlib.h>
> +__attribute__ ((noinline,optimize(0)))
> +uint32_t _crc32_O0 (uint32_t crc, uint32_t data) {
> +  int i;
> +  crc = crc ^ data;
> +
> +  for (i = 0; i < 32; i++) {
> +      if (crc & 1)
> +     crc = (crc >> 1) ^ 0xEDB88320;
> +      else
> +     crc = (crc >> 1);
> +    }
> +
> +  return crc;
> +}
> +
> +uint32_t _crc32 (uint32_t crc, uint32_t data) {
> +  int i;
> +  crc = crc ^ data;
> +
> +  for (i = 0; i < 32; i++) {
> +      if (crc & 1)
> +     crc = (crc >> 1) ^ 0xEDB88320;
> +      else
> +     crc = (crc >> 1);
> +    }
> +
> +  return crc;
> +}
> +
> +int main ()
> +{
> +  uint32_t crc = 0x0D800D80;
> +  for (uint8_t i = 0; i < 0xff; i++)
> +    {
> +      uint32_t res1 = _crc32_O0 (crc, i);
> +      uint32_t res2 = _crc32 (crc, i);
> +      if (res1 != res2)
> +      abort ();
> +      crc = res1;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */
> +/* { dg-final { scan-rtl-dump-times "pmull" 0 "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-crc32-data8.c 
> b/gcc/testsuite/gcc.target/aarch64/crc-crc32-data8.c
> new file mode 100644
> index 00000000000..c88cafadedc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-crc32-data8.c
> @@ -0,0 +1,53 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crc -O2 -fdump-rtl-dfinish -fdump-tree-crc" 
> } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include <stdint.h>
> +#include <stdlib.h>
> +
> +__attribute__ ((noinline,optimize(0)))
> +uint32_t _crc32_O0 (uint32_t crc, uint8_t data) {
> +  int i;
> +  crc = crc ^ data;
> +
> +  for (i = 0; i < 8; i++) {
> +      if (crc & 1)
> +     crc = (crc >> 1) ^ 0xEDB88320;
> +      else
> +     crc = (crc >> 1);
> +    }
> +
> +  return crc;
> +}
> +
> +uint32_t _crc32 (uint32_t crc, uint8_t data) {
> +  int i;
> +  crc = crc ^ data;
> +
> +  for (i = 0; i < 8; i++) {
> +      if (crc & 1)
> +     crc = (crc >> 1) ^ 0xEDB88320;
> +      else
> +     crc = (crc >> 1);
> +    }
> +
> +  return crc;
> +}
> +
> +int main ()
> +{
> +  uint32_t crc = 0x0D800D80;
> +  for (uint8_t i = 0; i < 0xff; i++)
> +    {
> +      uint32_t res1 = _crc32_O0 (crc, i);
> +      uint32_t res2 = _crc32 (crc, i);
> +      if (res1 != res2)
> +      abort ();
> +      crc = res1;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 
> "crc"} } */
> +/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */
> +/* { dg-final { scan-rtl-dump-times "pmull" 0 "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data16.c b/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data16.c
> new file mode 100644
> index 00000000000..d82e6252603
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data16.c
> @@ -0,0 +1,53 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crc -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include <stdint.h>
> +#include <stdlib.h>
> +
> +__attribute__ ((noinline,optimize(0)))
> +uint32_t _crc32_O0 (uint32_t crc, uint16_t data) {
> +  int i;
> +  crc = crc ^ data;
> +
> +  for (i = 0; i < 8; i++) {
> +      if (crc & 1)
> +     crc = (crc >> 1) ^ 0x82F63B78;
> +      else
> +     crc = (crc >> 1);
> +    }
> +
> +  return crc;
> +}
> +
> +uint32_t _crc32 (uint32_t crc, uint16_t data) {
> +  int i;
> +  crc = crc ^ data;
> +
> +  for (i = 0; i < 8; i++) {
> +      if (crc & 1)
> +     crc = (crc >> 1) ^ 0x82F63B78;
> +      else
> +     crc = (crc >> 1);
> +    }
> +
> +  return crc;
> +}
> +
> +int main ()
> +{
> +  uint32_t crc = 0x0D800D80;
> +  for (uint16_t i = 0; i < 0xffff; i++)
> +    {
> +      uint32_t res1 = _crc32_O0 (crc, i);
> +      uint32_t res2 = _crc32 (crc, i);
> +      if (res1 != res2)
> +      abort ();
> +      crc = res1;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
> +/* { dg-final { scan-rtl-dump "UNSPEC_CRC32C" "dfinish"} } */
> +/* { dg-final { scan-rtl-dump-times "pmull" 0 "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data32.c b/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data32.c
> new file mode 100644
> index 00000000000..7acb6fc239c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data32.c
> @@ -0,0 +1,52 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crc -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include <stdint.h>
> +#include <stdlib.h>
> +__attribute__ ((noinline,optimize(0)))
> +uint32_t _crc32_O0 (uint32_t crc, uint32_t data) {
> +  int i;
> +  crc = crc ^ data;
> +
> +  for (i = 0; i < 32; i++) {
> +      if (crc & 1)
> +     crc = (crc >> 1) ^ 0x82F63B78;
> +      else
> +     crc = (crc >> 1);
> +    }
> +
> +  return crc;
> +}
> +
> +uint32_t _crc32 (uint32_t crc, uint32_t data) {
> +  int i;
> +  crc = crc ^ data;
> +
> +  for (i = 0; i < 32; i++) {
> +      if (crc & 1)
> +     crc = (crc >> 1) ^ 0x82F63B78;
> +      else
> +     crc = (crc >> 1);
> +    }
> +
> +  return crc;
> +}
> +
> +int main ()
> +{
> +  uint32_t crc = 0x0D800D80;
> +  for (uint8_t i = 0; i < 0xff; i++)
> +    {
> +      uint32_t res1 = _crc32_O0 (crc, i);
> +      uint32_t res2 = _crc32 (crc, i);
> +      if (res1 != res2)
> +      abort ();
> +      crc = res1;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
> +/* { dg-final { scan-rtl-dump "UNSPEC_CRC32C" "dfinish"} } */
> +/* { dg-final { scan-rtl-dump-times "pmull" 0 "dfinish"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data8.c b/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data8.c
> new file mode 100644
> index 00000000000..e8a8901e453
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data8.c
> @@ -0,0 +1,53 @@
> +/* { dg-do run } */
> +/* { dg-options "-march=armv8-a+crc -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
> +/* { dg-skip-if "" { *-*-* } { "-flto"} } */
> +
> +#include <stdint.h>
> +#include <stdlib.h>
> +
> +__attribute__ ((noinline,optimize(0)))
> +uint32_t _crc32_O0 (uint32_t crc, uint8_t data) {
> +  int i;
> +  crc = crc ^ data;
> +
> +  for (i = 0; i < 8; i++) {
> +      if (crc & 1)
> +     crc = (crc >> 1) ^ 0x82F63B78;
> +      else
> +     crc = (crc >> 1);
> +    }
> +
> +  return crc;
> +}
> +
> +uint32_t _crc32 (uint32_t crc, uint8_t data) {
> +  int i;
> +  crc = crc ^ data;
> +
> +  for (i = 0; i < 8; i++) {
> +      if (crc & 1)
> +     crc = (crc >> 1) ^ 0x82F63B78;
> +      else
> +     crc = (crc >> 1);
> +    }
> +
> +  return crc;
> +}
> +
> +int main ()
> +{
> +  uint32_t crc = 0x0D800D80;
> +  for (uint8_t i = 0; i < 0xff; i++)
> +    {
> +      uint32_t res1 = _crc32_O0 (crc, i);
> +      uint32_t res2 = _crc32 (crc, i);
> +      if (res1 != res2)
> +      abort ();
> +      crc = res1;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
> +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
> +/* { dg-final { scan-rtl-dump "UNSPEC_CRC32C" "dfinish"} } */
> +/* { dg-final { scan-rtl-dump-times "pmull" 0 "dfinish"} } */

Reply via email to