I sent the new version and added you as a co-author. Thanks, Mariam
On Fri, Aug 16, 2024 at 7:25 PM Mariam Arutunian <mariamarutun...@gmail.com> wrote: > > > On Fri, Aug 9, 2024 at 7:22 PM Richard Sandiford < > richard.sandif...@arm.com> wrote: > >> Sorry again for the slow review. :( >> >> I only really looked at the unreversed version earlier, on the basis >> that the comments would apply to both versions. But I've got a couple >> of comments about the reversed version below: >> >> Mariam Arutunian <mariamarutun...@gmail.com> writes: >> > [...] >> > diff --git a/gcc/config/aarch64/aarch64.cc >> b/gcc/config/aarch64/aarch64.cc >> > index ee12d8897a8..546a379fd74 100644 >> > --- a/gcc/config/aarch64/aarch64.cc >> > +++ b/gcc/config/aarch64/aarch64.cc >> > @@ -30265,6 +30265,126 @@ aarch64_retrieve_sysreg (const char *regname, >> bool write_p, bool is128op) >> > return sysreg->encoding; >> > } >> > >> > +/* Generate assembly to calculate CRC >> > + using carry-less multiplication instruction. >> > + OPERANDS[1] is input CRC, >> > + OPERANDS[2] is data (message), >> > + OPERANDS[3] is the polynomial without the leading 1. */ >> > + >> > +void >> > +aarch64_expand_crc_using_pmull (scalar_mode crc_mode, >> > + scalar_mode data_mode, >> > + rtx *operands) >> > +{ >> > + /* Check and keep arguments. */ >> > + gcc_assert (!CONST_INT_P (operands[0])); >> > + gcc_assert (CONST_INT_P (operands[3])); >> > + rtx crc = operands[1]; >> > + rtx data = operands[2]; >> > + rtx polynomial = operands[3]; >> > + >> > + unsigned HOST_WIDE_INT crc_size = GET_MODE_BITSIZE (crc_mode); >> > + unsigned HOST_WIDE_INT data_size = GET_MODE_BITSIZE (data_mode); >> > + gcc_assert (crc_size <= 32); >> > + gcc_assert (data_size <= crc_size); >> > + >> > + /* Calculate the quotient. */ >> > + unsigned HOST_WIDE_INT >> > + q = gf2n_poly_long_div_quotient (UINTVAL (polynomial), crc_size); >> > + /* CRC calculation's main part. 
*/ >> > + if (crc_size > data_size) >> > + crc = expand_shift (RSHIFT_EXPR, DImode, crc, crc_size - data_size, >> > + NULL_RTX, 1); >> > + >> > + rtx t0 = force_reg (DImode, gen_int_mode (q, DImode)); >> > + polynomial = simplify_gen_unary (ZERO_EXTEND, DImode, polynomial, >> > + GET_MODE (polynomial)); >> > + rtx t1 = force_reg (DImode, polynomial); >> > + >> > + rtx a0 = expand_binop (DImode, xor_optab, crc, data, NULL_RTX, 1, >> > + OPTAB_WIDEN); >> > + >> > + rtx clmul_res = gen_reg_rtx (TImode); >> > + emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t0)); >> > + a0 = gen_lowpart (DImode, clmul_res); >> > + >> > + a0 = expand_shift (RSHIFT_EXPR, DImode, a0, crc_size, NULL_RTX, 1); >> > + >> > + emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t1)); >> > + a0 = gen_lowpart (DImode, clmul_res); >> > + >> > + if (crc_size > data_size) >> > + { >> > + rtx crc_part = expand_shift (LSHIFT_EXPR, DImode, operands[1], >> data_size, >> > + NULL_RTX, 0); >> > + a0 = expand_binop (DImode, xor_optab, a0, crc_part, NULL_RTX, 1, >> > + OPTAB_DIRECT); >> > + } >> > + >> > + /* Zero upper bits beyond crc_size. */ >> >> The comment no longer applies. Otherwise this function looks good to me. >> >> > Ok.) > > >> > + aarch64_emit_move (operands[0], gen_lowpart (crc_mode, a0)); >> > +} >> > + >> > +/* Generate assembly to calculate reversed CRC >> > + using carry-less multiplication instruction. >> > + OPERANDS[1] is input CRC, >> > + OPERANDS[2] is data, >> > + OPERANDS[3] is the polynomial without the leading 1. */ >> > + >> > +void >> > +aarch64_expand_reversed_crc_using_pmull (scalar_mode crc_mode, >> > + scalar_mode data_mode, >> > + rtx *operands) >> > +{ >> > + /* Check and keep arguments. 
*/ >> > + gcc_assert (!CONST_INT_P (operands[0])); >> > + gcc_assert (CONST_INT_P (operands[3])); >> > + rtx crc = operands[1]; >> > + rtx data = operands[2]; >> > + rtx polynomial = operands[3]; >> > + >> > + unsigned HOST_WIDE_INT crc_size = GET_MODE_BITSIZE (crc_mode); >> > + unsigned HOST_WIDE_INT data_size = GET_MODE_BITSIZE (data_mode); >> > + gcc_assert (crc_size <= 32); >> > + gcc_assert (data_size <= crc_size); >> > + >> > + /* Calculate the quotient. */ >> > + unsigned HOST_WIDE_INT >> > + q = gf2n_poly_long_div_quotient (UINTVAL (polynomial), crc_size); >> > + /* Reflect the calculated quotient. */ >> > + q = reflect (q); >> > + rtx t0 = force_reg (DImode, gen_int_mode (q >> (data_size - 4), >> DImode)); >> > + >> > + /* Reflect the polynomial. */ >> > + unsigned HOST_WIDE_INT ref_polynomial = reflect (UINTVAL >> (polynomial)); >> >> It looks like reflect() autodetects the bitwidth based on the assumption >> that the upper half will be nonzero. But that might not be true for all >> possible polynomials (when the implicit leading coefficient is absent) >> E.g. it looks like the 64-bit HDLC CRC polynomial is 0x1b (just the >> lowest byte nonzero), and although we don't support 64-bit polynomials >> here, the approach wouldn't work for it. >> >> I think it'd be safer to pass the bitwidth as an explicit parameter. >> Also, maybe it could go in hwint.* instead of expr.* and be called >> something like reflect_hwi. >> > > Yes. In the CRC loop recognition part, I only support those polynomials > whose upper half is nonzero. I.e., the required memories for the polynomial and > the CRC are equal. The reason for this is that, as the polynomial's > leading bit is omitted, it is not possible to precisely assume what CRC is > being calculated (e.g., 64, 32, etc.). Because there are some > implementations, where more memory than needed is used to store the CRC. > For example, in some implementations, a 64-bit memory may be used to > calculate a 32-bit CRC. 
> But, I agree, it's safer to add an explicit parameter. > I'll make the suggested changes. > > >> > + rtx t1 = force_reg (DImode, gen_int_mode (ref_polynomial << 1, >> DImode)); >> > + >> > + /* CRC calculation's main part. */ >> > + rtx a0 = expand_binop (DImode, xor_optab, crc, data, NULL_RTX, 1, >> > + OPTAB_WIDEN); >> > + >> > + /* Perform carry-less multiplication and get low part. */ >> > + rtx clmul_res = gen_reg_rtx (TImode); >> > + emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t0)); >> > + a0 = gen_lowpart (DImode, clmul_res); >> > + >> > + a0 = expand_shift (LSHIFT_EXPR, DImode, a0, 64 - crc_size - 3, >> NULL_RTX, 0); >> >> I haven't really looked at this implementation strategy before, so this >> is probably a silly question sorry :-) but is there a specific reason >> for selecting the bias of 4 in the right shift above, cancelled out by >> subtracting the same amount from this left shift? It looks like we could >> use any value in the range [1. crc_size - 1], is that right? >> >> Just asking out of curiosity though. I agree it works. >> >> > Sorry, I don’t remember exactly why I used a bias of 4. Upon reviewing it, > I think it's related to the reflection process, where zeros are added to > the quotient. > For example, in the case of CRC-8, the quotient is 9 bits but is stored in > a 16-bit value, so 7 bits are zeros. > It might be better to use q >> (crc_size - 1) and then do the left shift > with 64 - data_size instead of 64 - crc_size - 3. > However, I noticed that you provided a better solution. Thank you very > much.) > > > + >> > + /* Perform carry-less multiplication and get high part. */ >> > + emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t1)); >> > + a0 = gen_highpart (DImode, clmul_res); >> >> Although this works, it's taking a DImode highpart of a single TImode >> register, which requires a spill from the register allocator. 
So we get >> something like: >> >> pmull v31.1q, v31.1d, v14.1d >> str q31, [sp, 48] >> ldr d31, [sp, 48] >> shl d31, d31, 53 >> pmull v31.1q, v31.1d, v15.1d >> str q31, [sp, 48] >> ldrb w0, [sp, 56] >> >> (I think the first STR and LDR come from reusing the same pseudo register >> for the temporary results; it's the STR+LDRB that causes the spill.) >> >> One way around that would be: >> >> emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t1)); >> a0 = gen_reg_rtx (DImode); >> rtx v2di_res = gen_lowpart (V2DImode, clmul_res); >> rtx high_index = BYTES_BIG_ENDIAN ? const0_rtx : const1_rtx; >> emit_insn (gen_aarch64_get_lanev2di (a0, v2di_res, high_index)); >> >> That generates: >> >> pmull v30.1q, v29.1d, v30.1d >> shl d30, d30, 53 >> pmull v30.1q, v30.1d, v31.1d >> umov x0, v30.d[1] >> >> which is pretty nice. >> > > There again, it might not be too bad if we instead used the flipped >> version of the unreversed approach, such as the following minor adaptation: >> >> /* Calculate the quotient. */ >> unsigned HOST_WIDE_INT >> q = gf2n_poly_long_div_quotient (UINTVAL (polynomial), crc_size); >> /* Reflect the calculated quotient. */ >> q = reflect_hwi (q, crc_size + 1); >> rtx t0 = force_reg (DImode, gen_int_mode (q, DImode)); >> >> /* Reflect the polynomial. */ >> unsigned HOST_WIDE_INT ref_polynomial = reflect_hwi (UINTVAL >> (polynomial), >> crc_size); >> /* An unshifted multiplier would require the final result to be >> extracted >> using a shift right by DATA_SIZE - 1 bits. Shift the multiplier left >> so that the shift right can be by CRC_SIZE bits instead. */ >> ref_polynomial <<= crc_size - data_size + 1; >> rtx t1 = force_reg (DImode, gen_int_mode (ref_polynomial, DImode)); >> >> /* CRC calculation's main part. */ >> rtx a0 = expand_binop (DImode, xor_optab, crc, data, NULL_RTX, 1, >> OPTAB_WIDEN); >> >> /* Perform carry-less multiplication and get low part. 
*/ >> rtx clmul_res = gen_reg_rtx (TImode); >> emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t0)); >> a0 = gen_lowpart (DImode, clmul_res); >> >> a0 = expand_binop (DImode, and_optab, a0, >> gen_int_mode (GET_MODE_MASK (data_mode), DImode), >> NULL_RTX, 1, OPTAB_WIDEN); >> >> /* Perform carry-less multiplication. */ >> emit_insn (gen_aarch64_crypto_pmulldi (clmul_res, a0, t1)); >> >> /* Perform a shift right by CRC_SIZE as an extraction of lane 1. */ >> machine_mode crc_vmode = aarch64_vq_mode (crc_mode).require (); >> a0 = (crc_size > data_size ? gen_reg_rtx (crc_mode) : operands[0]); >> emit_insn (gen_aarch64_get_lane (crc_vmode, a0, >> gen_lowpart (crc_vmode, clmul_res), >> aarch64_endian_lane_rtx (crc_vmode, >> 1))); >> >> if (crc_size > data_size) >> { >> rtx crc_part = expand_shift (RSHIFT_EXPR, crc_mode, crc, data_size, >> NULL_RTX, 1); >> a0 = expand_binop (crc_mode, xor_optab, a0, crc_part, operands[0], >> 1, >> OPTAB_WIDEN); >> aarch64_emit_move (operands[0], a0); >> } >> >> This gives: >> >> pmull v30.1q, v30.1d, v31.1d >> movi v31.2d, 0xff >> and v30.8b, v30.8b, v31.8b >> pmull v30.1q, v30.1d, v31.1d >> umov w0, v30.b[1] >> >> or, with SVE enabled: >> >> pmull v30.1q, v31.1d, v30.1d >> and z30.d, z30.d, #255 >> pmull v30.1q, v30.1d, v31.1d >> umov w0, v30.b[1] >> >> This is preferable since ANDs are generally cheaper than shifts. >> >> That's just a suggestion though; the original version is ok too. >> >> > Thanks for the suggestions. I don't have much experience with gcc, so this > is a huge help for me. > I'll apply all the changes and send the new version. > > >> Minor comment: it'd be good to use "pmull_res" instead of "clmul_res" >> for the variables: :) >> >> > Ok) > > >> LGTM otherwise. 
>> >> > Thanks, > Mariam > > >> Thanks, >> Richard >> >> > + >> > + if (crc_size > data_size) >> > + { >> > + rtx crc_part = expand_shift (RSHIFT_EXPR, DImode, crc, data_size, >> > + NULL_RTX, 1); >> > + a0 = expand_binop (DImode, xor_optab, a0, crc_part, NULL_RTX, 1, >> > + OPTAB_DIRECT); >> > + } >> > + >> > + aarch64_emit_move (operands[0], gen_lowpart (crc_mode, a0)); >> > +} >> > + >> > /* Target-specific selftests. */ >> > >> > #if CHECKING_P >> > diff --git a/gcc/config/aarch64/aarch64.md >> b/gcc/config/aarch64/aarch64.md >> > index 9dff2d7a2b0..08c588bc475 100644 >> > --- a/gcc/config/aarch64/aarch64.md >> > +++ b/gcc/config/aarch64/aarch64.md >> > @@ -4543,6 +4543,63 @@ >> > [(set_attr "type" "crc")] >> > ) >> > >> > +;; Reversed CRC >> > +(define_expand "crc_rev<ALLI:mode><ALLX:mode>4" >> > + [;; return value (calculated CRC) >> > + (match_operand:ALLX 0 "register_operand" "=r") >> > + ;; initial CRC >> > + (match_operand:ALLX 1 "register_operand" "r") >> > + ;; data >> > + (match_operand:ALLI 2 "register_operand" "r") >> > + ;; polynomial without leading 1 >> > + (match_operand:ALLX 3)] >> > + "" >> > + { >> > + /* If the polynomial is the same as the polynomial of crc32c* >> instruction, >> > + put that instruction. crc32c uses iSCSI polynomial. */ >> > + if (TARGET_CRC32 && INTVAL (operands[3]) == 0x1EDC6F41 >> > + && <ALLX:MODE>mode == SImode) >> > + emit_insn (gen_aarch64_crc32c<ALLI:crc_data_type> (operands[0], >> > + operands[1], >> > + operands[2])); >> > + /* If the polynomial is the same as the polynomial of crc32* >> instruction, >> > + put that instruction. crc32 uses HDLC etc. polynomial. 
*/ >> > + else if (TARGET_CRC32 && INTVAL (operands[3]) == 0x04C11DB7 >> > + && <ALLX:MODE>mode == SImode) >> > + emit_insn (gen_aarch64_crc32<ALLI:crc_data_type> (operands[0], >> > + operands[1], >> > + operands[2])); >> > + else if (TARGET_AES && <ALLI:sizen> <= <ALLX:sizen>) >> > + aarch64_expand_reversed_crc_using_pmull (<ALLX:MODE>mode, >> > + <ALLI:MODE>mode, >> > + operands); >> > + else >> > + /* Otherwise, generate table-based CRC. */ >> > + expand_reversed_crc_table_based (operands[0], operands[1], >> operands[2], >> > + operands[3], <ALLI:MODE>mode, >> > + generate_reflecting_code_standard); >> > + DONE; >> > + } >> > +) >> > + >> > +;; Bit-forward CRC >> > +(define_expand "crc<ALLI:mode><ALLX:mode>4" >> > + [;; return value (calculated CRC) >> > + (match_operand:ALLX 0 "register_operand" "=r") >> > + ;; initial CRC >> > + (match_operand:ALLX 1 "register_operand" "r") >> > + ;; data >> > + (match_operand:ALLI 2 "register_operand" "r") >> > + ;; polynomial without leading 1 >> > + (match_operand:ALLX 3)] >> > + "TARGET_AES && <ALLI:sizen> <= <ALLX:sizen>" >> > + { >> > + aarch64_expand_crc_using_pmull (<ALLX:MODE>mode, <ALLI:MODE>mode, >> > + operands); >> > + DONE; >> > + } >> > +) >> > + >> > (define_insn "*csinc2<mode>_insn" >> > [(set (match_operand:GPI 0 "register_operand" "=r") >> > (plus:GPI (match_operand 2 "aarch64_comparison_operation" "") >> > diff --git a/gcc/config/aarch64/iterators.md >> b/gcc/config/aarch64/iterators.md >> > index 99cde46f1ba..86e4863d684 100644 >> > --- a/gcc/config/aarch64/iterators.md >> > +++ b/gcc/config/aarch64/iterators.md >> > @@ -1276,6 +1276,10 @@ >> > ;; Map a mode to a specific constraint character. >> > (define_mode_attr cmode [(QI "q") (HI "h") (SI "s") (DI "d")]) >> > >> > +;; Map a mode to a specific constraint character for calling >> > +;; appropriate version of crc. 
>> > +(define_mode_attr crc_data_type [(QI "b") (HI "h") (SI "w") (DI "x")]) >> > + >> > ;; Map modes to Usg and Usj constraints for SISD right shifts >> > (define_mode_attr cmode_simd [(SI "g") (DI "j")]) >> > >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-1-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-1-pmul.c >> > new file mode 100644 >> > index 00000000000..4043251dbd8 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-1-pmul.c >> > @@ -0,0 +1,8 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc -fdisable-tree-phiopt2 -fdisable-tree-phiopt3" } */ >> > + >> > +#include "../../gcc.dg/torture/crc-1.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > \ No newline at end of file >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-10-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-10-pmul.c >> > new file mode 100644 >> > index 00000000000..dd866b38e83 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-10-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-10.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 
0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-12-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-12-pmul.c >> > new file mode 100644 >> > index 00000000000..16d901eeaef >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-12-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc -fdisable-tree-phiopt2 -fdisable-tree-phiopt3" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-12.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-13-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-13-pmul.c >> > new file mode 100644 >> > index 00000000000..5f7741fad0f >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-13-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-13.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 
0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-14-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-14-pmul.c >> > new file mode 100644 >> > index 00000000000..cdedbbd3db1 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-14-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-14.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-17-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-17-pmul.c >> > new file mode 100644 >> > index 00000000000..c219e49a2b1 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-17-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-17.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 
0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-18-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-18-pmul.c >> > new file mode 100644 >> > index 00000000000..124900a979b >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-18-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-18.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-21-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-21-pmul.c >> > new file mode 100644 >> > index 00000000000..3cae1a7f57b >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-21-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-21.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 
0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-22-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-22-pmul.c >> > new file mode 100644 >> > index 00000000000..0ec2e312f8f >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-22-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-22.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-23-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-23-pmul.c >> > new file mode 100644 >> > index 00000000000..0c4542adb40 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-23-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-23.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 
0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-4-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-4-pmul.c >> > new file mode 100644 >> > index 00000000000..08f1d3b69d7 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-4-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-4.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-5-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-5-pmul.c >> > new file mode 100644 >> > index 00000000000..91bf5e6353d >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-5-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -w -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-5.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 
0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > \ No newline at end of file >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-6-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-6-pmul.c >> > new file mode 100644 >> > index 00000000000..4680eafe758 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-6-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-6.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > \ No newline at end of file >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-7-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-7-pmul.c >> > new file mode 100644 >> > index 00000000000..655484d10d4 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-7-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-7.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 
0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-8-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-8-pmul.c >> > new file mode 100644 >> > index 00000000000..6c2acc84c32 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-8-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-8.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-9-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-9-pmul.c >> > new file mode 100644 >> > index 00000000000..e76f3c77b59 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-9-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-9.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 
0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-CCIT-data16-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-CCIT-data16-pmul.c >> > new file mode 100644 >> > index 00000000000..21520474564 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-CCIT-data16-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-w -march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-CCIT-data16.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > \ No newline at end of file >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-CCIT-data8-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-CCIT-data8-pmul.c >> > new file mode 100644 >> > index 00000000000..3dcc92320f3 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-CCIT-data8-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-w -march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ >> > + >> > +#include "../../gcc.dg/torture/crc-CCIT-data8.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 
0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > \ No newline at end of file >> > diff --git >> a/gcc/testsuite/gcc.target/aarch64/crc-coremark-16bitdata-pmul.c >> b/gcc/testsuite/gcc.target/aarch64/crc-coremark-16bitdata-pmul.c >> > new file mode 100644 >> > index 00000000000..e5196aaafef >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-coremark-16bitdata-pmul.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-w -march=armv8-a+crypto -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include "../../gcc.dg/torture/crc-coremark16-data16.c" >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "pmull" "dfinish"} } */ >> > \ No newline at end of file >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-crc32-data16.c >> b/gcc/testsuite/gcc.target/aarch64/crc-crc32-data16.c >> > new file mode 100644 >> > index 00000000000..e82cb04fcc3 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-crc32-data16.c >> > @@ -0,0 +1,53 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crc -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include <stdint.h> >> > +#include <stdlib.h> >> > + >> > +__attribute__ ((noinline,optimize(0))) >> > +uint32_t _crc32_O0 (uint32_t crc, uint16_t data) { >> > + int i; >> > + crc = crc ^ data; >> > + >> > + for (i = 0; i < 8; i++) { >> > + if (crc & 1) >> > + crc = (crc >> 1) ^ 0xEDB88320; >> > + else >> > + crc = (crc >> 1); >> > + } >> > + >> > + return crc; >> > +} >> > + >> > +uint32_t _crc32 (uint32_t crc, uint16_t data) { >> > + int i; >> > + crc = crc ^ data; >> > + >> > + for (i = 0; i < 8; i++) { >> > + if (crc & 1) >> > + crc = (crc >> 1) ^ 0xEDB88320; >> > + else >> 
> + crc = (crc >> 1); >> > + } >> > + >> > + return crc; >> > +} >> > + >> > +int main () >> > +{ >> > + uint32_t crc = 0x0D800D80; >> > + for (uint16_t i = 0; i < 0xffff; i++) >> > + { >> > + uint32_t res1 = _crc32_O0 (crc, i); >> > + uint32_t res2 = _crc32 (crc, i); >> > + if (res1 != res2) >> > + abort (); >> > + crc = res1; >> > + } >> > +} >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */ >> > +/* { dg-final { scan-rtl-dump-times "pmull" 0 "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-crc32-data32.c >> b/gcc/testsuite/gcc.target/aarch64/crc-crc32-data32.c >> > new file mode 100644 >> > index 00000000000..a7564a7e28a >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-crc32-data32.c >> > @@ -0,0 +1,52 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crc -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include <stdint.h> >> > +#include <stdlib.h> >> > +__attribute__ ((noinline,optimize(0))) >> > +uint32_t _crc32_O0 (uint32_t crc, uint32_t data) { >> > + int i; >> > + crc = crc ^ data; >> > + >> > + for (i = 0; i < 32; i++) { >> > + if (crc & 1) >> > + crc = (crc >> 1) ^ 0xEDB88320; >> > + else >> > + crc = (crc >> 1); >> > + } >> > + >> > + return crc; >> > +} >> > + >> > +uint32_t _crc32 (uint32_t crc, uint32_t data) { >> > + int i; >> > + crc = crc ^ data; >> > + >> > + for (i = 0; i < 32; i++) { >> > + if (crc & 1) >> > + crc = (crc >> 1) ^ 0xEDB88320; >> > + else >> > + crc = (crc >> 1); >> > + } >> > + >> > + return crc; >> > +} >> > + >> > +int main () >> > +{ >> > + uint32_t crc = 0x0D800D80; >> > + for (uint8_t i = 0; i < 0xff; i++) >> > + { >> > + uint32_t res1 = _crc32_O0 (crc, i); >> > + uint32_t res2 = _crc32 (crc, i); >> > + if (res1 != res2) >> 
> + abort (); >> > + crc = res1; >> > + } >> > +} >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */ >> > +/* { dg-final { scan-rtl-dump-times "pmull" 0 "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-crc32-data8.c >> b/gcc/testsuite/gcc.target/aarch64/crc-crc32-data8.c >> > new file mode 100644 >> > index 00000000000..c88cafadedc >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-crc32-data8.c >> > @@ -0,0 +1,53 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crc -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include <stdint.h> >> > +#include <stdlib.h> >> > + >> > +__attribute__ ((noinline,optimize(0))) >> > +uint32_t _crc32_O0 (uint32_t crc, uint8_t data) { >> > + int i; >> > + crc = crc ^ data; >> > + >> > + for (i = 0; i < 8; i++) { >> > + if (crc & 1) >> > + crc = (crc >> 1) ^ 0xEDB88320; >> > + else >> > + crc = (crc >> 1); >> > + } >> > + >> > + return crc; >> > +} >> > + >> > +uint32_t _crc32 (uint32_t crc, uint8_t data) { >> > + int i; >> > + crc = crc ^ data; >> > + >> > + for (i = 0; i < 8; i++) { >> > + if (crc & 1) >> > + crc = (crc >> 1) ^ 0xEDB88320; >> > + else >> > + crc = (crc >> 1); >> > + } >> > + >> > + return crc; >> > +} >> > + >> > +int main () >> > +{ >> > + uint32_t crc = 0x0D800D80; >> > + for (uint8_t i = 0; i < 0xff; i++) >> > + { >> > + uint32_t res1 = _crc32_O0 (crc, i); >> > + uint32_t res2 = _crc32 (crc, i); >> > + if (res1 != res2) >> > + abort (); >> > + crc = res1; >> > + } >> > +} >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 
0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */ >> > +/* { dg-final { scan-rtl-dump-times "pmull" 0 "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data16.c >> b/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data16.c >> > new file mode 100644 >> > index 00000000000..d82e6252603 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data16.c >> > @@ -0,0 +1,53 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crc -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include <stdint.h> >> > +#include <stdlib.h> >> > + >> > +__attribute__ ((noinline,optimize(0))) >> > +uint32_t _crc32_O0 (uint32_t crc, uint16_t data) { >> > + int i; >> > + crc = crc ^ data; >> > + >> > + for (i = 0; i < 8; i++) { >> > + if (crc & 1) >> > + crc = (crc >> 1) ^ 0x82F63B78; >> > + else >> > + crc = (crc >> 1); >> > + } >> > + >> > + return crc; >> > +} >> > + >> > +uint32_t _crc32 (uint32_t crc, uint16_t data) { >> > + int i; >> > + crc = crc ^ data; >> > + >> > + for (i = 0; i < 8; i++) { >> > + if (crc & 1) >> > + crc = (crc >> 1) ^ 0x82F63B78; >> > + else >> > + crc = (crc >> 1); >> > + } >> > + >> > + return crc; >> > +} >> > + >> > +int main () >> > +{ >> > + uint32_t crc = 0x0D800D80; >> > + for (uint16_t i = 0; i < 0xffff; i++) >> > + { >> > + uint32_t res1 = _crc32_O0 (crc, i); >> > + uint32_t res2 = _crc32 (crc, i); >> > + if (res1 != res2) >> > + abort (); >> > + crc = res1; >> > + } >> > +} >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 
0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "UNSPEC_CRC32C" "dfinish"} } */ >> > +/* { dg-final { scan-rtl-dump-times "pmull" 0 "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data32.c >> b/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data32.c >> > new file mode 100644 >> > index 00000000000..7acb6fc239c >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data32.c >> > @@ -0,0 +1,52 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crc -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include <stdint.h> >> > +#include <stdlib.h> >> > +__attribute__ ((noinline,optimize(0))) >> > +uint32_t _crc32_O0 (uint32_t crc, uint32_t data) { >> > + int i; >> > + crc = crc ^ data; >> > + >> > + for (i = 0; i < 32; i++) { >> > + if (crc & 1) >> > + crc = (crc >> 1) ^ 0x82F63B78; >> > + else >> > + crc = (crc >> 1); >> > + } >> > + >> > + return crc; >> > +} >> > + >> > +uint32_t _crc32 (uint32_t crc, uint32_t data) { >> > + int i; >> > + crc = crc ^ data; >> > + >> > + for (i = 0; i < 32; i++) { >> > + if (crc & 1) >> > + crc = (crc >> 1) ^ 0x82F63B78; >> > + else >> > + crc = (crc >> 1); >> > + } >> > + >> > + return crc; >> > +} >> > + >> > +int main () >> > +{ >> > + uint32_t crc = 0x0D800D80; >> > + for (uint8_t i = 0; i < 0xff; i++) >> > + { >> > + uint32_t res1 = _crc32_O0 (crc, i); >> > + uint32_t res2 = _crc32 (crc, i); >> > + if (res1 != res2) >> > + abort (); >> > + crc = res1; >> > + } >> > +} >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 
0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "UNSPEC_CRC32C" "dfinish"} } */ >> > +/* { dg-final { scan-rtl-dump-times "pmull" 0 "dfinish"} } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data8.c >> b/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data8.c >> > new file mode 100644 >> > index 00000000000..e8a8901e453 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/crc-crc32c-data8.c >> > @@ -0,0 +1,53 @@ >> > +/* { dg-do run } */ >> > +/* { dg-options "-march=armv8-a+crc -O2 -fdump-rtl-dfinish >> -fdump-tree-crc" } */ >> > +/* { dg-skip-if "" { *-*-* } { "-flto"} } */ >> > + >> > +#include <stdint.h> >> > +#include <stdlib.h> >> > + >> > +__attribute__ ((noinline,optimize(0))) >> > +uint32_t _crc32_O0 (uint32_t crc, uint8_t data) { >> > + int i; >> > + crc = crc ^ data; >> > + >> > + for (i = 0; i < 8; i++) { >> > + if (crc & 1) >> > + crc = (crc >> 1) ^ 0x82F63B78; >> > + else >> > + crc = (crc >> 1); >> > + } >> > + >> > + return crc; >> > +} >> > + >> > +uint32_t _crc32 (uint32_t crc, uint8_t data) { >> > + int i; >> > + crc = crc ^ data; >> > + >> > + for (i = 0; i < 8; i++) { >> > + if (crc & 1) >> > + crc = (crc >> 1) ^ 0x82F63B78; >> > + else >> > + crc = (crc >> 1); >> > + } >> > + >> > + return crc; >> > +} >> > + >> > +int main () >> > +{ >> > + uint32_t crc = 0x0D800D80; >> > + for (uint8_t i = 0; i < 0xff; i++) >> > + { >> > + uint32_t res1 = _crc32_O0 (crc, i); >> > + uint32_t res2 = _crc32 (crc, i); >> > + if (res1 != res2) >> > + abort (); >> > + crc = res1; >> > + } >> > +} >> > + >> > +/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */ >> > +/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC >> code." 0 "crc"} } */ >> > +/* { dg-final { scan-rtl-dump "UNSPEC_CRC32C" "dfinish"} } */ >> > +/* { dg-final { scan-rtl-dump-times "pmull" 0 "dfinish"} } */ >> >