Mihail Ionescu <mihail.ione...@foss.arm.com> writes: > Hi Richard, > > On 02/18/2020 05:06 PM, Richard Sandiford wrote: >> Thanks. When trying a bootstrap locally I get: >> >> include/arm_neon.h:34709:38: error: cannot convert ‘const __bf16*’ to ‘const >> __fp16*’ >> 34709 | __o = __builtin_aarch64_ld1x2v4hf ((const >> __builtin_aarch64_simd_bf *) __a); >> | >> ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >> | | >> | const __bf16* >> >> I think the affected lines are: >> >> Mihail Ionescu <mihail.ione...@foss.arm.com> writes: >>> +__extension__ extern __inline bfloat16x4x2_t >>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >>> +vld1_bf16_x2 (const bfloat16_t *__a) >>> +{ >>> + bfloat16x4x2_t ret; >>> + __builtin_aarch64_simd_oi __o; >>> + __o = __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_bf *) >>> __a); >> >> bf rather than hf here (the error above). >> >>> + ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0); >>> + ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); >>> + return ret; >>> +} >>> [...] >>> +__extension__ extern __inline bfloat16x4x2_t >>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >>> +vld2_dup_bf16 (const bfloat16_t * __a) >>> +{ >>> + bfloat16x4x2_t ret; >>> + __builtin_aarch64_simd_oi __o; >>> + __o = __builtin_aarch64_ld2rv4bf ((const __builtin_aarch64_simd_bf *) >>> __a); >>> + ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 0); >> >> Here too (although the choice is cosmetic). 
>> >>> + ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); >>> + return ret; >>> +} >>> + >>> +__extension__ extern __inline bfloat16x8x2_t >>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >>> +vld2q_dup_bf16 (const bfloat16_t * __a) >>> +{ >>> + bfloat16x8x2_t ret; >>> + __builtin_aarch64_simd_oi __o; >>> + __o = __builtin_aarch64_ld2rv8bf ((const __builtin_aarch64_simd_bf *) >>> __a); >>> + ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0); >> >> Same here (again cosmetic). >> >>> + ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); >>> + return ret; >>> +} >>> [...] >>> +__extension__ extern __inline void >>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >>> +vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __val) >>> +{ >>> + __builtin_aarch64_simd_ci __o; >>> + bfloat16x8x3_t __temp; >>> + __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 >>> (__AARCH64_UINT64_C (0))); >>> + __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 >>> (__AARCH64_UINT64_C (0))); >>> + __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 >>> (__AARCH64_UINT64_C (0))); >>> + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) >>> __temp.val[0], 0); >>> + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) >>> __temp.val[1], 1); >>> + __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) >>> __temp.val[2], 2); >>> + __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_bf *) __a, __o); >> >> Here too, to avoid: >> >> include/arm_neon.h:35000:32: error: cannot convert ‘__bf16*’ to ‘__fp16*’ >> 35000 | __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_bf *) __a, >> __o); >> | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >> | | >> | __bf16* >> >> Looks good otherwise. I guess this shows we should be running the >> intrinsics tests for C++ as well as C (in general, not just for this patch). >> >> Richard >> > > Thanks for pointing this out. 
> I've updated the patch with the fixes.
> The ChangeLog is the same.
LGTM, thanks. Pushed.

Richard