Mihail Ionescu <mihail.ione...@foss.arm.com> writes:
> Hi Richard,
>
> On 02/18/2020 05:06 PM, Richard Sandiford wrote:
>> Thanks.  When trying a bootstrap locally I get:
>> 
>> include/arm_neon.h:34709:38: error: cannot convert ‘const __bf16*’ to ‘const __fp16*’
>> 34709 |   __o = __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_bf *) __a);
>>        |                                      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>>        |                                      |
>>        |                                      const __bf16*
>> 
>> I think the affected lines are:
>> 
>> Mihail Ionescu <mihail.ione...@foss.arm.com> writes:
>>> +__extension__ extern __inline bfloat16x4x2_t
>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>> +vld1_bf16_x2 (const bfloat16_t *__a)
>>> +{
>>> +  bfloat16x4x2_t ret;
>>> +  __builtin_aarch64_simd_oi __o;
>>> +  __o = __builtin_aarch64_ld1x2v4hf ((const __builtin_aarch64_simd_bf *) __a);
>> 
>> This should use the bf builtin rather than the hf one here, i.e.
>> __builtin_aarch64_ld1x2v4bf (this is the cause of the error above).
>> 
>>> +  ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0);
>>> +  ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1);
>>> +  return ret;
>>> +}
>>> [...]
>>> +__extension__ extern __inline bfloat16x4x2_t
>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>> +vld2_dup_bf16 (const bfloat16_t * __a)
>>> +{
>>> +  bfloat16x4x2_t ret;
>>> +  __builtin_aarch64_simd_oi __o;
>>> +  __o = __builtin_aarch64_ld2rv4bf ((const __builtin_aarch64_simd_bf *) __a);
>>> +  ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 0);
>> 
>> Here too (although the choice is cosmetic).
>> 
>>> +  ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1);
>>> +  return ret;
>>> +}
>>> +
>>> +__extension__ extern __inline bfloat16x8x2_t
>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>> +vld2q_dup_bf16 (const bfloat16_t * __a)
>>> +{
>>> +  bfloat16x8x2_t ret;
>>> +  __builtin_aarch64_simd_oi __o;
>>> +  __o = __builtin_aarch64_ld2rv8bf ((const __builtin_aarch64_simd_bf *) __a);
>>> +  ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0);
>> 
>> Same here (again cosmetic).
>> 
>>> +  ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1);
>>> +  return ret;
>>> +}
>>> [...]
>>> +__extension__ extern __inline void
>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>> +vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __val)
>>> +{
>>> +  __builtin_aarch64_simd_ci __o;
>>> +  bfloat16x8x3_t __temp;
>>> +  __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0)));
>>> +  __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0)));
>>> +  __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0)));
>>> +  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0);
>>> +  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1);
>>> +  __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2);
>>> +  __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_bf *) __a, __o);
>> 
>> Here too, to avoid:
>> 
>> include/arm_neon.h:35000:32: error: cannot convert ‘__bf16*’ to ‘__fp16*’
>> 35000 |   __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_bf *) __a, __o);
>>        |                                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>>        |                                |
>>        |                                __bf16*
>> 
>> Looks good otherwise.  I guess this shows we should be running the
>> intrinsics tests for C++ as well as C (in general, not just for this patch).
>> 
>> Richard
>> 
>
> Thanks for pointing this out.
> I've updated the patch with the fixes.
> The ChangeLog is the same.

LGTM thanks, pushed.

Richard

Reply via email to