On 9/11/23 08:56, Stefan Schulze Frielinghaus wrote:
> On Mon, Aug 28, 2023 at 11:33:37AM +0200, Andreas Krebbel wrote:
>> Hi Stefan,
>>
>> do you really need to introduce a new flag for U64 given that the type of 
>> the builtin is unsigned long?
> 
> In function s390_const_operand_ok the immediate is checked whether it is
> valide w.r.t. the flag:
> 
>   tree_to_uhwi (arg) > ((HOST_WIDE_INT_1U << (bitwidth - 1) << 1) - 1)
> 
> Here bitwidth is derived from the flag.

I see, it is about enabling the constant check at all.

Ok, thanks!

Andreas

> 
> Cheers,
> Stefan
> 
>>
>> Andreas
>>
>> On 8/21/23 17:56, Stefan Schulze Frielinghaus wrote:
>>> The second argument of these builtins is an unsigned immediate.  For
>>> vec_rli the API allows immediates up to 64 bits whereas the instruction
>>> verll only allows immediates up to 32 bits.  Since the shift count
>>> equals the immediate modulo vector element size, truncating those
>>> immediates is fine.
>>>
>>> Bootstrapped and regtested on s390.  Ok for mainline?
>>>
>>> gcc/ChangeLog:
>>>
>>>     * config/s390/s390-builtins.def (O_U64): New.
>>>     (O1_U64): Ditto.
>>>     (O2_U64): Ditto.
>>>     (O3_U64): Ditto.
>>>     (O4_U64): Ditto.
>>>     (O_M12): Change bit position.
>>>     (O_S2): Ditto.
>>>     (O_S3): Ditto.
>>>     (O_S4): Ditto.
>>>     (O_S5): Ditto.
>>>     (O_S8): Ditto.
>>>     (O_S12): Ditto.
>>>     (O_S16): Ditto.
>>>     (O_S32): Ditto.
>>>     (O_ELEM): Ditto.
>>>     (O_LIT): Ditto.
>>>     (OB_DEF_VAR): Add operand constraints.
>>>     (B_DEF): Ditto.
>>>     * config/s390/s390.cc (s390_const_operand_ok): Honour 64 bit
>>>     operands.
>>> ---
>>>  gcc/config/s390/s390-builtins.def | 60 ++++++++++++++++++-------------
>>>  gcc/config/s390/s390.cc           |  6 ++--
>>>  2 files changed, 39 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/gcc/config/s390/s390-builtins.def 
>>> b/gcc/config/s390/s390-builtins.def
>>> index a16983b18bd..c829f445a11 100644
>>> --- a/gcc/config/s390/s390-builtins.def
>>> +++ b/gcc/config/s390/s390-builtins.def
>>> @@ -28,6 +28,7 @@
>>>  #undef O_U12
>>>  #undef O_U16
>>>  #undef O_U32
>>> +#undef O_U64
>>>  
>>>  #undef O_M12
>>>  
>>> @@ -88,6 +89,11 @@
>>>  #undef O3_U32
>>>  #undef O4_U32
>>>  
>>> +#undef O1_U64
>>> +#undef O2_U64
>>> +#undef O3_U64
>>> +#undef O4_U64
>>> +
>>>  #undef O1_M12
>>>  #undef O2_M12
>>>  #undef O3_M12
>>> @@ -157,20 +163,21 @@
>>>  #define O_U12    7 /* unsigned 16 bit literal */
>>>  #define O_U16    8 /* unsigned 16 bit literal */
>>>  #define O_U32    9 /* unsigned 32 bit literal */
>>> +#define O_U64   10 /* unsigned 64 bit literal */
>>>  
>>> -#define O_M12   10 /* matches bitmask of 12 */
>>> +#define O_M12   11 /* matches bitmask of 12 */
>>>  
>>> -#define O_S2    11 /* signed  2 bit literal */
>>> -#define O_S3    12 /* signed  3 bit literal */
>>> -#define O_S4    13 /* signed  4 bit literal */
>>> -#define O_S5    14 /* signed  5 bit literal */
>>> -#define O_S8    15 /* signed  8 bit literal */
>>> -#define O_S12   16 /* signed 12 bit literal */
>>> -#define O_S16   17 /* signed 16 bit literal */
>>> -#define O_S32   18 /* signed 32 bit literal */
>>> +#define O_S2    12 /* signed  2 bit literal */
>>> +#define O_S3    13 /* signed  3 bit literal */
>>> +#define O_S4    14 /* signed  4 bit literal */
>>> +#define O_S5    15 /* signed  5 bit literal */
>>> +#define O_S8    16 /* signed  8 bit literal */
>>> +#define O_S12   17 /* signed 12 bit literal */
>>> +#define O_S16   18 /* signed 16 bit literal */
>>> +#define O_S32   19 /* signed 32 bit literal */
>>>  
>>> -#define O_ELEM  19 /* Element selector requiring modulo arithmetic. */
>>> -#define O_LIT   20 /* Operand must be a literal fitting the target type.  
>>> */
>>> +#define O_ELEM  20 /* Element selector requiring modulo arithmetic. */
>>> +#define O_LIT   21 /* Operand must be a literal fitting the target type.  
>>> */
>>>  
>>>  #define O_SHIFT 5
>>>  
>>> @@ -223,6 +230,11 @@
>>>  #define O3_U32 (O_U32 << (2 * O_SHIFT))
>>>  #define O4_U32 (O_U32 << (3 * O_SHIFT))
>>>  
>>> +#define O1_U64 O_U64
>>> +#define O2_U64 (O_U64 << O_SHIFT)
>>> +#define O3_U64 (O_U64 << (2 * O_SHIFT))
>>> +#define O4_U64 (O_U64 << (3 * O_SHIFT))
>>> +
>>>  #define O1_M12 O_M12
>>>  #define O2_M12 (O_M12 << O_SHIFT)
>>>  #define O3_M12 (O_M12 << (2 * O_SHIFT))
>>> @@ -1989,19 +2001,19 @@ B_DEF      (s390_verllvf,               vrotlv4si3, 
>>>         0,
>>>  B_DEF      (s390_verllvg,               vrotlv2di3,         0,             
>>>      B_VX,               0,                  BT_FN_UV2DI_UV2DI_UV2DI)
>>>  
>>>  OB_DEF     (s390_vec_rli,               s390_vec_rli_u8,    
>>> s390_vec_rli_s64,   B_VX,               BT_FN_OV4SI_OV4SI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_u8,            s390_verllb,        0,             
>>>      0,                  BT_OV_UV16QI_UV16QI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_s8,            s390_verllb,        0,             
>>>      0,                  BT_OV_V16QI_V16QI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_u16,           s390_verllh,        0,             
>>>      0,                  BT_OV_UV8HI_UV8HI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_s16,           s390_verllh,        0,             
>>>      0,                  BT_OV_V8HI_V8HI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_u32,           s390_verllf,        0,             
>>>      0,                  BT_OV_UV4SI_UV4SI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_s32,           s390_verllf,        0,             
>>>      0,                  BT_OV_V4SI_V4SI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_u64,           s390_verllg,        0,             
>>>      0,                  BT_OV_UV2DI_UV2DI_ULONG)
>>> -OB_DEF_VAR (s390_vec_rli_s64,           s390_verllg,        0,             
>>>      0,                  BT_OV_V2DI_V2DI_ULONG)
>>> -
>>> -B_DEF      (s390_verllb,                rotlv16qi3,         0,             
>>>      B_VX,               0,                  BT_FN_UV16QI_UV16QI_UINT)
>>> -B_DEF      (s390_verllh,                rotlv8hi3,          0,             
>>>      B_VX,               0,                  BT_FN_UV8HI_UV8HI_UINT)
>>> -B_DEF      (s390_verllf,                rotlv4si3,          0,             
>>>      B_VX,               0,                  BT_FN_UV4SI_UV4SI_UINT)
>>> -B_DEF      (s390_verllg,                rotlv2di3,          0,             
>>>      B_VX,               0,                  BT_FN_UV2DI_UV2DI_UINT)
>>> +OB_DEF_VAR (s390_vec_rli_u8,            s390_verllb,        0,             
>>>      O2_U64,             BT_OV_UV16QI_UV16QI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_s8,            s390_verllb,        0,             
>>>      O2_U64,             BT_OV_V16QI_V16QI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_u16,           s390_verllh,        0,             
>>>      O2_U64,             BT_OV_UV8HI_UV8HI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_s16,           s390_verllh,        0,             
>>>      O2_U64,             BT_OV_V8HI_V8HI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_u32,           s390_verllf,        0,             
>>>      O2_U64,             BT_OV_UV4SI_UV4SI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_s32,           s390_verllf,        0,             
>>>      O2_U64,             BT_OV_V4SI_V4SI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_u64,           s390_verllg,        0,             
>>>      O2_U64,             BT_OV_UV2DI_UV2DI_ULONG)
>>> +OB_DEF_VAR (s390_vec_rli_s64,           s390_verllg,        0,             
>>>      O2_U64,             BT_OV_V2DI_V2DI_ULONG)
>>> +
>>> +B_DEF      (s390_verllb,                rotlv16qi3,         0,             
>>>      B_VX,               O2_U32,             BT_FN_UV16QI_UV16QI_UINT)
>>> +B_DEF      (s390_verllh,                rotlv8hi3,          0,             
>>>      B_VX,               O2_U32,             BT_FN_UV8HI_UV8HI_UINT)
>>> +B_DEF      (s390_verllf,                rotlv4si3,          0,             
>>>      B_VX,               O2_U32,             BT_FN_UV4SI_UV4SI_UINT)
>>> +B_DEF      (s390_verllg,                rotlv2di3,          0,             
>>>      B_VX,               O2_U32,             BT_FN_UV2DI_UV2DI_UINT)
>>>  
>>>  OB_DEF     (s390_vec_rl_mask,           
>>> s390_vec_rl_mask_s8,s390_vec_rl_mask_u64,B_VX,              
>>> BT_FN_OV4SI_OV4SI_OV4SI_UCHAR)
>>>  OB_DEF_VAR (s390_vec_rl_mask_s8,        s390_verimb,        0,             
>>>      O3_U8,              BT_OV_V16QI_V16QI_UV16QI_UCHAR)
>>> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
>>> index 49ab4fc7360..64f56d8effa 100644
>>> --- a/gcc/config/s390/s390.cc
>>> +++ b/gcc/config/s390/s390.cc
>>> @@ -815,8 +815,8 @@ s390_const_operand_ok (tree arg, int argnum, int 
>>> op_flags, tree decl)
>>>  {
>>>    if (O_UIMM_P (op_flags))
>>>      {
>>> -      unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 
>>> 4 };
>>> -      unsigned HOST_WIDE_INT bitmasks[]  = { 0, 0, 0, 0, 0, 0,  0,  0,  0, 
>>> 12 };
>>> +      unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 
>>> 64,  4 };
>>> +      unsigned HOST_WIDE_INT bitmasks[]  = { 0, 0, 0, 0, 0, 0,  0,  0,  0, 
>>>  0, 12 };
>>>        unsigned HOST_WIDE_INT bitwidth = bitwidths[op_flags - O_U1];
>>>        unsigned HOST_WIDE_INT bitmask = bitmasks[op_flags - O_U1];
>>>  
>>> @@ -824,7 +824,7 @@ s390_const_operand_ok (tree arg, int argnum, int 
>>> op_flags, tree decl)
>>>        gcc_assert(ARRAY_SIZE(bitmasks) == (O_M12 - O_U1 + 1));
>>>  
>>>        if (!tree_fits_uhwi_p (arg)
>>> -     || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1
>>> +     || tree_to_uhwi (arg) > ((HOST_WIDE_INT_1U << (bitwidth - 1) << 1) - 
>>> 1)
>>>       || (bitmask && tree_to_uhwi (arg) & ~bitmask))
>>>     {
>>>       if (bitmask)
>>

Reply via email to