https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107563

--- Comment #7 from cqwrteur <unlvsur at live dot com> ---
(In reply to Hongtao.liu from comment #6)
> Shufd only handles
> 
> void foo1(temp_vec_type& v) noexcept
> {
>       v=__builtin_shufflevector(v,v,12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3);
> }
> 
> Not the case in #c0.

I am using it for byte swapping.

Actually, clang has a solution:

                        // Byte swap of every sizeof(T)-wide element stored in the 16-byte
                        // vector temp_vec. Both temp_vec and T are declared by the enclosing
                        // code, which is not part of this snippet.
                        using x86_64_v16qi [[__gnu__::__vector_size__ (16)]] = char;
                        using x86_64_v8hi [[__gnu__::__vector_size__ (16)]] = short;
                        using x86_64_v4si [[__gnu__::__vector_size__ (16)]] = int;
                        constexpr x86_64_v16qi zero_bytes{};
                        if constexpr(sizeof(T)==8)
                        {
                                // Each 8-byte half of temp_vec is interleaved with zero bytes, so
                                // every source byte occupies its own 16-bit lane before shuffling.
                                // 0b01'00'11'10 == 78: PSHUFD selector picking dwords 2,3,0,1.
                                // 0b00'01'10'11 == 27: PSHUFLW/PSHUFHW selector picking words 3,2,1,0.
                                const auto lo_wide = __builtin_ia32_punpcklbw128(temp_vec,zero_bytes);
                                const auto hi_wide = __builtin_ia32_punpckhbw128(temp_vec,zero_bytes);

                                const auto lo_qwords_swapped = __builtin_ia32_pshufd((x86_64_v4si)lo_wide,0b01'00'11'10);
                                const auto lo_low_reversed = __builtin_ia32_pshuflw((x86_64_v8hi)lo_qwords_swapped,0b00'01'10'11);
                                const auto lo_reversed = __builtin_ia32_pshufhw(lo_low_reversed,0b00'01'10'11);

                                const auto hi_qwords_swapped = __builtin_ia32_pshufd((x86_64_v4si)hi_wide,0b01'00'11'10);
                                const auto hi_low_reversed = __builtin_ia32_pshuflw((x86_64_v8hi)hi_qwords_swapped,0b00'01'10'11);
                                const auto hi_reversed = __builtin_ia32_pshufhw(hi_low_reversed,0b00'01'10'11);

                                // Narrow the two 8x16-bit results back into one 16-byte vector.
                                temp_vec=__builtin_ia32_packuswb128(lo_reversed,hi_reversed);
                        }
                        else if constexpr(sizeof(T)==4)
                        {
                                // Same scheme as the 8-byte case, minus the PSHUFD step: only the
                                // four 16-bit lanes inside each 64-bit half are reordered
                                // (selector 0b00'01'10'11 == 27).
                                const auto lo_wide = __builtin_ia32_punpcklbw128(temp_vec,zero_bytes);
                                const auto hi_wide = __builtin_ia32_punpckhbw128(temp_vec,zero_bytes);

                                const auto lo_low_reversed = __builtin_ia32_pshuflw((x86_64_v8hi)lo_wide,0b00'01'10'11);
                                const auto lo_reversed = __builtin_ia32_pshufhw(lo_low_reversed,0b00'01'10'11);

                                const auto hi_low_reversed = __builtin_ia32_pshuflw((x86_64_v8hi)hi_wide,0b00'01'10'11);
                                const auto hi_reversed = __builtin_ia32_pshufhw(hi_low_reversed,0b00'01'10'11);

                                temp_vec=__builtin_ia32_packuswb128(lo_reversed,hi_reversed);
                        }
                        else if constexpr(sizeof(T)==2)
                        {
                                // View the vector as eight unsigned 16-bit lanes and exchange the
                                // two bytes of each lane with a pair of shifts.
                                using x86_64_v8hu [[__gnu__::__vector_size__ (16)]] = unsigned short;
                                const auto halfwords = (x86_64_v8hu)temp_vec;
                                temp_vec=(x86_64_v16qi)((halfwords<<8)|(halfwords>>8));
                        }

Reply via email to