Ka-Ka added inline comments.
================ Comment at: clang/test/CodeGen/builtins-x86.c:127-128 - tmp_V2LLi = __builtin_ia32_undef128(); - tmp_V4LLi = __builtin_ia32_undef256(); + tmp_V2LLi = (V2LLi)__builtin_ia32_undef128(); + tmp_V4LLi = (V4LLi)__builtin_ia32_undef256(); ---------------- I don't like the introduced casts. Can we change the test case to operate on the appropriate types instead? The above two lines could probably be replaced by: tmp_V2d = __builtin_ia32_undef128(); tmp_V4d = __builtin_ia32_undef256(); What do you think? ================ Comment at: clang/test/CodeGen/builtins-x86.c:228-231 + tmp_V8s = (V8s)__builtin_ia32_packsswb128(tmp_V8s, tmp_V8s); + tmp_V4i = (V4i)__builtin_ia32_packssdw128(tmp_V4i, tmp_V4i); + tmp_V8s = (V8s)__builtin_ia32_packuswb128(tmp_V8s, tmp_V8s); tmp_V8s = __builtin_ia32_pmulhuw128(tmp_V8s, tmp_V8s); ---------------- Same here: can we avoid the casts? ================ Comment at: clang/test/CodeGen/builtins-x86.c:250 tmp_V4s = __builtin_ia32_phsubsw(tmp_V4s, tmp_V4s); - tmp_V16c = __builtin_ia32_pmaddubsw128(tmp_V16c, tmp_V16c); + tmp_V16c = (V16c)__builtin_ia32_pmaddubsw128(tmp_V16c, tmp_V16c); tmp_V8c = __builtin_ia32_pmaddubsw(tmp_V8c, tmp_V8c); ---------------- And here: can we avoid the cast? ================ Comment at: clang/test/CodeGen/builtins-x86.c:428 tmp_V4i = __builtin_ia32_psradi128(tmp_V4i, tmp_i); - tmp_V8s = __builtin_ia32_pmaddwd128(tmp_V8s, tmp_V8s); + tmp_V8s = (V8s)__builtin_ia32_pmaddwd128(tmp_V8s, tmp_V8s); (void) __builtin_ia32_monitor(tmp_vp, tmp_Ui, tmp_Ui); ---------------- Can we change this line to the following instead?
tmp_V4i = __builtin_ia32_pmaddwd128(tmp_V8s, tmp_V8s); ================ Comment at: clang/test/CodeGen/builtins-x86.c:432 tmp_V16c = __builtin_ia32_lddqu(tmp_cCp); - tmp_V2LLi = __builtin_ia32_palignr128(tmp_V2LLi, tmp_V2LLi, imm_i); - tmp_V1LLi = __builtin_ia32_palignr(tmp_V1LLi, tmp_V1LLi, imm_i); + tmp_V2LLi = (V2LLi)__builtin_ia32_palignr128((V16c)tmp_V2LLi, (V16c)tmp_V2LLi, imm_i); + tmp_V1LLi = (V1LLi)__builtin_ia32_palignr((V8c)tmp_V1LLi, (V8c)tmp_V1LLi, imm_i); ---------------- Can we change the palignr128 line to the following instead? tmp_V16c = __builtin_ia32_palignr128(tmp_V16c, tmp_V16c, imm_i); ================ Comment at: clang/test/CodeGen/builtins-x86.c:433 + tmp_V2LLi = (V2LLi)__builtin_ia32_palignr128((V16c)tmp_V2LLi, (V16c)tmp_V2LLi, imm_i); + tmp_V1LLi = (V1LLi)__builtin_ia32_palignr((V8c)tmp_V1LLi, (V8c)tmp_V1LLi, imm_i); #ifdef USE_SSE4 ---------------- Can we change the palignr line to the following instead? tmp_V8c = __builtin_ia32_palignr(tmp_V8c, tmp_V8c, imm_i); Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D62580/new/ https://reviews.llvm.org/D62580 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits