Author: sampo Date: Sun Feb 3 01:18:54 2008 New Revision: 46681 URL: http://llvm.org/viewvc/llvm-project?rev=46681&view=rev Log: SSE 4.1 Intrinsics and detection
Modified: llvm/trunk/include/llvm/IntrinsicsX86.td llvm/trunk/lib/Target/X86/X86.td llvm/trunk/lib/Target/X86/X86Instr64bit.td llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/lib/Target/X86/X86InstrSSE.td llvm/trunk/lib/Target/X86/X86Subtarget.cpp llvm/trunk/lib/Target/X86/X86Subtarget.h Modified: llvm/trunk/include/llvm/IntrinsicsX86.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IntrinsicsX86.td?rev=46681&r1=46680&r2=46681&view=diff ============================================================================== --- llvm/trunk/include/llvm/IntrinsicsX86.td (original) +++ llvm/trunk/include/llvm/IntrinsicsX86.td Sun Feb 3 01:18:54 2008 @@ -674,6 +674,156 @@ } //===----------------------------------------------------------------------===// +// SSE4.1 + +// FP rounding ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">, + Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">, + Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">, + Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">, + Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i32_ty], [IntrNoMem]>; +} + +// Vector sign and zero extend +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd128">, + Intrinsic<[llvm_v4i32_ty, llvm_v16i8_ty]>; + def int_x86_sse41_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq128">, + Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty]>; + def int_x86_sse41_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v16i8_ty]>; + def int_x86_sse41_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq128">, + Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty]>; + def int_x86_sse41_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd128">, + Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty]>; + def int_x86_sse41_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq128">, + Intrinsic<[llvm_v2i64_ty, llvm_v8i16_ty]>; + def int_x86_sse41_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd128">, + Intrinsic<[llvm_v4i32_ty, llvm_v16i8_ty]>; + def int_x86_sse41_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq128">, + Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty]>; + def int_x86_sse41_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v16i8_ty]>; + def int_x86_sse41_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq128">, + Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty]>; + def int_x86_sse41_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd128">, + Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty]>; + def int_x86_sse41_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq128">, + Intrinsic<[llvm_v2i64_ty, llvm_v8i16_ty]>; +} + +// Vector min element +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_phminposuw : GCCBuiltin<"__builtin_ia32_phminposuw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty]>; +} + +// Vector compare, min, max +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_pcmpeqq : GCCBuiltin<"__builtin_ia32_pcmpeqq">, + Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty]>; + def int_x86_sse41_pmaxsb : GCCBuiltin<"__builtin_ia32_pmaxsb128">, + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty]>; + def int_x86_sse41_pmaxsd : GCCBuiltin<"__builtin_ia32_pmaxsd128">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; + def int_x86_sse41_pmaxud : GCCBuiltin<"__builtin_ia32_pmaxud128">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; + def int_x86_sse41_pmaxuw : GCCBuiltin<"__builtin_ia32_pmaxuw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty]>; + def int_x86_sse41_pminsb : GCCBuiltin<"__builtin_ia32_pminsb128">, + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty]>; + def int_x86_sse41_pminsd : GCCBuiltin<"__builtin_ia32_pminsd128">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; + def int_x86_sse41_pminud : GCCBuiltin<"__builtin_ia32_pminud128">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; + def int_x86_sse41_pminuw : GCCBuiltin<"__builtin_ia32_pminuw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty]>; +} + +// Vector pack +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_packusdw : GCCBuiltin<"__builtin_ia32_packusdw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; +} + +// Vector multiply +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_pmuldq : GCCBuiltin<"__builtin_ia32_pmuldq128">, + Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; + def int_x86_sse41_pmulld : GCCBuiltin<"__builtin_ia32_pmulld128">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>; +} + +// Vector extract +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_pextrb : GCCBuiltin<"__builtin_ia32_vec_ext_v16qi">, + Intrinsic<[llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty]>; + def int_x86_sse41_pextrd : GCCBuiltin<"__builtin_ia32_vec_ext_v4si">, + Intrinsic<[llvm_i32_ty, llvm_v4i32_ty, llvm_i32_ty]>; + def int_x86_sse41_pextrq : GCCBuiltin<"__builtin_ia32_vec_ext_v2di">, + Intrinsic<[llvm_i64_ty, llvm_v2i64_ty, llvm_i32_ty]>; + def int_x86_sse41_extractps : GCCBuiltin<"__builtin_ia32_extractps128">, + Intrinsic<[llvm_i32_ty, llvm_v4f32_ty, llvm_i32_ty]>; +} + +// Vector insert +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_pinsrb : GCCBuiltin<"__builtin_ia32_vec_set_v16qi">, + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty]>; + def int_x86_sse41_pinsrd : GCCBuiltin<"__builtin_ia32_vec_set_v4si">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty]>; + def int_x86_sse41_pinsrq : GCCBuiltin<"__builtin_ia32_vec_set_v2di">, + Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty]>; + def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">, + Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty]>; +} + +// Vector blend +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb128">, + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty]>; + def int_x86_sse41_pblendw : GCCBuiltin<"__builtin_ia32_pblendw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty]>; + def int_x86_sse41_blendpd : GCCBuiltin<"__builtin_ia32_blendpd">, + Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty]>; + def int_x86_sse41_blendps : GCCBuiltin<"__builtin_ia32_blendps">, + Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty]>; + def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">, + Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty]>; + def int_x86_sse41_blendvps : GCCBuiltin<"__builtin_ia32_blendvps">, + Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty]>; +} + +// Vector dot product +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">, + Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty]>; + def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">, + Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty]>; +} + +// Vector sum of absolute differences +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">, + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty]>; +} + +// Vector sum of absolute differences +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa">, + Intrinsic<[llvm_v2i64_ty, llvm_ptr_ty]>; +} + + +//===----------------------------------------------------------------------===// // MMX // Empty MMX state op. Modified: llvm/trunk/lib/Target/X86/X86.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=46681&r1=46680&r2=46681&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86.td (original) +++ llvm/trunk/lib/Target/X86/X86.td Sun Feb 3 01:18:54 2008 @@ -34,6 +34,12 @@ def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3", "Enable SSSE3 instructions", [FeatureSSE3]>; +def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41", + "Enable SSE 4.1 instructions", + [FeatureSSSE3]>; +def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42", + "Enable SSE 4.2 instructions", + [FeatureSSE41]>; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", "Enable 3DNow! instructions">; def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", @@ -66,6 +72,7 @@ def : Proc<"prescott", [FeatureSSE3]>; def : Proc<"nocona", [FeatureSSE3]>; def : Proc<"core2", [FeatureSSSE3]>; +def : Proc<"penryn", [FeatureSSE41]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr64bit.td?rev=46681&r1=46680&r2=46681&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Sun Feb 3 01:18:54 2008 @@ -1266,3 +1266,13 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; + +//===----------------------------------------------------------------------===// +// X86-64 SSE4.1 Instructions +//===----------------------------------------------------------------------===// + +// PEXTRB, unary, TA, 0x14, REX.W +// PEXTRW, unary, TA, 0x15, REX.W +// PEXTRQ, unary, TA, 0x16, REX.W +// EXTRACTPS, unary, TA, 0x17, REX.W +// PINSRQ, 2addr, binary, TA, 0x22, REX.W Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=46681&r1=46680&r2=46681&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Sun Feb 3 01:18:54 2008 @@ -166,6 +166,8 @@ def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; +def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; +def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def In32BitMode : Predicate<"!Subtarget->is64Bit()">; Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=46681&r1=46680&r2=46681&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun Feb 3 01:18:54 2008 @@ -3038,3 +3038,98 @@ (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + +//===----------------------------------------------------------------------===// +// SSE4.1 Instructions +//===----------------------------------------------------------------------===// + +// SSE4.1 Instruction Templates: +// +// SS418I - SSE 4.1 instructions with T8 prefix. +// SS41AI - SSE 4.1 instructions with TA prefix. +// +class SS418I<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>; +class SS41AI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>; + + +multiclass sse41_fp_unop_rm<bits<8> opcss, bits<8> opcps, + bits<8> opcsd, bits<8> opcpd, + string OpcodeStr, + Intrinsic F32Int, + Intrinsic V4F32Int, + Intrinsic F64Int, + Intrinsic V2F64Int, + bit Commutable = 0> { + // Intrinsic operation, reg. + def SSr_Int : SS41AI<opcss, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i32imm:$src2), + !strconcat(OpcodeStr, + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (F32Int VR128:$src1, imm:$src2))]> { + let isCommutable = Commutable; + } + + // Intrinsic operation, mem. + def SSm_Int : SS41AI<opcss, MRMSrcMem, + (outs VR128:$dst), (ins ssmem:$src1, i32imm:$src2), + !strconcat(OpcodeStr, + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (F32Int sse_load_f32:$src1, imm:$src2))]>; + + // Vector intrinsic operation, reg + def PSr_Int : SS41AI<opcps, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i32imm:$src2), + !strconcat(OpcodeStr, + "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]> { + let isCommutable = Commutable; + } + + // Vector intrinsic operation, mem + def PSm_Int : SS41AI<opcps, MRMSrcMem, + (outs VR128:$dst), (ins f128mem:$src1, i32imm:$src2), + !strconcat(OpcodeStr, + "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (V4F32Int (load addr:$src1), imm:$src2))]>; + + // Intrinsic operation, reg. + def SDr_Int : SS41AI<opcsd, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i32imm:$src2), + !strconcat(OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (F64Int VR128:$src1, imm:$src2))]> { + let isCommutable = Commutable; + } + + // Intrinsic operation, mem. + def SDm_Int : SS41AI<opcsd, MRMSrcMem, + (outs VR128:$dst), (ins sdmem:$src1, i32imm:$src2), + !strconcat(OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (F64Int sse_load_f64:$src1, imm:$src2))]>; + + // Vector intrinsic operation, reg + def PDr_Int : SS41AI<opcpd, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i32imm:$src2), + !strconcat(OpcodeStr, + "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]> { + let isCommutable = Commutable; + } + + // Vector intrinsic operation, mem + def PDm_Int : SS41AI<opcpd, MRMSrcMem, + (outs VR128:$dst), (ins f128mem:$src1, i32imm:$src2), + !strconcat(OpcodeStr, + "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (V2F64Int (load addr:$src1), imm:$src2))]>; +} + +// FP round - roundss, roundps, roundsd, roundpd +defm ROUND : sse41_fp_unop_rm<0x0A, 0x08, 0x0B, 0x09, "round", + int_x86_sse41_round_ss, int_x86_sse41_round_ps, + int_x86_sse41_round_sd, int_x86_sse41_round_pd>; Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=46681&r1=46680&r2=46681&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original) +++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Sun Feb 3 01:18:54 2008 @@ -114,6 +114,8 @@ if ((EDX >> 26) & 0x1) X86SSELevel = SSE2; if (ECX & 0x1) X86SSELevel = SSE3; if ((ECX >> 9) & 0x1) X86SSELevel = SSSE3; + if ((ECX >> 19) & 0x1) X86SSELevel = SSE41; + if ((ECX >> 20) & 0x1) X86SSELevel = SSE42; if (memcmp(text.c, "GenuineIntel", 12) == 0 || memcmp(text.c, "AuthenticAMD", 12) == 0) { Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=46681&r1=46680&r2=46681&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.h (original) +++ llvm/trunk/lib/Target/X86/X86Subtarget.h Sun Feb 3 01:18:54 2008 @@ -38,7 +38,7 @@ }; protected: enum X86SSEEnum { - NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3 + NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42 }; enum X863DNowEnum { @@ -127,6 +127,8 @@ bool hasSSE2() const { return X86SSELevel >= SSE2; } bool hasSSE3() const { return X86SSELevel >= SSE3; } bool hasSSSE3() const { return X86SSELevel >= SSSE3; } + bool hasSSE41() const { return X86SSELevel >= SSE41; } + bool hasSSE42() const { return X86SSELevel >= SSE42; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits