================ @@ -1,73 +1,151 @@ -// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,NATIVE_HALF -// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF - -// NATIVE_HALF: define noundef half @ -// NATIVE_HALF: call half @llvm.fabs.f16(half -// NO_HALF: call float @llvm.fabs.f32(float -// NATIVE_HALF: ret half -// NO_HALF: ret float -half test_length_half(half p0) -{ - return length(p0); -} -// NATIVE_HALF: define noundef half @ -// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16 -// NO_HALF: %hlsl.length = call float @llvm.dx.length.v2f32( -// NATIVE_HALF: ret half %hlsl.length -// NO_HALF: ret float %hlsl.length -half test_length_half2(half2 p0) -{ - return length(p0); -} -// NATIVE_HALF: define noundef half @ -// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16 -// NO_HALF: %hlsl.length = call float @llvm.dx.length.v3f32( -// NATIVE_HALF: ret half %hlsl.length -// NO_HALF: ret float %hlsl.length -half test_length_half3(half3 p0) -{ - return length(p0); -} -// NATIVE_HALF: define noundef half @ -// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16 -// NO_HALF: %hlsl.length = call float @llvm.dx.length.v4f32( -// NATIVE_HALF: ret half %hlsl.length -// NO_HALF: ret float %hlsl.length -half test_length_half4(half4 p0) -{ - return length(p0); -} - -// CHECK: define noundef float @ -// CHECK: call float @llvm.fabs.f32(float -// CHECK: ret float -float test_length_float(float p0) -{ - return length(p0); -} -// CHECK: define noundef float @ -// CHECK: %hlsl.length = call float @llvm.dx.length.v2f32( -// CHECK: ret float %hlsl.length -float test_length_float2(float2 p0) -{ - return length(p0); -} -// CHECK: define noundef float @ -// CHECK: %hlsl.length = call float @llvm.dx.length.v3f32( -// CHECK: ret float %hlsl.length -float test_length_float3(float3 p0) -{ - return length(p0); -} -// CHECK: define noundef float @ -// CHECK: %hlsl.length = call float @llvm.dx.length.v4f32( -// CHECK: ret float %hlsl.length -float test_length_float4(float4 p0) -{ - return length(p0); -} +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -O2 -o - | FileCheck %s + +// CHECK-LABEL: define noundef half @_Z16test_length_halfDh( +// CHECK-SAME: half noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[P0]]) +// CHECK-NEXT: ret half [[ELT_ABS_I]] +// +half test_length_half(half p0) +{ + return length(p0); +} + +// CHECK-LABEL: define noundef half @_Z17test_length_half2Dv2_Dh( +// CHECK-SAME: <2 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1 +// CHECK-NEXT: [[ADD_I:%.*]] = fadd half [[VECEXT1_I]], [[VECEXT_I]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[ADD_I]]) +// CHECK-NEXT: ret half [[TMP0]] +// +half test_length_half2(half2 p0) +{ + return length(p0); +} + +// CHECK-LABEL: define noundef half @_Z17test_length_half3Dv3_Dh( +// CHECK-SAME: <3 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1 +// CHECK-NEXT: [[ADD_I:%.*]] = fadd half [[VECEXT_I]], [[VECEXT1_I]] +// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2 +// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd half [[ADD_I]], [[VECEXT1_I_1]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[ADD_I_1]]) +// CHECK-NEXT: ret half [[TMP0]] +// +half test_length_half3(half3 p0) +{ + return length(p0); +} + +// CHECK-LABEL: define noundef half @_Z17test_length_half4Dv4_Dh( +// CHECK-SAME: <4 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1 +// CHECK-NEXT: [[ADD_I:%.*]] = fadd half [[VECEXT_I]], [[VECEXT1_I]] +// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2 +// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd half [[ADD_I]], [[VECEXT1_I_1]] +// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3 +// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd half [[ADD_I_1]], [[VECEXT1_I_2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[ADD_I_2]]) +// CHECK-NEXT: ret half [[TMP0]] +// +half test_length_half4(half4 p0) +{ + return length(p0); +} + + +// CHECK-LABEL: define noundef float @_Z17test_length_floatf( +// CHECK-SAME: float noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[P0]]) +// CHECK-NEXT: ret float [[ELT_ABS_I]] +// +float test_length_float(float p0) +{ + return length(p0); +} + +// CHECK-LABEL: define noundef float @_Z18test_length_float2Dv2_f( +// CHECK-SAME: <2 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1 +// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[VECEXT1_I]], [[VECEXT_I]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I]]) +// CHECK-NEXT: ret float [[TMP0]] +// +float test_length_float2(float2 p0) +{ + return length(p0); +} + +// CHECK-LABEL: define noundef float @_Z18test_length_float3Dv3_f( +// CHECK-SAME: <3 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1 +// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[VECEXT_I]], [[VECEXT1_I]] +// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2 +// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd float [[ADD_I]], [[VECEXT1_I_1]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I_1]]) +// CHECK-NEXT: ret float [[TMP0]] +// +float test_length_float3(float3 p0) +{ + return length(p0); +} + + +// CHECK-LABEL: define noundef float @_Z18test_length_float4Dv4_f( +// CHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1 +// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[VECEXT_I]], [[VECEXT1_I]] +// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2 +// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd float [[ADD_I]], [[VECEXT1_I_1]] +// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3 +// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd float [[ADD_I_1]], [[VECEXT1_I_2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I_2]]) +// CHECK-NEXT: ret float [[TMP0]] +// +float test_length_float4(float4 p0) +{ + return length(p0); +} + + +// CHECK-LABEL: define noundef float @_Z26test_length_float4_extractDv4_f( +// CHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1 +// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2 +// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3 +// CHECK-NEXT: [[ADD_I12:%.*]] = fadd float [[VECEXT_I]], [[VECEXT1_I]] +// CHECK-NEXT: [[ADD_I12_1:%.*]] = fadd float [[ADD_I12]], [[VECEXT1_I_1]] +// CHECK-NEXT: [[ADD_I12_2:%.*]] = fadd float [[ADD_I12_1]], [[VECEXT1_I_2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I12_2]]) +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP0]] +// CHECK-NEXT: ret float [[ADD]] +// +float test_length_float4_extract(float4 p0) +{ + return length(p0) + length(p0); ---------------- farzonl wrote:
Adding this case because made me realize that we need `O2` so that we don't have duplicate extracts and adds when calling length twice. The vec_reduce_add implementation used `O1` and didn't have this problem so I think this might be a side effect of not being able to do a constexpr. https://github.com/llvm/llvm-project/pull/121611 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits