Hi Nicolai,
On 13/10/16 01:50 AM, Nicolai Hähnle wrote:
> Module: Mesa
> Branch: master
> Commit: f5f3cadca3809952288e3726ed5fde22090dc61d
> URL:
> http://cgit.freedesktop.org/mesa/mesa/commit/?id=f5f3cadca3809952288e3726ed5fde22090dc61d
>
> Author: Nicolai Hähnle <nicolai.haeh...@amd.com>
> Date: Fri Oct 7 12:49:36 2016 +0200
>
> st/glsl_to_tgsi: simpler fixup of empty writemasks

This change broke the piglit tests spec@glsl-110@execution@variable-indexing@vs-temp-array-mat2-index(-col)-wr on my Kaveri. Output with R600_DEBUG=ps,vs attached as vs-temp-array-mat2-index-wr.txt.

P.S. The newly enabled tests spec@arb_enhanced_layouts@execution@component-layout@vs-tcs-load-output(-indirect) also fail, output attached as vs-tcs-load-output.stderr.

-- 
Earthling Michel Dänzer | http://www.amd.com
Libre software enthusiast | Mesa and X developer
VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..9] DCL TEMP[0], LOCAL DCL TEMP[1..2], ARRAY(1), LOCAL DCL TEMP[3..8], ARRAY(2), LOCAL DCL TEMP[9..10], ARRAY(3), LOCAL DCL TEMP[11..12], ARRAY(4), LOCAL DCL TEMP[13..14], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 0.0000, 1.0000, 0.0000} IMM[1] INT32 {2, 0, 0, 0} 0: MUL TEMP[0], CONST[6], IN[0].xxxx 1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[9], IN[0].wwww, TEMP[0] 4: MOV TEMP[1], IMM[0].xxxx 5: MOV TEMP[2], IMM[0].xxxx 6: MOV TEMP[3].xy, TEMP[1].xyxx 7: MOV TEMP[4].xy, TEMP[2].xyxx 8: MOV TEMP[9], IMM[0].xxxx 9: MOV TEMP[10], IMM[0].xxxx 10: MOV TEMP[5].xy, TEMP[9].xyxx 11: MOV TEMP[6].xy, TEMP[10].xyxx 12: MOV TEMP[11], IMM[0].xxxx 13: MOV TEMP[12], IMM[0].xxxx 14: MOV TEMP[7].xy, TEMP[11].xyxx 15: MOV TEMP[8].xy, TEMP[12].xyxx 16: UMUL TEMP[13].x, CONST[4].xxxx, IMM[1].xxxx 17: UARL ADDR[0].x, TEMP[13].xxxx 18: MOV TEMP[ADDR[0].x+3](2).xy, CONST[0].xyxx 19: UARL ADDR[0].x, TEMP[13].xxxx 20: MOV TEMP[ADDR[0].x+4](2).xy, CONST[1].xyxx 21: UMUL TEMP[13].x, CONST[4].xxxx, IMM[1].xxxx 22: UARL ADDR[0].x, TEMP[13].xxxx 23: MOV TEMP[ADDR[0].x+4](2).xy, CONST[5].xyxx 24: UMUL TEMP[13].x, CONST[4].xxxx, IMM[1].xxxx 25: UMUL TEMP[14].x, CONST[4].xxxx, IMM[1].xxxx 26: UARL ADDR[0].x, TEMP[14].xxxx 27: MUL TEMP[14].xy, TEMP[ADDR[0].x+3](2).xyyy, CONST[2].xxxx 28: UARL ADDR[0].x, TEMP[13].xxxx 29: MAD TEMP[13].xy, TEMP[ADDR[0].x+4](2).xyyy, CONST[2].yyyy, TEMP[14].xyyy 30: ADD TEMP[13].xy, TEMP[13].xyyy, -CONST[3].xyyy 31: DP2 TEMP[13].x, TEMP[13].xyyy, TEMP[13].xyyy 32: FSLT TEMP[13].x, TEMP[13].xxxx, IMM[0].yyyy 33: UIF TEMP[13].xxxx :0 34: MOV TEMP[13], IMM[0].xzxz 35: ELSE :0 36: MOV TEMP[13], IMM[0].zxxz 37: ENDIF 38: MOV OUT[0], TEMP[0] 39: MOV OUT[1], TEMP[13] 40: END radeonsi: Compiling shader 1 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ 
float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { main_body: %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !invariant.load !0 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %14) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !invariant.load !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %25 = fmul float %24, %18 %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %27 = fmul float %26, %18 %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %29 = fmul float %28, %18 %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !invariant.load !0 %32 = call float @llvm.SI.load.const(<16 x i8> %31, i32 108) %33 = fmul float %32, %18 %34 = call float @llvm.SI.load.const(<16 x i8> %31, i32 112) %35 = fmul float %34, %19 %36 = fadd float %35, %25 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, 
align 16, !invariant.load !0 %39 = call float @llvm.SI.load.const(<16 x i8> %38, i32 116) %40 = fmul float %39, %19 %41 = fadd float %40, %27 %42 = call float @llvm.SI.load.const(<16 x i8> %38, i32 120) %43 = fmul float %42, %19 %44 = fadd float %43, %29 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call float @llvm.SI.load.const(<16 x i8> %46, i32 124) %48 = fmul float %47, %19 %49 = fadd float %48, %33 %50 = call float @llvm.SI.load.const(<16 x i8> %46, i32 128) %51 = fmul float %50, %20 %52 = fadd float %51, %36 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0 %55 = call float @llvm.SI.load.const(<16 x i8> %54, i32 132) %56 = fmul float %55, %20 %57 = fadd float %56, %41 %58 = call float @llvm.SI.load.const(<16 x i8> %54, i32 136) %59 = fmul float %58, %20 %60 = fadd float %59, %44 %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !invariant.load !0 %63 = call float @llvm.SI.load.const(<16 x i8> %62, i32 140) %64 = fmul float %63, %20 %65 = fadd float %64, %49 %66 = call float @llvm.SI.load.const(<16 x i8> %62, i32 144) %67 = fmul float %66, %21 %68 = fadd float %67, %52 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !invariant.load !0 %71 = call float @llvm.SI.load.const(<16 x i8> %70, i32 148) %72 = fmul float %71, %21 %73 = fadd float %72, %57 %74 = call float @llvm.SI.load.const(<16 x i8> %70, i32 152) %75 = fmul float %74, %21 %76 = fadd float %75, %60 %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, 
!amdgpu.uniform !0 %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !invariant.load !0 %79 = call float @llvm.SI.load.const(<16 x i8> %78, i32 156) %80 = fmul float %79, %21 %81 = fadd float %80, %65 %82 = call float @llvm.SI.load.const(<16 x i8> %78, i32 64) %83 = bitcast float %82 to i32 %84 = shl i32 %83, 1 %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !invariant.load !0 %87 = call float @llvm.SI.load.const(<16 x i8> %86, i32 0) %88 = call float @llvm.SI.load.const(<16 x i8> %86, i32 4) %89 = insertelement <6 x float> zeroinitializer, float %87, i32 %84 %90 = extractelement <6 x float> %89, i32 0 %91 = extractelement <6 x float> %89, i32 1 %92 = extractelement <6 x float> %89, i32 2 %93 = extractelement <6 x float> %89, i32 3 %94 = extractelement <6 x float> %89, i32 4 %95 = extractelement <6 x float> %89, i32 5 %96 = insertelement <6 x float> zeroinitializer, float %88, i32 %84 %97 = extractelement <6 x float> %96, i32 0 %98 = extractelement <6 x float> %96, i32 1 %99 = extractelement <6 x float> %96, i32 2 %100 = extractelement <6 x float> %96, i32 3 %101 = extractelement <6 x float> %96, i32 4 %102 = extractelement <6 x float> %96, i32 5 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !invariant.load !0 %105 = call float @llvm.SI.load.const(<16 x i8> %104, i32 16) %106 = call float @llvm.SI.load.const(<16 x i8> %104, i32 20) %107 = or i32 %84, 1 %array_vector12 = insertelement <6 x float> undef, float %90, i32 0 %array_vector13 = insertelement <6 x float> %array_vector12, float %91, i32 1 %array_vector14 = insertelement <6 x float> %array_vector13, float %92, i32 2 %array_vector15 = insertelement <6 x float> %array_vector14, float %93, i32 3 %array_vector16 = insertelement <6 x float> %array_vector15, float 
%94, i32 4 %array_vector17 = insertelement <6 x float> %array_vector16, float %95, i32 5 %108 = insertelement <6 x float> %array_vector17, float %105, i32 %107 %109 = extractelement <6 x float> %108, i32 0 %110 = extractelement <6 x float> %108, i32 1 %111 = extractelement <6 x float> %108, i32 2 %112 = extractelement <6 x float> %108, i32 3 %113 = extractelement <6 x float> %108, i32 4 %114 = extractelement <6 x float> %108, i32 5 %115 = or i32 %84, 1 %array_vector18 = insertelement <6 x float> undef, float %97, i32 0 %array_vector19 = insertelement <6 x float> %array_vector18, float %98, i32 1 %array_vector20 = insertelement <6 x float> %array_vector19, float %99, i32 2 %array_vector21 = insertelement <6 x float> %array_vector20, float %100, i32 3 %array_vector22 = insertelement <6 x float> %array_vector21, float %101, i32 4 %array_vector23 = insertelement <6 x float> %array_vector22, float %102, i32 5 %116 = insertelement <6 x float> %array_vector23, float %106, i32 %115 %117 = extractelement <6 x float> %116, i32 0 %118 = extractelement <6 x float> %116, i32 1 %119 = extractelement <6 x float> %116, i32 2 %120 = extractelement <6 x float> %116, i32 3 %121 = extractelement <6 x float> %116, i32 4 %122 = extractelement <6 x float> %116, i32 5 %123 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %124 = load <16 x i8>, <16 x i8> addrspace(2)* %123, align 16, !invariant.load !0 %125 = call float @llvm.SI.load.const(<16 x i8> %124, i32 64) %126 = bitcast float %125 to i32 %127 = shl i32 %126, 1 %128 = call float @llvm.SI.load.const(<16 x i8> %124, i32 80) %129 = call float @llvm.SI.load.const(<16 x i8> %124, i32 84) %130 = or i32 %127, 1 %array_vector24 = insertelement <6 x float> undef, float %109, i32 0 %array_vector25 = insertelement <6 x float> %array_vector24, float %110, i32 1 %array_vector26 = insertelement <6 x float> %array_vector25, float %111, i32 2 %array_vector27 = insertelement <6 x float> 
%array_vector26, float %112, i32 3 %array_vector28 = insertelement <6 x float> %array_vector27, float %113, i32 4 %array_vector29 = insertelement <6 x float> %array_vector28, float %114, i32 5 %131 = insertelement <6 x float> %array_vector29, float %128, i32 %130 %132 = or i32 %127, 1 %array_vector30 = insertelement <6 x float> undef, float %117, i32 0 %array_vector31 = insertelement <6 x float> %array_vector30, float %118, i32 1 %array_vector32 = insertelement <6 x float> %array_vector31, float %119, i32 2 %array_vector33 = insertelement <6 x float> %array_vector32, float %120, i32 3 %array_vector34 = insertelement <6 x float> %array_vector33, float %121, i32 4 %array_vector35 = insertelement <6 x float> %array_vector34, float %122, i32 5 %133 = insertelement <6 x float> %array_vector35, float %129, i32 %132 %134 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %135 = load <16 x i8>, <16 x i8> addrspace(2)* %134, align 16, !invariant.load !0 %136 = call float @llvm.SI.load.const(<16 x i8> %135, i32 64) %137 = bitcast float %136 to i32 %138 = shl i32 %137, 1 %139 = call float @llvm.SI.load.const(<16 x i8> %135, i32 64) %140 = bitcast float %139 to i32 %141 = shl i32 %140, 1 %142 = extractelement <6 x float> %131, i32 %141 %143 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %144 = load <16 x i8>, <16 x i8> addrspace(2)* %143, align 16, !invariant.load !0 %145 = call float @llvm.SI.load.const(<16 x i8> %144, i32 32) %146 = fmul float %142, %145 %147 = extractelement <6 x float> %133, i32 %141 %148 = call float @llvm.SI.load.const(<16 x i8> %144, i32 32) %149 = fmul float %147, %148 %150 = or i32 %138, 1 %151 = extractelement <6 x float> %131, i32 %150 %152 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %153 = load <16 x i8>, <16 x i8> addrspace(2)* %152, align 16, !invariant.load !0 %154 = call float 
@llvm.SI.load.const(<16 x i8> %153, i32 36) %155 = fmul float %151, %154 %156 = fadd float %155, %146 %157 = or i32 %138, 1 %158 = extractelement <6 x float> %133, i32 %157 %159 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %160 = load <16 x i8>, <16 x i8> addrspace(2)* %159, align 16, !invariant.load !0 %161 = call float @llvm.SI.load.const(<16 x i8> %160, i32 36) %162 = fmul float %158, %161 %163 = fadd float %162, %149 %164 = call float @llvm.SI.load.const(<16 x i8> %160, i32 48) %165 = fsub float %156, %164 %166 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %167 = load <16 x i8>, <16 x i8> addrspace(2)* %166, align 16, !invariant.load !0 %168 = call float @llvm.SI.load.const(<16 x i8> %167, i32 52) %169 = fsub float %163, %168 %170 = fmul float %165, %165 %171 = fmul float %169, %169 %172 = fadd float %170, %171 %173 = fcmp olt float %172, 0x3E312E0BE0000000 %. = select i1 %173, float 0.000000e+00, float 1.000000e+00 %.60 = select i1 %173, float 1.000000e+00, float 0.000000e+00 %174 = and i32 %9, 1 %175 = icmp eq i32 %174, 0 br i1 %175, label %endif-block, label %if-true-block if-true-block: ; preds = %main_body %176 = call float @llvm.AMDGPU.clamp.(float %., float 0.000000e+00, float 1.000000e+00) %177 = call float @llvm.AMDGPU.clamp.(float %.60, float 0.000000e+00, float 1.000000e+00) %178 = call float @llvm.AMDGPU.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) %179 = call float @llvm.AMDGPU.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) br label %endif-block endif-block: ; preds = %main_body, %if-true-block %OUT1.w.0 = phi float [ %179, %if-true-block ], [ 1.000000e+00, %main_body ] %OUT1.z.0 = phi float [ %178, %if-true-block ], [ 0.000000e+00, %main_body ] %OUT1.y.0 = phi float [ %177, %if-true-block ], [ %.60, %main_body ] %OUT1.x.0 = phi float [ %176, %if-true-block ], [ %., %main_body ] %180 = bitcast 
i32 %12 to float %181 = insertvalue <{ float, float, float }> undef, float %180, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %OUT1.x.0, float %OUT1.y.0, float %OUT1.z.0, float %OUT1.w.0) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %73, float %76, float %81) ret <{ float, float, float }> %181 } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #0 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} LLVM triggered Diagnostic Handler: Illegal instruction detected: missing implicit register operands %VGPR0<def> = V_MOVRELS_B32_e32 %VGPR13<undef>, %M0<imp-use>, %EXEC<imp-use>, %VGPR13_VGPR14_VGPR15_VGPR16_VGPR17_VGPR18_VGPR19_VGPR20<imp-use>, %VGPR13<imp-def>, %VGPR14<imp-def>, %VGPR13_VGPR14<imp-def> LLVM failed to compile shader radeonsi: can't compile a main shader part FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 2 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval 
dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32, float, float, float, float) #0 { main_body: %27 = bitcast float %5 to i32 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33 } attributes #0 = { "InitialPSInputAddr"="36983" } VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..9] DCL TEMP[0], LOCAL DCL TEMP[1..2], ARRAY(1), LOCAL DCL TEMP[3..8], ARRAY(2), LOCAL DCL TEMP[9..10], ARRAY(3), LOCAL DCL TEMP[11..12], ARRAY(4), LOCAL DCL TEMP[13..14], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 0.0000, 1.0000, 0.0000} IMM[1] INT32 
{2, 0, 0, 0} 0: MUL TEMP[0], CONST[6], IN[0].xxxx 1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[9], IN[0].wwww, TEMP[0] 4: MOV TEMP[1], IMM[0].xxxx 5: MOV TEMP[2], IMM[0].xxxx 6: MOV TEMP[3].xy, TEMP[1].xyxx 7: MOV TEMP[4].xy, TEMP[2].xyxx 8: MOV TEMP[9], IMM[0].xxxx 9: MOV TEMP[10], IMM[0].xxxx 10: MOV TEMP[5].xy, TEMP[9].xyxx 11: MOV TEMP[6].xy, TEMP[10].xyxx 12: MOV TEMP[11], IMM[0].xxxx 13: MOV TEMP[12], IMM[0].xxxx 14: MOV TEMP[7].xy, TEMP[11].xyxx 15: MOV TEMP[8].xy, TEMP[12].xyxx 16: UMUL TEMP[13].x, CONST[4].xxxx, IMM[1].xxxx 17: UARL ADDR[0].x, TEMP[13].xxxx 18: MOV TEMP[ADDR[0].x+3](2).xy, CONST[0].xyxx 19: UARL ADDR[0].x, TEMP[13].xxxx 20: MOV TEMP[ADDR[0].x+4](2).xy, CONST[1].xyxx 21: UMUL TEMP[13].x, CONST[4].xxxx, IMM[1].xxxx 22: UARL ADDR[0].x, TEMP[13].xxxx 23: MOV TEMP[ADDR[0].x+4](2).xy, CONST[5].xyxx 24: UMUL TEMP[13].x, CONST[4].xxxx, IMM[1].xxxx 25: UMUL TEMP[14].x, CONST[4].xxxx, IMM[1].xxxx 26: UARL ADDR[0].x, TEMP[14].xxxx 27: MUL TEMP[14].xy, TEMP[ADDR[0].x+3](2).xyyy, CONST[2].xxxx 28: UARL ADDR[0].x, TEMP[13].xxxx 29: MAD TEMP[13].xy, TEMP[ADDR[0].x+4](2).xyyy, CONST[2].yyyy, TEMP[14].xyyy 30: ADD TEMP[13].xy, TEMP[13].xyyy, -CONST[3].xyyy 31: DP2 TEMP[13].x, TEMP[13].xyyy, TEMP[13].xyyy 32: FSLT TEMP[13].x, TEMP[13].xxxx, IMM[0].yyyy 33: UIF TEMP[13].xxxx :0 34: MOV TEMP[13], IMM[0].xzxz 35: ELSE :0 36: MOV TEMP[13], IMM[0].zxxz 37: ENDIF 38: MOV OUT[0], TEMP[0] 39: MOV OUT[1], TEMP[13] 40: END radeonsi: Compiling shader 3 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs void @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* 
byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { main_body: %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !invariant.load !0 %16 = add i32 %6, %10 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !invariant.load !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %25 = fmul float %24, %18 %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %27 = fmul float %26, %18 %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %29 = fmul float %28, %18 %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !invariant.load !0 %32 = call float @llvm.SI.load.const(<16 x i8> %31, i32 108) %33 = fmul float %32, %18 %34 = call float @llvm.SI.load.const(<16 x i8> %31, i32 112) %35 = fmul float %34, %19 %36 = fadd float %35, %25 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !invariant.load !0 %39 = call float @llvm.SI.load.const(<16 x i8> %38, i32 116) %40 = fmul float %39, %19 %41 = fadd float %40, %27 %42 = call float @llvm.SI.load.const(<16 x i8> %38, i32 120) %43 = fmul float %42, %19 %44 = fadd float %43, %29 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, 
!amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call float @llvm.SI.load.const(<16 x i8> %46, i32 124) %48 = fmul float %47, %19 %49 = fadd float %48, %33 %50 = call float @llvm.SI.load.const(<16 x i8> %46, i32 128) %51 = fmul float %50, %20 %52 = fadd float %51, %36 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0 %55 = call float @llvm.SI.load.const(<16 x i8> %54, i32 132) %56 = fmul float %55, %20 %57 = fadd float %56, %41 %58 = call float @llvm.SI.load.const(<16 x i8> %54, i32 136) %59 = fmul float %58, %20 %60 = fadd float %59, %44 %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !invariant.load !0 %63 = call float @llvm.SI.load.const(<16 x i8> %62, i32 140) %64 = fmul float %63, %20 %65 = fadd float %64, %49 %66 = call float @llvm.SI.load.const(<16 x i8> %62, i32 144) %67 = fmul float %66, %21 %68 = fadd float %67, %52 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !invariant.load !0 %71 = call float @llvm.SI.load.const(<16 x i8> %70, i32 148) %72 = fmul float %71, %21 %73 = fadd float %72, %57 %74 = call float @llvm.SI.load.const(<16 x i8> %70, i32 152) %75 = fmul float %74, %21 %76 = fadd float %75, %60 %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !invariant.load !0 %79 = call float @llvm.SI.load.const(<16 x i8> %78, i32 156) %80 = fmul float %79, %21 %81 = fadd float %80, %65 %82 = call float @llvm.SI.load.const(<16 x i8> %78, i32 64) %83 = bitcast float %82 to i32 %84 = shl i32 %83, 1 %85 = getelementptr [16 x 
<16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !invariant.load !0 %87 = call float @llvm.SI.load.const(<16 x i8> %86, i32 0) %88 = call float @llvm.SI.load.const(<16 x i8> %86, i32 4) %89 = insertelement <6 x float> zeroinitializer, float %87, i32 %84 %90 = extractelement <6 x float> %89, i32 0 %91 = extractelement <6 x float> %89, i32 1 %92 = extractelement <6 x float> %89, i32 2 %93 = extractelement <6 x float> %89, i32 3 %94 = extractelement <6 x float> %89, i32 4 %95 = extractelement <6 x float> %89, i32 5 %96 = insertelement <6 x float> zeroinitializer, float %88, i32 %84 %97 = extractelement <6 x float> %96, i32 0 %98 = extractelement <6 x float> %96, i32 1 %99 = extractelement <6 x float> %96, i32 2 %100 = extractelement <6 x float> %96, i32 3 %101 = extractelement <6 x float> %96, i32 4 %102 = extractelement <6 x float> %96, i32 5 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !invariant.load !0 %105 = call float @llvm.SI.load.const(<16 x i8> %104, i32 16) %106 = call float @llvm.SI.load.const(<16 x i8> %104, i32 20) %107 = or i32 %84, 1 %array_vector12 = insertelement <6 x float> undef, float %90, i32 0 %array_vector13 = insertelement <6 x float> %array_vector12, float %91, i32 1 %array_vector14 = insertelement <6 x float> %array_vector13, float %92, i32 2 %array_vector15 = insertelement <6 x float> %array_vector14, float %93, i32 3 %array_vector16 = insertelement <6 x float> %array_vector15, float %94, i32 4 %array_vector17 = insertelement <6 x float> %array_vector16, float %95, i32 5 %108 = insertelement <6 x float> %array_vector17, float %105, i32 %107 %109 = extractelement <6 x float> %108, i32 0 %110 = extractelement <6 x float> %108, i32 1 %111 = extractelement <6 x float> %108, i32 2 %112 = extractelement <6 x float> %108, i32 3 %113 = 
extractelement <6 x float> %108, i32 4 %114 = extractelement <6 x float> %108, i32 5 %115 = or i32 %84, 1 %array_vector18 = insertelement <6 x float> undef, float %97, i32 0 %array_vector19 = insertelement <6 x float> %array_vector18, float %98, i32 1 %array_vector20 = insertelement <6 x float> %array_vector19, float %99, i32 2 %array_vector21 = insertelement <6 x float> %array_vector20, float %100, i32 3 %array_vector22 = insertelement <6 x float> %array_vector21, float %101, i32 4 %array_vector23 = insertelement <6 x float> %array_vector22, float %102, i32 5 %116 = insertelement <6 x float> %array_vector23, float %106, i32 %115 %117 = extractelement <6 x float> %116, i32 0 %118 = extractelement <6 x float> %116, i32 1 %119 = extractelement <6 x float> %116, i32 2 %120 = extractelement <6 x float> %116, i32 3 %121 = extractelement <6 x float> %116, i32 4 %122 = extractelement <6 x float> %116, i32 5 %123 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %124 = load <16 x i8>, <16 x i8> addrspace(2)* %123, align 16, !invariant.load !0 %125 = call float @llvm.SI.load.const(<16 x i8> %124, i32 64) %126 = bitcast float %125 to i32 %127 = shl i32 %126, 1 %128 = call float @llvm.SI.load.const(<16 x i8> %124, i32 80) %129 = call float @llvm.SI.load.const(<16 x i8> %124, i32 84) %130 = or i32 %127, 1 %array_vector24 = insertelement <6 x float> undef, float %109, i32 0 %array_vector25 = insertelement <6 x float> %array_vector24, float %110, i32 1 %array_vector26 = insertelement <6 x float> %array_vector25, float %111, i32 2 %array_vector27 = insertelement <6 x float> %array_vector26, float %112, i32 3 %array_vector28 = insertelement <6 x float> %array_vector27, float %113, i32 4 %array_vector29 = insertelement <6 x float> %array_vector28, float %114, i32 5 %131 = insertelement <6 x float> %array_vector29, float %128, i32 %130 %132 = or i32 %127, 1 %array_vector30 = insertelement <6 x float> undef, float %117, i32 0 
%array_vector31 = insertelement <6 x float> %array_vector30, float %118, i32 1 %array_vector32 = insertelement <6 x float> %array_vector31, float %119, i32 2 %array_vector33 = insertelement <6 x float> %array_vector32, float %120, i32 3 %array_vector34 = insertelement <6 x float> %array_vector33, float %121, i32 4 %array_vector35 = insertelement <6 x float> %array_vector34, float %122, i32 5 %133 = insertelement <6 x float> %array_vector35, float %129, i32 %132 %134 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %135 = load <16 x i8>, <16 x i8> addrspace(2)* %134, align 16, !invariant.load !0 %136 = call float @llvm.SI.load.const(<16 x i8> %135, i32 64) %137 = bitcast float %136 to i32 %138 = shl i32 %137, 1 %139 = call float @llvm.SI.load.const(<16 x i8> %135, i32 64) %140 = bitcast float %139 to i32 %141 = shl i32 %140, 1 %142 = extractelement <6 x float> %131, i32 %141 %143 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %144 = load <16 x i8>, <16 x i8> addrspace(2)* %143, align 16, !invariant.load !0 %145 = call float @llvm.SI.load.const(<16 x i8> %144, i32 32) %146 = fmul float %142, %145 %147 = extractelement <6 x float> %133, i32 %141 %148 = call float @llvm.SI.load.const(<16 x i8> %144, i32 32) %149 = fmul float %147, %148 %150 = or i32 %138, 1 %151 = extractelement <6 x float> %131, i32 %150 %152 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %153 = load <16 x i8>, <16 x i8> addrspace(2)* %152, align 16, !invariant.load !0 %154 = call float @llvm.SI.load.const(<16 x i8> %153, i32 36) %155 = fmul float %151, %154 %156 = fadd float %155, %146 %157 = or i32 %138, 1 %158 = extractelement <6 x float> %133, i32 %157 %159 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %160 = load <16 x i8>, <16 x i8> addrspace(2)* %159, align 16, !invariant.load !0 
%161 = call float @llvm.SI.load.const(<16 x i8> %160, i32 36) %162 = fmul float %158, %161 %163 = fadd float %162, %149 %164 = call float @llvm.SI.load.const(<16 x i8> %160, i32 48) %165 = fsub float %156, %164 %166 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %167 = load <16 x i8>, <16 x i8> addrspace(2)* %166, align 16, !invariant.load !0 %168 = call float @llvm.SI.load.const(<16 x i8> %167, i32 52) %169 = fsub float %163, %168 %170 = fmul float %165, %165 %171 = fmul float %169, %169 %172 = fadd float %170, %171 %173 = fcmp olt float %172, 0x3E312E0BE0000000 %. = select i1 %173, float 0.000000e+00, float 1.000000e+00 %.60 = select i1 %173, float 1.000000e+00, float 0.000000e+00 %174 = and i32 %9, 1 %175 = icmp eq i32 %174, 0 br i1 %175, label %endif-block, label %if-true-block if-true-block: ; preds = %main_body %176 = call float @llvm.AMDGPU.clamp.(float %., float 0.000000e+00, float 1.000000e+00) %177 = call float @llvm.AMDGPU.clamp.(float %.60, float 0.000000e+00, float 1.000000e+00) %178 = call float @llvm.AMDGPU.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) %179 = call float @llvm.AMDGPU.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) br label %endif-block endif-block: ; preds = %main_body, %if-true-block %OUT1.w.0 = phi float [ %179, %if-true-block ], [ 1.000000e+00, %main_body ] %OUT1.z.0 = phi float [ %178, %if-true-block ], [ 0.000000e+00, %main_body ] %OUT1.y.0 = phi float [ %177, %if-true-block ], [ %.60, %main_body ] %OUT1.x.0 = phi float [ %176, %if-true-block ], [ %., %main_body ] call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %OUT1.x.0, float %OUT1.y.0, float %OUT1.z.0, float %OUT1.w.0) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %73, float %76, float %81) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind 
readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #0 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} LLVM triggered Diagnostic Handler: Illegal instruction detected: missing implicit register operands %VGPR6<def> = V_MOVRELS_B32_e32 %VGPR10<undef>, %M0<imp-use>, %EXEC<imp-use>, %VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15_VGPR16_VGPR17<imp-use>, %VGPR10<imp-def>, %VGPR11<imp-def>, %VGPR10_VGPR11<imp-def> LLVM failed to compile shader EE ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:1082 si_shader_select_with_key - Failed to build shader variant (type=0) 1 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 4 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6) %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6) %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6) %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6) 
%27 = bitcast float %5 to i32 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33 } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END radeonsi: Compiling shader 5 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval 
dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { main_body: %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !invariant.load !0 %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %14) %19 = extractelement <4 x float> %18, i32 0 %20 = extractelement <4 x float> %18, i32 1 %21 = extractelement <4 x float> %18, i32 2 %22 = extractelement <4 x float> %18, i32 3 %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %15) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = extractelement <4 x float> %25, i32 3 %30 = bitcast i32 %12 to float %31 = insertvalue <{ float, float, float }> undef, float %30, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %26, float %27, float %28, float %29) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %19, float %20, float %21, float %22) ret <{ float, float, float }> %31 } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} radeonsi: Compiling shader 6 Vertex Shader Prolog LLVM 
IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { main_body: %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> undef, i32 %0, 0 %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %20, i32 %1, 1 %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %21, i32 %2, 2 %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %22, i32 %3, 3 %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %23, i32 %4, 4 %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %24, i32 %5, 5 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %25, i32 %6, 6 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %26, i32 %7, 7 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %27, i32 %8, 8 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, 
float, float, float }> %28, i32 %9, 9 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %29, i32 %10, 10 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %30, i32 %11, 11 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %31, i32 %12, 12 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %32, i32 %13, 13 %34 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %33, i32 %14, 14 %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %34, i32 %15, 15 %36 = bitcast i32 %16 to float %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %35, float %36, 16 %38 = bitcast i32 %17 to float %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %37, float %38, 17 %40 = bitcast i32 %18 to float %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %39, float %40, 18 %42 = bitcast i32 %19 to float %43 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %41, float %42, 19 %44 = add i32 %16, %12 %45 = bitcast i32 %44 to float %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %43, float 
%45, 20 %47 = add i32 %16, %12 %48 = bitcast i32 %47 to float %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %46, float %48, 21 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %49 } radeonsi: Compiling shader 7 Vertex Shader Epilog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs void @main() { main_body: ret void } SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 s_load_dwordx4 s[4:7], s[10:11], 0x4 ; C0820B04 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604 buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05 s_waitcnt vmcnt(0) ; BF8C0F70 exp 15, 32, 0, 0, 0, v10, v11, v12, v13 ; F800020F 0D0C0B0A exp 15, 12, 0, 1, 0, v6, v7, v8, v9 ; F80008CF 09080706 s_waitcnt expcnt(0) ; BF8C0F0F Shader epilog disassembly: s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 64 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** radeonsi: Compiling shader 8 Fragment Shader Epilog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = call i32 @llvm.SI.packf16(float %6, float %7) %21 = bitcast i32 %20 to float %22 = call i32 @llvm.SI.packf16(float %8, float %9) %23 = bitcast i32 %22 to float call 
void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %21, float %23, float undef, float undef) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2 attributes #0 = { "InitialPSInputAddr"="16777215" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } SHADER KEY prolog.color_two_side = 0 prolog.flatshade_colors = 0 prolog.poly_stipple = 0 prolog.force_persp_sample_interp = 0 prolog.force_linear_sample_interp = 0 prolog.force_persp_center_interp = 0 prolog.force_linear_center_interp = 0 prolog.bc_optimize_for_persp = 0 prolog.bc_optimize_for_linear = 0 epilog.spi_shader_col_format = 0x4 epilog.color_is_int8 = 0x0 epilog.last_cbuf = 0 epilog.alpha_func = 7 epilog.alpha_to_one = 0 epilog.poly_line_smoothing = 0 epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b32 m0, s11 ; BEFC030B v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END radeonsi: Compiling shader 9 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, 
float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !invariant.load !0 %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3, !amdgpu.uniform !0 %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !invariant.load !0 %28 = extractelement <8 x i32> %24, i32 7 %29 = extractelement <4 x i32> %27, i32 0 %30 = and i32 %29, %28 %31 = insertelement <4 x i32> %27, i32 %30, i32 0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %34 = bitcast float %32 to i32 %35 = bitcast float %33 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %37, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = bitcast float %5 to i32 %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, 
float, float, float, float, float, float, float, float }> undef, i32 %43, 10 %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %39, 11 %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %40, 12 %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %41, 13 %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 14 %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49 } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } !0 = !{} SHADER KEY prolog.color_two_side = 0 prolog.flatshade_colors = 0 prolog.poly_stipple = 0 prolog.force_persp_sample_interp = 0 prolog.force_linear_sample_interp = 0 prolog.force_persp_center_interp = 0 prolog.force_linear_center_interp = 0 prolog.bc_optimize_for_persp = 0 prolog.bc_optimize_for_linear = 0 epilog.spi_shader_col_format = 0x4 epilog.color_is_int8 = 0x0 epilog.last_cbuf = 0 epilog.alpha_func = 7 
epilog.alpha_to_one = 0 epilog.poly_line_smoothing = 0 epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b64 s[6:7], exec ; BE86047E s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C s_mov_b32 m0, s11 ; BEFC030B v_interp_p1_f32 v0, v8, 0, 0, [m0] ; C8000008 v_interp_p2_f32 v0, [v0], v9, 0, 0, [m0] ; C8010009 v_interp_p1_f32 v1, v8, 1, 0, [m0] ; C8040108 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_b32 s0, s0, s19 ; 87001300 v_interp_p2_f32 v1, [v1], v9, 1, 0, [m0] ; C8050109 s_and_b64 exec, exec, s[6:7] ; 87FE067E image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000 s_waitcnt vmcnt(0) ; BF8C0F70 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** Probe color at (25,10) Expected: 0.000000 1.000000 0.000000 Observed: 0.501961 0.501961 0.501961 Test failure on line 82 VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..9] DCL TEMP[0], LOCAL DCL TEMP[1..2], ARRAY(1), LOCAL DCL TEMP[3..8], ARRAY(2), LOCAL DCL TEMP[9..10], ARRAY(3), LOCAL DCL TEMP[11..12], ARRAY(4), LOCAL DCL TEMP[13..14], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 0.0000, 1.0000, 0.0000} IMM[1] INT32 {2, 0, 0, 0} 0: MUL TEMP[0], CONST[6], IN[0].xxxx 1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[9], IN[0].wwww, TEMP[0] 4: MOV TEMP[1], IMM[0].xxxx 5: MOV TEMP[2], IMM[0].xxxx 6: MOV TEMP[3].xy, TEMP[1].xyxx 7: MOV TEMP[4].xy, TEMP[2].xyxx 8: MOV TEMP[9], IMM[0].xxxx 9: MOV TEMP[10], IMM[0].xxxx 10: MOV TEMP[5].xy, 
TEMP[9].xyxx 11: MOV TEMP[6].xy, TEMP[10].xyxx 12: MOV TEMP[11], IMM[0].xxxx 13: MOV TEMP[12], IMM[0].xxxx 14: MOV TEMP[7].xy, TEMP[11].xyxx 15: MOV TEMP[8].xy, TEMP[12].xyxx 16: UMUL TEMP[13].x, CONST[4].xxxx, IMM[1].xxxx 17: UARL ADDR[0].x, TEMP[13].xxxx 18: MOV TEMP[ADDR[0].x+3](2).xy, CONST[0].xyxx 19: UARL ADDR[0].x, TEMP[13].xxxx 20: MOV TEMP[ADDR[0].x+4](2).xy, CONST[1].xyxx 21: UMUL TEMP[13].x, CONST[4].xxxx, IMM[1].xxxx 22: UARL ADDR[0].x, TEMP[13].xxxx 23: MOV TEMP[ADDR[0].x+4](2).xy, CONST[5].xyxx 24: UMUL TEMP[13].x, CONST[4].xxxx, IMM[1].xxxx 25: UMUL TEMP[14].x, CONST[4].xxxx, IMM[1].xxxx 26: UARL ADDR[0].x, TEMP[14].xxxx 27: MUL TEMP[14].xy, TEMP[ADDR[0].x+3](2).xyyy, CONST[2].xxxx 28: UARL ADDR[0].x, TEMP[13].xxxx 29: MAD TEMP[13].xy, TEMP[ADDR[0].x+4](2).xyyy, CONST[2].yyyy, TEMP[14].xyyy 30: ADD TEMP[13].xy, TEMP[13].xyyy, -CONST[3].xyyy 31: DP2 TEMP[13].x, TEMP[13].xyyy, TEMP[13].xyyy 32: FSLT TEMP[13].x, TEMP[13].xxxx, IMM[0].yyyy 33: UIF TEMP[13].xxxx :0 34: MOV TEMP[13], IMM[0].xzxz 35: ELSE :0 36: MOV TEMP[13], IMM[0].zxxz 37: ENDIF 38: MOV OUT[0], TEMP[0] 39: MOV OUT[1], TEMP[13] 40: END radeonsi: Compiling shader 10 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs void @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { main_body: %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !invariant.load !0 
%16 = add i32 %6, %10 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !invariant.load !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %25 = fmul float %24, %18 %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %27 = fmul float %26, %18 %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %29 = fmul float %28, %18 %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !invariant.load !0 %32 = call float @llvm.SI.load.const(<16 x i8> %31, i32 108) %33 = fmul float %32, %18 %34 = call float @llvm.SI.load.const(<16 x i8> %31, i32 112) %35 = fmul float %34, %19 %36 = fadd float %35, %25 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !invariant.load !0 %39 = call float @llvm.SI.load.const(<16 x i8> %38, i32 116) %40 = fmul float %39, %19 %41 = fadd float %40, %27 %42 = call float @llvm.SI.load.const(<16 x i8> %38, i32 120) %43 = fmul float %42, %19 %44 = fadd float %43, %29 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call float @llvm.SI.load.const(<16 x i8> %46, i32 124) %48 = fmul float %47, %19 %49 = fadd float %48, %33 %50 = call float @llvm.SI.load.const(<16 x i8> %46, i32 128) %51 = fmul float %50, %20 %52 = fadd float %51, %36 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* 
%1, i64 0, i64 0, !amdgpu.uniform !0 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0 %55 = call float @llvm.SI.load.const(<16 x i8> %54, i32 132) %56 = fmul float %55, %20 %57 = fadd float %56, %41 %58 = call float @llvm.SI.load.const(<16 x i8> %54, i32 136) %59 = fmul float %58, %20 %60 = fadd float %59, %44 %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !invariant.load !0 %63 = call float @llvm.SI.load.const(<16 x i8> %62, i32 140) %64 = fmul float %63, %20 %65 = fadd float %64, %49 %66 = call float @llvm.SI.load.const(<16 x i8> %62, i32 144) %67 = fmul float %66, %21 %68 = fadd float %67, %52 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !invariant.load !0 %71 = call float @llvm.SI.load.const(<16 x i8> %70, i32 148) %72 = fmul float %71, %21 %73 = fadd float %72, %57 %74 = call float @llvm.SI.load.const(<16 x i8> %70, i32 152) %75 = fmul float %74, %21 %76 = fadd float %75, %60 %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !invariant.load !0 %79 = call float @llvm.SI.load.const(<16 x i8> %78, i32 156) %80 = fmul float %79, %21 %81 = fadd float %80, %65 %82 = call float @llvm.SI.load.const(<16 x i8> %78, i32 64) %83 = bitcast float %82 to i32 %84 = shl i32 %83, 1 %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !invariant.load !0 %87 = call float @llvm.SI.load.const(<16 x i8> %86, i32 0) %88 = call float @llvm.SI.load.const(<16 x i8> %86, i32 4) %89 = insertelement <6 x float> zeroinitializer, float %87, i32 %84 %90 = extractelement <6 x float> %89, i32 0 %91 = 
extractelement <6 x float> %89, i32 1 %92 = extractelement <6 x float> %89, i32 2 %93 = extractelement <6 x float> %89, i32 3 %94 = extractelement <6 x float> %89, i32 4 %95 = extractelement <6 x float> %89, i32 5 %96 = insertelement <6 x float> zeroinitializer, float %88, i32 %84 %97 = extractelement <6 x float> %96, i32 0 %98 = extractelement <6 x float> %96, i32 1 %99 = extractelement <6 x float> %96, i32 2 %100 = extractelement <6 x float> %96, i32 3 %101 = extractelement <6 x float> %96, i32 4 %102 = extractelement <6 x float> %96, i32 5 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !invariant.load !0 %105 = call float @llvm.SI.load.const(<16 x i8> %104, i32 16) %106 = call float @llvm.SI.load.const(<16 x i8> %104, i32 20) %107 = or i32 %84, 1 %array_vector12 = insertelement <6 x float> undef, float %90, i32 0 %array_vector13 = insertelement <6 x float> %array_vector12, float %91, i32 1 %array_vector14 = insertelement <6 x float> %array_vector13, float %92, i32 2 %array_vector15 = insertelement <6 x float> %array_vector14, float %93, i32 3 %array_vector16 = insertelement <6 x float> %array_vector15, float %94, i32 4 %array_vector17 = insertelement <6 x float> %array_vector16, float %95, i32 5 %108 = insertelement <6 x float> %array_vector17, float %105, i32 %107 %109 = extractelement <6 x float> %108, i32 0 %110 = extractelement <6 x float> %108, i32 1 %111 = extractelement <6 x float> %108, i32 2 %112 = extractelement <6 x float> %108, i32 3 %113 = extractelement <6 x float> %108, i32 4 %114 = extractelement <6 x float> %108, i32 5 %115 = or i32 %84, 1 %array_vector18 = insertelement <6 x float> undef, float %97, i32 0 %array_vector19 = insertelement <6 x float> %array_vector18, float %98, i32 1 %array_vector20 = insertelement <6 x float> %array_vector19, float %99, i32 2 %array_vector21 = insertelement <6 x float> %array_vector20, float 
%100, i32 3 %array_vector22 = insertelement <6 x float> %array_vector21, float %101, i32 4 %array_vector23 = insertelement <6 x float> %array_vector22, float %102, i32 5 %116 = insertelement <6 x float> %array_vector23, float %106, i32 %115 %117 = extractelement <6 x float> %116, i32 0 %118 = extractelement <6 x float> %116, i32 1 %119 = extractelement <6 x float> %116, i32 2 %120 = extractelement <6 x float> %116, i32 3 %121 = extractelement <6 x float> %116, i32 4 %122 = extractelement <6 x float> %116, i32 5 %123 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %124 = load <16 x i8>, <16 x i8> addrspace(2)* %123, align 16, !invariant.load !0 %125 = call float @llvm.SI.load.const(<16 x i8> %124, i32 64) %126 = bitcast float %125 to i32 %127 = shl i32 %126, 1 %128 = call float @llvm.SI.load.const(<16 x i8> %124, i32 80) %129 = call float @llvm.SI.load.const(<16 x i8> %124, i32 84) %130 = or i32 %127, 1 %array_vector24 = insertelement <6 x float> undef, float %109, i32 0 %array_vector25 = insertelement <6 x float> %array_vector24, float %110, i32 1 %array_vector26 = insertelement <6 x float> %array_vector25, float %111, i32 2 %array_vector27 = insertelement <6 x float> %array_vector26, float %112, i32 3 %array_vector28 = insertelement <6 x float> %array_vector27, float %113, i32 4 %array_vector29 = insertelement <6 x float> %array_vector28, float %114, i32 5 %131 = insertelement <6 x float> %array_vector29, float %128, i32 %130 %132 = or i32 %127, 1 %array_vector30 = insertelement <6 x float> undef, float %117, i32 0 %array_vector31 = insertelement <6 x float> %array_vector30, float %118, i32 1 %array_vector32 = insertelement <6 x float> %array_vector31, float %119, i32 2 %array_vector33 = insertelement <6 x float> %array_vector32, float %120, i32 3 %array_vector34 = insertelement <6 x float> %array_vector33, float %121, i32 4 %array_vector35 = insertelement <6 x float> %array_vector34, float %122, i32 5 %133 = 
insertelement <6 x float> %array_vector35, float %129, i32 %132 %134 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %135 = load <16 x i8>, <16 x i8> addrspace(2)* %134, align 16, !invariant.load !0 %136 = call float @llvm.SI.load.const(<16 x i8> %135, i32 64) %137 = bitcast float %136 to i32 %138 = shl i32 %137, 1 %139 = call float @llvm.SI.load.const(<16 x i8> %135, i32 64) %140 = bitcast float %139 to i32 %141 = shl i32 %140, 1 %142 = extractelement <6 x float> %131, i32 %141 %143 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %144 = load <16 x i8>, <16 x i8> addrspace(2)* %143, align 16, !invariant.load !0 %145 = call float @llvm.SI.load.const(<16 x i8> %144, i32 32) %146 = fmul float %142, %145 %147 = extractelement <6 x float> %133, i32 %141 %148 = call float @llvm.SI.load.const(<16 x i8> %144, i32 32) %149 = fmul float %147, %148 %150 = or i32 %138, 1 %151 = extractelement <6 x float> %131, i32 %150 %152 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %153 = load <16 x i8>, <16 x i8> addrspace(2)* %152, align 16, !invariant.load !0 %154 = call float @llvm.SI.load.const(<16 x i8> %153, i32 36) %155 = fmul float %151, %154 %156 = fadd float %155, %146 %157 = or i32 %138, 1 %158 = extractelement <6 x float> %133, i32 %157 %159 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %160 = load <16 x i8>, <16 x i8> addrspace(2)* %159, align 16, !invariant.load !0 %161 = call float @llvm.SI.load.const(<16 x i8> %160, i32 36) %162 = fmul float %158, %161 %163 = fadd float %162, %149 %164 = call float @llvm.SI.load.const(<16 x i8> %160, i32 48) %165 = fsub float %156, %164 %166 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %167 = load <16 x i8>, <16 x i8> addrspace(2)* %166, align 16, !invariant.load !0 
%168 = call float @llvm.SI.load.const(<16 x i8> %167, i32 52) %169 = fsub float %163, %168 %170 = fmul float %165, %165 %171 = fmul float %169, %169 %172 = fadd float %170, %171 %173 = fcmp olt float %172, 0x3E312E0BE0000000 %. = select i1 %173, float 0.000000e+00, float 1.000000e+00 %.60 = select i1 %173, float 1.000000e+00, float 0.000000e+00 %174 = and i32 %9, 1 %175 = icmp eq i32 %174, 0 br i1 %175, label %endif-block, label %if-true-block if-true-block: ; preds = %main_body %176 = call float @llvm.AMDGPU.clamp.(float %., float 0.000000e+00, float 1.000000e+00) %177 = call float @llvm.AMDGPU.clamp.(float %.60, float 0.000000e+00, float 1.000000e+00) %178 = call float @llvm.AMDGPU.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) %179 = call float @llvm.AMDGPU.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) br label %endif-block endif-block: ; preds = %main_body, %if-true-block %OUT1.w.0 = phi float [ %179, %if-true-block ], [ 1.000000e+00, %main_body ] %OUT1.z.0 = phi float [ %178, %if-true-block ], [ 0.000000e+00, %main_body ] %OUT1.y.0 = phi float [ %177, %if-true-block ], [ %.60, %main_body ] %OUT1.x.0 = phi float [ %176, %if-true-block ], [ %., %main_body ] call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %OUT1.x.0, float %OUT1.y.0, float %OUT1.z.0, float %OUT1.w.0) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %73, float %76, float %81) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #0 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} LLVM triggered Diagnostic Handler: Illegal instruction 
detected: missing implicit register operands %VGPR6<def> = V_MOVRELS_B32_e32 %VGPR10<undef>, %M0<imp-use>, %EXEC<imp-use>, %VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15_VGPR16_VGPR17<imp-use>, %VGPR10<imp-def>, %VGPR11<imp-def>, %VGPR10_VGPR11<imp-def> LLVM failed to compile shader EE ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:1082 si_shader_select_with_key - Failed to build shader variant (type=0) 1 Probe color at (65,10) Expected: 0.000000 1.000000 0.000000 Observed: 0.501961 0.501961 0.501961 Test failure on line 90 VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..9] DCL TEMP[0], LOCAL DCL TEMP[1..2], ARRAY(1), LOCAL DCL TEMP[3..8], ARRAY(2), LOCAL DCL TEMP[9..10], ARRAY(3), LOCAL DCL TEMP[11..12], ARRAY(4), LOCAL DCL TEMP[13..14], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 0.0000, 1.0000, 0.0000} IMM[1] INT32 {2, 0, 0, 0} 0: MUL TEMP[0], CONST[6], IN[0].xxxx 1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[9], IN[0].wwww, TEMP[0] 4: MOV TEMP[1], IMM[0].xxxx 5: MOV TEMP[2], IMM[0].xxxx 6: MOV TEMP[3].xy, TEMP[1].xyxx 7: MOV TEMP[4].xy, TEMP[2].xyxx 8: MOV TEMP[9], IMM[0].xxxx 9: MOV TEMP[10], IMM[0].xxxx 10: MOV TEMP[5].xy, TEMP[9].xyxx 11: MOV TEMP[6].xy, TEMP[10].xyxx 12: MOV TEMP[11], IMM[0].xxxx 13: MOV TEMP[12], IMM[0].xxxx 14: MOV TEMP[7].xy, TEMP[11].xyxx 15: MOV TEMP[8].xy, TEMP[12].xyxx 16: UMUL TEMP[13].x, CONST[4].xxxx, IMM[1].xxxx 17: UARL ADDR[0].x, TEMP[13].xxxx 18: MOV TEMP[ADDR[0].x+3](2).xy, CONST[0].xyxx 19: UARL ADDR[0].x, TEMP[13].xxxx 20: MOV TEMP[ADDR[0].x+4](2).xy, CONST[1].xyxx 21: UMUL TEMP[13].x, CONST[4].xxxx, IMM[1].xxxx 22: UARL ADDR[0].x, TEMP[13].xxxx 23: MOV TEMP[ADDR[0].x+4](2).xy, CONST[5].xyxx 24: UMUL TEMP[13].x, CONST[4].xxxx, IMM[1].xxxx 25: UMUL TEMP[14].x, CONST[4].xxxx, IMM[1].xxxx 26: UARL ADDR[0].x, TEMP[14].xxxx 27: MUL TEMP[14].xy, TEMP[ADDR[0].x+3](2).xyyy, CONST[2].xxxx 28: UARL ADDR[0].x, 
TEMP[13].xxxx 29: MAD TEMP[13].xy, TEMP[ADDR[0].x+4](2).xyyy, CONST[2].yyyy, TEMP[14].xyyy 30: ADD TEMP[13].xy, TEMP[13].xyyy, -CONST[3].xyyy 31: DP2 TEMP[13].x, TEMP[13].xyyy, TEMP[13].xyyy 32: FSLT TEMP[13].x, TEMP[13].xxxx, IMM[0].yyyy 33: UIF TEMP[13].xxxx :0 34: MOV TEMP[13], IMM[0].xzxz 35: ELSE :0 36: MOV TEMP[13], IMM[0].zxxz 37: ENDIF 38: MOV OUT[0], TEMP[0] 39: MOV OUT[1], TEMP[13] 40: END radeonsi: Compiling shader 11 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs void @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { main_body: %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !invariant.load !0 %16 = add i32 %6, %10 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !invariant.load !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %25 = fmul float %24, %18 %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %27 = fmul float %26, %18 %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %29 = fmul float %28, %18 %30 = 
getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !invariant.load !0 %32 = call float @llvm.SI.load.const(<16 x i8> %31, i32 108) %33 = fmul float %32, %18 %34 = call float @llvm.SI.load.const(<16 x i8> %31, i32 112) %35 = fmul float %34, %19 %36 = fadd float %35, %25 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !invariant.load !0 %39 = call float @llvm.SI.load.const(<16 x i8> %38, i32 116) %40 = fmul float %39, %19 %41 = fadd float %40, %27 %42 = call float @llvm.SI.load.const(<16 x i8> %38, i32 120) %43 = fmul float %42, %19 %44 = fadd float %43, %29 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0 %47 = call float @llvm.SI.load.const(<16 x i8> %46, i32 124) %48 = fmul float %47, %19 %49 = fadd float %48, %33 %50 = call float @llvm.SI.load.const(<16 x i8> %46, i32 128) %51 = fmul float %50, %20 %52 = fadd float %51, %36 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0 %55 = call float @llvm.SI.load.const(<16 x i8> %54, i32 132) %56 = fmul float %55, %20 %57 = fadd float %56, %41 %58 = call float @llvm.SI.load.const(<16 x i8> %54, i32 136) %59 = fmul float %58, %20 %60 = fadd float %59, %44 %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !invariant.load !0 %63 = call float @llvm.SI.load.const(<16 x i8> %62, i32 140) %64 = fmul float %63, %20 %65 = fadd float %64, %49 %66 = call float @llvm.SI.load.const(<16 x i8> %62, i32 144) %67 = fmul float %66, 
%21 %68 = fadd float %67, %52 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !invariant.load !0 %71 = call float @llvm.SI.load.const(<16 x i8> %70, i32 148) %72 = fmul float %71, %21 %73 = fadd float %72, %57 %74 = call float @llvm.SI.load.const(<16 x i8> %70, i32 152) %75 = fmul float %74, %21 %76 = fadd float %75, %60 %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !invariant.load !0 %79 = call float @llvm.SI.load.const(<16 x i8> %78, i32 156) %80 = fmul float %79, %21 %81 = fadd float %80, %65 %82 = call float @llvm.SI.load.const(<16 x i8> %78, i32 64) %83 = bitcast float %82 to i32 %84 = shl i32 %83, 1 %85 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !invariant.load !0 %87 = call float @llvm.SI.load.const(<16 x i8> %86, i32 0) %88 = call float @llvm.SI.load.const(<16 x i8> %86, i32 4) %89 = insertelement <6 x float> zeroinitializer, float %87, i32 %84 %90 = extractelement <6 x float> %89, i32 0 %91 = extractelement <6 x float> %89, i32 1 %92 = extractelement <6 x float> %89, i32 2 %93 = extractelement <6 x float> %89, i32 3 %94 = extractelement <6 x float> %89, i32 4 %95 = extractelement <6 x float> %89, i32 5 %96 = insertelement <6 x float> zeroinitializer, float %88, i32 %84 %97 = extractelement <6 x float> %96, i32 0 %98 = extractelement <6 x float> %96, i32 1 %99 = extractelement <6 x float> %96, i32 2 %100 = extractelement <6 x float> %96, i32 3 %101 = extractelement <6 x float> %96, i32 4 %102 = extractelement <6 x float> %96, i32 5 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, 
!invariant.load !0 %105 = call float @llvm.SI.load.const(<16 x i8> %104, i32 16) %106 = call float @llvm.SI.load.const(<16 x i8> %104, i32 20) %107 = or i32 %84, 1 %array_vector12 = insertelement <6 x float> undef, float %90, i32 0 %array_vector13 = insertelement <6 x float> %array_vector12, float %91, i32 1 %array_vector14 = insertelement <6 x float> %array_vector13, float %92, i32 2 %array_vector15 = insertelement <6 x float> %array_vector14, float %93, i32 3 %array_vector16 = insertelement <6 x float> %array_vector15, float %94, i32 4 %array_vector17 = insertelement <6 x float> %array_vector16, float %95, i32 5 %108 = insertelement <6 x float> %array_vector17, float %105, i32 %107 %109 = extractelement <6 x float> %108, i32 0 %110 = extractelement <6 x float> %108, i32 1 %111 = extractelement <6 x float> %108, i32 2 %112 = extractelement <6 x float> %108, i32 3 %113 = extractelement <6 x float> %108, i32 4 %114 = extractelement <6 x float> %108, i32 5 %115 = or i32 %84, 1 %array_vector18 = insertelement <6 x float> undef, float %97, i32 0 %array_vector19 = insertelement <6 x float> %array_vector18, float %98, i32 1 %array_vector20 = insertelement <6 x float> %array_vector19, float %99, i32 2 %array_vector21 = insertelement <6 x float> %array_vector20, float %100, i32 3 %array_vector22 = insertelement <6 x float> %array_vector21, float %101, i32 4 %array_vector23 = insertelement <6 x float> %array_vector22, float %102, i32 5 %116 = insertelement <6 x float> %array_vector23, float %106, i32 %115 %117 = extractelement <6 x float> %116, i32 0 %118 = extractelement <6 x float> %116, i32 1 %119 = extractelement <6 x float> %116, i32 2 %120 = extractelement <6 x float> %116, i32 3 %121 = extractelement <6 x float> %116, i32 4 %122 = extractelement <6 x float> %116, i32 5 %123 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %124 = load <16 x i8>, <16 x i8> addrspace(2)* %123, align 16, !invariant.load !0 %125 = call 
float @llvm.SI.load.const(<16 x i8> %124, i32 64) %126 = bitcast float %125 to i32 %127 = shl i32 %126, 1 %128 = call float @llvm.SI.load.const(<16 x i8> %124, i32 80) %129 = call float @llvm.SI.load.const(<16 x i8> %124, i32 84) %130 = or i32 %127, 1 %array_vector24 = insertelement <6 x float> undef, float %109, i32 0 %array_vector25 = insertelement <6 x float> %array_vector24, float %110, i32 1 %array_vector26 = insertelement <6 x float> %array_vector25, float %111, i32 2 %array_vector27 = insertelement <6 x float> %array_vector26, float %112, i32 3 %array_vector28 = insertelement <6 x float> %array_vector27, float %113, i32 4 %array_vector29 = insertelement <6 x float> %array_vector28, float %114, i32 5 %131 = insertelement <6 x float> %array_vector29, float %128, i32 %130 %132 = or i32 %127, 1 %array_vector30 = insertelement <6 x float> undef, float %117, i32 0 %array_vector31 = insertelement <6 x float> %array_vector30, float %118, i32 1 %array_vector32 = insertelement <6 x float> %array_vector31, float %119, i32 2 %array_vector33 = insertelement <6 x float> %array_vector32, float %120, i32 3 %array_vector34 = insertelement <6 x float> %array_vector33, float %121, i32 4 %array_vector35 = insertelement <6 x float> %array_vector34, float %122, i32 5 %133 = insertelement <6 x float> %array_vector35, float %129, i32 %132 %134 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %135 = load <16 x i8>, <16 x i8> addrspace(2)* %134, align 16, !invariant.load !0 %136 = call float @llvm.SI.load.const(<16 x i8> %135, i32 64) %137 = bitcast float %136 to i32 %138 = shl i32 %137, 1 %139 = call float @llvm.SI.load.const(<16 x i8> %135, i32 64) %140 = bitcast float %139 to i32 %141 = shl i32 %140, 1 %142 = extractelement <6 x float> %131, i32 %141 %143 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %144 = load <16 x i8>, <16 x i8> addrspace(2)* %143, align 16, 
!invariant.load !0 %145 = call float @llvm.SI.load.const(<16 x i8> %144, i32 32) %146 = fmul float %142, %145 %147 = extractelement <6 x float> %133, i32 %141 %148 = call float @llvm.SI.load.const(<16 x i8> %144, i32 32) %149 = fmul float %147, %148 %150 = or i32 %138, 1 %151 = extractelement <6 x float> %131, i32 %150 %152 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %153 = load <16 x i8>, <16 x i8> addrspace(2)* %152, align 16, !invariant.load !0 %154 = call float @llvm.SI.load.const(<16 x i8> %153, i32 36) %155 = fmul float %151, %154 %156 = fadd float %155, %146 %157 = or i32 %138, 1 %158 = extractelement <6 x float> %133, i32 %157 %159 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %160 = load <16 x i8>, <16 x i8> addrspace(2)* %159, align 16, !invariant.load !0 %161 = call float @llvm.SI.load.const(<16 x i8> %160, i32 36) %162 = fmul float %158, %161 %163 = fadd float %162, %149 %164 = call float @llvm.SI.load.const(<16 x i8> %160, i32 48) %165 = fsub float %156, %164 %166 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %167 = load <16 x i8>, <16 x i8> addrspace(2)* %166, align 16, !invariant.load !0 %168 = call float @llvm.SI.load.const(<16 x i8> %167, i32 52) %169 = fsub float %163, %168 %170 = fmul float %165, %165 %171 = fmul float %169, %169 %172 = fadd float %170, %171 %173 = fcmp olt float %172, 0x3E312E0BE0000000 %. 
= select i1 %173, float 0.000000e+00, float 1.000000e+00 %.60 = select i1 %173, float 1.000000e+00, float 0.000000e+00 %174 = and i32 %9, 1 %175 = icmp eq i32 %174, 0 br i1 %175, label %endif-block, label %if-true-block if-true-block: ; preds = %main_body %176 = call float @llvm.AMDGPU.clamp.(float %., float 0.000000e+00, float 1.000000e+00) %177 = call float @llvm.AMDGPU.clamp.(float %.60, float 0.000000e+00, float 1.000000e+00) %178 = call float @llvm.AMDGPU.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) %179 = call float @llvm.AMDGPU.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) br label %endif-block endif-block: ; preds = %main_body, %if-true-block %OUT1.w.0 = phi float [ %179, %if-true-block ], [ 1.000000e+00, %main_body ] %OUT1.z.0 = phi float [ %178, %if-true-block ], [ 0.000000e+00, %main_body ] %OUT1.y.0 = phi float [ %177, %if-true-block ], [ %.60, %main_body ] %OUT1.x.0 = phi float [ %176, %if-true-block ], [ %., %main_body ] call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %OUT1.x.0, float %OUT1.y.0, float %OUT1.z.0, float %OUT1.w.0) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %73, float %76, float %81) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #0 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} LLVM triggered Diagnostic Handler: Illegal instruction detected: missing implicit register operands %VGPR6<def> = V_MOVRELS_B32_e32 %VGPR10<undef>, %M0<imp-use>, %EXEC<imp-use>, %VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15_VGPR16_VGPR17<imp-use>, %VGPR10<imp-def>, %VGPR11<imp-def>, 
%VGPR10_VGPR11<imp-def> LLVM failed to compile shader EE ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:1082 si_shader_select_with_key - Failed to build shader variant (type=0) 1 Probe color at (105,10) Expected: 0.000000 1.000000 0.000000 Observed: 0.501961 0.501961 0.501961 Test failure on line 98 PIGLIT: {"result": "fail" }
VERT PROPERTY NEXT_SHADER TESS_CTRL DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] IMM[0] FLT32 { 0.0000, 0.7500, 1.0000, 0.2500} 0: MOV OUT[0], IN[0] 1: MOV OUT[1].yzw, IMM[0].xxyz 2: MOV OUT[1].x, IMM[0].wwww 3: END radeonsi: Compiling shader 1 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs void @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { main_body: %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !invariant.load !0 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %14) %18 = lshr i32 %9, 13 %19 = and i32 %18, 255 %20 = mul i32 %19, %11 %bc = bitcast <4 x float> %17 to <4 x i32> %21 = extractelement <4 x i32> %bc, i32 0 %22 = sext i32 %20 to i64 %23 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* null, i64 0, i64 %22 store i32 %21, i32 addrspace(3)* %23, align 4 %24 = add i32 %20, 1 %bc1 = bitcast <4 x float> %17 to <4 x i32> %25 = extractelement <4 x i32> %bc1, i32 1 %26 = sext i32 %24 to i64 %27 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* null, i64 0, i64 %26 store i32 %25, i32 addrspace(3)* %27, align 4 %28 = add i32 %20, 2 %bc2 = bitcast <4 x float> %17 to <4 x i32> %29 = extractelement <4 x i32> %bc2, i32 2 %30 = sext i32 %28 to i64 %31 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* null, i64 0, i64 %30 store i32 %29, i32 addrspace(3)* 
%31, align 4 %32 = add i32 %20, 3 %bc3 = bitcast <4 x float> %17 to <4 x i32> %33 = extractelement <4 x i32> %bc3, i32 3 %34 = sext i32 %32 to i64 %35 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* null, i64 0, i64 %34 store i32 %33, i32 addrspace(3)* %35, align 4 %36 = add i32 %20, 16 %37 = sext i32 %36 to i64 %38 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* null, i64 0, i64 %37 store i32 1048576000, i32 addrspace(3)* %38, align 4 %39 = add i32 %20, 17 %40 = sext i32 %39 to i64 %41 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* null, i64 0, i64 %40 store i32 0, i32 addrspace(3)* %41, align 4 %42 = add i32 %20, 18 %43 = sext i32 %42 to i64 %44 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* null, i64 0, i64 %43 store i32 1061158912, i32 addrspace(3)* %44, align 4 %45 = add i32 %20, 19 %46 = sext i32 %45 to i64 %47 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* null, i64 0, i64 %46 store i32 1065353216, i32 addrspace(3)* %47, align 4 ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 attributes #0 = { nounwind readnone } !0 = !{} FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 2 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, 
<2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %27 = bitcast float %5 to i32 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33 } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } 
radeonsi: Compiling shader 5 Vertex Shader Prolog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { main_body: %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> undef, i32 %0, 0 %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %20, i32 %1, 1 %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %21, i32 %2, 2 %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %22, i32 %3, 3 %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %23, i32 %4, 4 %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %24, i32 %5, 5 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %25, i32 %6, 6 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %26, i32 %7, 7 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %27, i32 %8, 8 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> 
%28, i32 %9, 9 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %29, i32 %10, 10 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %30, i32 %11, 11 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %31, i32 %12, 12 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %32, i32 %13, 13 %34 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %33, i32 %14, 14 %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %34, i32 %15, 15 %36 = bitcast i32 %16 to float %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %35, float %36, 16 %38 = bitcast i32 %17 to float %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %37, float %38, 17 %40 = bitcast i32 %18 to float %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %39, float %40, 18 %42 = bitcast i32 %19 to float %43 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %41, float %42, 19 %44 = add i32 %16, %12 %45 = bitcast i32 %44 to float %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %43, float %45, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
float, float, float, float, float }> %46 } Use of %vreg128 does not have a corresponding definition on every path: 1312r DS_WRITE2_B32 %vreg172, %vreg122, %vreg128, 12, 14, 0, %M0<imp-use>, %EXEC<imp-use>; mem:ST4[%121(addrspace=3)] ST4[%112(addrspace=3)] VGPR_32:%vreg172,%vreg122,%vreg128 LLVM ERROR: Use not jointly dominated by defs.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev