On 21.07.2016 18:17, Matt Arsenault wrote: >> On Jul 21, 2016, at 01:03, Michel Dänzer <mic...@daenzer.net >> <mailto:mic...@daenzer.net>> wrote: >> >> On 21.07.2016 00:04, Michel Dänzer wrote: >>> On 15.07.2016 05:15, Marek Olšák wrote: >>>> Module: Mesa >>>> Branch: master >>>> Commit: f84e9d749fbb6da73a60fb70e6725db773c9b8f8 >>>> URL: >>>> >>>> http://cgit.freedesktop.org/mesa/mesa/commit/?id=f84e9d749fbb6da73a60fb70e6725db773c9b8f8 >>>> >>>> Author: Marek Olšák <marek.ol...@amd.com <mailto:marek.ol...@amd.com>> >>>> Date: Thu Jul 14 22:07:46 2016 +0200 >>>> >>>> Revert "radeon/llvm: Use alloca instructions for larger arrays" >>>> >>>> This reverts commit 513fccdfb68e6a71180e21827f071617c93fd09b. >>>> >>>> Bioshock Infinite hangs with that. >>> >>> Unfortunately, this change caused the piglit test >>> shaders@glsl-fs-vec4-indexing-temp-dst-in-loop (and possibly others) to >>> hang my Kaveri. Any ideas for how we can get out of this conundrum? >> >> The hang was introduced by LLVM SVN r275934 ("AMDGPU: Expand register >> indexing pseudos in custom inserter"). The good/bad (without/with >> r275934) shader dumps and the GALLIUM_DDEBUG=800 dump corresponding to >> the hang are attached. >> >> >> BTW, even with Marek's change above reverted, I still see some piglit >> regressions compared to last week, but I'm not sure if those are all >> related to the same LLVM change. >> >> >> -- >> Earthling Michel Dänzer | >> http://www.amd.com <http://www.amd.com/> >> Libre software enthusiast | Mesa and X developer >> <glsl-fs-vec4-indexing-temp-dst-in-loop.bad><glsl-fs-vec4-indexing-temp-dst-in-loop.good><shader_runner_3339_00000000.txt> > > This fixes the verifier error in it: https://reviews.llvm.org/D22616
This seems to fix the hang, thanks! > This fixes another issue which may be > related: https://reviews.llvm.org/D22556 Even with that applied as well, there are still piglit regressions compared to early last week, see the attached dumps (look for "LLVM triggered Diagnostic Handler:"). -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer
FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 1 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32, float, float, float, float) #0 { main_body: %27 = bitcast float %5 to i32 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 
14 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33 } attributes #0 = { "InitialPSInputAddr"="36983" } VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: MOV OUT[1], IN[1] 5: END radeonsi: Compiling shader 2 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { main_body: %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !invariant.load !0 %17 = call float @llvm.SI.load.const(<16 x i8> %16, i32 0) %18 = call float @llvm.SI.load.const(<16 x i8> %16, i32 4) %19 = call float @llvm.SI.load.const(<16 x i8> %16, i32 8) %20 = call float @llvm.SI.load.const(<16 x i8> %16, i32 12) %21 = call float @llvm.SI.load.const(<16 x i8> %16, i32 16) %22 = call float @llvm.SI.load.const(<16 x i8> %16, i32 20) %23 = call float 
@llvm.SI.load.const(<16 x i8> %16, i32 24) %24 = call float @llvm.SI.load.const(<16 x i8> %16, i32 28) %25 = call float @llvm.SI.load.const(<16 x i8> %16, i32 32) %26 = call float @llvm.SI.load.const(<16 x i8> %16, i32 36) %27 = call float @llvm.SI.load.const(<16 x i8> %16, i32 40) %28 = call float @llvm.SI.load.const(<16 x i8> %16, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %16, i32 48) %30 = call float @llvm.SI.load.const(<16 x i8> %16, i32 52) %31 = call float @llvm.SI.load.const(<16 x i8> %16, i32 56) %32 = call float @llvm.SI.load.const(<16 x i8> %16, i32 60) %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !invariant.load !0 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %13) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = extractelement <4 x float> %35, i32 3 %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !invariant.load !0 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %14) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = fmul float %36, %17 %48 = fmul float %36, %18 %49 = fmul float %36, %19 %50 = fmul float %36, %20 %51 = fmul float %37, %21 %52 = fadd float %51, %47 %53 = fmul float %37, %22 %54 = fadd float %53, %48 %55 = fmul float %37, %23 %56 = fadd float %55, %49 %57 = fmul float %37, %24 %58 = fadd float %57, %50 %59 = fmul float %38, %25 %60 = fadd float %59, %52 %61 = fmul float %38, %26 %62 = fadd float %61, %54 %63 = fmul float %38, %27 %64 = fadd float %63, %56 %65 = fmul float %38, %28 %66 = fadd float %65, %58 %67 = fmul float 
%39, %29 %68 = fadd float %67, %60 %69 = fmul float %39, %30 %70 = fadd float %69, %62 %71 = fmul float %39, %31 %72 = fadd float %71, %64 %73 = fmul float %39, %32 %74 = fadd float %73, %66 %75 = and i32 %8, 1 %76 = icmp eq i32 %75, 0 br i1 %76, label %endif-block, label %if-true-block if-true-block: ; preds = %main_body %77 = call float @llvm.AMDGPU.clamp.(float %43, float 0.000000e+00, float 1.000000e+00) %78 = call float @llvm.AMDGPU.clamp.(float %44, float 0.000000e+00, float 1.000000e+00) %79 = call float @llvm.AMDGPU.clamp.(float %45, float 0.000000e+00, float 1.000000e+00) %80 = call float @llvm.AMDGPU.clamp.(float %46, float 0.000000e+00, float 1.000000e+00) br label %endif-block endif-block: ; preds = %main_body, %if-true-block %.06 = phi float [ %77, %if-true-block ], [ %43, %main_body ] %.05 = phi float [ %78, %if-true-block ], [ %44, %main_body ] %.04 = phi float [ %79, %if-true-block ], [ %45, %main_body ] %.0 = phi float [ %80, %if-true-block ], [ %46, %main_body ] %81 = bitcast i32 %11 to float %82 = insertvalue <{ float, float, float }> undef, float %81, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %.06, float %.05, float %.04, float %.0) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %70, float %72, float %74) ret <{ float, float, float }> %82 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #0 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..5] DCL TEMP[0..3], ARRAY(1), LOCAL DCL TEMP[4..5], LOCAL DCL ADDR[0] IMM[0] FLT32 { 
0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[3], IMM[0].xyxx 1: UARL ADDR[0].x, CONST[0].xxxx 2: UARL ADDR[0].x, CONST[0].xxxx 3: MOV TEMP[4], TEMP[ADDR[0].x](1) 4: UARL ADDR[0].x, CONST[1].xxxx 5: MOV TEMP[ADDR[0].x](1), TEMP[4] 6: MOV TEMP[4], TEMP[2] 7: MUL TEMP[5], CONST[2], IN[0].xxxx 8: MAD TEMP[5], CONST[3], IN[0].yyyy, TEMP[5] 9: MAD TEMP[5], CONST[4], IN[0].zzzz, TEMP[5] 10: MAD TEMP[5], CONST[5], IN[0].wwww, TEMP[5] 11: MOV OUT[0], TEMP[5] 12: MOV OUT[1], TEMP[4] 13: END radeonsi: Compiling shader 3 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { main_body: %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !invariant.load !0 %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 0) %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 32) %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 36) %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 40) %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 44) %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 60) %26 = call float 
@llvm.SI.load.const(<16 x i8> %15, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %15, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %15, i32 92) %34 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !invariant.load !0 %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %35, i32 0, i32 %13) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = bitcast float %16 to i32 %42 = extractelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, i32 %41 %43 = extractelement <4 x float> <float undef, float undef, float undef, float 1.000000e+00>, i32 %41 %44 = extractelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, i32 %41 %45 = extractelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, i32 %41 %46 = bitcast float %17 to i32 %47 = insertelement <4 x float> undef, float %42, i32 %46 %48 = extractelement <4 x float> %47, i32 2 %49 = insertelement <4 x float> undef, float %43, i32 %46 %50 = extractelement <4 x float> %49, i32 2 %51 = insertelement <4 x float> undef, float %44, i32 %46 %52 = extractelement <4 x float> %51, i32 2 %53 = insertelement <4 x float> undef, float %45, i32 %46 %54 = extractelement <4 x float> %53, i32 2 %55 = fmul float %18, %37 %56 = fmul float %19, %37 %57 = fmul float %20, %37 %58 = fmul float %21, %37 %59 = fmul float %22, %38 %60 = fadd float %59, %55 %61 = fmul float %23, %38 %62 = fadd float %61, %56 %63 = fmul 
float %24, %38 %64 = fadd float %63, %57 %65 = fmul float %25, %38 %66 = fadd float %65, %58 %67 = fmul float %26, %39 %68 = fadd float %67, %60 %69 = fmul float %27, %39 %70 = fadd float %69, %62 %71 = fmul float %28, %39 %72 = fadd float %71, %64 %73 = fmul float %29, %39 %74 = fadd float %73, %66 %75 = fmul float %30, %40 %76 = fadd float %75, %68 %77 = fmul float %31, %40 %78 = fadd float %77, %70 %79 = fmul float %32, %40 %80 = fadd float %79, %72 %81 = fmul float %33, %40 %82 = fadd float %81, %74 %83 = and i32 %8, 1 %84 = icmp eq i32 %83, 0 br i1 %84, label %endif-block, label %if-true-block if-true-block: ; preds = %main_body %85 = call float @llvm.AMDGPU.clamp.(float %48, float 0.000000e+00, float 1.000000e+00) %86 = call float @llvm.AMDGPU.clamp.(float %50, float 0.000000e+00, float 1.000000e+00) %87 = call float @llvm.AMDGPU.clamp.(float %52, float 0.000000e+00, float 1.000000e+00) %88 = call float @llvm.AMDGPU.clamp.(float %54, float 0.000000e+00, float 1.000000e+00) br label %endif-block endif-block: ; preds = %main_body, %if-true-block %.026 = phi float [ %88, %if-true-block ], [ %54, %main_body ] %.025 = phi float [ %87, %if-true-block ], [ %52, %main_body ] %.024 = phi float [ %86, %if-true-block ], [ %50, %main_body ] %.0 = phi float [ %85, %if-true-block ], [ %48, %main_body ] %89 = bitcast i32 %11 to float %90 = insertvalue <{ float, float, float }> undef, float %89, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %.0, float %.024, float %.025, float %.026) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %76, float %78, float %80, float %82) ret <{ float, float, float }> %90 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #0 ; Function Attrs: nounwind declare void 
@llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} LLVM triggered Diagnostic Handler: Illegal instruction detected: src0 should be subreg of implicit vector use V_MOVRELD_B32_e32 %VGPR0<undef>, %VGPR0<kill>, %M0<imp-use>, %EXEC<imp-use>, %VGPR7_VGPR8_VGPR9_VGPR10<imp-def,tied5>, %VGPR7_VGPR8_VGPR9_VGPR10<imp-use,undef,tied4> LLVM triggered Diagnostic Handler: Illegal instruction detected: src0 should be subreg of implicit vector use V_MOVRELD_B32_e32 %VGPR0<undef>, %VGPR1<kill>, %M0<imp-use,kill>, %EXEC<imp-use>, %VGPR10_VGPR11_VGPR12_VGPR13<imp-def,tied5>, %VGPR10_VGPR11_VGPR12_VGPR13<imp-use,undef,tied4> LLVM failed to compile shader radeonsi: can't compile a main shader part FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 4 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6) %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6) %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6) %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 
%6) %27 = bitcast float %5 to i32 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33 } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END radeonsi: Compiling shader 5 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval 
dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { main_body: %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !invariant.load !0 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %13) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !invariant.load !0 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %14) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = bitcast i32 %11 to float %30 = insertvalue <{ float, float, float }> undef, float %29, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float %21) ret <{ float, float, float }> %30 } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} radeonsi: Compiling shader 6 Vertex Shader Prolog LLVM IR: ; 
ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { main_body: %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> undef, i32 %0, 0 %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %19, i32 %1, 1 %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %20, i32 %2, 2 %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %21, i32 %3, 3 %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %22, i32 %4, 4 %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %23, i32 %5, 5 %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %24, i32 %6, 6 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %25, i32 %7, 7 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %26, i32 %8, 8 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %27, i32 %9, 9 %29 = insertvalue <{ i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %28, i32 %10, 10 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %29, i32 %11, 11 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %30, i32 %12, 12 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %31, i32 %13, 13 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %32, i32 %14, 14 %34 = bitcast i32 %15 to float %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %33, float %34, 15 %36 = bitcast i32 %16 to float %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %35, float %36, 16 %38 = bitcast i32 %17 to float %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %37, float %38, 17 %40 = bitcast i32 %18 to float %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %39, float %40, 18 %42 = add i32 %15, %12 %43 = bitcast i32 %42 to float %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %41, float %43, 19 %45 = add i32 %15, %12 %46 = bitcast i32 %45 to float %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %44, float %46, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, float, float, float, float, float, float }> %47 } radeonsi: Compiling shader 7 Vertex Shader Epilog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs void @main() { main_body: ret void } Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 s_load_dwordx4 s[4:7], s[10:11], 0x4 ; C0820B04 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604 buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05 s_waitcnt vmcnt(0) ; BF8C0F70 exp 15, 32, 0, 0, 0, v10, v11, v12, v13 ; F800020F 0D0C0B0A exp 15, 12, 0, 1, 0, v6, v7, v8, v9 ; F80008CF 09080706 s_waitcnt expcnt(0) ; BF8C0F0F Shader epilog disassembly: s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 64 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** radeonsi: Compiling shader 8 Fragment Shader Epilog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = call i32 @llvm.SI.packf16(float %6, float %7) %21 = bitcast i32 %20 to float %22 = call i32 @llvm.SI.packf16(float %8, float %9) %23 = bitcast i32 %22 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %21, float %23, float undef, float undef) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2 attributes #0 = { "InitialPSInputAddr"="16777215" } attributes #1 = { nounwind readnone } attributes 
#2 = { nounwind } Pixel Shader: Shader main disassembly: s_mov_b32 m0, s11 ; BEFC030B v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..5] DCL TEMP[0..3], ARRAY(1), LOCAL DCL TEMP[4..5], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[3], IMM[0].xyxx 1: UARL ADDR[0].x, CONST[0].xxxx 2: UARL ADDR[0].x, CONST[0].xxxx 3: MOV TEMP[4], TEMP[ADDR[0].x](1) 4: UARL ADDR[0].x, CONST[1].xxxx 5: MOV TEMP[ADDR[0].x](1), TEMP[4] 6: MOV TEMP[4], TEMP[2] 7: MUL TEMP[5], CONST[2], IN[0].xxxx 8: MAD TEMP[5], CONST[3], IN[0].yyyy, TEMP[5] 9: MAD TEMP[5], CONST[4], IN[0].zzzz, TEMP[5] 10: MAD TEMP[5], CONST[5], IN[0].wwww, TEMP[5] 11: MOV OUT[0], TEMP[5] 12: MOV OUT[1], TEMP[4] 13: END radeonsi: Compiling shader 9 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs void @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval 
dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { main_body: %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !invariant.load !0 %15 = call float @llvm.SI.load.const(<16 x i8> %14, i32 0) %16 = call float @llvm.SI.load.const(<16 x i8> %14, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %14, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %14, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %14, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %14, i32 44) %21 = call float @llvm.SI.load.const(<16 x i8> %14, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %14, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %14, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %14, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %14, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %14, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %14, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %14, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %14, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %14, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %14, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %14, i32 92) %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !invariant.load !0 %35 = add i32 %6, %9 %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = bitcast float %15 to i32 %42 = extractelement <4 x float> <float undef, float undef, 
float undef, float 0.000000e+00>, i32 %41 %43 = extractelement <4 x float> <float undef, float undef, float undef, float 1.000000e+00>, i32 %41 %44 = extractelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, i32 %41 %45 = extractelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, i32 %41 %46 = bitcast float %16 to i32 %47 = insertelement <4 x float> undef, float %42, i32 %46 %48 = extractelement <4 x float> %47, i32 2 %49 = insertelement <4 x float> undef, float %43, i32 %46 %50 = extractelement <4 x float> %49, i32 2 %51 = insertelement <4 x float> undef, float %44, i32 %46 %52 = extractelement <4 x float> %51, i32 2 %53 = insertelement <4 x float> undef, float %45, i32 %46 %54 = extractelement <4 x float> %53, i32 2 %55 = fmul float %17, %37 %56 = fmul float %18, %37 %57 = fmul float %19, %37 %58 = fmul float %20, %37 %59 = fmul float %21, %38 %60 = fadd float %59, %55 %61 = fmul float %22, %38 %62 = fadd float %61, %56 %63 = fmul float %23, %38 %64 = fadd float %63, %57 %65 = fmul float %24, %38 %66 = fadd float %65, %58 %67 = fmul float %25, %39 %68 = fadd float %67, %60 %69 = fmul float %26, %39 %70 = fadd float %69, %62 %71 = fmul float %27, %39 %72 = fadd float %71, %64 %73 = fmul float %28, %39 %74 = fadd float %73, %66 %75 = fmul float %29, %40 %76 = fadd float %75, %68 %77 = fmul float %30, %40 %78 = fadd float %77, %70 %79 = fmul float %31, %40 %80 = fadd float %79, %72 %81 = fmul float %32, %40 %82 = fadd float %81, %74 %83 = and i32 %8, 1 %84 = icmp eq i32 %83, 0 br i1 %84, label %endif-block, label %if-true-block if-true-block: ; preds = %main_body %85 = call float @llvm.AMDGPU.clamp.(float %48, float 0.000000e+00, float 1.000000e+00) %86 = call float @llvm.AMDGPU.clamp.(float %50, float 0.000000e+00, float 1.000000e+00) %87 = call float @llvm.AMDGPU.clamp.(float %52, float 0.000000e+00, float 1.000000e+00) %88 = call float @llvm.AMDGPU.clamp.(float %54, float 0.000000e+00, float 1.000000e+00) 
br label %endif-block endif-block: ; preds = %main_body, %if-true-block %.026 = phi float [ %88, %if-true-block ], [ %54, %main_body ] %.025 = phi float [ %87, %if-true-block ], [ %52, %main_body ] %.024 = phi float [ %86, %if-true-block ], [ %50, %main_body ] %.0 = phi float [ %85, %if-true-block ], [ %48, %main_body ] call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %.0, float %.024, float %.025, float %.026) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %76, float %78, float %80, float %82) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #0 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} LLVM triggered Diagnostic Handler: Illegal instruction detected: src0 should be subreg of implicit vector use V_MOVRELD_B32_e32 %VGPR0<undef>, %VGPR4, %M0<imp-use>, %EXEC<imp-use>, %VGPR4_VGPR5_VGPR6_VGPR7<imp-def,tied5>, %VGPR4_VGPR5_VGPR6_VGPR7<imp-use,undef,tied4> LLVM triggered Diagnostic Handler: Illegal instruction detected: src0 should be subreg of implicit vector use V_MOVRELD_B32_e32 %VGPR0<undef>, %VGPR8, %M0<imp-use,kill>, %EXEC<imp-use>, %VGPR7_VGPR8_VGPR9_VGPR10<imp-def,tied5>, %VGPR7_VGPR8_VGPR9_VGPR10<imp-use,undef,tied4> LLVM failed to compile shader EE ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:1041 si_shader_select_with_key - Failed to build shader variant (type=0) 1 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END radeonsi: Compiling shader 10 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" 
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !invariant.load !0 %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3, !amdgpu.uniform !0 %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !invariant.load !0 %28 = extractelement <8 x i32> %24, i32 7 %29 = extractelement <4 x i32> %27, i32 0 %30 = and i32 %29, %28 %31 = insertelement <4 x i32> %27, i32 %30, i32 0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %34 = bitcast float %32 to i32 %35 = bitcast float %33 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %37, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = bitcast 
float %5 to i32 %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %43, 10 %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %39, 11 %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %40, 12 %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %41, 13 %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 14 %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49 } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } !0 = !{} Pixel Shader: Shader main disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C s_mov_b32 m0, s11 ; BEFC030B v_interp_p1_f32 v0, v8, 0, 0, [m0] ; C8000008 v_interp_p2_f32 v0, [v0], v9, 0, 0, [m0] ; C8010009 
v_interp_p1_f32 v1, v8, 1, 0, [m0] ; C8040108 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_b32 s0, s0, s19 ; 87001300 v_interp_p2_f32 v1, [v1], v9, 1, 0, [m0] ; C8050109 image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000 s_waitcnt vmcnt(0) ; BF8C0F70 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 72 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ********************
VERT PROPERTY NEXT_SHADER GEOM DCL IN[0] DCL OUT[0], GENERIC[0] 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 1 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs void @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) { main_body: %14 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i64 2, !amdgpu.uniform !0 %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !invariant.load !0 %16 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %17 = load <16 x i8>, <16 x i8> addrspace(2)* %16, align 16, !invariant.load !0 %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %13) %bc = bitcast <4 x float> %18 to <4 x i32> %19 = extractelement <4 x i32> %bc, i32 0 call void @llvm.SI.tbuffer.store.i32(<16 x i8> %15, i32 %19, i32 1, i32 undef, i32 %8, i32 64, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) %bc1 = bitcast <4 x float> %18 to <4 x i32> %20 = extractelement <4 x i32> %bc1, i32 1 call void @llvm.SI.tbuffer.store.i32(<16 x i8> %15, i32 %20, i32 1, i32 undef, i32 %8, i32 68, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) %bc2 = bitcast <4 x float> %18 to <4 x i32> %21 = extractelement <4 x i32> %bc2, i32 2 call void @llvm.SI.tbuffer.store.i32(<16 x i8> %15, i32 %21, i32 1, i32 undef, i32 %8, i32 72, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) %bc3 = bitcast <4 x float> %18 to <4 x i32> %22 = extractelement <4 x i32> %bc3, i32 3 
call void @llvm.SI.tbuffer.store.i32(<16 x i8> %15, i32 %22, i32 1, i32 undef, i32 %8, i32 76, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 2 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %25 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %6, <2 x i32> %8) %27 = bitcast float %5 to i32 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33 } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } radeonsi: Compiling shader 4 Vertex Shader Prolog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { main_body: %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> undef, i32 %0, 0 %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, 
float, float }> %19, i32 %1, 1 %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %20, i32 %2, 2 %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %21, i32 %3, 3 %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %22, i32 %4, 4 %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %23, i32 %5, 5 %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %24, i32 %6, 6 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %25, i32 %7, 7 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %26, i32 %8, 8 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %27, i32 %9, 9 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %28, i32 %10, 10 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %29, i32 %11, 11 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %30, i32 %12, 12 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %31, i32 %13, 13 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %32, i32 %14, 14 %34 = bitcast i32 %15 to float %35 
= insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %33, float %34, 15 %36 = bitcast i32 %16 to float %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %35, float %36, 16 %38 = bitcast i32 %17 to float %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %37, float %38, 17 %40 = bitcast i32 %18 to float %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %39, float %40, 18 %42 = add i32 %15, %12 %43 = bitcast i32 %42 to float %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %41, float %43, 19 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %44 } LLVM triggered Diagnostic Handler: Illegal instruction detected: missing implicit register operands %VGPR4<def> = V_MOVRELS_B32_e32 %VGPR5<undef>, %M0<imp-use>, %EXEC<imp-use>, %VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11<imp-use>, %VGPR4_VGPR5<imp-def>, %VGPR6_VGPR7<imp-def>, %VGPR8_VGPR9<imp-def>, %VGPR10_VGPR11<imp-def> LLVM triggered Diagnostic Handler: Illegal instruction detected: missing implicit register operands %VGPR5<def> = V_MOVRELS_B32_e32 %VGPR13<undef>, %M0<imp-use>, %EXEC<imp-use>, %VGPR12_VGPR13_VGPR14_VGPR15_VGPR16_VGPR17_VGPR18_VGPR19<imp-use>, %VGPR12_VGPR13<imp-def> LLVM failed to compile shader EE ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:1041 si_shader_select_with_key - Failed to build shader variant (type=2) 1 radeonsi: can't create a monolithic shader Vertex Shader as ES: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C Shader main disassembly: s_load_dwordx4 s[4:7], s[10:11], 0x0 ; 
C0820B00 s_load_dwordx4 s[0:3], s[0:1], 0x8 ; C0800108 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v4, s[4:7], 0 idxen ; E00C2000 80010004 s_waitcnt vmcnt(0) ; BF8C0F70 tbuffer_store_format_x v0, 0x40, 0, 0, -1, 0, 4, 4, v0, s[0:3], -1, 0, s14 ; EA244040 0E400000 tbuffer_store_format_x v1, 0x44, 0, 0, -1, 0, 4, 4, v0, s[0:3], -1, 0, s14 ; EA244044 0E400100 tbuffer_store_format_x v2, 0x48, 0, 0, -1, 0, 4, 4, v0, s[0:3], -1, 0, s14 ; EA244048 0E400200 tbuffer_store_format_x v3, 0x4c, 0, 0, -1, 0, 4, 4, v0, s[0:3], -1, 0, s14 ; EA24404C 0E400300 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 64 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** LLVM triggered Diagnostic Handler: Illegal instruction detected: missing implicit register operands %VGPR4<def> = V_MOVRELS_B32_e32 %VGPR5<undef>, %M0<imp-use>, %EXEC<imp-use>, %VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11<imp-use>, %VGPR4_VGPR5<imp-def>, %VGPR6_VGPR7<imp-def>, %VGPR8_VGPR9<imp-def>, %VGPR10_VGPR11<imp-def> LLVM triggered Diagnostic Handler: Illegal instruction detected: missing implicit register operands %VGPR5<def> = V_MOVRELS_B32_e32 %VGPR13<undef>, %M0<imp-use>, %EXEC<imp-use>, %VGPR12_VGPR13_VGPR14_VGPR15_VGPR16_VGPR17_VGPR18_VGPR19<imp-use>, %VGPR12_VGPR13<imp-def> LLVM failed to compile shader EE ../../../../../src/gallium/drivers/radeonsi/si_state_shaders.c:1041 si_shader_select_with_key - Failed to build shader variant (type=2) 1 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END radeonsi: Compiling shader 6 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x 
<16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6) %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6) %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6) %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6) %27 = bitcast float %5 to i32 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, 
float, float, float, float, float, float, float, float, float, float, float }> %33 } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END radeonsi: Compiling shader 7 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) { main_body: %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !invariant.load !0 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %13) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !invariant.load !0 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %14) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = bitcast 
i32 %11 to float %30 = insertvalue <{ float, float, float }> undef, float %29, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float %21) ret <{ float, float, float }> %30 } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } !0 = !{} radeonsi: Compiling shader 8 Vertex Shader Prolog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { main_body: %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> undef, i32 %0, 0 %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %19, i32 %1, 1 %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %20, i32 %2, 2 %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %21, i32 %3, 3 %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %22, i32 %4, 4 %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, 
float, float }> %23, i32 %5, 5 %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %24, i32 %6, 6 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %25, i32 %7, 7 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %26, i32 %8, 8 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %27, i32 %9, 9 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %28, i32 %10, 10 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %29, i32 %11, 11 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %30, i32 %12, 12 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %31, i32 %13, 13 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %32, i32 %14, 14 %34 = bitcast i32 %15 to float %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %33, float %34, 15 %36 = bitcast i32 %16 to float %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %35, float %36, 16 %38 = bitcast i32 %17 to float %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %37, float 
%38, 17 %40 = bitcast i32 %18 to float %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %39, float %40, 18 %42 = add i32 %15, %12 %43 = bitcast i32 %42 to float %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %41, float %43, 19 %45 = add i32 %15, %12 %46 = bitcast i32 %45 to float %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %44, float %46, 20 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %47 } radeonsi: Compiling shader 9 Vertex Shader Epilog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs void @main() { main_body: ret void } Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C v_mov_b32_e32 v5, v4 ; 7E0A0304 Shader main disassembly: s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00 s_load_dwordx4 s[4:7], s[10:11], 0x4 ; C0820B04 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604 buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05 s_waitcnt vmcnt(0) ; BF8C0F70 exp 15, 32, 0, 0, 0, v10, v11, v12, v13 ; F800020F 0D0C0B0A exp 15, 12, 0, 1, 0, v6, v7, v8, v9 ; F80008CF 09080706 s_waitcnt expcnt(0) ; BF8C0F0F Shader epilog disassembly: s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 64 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** radeonsi: Compiling shader 10 Fragment Shader Epilog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, 
float, float, float, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = call i32 @llvm.SI.packf16(float %6, float %7) %21 = bitcast i32 %20 to float %22 = call i32 @llvm.SI.packf16(float %8, float %9) %23 = bitcast i32 %22 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %21, float %23, float undef, float undef) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2 attributes #0 = { "InitialPSInputAddr"="16777215" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } Pixel Shader: Shader main disassembly: s_mov_b32 m0, s11 ; BEFC030B v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END radeonsi: Compiling shader 11 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] 
addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !invariant.load !0 %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3, !amdgpu.uniform !0 %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !invariant.load !0 %28 = extractelement <8 x i32> %24, i32 7 %29 = extractelement <4 x i32> %27, i32 0 %30 = and i32 %29, %28 %31 = insertelement <4 x i32> %27, i32 %30, i32 0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12) %34 = bitcast float %32 to i32 %35 = bitcast float %33 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %37, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = bitcast float %5 to i32 %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %43, 10 %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, 
float, float, float, float, float }> %44, float %39, 11 %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %40, 12 %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %41, 13 %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 14 %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49 } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } !0 = !{} Pixel Shader: Shader main disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500 s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C s_mov_b32 m0, s11 ; BEFC030B v_interp_p1_f32 v0, v8, 0, 0, [m0] ; C8000008 v_interp_p2_f32 v0, [v0], v9, 0, 0, [m0] ; C8010009 v_interp_p1_f32 v1, v8, 1, 0, [m0] ; C8040108 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_b32 s0, s0, s19 ; 87001300 v_interp_p2_f32 v1, [v1], v9, 1, 0, [m0] ; C8050109 image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000 s_waitcnt vmcnt(0) ; BF8C0F70 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0020 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Spilled SGPRs: 0 Spilled VGPRs: 0 Code Size: 72 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ********************
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev