Mark Wielaard wrote on 04.08.2015 00:17: > On Mon, Aug 03, 2015 at 10:34:19PM +0200, Kai Wasserbäch wrote: >>> Could you point me to the source code that does the libelf calls to create >>> the ELF file? Maybe reading the source helps to figure out what might go >>> wrong. The stacktrace from the test doesn't immediately seem to give a >>> direct clue. >> >> I think all the ELF stuff is encapsulated in >> <http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/radeon/radeon_elf_util.c> >> (and the header for that). The functions defined therein are called from >> <http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/radeonsi/si_shader.c> >> and >> <http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/radeonsi/si_compute.c> >> if I haven't missed something. Michel can probably spot any mistakes in this, >> therefore I CCed him on this message. >> >> Let me know, if you need something else. > > Thanks that was really helpful. It looks like the real problem is the > parsing of the relocation section. Would it be possible for you to dump > the ELF image that is being parsed in radeon/radeon_elf_util.c > (radeon_elf_read) Maybe just by adding the following just before the > elf_memory () call: > int dfd = creat ("/tmp/dump.elf", 00755); > write (dfd, elf_buffer, elf_size); > close (dfd);
I guarded this with an environment variable and replaced creat(), which is deprecated AFAIR, with open(). Then I ran the test. Instead of just running through, it didn't exit by itself. After a few minutes I killed it (the size of dump.elf didn't change during that time). In addition to the ELF dump, I added a shader dump, which radeonsi can produce. Cheers, Kai
dump.elf
Description: Binary data
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_es = 0
VERT
DCL IN[0]
DCL OUT[0], POSITION
0: MOV OUT[0], IN[0]
1: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>]
addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>]
addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32
inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64
0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = add i32 %5, %7
%14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
%15 = extractelement <4 x float> %14, i32 0
%16 = extractelement <4 x float> %14, i32 1
%17 = extractelement <4 x float> %14, i32 2
%18 = extractelement <4 x float> %14, i32 3
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15,
float %16, float %17, float %18)
ret void
}
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float,
float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000
s_waitcnt vmcnt(0) ; BF8C0770
exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 4
Code Size: 36 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
DCL CONST[0..3]
DCL TEMP[0..4], ARRAY(1), LOCAL
DCL TEMP[5..6], LOCAL
DCL ADDR[0]
IMM[0] FLT64 {0.00000000, 0.25000000}
IMM[1] FLT64 {0.50000000, 0.75000000}
IMM[2] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IMM[0].xyxy
1: MOV TEMP[1].xy, IMM[0].zwzw
2: MOV TEMP[2].xy, IMM[1].xyxy
3: MOV TEMP[3].xy, IMM[1].zwzw
4: UARL ADDR[0].x, CONST[3].xxxx
5: DADD TEMP[5].xy, TEMP[ADDR[0].x](1).xyxy, CONST[0].xyxy
6: DNEG TEMP[6].xy, CONST[2].xyxy
7: DADD TEMP[5].xy, TEMP[5].xyxy, TEMP[6].xyxy
8: DABS TEMP[5].xy, TEMP[5].xyxy
9: DSGE TEMP[5].x, CONST[1].xyxy, TEMP[5].xyxy
10: UIF TEMP[5].xxxx :0
11: MOV TEMP[5], IMM[2].xyxy
12: ELSE :0
13: MOV TEMP[5], IMM[2].yxxy
14: ENDIF
15: MOV OUT[0], TEMP[5]
16: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>]
addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>]
addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>,
<3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float,
float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64
0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%31 = bitcast float %30 to i32
%32 = extractelement <5 x double> <double 0.000000e+00, double bitcast (<2 x
i32> <i32 0, i32 1070596096> to double), double bitcast (<2 x i32> <i32 0, i32
1071644672> to double), double bitcast (<2 x i32> <i32 0, i32 1072168960> to
double), double 0.000000e+00>, i32 %31
%33 = bitcast float %24 to i32
%34 = insertelement <2 x i32> undef, i32 %33, i32 0
%35 = bitcast float %25 to i32
%36 = insertelement <2 x i32> %34, i32 %35, i32 1
%37 = bitcast <2 x i32> %36 to double
%38 = fadd double %32, %37
%39 = bitcast float %28 to i32
%40 = insertelement <2 x i32> undef, i32 %39, i32 0
%41 = bitcast float %29 to i32
%42 = insertelement <2 x i32> %40, i32 %41, i32 1
%43 = bitcast <2 x i32> %42 to double
%44 = fsub double %38, %43
%45 = call double @fabs(double %44)
%46 = bitcast float %26 to i32
%47 = insertelement <2 x i32> undef, i32 %46, i32 0
%48 = bitcast float %27 to i32
%49 = insertelement <2 x i32> %47, i32 %48, i32 1
%50 = bitcast <2 x i32> %49 to double
%51 = fcmp oge double %50, %45
%. = select i1 %51, float 1.000000e+00, float 0.000000e+00
%.28 = select i1 %51, float 0.000000e+00, float 1.000000e+00
%52 = call i32 @llvm.SI.packf16(float %.28, float %.)
%53 = bitcast i32 %52 to float
%54 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 1.000000e+00)
%55 = bitcast i32 %54 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %53,
float %55, float %53, float %55)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: readnone
declare double @fabs(double) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float,
float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 0x3fe80000 ; 7E0202FF
3FE80000
s_mov_b32 s5, SCRATCH_RSRC_DWORD1 ; BE8503FF
00000000
v_mov_b32_e32 v2, 0 ; 7E040280
v_mov_b32_e32 v3, 0 ; 7E060280
v_mov_b32_e32 v0, 0 ; 7E000280
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0xc ; C204010C
s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; BE8403FF
00000000
s_mov_b32 s7, 0x80f000 ; BE8703FF
0080F000
s_mov_b32 s6, -1 ; BE8603C1
v_mov_b32_e32 v4, 0 ; 7E080280
buffer_store_dwordx2 v[0:1], v4, s[4:7], s10 offen offset:24 ; E0741018
0A010004
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; BF8C0000
v_mov_b32_e32 v1, 0x3fe00000 ; 7E0202FF
3FE00000
v_mov_b32_e32 v4, 0 ; 7E080280
buffer_store_dwordx2 v[0:1], v4, s[4:7], s10 offen offset:16 ; E0741010
0A010004
s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700
v_mov_b32_e32 v1, 0x3fd00000 ; 7E0202FF
3FD00000
v_mov_b32_e32 v4, 0 ; 7E080280
buffer_store_dwordx2 v[0:1], v4, s[4:7], s10 offen offset:8 ; E0741008
0A010004
v_mov_b32_e32 v4, 0 ; 7E080280
buffer_store_dwordx2 v[2:3], v4, s[4:7], s10 offen ; E0741000
0A010204
s_lshl_b32 s8, s8, 3 ; 8F088308
s_add_i32 s8, s8, 0 ; 81088008
v_mov_b32_e32 v4, 0 ; 7E080280
buffer_store_dwordx2 v[2:3], v4, s[4:7], s10 offen offset:32 ; E0741020
0A010204
s_waitcnt vmcnt(2) expcnt(0) ; BF8C0702
v_mov_b32_e32 v0, s8 ; 7E000208
buffer_load_dwordx2 v[0:1], v0, s[4:7], s10 offen ; E0341000
0A010000
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108
s_buffer_load_dword s7, s[0:3], 0x9 ; C2038109
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s0, s[0:3], 0x5 ; C2000105
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v2, s4 ; 7E040204
v_mov_b32_e32 v3, s5 ; 7E060205
v_add_f64 v[0:1], v[0:1], v[2:3] ; D2C80000
00020500
v_mov_b32_e32 v2, s6 ; 7E040206
v_mov_b32_e32 v3, s7 ; 7E060207
v_add_f64 v[0:1], v[0:1], -v[2:3] ; D2C80000
40020500
v_mov_b32_e32 v2, s8 ; 7E040208
v_mov_b32_e32 v3, s0 ; 7E060200
v_cmp_ge_f64_e64 s[0:1], v[2:3], |v[0:1]| ; D04C0200
00020102
v_cndmask_b32_e64 v0, 0, 1.0, s[0:1] ; D2000000
0001E480
v_cndmask_b32_e64 v1, 1.0, 0, s[0:1] ; D2000001
000100F2
v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101
v_cvt_pkrtz_f16_f32_e64 v1, 0, 1.0 ; D25E0001
0001E480
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F
01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 8
Code Size: 284 bytes
LDS: 0 blocks
Scratch: 4096 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_es = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
0: MOV OUT[0], IN[0]
1: MOV OUT[1], IN[1]
2: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>]
addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>]
addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32
inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64
0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = add i32 %5, %7
%14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
%15 = extractelement <4 x float> %14, i32 0
%16 = extractelement <4 x float> %14, i32 1
%17 = extractelement <4 x float> %14, i32 2
%18 = extractelement <4 x float> %14, i32 3
%19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64
0, i64 1
%20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0
%21 = add i32 %5, %7
%22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21)
%23 = extractelement <4 x float> %22, i32 0
%24 = extractelement <4 x float> %22, i32 1
%25 = extractelement <4 x float> %22, i32 2
%26 = extractelement <4 x float> %22, i32 3
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23,
float %24, float %25, float %26)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15,
float %16, float %17, float %18)
ret void
}
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float,
float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100
buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500
s_waitcnt vmcnt(0) ; BF8C0770
exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605
exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 12
Code Size: 56 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
0: MOV OUT[0], IN[0]
1: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>]
addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>]
addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>,
<3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float,
float, i32, float, float) #0 {
main_body:
%22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5)
%23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5)
%24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5)
%25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5)
%26 = call i32 @llvm.SI.packf16(float %22, float %23)
%27 = bitcast i32 %26 to float
%28 = call i32 @llvm.SI.packf16(float %24, float %25)
%29 = bitcast i32 %28 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27,
float %29, float %27, float %29)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float,
float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
Shader Disassembly:
s_mov_b32 m0, s9 ; BEFC0309
v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002
v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202
v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 4
Code Size: 40 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
signature.asc
Description: OpenPGP digital signature

