Hi,

Recently I've been working on shader optimization for r600g, and I now
have an initial working implementation of a simple ALU scheduler and
register allocator. It causes no piglit regressions, though it's still a
work in progress and there are known issues with some applications.

I've pushed the working branch to github:
https://github.com/VadimGirlin/mesa/tree/r600_shader_opt

Currently it supports Evergreen only, but I'm planning to make it work
with other chips too. It uses "struct r600_bytecode" as the source,
converting it to an SSA-based internal representation. I'm going to
implement some optimization passes at that stage, but for now it proceeds
directly to the final steps - register allocation, ALU scheduling, and
building the new bytecode.

As an example, I'm attaching the dump for one of the shaders in
glxgears. You can get such a dump for all shaders, before and after
processing, by setting the R600_OPT_DUMP environment variable to 2. Setting
this variable to 1 will print only some information for the processed
shaders - size, number of gprs, and number of alu instruction groups.

Vadim





bytecode 130 dw -- 11 gprs ---------------------
     E
0000 00000000 CF ADDR:0
0001 84C00000 CF INST:13 CALL_FS COND:0 POP_COUNT:0 
0002 80000004 ALU ADDR:8 KCACHE_MODE0:2 KCACHE_BANK0:0 KCACHE_BANK1:0
0003 A0F00000 ALU INST:0x8 ALU KCACHE_MODE1:0 KCACHE_ADDR0:0 KCACHE_ADDR1:0 COUNT:61
                  1     MUL     R5.x, R1.x, KC0[0].x
                        MUL     R5.y, R1.x, KC0[0].y
                        MUL     R5.z, R1.x, KC0[0].z
                        MUL     R5.w, R1.x, KC0[0].w
                  2     MULADD  R5.x, R1.y, KC0[1].x, PV.x
                        MULADD  R5.y, R1.y, KC0[1].y, PV.y
                        MULADD  R5.z, R1.y, KC0[1].z, PV.z
                        MULADD  R5.w, R1.y, KC0[1].w, PV.w
                  3     MULADD  R5.x, R1.z, KC0[2].x, PV.x
                        MULADD  R5.y, R1.z, KC0[2].y, PV.y
                        MULADD  R5.z, R1.z, KC0[2].z, PV.z
                        MULADD  R5.w, R1.z, KC0[2].w, PV.w
                  4     MULADD  R3.x, R1.w, KC0[3].x, PV.x
                        MULADD  R3.y, R1.w, KC0[3].y, PV.y
                        MULADD  R3.z, R1.w, KC0[3].z, PV.z
                        MULADD  R3.w, R1.w, KC0[3].w, PV.w
                  5     DOT4    R6.x, R2.x, R2.x
                        DOT4    __.y, R2.y, R2.y
                        DOT4    __.z, R2.z, R2.z
                        DOT4    __.w, 0.0f, 0.0f
                  6     RECIPSQRT_CLAMPED       R10.x, |PV.x|
                  7     MOV     R6.x, PS
                        MOV     __.y, PS
                        MOV     __.z, PS
                        MOV     __.w, PS
                  8     MUL     R5.x, R2.x, PV.x
                        MUL     R5.y, R2.y, PV.x
                        MUL     R5.z, R2.z, PV.x
                        MUL     R5.w, R2.w, PV.x
                  9     MOV     R7.x, KC0[4].x
                        MOV     R7.y, KC0[4].y
                        MOV     R7.z, KC0[4].z
                        MOV     R7.w, KC0[4].w
                 10     MOV     R4.x, PV.x
                        MOV     R4.y, PV.y
                        MOV     R4.z, PV.z
                        MOV     R4.w, PV.w
                 11     DOT4    R8.x, R5.x, KC0[5].x
                        DOT4    R8.y, R5.y, KC0[5].y
                        DOT4    R8.z, R5.z, KC0[5].z
                        DOT4    R8.w, 0.0f, 0.0f
                 12     MAX     R6.x, 0.0f, PV.x
                        MAX     R6.y, 0.0f, PV.x
                        MAX     R6.z, 0.0f, PV.x
                        MAX     R6.w, 1.0f, PV.x
                        SETGT   R6.z, PV.x, 0.0f
                 13     ADD     R7.x, KC0[6].x, R7.x
                        ADD     R7.y, KC0[6].y, R7.y
                        ADD     R7.z, KC0[6].z, R7.z
                        ADD     R7.w, KC0[6].w, R7.w
                 14     MULADD  R7.x, R6.y, KC0[7].x, PV.x
                        MULADD  R7.y, R6.y, KC0[7].y, PV.y
                        MULADD  R7.z, R6.y, KC0[7].z, PV.z
                        MULADD  R7.w, R6.y, KC0[7].w, PV.w
                 15     MULADD  R4.x, R6.z, KC0[8].x, PV.x
                        MULADD  R4.y, R6.z, KC0[8].y, PV.y
                        MULADD  R4.z, R6.z, KC0[8].z, PV.z
                 16     MOV_sat R4.x, PV.x
                        MOV_sat R4.y, PV.y
                        MOV_sat R4.z, PV.z
                        MOV_sat R4.w, R4.w
                 EXPORT_DONE     POS   60,      R3.xyzw
                 EXPORT_DONE     PARAM 0,       R4.xyzw
--------------------------------------
optimizing shader 7
INFO: shader optimized : size -27.7% ( 130 -> 94 dw),    gpr -63.6% ( 11 -> 4 ),   alu_groups -31.2% ( 16 -> 11 )
optimized bytecode 94 dw -- 4 gprs ---------------------
     E
0000 00000000 CF ADDR:0
0001 84C00000 CF INST:13 CALL_FS COND:0 POP_COUNT:0 
0002 80000004 ALU ADDR:8 KCACHE_MODE0:2 KCACHE_BANK0:0 KCACHE_BANK1:0
0003 A0A80000 ALU INST:0x8 ALU KCACHE_MODE1:0 KCACHE_ADDR0:0 KCACHE_ADDR1:0 COUNT:43
                  1     MUL     R0.x, R1.x, KC0[0].x
                        MUL     R0.y, R1.x, KC0[0].y
                  2     MULADD  R0.x, R1.y, KC0[1].x, PV.x
                        MULADD  R0.y, R1.y, KC0[1].y, PV.y
                        MUL     R0.z, R1.x, KC0[0].z
                        MUL     R0.w, R1.x, KC0[0].w
                  3     MULADD  R1.x, R1.z, KC0[2].x, PV.x
                        MULADD  R0.z, R1.y, KC0[1].z, PV.z
                        MULADD  R0.w, R1.y, KC0[1].w, PV.w
                  4     DOT4    R0.x, R2.x, R2.x
                        DOT4    __.y, R2.y, R2.y
                        DOT4    __.z, R2.z, R2.z
                        DOT4    __.w, 0.0f, 0.0f
                        MULADD  R0.y, R1.z, KC0[2].y, R0.y
                  5     MULADD  R1.x, R1.w, KC0[3].x, R1.x
                        MULADD  R0.z, R1.z, KC0[2].z, R0.z
                        MULADD  R0.w, R1.z, KC0[2].w, R0.w
                        RECIPSQRT_CLAMPED       R0.x, |PV.x|
                  6     MUL     R2.x, R2.x, PS
                        MUL     R2.y, R2.y, PS
                        MULADD  R1.z, R1.w, KC0[3].z, PV.z
                        MULADD  R1.w, R1.w, KC0[3].w, PV.w
                        MULADD  R1.y, R1.w, KC0[3].y, R0.y
                  7     MOV     R0.x, KC0[4].y
                        MOV     R0.y, KC0[4].z
                        MOV     R0.z, KC0[4].x
                        MOV_sat R0.w, KC0[4].w
                        MUL     R2.z, R2.z, R0.x
                  8     DOT4    R3.x, R2.x, KC0[5].x
                        DOT4    __.y, R2.y, KC0[5].y
                        DOT4    __.z, PS, KC0[5].z
                        DOT4    __.w, 0.0f, 0.0f
                  9     ADD     R0.x, KC0[6].z, R0.y
                        ADD     R0.y, KC0[6].y, R0.x
                        ADD     R0.z, KC0[6].x, R0.z
                        MAX     R2.x, 0.0f, PV.x
                 10     MULADD  R0.x, PS, KC0[7].z, PV.x
                        SETGT   R0.y, R3.x, 0.0f
                        MULADD  R0.z, PS, KC0[7].y, PV.y
                        MULADD  R2.x, PS, KC0[7].x, PV.z
                 11     MULADD_sat      R0.x, PV.y, KC0[8].x, PS
                        MULADD_sat      R0.y, PV.y, KC0[8].y, PV.z
                        MULADD_sat      R0.z, PV.y, KC0[8].z, PV.x
                 EXPORT_DONE     POS   60,      R1.xyzw
                 EXPORT_DONE     PARAM 0,       R0.xyzw
--------------------------------------

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to