Does the TCG optimizer assume all global variables are unique?  If so, is there 
a method to indicate that two global variables alias?

Background:
I am improving the way we handle register pairs for Hexagon.  The original 
implementation would read from the individual 32-bit registers and concatenate 
them to form the 64-bit value:
tcg_gen_concat_i32_i64(val64, hex_gpr[NUM], hex_gpr[(NUM) + 1]);
Similarly, a write would break apart the 64-bit value into two parts and store 
them individually:
/* Low word */
tcg_gen_extrl_i64_i32(val32, val64);
tcg_gen_mov_tl(hex_gpr[rnum], val32);
/* High word */
tcg_gen_extrh_i64_i32(val32, val64);
tcg_gen_mov_tl(hex_gpr[rnum + 1], val32);

I'm hoping to get more efficient code by creating an array of global i64 
variables that overlap the i32 single registers.
for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
    hex_gpr[i] = tcg_global_mem_new(cpu_env,
        offsetof(CPUHexagonState, gpr[i]), hexagon_regnames[i]);
}
for (i = 0; i < TOTAL_PER_THREAD_REGS/2; i++) {
    hex_gpr_pairs[i] = tcg_global_mem_new_i64(cpu_env,
       offsetof(CPUHexagonState, gpr[2 * i]), hexagon_pairnames[i]);
}
So, a read would be
                tcg_gen_mov_i64(val64, hex_gpr_pairs[NUM/2]);
and a write would be
                tcg_gen_mov_i64(hex_gpr_pairs[NUM/2], val64);

Unfortunately, it seems TCG is optimizing with the assumption that globals 
don't overlap.  Here's an example
                {
                    r4 = ##0x11111111
                    r5 = ##0x22222222
                }
                {
                    p1 = cmp.eq(r3:2,r5:4)
                    r4 = r17
                    jump 1f
                }
Here is the TCG
---- 00400094
movi_i32 pc,$0x400094
movi_i32 slot_cancelled,$0x0
movi_i32 pred_written,$0x0
movi_i32 loc2,$0x11111111
mov_i32 new_r4,loc2
movi_i32 loc2,$0x22222222
mov_i32 new_r5,loc2
mov_i32 r4,new_r4              /* Assignment to r4, value is 0x11111111 */
mov_i32 r5,new_r5              /* Assignment to r5, value is 0x22222222 */
movi_i32 tmp0,$0x1
add_i32 pkt_cnt,pkt_cnt,tmp0
movi_i32 tmp0,$0x2
add_i32 insn_cnt,insn_cnt,tmp0
---- 004000a4
movi_i32 pc,$0x4000a4
movi_i32 slot_cancelled,$0x0
movi_i32 branch_taken,$0x0
movi_i32 next_PC,$0x4000ac
movi_i32 pred_written,$0x0
mov_i64 loc3,r3:2
mov_i64 loc4,r5:4              /* Read from register pair r5:4 */
movi_i64 tmp5,$0xff
movi_i64 tmp6,$0x0
movcond_i64 tmp7,loc3,loc4,tmp5,tmp6,eq
extrl_i64_i32 loc2,tmp7
ext8u_i32 loc2,loc2
movi_i32 tmp0,$0x0
ext8u_i32 tmp1,loc2
and_i32 tmp8,tmp1,new_pred_p1
movi_i32 tmp10,$0x2
and_i32 tmp9,pred_written,tmp10
movcond_i32 new_pred_p1,tmp9,tmp0,tmp8,tmp1,ne
movi_i32 tmp10,$0x2
or_i32 pred_written,pred_written,tmp10
mov_i32 loc2,r17
movi_i32 tmp1,$0x8
add_i32 tmp0,pc,tmp1
movi_i32 tmp1,$0x0
movcond_i32 next_PC,branch_taken,tmp1,next_PC,tmp0,ne
movi_i32 branch_taken,$0x1
mov_i32 new_r4,loc2
mov_i32 r4,new_r4              /* Assignment to r4 from r17 */
movi_i32 tmp0,$0x0
mov_i32 p1,new_pred_p1
mov_i32 pc,next_PC
movi_i32 tmp0,$0x1
add_i32 pkt_cnt,pkt_cnt,tmp0
movi_i32 tmp0,$0x2
add_i32 insn_cnt,insn_cnt,tmp0
exit_tb $0x0
set_label $L0
exit_tb $0x55bb47db6043

Here is the generated x86 code.
OUT: [size=186]
0x55bb47db6100:  mov    -0x8(%rbp),%ebx
0x55bb47db6103:  test   %ebx,%ebx
0x55bb47db6105:  jl     0x55bb47db61ae
0x55bb47db610b:  mov    $0x22222222,%ebx
0x55bb47db6110:  mov    %ebx,0x138(%rbp)
0x55bb47db6116:  mov    %ebx,0x14(%rbp)
0x55bb47db6119:  mov    0xd0(%rbp),%ebx
0x55bb47db611f:  inc    %ebx
0x55bb47db6121:  mov    0xd4(%rbp),%r12d
0x55bb47db6128:  add    $0x2,%r12d
0x55bb47db612c:  movl   $0x0,0x120(%rbp)
0x55bb47db6136:  mov    0x8(%rbp),%r13
0x55bb47db613a:  mov    0x10(%rbp),%r14
0x55bb47db613e:  mov    $0xff,%r15d
0x55bb47db6144:  xor    %r10d,%r10d
0x55bb47db6147:  cmp    %r14,%r13
0x55bb47db614a:  cmove  %r15,%r10
0x55bb47db614e:  mov    %r10d,%r13d
0x55bb47db6151:  mov    %r13d,0x32c(%rbp)
0x55bb47db6158:  movl   $0x2,0x338(%rbp)
0x55bb47db6162:  mov    $0x4000ac,%r14d
0x55bb47db6168:  mov    %r14d,0x114(%rbp)
0x55bb47db616f:  movl   $0x1,0x110(%rbp)
0x55bb47db6179:  mov    0x44(%rbp),%r15d
0x55bb47db617d:  mov    %r15d,0x134(%rbp)
0x55bb47db6184:  mov    %r15d,0x10(%rbp)
0x55bb47db6188:  mov    %r13d,0x104(%rbp)
0x55bb47db618f:  mov    %r14d,0xa4(%rbp)
0x55bb47db6196:  inc    %ebx
0x55bb47db6198:  mov    %ebx,0xd0(%rbp)
0x55bb47db619e:  lea    0x2(%r12),%ebx
0x55bb47db61a3:  mov    %ebx,0xd4(%rbp)
0x55bb47db61a9:  jmpq   0x55bb47db6016
0x55bb47db61ae:  lea    -0x172(%rip),%rax        # 0x55bb47db6043
0x55bb47db61b5:  jmpq   0x55bb47db6018
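The first assignment to r4 (with 0x11111111) has been removed: in the x86 code 
above, 0x22222222 is stored to r5 at 0x14(%rbp), but the only store to r4 at 
0x10(%rbp) is the later one from r17, and it comes after the 64-bit load of 
r5:4 from 0x10(%rbp).  I guess this is because the second assignment makes the 
first one look dead, but it is NOT dead because the read from r5:4 accesses the 
value.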

Thanks,
Taylor

Reply via email to