Does the TCG optimizer assume that all global variables are unique, i.e. that no two globals overlap in memory? If so, is there a way to indicate that two global variables alias?
Background: I am improving the way we handle register pairs for Hexagon. The original implementation would read from the individual 32-bit registers and concatenate them to form the 64-bit value:

    tcg_gen_concat_i32_i64(val64, hex_gpr[NUM], hex_gpr[(NUM) + 1]);

Similarly, a write would break the 64-bit value apart into two parts and store them individually:

    /* Low word */
    tcg_gen_extrl_i64_i32(val32, val64);
    tcg_gen_mov_tl(hex_gpr[rnum], val32);
    /* High word */
    tcg_gen_extrh_i64_i32(val32, val64);
    tcg_gen_mov_tl(hex_gpr[rnum + 1], val32);

I'm hoping to get more efficient code by creating an array of global i64 variables that overlap the i32 single registers:

    for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
        hex_gpr[i] = tcg_global_mem_new(cpu_env,
            offsetof(CPUHexagonState, gpr[i]), hexagon_regnames[i]);
    }
    for (i = 0; i < TOTAL_PER_THREAD_REGS/2; i++) {
        hex_gpr_pairs[i] = tcg_global_mem_new_i64(cpu_env,
            offsetof(CPUHexagonState, gpr[2 * i]), hexagon_pairnames[i]);
    }

So, a read would be

    tcg_gen_mov_i64(val64, hex_gpr_pairs[NUM/2]);

and a write would be

    tcg_gen_mov_i64(hex_gpr_pairs[NUM/2], val64);

Unfortunately, it seems TCG is optimizing with the assumption that globals don't overlap.
Here's an example:

    { r4 = ##0x11111111
      r5 = ##0x22222222 }
    { p1 = cmp.eq(r3:2,r5:4)
      r4 = r17
      jump 1f }

Here is the TCG:

     ---- 00400094
     movi_i32 pc,$0x400094
     movi_i32 slot_cancelled,$0x0
     movi_i32 pred_written,$0x0
     movi_i32 loc2,$0x11111111
     mov_i32 new_r4,loc2
     movi_i32 loc2,$0x22222222
     mov_i32 new_r5,loc2
     mov_i32 r4,new_r4          /* Assignment to r4 value is 0x11111111 */
     mov_i32 r5,new_r5          /* Assignment to r5 value is 0x22222222 */
     movi_i32 tmp0,$0x1
     add_i32 pkt_cnt,pkt_cnt,tmp0
     movi_i32 tmp0,$0x2
     add_i32 insn_cnt,insn_cnt,tmp0

     ---- 004000a4
     movi_i32 pc,$0x4000a4
     movi_i32 slot_cancelled,$0x0
     movi_i32 branch_taken,$0x0
     movi_i32 next_PC,$0x4000ac
     movi_i32 pred_written,$0x0
     mov_i64 loc3,r3:2
     mov_i64 loc4,r5:4          /* Read from register pair r5:4 */
     movi_i64 tmp5,$0xff
     movi_i64 tmp6,$0x0
     movcond_i64 tmp7,loc3,loc4,tmp5,tmp6,eq
     extrl_i64_i32 loc2,tmp7
     ext8u_i32 loc2,loc2
     movi_i32 tmp0,$0x0
     ext8u_i32 tmp1,loc2
     and_i32 tmp8,tmp1,new_pred_p1
     movi_i32 tmp10,$0x2
     and_i32 tmp9,pred_written,tmp10
     movcond_i32 new_pred_p1,tmp9,tmp0,tmp8,tmp1,ne
     movi_i32 tmp10,$0x2
     or_i32 pred_written,pred_written,tmp10
     mov_i32 loc2,r17
     movi_i32 tmp1,$0x8
     add_i32 tmp0,pc,tmp1
     movi_i32 tmp1,$0x0
     movcond_i32 next_PC,branch_taken,tmp1,next_PC,tmp0,ne
     movi_i32 branch_taken,$0x1
     mov_i32 new_r4,loc2
     mov_i32 r4,new_r4          /* Assignment to r4 from r17 */
     movi_i32 tmp0,$0x0
     mov_i32 p1,new_pred_p1
     mov_i32 pc,next_PC
     movi_i32 tmp0,$0x1
     add_i32 pkt_cnt,pkt_cnt,tmp0
     movi_i32 tmp0,$0x2
     add_i32 insn_cnt,insn_cnt,tmp0
     exit_tb $0x0
     set_label $L0
     exit_tb $0x55bb47db6043

Here is the generated x86 code.
OUT: [size=186]
0x55bb47db6100: mov -0x8(%rbp),%ebx
0x55bb47db6103: test %ebx,%ebx
0x55bb47db6105: jl 0x55bb47db61ae
0x55bb47db610b: mov $0x22222222,%ebx
0x55bb47db6110: mov %ebx,0x138(%rbp)
0x55bb47db6116: mov %ebx,0x14(%rbp)
0x55bb47db6119: mov 0xd0(%rbp),%ebx
0x55bb47db611f: inc %ebx
0x55bb47db6121: mov 0xd4(%rbp),%r12d
0x55bb47db6128: add $0x2,%r12d
0x55bb47db612c: movl $0x0,0x120(%rbp)
0x55bb47db6136: mov 0x8(%rbp),%r13
0x55bb47db613a: mov 0x10(%rbp),%r14
0x55bb47db613e: mov $0xff,%r15d
0x55bb47db6144: xor %r10d,%r10d
0x55bb47db6147: cmp %r14,%r13
0x55bb47db614a: cmove %r15,%r10
0x55bb47db614e: mov %r10d,%r13d
0x55bb47db6151: mov %r13d,0x32c(%rbp)
0x55bb47db6158: movl $0x2,0x338(%rbp)
0x55bb47db6162: mov $0x4000ac,%r14d
0x55bb47db6168: mov %r14d,0x114(%rbp)
0x55bb47db616f: movl $0x1,0x110(%rbp)
0x55bb47db6179: mov 0x44(%rbp),%r15d
0x55bb47db617d: mov %r15d,0x134(%rbp)
0x55bb47db6184: mov %r15d,0x10(%rbp)
0x55bb47db6188: mov %r13d,0x104(%rbp)
0x55bb47db618f: mov %r14d,0xa4(%rbp)
0x55bb47db6196: inc %ebx
0x55bb47db6198: mov %ebx,0xd0(%rbp)
0x55bb47db619e: lea 0x2(%r12),%ebx
0x55bb47db61a3: mov %ebx,0xd4(%rbp)
0x55bb47db61a9: jmpq 0x55bb47db6016
0x55bb47db61ae: lea -0x172(%rip),%rax # 0x55bb47db6043
0x55bb47db61b5: jmpq 0x55bb47db6018

The first assignment to r4 with 0x11111111 has been removed: the only store to 0x10(%rbp) in the output is the later one at 0x55bb47db6184, and no store of 0x11111111 appears anywhere. I guess this is because the second assignment to r4 makes the first one look dead, but it is NOT dead, because the 64-bit read from r5:4 (mov 0x10(%rbp),%r14 at 0x55bb47db613a) accesses the value in between.

Thanks,
Taylor