Well, at least what combine does here is a mystery to me (s390x
with -O3 in case it matters).

Rtl before combine:

-- snip --
(insn 6 3 7 2 (parallel [
            (set (reg:SI 64)
                (and:SI (mem:SI (reg/v/f:DI 63 [ a ]) [1 *a_2(D)+0 S4 A32])
                    (const_int -65521 [0xffffffffffff000f])))
            (clobber (reg:CC 33 %cc))
        ]) andc-immediate.c:21 1481 {*andsi3_zarch}
     (expr_list:REG_DEAD (reg/v/f:DI 63 [ a ])
        (expr_list:REG_UNUSED (reg:CC 33 %cc)
            (nil))))
(insn 7 6 12 2 (set (reg:DI 65)
        (zero_extend:DI (reg:SI 64))) andc-immediate.c:21 1207 {*zero_extendsidi2}
     (expr_list:REG_DEAD (reg:SI 64)
        (nil)))
(insn 12 7 13 2 (set (reg/i:DI 2 %r2)
        (reg:DI 65)) andc-immediate.c:22 1073 {*movdi_64}
     (expr_list:REG_DEAD (reg:DI 65)
        (nil)))
-- snip --

How does combine get this idea (it's the only match in the
function)?

  Trying 7 -> 12:
  Successfully matched this instruction:
  (set (reg/i:DI 2 %r2)
      (and:DI (subreg:DI (reg:SI 64) 0)
          (const_int 4294901775 [0xffff000f])))
  allowing combination of insns 7 and 12

=>

-- snip --
(insn 6 3 7 2 (parallel [
            (set (reg:SI 64)
                (and:SI (mem:SI (reg:DI 2 %r2 [ a ]) [1 *a_2(D)+0 S4 A32])
                    (const_int -65521 [0xffffffffffff000f])))
            (clobber (reg:CC 33 %cc))
        ]) andc-immediate.c:21 1481 {*andsi3_zarch}
     (expr_list:REG_DEAD (reg:DI 2 %r2 [ a ])
        (expr_list:REG_UNUSED (reg:CC 33 %cc)
            (nil))))
(insn 12 7 13 2 (parallel [
            (set (reg/i:DI 2 %r2)
                (and:DI (subreg:DI (reg:SI 64) 0)
                 ^^^
                    (const_int 4294901775 [0xffff000f])))
                                           ^^^^^^^^^^
            (clobber (reg:CC 33 %cc))
        ]) andc-immediate.c:22 1474 {*anddi3}
     (expr_list:REG_UNUSED (reg:CC 33 %cc)
        (expr_list:REG_DEAD (reg:SI 64)
            (nil))))
-- snip --

It combines "zero extend" + "copy to hardreg" (insns 7 + 12) into an
"and with 0xffff000f".  However, that would be the correct result for
combining insns 6 + 7 + 12, not just 7 + 12.  (In the end, the two
"and"s with constant values are not merged into a single "and" with a
single constant.)

(dumps attached)

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany
;; Function andc_32_pv (andc_32_pv, funcdef_no=0, decl_uid=1973, cgraph_uid=0, 
symbol_order=0)

starting the processing of deferred insns
ending the processing of deferred insns
df_analyze called
df_worklist_dataflow_doublequeue:n_basic_blocks 3 n_edges 2 count 3 (    1)


andc_32_pv

Dataflow summary:
;;  invalidated by call          0 [%r0] 1 [%r1] 2 [%r2] 3 [%r3] 4 [%r4] 5 
[%r5] 16 [%f0] 17 [%f2] 18 [%f4] 19 [%f6] 20 [%f1] 21 [%f3] 22 [%f5] 23 [%f7] 
33 [%cc] 35 [%rp] 38 [%v16] 39 [%v18] 40 [%v20] 41 [%v22] 42 [%v17] 43 [%v19] 
44 [%v21] 45 [%v23] 46 [%v24] 47 [%v26] 48 [%v28] 49 [%v30] 50 [%v25] 51 [%v27] 
52 [%v29] 53 [%v31]
;;  hardware regs used   15 [%r15] 32 [%ap] 34 [%fp]
;;  regular block artificial uses        11 [%r11] 15 [%r15] 32 [%ap] 34 [%fp]
;;  eh block artificial uses     11 [%r11] 15 [%r15] 32 [%ap] 34 [%fp]
;;  entry block defs     0 [%r0] 2 [%r2] 3 [%r3] 4 [%r4] 5 [%r5] 6 [%r6] 11 
[%r11] 14 [%r14] 15 [%r15] 16 [%f0] 17 [%f2] 18 [%f4] 19 [%f6] 32 [%ap] 34 [%fp]
;;  exit block uses      2 [%r2] 11 [%r11] 14 [%r14] 15 [%r15] 34 [%fp]
;;  regs ever live       2 [%r2] 33 [%cc]
;;  ref usage   r0={1d} r2={2d,3u} r3={1d} r4={1d} r5={1d} r6={1d} r11={1d,2u} 
r14={1d,1u} r15={1d,2u} r16={1d} r17={1d} r18={1d} r19={1d} r32={1d,1u} 
r33={1d} r34={1d,2u} r63={1d,1u} r64={1d,1u} r65={1d,1u} 
;;    total ref usage 34{20d,14u,0e} in 5{5 regular + 0 call} insns.
;; Reaching defs:
;;  sparse invalidated  
;;  dense invalidated   0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 15
;;  reg->defs[] map:    0[0,0] 2[1,2] 3[3,3] 4[4,4] 5[5,5] 6[6,6] 11[7,7] 
14[8,8] 15[9,9] 16[10,10] 17[11,11] 18[12,12] 19[13,13] 32[14,14] 33[15,15] 
34[16,16] 63[17,17] 64[18,18] 65[19,19] 

( )->[0]->( 2 )
;; bb 0 artificial_defs: { d0(0){ }d2(2){ }d3(3){ }d4(4){ }d5(5){ }d6(6){ 
}d7(11){ }d8(14){ }d9(15){ }d10(16){ }d11(17){ }d12(18){ }d13(19){ }d14(32){ 
}d16(34){ }}
;; bb 0 artificial_uses: { }
;; lr  in       
;; lr  use      
;; lr  def       0 [%r0] 2 [%r2] 3 [%r3] 4 [%r4] 5 [%r5] 6 [%r6] 11 [%r11] 14 
[%r14] 15 [%r15] 16 [%f0] 17 [%f2] 18 [%f4] 19 [%f6] 32 [%ap] 34 [%fp]
;; live  in     
;; live  gen     0 [%r0] 2 [%r2] 3 [%r3] 4 [%r4] 5 [%r5] 6 [%r6] 11 [%r11] 14 
[%r14] 15 [%r15] 16 [%f0] 17 [%f2] 18 [%f4] 19 [%f6] 32 [%ap] 34 [%fp]
;; live  kill   
;; rd  in       (0) 
;; rd  gen      (15) 
0[0],2[2],3[3],4[4],5[5],6[6],11[7],14[8],15[9],16[10],17[11],18[12],19[13],32[14],34[16]
;; rd  kill     (16) 
0[0],2[1,2],3[3],4[4],5[5],6[6],11[7],14[8],15[9],16[10],17[11],18[12],19[13],32[14],34[16]
;;  UD chains for artificial uses at top
;; lr  out       2 [%r2] 11 [%r11] 14 [%r14] 15 [%r15] 32 [%ap] 34 [%fp]
;; live  out     2 [%r2] 11 [%r11] 14 [%r14] 15 [%r15] 32 [%ap] 34 [%fp]
;; rd  out      (6) 2[2],11[7],14[8],15[9],32[14],34[16]
;;  UD chains for artificial uses at bottom

( 0 )->[2]->( 1 )
;; bb 2 artificial_defs: { }
;; bb 2 artificial_uses: { u0(11){ d7(bb 0 insn -1) }u1(15){ d9(bb 0 insn -1) 
}u2(32){ d14(bb 0 insn -1) }u3(34){ d16(bb 0 insn -1) }}
;; lr  in        2 [%r2] 11 [%r11] 14 [%r14] 15 [%r15] 32 [%ap] 34 [%fp]
;; lr  use       2 [%r2] 11 [%r11] 15 [%r15] 32 [%ap] 34 [%fp]
;; lr  def       2 [%r2] 33 [%cc] 63 64 65
;; live  in      2 [%r2] 11 [%r11] 14 [%r14] 15 [%r15] 32 [%ap] 34 [%fp]
;; live  gen     2 [%r2] 63 64 65
;; live  kill    33 [%cc]
;; rd  in       (6) 2[2],11[7],14[8],15[9],32[14],34[16]
;; rd  gen      (4) 2[1],63[17],64[18],65[19]
;; rd  kill     (6) 2[1,2],33[15],63[17],64[18],65[19]
;;  UD chains for artificial uses at top
;; lr  out       2 [%r2] 11 [%r11] 14 [%r14] 15 [%r15] 32 [%ap] 34 [%fp]
;; live  out     2 [%r2] 11 [%r11] 14 [%r14] 15 [%r15] 32 [%ap] 34 [%fp]
;; rd  out      (6) 2[1],11[7],14[8],15[9],32[14],34[16]
;;  UD chains for artificial uses at bottom
;;   reg 11 { d7(bb 0 insn -1) }
;;   reg 15 { d9(bb 0 insn -1) }
;;   reg 32 { d14(bb 0 insn -1) }
;;   reg 34 { d16(bb 0 insn -1) }

( 2 )->[1]->( )
;; bb 1 artificial_defs: { }
;; bb 1 artificial_uses: { u9(2){ d1(bb 2 insn 12) }u10(11){ d7(bb 0 insn -1) 
}u11(14){ d8(bb 0 insn -1) }u12(15){ d9(bb 0 insn -1) }u13(34){ d16(bb 0 insn 
-1) }}
;; lr  in        2 [%r2] 11 [%r11] 14 [%r14] 15 [%r15] 34 [%fp]
;; lr  use       2 [%r2] 11 [%r11] 14 [%r14] 15 [%r15] 34 [%fp]
;; lr  def      
;; live  in      2 [%r2] 11 [%r11] 14 [%r14] 15 [%r15] 34 [%fp]
;; live  gen    
;; live  kill   
;; rd  in       (6) 2[1],11[7],14[8],15[9],32[14],34[16]
;; rd  gen      (0) 
;; rd  kill     (0) 
;;  UD chains for artificial uses at top
;; lr  out      
;; live  out    
;; rd  out      (0) 
;;  UD chains for artificial uses at bottom
;;   reg 2 { d1(bb 2 insn 12) }
;;   reg 11 { d7(bb 0 insn -1) }
;;   reg 14 { d8(bb 0 insn -1) }
;;   reg 15 { d9(bb 0 insn -1) }
;;   reg 34 { d16(bb 0 insn -1) }

Finding needed instructions:
  Adding insn 13 to worklist
Finished finding needed instructions:
  Adding insn 12 to worklist
Processing use of (reg 65) in insn 12:
  Adding insn 7 to worklist
Processing use of (reg 64) in insn 7:
  Adding insn 6 to worklist
Processing use of (reg 63 [ a ]) in insn 6:
  Adding insn 2 to worklist
Processing use of (reg 2 %r2) in insn 2:
Processing use of (reg 2 %r2) in insn 13:
starting the processing of deferred insns
ending the processing of deferred insns


andc_32_pv

Dataflow summary:
;;  invalidated by call          0 [%r0] 1 [%r1] 2 [%r2] 3 [%r3] 4 [%r4] 5 
[%r5] 16 [%f0] 17 [%f2] 18 [%f4] 19 [%f6] 20 [%f1] 21 [%f3] 22 [%f5] 23 [%f7] 
33 [%cc] 35 [%rp] 38 [%v16] 39 [%v18] 40 [%v20] 41 [%v22] 42 [%v17] 43 [%v19] 
44 [%v21] 45 [%v23] 46 [%v24] 47 [%v26] 48 [%v28] 49 [%v30] 50 [%v25] 51 [%v27] 
52 [%v29] 53 [%v31]
;;  hardware regs used   15 [%r15] 32 [%ap] 34 [%fp]
;;  regular block artificial uses        11 [%r11] 15 [%r15] 32 [%ap] 34 [%fp]
;;  eh block artificial uses     11 [%r11] 15 [%r15] 32 [%ap] 34 [%fp]
;;  entry block defs     0 [%r0] 2 [%r2] 3 [%r3] 4 [%r4] 5 [%r5] 6 [%r6] 11 
[%r11] 14 [%r14] 15 [%r15] 16 [%f0] 17 [%f2] 18 [%f4] 19 [%f6] 32 [%ap] 34 [%fp]
;;  exit block uses      2 [%r2] 11 [%r11] 14 [%r14] 15 [%r15] 34 [%fp]
;;  regs ever live       2 [%r2] 33 [%cc]
;;  ref usage   r0={1d} r2={2d,3u} r3={1d} r4={1d} r5={1d} r6={1d} r11={1d,2u} 
r14={1d,1u} r15={1d,2u} r16={1d} r17={1d} r18={1d} r19={1d} r32={1d,1u} 
r33={1d} r34={1d,2u} r63={1d,1u} r64={1d,1u} r65={1d,1u} 
;;    total ref usage 34{20d,14u,0e} in 5{5 regular + 0 call} insns.
(note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
(insn 2 4 3 2 (set (reg/v/f:DI 63 [ a ])
        (reg:DI 2 %r2 [ a ])) andc-immediate.c:20 1073 {*movdi_64}
     (expr_list:REG_DEAD (reg:DI 2 %r2 [ a ])
        (nil)))
(note 3 2 6 2 NOTE_INSN_FUNCTION_BEG)
(insn 6 3 7 2 (parallel [
            (set (reg:SI 64)
                (and:SI (mem:SI (reg/v/f:DI 63 [ a ]) [1 *a_2(D)+0 S4 A32])
                    (const_int -65521 [0xffffffffffff000f])))
            (clobber (reg:CC 33 %cc))
        ]) andc-immediate.c:21 1481 {*andsi3_zarch}
     (expr_list:REG_DEAD (reg/v/f:DI 63 [ a ])
        (expr_list:REG_UNUSED (reg:CC 33 %cc)
            (nil))))
(insn 7 6 12 2 (set (reg:DI 65)
        (zero_extend:DI (reg:SI 64))) andc-immediate.c:21 1207 
{*zero_extendsidi2}
     (expr_list:REG_DEAD (reg:SI 64)
        (nil)))
(insn 12 7 13 2 (set (reg/i:DI 2 %r2)
        (reg:DI 65)) andc-immediate.c:22 1073 {*movdi_64}
     (expr_list:REG_DEAD (reg:DI 65)
        (nil)))
(insn 13 12 0 2 (use (reg/i:DI 2 %r2)) andc-immediate.c:22 -1
     (nil))
;; Function andc_32_pv (andc_32_pv, funcdef_no=0, decl_uid=1973, cgraph_uid=0, 
symbol_order=0)

starting the processing of deferred insns
ending the processing of deferred insns
df_analyze called
insn_cost 2: 4
insn_cost 6: 4
insn_cost 7: 4
insn_cost 12: 4
insn_cost 13: 0

Trying 2 -> 6:
Successfully matched this instruction:
(parallel [
        (set (reg:SI 64)
            (and:SI (mem:SI (reg:DI 2 %r2 [ a ]) [1 *a_2(D)+0 S4 A32])
                (const_int -65521 [0xffffffffffff000f])))
        (clobber (reg:CC 33 %cc))
    ])
allowing combination of insns 2 and 6
original costs 4 + 4 = 8
replacement cost 4
deferring deletion of insn with uid = 2.
modifying insn i3     6: {r64:SI=[%r2:DI]&0xffffffffffff000f;clobber %cc:CC;}
      REG_DEAD %r2:DI
      REG_UNUSED %cc:CC
deferring rescan insn with uid = 6.

Trying 6 -> 7:
Failed to match this instruction:
(set (reg:DI 65)
    (and:DI (subreg:DI (mem:SI (reg:DI 2 %r2 [ a ]) [1 *a_2(D)+0 S4 A32]) 0)
        (const_int 4294901775 [0xffff000f])))

Trying 7 -> 12:
Successfully matched this instruction:
(set (reg/i:DI 2 %r2)
    (and:DI (subreg:DI (reg:SI 64) 0)
        (const_int 4294901775 [0xffff000f])))
allowing combination of insns 7 and 12
original costs 4 + 4 = 8
replacement cost 4
deferring deletion of insn with uid = 7.
modifying insn i3    12: {%r2:DI=r64:SI#0&0xffff000f;clobber %cc:CC;}
      REG_UNUSED %cc:CC
      REG_DEAD r64:SI
deferring rescan insn with uid = 12.

Trying 6 -> 12:
Failed to match this instruction:
(parallel [
        (set (reg/i:DI 2 %r2)
            (and:DI (subreg:DI (mem:SI (reg:DI 2 %r2 [ a ]) [1 *a_2(D)+0 S4 
A32]) 0)
                (const_int 4294901775 [0xffff000f])))
        (clobber (reg:CC 33 %cc))
    ])
Failed to match this instruction:
(set (reg/i:DI 2 %r2)
    (and:DI (subreg:DI (mem:SI (reg:DI 2 %r2 [ a ]) [1 *a_2(D)+0 S4 A32]) 0)
        (const_int 4294901775 [0xffff000f])))

Trying 12 -> 13:
Failed to match this instruction:
(parallel [
        (use (and:DI (subreg:DI (reg:SI 64) 0)
                (const_int 4294901775 [0xffff000f])))
        (set (reg/i:DI 2 %r2)
            (and:DI (subreg:DI (reg:SI 64) 0)
                (const_int 4294901775 [0xffff000f])))
    ])
Failed to match this instruction:
(parallel [
        (use (and:DI (subreg:DI (reg:SI 64) 0)
                (const_int 4294901775 [0xffff000f])))
        (set (reg/i:DI 2 %r2)
            (and:DI (subreg:DI (reg:SI 64) 0)
                (const_int 4294901775 [0xffff000f])))
    ])

Trying 6, 12 -> 13:
Failed to match this instruction:
(parallel [
        (use (and:DI (subreg:DI (mem:SI (reg:DI 2 %r2 [ a ]) [1 *a_2(D)+0 S4 
A32]) 0)
                (const_int 4294901775 [0xffff000f])))
        (set (reg/i:DI 2 %r2)
            (and:DI (subreg:DI (mem:SI (reg:DI 2 %r2 [ a ]) [1 *a_2(D)+0 S4 
A32]) 0)
                (const_int 4294901775 [0xffff000f])))
    ])
Failed to match this instruction:
(parallel [
        (use (and:DI (subreg:DI (mem:SI (reg:DI 2 %r2 [ a ]) [1 *a_2(D)+0 S4 
A32]) 0)
                (const_int 4294901775 [0xffff000f])))
        (set (reg/i:DI 2 %r2)
            (and:DI (subreg:DI (mem:SI (reg:DI 2 %r2 [ a ]) [1 *a_2(D)+0 S4 
A32]) 0)
                (const_int 4294901775 [0xffff000f])))
    ])
starting the processing of deferred insns
rescanning insn with uid = 6.
rescanning insn with uid = 12.
ending the processing of deferred insns


andc_32_pv

Dataflow summary:
;;  invalidated by call          0 [%r0] 1 [%r1] 2 [%r2] 3 [%r3] 4 [%r4] 5 
[%r5] 16 [%f0] 17 [%f2] 18 [%f4] 19 [%f6] 20 [%f1] 21 [%f3] 22 [%f5] 23 [%f7] 
33 [%cc] 35 [%rp] 38 [%v16] 39 [%v18] 40 [%v20] 41 [%v22] 42 [%v17] 43 [%v19] 
44 [%v21] 45 [%v23] 46 [%v24] 47 [%v26] 48 [%v28] 49 [%v30] 50 [%v25] 51 [%v27] 
52 [%v29] 53 [%v31]
;;  hardware regs used   15 [%r15] 32 [%ap] 34 [%fp]
;;  regular block artificial uses        11 [%r11] 15 [%r15] 32 [%ap] 34 [%fp]
;;  eh block artificial uses     11 [%r11] 15 [%r15] 32 [%ap] 34 [%fp]
;;  entry block defs     0 [%r0] 2 [%r2] 3 [%r3] 4 [%r4] 5 [%r5] 6 [%r6] 11 
[%r11] 14 [%r14] 15 [%r15] 16 [%f0] 17 [%f2] 18 [%f4] 19 [%f6] 32 [%ap] 34 [%fp]
;;  exit block uses      2 [%r2] 11 [%r11] 14 [%r14] 15 [%r15] 34 [%fp]
;;  regs ever live       2 [%r2] 33 [%cc]
;;  ref usage   r0={1d} r2={2d,3u} r3={1d} r4={1d} r5={1d} r6={1d} r11={1d,2u} 
r14={1d,1u} r15={1d,2u} r16={1d} r17={1d} r18={1d} r19={1d} r32={1d,1u} 
r33={2d} r34={1d,2u} r64={1d,1u} 
;;    total ref usage 31{19d,12u,0e} in 3{3 regular + 0 call} insns.
(note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
(note 2 4 3 2 NOTE_INSN_DELETED)
(note 3 2 6 2 NOTE_INSN_FUNCTION_BEG)
(insn 6 3 7 2 (parallel [
            (set (reg:SI 64)
                (and:SI (mem:SI (reg:DI 2 %r2 [ a ]) [1 *a_2(D)+0 S4 A32])
                    (const_int -65521 [0xffffffffffff000f])))
            (clobber (reg:CC 33 %cc))
        ]) andc-immediate.c:21 1481 {*andsi3_zarch}
     (expr_list:REG_DEAD (reg:DI 2 %r2 [ a ])
        (expr_list:REG_UNUSED (reg:CC 33 %cc)
            (nil))))
(note 7 6 12 2 NOTE_INSN_DELETED)
(insn 12 7 13 2 (parallel [
            (set (reg/i:DI 2 %r2)
                (and:DI (subreg:DI (reg:SI 64) 0)
                    (const_int 4294901775 [0xffff000f])))
            (clobber (reg:CC 33 %cc))
        ]) andc-immediate.c:22 1474 {*anddi3}
     (expr_list:REG_UNUSED (reg:CC 33 %cc)
        (expr_list:REG_DEAD (reg:SI 64)
            (nil))))
(insn 13 12 0 2 (use (reg/i:DI 2 %r2)) andc-immediate.c:22 -1
     (nil))
#if 0 /*!!!*/
unsigned long andc_64_vv(unsigned long a) /* operand passed by value */
{
  return ~0xf0lu & a; /* clear bits 4..7 of a, keep every other bit */
}

unsigned long andc_64_pv(unsigned long *a) /* operand read through a pointer */
{
  return ~0xf0lu & *a; /* load *a, then clear bits 4..7 of the loaded value */
}

unsigned int andc_32_vv(unsigned int a) /* operand passed by value */
{
  return 0xffff1234u & a; /* keep only the bits of a that are set in 0xffff1234 */
}
#endif

#if 1 /*!!!*/
unsigned int andc_32_pv(unsigned int *a) /* operand read through a pointer; the variant enabled by the enclosing #if 1 */
{
  return ~0xfff0u & *a; /* load *a and clear bits 4..15; the mask is 0xffff000f when unsigned int is 32 bits wide */
}
#endif

Reply via email to