https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83920

--- Comment #9 from cesar at gcc dot gnu.org ---
I figured out why my patch doesn't work. Here's the assembly code for your C test
case at -O0:

        {
                .reg.u32        %x;
                mov.u32 %x, %tid.x;
                setp.ne.u32     %r81, %x, 0;
        }
        @%r81   bra     $L11;
                mov.u64 %r58, %ar0;
                st.u64  [%frame+32], %r58;
                mov.u64 %r59, %ar1;
                st.u64  [%frame+40], %r59;
                ld.u64  %r60, [%frame+40];
                cvt.u32.u64     %r26, %r60;
                st.u32  [%frame], %r26;
                ld.u64  %r61, [%frame+32];
                st.u64  [%frame+8], %r61;
                mov.u32 %r22, 0;
                mov.u32 %r28, 1;
                mov.u32 %r29, 1;
$L11:
$L9:
                mov.pred        %r82, %r62;
                setp.eq.u32     %r62, 1, 0;
        @%r81   bra     $L12;
                mov.u32 %r55, %nctaid.x;
                mov.u32 %r56, %ctaid.x;
                mov.u32 %r48, 9;
                add.u32 %r49, %r55, %r48;
                div.s32 %r50, %r49, %r55;
                mul.lo.u32      %r23, %r56, %r50;
                mov.u32 %r57, %nctaid.x;
                mov.u32 %r51, 9;
                add.u32 %r52, %r57, %r51;
                div.s32 %r53, %r52, %r57;
                add.u32 %r54, %r23, %r53;
                min.s32 %r30, %r54, 10;
                setp.ge.s32     %r62, %r23, %r30;
$L12:
                mov.pred        %r62, %r82;
                selp.u32        %r83, 1, 0, %r62;
                shfl.idx.b32    %r83, %r83, 0, 31;
                setp.ne.u32     %r62, %r83, 0;
        @%r62   bra.uni $L2;
$L4:
                mov.pred        %r84, %r64;
                setp.eq.u32     %r64, 1, 0;
        @%r81   bra     $L13;

The predicate register in question here is %r62. Notice how the JIT workaround
clobbers %r62 much earlier than it's defined. My patch just copied the
predicate register before it was clobbered. That's fine, but when it restores
the value of %r62 at $L12, %r62 gets an uninitialized value.

Reply via email to