On 24/06/15 03:35, Ian Romanick wrote:
I'd also like to see assembly dumps with and without -fno-strict-aliasing of some place where this goes wrong. If you, Davin, or someone else can point out a specific function that actually does the wrong thing, I can generate assembly myself. For that matter... how the heck is the ralloc (or any memory allocator) implementation valid?

Ok, got one. In src/glsl/link_varying.cpp, function "canonicalize_shader_io". I compiled with gcc 4.8.4 on x86, with CFLAGS="-O3 -fomit-frame-pointer -march=corei7". The method has a loop that looks like this:


/* Remove the variable from it's current location in the IR, and put it at
    * the front.
    */
   for (unsigned i = 0; i < num_variables; i++) {
      var_table[i]->remove();
      ir->push_head(var_table[i]);
   }


With -fno-strict-aliasing (i.e. "correctly compiled") this becomes (with my comments added):

            .p2align 4,,10
            .p2align 3
             # at this point: %ecx = var_table, %eax = ir,
             #    %edi = var_table + num_variables
   .L59:
            movl    (%ecx), %edx # %edx = var_table[i]
            # call to remove() is inlined beginning here:
            movl    4(%edx), %ebp     # %ebp = (%edx)->next
            movl    8(%edx), %esi     # %esi = (%edx)->prev
            movl    %esi, 4(%ebp)     # (%edx)->next->prev = (%edx)->prev
            movl    4(%edx), %esi     # %esi = (%edx)->next(reload)
            movl    8(%edx), %ebp     # %ebp = (%edx)->prev (reload)
            movl    %esi, 0(%ebp)     # (%edx)->prev->next = (%edx)->next
            movl    $0, %esi
            movl    $0, 4(%edx)       # (%edx)->next = NULL
            movl    $0, 8(%edx)       # (%edx)->prev = NULL
            # inlined call to remove() completes.
            movl    (%ecx), %ebp      # %ebp = var_table[i]
            leal    4(%ebp), %edx     # %edx = (struct exec_node *)(%ebp)
            testl   %ebp, %ebp        # NULL check(?!)
            movl    (%eax), %ebp # Note that %eax = ir: %ebp=ir->head
            cmove   %esi, %edx        # %edx = NULL if var_table[i]==NULL
            addl    $4, %ecx          # (i++)
            cmpl    %edi, %ecx        # (i < num_variables?)
            # ir->push_head(var_table[i])
            movl    %eax, 4(%edx)     # var_table[i]->prev = & ir->head
            movl    %ebp, (%edx)      # var_table[i]->next = ir->head
            movl    %edx, 4(%ebp)     # ir->head->prev = var_table[i]
            movl    %edx, (%eax) # ir->head = var_table[i]
            jne     .L59


This more-or-less makes sense (there is an unnecessary NULL check generated by the implicit type conversion of ir_variable* to exec_node*). Now, without -fno-strict-aliasing it's:

            jmp     .L59 # note this jump
            .p2align 4,,10
            .p2align 3
   .L74:
            movl    %esi, %edi
            # %ecx = var_table, %eax = ir, %edi = ir->head
   .L59:
            # inlined remove():
            movl    (%ecx), %edx # %edx = var_table[i]
            movl    4(%edx), %esi   # %esi = (%edx)->next
            movl    8(%edx), %ebp   # %ebp = (%edx)->prev
            movl    %ebp, 4(%esi)   # (%esi)->next->prev = (%edx)->prev
            movl    8(%edx), %ebp   # re-load (%edx)->prev
            # in this case reload of ->next is omitted
            movl    %esi, 0(%ebp)   #(%edx)->prev->next = (%edx)->next
            movl    $0, 4(%edx) # (%edx)->next = NULL
            movl    $0, 8(%edx)     # (%edx)->prev = NULL

            movl    (%ecx), %edx    # %edx = var_table[i]
            leal    4(%edx), %esi   # %esi = (struct exec node *)%edx
            testl   %edx, %edx      # NULL check
            movl    $0, %edx
            cmove   %edx, %esi
            addl    $4, %ecx        # (i++)
            cmpl    28(%esp), %ecx  # (i < num_variables?)
            movl    %edi, (%esi)    # var_table[i]->next = ir->head
            movl    %eax, 4(%esi)   # var_table[i]->prev = & ir->head
            movl    %esi, 4(%edi)   #ir->head->prev = var_table[i]
            jne     .L74
            movl    %esi, (%eax)    # ir->head = var_table[i]


Note that it doesn't update ir->head in memory until right at the end of the loop (i.e. it collapses all writes to ir->head into a single write). The current value of ir->head is kept in %edi, and is neither stored in nor re-loaded from memory during the loop. This is the problem: the line that sets "(%edx)->prev->next = (%edx)->next" can in fact store to ir->head (which is an aliasing violation), so it does need to be reloaded after that line (and stored before it).

Davin


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to