Author: lattner
Date: Fri Dec 28 15:50:40 2007
New Revision: 45387

URL: http://llvm.org/viewvc/llvm-project?rev=45387&view=rev
Log:
add a note.

Modified:
    llvm/trunk/lib/Target/X86/README.txt

Modified: llvm/trunk/lib/Target/X86/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/README.txt?rev=45387&r1=45386&r2=45387&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/README.txt (original)
+++ llvm/trunk/lib/Target/X86/README.txt Fri Dec 28 15:50:40 2007
@@ -1585,3 +1585,35 @@
 if the flags of the xor are dead.
 
 //===---------------------------------------------------------------------===//
+
+On this testcase (from PR1425), we miss a read/modify/write opportunity:
+
+void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
+    int i;
+    for(i=0; i<width; i++)
+        b1[i] += (1*(b0[i] + b2[i])+0)>>0;
+}
+
+We compile it down to:
+
+LBB1_2:        # bb
+       movl    (%esi,%edi,4), %ebx
+       addl    (%ecx,%edi,4), %ebx
+       addl    (%edx,%edi,4), %ebx
+       movl    %ebx, (%ecx,%edi,4)
+       incl    %edi
+       cmpl    %eax, %edi
+       jne     LBB1_2  # bb
+
+The inner loop should add directly to the memory location (%ecx,%edi,4),
+saving a mov.  Something like:
+
+        movl    (%esi,%edi,4), %ebx
+        addl    (%edx,%edi,4), %ebx
+        addl    %ebx, (%ecx,%edi,4)
+
+Additionally, LSR should rewrite the exit condition of the loop to use
+a stride-4 IV, which would allow all the scales in the loop to go away.
+This would result in smaller code and more efficient microops.
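+
+For illustration only, here is a hand-written sketch (not compiler output)
+of what the loop might look like with both changes applied, assuming %edi
+is rewritten as a byte offset and %eax is set to width*4 before the loop:
+
+LBB1_2:        # bb
+        movl    (%esi,%edi), %ebx
+        addl    (%edx,%edi), %ebx
+        addl    %ebx, (%ecx,%edi)
+        addl    $4, %edi
+        cmpl    %eax, %edi
+        jne     LBB1_2  # bb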
+
+//===---------------------------------------------------------------------===//

