Changes in directory llvm-test/SingleSource/UnitTests/Vector/SSE:
Makefile added (r1.1)
sse.expandfft.c added (r1.1)
sse.isamax.c added (r1.1)
sse.stepfft.c added (r1.1)
---
Log message:

Added some Altivec and SSE examples from:

Introduction to Parallel Computing
A practical guide with examples in C
Oxford Texts in Applied and Engineering Mathematics No. 9
Oxford University Press, February 2004
ISBN: 0-19-851576-6 (hardback), 0-19-851577-4 (paperback)
http://people.inf.ethz.ch/arbenz/book/

---
Diffs of the changes:  (+585 -0)

 Makefile        |    8 +
 sse.expandfft.c |  263 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 sse.isamax.c    |  119 +++++++++++++++++++++++++
 sse.stepfft.c   |  195 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 585 insertions(+)

Index: llvm-test/SingleSource/UnitTests/Vector/SSE/Makefile
diff -c /dev/null llvm-test/SingleSource/UnitTests/Vector/SSE/Makefile:1.1
*** /dev/null	Mon Apr  3 19:48:04 2006
--- llvm-test/SingleSource/UnitTests/Vector/SSE/Makefile	Mon Apr  3 19:47:54 2006
***************
*** 0 ****
--- 1,8 ----
+ # SingleSource/UnitTests/Vector/SSE/Makefile
+ 
+ DIRS = 
+ LEVEL = ../../../..
+ include $(LEVEL)/SingleSource/Makefile.singlesrc
+ 
+ TARGET_CFLAGS += -msse3
+ LCCFLAGS += -msse3


Index: llvm-test/SingleSource/UnitTests/Vector/SSE/sse.expandfft.c
diff -c /dev/null llvm-test/SingleSource/UnitTests/Vector/SSE/sse.expandfft.c:1.1
*** /dev/null	Mon Apr  3 19:48:08 2006
--- llvm-test/SingleSource/UnitTests/Vector/SSE/sse.expandfft.c	Mon Apr  3 19:47:54 2006
***************
*** 0 ****
--- 1,263 ----
+ #include <stdio.h>
+ #include <math.h>
+ #include <time.h>
+ #include <float.h>
+ #include "xmmintrin.h"
+ #define N 256
+ #define N2 N/2
+ main()
+ {
+ /* 
+    SSE version of cfft2 - uses Intel intrinsics.
+    Expanded version: case 0 checks a forward/back round trip, case 1 times it.
+ 
+    wpp, SAM. Math. ETHZ 21 May, 2002 
+ */
+    int first,i,icase,it,n;
+    float error,fnm1,seed,sign,z0,z1,ggl();
+    float *x,*y,*z,*w;
+    float t1,ln2,mflops;
+    void cffti(),cfft2();
+    /* allocate storage for x,y,z,w on 4-word bndr. (N complex; w holds N/2) */
+    x = (float *)_mm_malloc(8*N, 16);
+    y = (float *)_mm_malloc(8*N, 16);
+    z = (float *)_mm_malloc(8*N, 16);
+    w = (float *)_mm_malloc(4*N, 16);
+    first = 1;
+    seed  = 331.0;
+    for(icase=0;icase<2;icase++){
+       if(first){
+          for(i=0;i<2*N;i+=2){
+             z0 = ggl(&seed);   /* real part of array */
+             z1 = ggl(&seed);   /* imaginary part of array */
+             x[i] = z0;
+             z[i] = z0;         /* copy of initial real data */
+             x[i+1] = z1;
+             z[i+1] = z1;       /* copy of initial imag. data */
+          }
+       } else {
+          for(i=0;i<2*N;i+=2){
+             z0 = 0;            /* real part of array */
+             z1 = 0;            /* imaginary part of array */
+             x[i] = z0;
+             z[i] = z0;         /* copy of initial real data */
+             x[i+1] = z1;
+             z[i+1] = z1;       /* copy of initial imag. data */
+          }
+       }
+       /* initialize sine/cosine tables */
+       n = N;
+       cffti(n,w);
+       /* transform forward, back */
+       if(first){
+          sign = 1.0;
+          cfft2(n,x,y,w,sign);
+          sign = -1.0;
+          cfft2(n,y,x,w,sign);
+          /* results should be same as initial multiplied by N */
+          fnm1 = 1.0/((float) n);
+          error = 0.0;
+          for(i=0;i<2*N;i+=2){
+             error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) +
+                      (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]);
+          }
+          error = sqrt(fnm1*error);
+          printf(" for n=%d, fwd/bck error=%e\n",N,error);
+          first = 0;
+       } else {
+          t1 = ((float)clock())/((float) CLOCKS_PER_SEC);
+          for(it=0;it<1000;it++){
+             sign = +1.0;
+             cfft2(n,x,y,w,sign);
+             sign = -1.0;
+             cfft2(n,y,x,w,sign);
+          }
+          t1 = ((float)clock())/((float) CLOCKS_PER_SEC) - t1;
+          t1 = t1/2000.0;
+          ln2 = 8.0; // log2(N) for N = 256 (was a stale 10.0); reset this for different N
+          mflops = 5.0*((float) N)*ln2/((1.e+6)*t1);
+          printf(" for n=%d, t1=%e, mflops=%e\n",n,t1,mflops);
+       }
+    }
+ }
+ void cfft2(n,x,y,w,sign) /* n-point complex FFT of x into y; x is also used as workspace */
+ int n;                   /* w: table from cffti; sign<0 negates the imaginary twiddle part */
+ float x[][2],y[][2],w[][2],sign;
+ {
+    int jb, jc, jw, k, k2, lj, m, j, mj, mj2, pass, tgle;
+    float (*a)[2],(*b)[2],(*c)[2],(*d)[2];
+    float (*aa)[2],(*bb)[2],(*cb)[2],(*dd)[2];
+    float rp,up,wr[4],wu[4];
+    __m128 V0,V1,V2,V3,V4,V5,V6,V7;
+    __m128 V8,V9,V10,V11,V12,V13,V14,V15;
+ 
+    if(n<=1){
+       y[0][0] = x[0][0];
+       y[0][1] = x[0][1];
+       return;
+    }
+    m    = (int) (log((float) n)/log(1.99)); /* pass count, ~log2(n); presumably 1.99 avoids truncating just below an integer */
+    mj   = 1;
+    mj2  = 2;
+    lj   = n/2;
+ // first pass thru data: x -> y (scalar, one complex point per step)
+    a = (void *)&x[0][0];
+    b = (void *)&x[n/2][0];
+    c = (void *)&y[0][0];
+    d = (void *)&y[1][0];
+    for(j=0;j<lj;j++){
+       jc = j*mj2;
+       rp = w[j][0]; up = w[j][1];
+       if(sign<0.0) up = -up;
+       d[jc][0] = rp*(a[j][0] - b[j][0]) - up*(a[j][1] - b[j][1]);
+       d[jc][1] = up*(a[j][0] - b[j][0]) + rp*(a[j][1] - b[j][1]);
+       c[jc][0] = a[j][0] + b[j][0];
+       c[jc][1] = a[j][1] + b[j][1];
+    }
+    if(n==2) return;
+ // next pass is mj = 2: one __m128 holds two complex points
+    mj  = 2;
+    mj2 = 4;
+    lj  = n/4;
+    a = (void *)&y[0][0];
+    b = (void *)&y[n/2][0];
+    c = (void *)&x[0][0];
+    d = (void *)&x[mj][0];
+    if(n==4){
+       c = (void *)&y[0][0];
+       d = (void *)&y[mj][0];
+    }
+    for(j=0;j<lj;j++){
+       jw = j*mj; jc = j*mj2;
+       rp = w[jw][0]; up = w[jw][1];
+       if(sign<0.0) up = -up;
+       wr[0] = rp; wr[1] = rp; wr[2] = rp; wr[3] = rp;
+       wu[0] = -up; wu[1] = up; wu[2] = -up; wu[3] = up;
+       V6 = _mm_loadu_ps(wr);            /* unaligned load: wr/wu are plain stack arrays */
+       V7 = _mm_loadu_ps(wu);
+       V0 = _mm_load_ps(&a[jw][0]);
+       V1 = _mm_load_ps(&b[jw][0]);
+       V2 = _mm_add_ps(V0,V1);           /* a + b */
+       _mm_store_ps(&c[jc][0],V2);       /* store c */
+       V3 = _mm_sub_ps(V0,V1);           /* a - b */
+       V4 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(2,3,0,1)); /* swap re/im in each pair */
+       V0 = _mm_mul_ps(V6,V3);
+       V1 = _mm_mul_ps(V7,V4);
+       V2 = _mm_add_ps(V0,V1);           /* w*(a - b) */
+       _mm_store_ps(&d[jc][0],V2);       /* store d */
+    }
+    if(n==4) return;
+    mj  *= 2;
+    mj2  = 2*mj;
+    lj   = n/mj2;
+    tgle = 0;
+    for(pass=2;pass<m-1;pass++){
+       if(tgle){
+          a = (void *)&y[0][0];
+          b = (void *)&y[n/2][0];
+          c = (void *)&x[0][0];
+          d = (void *)&x[mj][0];
+          tgle = 0;
+       } else {
+          a = (void *)&x[0][0];
+          b = (void *)&x[n/2][0];
+          c = (void *)&y[0][0];
+          d = (void *)&y[mj][0];
+          tgle = 1;
+       }
+       for(j=0; j<lj; j++){
+          jw = j*mj; jc = j*mj2;
+          rp = w[jw][0];
+          up = w[jw][1];
+          if(sign<0.0) up = -up;
+          wr[0] = rp; wr[1] = rp; wr[2] = rp; wr[3] = rp;
+          wu[0] = -up; wu[1] = up; wu[2] = -up; wu[3] = up;
+          V6 = _mm_loadu_ps(wr);         /* unaligned load: wr/wu are plain stack arrays */
+          V7 = _mm_loadu_ps(wu);
+          for(k=0; k<mj; k+=4){          /* unrolled by two: k and k2 = k+2 */
+             k2 = k + 2;
+             V0 = _mm_load_ps(&a[jw+k][0]);
+             V1 = _mm_load_ps(&b[jw+k][0]);
+             V2 = _mm_add_ps(V0,V1);              /* a + b */
+             _mm_store_ps(&c[jc+k][0],V2);        /* store c */
+             V3 = _mm_sub_ps(V0,V1);              /* a - b */
+             V4 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(2,3,0,1));
+             V0 = _mm_mul_ps(V6,V3);
+             V1 = _mm_mul_ps(V7,V4);
+             V2 = _mm_add_ps(V0,V1);              /* w*(a - b) */
+             _mm_store_ps(&d[jc+k][0],V2);        /* store d */
+             V8 = _mm_load_ps(&a[jw+k2][0]);
+             V9 = _mm_load_ps(&b[jw+k2][0]);
+             V10 = _mm_add_ps(V8,V9);             /* a + b */
+             _mm_store_ps(&c[jc+k2][0],V10);      /* store c */
+             V11 = _mm_sub_ps(V8,V9);             /* a - b */
+             V12 = _mm_shuffle_ps(V11,V11,_MM_SHUFFLE(2,3,0,1));
+             V8 = _mm_mul_ps(V6,V11);
+             V9 = _mm_mul_ps(V7,V12);
+             V10 = _mm_add_ps(V8,V9);             /* w*(a - b) */
+             _mm_store_ps(&d[jc+k2][0],V10);      /* store d */
+          }
+       }
+       mj  *= 2;
+       mj2  = 2*mj;
+       lj   = n/mj2;
+    }
+    /* last pass thru data: in-place if previous in y; no twiddle multiply here */
+    c = (void *)&y[0][0];
+    d = (void *)&y[n/2][0];
+    if(tgle) {
+       a = (void *)&y[0][0];
+       b = (void *)&y[n/2][0];
+    } else {
+       a = (void *)&x[0][0];
+       b = (void *)&x[n/2][0];
+    }
+    for(k=0; k<(n/2); k+=4){
+       k2 = k + 2;
+       V0 = _mm_load_ps(&a[k][0]);
+       V1 = _mm_load_ps(&b[k][0]);
+       V2 = _mm_add_ps(V0,V1);           /* a + b */
+       _mm_store_ps(&c[k][0],V2);        /* store c */
+       V3 = _mm_sub_ps(V0,V1);           /* a - b */
+       _mm_store_ps(&d[k][0],V3);        /* store d */
+       V4 = _mm_load_ps(&a[k2][0]);
+       V5 = _mm_load_ps(&b[k2][0]);
+       V6 = _mm_add_ps(V4,V5);           /* a + b */
+       _mm_store_ps(&c[k2][0],V6);       /* store c */
+       V7 = _mm_sub_ps(V4,V5);           /* a - b */
+       _mm_store_ps(&d[k2][0],V7);       /* store d */
+    }
+ }
+ void cffti(int n, float w[][2]) /* fill w[i] = (cos, sin)(2*pi*i/n) for i < n/2 */
+ {
+    int i,n2;
+    float aw,arg,pi;
+    pi = 3.141592653589793;
+    n2 = n/2;
+    aw = 2.0*pi/((float)n);
+ #pragma vector
+    for(i=0;i<n2;i++){
+       arg = aw*((float)i);
+       w[i][0] = cos(arg);
+       w[i][1] = sin(arg);
+    }
+ }
+ #include <math.h>
+ float ggl(float *ds)
+ {
+ /* generate u(0,1) distributed random numbers.
+    Seed ds must be saved between calls (it is updated in place). ggl is
+    essentially the same as the IMSL routine RNUM.
+ 
+    W. Petersen and M. Troyer, 24 Oct. 2002, ETHZ:
+    a modification of a fortran version from
+    I. Vattulainen, Tampere Univ. of Technology,
+    Finland, 1992 */
+ 
+    double t,d2=0.2147483647e10;
+    t   = (float) *ds;
+    t   = fmod(0.16807e5*t,d2);
+    *ds = (float) t;
+    return((float) ((t-1.0e0)/(d2-1.0e0)));
+ }
+ 


Index: llvm-test/SingleSource/UnitTests/Vector/SSE/sse.isamax.c
diff -c /dev/null llvm-test/SingleSource/UnitTests/Vector/SSE/sse.isamax.c:1.1
*** /dev/null	Mon Apr  3 19:48:08 2006
--- llvm-test/SingleSource/UnitTests/Vector/SSE/sse.isamax.c	Mon Apr  3 19:47:54 2006
***************
*** 0 ****
--- 1,119 ----
+ #include <stdio.h>
+ #include <math.h>
+ #include <float.h>
+ #include "xmmintrin.h"
+ #define N 20
+ main()
+ {
+ /*
+    SSE unit step isamax with alignment code. From Section 
+    3.5.7 of Petersen and Arbenz "Intro. to Parallel Computing," 
+    Oxford Univ. Press, 2004.
+ 
+                          wpp 31/7/2002
+ */
+    float x[N];
+    int i,im;
+    int isamax0(int,float *);
+    for(i=0;i<N;i++){
+       x[i] = -2.0 + (float) i;
+    }
+    x[7] =33.0;
+    im = isamax0(N,x);
+    printf(" maximum index = %d\n",im);
+    printf(" maximum value = %e\n",x[im]);
+ }
+ #define NS 12
+ int isamax0(int n, float *x) /* index of the element of x[0..n-1] with the largest |x[i]| */
+ {
+    float bbig,ebig,bres,*xp;
+    int eres,i,ibbig,iebig,align,nsegs,mb,nn;
+    __m128 offset4,V0,V1,V2,V3,V6,V7;
+    float xbig[8],indx[8];
+ // n < NS done in scalar mode
+    if(n < NS){
+       iebig = 0;
+       bbig  = 0.0;
+       for(i=0;i<n;i++){
+          if(fabs(x[i]) > bbig){
+             bbig  = fabs(x[i]);
+             iebig = i;
+          }
+       }
+       return(iebig);
+    }
+ // n >= NS case done in SSE mode
+    V7 = _mm_set_ps(3.0,2.0,1.0,0.0);  // per-lane index of the running maxima
+    V2 = _mm_set_ps(3.0,2.0,1.0,0.0);  // per-lane index of the segment in V1
+    V6 = _mm_set_ps1(-0.0);            // sign-bit mask used to take |x|
+    offset4 = _mm_set_ps1(4.0);
+    align = (int) (((size_t) x >> 2) & 0x03); // floats past a 16-byte boundary; size_t avoids pointer truncation
+    if(align == 1){       // bres = 3 case
+       bbig = fabsf(x[0]); ibbig = 0;
+       bres = 3.0; nn = n - 3;
+       for(i=1;i<3;i++){
+          if(fabsf(x[i]) > bbig){
+             bbig = fabsf(x[i]); ibbig = i;
+          }
+       }
+    } else if(align == 2){ // bres = 2 case
+       bbig = fabsf(x[0]); ibbig = 0;
+       bres = 2.0; nn = n - 2;
+       if(fabsf(x[1]) > bbig){
+          bbig = fabsf(x[1]); ibbig = 1;
+       }
+    } else if(align == 3){ // bres = 1 case (was a second, unreachable align == 1 test)
+       bbig = fabsf(x[0]); ibbig = 0;
+       bres = 1.0; nn = n - 1;
+    } else {               // bres = 0 case
+       bbig = 0.0; ibbig = 0; nn = n;
+       bres = 0.0;
+    }
+    xp    = x + (int) bres;            // first 16-byte aligned element
+    nsegs = (nn >> 2) - 2;
+    eres  = nn - 4*(nsegs+2);          // elements left after the last full segment
+    V0 = _mm_load_ps(xp); xp += 4;     // first four in 4/time seq.
+    V1 = _mm_load_ps(xp); xp += 4;     // next four in 4/time seq.
+    V0 = _mm_andnot_ps(V6,V0);         // take absolute value
+    for(i=0;i<nsegs;i++){
+       V1 = _mm_andnot_ps(V6,V1);      // take absolute value
+       V3 = _mm_cmpnle_ps(V1,V0);      // compare old max of 4 to new
+       mb = _mm_movemask_ps(V3);       // any of 4 bigger?
+       V2 = _mm_add_ps(V2,offset4);    // add offset
+       if(mb > 0){
+          V0 = _mm_max_ps(V0,V1);
+          V3 = _mm_and_ps(V2,V3);      // keep indices only in lanes that grew
+          V7 = _mm_max_ps(V7,V3);
+       }
+       V1 = _mm_load_ps(xp); xp += 4;  // bottom load next four
+    }
+ // finish up the last segment of 4
+    V1 = _mm_andnot_ps(V6,V1);         // take absolute value
+    V3 = _mm_cmpnle_ps(V1,V0);         // compare old max of 4 to new
+    mb = _mm_movemask_ps(V3);          // any of 4 bigger?
+    V2 = _mm_add_ps(V2,offset4);       // add offset
+    if(mb > 0){
+       V0 = _mm_max_ps(V0,V1);
+       V3 = _mm_and_ps(V2,V3);
+       V7 = _mm_max_ps(V7,V3);
+    }
+ // Now finish up: segment maxima are in V0, indices (relative to x + bres) in V7
+    _mm_storeu_ps(xbig,V0);            // unaligned store: xbig/indx are plain stack arrays
+    _mm_storeu_ps(indx,V7);
+    if(eres>0){
+       for(i=0;i<eres;i++){
+          xbig[4+i] = fabsf(*(xp++));
+          indx[4+i] = (float) (nn-eres+i); // same base as V7 (was nn+i, past the array)
+       }
+    }
+    ebig  = bbig;
+    iebig = ibbig;
+    for(i=0;i<4+eres;i++){
+       if(xbig[i] > ebig){
+          ebig = xbig[i];
+          iebig = (int) indx[i] + (int) bres; // shift past the scalar head to index x
+       }
+    }
+    return(iebig);
+ }
+ #undef NS


Index: llvm-test/SingleSource/UnitTests/Vector/SSE/sse.stepfft.c
diff -c /dev/null llvm-test/SingleSource/UnitTests/Vector/SSE/sse.stepfft.c:1.1
*** /dev/null	Mon Apr  3 19:48:08 2006
--- llvm-test/SingleSource/UnitTests/Vector/SSE/sse.stepfft.c	Mon Apr  3 19:47:54 2006
***************
*** 0 ****
--- 1,195 ----
+ #include <stdio.h>
+ #include <math.h>
+ #include <time.h>
+ #include <float.h>
+ #include "xmmintrin.h"
+ #define N 1024
+ #define N2 N/2
+ main()
+ {
+ /* 
+    SSE version of cfft2 - uses INTEL intrinsics (case 0: round-trip check, case 1: timing)
+        W. Petersen, SAM. Math. ETHZ 2 May, 2002 
+ */
+    int first,i,icase,it,n;
+    float seed,error,fnm1,sign,z0,z1,ggl();
+    float *x,*y,*z,*w;
+    float t1,ln2,mflops;
+    void cffti(),cfft2();
+    /* allocate storage for x,y,z,w on 4-word bndr. (N complex; w holds N/2) */
+    x = (float *)_mm_malloc(8*N, 16);
+    y = (float *)_mm_malloc(8*N, 16);
+    z = (float *)_mm_malloc(8*N, 16);
+    w = (float *)_mm_malloc(4*N, 16);
+    first = 1;
+    seed  = 331.0;
+    for(icase=0;icase<2;icase++){
+       if(first){
+          for(i=0;i<2*N;i+=2){
+             z0 = ggl(&seed);   /* real part of array */
+             z1 = ggl(&seed);   /* imaginary part of array */
+             x[i] = z0;
+             z[i] = z0;         /* copy of initial real data */
+             x[i+1] = z1;
+             z[i+1] = z1;       /* copy of initial imag. data */
+          }
+       } else {
+          for(i=0;i<2*N;i+=2){
+             z0 = 0;            /* real part of array */
+             z1 = 0;            /* imaginary part of array */
+             x[i] = z0;
+             z[i] = z0;         /* copy of initial real data */
+             x[i+1] = z1;
+             z[i+1] = z1;       /* copy of initial imag. data */
+          }
+       }
+       /* initialize sine/cosine tables */
+       n = N;
+       cffti(n,w);
+       /* transform forward, back */
+       if(first){
+          sign = 1.0;
+          cfft2(n,x,y,w,sign);
+          sign = -1.0;
+          cfft2(n,y,x,w,sign);
+          /* results should be same as initial multiplied by N */
+          fnm1 = 1.0/((float) n);
+          error = 0.0;
+          for(i=0;i<2*N;i+=2){
+             error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) +
+                      (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]);
+          }
+          error = sqrt(fnm1*error);
+          printf(" for n=%d, fwd/bck error=%e\n",N,error);
+          first = 0;
+       } else {
+          t1 = ((float)clock())/((float) CLOCKS_PER_SEC);
+          for(it=0;it<10000;it++){
+             sign = +1.0;
+             cfft2(n,x,y,w,sign);
+             sign = -1.0;
+             cfft2(n,y,x,w,sign);
+          }
+          t1 = ((float)clock())/((float) CLOCKS_PER_SEC) - t1;
+          t1 = t1/20000.0;
+          ln2 = 10.0; /* log2(N) for N = 1024; reset this for different N */
+          mflops = 5.0*((float) N)*ln2/((1.e+6)*t1);
+          printf(" for n=%d, t1=%e, mflops=%e\n",n,t1,mflops);
+       }
+    }
+ }
+ void cfft2(n,x,y,w,sign) /* n-point complex FFT of x into y, one step() call per pass */
+ int n;                   /* x is also used as workspace; w comes from cffti */
+ float x[][2],y[][2],w[][2],sign;
+ {
+    int jb, m, j, mj, tgle;
+    void ccopy(),step();
+    m    = (int) (log((float) n)/log(1.99)); /* pass count, ~log2(n) */
+    mj   = 1;
+    tgle = 1;  /* toggling switch for work array */
+    step(n,mj,&x[0][0],&x[n/2][0],&y[0][0],&y[mj][0],w,sign);
+    for(j=0;j<m-2;j++){
+       mj *= 2;
+       if(tgle){
+          step(n,mj,&y[0][0],&y[n/2][0],&x[0][0],&x[mj][0],w,sign);
+          tgle = 0;
+       } else {
+          step(n,mj,&x[0][0],&x[n/2][0],&y[0][0],&y[mj][0],w,sign);
+          tgle = 1;
+       }
+    }
+ /* last pass thru data: move y to x if needed */
+    if(tgle) {
+       ccopy(n,y,x);
+    }
+    mj = n/2;
+    step(n,mj,&x[0][0],&x[n/2][0],&y[0][0],&y[mj][0],w,sign);
+ }
+ void cffti(int n, float w[][2]) /* fill w[i] = (cos, sin)(2*pi*i/n) for i < n/2 */
+ {
+    int i,n2;
+    float aw,arg,pi;
+    pi = 3.141592653589793;
+    n2 = n/2;
+    aw = 2.0*pi/((float)n);
+ #pragma vector
+    for(i=0;i<n2;i++){
+       arg = aw*((float)i);
+       w[i][0] = cos(arg);
+       w[i][1] = sin(arg);
+    }
+ }
+ void ccopy(int n, float x[][2], float y[][2]) /* copy n complex values from x to y */
+ {
+    int i;
+    for(i=0;i<n;i++){
+       y[i][0] = x[i][0];
+       y[i][1] = x[i][1];
+    }
+ }
+ #include <math.h>
+ float ggl(float *ds)
+ {
+ 
+ /* generate u(0,1) distributed random numbers.
+    Seed ds must be saved between calls (it is updated in place). ggl is
+    essentially the same as the IMSL routine RNUM.
+ 
+    W. Petersen and M. Troyer, 24 Oct. 2002, ETHZ:
+    a modification of a fortran version from
+    I. Vattulainen, Tampere Univ. of Technology,
+    Finland, 1992 */
+ 
+    double t,d2=0.2147483647e10;
+    t   = (float) *ds;
+    t   = fmod(0.16807e5*t,d2);
+    *ds = (float) t;
+    return((float) ((t-1.0e0)/(d2-1.0e0)));
+ }
+ void step(n,mj,a,b,c,d,w,sign) /* one FFT pass: c = a + b, d = w*(a - b), in runs of mj points */
+ int n, mj;
+ float a[][2],b[][2],c[][2],d[][2],w[][2],sign;
+ {
+    int j,k,jc,jw,l,lj,mj2,mseg;
+    float rp,up,wr[4],wu[4];
+    __m128 xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7;
+ 
+    mj2 = 2*mj;
+    lj  = n/mj2;
+ 
+    for(j=0; j<lj; j++){
+       jw  = j*mj; jc  = j*mj2;
+       rp = w[jw][0];
+       up = w[jw][1];
+       if(sign<0.0) up = -up;
+       if(mj<2){
+ /* special case mj=1 */
+          d[jc][0] = rp*(a[jw][0] - b[jw][0]) - up*(a[jw][1] - b[jw][1]);
+          d[jc][1] = up*(a[jw][0] - b[jw][0]) + rp*(a[jw][1] - b[jw][1]);
+          c[jc][0] = a[jw][0] + b[jw][0];
+          c[jc][1] = a[jw][1] + b[jw][1];
+       } else {
+ /* mj>=2 case */
+ /*       _mm_prefetch((char *)&a[jw][0],_MM_HINT_NTA); */
+ /*       _mm_prefetch((char *)&b[jw][0],_MM_HINT_NTA); */
+          wr[0] = rp; wr[1] = rp; wr[2] = rp; wr[3] = rp;
+          wu[0] = -up; wu[1] = up; wu[2] = -up; wu[3] = up;
+          xmm6 = _mm_loadu_ps(wr);       /* unaligned load: wr/wu are plain stack arrays */
+          xmm7 = _mm_loadu_ps(wu);
+          for(k=0; k<mj; k+=2){
+ /*          _mm_prefetch((char *)&a[jw+k][0],_MM_HINT_NTA); */
+ /*          _mm_prefetch((char *)&b[jw+k][0],_MM_HINT_NTA); */
+             xmm0 = _mm_load_ps(&a[jw+k][0]);
+             xmm1 = _mm_load_ps(&b[jw+k][0]);
+             xmm2 = _mm_add_ps(xmm0,xmm1);           /* a + b */
+             _mm_store_ps(&c[jc+k][0],xmm2);         /* store c */
+             xmm3 = _mm_sub_ps(xmm0,xmm1);           /* a - b */
+             xmm4 = _mm_shuffle_ps(xmm3,xmm3,_MM_SHUFFLE(2,3,0,1)); /* swap re/im in each pair */
+             xmm0 = _mm_mul_ps(xmm6,xmm3);
+             xmm1 = _mm_mul_ps(xmm7,xmm4);
+             xmm2 = _mm_add_ps(xmm0,xmm1);           /* w*(a - b) */
+             _mm_store_ps(&d[jc+k][0],xmm2);         /* store d */
+          }
+       }
+    }
+ }



_______________________________________________
llvm-commits mailing list
llvm-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits