On closer inspection, the sse2 test code in http://www.loria.fr/~thome/vrac/gf2x-201309240733-4b5636d.tar.gz, the code in the previously linked http://www.loria.fr/~thome/vrac/gf2x-201309232352-49027b2.tar.gz, and the code above all compile (gcc -msse2 test.c); however, when they are run, the output of: ./a.out || echo $? is: Illegal instruction (core dumped) 132
However... if I add in the flags that I think you are using (-std=gnu99 -g -O4 -Wall -W), it now compiles and runs. I am attaching the file that compiles and its object dump. You may be able to come up with something better that breaks optimization. For reference, the following does; it may be simpler to just drop the optimization flags for the purpose of the system testing (since there could in principle be other tests going on that also silently pass when they should fail). #include <stdlib.h> #include <emmintrin.h> #include <stdio.h> __v2di x; /* Our code currently uses these, but it should not */ int main() { __m128i foo = _mm_setr_epi32(rand(), rand(), rand(), rand()); __m128i bar = _mm_setr_epi32(rand(), 0x26622626, 0xabbabbab, 0xfeed1664); foo = _mm_mullo_epi16(foo, bar); if( _mm_extract_epi16(foo, 0) ) { printf("XX"); return 0; } else { printf("YY"); return _mm_extract_epi16(foo, 0); } } - Andrew On Tue, Sep 24, 2013 at 1:44 AM, Emmanuel Thomé <emmanuel.th...@gmail.com> wrote: > > On Tuesday, September 24, 2013 2:41:58 AM UTC+2, Andrew Fiori wrote: > >> The linked tar.gz file doesn't appear to fix the problem (I am assuming >> this is what you wanted me to test). >> It still tries to compile with sse2 on the non-sse 2 machine >> >> I am attaching the config.log as well as the output from both make and >> make tune-lowlevel. >> > > ok ; It seems that optimization got me. > Please try again with the repository tip, or > http://www.loria.fr/~thome/vrac/gf2x-201309240733-4b5636d.tar.gz > > E. > >> >> -- > You received this message because you are subscribed to a topic in the > Google Groups "sage-devel" group. > To unsubscribe from this topic, visit > https://groups.google.com/d/topic/sage-devel/Cp1lDMzyCrU/unsubscribe. > To unsubscribe from this group and all its topics, send an email to > sage-devel+unsubscr...@googlegroups.com. > To post to this group, send email to sage-devel@googlegroups.com. > Visit this group at http://groups.google.com/group/sage-devel. 
> For more options, visit https://groups.google.com/groups/opt_out. > -- You received this message because you are subscribed to the Google Groups "sage-devel" group. To unsubscribe from this group and stop receiving emails from it, send an email to sage-devel+unsubscr...@googlegroups.com. To post to this group, send email to sage-devel@googlegroups.com. Visit this group at http://groups.google.com/group/sage-devel. For more options, visit https://groups.google.com/groups/opt_out.
./a.out: file format elf32-i386 Disassembly of section .init: 08048294 <_init>: 8048294: 53 push %ebx 8048295: 83 ec 08 sub $0x8,%esp 8048298: e8 00 00 00 00 call 804829d <_init+0x9> 804829d: 5b pop %ebx 804829e: 81 c3 57 1d 00 00 add $0x1d57,%ebx 80482a4: 8b 83 fc ff ff ff mov -0x4(%ebx),%eax 80482aa: 85 c0 test %eax,%eax 80482ac: 74 05 je 80482b3 <_init+0x1f> 80482ae: e8 2d 00 00 00 call 80482e0 <__gmon_start__@plt> 80482b3: e8 e8 00 00 00 call 80483a0 <frame_dummy> 80482b8: e8 93 01 00 00 call 8048450 <__do_global_ctors_aux> 80482bd: 83 c4 08 add $0x8,%esp 80482c0: 5b pop %ebx 80482c1: c3 ret Disassembly of section .plt: 080482d0 <__gmon_start__@plt-0x10>: 80482d0: ff 35 f8 9f 04 08 pushl 0x8049ff8 80482d6: ff 25 fc 9f 04 08 jmp *0x8049ffc 80482dc: 00 00 add %al,(%eax) ... 080482e0 <__gmon_start__@plt>: 80482e0: ff 25 00 a0 04 08 jmp *0x804a000 80482e6: 68 00 00 00 00 push $0x0 80482eb: e9 e0 ff ff ff jmp 80482d0 <_init+0x3c> 080482f0 <__libc_start_main@plt>: 80482f0: ff 25 04 a0 04 08 jmp *0x804a004 80482f6: 68 08 00 00 00 push $0x8 80482fb: e9 d0 ff ff ff jmp 80482d0 <_init+0x3c> Disassembly of section .text: 08048300 <main>: 8048300: 55 push %ebp 8048301: b8 00 b2 00 00 mov $0xb200,%eax 8048306: 89 e5 mov %esp,%ebp 8048308: 83 e4 f0 and $0xfffffff0,%esp 804830b: c9 leave 804830c: c3 ret 804830d: 90 nop 804830e: 90 nop 804830f: 90 nop 08048310 <_start>: 8048310: 31 ed xor %ebp,%ebp 8048312: 5e pop %esi 8048313: 89 e1 mov %esp,%ecx 8048315: 83 e4 f0 and $0xfffffff0,%esp 8048318: 50 push %eax 8048319: 54 push %esp 804831a: 52 push %edx 804831b: 68 40 84 04 08 push $0x8048440 8048320: 68 d0 83 04 08 push $0x80483d0 8048325: 51 push %ecx 8048326: 56 push %esi 8048327: 68 00 83 04 08 push $0x8048300 804832c: e8 bf ff ff ff call 80482f0 <__libc_start_main@plt> 8048331: f4 hlt 8048332: 90 nop 8048333: 90 nop 8048334: 90 nop 8048335: 90 nop 8048336: 90 nop 8048337: 90 nop 8048338: 90 nop 8048339: 90 nop 804833a: 90 nop 804833b: 90 nop 804833c: 90 nop 804833d: 90 nop 
804833e: 90 nop 804833f: 90 nop 08048340 <__do_global_dtors_aux>: 8048340: 55 push %ebp 8048341: 89 e5 mov %esp,%ebp 8048343: 53 push %ebx 8048344: 83 ec 04 sub $0x4,%esp 8048347: 80 3d 10 a0 04 08 00 cmpb $0x0,0x804a010 804834e: 75 3f jne 804838f <__do_global_dtors_aux+0x4f> 8048350: a1 14 a0 04 08 mov 0x804a014,%eax 8048355: bb 20 9f 04 08 mov $0x8049f20,%ebx 804835a: 81 eb 1c 9f 04 08 sub $0x8049f1c,%ebx 8048360: c1 fb 02 sar $0x2,%ebx 8048363: 83 eb 01 sub $0x1,%ebx 8048366: 39 d8 cmp %ebx,%eax 8048368: 73 1e jae 8048388 <__do_global_dtors_aux+0x48> 804836a: 8d b6 00 00 00 00 lea 0x0(%esi),%esi 8048370: 83 c0 01 add $0x1,%eax 8048373: a3 14 a0 04 08 mov %eax,0x804a014 8048378: ff 14 85 1c 9f 04 08 call *0x8049f1c(,%eax,4) 804837f: a1 14 a0 04 08 mov 0x804a014,%eax 8048384: 39 d8 cmp %ebx,%eax 8048386: 72 e8 jb 8048370 <__do_global_dtors_aux+0x30> 8048388: c6 05 10 a0 04 08 01 movb $0x1,0x804a010 804838f: 83 c4 04 add $0x4,%esp 8048392: 5b pop %ebx 8048393: 5d pop %ebp 8048394: c3 ret 8048395: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi 8048399: 8d bc 27 00 00 00 00 lea 0x0(%edi,%eiz,1),%edi 080483a0 <frame_dummy>: 80483a0: 55 push %ebp 80483a1: 89 e5 mov %esp,%ebp 80483a3: 83 ec 18 sub $0x18,%esp 80483a6: a1 24 9f 04 08 mov 0x8049f24,%eax 80483ab: 85 c0 test %eax,%eax 80483ad: 74 12 je 80483c1 <frame_dummy+0x21> 80483af: b8 00 00 00 00 mov $0x0,%eax 80483b4: 85 c0 test %eax,%eax 80483b6: 74 09 je 80483c1 <frame_dummy+0x21> 80483b8: c7 04 24 24 9f 04 08 movl $0x8049f24,(%esp) 80483bf: ff d0 call *%eax 80483c1: c9 leave 80483c2: c3 ret 80483c3: 90 nop 80483c4: 90 nop 80483c5: 90 nop 80483c6: 90 nop 80483c7: 90 nop 80483c8: 90 nop 80483c9: 90 nop 80483ca: 90 nop 80483cb: 90 nop 80483cc: 90 nop 80483cd: 90 nop 80483ce: 90 nop 80483cf: 90 nop 080483d0 <__libc_csu_init>: 80483d0: 55 push %ebp 80483d1: 57 push %edi 80483d2: 56 push %esi 80483d3: 53 push %ebx 80483d4: e8 69 00 00 00 call 8048442 <__i686.get_pc_thunk.bx> 80483d9: 81 c3 1b 1c 00 00 add $0x1c1b,%ebx 80483df: 83 
ec 1c sub $0x1c,%esp 80483e2: 8b 6c 24 30 mov 0x30(%esp),%ebp 80483e6: 8d bb 20 ff ff ff lea -0xe0(%ebx),%edi 80483ec: e8 a3 fe ff ff call 8048294 <_init> 80483f1: 8d 83 20 ff ff ff lea -0xe0(%ebx),%eax 80483f7: 29 c7 sub %eax,%edi 80483f9: c1 ff 02 sar $0x2,%edi 80483fc: 85 ff test %edi,%edi 80483fe: 74 29 je 8048429 <__libc_csu_init+0x59> 8048400: 31 f6 xor %esi,%esi 8048402: 8d b6 00 00 00 00 lea 0x0(%esi),%esi 8048408: 8b 44 24 38 mov 0x38(%esp),%eax 804840c: 89 2c 24 mov %ebp,(%esp) 804840f: 89 44 24 08 mov %eax,0x8(%esp) 8048413: 8b 44 24 34 mov 0x34(%esp),%eax 8048417: 89 44 24 04 mov %eax,0x4(%esp) 804841b: ff 94 b3 20 ff ff ff call *-0xe0(%ebx,%esi,4) 8048422: 83 c6 01 add $0x1,%esi 8048425: 39 fe cmp %edi,%esi 8048427: 75 df jne 8048408 <__libc_csu_init+0x38> 8048429: 83 c4 1c add $0x1c,%esp 804842c: 5b pop %ebx 804842d: 5e pop %esi 804842e: 5f pop %edi 804842f: 5d pop %ebp 8048430: c3 ret 8048431: eb 0d jmp 8048440 <__libc_csu_fini> 8048433: 90 nop 8048434: 90 nop 8048435: 90 nop 8048436: 90 nop 8048437: 90 nop 8048438: 90 nop 8048439: 90 nop 804843a: 90 nop 804843b: 90 nop 804843c: 90 nop 804843d: 90 nop 804843e: 90 nop 804843f: 90 nop 08048440 <__libc_csu_fini>: 8048440: f3 c3 repz ret 08048442 <__i686.get_pc_thunk.bx>: 8048442: 8b 1c 24 mov (%esp),%ebx 8048445: c3 ret 8048446: 90 nop 8048447: 90 nop 8048448: 90 nop 8048449: 90 nop 804844a: 90 nop 804844b: 90 nop 804844c: 90 nop 804844d: 90 nop 804844e: 90 nop 804844f: 90 nop 08048450 <__do_global_ctors_aux>: 8048450: 55 push %ebp 8048451: 89 e5 mov %esp,%ebp 8048453: 53 push %ebx 8048454: 83 ec 04 sub $0x4,%esp 8048457: a1 14 9f 04 08 mov 0x8049f14,%eax 804845c: 83 f8 ff cmp $0xffffffff,%eax 804845f: 74 13 je 8048474 <__do_global_ctors_aux+0x24> 8048461: bb 14 9f 04 08 mov $0x8049f14,%ebx 8048466: 66 90 xchg %ax,%ax 8048468: 83 eb 04 sub $0x4,%ebx 804846b: ff d0 call *%eax 804846d: 8b 03 mov (%ebx),%eax 804846f: 83 f8 ff cmp $0xffffffff,%eax 8048472: 75 f4 jne 8048468 <__do_global_ctors_aux+0x18> 
8048474: 83 c4 04 add $0x4,%esp 8048477: 5b pop %ebx 8048478: 5d pop %ebp 8048479: c3 ret 804847a: 90 nop 804847b: 90 nop Disassembly of section .fini: 0804847c <_fini>: 804847c: 53 push %ebx 804847d: 83 ec 08 sub $0x8,%esp 8048480: e8 00 00 00 00 call 8048485 <_fini+0x9> 8048485: 5b pop %ebx 8048486: 81 c3 6f 1b 00 00 add $0x1b6f,%ebx 804848c: e8 af fe ff ff call 8048340 <__do_global_dtors_aux> 8048491: 83 c4 08 add $0x8,%esp 8048494: 5b pop %ebx 8048495: c3 ret
#include <emmintrin.h>

/* Our code currently uses these, but it should not */
__v2di x;

/*
 * SSE2 probe: multiplies two vectors lane-wise as 16-bit integers and
 * returns 16-bit lane 0 of the product as the process exit status.
 *
 * The operands are read through volatile objects so that the compiler
 * cannot evaluate the multiplication at compile time.  With literal
 * operands an optimizing build folds the whole computation into a single
 * constant and emits no SSE2 instruction at all, so the program exits
 * cleanly even on a CPU that lacks SSE2 and the probe silently passes.
 * The operand values, and therefore the returned value, are the same as
 * with the literal operands.
 */
int main(void)
{
    static volatile unsigned int lhs[4] = {
        0x8cab1e00u, 0x12345678u, 0xdeadbeefu, 0xbebecafeu
    };
    static volatile unsigned int rhs[4] = {
        0x12323717u, 0x26622626u, 0xabbabbabu, 0xfeed1664u
    };

    /* Each volatile read must happen at run time, which forces the vector
     * construction, multiply and extract to be emitted as real code. */
    __m128i foo = _mm_setr_epi32((int)lhs[0], (int)lhs[1],
                                 (int)lhs[2], (int)lhs[3]);
    __m128i bar = _mm_setr_epi32((int)rhs[0], (int)rhs[1],
                                 (int)rhs[2], (int)rhs[3]);

    foo = _mm_mullo_epi16(foo, bar);
    return _mm_extract_epi16(foo, 0);
}