Dear Robert, a bug has been reported against the Debian lame package on arch i386 [1]. The library fails with a SIGSEGV in init_xrpow_core_sse(). As it turns out, this is a general-protection exception triggered by non-aligned memory access. Bernhard Übelacker succeeded in creating a patch that forces the variables into alignment by using posix_memalign(), but we are not sure whether there may be more suitable solutions to this issue.
I am asking you for input on this issue. Could you please have a look at the patch Bernhard provides and share your opinion? NB: This issue has not been raised before, because somehow the ./configure script failed to detect the presence of the xmmintrin.h header in previous package versions and thus decided that it would not want to "build the internal vector lib". This has apparently just changed recently. Best regards, Fabian [1] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=786438
--- Begin Message ---Hello Fabian, hello Detrick, I could reproduce a SIGSEGV on arch i386 inside qemu VM by these actions: (amd64 did not show the fault) - apt-get install icecast2 liquidsoap liquidsoap-plugin-icecast liquidsoap-plugin-lame liquidsoap-plugin-mad liquidsoap-plugin-ogg liquidsoap-plugin-vorbis - enable and start icecast2 (/etc/default/icecast2) - get a mp3 file and put it to current directory as test.mp3 - create test.sh - start liquidsoap in debugger "gdb --args /usr/bin/liquidsoap test.sh" content of test.sh: #!/usr/bin/liquidsoap set("log.file.path","/dev/stdout") myplaylist = single("test.mp3") output.icecast(%mp3, host = "localhost", port = 8000, password = "hackme", mount = "basic-radio.ogg", myplaylist) (gdb) bt #0 0xb76e30c9 in init_xrpow_core_sse () from /usr/lib/i386-linux-gnu/libmp3lame.so.0 #1 0xb76d2ebf in ?? () from /usr/lib/i386-linux-gnu/libmp3lame.so.0 #2 0xb76d65e6 in CBR_iteration_loop () from /usr/lib/i386-linux-gnu/libmp3lame.so.0 #3 0xb76c3e27 in lame_encode_mp3_frame () from /usr/lib/i386-linux-gnu/libmp3lame.so.0 #4 0xb76c8e4f in ?? () from /usr/lib/i386-linux-gnu/libmp3lame.so.0 #5 0xb76c9b48 in lame_encode_buffer_float () from /usr/lib/i386-linux-gnu/libmp3lame.so.0 #6 0xb781cf77 in ocaml_lame_encode_buffer_float () from /usr/lib/liquidsoap/1.1.1/plugins/lame.cmxs #7 0xb781b72f in camlLame__fun_1175 () from /usr/lib/liquidsoap/1.1.1/plugins/lame.cmxs #8 0x0820b06f in camlOutput__f_1354 () #9 0x0820b0c5 in camlOutput__fun_1740 () #10 0x0820b7dd in camlOutput__fun_1600 () #11 0x08257c6f in camlClock__fun_1848 () #12 0x08310b3c in camlList__fold_left_1073 () #13 0x08258740 in camlClock__fun_1813 () #14 0x082582a5 in camlClock__loop_1351 () #15 0x08258daa in camlClock__fun_2074 () #16 0x082703e3 in camlTutils__fun_1346 () #17 0x08307ac8 in camlThread__fun_1081 () #18 0x083612fa in caml_start_program () #19 0x0834a675 in ?? () #20 0xb7f1befb in start_thread (arg=0x79e8b781) at pthread_create.c:309 #21 0xcde0b850 in ?? 
() #22 0x79e8b781 in ?? () #23 0x8350b45b in ?? () #24 0x8dc314c4 in ?? () #25 0x000000b6 in ?? () #26 0x27bc8d00 in ?? () #27 0x00000000 in ?? () Building libmp3lame0 with debug information: - export DEB_BUILD_OPTIONS=nostrip - apt-get build-dep libmp3lame0 - apt-get source libmp3lame0 - dpkg-buildpackage -b - dpkg -i libmp3lame0_*.deb - gdb --args /usr/bin/liquidsoap test.sh Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 0xb49ffb40 (LWP 20251)] init_xrpow_core_sse (cod_info=0x8731a00, xrpow=0xb49fa6f4, upper=575, sum=0xb49fa5d0) at xmm_quantize_sub.c:73 73 vec_xrpow_max._m128 = _mm_set_ps1(0); (gdb) bt #0 init_xrpow_core_sse (cod_info=0x8731a00, xrpow=0xb49fa6f4, upper=575, sum=0xb49fa5d0) at xmm_quantize_sub.c:73 #1 0xb76cf71f in init_xrpow (gfc=0x87318d0, cod_info=0x8731a00, xrpow=0xb49fa6f4) at quantize.c:127 #2 0xb76d2cc6 in CBR_iteration_loop (gfc=0x87318d0, pe=0xb49fb0c4, ms_ener_ratio=0xb49fb09c, ratio=0xb49fd06c) at quantize.c:2034 #3 0xb76c0c37 in lame_encode_mp3_frame (gfc=gfc@entry=0x87318d0, inbuf_l=0x873e48c, inbuf_r=0x87422cc, mp3buf=mp3buf@entry=0xb4a081b9 "", mp3buf_size=mp3buf_size@entry=8988) at encoder.c:518 #4 0xb76c5a22 in lame_encode_buffer_sample_t (mp3buf_size=9405, mp3buf=0xb4a081b9 "", nsamples=<optimized out>, gfc=<optimized out>) at lame.c:1786 #5 lame_encode_buffer_template (gfp=gfp@entry=0x865f580, buffer_l=buffer_l@entry=0xb4a048e8, buffer_r=buffer_r@entry=0xb4a06480, nsamples=nsamples@entry=1764, mp3buf=mp3buf@entry=0xb4a08018 "\377\373\220D", mp3buf_size=mp3buf_size@entry=9405, pcm_type=pcm_type@entry=pcm_float_type, aa=aa@entry=1, norm=norm@entry=1) at lame.c:1897 #6 0xb76c6648 in lame_encode_buffer_float (gfp=0x865f580, pcm_l=0xb4a048e8, pcm_r=0xb4a06480, nsamples=1764, mp3buf=0xb4a08018 "\377\373\220D", mp3buf_size=9405) at lame.c:1918 #7 0xb7819f77 in ocaml_lame_encode_buffer_float () from /usr/lib/liquidsoap/1.1.1/plugins/lame.cmxs #8 0xb781872f in camlLame__fun_1175 () from 
/usr/lib/liquidsoap/1.1.1/plugins/lame.cmxs #9 0x0820b07f in camlOutput__f_1354 () #10 0x0820b0d5 in camlOutput__fun_1740 () #11 0x0820b7ed in camlOutput__fun_1600 () #12 0x08257c7f in camlClock__fun_1848 () #13 0x08310b4c in camlList__fold_left_1073 () #14 0x08258750 in camlClock__fun_1813 () #15 0x082582b5 in camlClock__loop_1351 () #16 0x08258dba in camlClock__fun_2074 () #17 0x082703f3 in camlTutils__fun_1346 () #18 0x08307ad8 in camlThread__fun_1081 () at thread.ml:37 #19 0x08360506 in caml_start_program () #20 0x0834a6b4 in caml_thread_start () #21 0xb7f18efb in start_thread (arg=0x85e8b781) at pthread_create.c:309 #22 0x9de0b850 in ?? () #23 0x85e8b781 in ?? () #24 0x8350b47d in ?? () #25 0x8dc314c4 in ?? () #26 0x000000b6 in ?? () #27 0x27bc8d00 in ?? () #28 0x00000000 in ?? () (gdb) bt full 1 #0 init_xrpow_core_sse (cod_info=0x8731a00, xrpow=0xb49fa6f4, upper=575, sum=0xb49fa5d0) at xmm_quantize_sub.c:73 i = <optimized out> tmp_max = 0 tmp_sum = 0 upper4 = 572 rest = 3 fabs_mask = {_i_32 = {2147483647, 2147483647, 2147483647, 2147483647}, _float = {nan(0x7fffff), nan(0x7fffff), nan(0x7fffff), nan(0x7fffff)}, _m128 = {nan(0x7fffff), nan(0x7fffff), nan(0x7fffff), nan(0x7fffff)}} vec_xrpow_max = {_i_32 = {-1264605616, 141760720, -1264602940, -1217579639}, _float = {-2.97370661e-07, 7.31543195e-34, -2.97446718e-07, -1.41387654e-05}, _m128 = {-2.97370661e-07, 7.31543195e-34, -2.97446718e-07, -1.41387654e-05}} vec_sum = {_i_32 = {141760720, 1371, -1264605752, -1264605756}, _float = {7.31543195e-34, 1.92118019e-42, -2.97366796e-07, -2.97366682e-07}, _m128 = {7.31543195e-34, 1.92118019e-42, -2.97366796e-07, -2.97366682e-07}} vec_tmp = {_i_32 = {0, 0, 0, 0}, _float = {0, 0, 0, 0}, _m128 = {0, 0, 0, 0}} (More stack frames follow...) 
(gdb) display /i $pc 1: x/i $pc => 0xb76df6c9 <init_xrpow_core_sse+105>: movaps %xmm0,0x20(%esp) (gdb) info reg xmm0 xmm0 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 = 0x00000000000000000000000000000000} (gdb) info reg esp esp 0xb49fa538 0xb49fa538 (gdb) print &vec_xrpow_max $16 = (vecfloat_union *) 0xb49fa558 (gdb) print /x $esp + 0x20 $12 = 0xb49fa558 (gdb) print /x ((0xb49fa538 + 0x20) / 16) * 16 $15 = 0xb49fa550 From http://x86.renejeschke.de/html/file_module_x86_id_180.html : When the source or destination operand is a memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) is generated. In our case we seem not to be on a 16-byte boundary. I tried to declare the variable with "__attribute__ ((aligned (16)))" but that did not change anything. Therefore I changed the variables to pointers and allocated memory via posix_memalign (see attached patch). That way the crash did not happen anymore. But there must be a better way to achieve this. So probably this is a problem of libmp3lame0 and just on arch i386. 
Kind regards, Bernhard Description: Get aligned memory in xmm_quantize_sub.c Otherwise the SSE instructions generate a SIGSEGV Author: Bernhard Übelacker <bernha...@vr-web.de> Bug-Debian: https://bugs.debian.org/786438 Last-Update: 2015-05-29 --- lame-3.99.5+repack1.orig/libmp3lame/vector/xmm_quantize_sub.c +++ lame-3.99.5+repack1/libmp3lame/vector/xmm_quantize_sub.c @@ -62,54 +62,65 @@ init_xrpow_core_sse(gr_info * const cod_ int rest = upper-upper4; const vecfloat_union fabs_mask = {{ 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }}; - const __m128 vec_fabs_mask = _mm_loadu_ps(&fabs_mask._float[0]); - vecfloat_union vec_xrpow_max; - vecfloat_union vec_sum; - vecfloat_union vec_tmp; + __m128 *vec_fabs_mask = NULL; + vecfloat_union *vec_xrpow_max = NULL; + vecfloat_union *vec_sum = NULL; + vecfloat_union *vec_tmp = NULL; + + posix_memalign((void**)&vec_fabs_mask, 16, sizeof(*vec_fabs_mask)); + posix_memalign((void**)&vec_xrpow_max, 16, sizeof(*vec_xrpow_max)); + posix_memalign((void**)&vec_sum, 16, sizeof(*vec_sum)); + posix_memalign((void**)&vec_tmp, 16, sizeof(*vec_tmp)); + *vec_fabs_mask = _mm_loadu_ps(&fabs_mask._float[0]); _mm_prefetch((char *) cod_info->xr, _MM_HINT_T0); _mm_prefetch((char *) xrpow, _MM_HINT_T0); - vec_xrpow_max._m128 = _mm_set_ps1(0); - vec_sum._m128 = _mm_set_ps1(0); + vec_xrpow_max->_m128 = _mm_set_ps1(0); + vec_sum->_m128 = _mm_set_ps1(0); for (i = 0; i < upper4; i += 4) { - vec_tmp._m128 = _mm_loadu_ps(&(cod_info->xr[i])); /* load */ - vec_tmp._m128 = _mm_and_ps(vec_tmp._m128, vec_fabs_mask); /* fabs */ - vec_sum._m128 = _mm_add_ps(vec_sum._m128, vec_tmp._m128); - vec_tmp._m128 = _mm_sqrt_ps(_mm_mul_ps(vec_tmp._m128, _mm_sqrt_ps(vec_tmp._m128))); - vec_xrpow_max._m128 = _mm_max_ps(vec_xrpow_max._m128, vec_tmp._m128); /* retrieve max */ - _mm_storeu_ps(&(xrpow[i]), vec_tmp._m128); /* store into xrpow[] */ + vec_tmp->_m128 = _mm_loadu_ps(&(cod_info->xr[i])); /* load */ + vec_tmp->_m128 = _mm_and_ps(vec_tmp->_m128, *vec_fabs_mask); /* fabs 
*/ + vec_sum->_m128 = _mm_add_ps(vec_sum->_m128, vec_tmp->_m128); + vec_tmp->_m128 = _mm_sqrt_ps(_mm_mul_ps(vec_tmp->_m128, _mm_sqrt_ps(vec_tmp->_m128))); + vec_xrpow_max->_m128 = _mm_max_ps(vec_xrpow_max->_m128, vec_tmp->_m128); /* retrieve max */ + _mm_storeu_ps(&(xrpow[i]), vec_tmp->_m128); /* store into xrpow[] */ } - vec_tmp._m128 = _mm_set_ps1(0); + vec_tmp->_m128 = _mm_set_ps1(0); switch (rest) { - case 3: vec_tmp._float[2] = cod_info->xr[upper4+2]; - case 2: vec_tmp._float[1] = cod_info->xr[upper4+1]; - case 1: vec_tmp._float[0] = cod_info->xr[upper4+0]; - vec_tmp._m128 = _mm_and_ps(vec_tmp._m128, vec_fabs_mask); /* fabs */ - vec_sum._m128 = _mm_add_ps(vec_sum._m128, vec_tmp._m128); - vec_tmp._m128 = _mm_sqrt_ps(_mm_mul_ps(vec_tmp._m128, _mm_sqrt_ps(vec_tmp._m128))); - vec_xrpow_max._m128 = _mm_max_ps(vec_xrpow_max._m128, vec_tmp._m128); /* retrieve max */ + case 3: vec_tmp->_float[2] = cod_info->xr[upper4+2]; + case 2: vec_tmp->_float[1] = cod_info->xr[upper4+1]; + case 1: vec_tmp->_float[0] = cod_info->xr[upper4+0]; + vec_tmp->_m128 = _mm_and_ps(vec_tmp->_m128, *vec_fabs_mask); /* fabs */ + vec_sum->_m128 = _mm_add_ps(vec_sum->_m128, vec_tmp->_m128); + vec_tmp->_m128 = _mm_sqrt_ps(_mm_mul_ps(vec_tmp->_m128, _mm_sqrt_ps(vec_tmp->_m128))); + vec_xrpow_max->_m128 = _mm_max_ps(vec_xrpow_max->_m128, vec_tmp->_m128); /* retrieve max */ switch (rest) { - case 3: xrpow[upper4+2] = vec_tmp._float[2]; - case 2: xrpow[upper4+1] = vec_tmp._float[1]; - case 1: xrpow[upper4+0] = vec_tmp._float[0]; + case 3: xrpow[upper4+2] = vec_tmp->_float[2]; + case 2: xrpow[upper4+1] = vec_tmp->_float[1]; + case 1: xrpow[upper4+0] = vec_tmp->_float[0]; default: break; } default: break; } - tmp_sum = vec_sum._float[0] + vec_sum._float[1] + vec_sum._float[2] + vec_sum._float[3]; + tmp_sum = vec_sum->_float[0] + vec_sum->_float[1] + vec_sum->_float[2] + vec_sum->_float[3]; { - float ma = vec_xrpow_max._float[0] > vec_xrpow_max._float[1] - ? 
vec_xrpow_max._float[0] : vec_xrpow_max._float[1]; - float mb = vec_xrpow_max._float[2] > vec_xrpow_max._float[3] - ? vec_xrpow_max._float[2] : vec_xrpow_max._float[3]; + float ma = vec_xrpow_max->_float[0] > vec_xrpow_max->_float[1] + ? vec_xrpow_max->_float[0] : vec_xrpow_max->_float[1]; + float mb = vec_xrpow_max->_float[2] > vec_xrpow_max->_float[3] + ? vec_xrpow_max->_float[2] : vec_xrpow_max->_float[3]; tmp_max = ma > mb ? ma : mb; } cod_info->xrpow_max = tmp_max; *sum = tmp_sum; + + free(vec_fabs_mask); + free(vec_xrpow_max); + free(vec_sum); + free(vec_tmp); }_______________________________________________ pkg-multimedia-maintainers mailing list pkg-multimedia-maintainers@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-multimedia-maintainers
--- End Message ---
signature.asc
Description: This is a digitally signed message part
_______________________________________________ pkg-multimedia-maintainers mailing list pkg-multimedia-maintainers@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-multimedia-maintainers