Hello Fabian, hello Detrick, I could reproduce a SIGSEGV on the i386 architecture inside a QEMU VM with the following steps (amd64 did not show the fault):
- apt-get install icecast2 liquidsoap liquidsoap-plugin-icecast liquidsoap-plugin-lame liquidsoap-plugin-mad liquidsoap-plugin-ogg liquidsoap-plugin-vorbis - enable and start icecast2 (/etc/default/icecast2) - get a mp3 file and put it to current directory as test.mp3 - create test.sh - start liquidsoap in debugger "gdb --args /usr/bin/liquidsoap test.sh" content of test.sh: #!/usr/bin/liquidsoap set("log.file.path","/dev/stdout") myplaylist = single("test.mp3") output.icecast(%mp3, host = "localhost", port = 8000, password = "hackme", mount = "basic-radio.ogg", myplaylist) (gdb) bt #0 0xb76e30c9 in init_xrpow_core_sse () from /usr/lib/i386-linux-gnu/libmp3lame.so.0 #1 0xb76d2ebf in ?? () from /usr/lib/i386-linux-gnu/libmp3lame.so.0 #2 0xb76d65e6 in CBR_iteration_loop () from /usr/lib/i386-linux-gnu/libmp3lame.so.0 #3 0xb76c3e27 in lame_encode_mp3_frame () from /usr/lib/i386-linux-gnu/libmp3lame.so.0 #4 0xb76c8e4f in ?? () from /usr/lib/i386-linux-gnu/libmp3lame.so.0 #5 0xb76c9b48 in lame_encode_buffer_float () from /usr/lib/i386-linux-gnu/libmp3lame.so.0 #6 0xb781cf77 in ocaml_lame_encode_buffer_float () from /usr/lib/liquidsoap/1.1.1/plugins/lame.cmxs #7 0xb781b72f in camlLame__fun_1175 () from /usr/lib/liquidsoap/1.1.1/plugins/lame.cmxs #8 0x0820b06f in camlOutput__f_1354 () #9 0x0820b0c5 in camlOutput__fun_1740 () #10 0x0820b7dd in camlOutput__fun_1600 () #11 0x08257c6f in camlClock__fun_1848 () #12 0x08310b3c in camlList__fold_left_1073 () #13 0x08258740 in camlClock__fun_1813 () #14 0x082582a5 in camlClock__loop_1351 () #15 0x08258daa in camlClock__fun_2074 () #16 0x082703e3 in camlTutils__fun_1346 () #17 0x08307ac8 in camlThread__fun_1081 () #18 0x083612fa in caml_start_program () #19 0x0834a675 in ?? () #20 0xb7f1befb in start_thread (arg=0x79e8b781) at pthread_create.c:309 #21 0xcde0b850 in ?? () #22 0x79e8b781 in ?? () #23 0x8350b45b in ?? () #24 0x8dc314c4 in ?? () #25 0x000000b6 in ?? () #26 0x27bc8d00 in ?? () #27 0x00000000 in ?? 
() Building libmp3lame0 with debug information: - export DEB_BUILD_OPTIONS=nostrip - apt-get build-dep libmp3lame0 - apt-get source libmp3lame0 - dpkg-buildpackage -b - dpkg -i libmp3lame0_*.deb - gdb --args /usr/bin/liquidsoap test.sh Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 0xb49ffb40 (LWP 20251)] init_xrpow_core_sse (cod_info=0x8731a00, xrpow=0xb49fa6f4, upper=575, sum=0xb49fa5d0) at xmm_quantize_sub.c:73 73 vec_xrpow_max._m128 = _mm_set_ps1(0); (gdb) bt #0 init_xrpow_core_sse (cod_info=0x8731a00, xrpow=0xb49fa6f4, upper=575, sum=0xb49fa5d0) at xmm_quantize_sub.c:73 #1 0xb76cf71f in init_xrpow (gfc=0x87318d0, cod_info=0x8731a00, xrpow=0xb49fa6f4) at quantize.c:127 #2 0xb76d2cc6 in CBR_iteration_loop (gfc=0x87318d0, pe=0xb49fb0c4, ms_ener_ratio=0xb49fb09c, ratio=0xb49fd06c) at quantize.c:2034 #3 0xb76c0c37 in lame_encode_mp3_frame (gfc=gfc@entry=0x87318d0, inbuf_l=0x873e48c, inbuf_r=0x87422cc, mp3buf=mp3buf@entry=0xb4a081b9 "", mp3buf_size=mp3buf_size@entry=8988) at encoder.c:518 #4 0xb76c5a22 in lame_encode_buffer_sample_t (mp3buf_size=9405, mp3buf=0xb4a081b9 "", nsamples=<optimized out>, gfc=<optimized out>) at lame.c:1786 #5 lame_encode_buffer_template (gfp=gfp@entry=0x865f580, buffer_l=buffer_l@entry=0xb4a048e8, buffer_r=buffer_r@entry=0xb4a06480, nsamples=nsamples@entry=1764, mp3buf=mp3buf@entry=0xb4a08018 "\377\373\220D", mp3buf_size=mp3buf_size@entry=9405, pcm_type=pcm_type@entry=pcm_float_type, aa=aa@entry=1, norm=norm@entry=1) at lame.c:1897 #6 0xb76c6648 in lame_encode_buffer_float (gfp=0x865f580, pcm_l=0xb4a048e8, pcm_r=0xb4a06480, nsamples=1764, mp3buf=0xb4a08018 "\377\373\220D", mp3buf_size=9405) at lame.c:1918 #7 0xb7819f77 in ocaml_lame_encode_buffer_float () from /usr/lib/liquidsoap/1.1.1/plugins/lame.cmxs #8 0xb781872f in camlLame__fun_1175 () from /usr/lib/liquidsoap/1.1.1/plugins/lame.cmxs #9 0x0820b07f in camlOutput__f_1354 () #10 0x0820b0d5 in camlOutput__fun_1740 () #11 0x0820b7ed in camlOutput__fun_1600 () 
#12 0x08257c7f in camlClock__fun_1848 () #13 0x08310b4c in camlList__fold_left_1073 () #14 0x08258750 in camlClock__fun_1813 () #15 0x082582b5 in camlClock__loop_1351 () #16 0x08258dba in camlClock__fun_2074 () #17 0x082703f3 in camlTutils__fun_1346 () #18 0x08307ad8 in camlThread__fun_1081 () at thread.ml:37 #19 0x08360506 in caml_start_program () #20 0x0834a6b4 in caml_thread_start () #21 0xb7f18efb in start_thread (arg=0x85e8b781) at pthread_create.c:309 #22 0x9de0b850 in ?? () #23 0x85e8b781 in ?? () #24 0x8350b47d in ?? () #25 0x8dc314c4 in ?? () #26 0x000000b6 in ?? () #27 0x27bc8d00 in ?? () #28 0x00000000 in ?? () (gdb) bt full 1 #0 init_xrpow_core_sse (cod_info=0x8731a00, xrpow=0xb49fa6f4, upper=575, sum=0xb49fa5d0) at xmm_quantize_sub.c:73 i = <optimized out> tmp_max = 0 tmp_sum = 0 upper4 = 572 rest = 3 fabs_mask = {_i_32 = {2147483647, 2147483647, 2147483647, 2147483647}, _float = {nan(0x7fffff), nan(0x7fffff), nan(0x7fffff), nan(0x7fffff)}, _m128 = {nan(0x7fffff), nan(0x7fffff), nan(0x7fffff), nan(0x7fffff)}} vec_xrpow_max = {_i_32 = {-1264605616, 141760720, -1264602940, -1217579639}, _float = {-2.97370661e-07, 7.31543195e-34, -2.97446718e-07, -1.41387654e-05}, _m128 = {-2.97370661e-07, 7.31543195e-34, -2.97446718e-07, -1.41387654e-05}} vec_sum = {_i_32 = {141760720, 1371, -1264605752, -1264605756}, _float = {7.31543195e-34, 1.92118019e-42, -2.97366796e-07, -2.97366682e-07}, _m128 = {7.31543195e-34, 1.92118019e-42, -2.97366796e-07, -2.97366682e-07}} vec_tmp = {_i_32 = {0, 0, 0, 0}, _float = {0, 0, 0, 0}, _m128 = {0, 0, 0, 0}} (More stack frames follow...) 
(gdb) display /i $pc 1: x/i $pc => 0xb76df6c9 <init_xrpow_core_sse+105>: movaps %xmm0,0x20(%esp) (gdb) info reg xmm0 xmm0 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 = 0x00000000000000000000000000000000} (gdb) info reg esp esp 0xb49fa538 0xb49fa538 (gdb) print &vec_xrpow_max $16 = (vecfloat_union *) 0xb49fa558 (gdb) print /x $esp + 0x20 $12 = 0xb49fa558 (gdb) print /x ((0xb49fa538 + 0x20) / 16) * 16 $15 = 0xb49fa550 From http://x86.renejeschke.de/html/file_module_x86_id_180.html : "When the source or destination operand is a memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) is generated." In our case the destination address 0xb49fa558 is not on a 16-byte boundary (the nearest aligned address below it is 0xb49fa550). I tried declaring the variable with "__attribute__ ((aligned (16)))", but that did not change anything. Therefore I changed the variables to pointers and allocated their memory via posix_memalign (see attached patch). That way the crash no longer happens. But there must be a better way to achieve this. So this is probably a problem in libmp3lame0 that only affects the i386 architecture. Kind regards, Bernhard
Description: Get aligned memory in xmm_quantize_sub.c Otherwise the SSE instructions generate a SIGSEGV Author: Bernhard Übelacker <bernha...@vr-web.de> Bug-Debian: https://bugs.debian.org/786438 Last-Update: 2015-05-29 --- lame-3.99.5+repack1.orig/libmp3lame/vector/xmm_quantize_sub.c +++ lame-3.99.5+repack1/libmp3lame/vector/xmm_quantize_sub.c @@ -62,54 +62,65 @@ init_xrpow_core_sse(gr_info * const cod_ int rest = upper-upper4; const vecfloat_union fabs_mask = {{ 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }}; - const __m128 vec_fabs_mask = _mm_loadu_ps(&fabs_mask._float[0]); - vecfloat_union vec_xrpow_max; - vecfloat_union vec_sum; - vecfloat_union vec_tmp; + __m128 *vec_fabs_mask = NULL; + vecfloat_union *vec_xrpow_max = NULL; + vecfloat_union *vec_sum = NULL; + vecfloat_union *vec_tmp = NULL; + + posix_memalign((void**)&vec_fabs_mask, 16, sizeof(*vec_fabs_mask)); + posix_memalign((void**)&vec_xrpow_max, 16, sizeof(*vec_xrpow_max)); + posix_memalign((void**)&vec_sum, 16, sizeof(*vec_sum)); + posix_memalign((void**)&vec_tmp, 16, sizeof(*vec_tmp)); + *vec_fabs_mask = _mm_loadu_ps(&fabs_mask._float[0]); _mm_prefetch((char *) cod_info->xr, _MM_HINT_T0); _mm_prefetch((char *) xrpow, _MM_HINT_T0); - vec_xrpow_max._m128 = _mm_set_ps1(0); - vec_sum._m128 = _mm_set_ps1(0); + vec_xrpow_max->_m128 = _mm_set_ps1(0); + vec_sum->_m128 = _mm_set_ps1(0); for (i = 0; i < upper4; i += 4) { - vec_tmp._m128 = _mm_loadu_ps(&(cod_info->xr[i])); /* load */ - vec_tmp._m128 = _mm_and_ps(vec_tmp._m128, vec_fabs_mask); /* fabs */ - vec_sum._m128 = _mm_add_ps(vec_sum._m128, vec_tmp._m128); - vec_tmp._m128 = _mm_sqrt_ps(_mm_mul_ps(vec_tmp._m128, _mm_sqrt_ps(vec_tmp._m128))); - vec_xrpow_max._m128 = _mm_max_ps(vec_xrpow_max._m128, vec_tmp._m128); /* retrieve max */ - _mm_storeu_ps(&(xrpow[i]), vec_tmp._m128); /* store into xrpow[] */ + vec_tmp->_m128 = _mm_loadu_ps(&(cod_info->xr[i])); /* load */ + vec_tmp->_m128 = _mm_and_ps(vec_tmp->_m128, *vec_fabs_mask); /* fabs */ + vec_sum->_m128 = 
_mm_add_ps(vec_sum->_m128, vec_tmp->_m128); + vec_tmp->_m128 = _mm_sqrt_ps(_mm_mul_ps(vec_tmp->_m128, _mm_sqrt_ps(vec_tmp->_m128))); + vec_xrpow_max->_m128 = _mm_max_ps(vec_xrpow_max->_m128, vec_tmp->_m128); /* retrieve max */ + _mm_storeu_ps(&(xrpow[i]), vec_tmp->_m128); /* store into xrpow[] */ } - vec_tmp._m128 = _mm_set_ps1(0); + vec_tmp->_m128 = _mm_set_ps1(0); switch (rest) { - case 3: vec_tmp._float[2] = cod_info->xr[upper4+2]; - case 2: vec_tmp._float[1] = cod_info->xr[upper4+1]; - case 1: vec_tmp._float[0] = cod_info->xr[upper4+0]; - vec_tmp._m128 = _mm_and_ps(vec_tmp._m128, vec_fabs_mask); /* fabs */ - vec_sum._m128 = _mm_add_ps(vec_sum._m128, vec_tmp._m128); - vec_tmp._m128 = _mm_sqrt_ps(_mm_mul_ps(vec_tmp._m128, _mm_sqrt_ps(vec_tmp._m128))); - vec_xrpow_max._m128 = _mm_max_ps(vec_xrpow_max._m128, vec_tmp._m128); /* retrieve max */ + case 3: vec_tmp->_float[2] = cod_info->xr[upper4+2]; + case 2: vec_tmp->_float[1] = cod_info->xr[upper4+1]; + case 1: vec_tmp->_float[0] = cod_info->xr[upper4+0]; + vec_tmp->_m128 = _mm_and_ps(vec_tmp->_m128, *vec_fabs_mask); /* fabs */ + vec_sum->_m128 = _mm_add_ps(vec_sum->_m128, vec_tmp->_m128); + vec_tmp->_m128 = _mm_sqrt_ps(_mm_mul_ps(vec_tmp->_m128, _mm_sqrt_ps(vec_tmp->_m128))); + vec_xrpow_max->_m128 = _mm_max_ps(vec_xrpow_max->_m128, vec_tmp->_m128); /* retrieve max */ switch (rest) { - case 3: xrpow[upper4+2] = vec_tmp._float[2]; - case 2: xrpow[upper4+1] = vec_tmp._float[1]; - case 1: xrpow[upper4+0] = vec_tmp._float[0]; + case 3: xrpow[upper4+2] = vec_tmp->_float[2]; + case 2: xrpow[upper4+1] = vec_tmp->_float[1]; + case 1: xrpow[upper4+0] = vec_tmp->_float[0]; default: break; } default: break; } - tmp_sum = vec_sum._float[0] + vec_sum._float[1] + vec_sum._float[2] + vec_sum._float[3]; + tmp_sum = vec_sum->_float[0] + vec_sum->_float[1] + vec_sum->_float[2] + vec_sum->_float[3]; { - float ma = vec_xrpow_max._float[0] > vec_xrpow_max._float[1] - ? 
vec_xrpow_max._float[0] : vec_xrpow_max._float[1]; - float mb = vec_xrpow_max._float[2] > vec_xrpow_max._float[3] - ? vec_xrpow_max._float[2] : vec_xrpow_max._float[3]; + float ma = vec_xrpow_max->_float[0] > vec_xrpow_max->_float[1] + ? vec_xrpow_max->_float[0] : vec_xrpow_max->_float[1]; + float mb = vec_xrpow_max->_float[2] > vec_xrpow_max->_float[3] + ? vec_xrpow_max->_float[2] : vec_xrpow_max->_float[3]; tmp_max = ma > mb ? ma : mb; } cod_info->xrpow_max = tmp_max; *sum = tmp_sum; + + free(vec_fabs_mask); + free(vec_xrpow_max); + free(vec_sum); + free(vec_tmp); }