Dear Robert,

a bug has been reported against the Debian lame package on arch i386
[1]. The library fails with a SIGSEGV in init_xrpow_core_sse(). As it
turns out, this is a general-protection exception triggered by
non-aligned memory access. Bernhard Übelacker succeeded in creating a
patch that forces the variables into alignment by using
posix_memalign(), but we are not sure whether there may be more suitable
solutions to this issue.

I am asking you for input on this issue. Could you please have a look at
the patch Bernhard provides and share your opinion?

NB: This issue has not been raised before, because somehow
the ./configure script failed to detect the presence of the xmmintrin.h
header in previous package versions and thus decided that it would not
want to "build the internal vector lib". This has apparently just
changed recently.

Best regards,

Fabian


[1] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=786438
--- Begin Message ---
Hello Fabian, hello Detrick,
I could reproduce a SIGSEGV on arch i386 inside a qemu VM with these steps:
(amd64 did not show the fault)


- apt-get install icecast2 liquidsoap liquidsoap-plugin-icecast 
liquidsoap-plugin-lame liquidsoap-plugin-mad liquidsoap-plugin-ogg 
liquidsoap-plugin-vorbis
- enable and start icecast2 (/etc/default/icecast2)
- get an mp3 file and put it in the current directory as test.mp3
- create test.sh
- start liquidsoap in debugger "gdb --args /usr/bin/liquidsoap test.sh"


content of test.sh:
#!/usr/bin/liquidsoap
set("log.file.path","/dev/stdout")
myplaylist = single("test.mp3")
output.icecast(%mp3, host = "localhost", port = 8000, password = "hackme", 
mount = "basic-radio.ogg", myplaylist)


(gdb) bt
#0  0xb76e30c9 in init_xrpow_core_sse () from 
/usr/lib/i386-linux-gnu/libmp3lame.so.0
#1  0xb76d2ebf in ?? () from /usr/lib/i386-linux-gnu/libmp3lame.so.0
#2  0xb76d65e6 in CBR_iteration_loop () from 
/usr/lib/i386-linux-gnu/libmp3lame.so.0
#3  0xb76c3e27 in lame_encode_mp3_frame () from 
/usr/lib/i386-linux-gnu/libmp3lame.so.0
#4  0xb76c8e4f in ?? () from /usr/lib/i386-linux-gnu/libmp3lame.so.0
#5  0xb76c9b48 in lame_encode_buffer_float () from 
/usr/lib/i386-linux-gnu/libmp3lame.so.0
#6  0xb781cf77 in ocaml_lame_encode_buffer_float () from 
/usr/lib/liquidsoap/1.1.1/plugins/lame.cmxs
#7  0xb781b72f in camlLame__fun_1175 () from 
/usr/lib/liquidsoap/1.1.1/plugins/lame.cmxs
#8  0x0820b06f in camlOutput__f_1354 ()
#9  0x0820b0c5 in camlOutput__fun_1740 ()
#10 0x0820b7dd in camlOutput__fun_1600 ()
#11 0x08257c6f in camlClock__fun_1848 ()
#12 0x08310b3c in camlList__fold_left_1073 ()
#13 0x08258740 in camlClock__fun_1813 ()
#14 0x082582a5 in camlClock__loop_1351 ()
#15 0x08258daa in camlClock__fun_2074 ()
#16 0x082703e3 in camlTutils__fun_1346 ()
#17 0x08307ac8 in camlThread__fun_1081 ()
#18 0x083612fa in caml_start_program ()
#19 0x0834a675 in ?? ()
#20 0xb7f1befb in start_thread (arg=0x79e8b781) at pthread_create.c:309
#21 0xcde0b850 in ?? ()
#22 0x79e8b781 in ?? ()
#23 0x8350b45b in ?? ()
#24 0x8dc314c4 in ?? ()
#25 0x000000b6 in ?? ()
#26 0x27bc8d00 in ?? ()
#27 0x00000000 in ?? ()


Building libmp3lame0 with debug information:

- export DEB_BUILD_OPTIONS=nostrip
- apt-get build-dep libmp3lame0
- apt-get source libmp3lame0
- dpkg-buildpackage -b
- dpkg -i libmp3lame0_*.deb
- gdb --args /usr/bin/liquidsoap test.sh

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0xb49ffb40 (LWP 20251)]
init_xrpow_core_sse (cod_info=0x8731a00, xrpow=0xb49fa6f4, upper=575, 
sum=0xb49fa5d0) at xmm_quantize_sub.c:73
73          vec_xrpow_max._m128 = _mm_set_ps1(0);
(gdb) bt
#0  init_xrpow_core_sse (cod_info=0x8731a00, xrpow=0xb49fa6f4, upper=575, 
sum=0xb49fa5d0) at xmm_quantize_sub.c:73
#1  0xb76cf71f in init_xrpow (gfc=0x87318d0, cod_info=0x8731a00, 
xrpow=0xb49fa6f4) at quantize.c:127
#2  0xb76d2cc6 in CBR_iteration_loop (gfc=0x87318d0, pe=0xb49fb0c4, 
ms_ener_ratio=0xb49fb09c, ratio=0xb49fd06c) at quantize.c:2034
#3  0xb76c0c37 in lame_encode_mp3_frame (gfc=gfc@entry=0x87318d0, 
inbuf_l=0x873e48c, inbuf_r=0x87422cc, mp3buf=mp3buf@entry=0xb4a081b9 "", 
mp3buf_size=mp3buf_size@entry=8988) at encoder.c:518
#4  0xb76c5a22 in lame_encode_buffer_sample_t (mp3buf_size=9405, 
mp3buf=0xb4a081b9 "", nsamples=<optimized out>, gfc=<optimized out>) at 
lame.c:1786
#5  lame_encode_buffer_template (gfp=gfp@entry=0x865f580, 
buffer_l=buffer_l@entry=0xb4a048e8, buffer_r=buffer_r@entry=0xb4a06480, 
nsamples=nsamples@entry=1764, mp3buf=mp3buf@entry=0xb4a08018 "\377\373\220D", 
mp3buf_size=mp3buf_size@entry=9405, pcm_type=pcm_type@entry=pcm_float_type, 
aa=aa@entry=1, norm=norm@entry=1) at lame.c:1897
#6  0xb76c6648 in lame_encode_buffer_float (gfp=0x865f580, pcm_l=0xb4a048e8, 
pcm_r=0xb4a06480, nsamples=1764, mp3buf=0xb4a08018 "\377\373\220D", 
mp3buf_size=9405) at lame.c:1918
#7  0xb7819f77 in ocaml_lame_encode_buffer_float () from 
/usr/lib/liquidsoap/1.1.1/plugins/lame.cmxs
#8  0xb781872f in camlLame__fun_1175 () from 
/usr/lib/liquidsoap/1.1.1/plugins/lame.cmxs
#9  0x0820b07f in camlOutput__f_1354 ()
#10 0x0820b0d5 in camlOutput__fun_1740 ()
#11 0x0820b7ed in camlOutput__fun_1600 ()
#12 0x08257c7f in camlClock__fun_1848 ()
#13 0x08310b4c in camlList__fold_left_1073 ()
#14 0x08258750 in camlClock__fun_1813 ()
#15 0x082582b5 in camlClock__loop_1351 ()
#16 0x08258dba in camlClock__fun_2074 ()
#17 0x082703f3 in camlTutils__fun_1346 ()
#18 0x08307ad8 in camlThread__fun_1081 () at thread.ml:37
#19 0x08360506 in caml_start_program ()
#20 0x0834a6b4 in caml_thread_start ()
#21 0xb7f18efb in start_thread (arg=0x85e8b781) at pthread_create.c:309
#22 0x9de0b850 in ?? ()
#23 0x85e8b781 in ?? ()
#24 0x8350b47d in ?? ()
#25 0x8dc314c4 in ?? ()
#26 0x000000b6 in ?? ()
#27 0x27bc8d00 in ?? ()
#28 0x00000000 in ?? ()
(gdb) bt full 1
#0  init_xrpow_core_sse (cod_info=0x8731a00, xrpow=0xb49fa6f4, upper=575, 
sum=0xb49fa5d0) at xmm_quantize_sub.c:73
        i = <optimized out>
        tmp_max = 0
        tmp_sum = 0
        upper4 = 572
        rest = 3
        fabs_mask = {_i_32 = {2147483647, 2147483647, 2147483647, 2147483647}, 
_float = {nan(0x7fffff), nan(0x7fffff), nan(0x7fffff), nan(0x7fffff)}, _m128 = 
{nan(0x7fffff), nan(0x7fffff), nan(0x7fffff), nan(0x7fffff)}}
        vec_xrpow_max = {_i_32 = {-1264605616, 141760720, -1264602940, 
-1217579639}, _float = {-2.97370661e-07, 7.31543195e-34, -2.97446718e-07, 
-1.41387654e-05}, _m128 = {-2.97370661e-07, 7.31543195e-34, -2.97446718e-07, 
-1.41387654e-05}}
        vec_sum = {_i_32 = {141760720, 1371, -1264605752, -1264605756}, _float 
= {7.31543195e-34, 1.92118019e-42, -2.97366796e-07, -2.97366682e-07}, _m128 = 
{7.31543195e-34, 1.92118019e-42, -2.97366796e-07, -2.97366682e-07}}
        vec_tmp = {_i_32 = {0, 0, 0, 0}, _float = {0, 0, 0, 0}, _m128 = {0, 0, 
0, 0}}
(More stack frames follow...)

(gdb) display /i $pc
1: x/i $pc
=> 0xb76df6c9 <init_xrpow_core_sse+105>:        movaps %xmm0,0x20(%esp)

(gdb) info reg xmm0
xmm0           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, 
v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 = 
0x00000000000000000000000000000000}

(gdb) info reg esp
esp            0xb49fa538       0xb49fa538

(gdb) print &vec_xrpow_max
$16 = (vecfloat_union *) 0xb49fa558

(gdb) print /x $esp + 0x20
$12 = 0xb49fa558

(gdb) print /x ((0xb49fa538 + 0x20) / 16) * 16
$15 = 0xb49fa550



From http://x86.renejeschke.de/html/file_module_x86_id_180.html :
When the source or destination operand is a memory operand, the operand must be 
aligned
on a 16-byte boundary or a general-protection exception (#GP) is generated.

In our case the destination address 0xb49fa558 is not on a 16-byte boundary
(the nearest aligned address below it is 0xb49fa550).



I tried declaring the variable with "__attribute__ ((aligned (16)))" but
that did not change anything.



Therefore I changed the variables to pointers and allocated their memory via
posix_memalign (see attached patch).
That way the crash no longer happens. But there must be a better way
to achieve this.

So this is probably a problem in libmp3lame0, and only on arch i386.


Kind regards,
Bernhard

Description: Get aligned memory in xmm_quantize_sub.c
 Otherwise the SSE instructions generate a SIGSEGV

Author: Bernhard Übelacker <bernha...@vr-web.de>
Bug-Debian: https://bugs.debian.org/786438
Last-Update: 2015-05-29

--- lame-3.99.5+repack1.orig/libmp3lame/vector/xmm_quantize_sub.c
+++ lame-3.99.5+repack1/libmp3lame/vector/xmm_quantize_sub.c
@@ -62,54 +62,65 @@ init_xrpow_core_sse(gr_info * const cod_
     int     rest = upper-upper4;
 
     const vecfloat_union fabs_mask = {{ 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }};
-    const __m128 vec_fabs_mask = _mm_loadu_ps(&fabs_mask._float[0]);
-    vecfloat_union vec_xrpow_max;
-    vecfloat_union vec_sum;
-    vecfloat_union vec_tmp;
+    __m128 *vec_fabs_mask = NULL;
+    vecfloat_union *vec_xrpow_max = NULL;
+    vecfloat_union *vec_sum = NULL;
+    vecfloat_union *vec_tmp = NULL;
+
+    posix_memalign((void**)&vec_fabs_mask, 16, sizeof(*vec_fabs_mask));
+    posix_memalign((void**)&vec_xrpow_max, 16, sizeof(*vec_xrpow_max));
+    posix_memalign((void**)&vec_sum, 16, sizeof(*vec_sum));
+    posix_memalign((void**)&vec_tmp, 16, sizeof(*vec_tmp));
 
+    *vec_fabs_mask = _mm_loadu_ps(&fabs_mask._float[0]);
     _mm_prefetch((char *) cod_info->xr, _MM_HINT_T0);
     _mm_prefetch((char *) xrpow, _MM_HINT_T0);
 
-    vec_xrpow_max._m128 = _mm_set_ps1(0);
-    vec_sum._m128 = _mm_set_ps1(0);
+    vec_xrpow_max->_m128 = _mm_set_ps1(0);
+    vec_sum->_m128 = _mm_set_ps1(0);
 
     for (i = 0; i < upper4; i += 4) {
-        vec_tmp._m128 = _mm_loadu_ps(&(cod_info->xr[i])); /* load */
-        vec_tmp._m128 = _mm_and_ps(vec_tmp._m128, vec_fabs_mask); /* fabs */
-        vec_sum._m128 = _mm_add_ps(vec_sum._m128, vec_tmp._m128);
-        vec_tmp._m128 = _mm_sqrt_ps(_mm_mul_ps(vec_tmp._m128, _mm_sqrt_ps(vec_tmp._m128)));
-        vec_xrpow_max._m128 = _mm_max_ps(vec_xrpow_max._m128, vec_tmp._m128); /* retrieve max */
-        _mm_storeu_ps(&(xrpow[i]), vec_tmp._m128); /* store into xrpow[] */
+        vec_tmp->_m128 = _mm_loadu_ps(&(cod_info->xr[i])); /* load */
+        vec_tmp->_m128 = _mm_and_ps(vec_tmp->_m128, *vec_fabs_mask); /* fabs */
+        vec_sum->_m128 = _mm_add_ps(vec_sum->_m128, vec_tmp->_m128);
+        vec_tmp->_m128 = _mm_sqrt_ps(_mm_mul_ps(vec_tmp->_m128, _mm_sqrt_ps(vec_tmp->_m128)));
+        vec_xrpow_max->_m128 = _mm_max_ps(vec_xrpow_max->_m128, vec_tmp->_m128); /* retrieve max */
+        _mm_storeu_ps(&(xrpow[i]), vec_tmp->_m128); /* store into xrpow[] */
     }
-    vec_tmp._m128 = _mm_set_ps1(0);
+    vec_tmp->_m128 = _mm_set_ps1(0);
     switch (rest) {
-        case 3: vec_tmp._float[2] = cod_info->xr[upper4+2];
-        case 2: vec_tmp._float[1] = cod_info->xr[upper4+1];
-        case 1: vec_tmp._float[0] = cod_info->xr[upper4+0];
-            vec_tmp._m128 = _mm_and_ps(vec_tmp._m128, vec_fabs_mask); /* fabs */
-            vec_sum._m128 = _mm_add_ps(vec_sum._m128, vec_tmp._m128);
-            vec_tmp._m128 = _mm_sqrt_ps(_mm_mul_ps(vec_tmp._m128, _mm_sqrt_ps(vec_tmp._m128)));
-            vec_xrpow_max._m128 = _mm_max_ps(vec_xrpow_max._m128, vec_tmp._m128); /* retrieve max */
+        case 3: vec_tmp->_float[2] = cod_info->xr[upper4+2];
+        case 2: vec_tmp->_float[1] = cod_info->xr[upper4+1];
+        case 1: vec_tmp->_float[0] = cod_info->xr[upper4+0];
+            vec_tmp->_m128 = _mm_and_ps(vec_tmp->_m128, *vec_fabs_mask); /* fabs */
+            vec_sum->_m128 = _mm_add_ps(vec_sum->_m128, vec_tmp->_m128);
+            vec_tmp->_m128 = _mm_sqrt_ps(_mm_mul_ps(vec_tmp->_m128, _mm_sqrt_ps(vec_tmp->_m128)));
+            vec_xrpow_max->_m128 = _mm_max_ps(vec_xrpow_max->_m128, vec_tmp->_m128); /* retrieve max */
             switch (rest) {
-                case 3: xrpow[upper4+2] = vec_tmp._float[2];
-                case 2: xrpow[upper4+1] = vec_tmp._float[1];
-                case 1: xrpow[upper4+0] = vec_tmp._float[0];
+                case 3: xrpow[upper4+2] = vec_tmp->_float[2];
+                case 2: xrpow[upper4+1] = vec_tmp->_float[1];
+                case 1: xrpow[upper4+0] = vec_tmp->_float[0];
                 default:
                     break;
             }
         default:
             break;
     }
-    tmp_sum = vec_sum._float[0] + vec_sum._float[1] + vec_sum._float[2] + vec_sum._float[3];
+    tmp_sum = vec_sum->_float[0] + vec_sum->_float[1] + vec_sum->_float[2] + vec_sum->_float[3];
     {
-        float ma = vec_xrpow_max._float[0] > vec_xrpow_max._float[1]
-                ? vec_xrpow_max._float[0] : vec_xrpow_max._float[1];
-        float mb = vec_xrpow_max._float[2] > vec_xrpow_max._float[3]
-                ? vec_xrpow_max._float[2] : vec_xrpow_max._float[3];
+        float ma = vec_xrpow_max->_float[0] > vec_xrpow_max->_float[1]
+                ? vec_xrpow_max->_float[0] : vec_xrpow_max->_float[1];
+        float mb = vec_xrpow_max->_float[2] > vec_xrpow_max->_float[3]
+                ? vec_xrpow_max->_float[2] : vec_xrpow_max->_float[3];
         tmp_max = ma > mb ? ma : mb;
     }
     cod_info->xrpow_max = tmp_max;
     *sum = tmp_sum;
+
+    free(vec_fabs_mask);
+    free(vec_xrpow_max);
+    free(vec_sum);
+    free(vec_tmp);
 }
 
 

_______________________________________________
pkg-multimedia-maintainers mailing list
pkg-multimedia-maintainers@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-multimedia-maintainers

--- End Message ---

Attachment: signature.asc
Description: This is a digitally signed message part

_______________________________________________
pkg-multimedia-maintainers mailing list
pkg-multimedia-maintainers@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-multimedia-maintainers

Reply via email to