2018-11-25 16:17 GMT+01:00, Lauri Kasanen <c...@gmx.com>:
> Hi,
>
> The lone power8 fate failing test seems like an aliasing issue.
> I've isolated it into the attached standalone test case. Compiling it
> with
> gcc -std=c11 -maltivec -mabi=altivec -mvsx -O3 -fno-tree-vectorize
> -o test test.c
>
> reproduces on gcc 8.2.0, dropping the optimization level fixes it. This
> was one of the "adding a printf made it work" things too.
>
> -Wstrict-aliasing=1 complains about the "register int *idataptr =
> (int*)dataptr;" cast. If I put "typedef int __attribute__((may_alias))
> int_alias;" at the top and change the cast and type to int_alias, the
> results become correct.

Thank you for the analysis!

Patch attached, Carl Eugen
From e5403b832f2bcd360128d9986b602484e576c931 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <ceffm...@gmail.com>
Date: Mon, 26 Nov 2018 00:43:46 +0100
Subject: [PATCH] lavc/jrevdct: Avoid an aliasing violation.

Fixes fate on different PowerPC systems with some compilers.

Analyzed-by: Lauri Kasanen
---
 libavcodec/jrevdct.c |   17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/libavcodec/jrevdct.c b/libavcodec/jrevdct.c
index 3b15a52..c749468 100644
--- a/libavcodec/jrevdct.c
+++ b/libavcodec/jrevdct.c
@@ -63,6 +63,7 @@
  */
 
 #include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
 
 #include "dct.h"
 #include "idctdsp.h"
@@ -234,7 +235,7 @@ void ff_j_rev_dct(DCTBLOCK data)
      * row DCT calculations can be simplified this way.
      */
 
-    register int *idataptr = (int*)dataptr;
+    register uint8_t *idataptr = (uint8_t*)dataptr;
 
     /* WARNING: we do the same permutation as MMX idct to simplify the
        video core */
@@ -254,10 +255,10 @@ void ff_j_rev_dct(DCTBLOCK data)
           int16_t dcval = (int16_t) (d0 * (1 << PASS1_BITS));
           register int v = (dcval & 0xffff) | ((dcval * (1 << 16)) & 0xffff0000);
 
-          idataptr[0] = v;
-          idataptr[1] = v;
-          idataptr[2] = v;
-          idataptr[3] = v;
+          AV_WN32(&idataptr[ 0], v);
+          AV_WN32(&idataptr[ 4], v);
+          AV_WN32(&idataptr[ 8], v);
+          AV_WN32(&idataptr[12], v);
       }
 
       dataptr += DCTSIZE;       /* advance pointer to next row */
@@ -974,7 +975,7 @@ void ff_j_rev_dct4(DCTBLOCK data)
      * row DCT calculations can be simplified this way.
      */
 
-    register int *idataptr = (int*)dataptr;
+    register uint8_t *idataptr = (uint8_t*)dataptr;
 
     d0 = dataptr[0];
     d2 = dataptr[1];
@@ -988,8 +989,8 @@ void ff_j_rev_dct4(DCTBLOCK data)
           int16_t dcval = (int16_t) (d0 << PASS1_BITS);
           register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
 
-          idataptr[0] = v;
-          idataptr[1] = v;
+          AV_WN32(&idataptr[0], v);
+          AV_WN32(&idataptr[4], v);
       }
 
       dataptr += DCTSTRIDE;     /* advance pointer to next row */
-- 
1.7.10.4

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to