On Tue, 16 Aug 2022, J. Dekker wrote:

hevc_add_res_4x4_12_c: 46.0
hevc_add_res_4x4_12_neon: 18.7
hevc_add_res_8x8_12_c: 194.7
hevc_add_res_8x8_12_neon: 25.2
hevc_add_res_16x16_12_c: 716.0
hevc_add_res_16x16_12_neon: 69.7
hevc_add_res_32x32_12_c: 3820.7
hevc_add_res_32x32_12_neon: 261.0

Signed-off-by: J. Dekker <j...@itanimul.li>
---
libavcodec/aarch64/hevcdsp_idct_neon.S    | 156 ++++++++++++----------
libavcodec/aarch64/hevcdsp_init_aarch64.c |  34 ++---
2 files changed, 105 insertions(+), 85 deletions(-)

-function ff_hevc_add_residual_32x32_10_neon, export=1
+.macro add_res bitdepth
+function ff_hevc_add_residual_4x4_\bitdepth\()_neon, export=1
+        mvni            v21.8h, #((0xFF << (\bitdepth - 8)) & 0xFF), lsl #8
+        b               X(ff_hevc_add_residual_4x4_16_neon)

When a function isn't exported, you shouldn't use X() to reference its symbol. On Darwin, X() adds the underscore prefix, but the prefixed symbol name is only defined for exported functions. Also, you should probably drop the ff_ prefix from symbols that aren't exported, for clarity.

This issue causes the patch in its current form to break compilation on macOS.

-void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs,
-                                     ptrdiff_t stride);
-void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, int16_t *coeffs,
-                                      ptrdiff_t stride);
-void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, int16_t *coeffs,
-                                     ptrdiff_t stride);
-void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, int16_t *coeffs,
-                                      ptrdiff_t stride);
-void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, int16_t *coeffs,
-                                       ptrdiff_t stride);
-void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, int16_t *coeffs,
-                                        ptrdiff_t stride);
-void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, int16_t *coeffs,
-                                       ptrdiff_t stride);
-void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, int16_t *coeffs,
-                                        ptrdiff_t stride);
+void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual_4x4_12_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual_8x8_12_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual_16x16_12_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride);
+void ff_hevc_add_residual_32x32_12_neon(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride);

Note that these declarations were recently amended to include "const" on the coeffs parameter.

// Martin

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to