2017-09-19 17:53 GMT+02:00 James Almer <jamr...@gmail.com>: > On 9/19/2017 5:02 AM, Thomas Mundt wrote: > > 2017-09-19 4:09 GMT+02:00 James Almer <jamr...@gmail.com>: > > > >> On 9/18/2017 10:41 PM, Thomas Mundt wrote: > >>> I tried to set up MIPS compiler for two days on windows and linux > without > >>> success. > >>> Now I try it blind. This solution is based on the first suggestion > James > >>> gave me at IRC. > >>> There might be room for improvement and an alternative solution with > >>> AV_RL16() / AV_WL16(). > >>> I used av_le2ne16() because it will be ignored for little endian. > >>> > >>> Regards, > >>> Thomas > >> > >>> From a2be5859266b1a2f7048b81ced6770ab4b90a5a4 Mon Sep 17 00:00:00 2001 > >>> From: Thomas Mundt <tmund...@gmail.com> > >>> Date: Tue, 19 Sep 2017 00:25:25 +0200 > >>> Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 > bit > >>> > >>> Signed-off-by: Thomas Mundt <tmund...@gmail.com> > >>> --- > >>> libavfilter/interlace.h | 5 +- > >>> libavfilter/tinterlace.h | 5 +- > >>> libavfilter/vf_interlace.c | 92 > >> ++++++++++++++++++++++---- > >>> libavfilter/vf_tinterlace.c | 73 > ++++++++++++++++++-- > >>> libavfilter/x86/vf_interlace.asm | 80 > >> ++++++++++++++++++++-- > >>> libavfilter/x86/vf_interlace_init.c | 51 ++++++++++---- > >>> libavfilter/x86/vf_tinterlace_init.c | 51 ++++++++++---- > >>> tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++ > >>> tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++ > >>> tests/ref/fate/filter-pixfmts-tinterlace_pad | 11 +++ > >>> tests/ref/fate/filter-pixfmts-tinterlace_vlpf | 11 +++ > >>> 11 files changed, 345 insertions(+), 56 deletions(-) > >>> > >>> diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h > >>> index 2101b79..90a0198 100644 > >>> --- a/libavfilter/interlace.h > >>> +++ b/libavfilter/interlace.h > >>> @@ -25,9 +25,11 @@ > >>> #ifndef AVFILTER_INTERLACE_H > >>> #define AVFILTER_INTERLACE_H > >>> > >>> +#include "libavutil/bswap.h" > >>> #include "libavutil/common.h" > >>> #include "libavutil/imgutils.h" > >>> #include "libavutil/opt.h" > >>> +#include "libavutil/pixdesc.h" > >>> > >>> #include "avfilter.h" > >>> #include "formats.h" > >>> @@ -55,8 +57,9 @@ typedef struct InterlaceContext { > >>> enum ScanMode scan; // top or bottom field first scanning > >>> int lowpass; // enable or disable low pass filtering > >>> AVFrame *cur, *next; // the two frames from which the new one is > >> obtained > >>> + const AVPixFmtDescriptor *csp; > >>> void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const > >> uint8_t *srcp, > >>> - ptrdiff_t mref, ptrdiff_t pref); > >>> + ptrdiff_t mref, ptrdiff_t pref, int > clip_max); > >>> } InterlaceContext; > >>> > >>> void ff_interlace_init_x86(InterlaceContext *interlace); > >>> diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h > >>> index cc13a6c..b5c39aa 100644 > >>> --- a/libavfilter/tinterlace.h > >>> +++ b/libavfilter/tinterlace.h > >>> @@ -27,7 +27,9 @@ > >>> #ifndef AVFILTER_TINTERLACE_H > >>> #define AVFILTER_TINTERLACE_H > >>> > >>> +#include "libavutil/bswap.h" > >>> #include "libavutil/opt.h" > >>> +#include "libavutil/pixdesc.h" > >>> #include "drawutils.h" > >>> #include "avfilter.h" > >>> > >>> @@ -60,8 +62,9 @@ typedef struct TInterlaceContext { > >>> int black_linesize[4]; > >>> FFDrawContext draw; > >>> FFDrawColor color; > >>> + const AVPixFmtDescriptor *csp; > >>> void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t > >> *srcp, > >>> - ptrdiff_t mref, ptrdiff_t pref); > >>> + ptrdiff_t mref, ptrdiff_t pref, int > clip_max); > >>> } TInterlaceContext; > >>> > >>> void ff_tinterlace_init_x86(TInterlaceContext *interlace); > >>> diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c > >>> index 55bf782..bfba054 100644 > >>> --- a/libavfilter/vf_interlace.c > >>> +++ b/libavfilter/vf_interlace.c > >>> @@ -61,8 +61,8 @@ static const AVOption interlace_options[] = { > >>> AVFILTER_DEFINE_CLASS(interlace); > >>> > >>> static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize, > >>> - const uint8_t *srcp, > >>> - ptrdiff_t mref, ptrdiff_t pref) > >>> + const uint8_t *srcp, ptrdiff_t mref, > >>> + ptrdiff_t pref, int clip_max) > >>> { > >>> const uint8_t *srcp_above = srcp + mref; > >>> const uint8_t *srcp_below = srcp + pref; > >>> @@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t > >> linesize, > >>> } > >>> } > >>> > >>> +static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize, > >>> + const uint8_t *src8, ptrdiff_t mref, > >>> + ptrdiff_t pref, int clip_max) > >>> +{ > >>> + uint16_t *dstp = (uint16_t *)dst8; > >>> + const uint16_t *srcp = (const uint16_t *)src8; > >>> + const uint16_t *srcp_above = srcp + mref / 2; > >>> + const uint16_t *srcp_below = srcp + pref / 2; > >>> + int i; > >>> + for (i = 0; i < linesize; i++) { > >>> + // this calculation is an integer representation of > >>> + // '0.5 * current + 0.25 * above + 0.25 * below' > >>> + // '1 +' is for rounding. > >>> + dstp[i] = av_le2ne16((1 + av_le2ne16(srcp[i]) + > av_le2ne16(srcp[i]) > > You might want to load srcp[i] into a local variable here as well. > > >>> + + av_le2ne16(srcp_above[i]) > >>> + + av_le2ne16(srcp_below[i])) >> 2); > >> > >> This might work (And Michael will be able to confirm that if > >> filter-pixfmts-tinterlace_vlpf passes)... > >> > >>> + } > >>> +} > >>> + > >>> static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize, > >>> - const uint8_t *srcp, > >>> - ptrdiff_t mref, ptrdiff_t pref) > >>> + const uint8_t *srcp, ptrdiff_t > mref, > >>> + ptrdiff_t pref, int clip_max) > >>> { > >>> const uint8_t *srcp_above = srcp + mref; > >>> const uint8_t *srcp_below = srcp + pref; > >>> @@ -103,11 +122,46 @@ static void lowpass_line_complex_c(uint8_t *dstp, > >> ptrdiff_t linesize, > >>> } > >>> } > >>> > >>> +static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t > >> linesize, > >>> + const uint8_t *src8, ptrdiff_t > >> mref, > >>> + ptrdiff_t pref, int clip_max) > >>> +{ > >>> + uint16_t *dstp = (uint16_t *)dst8; > >>> + const uint16_t *srcp = (const uint16_t *)src8; > >>> + const uint16_t *srcp_above = srcp + mref / 2; > >>> + const uint16_t *srcp_below = srcp + pref / 2; > >>> + const uint16_t *srcp_above2 = srcp + mref; > >>> + const uint16_t *srcp_below2 = srcp + pref; > >>> + int i, srcp_x, srcp_ab; > >>> + for (i = 0; i < linesize; i++) { > >>> + // this calculation is an integer representation of > >>> + // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * > >> above2 - 0.125 * below2' > >>> + // '4 +' is for rounding. > >>> + srcp_x = av_le2ne16(srcp[i]) << 1; > >>> + srcp_ab = av_le2ne16(srcp_above[i]) + > av_le2ne16(srcp_below[i]); > >>> + dstp[i] = av_le2ne16(av_clip((4 + ((av_le2ne16(srcp[i]) + > >> srcp_x + srcp_ab) << 1) > >>> + - av_le2ne16(srcp_above2[i]) > >>> + - av_le2ne16(srcp_below2[i])) >> > >> 3, 0, clip_max)); > >>> + // Prevent over-sharpening: > >>> + // dst must not exceed src when the average of above and below > >>> + // is less than src. And the other way around. > >>> + if (srcp_ab > srcp_x) { > >>> + if (av_le2ne16(dstp[i]) < av_le2ne16(srcp[i])) > >>> + dstp[i] = srcp[i]; > >>> + } else if (av_le2ne16(dstp[i]) > av_le2ne16(srcp[i])) > >>> + dstp[i] = srcp[i]; > >> > >> ...but chances are this over-sharpening prevention part will not. You're > >> loading in native endianness here before storing. You only byteswapped > >> for the comparison. > >> > >> Also, consider using local variables inside the for loop. You're loading > >> scrp[i] and dstp[i] several times per iteration. > >> > > > > Okay, then I would do: > > int i, dstp_le, srcp_le, srcp_x, srcp_ab; > > These are not pointers, so maybe remove the "p" to avoid confusion. > > > for (i = 0; i < linesize; i++) { > > srcp_le = av_le2ne16(srcp[i]); > > srcp_x = srcp_le << 1; > > srcp_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]); > > dstp_le = av_clip((4 + (srcp_le + srcp_x + srcp_ab) << 1) > > - av_le2ne16(srcp_above2[i]) > > - av_le2ne16(srcp_below2[i])) >> 3, 0, > clip_max); > > if (srcp_ab > srcp_x) { > > if (dstp_le < srcp_le) > > dstp[i] = srcp[i]; > > else > > dstp[i] = av_le2ne16(dstp_le); > > } else if (dstp_le > srcp_le) { > > dstp[i] = srcp[i]; > > } else > > dstp[i] = av_le2ne16(dstp_le); > > } > > Shall I do dstp[i] = av_le2ne16(srcp_le); instead of dstp[i] = srcp[i]; ? > > Yes. No need to load srcp[i] again. >
Okay, thanks! A patch with your suggestions is attached. I will send a separate patch that removes the "p" from srcp_x and src_ab in lowpass_line_complex_c function for consistency.
0003-avfilter-interlace-add-support-for-10-and-12-bit.patch
Description: Binary data
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel