2017-09-19 4:09 GMT+02:00 James Almer <jamr...@gmail.com>: > On 9/18/2017 10:41 PM, Thomas Mundt wrote: > > I tried to set up MIPS compiler for two days on windows and linux without > > success. > > Now I try it blind. This solution is based on the first suggestion James > > gave me at IRC. > > There might be room for improvement and an alternative solution with > > AV_RL16() / AV_WL16(). > > I used av_le2ne16() because it will be ignored for little endian. > > > > Regards, > > Thomas > > > From a2be5859266b1a2f7048b81ced6770ab4b90a5a4 Mon Sep 17 00:00:00 2001 > > From: Thomas Mundt <tmund...@gmail.com> > > Date: Tue, 19 Sep 2017 00:25:25 +0200 > > Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit > > > > Signed-off-by: Thomas Mundt <tmund...@gmail.com> > > --- > > libavfilter/interlace.h | 5 +- > > libavfilter/tinterlace.h | 5 +- > > libavfilter/vf_interlace.c | 92 > ++++++++++++++++++++++---- > > libavfilter/vf_tinterlace.c | 73 ++++++++++++++++++-- > > libavfilter/x86/vf_interlace.asm | 80 > ++++++++++++++++++++-- > > libavfilter/x86/vf_interlace_init.c | 51 ++++++++++---- > > libavfilter/x86/vf_tinterlace_init.c | 51 ++++++++++---- > > tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++ > > tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++ > > tests/ref/fate/filter-pixfmts-tinterlace_pad | 11 +++ > > tests/ref/fate/filter-pixfmts-tinterlace_vlpf | 11 +++ > > 11 files changed, 345 insertions(+), 56 deletions(-) > > > > diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h > > index 2101b79..90a0198 100644 > > --- a/libavfilter/interlace.h > > +++ b/libavfilter/interlace.h > > @@ -25,9 +25,11 @@ > > #ifndef AVFILTER_INTERLACE_H > > #define AVFILTER_INTERLACE_H > > > > +#include "libavutil/bswap.h" > > #include "libavutil/common.h" > > #include "libavutil/imgutils.h" > > #include "libavutil/opt.h" > > +#include "libavutil/pixdesc.h" > > > > #include "avfilter.h" > > #include "formats.h" > > @@ -55,8 +57,9 @@ typedef struct InterlaceContext { > > enum ScanMode scan; // top or bottom field first scanning > > int lowpass; // enable or disable low pass filtering > > AVFrame *cur, *next; // the two frames from which the new one is > obtained > > + const AVPixFmtDescriptor *csp; > > void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const > uint8_t *srcp, > > - ptrdiff_t mref, ptrdiff_t pref); > > + ptrdiff_t mref, ptrdiff_t pref, int clip_max); > > } InterlaceContext; > > > > void ff_interlace_init_x86(InterlaceContext *interlace); > > diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h > > index cc13a6c..b5c39aa 100644 > > --- a/libavfilter/tinterlace.h > > +++ b/libavfilter/tinterlace.h > > @@ -27,7 +27,9 @@ > > #ifndef AVFILTER_TINTERLACE_H > > #define AVFILTER_TINTERLACE_H > > > > +#include "libavutil/bswap.h" > > #include "libavutil/opt.h" > > +#include "libavutil/pixdesc.h" > > #include "drawutils.h" > > #include "avfilter.h" > > > > @@ -60,8 +62,9 @@ typedef struct TInterlaceContext { > > int black_linesize[4]; > > FFDrawContext draw; > > FFDrawColor color; > > + const AVPixFmtDescriptor *csp; > > void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t > *srcp, > > - ptrdiff_t mref, ptrdiff_t pref); > > + ptrdiff_t mref, ptrdiff_t pref, int clip_max); > > } TInterlaceContext; > > > > void ff_tinterlace_init_x86(TInterlaceContext *interlace); > > diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c > > index 55bf782..bfba054 100644 > > --- a/libavfilter/vf_interlace.c > > +++ b/libavfilter/vf_interlace.c > > @@ -61,8 +61,8 @@ static const AVOption interlace_options[] = { > > AVFILTER_DEFINE_CLASS(interlace); > > > > static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize, > > - const uint8_t *srcp, > > - ptrdiff_t mref, ptrdiff_t pref) > > + const uint8_t *srcp, ptrdiff_t mref, > > + ptrdiff_t pref, int clip_max) > > { > > const uint8_t *srcp_above = srcp + mref; > > const uint8_t *srcp_below = srcp + pref; > > @@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t > linesize, > > } > > } > > > > +static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize, > > + const uint8_t *src8, ptrdiff_t mref, > > + ptrdiff_t pref, int clip_max) > > +{ > > + uint16_t *dstp = (uint16_t *)dst8; > > + const uint16_t *srcp = (const uint16_t *)src8; > > + const uint16_t *srcp_above = srcp + mref / 2; > > + const uint16_t *srcp_below = srcp + pref / 2; > > + int i; > > + for (i = 0; i < linesize; i++) { > > + // this calculation is an integer representation of > > + // '0.5 * current + 0.25 * above + 0.25 * below' > > + // '1 +' is for rounding. > > + dstp[i] = av_le2ne16((1 + av_le2ne16(srcp[i]) + > av_le2ne16(srcp[i]) > > + + av_le2ne16(srcp_above[i]) > > + + av_le2ne16(srcp_below[i])) >> 2); > > This might work (And Michael will be able to confirm that if > filter-pixfmts-tinterlace_vlpf passes)... > > > + } > > +} > > + > > static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize, > > - const uint8_t *srcp, > > - ptrdiff_t mref, ptrdiff_t pref) > > + const uint8_t *srcp, ptrdiff_t mref, > > + ptrdiff_t pref, int clip_max) > > { > > const uint8_t *srcp_above = srcp + mref; > > const uint8_t *srcp_below = srcp + pref; > > @@ -103,11 +122,46 @@ static void lowpass_line_complex_c(uint8_t *dstp, > ptrdiff_t linesize, > > } > > } > > > > +static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t > linesize, > > + const uint8_t *src8, ptrdiff_t > mref, > > + ptrdiff_t pref, int clip_max) > > +{ > > + uint16_t *dstp = (uint16_t *)dst8; > > + const uint16_t *srcp = (const uint16_t *)src8; > > + const uint16_t *srcp_above = srcp + mref / 2; > > + const uint16_t *srcp_below = srcp + pref / 2; > > + const uint16_t *srcp_above2 = srcp + mref; > > + const uint16_t *srcp_below2 = srcp + pref; > > + int i, srcp_x, srcp_ab; > > + for (i = 0; i < linesize; i++) { > > + // this calculation is an integer representation of > > + // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * > above2 - 0.125 * below2' > > + // '4 +' is for rounding. > > + srcp_x = av_le2ne16(srcp[i]) << 1; > > + srcp_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]); > > + dstp[i] = av_le2ne16(av_clip((4 + ((av_le2ne16(srcp[i]) + > srcp_x + srcp_ab) << 1) > > + - av_le2ne16(srcp_above2[i]) > > + - av_le2ne16(srcp_below2[i])) >> > 3, 0, clip_max)); > > + // Prevent over-sharpening: > > + // dst must not exceed src when the average of above and below > > + // is less than src. And the other way around. > > + if (srcp_ab > srcp_x) { > > + if (av_le2ne16(dstp[i]) < av_le2ne16(srcp[i])) > > + dstp[i] = srcp[i]; > > + } else if (av_le2ne16(dstp[i]) > av_le2ne16(srcp[i])) > > + dstp[i] = srcp[i]; > > ...but chances are this over-sharpening prevention part will not. You're > loading in native endianness here before storing. You only byteswapped > for the comparison. > > Also, consider using local variables inside the for loop. You're loading > scrp[i] and dstp[i] several times per iteration. >
Okay, then I would do: int i, dstp_le, srcp_le, srcp_x, srcp_ab; for (i = 0; i < linesize; i++) { srcp_le = av_le2ne16(srcp[i]); srcp_x = srcp_le << 1; srcp_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]); dstp_le = av_clip((4 + (srcp_le + srcp_x + srcp_ab) << 1) - av_le2ne16(srcp_above2[i]) - av_le2ne16(srcp_below2[i])) >> 3, 0, clip_max); if (srcp_ab > srcp_x) { if (dstp_le < srcp_le) dstp[i] = srcp[i]; else dstp[i] = av_le2ne16(dstp_le); } else if (dstp_le > srcp_le) { dstp[i] = srcp[i]; } else dstp[i] = av_le2ne16(dstp_le); } Shall I do dstp[i] = av_le2ne16(srcp_le); instead of dstp[i] = srcp[i]; ? DonĀ“t two consecutive byte swaps cancel each other out? _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel