2017-09-23 20:24 GMT+02:00 Michael Niedermayer <mich...@niedermayer.cc>:
> On Tue, Sep 19, 2017 at 10:35:30PM +0200, Thomas Mundt wrote: > > 2017-09-19 17:53 GMT+02:00 James Almer <jamr...@gmail.com>: > > > > > On 9/19/2017 5:02 AM, Thomas Mundt wrote: > > > > 2017-09-19 4:09 GMT+02:00 James Almer <jamr...@gmail.com>: > > > > > > > >> On 9/18/2017 10:41 PM, Thomas Mundt wrote: > > > >>> I tried to set up MIPS compiler for two days on windows and linux > > > without > > > >>> success. > > > >>> Now I try it blind. This solution is based on the first suggestion > > > James > > > >>> gave me at IRC. > > > >>> There might be room for improvement and an alternative solution > with > > > >>> AV_RL16() / AV_WL16(). > > > >>> I used av_le2ne16() because it will be ignored for little endian. > > > >>> > > > >>> Regards, > > > >>> Thomas > > > >> > > > >>> From a2be5859266b1a2f7048b81ced6770ab4b90a5a4 Mon Sep 17 00:00:00 > 2001 > > > >>> From: Thomas Mundt <tmund...@gmail.com> > > > >>> Date: Tue, 19 Sep 2017 00:25:25 +0200 > > > >>> Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and > 12 > > > bit > > > >>> > > > >>> Signed-off-by: Thomas Mundt <tmund...@gmail.com> > > > >>> --- > > > >>> libavfilter/interlace.h | 5 +- > > > >>> libavfilter/tinterlace.h | 5 +- > > > >>> libavfilter/vf_interlace.c | 92 > > > >> ++++++++++++++++++++++---- > > > >>> libavfilter/vf_tinterlace.c | 73 > > > ++++++++++++++++++-- > > > >>> libavfilter/x86/vf_interlace.asm | 80 > > > >> ++++++++++++++++++++-- > > > >>> libavfilter/x86/vf_interlace_init.c | 51 > ++++++++++---- > > > >>> libavfilter/x86/vf_tinterlace_init.c | 51 > ++++++++++---- > > > >>> tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++ > > > >>> tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++ > > > >>> tests/ref/fate/filter-pixfmts-tinterlace_pad | 11 +++ > > > >>> tests/ref/fate/filter-pixfmts-tinterlace_vlpf | 11 +++ > > > >>> 11 files changed, 345 insertions(+), 56 deletions(-) > > > >>> > > > >>> diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h > > > >>> 
index 2101b79..90a0198 100644 > > > >>> --- a/libavfilter/interlace.h > > > >>> +++ b/libavfilter/interlace.h > > > >>> @@ -25,9 +25,11 @@ > > > >>> #ifndef AVFILTER_INTERLACE_H > > > >>> #define AVFILTER_INTERLACE_H > > > >>> > > > >>> +#include "libavutil/bswap.h" > > > >>> #include "libavutil/common.h" > > > >>> #include "libavutil/imgutils.h" > > > >>> #include "libavutil/opt.h" > > > >>> +#include "libavutil/pixdesc.h" > > > >>> > > > >>> #include "avfilter.h" > > > >>> #include "formats.h" > > > >>> @@ -55,8 +57,9 @@ typedef struct InterlaceContext { > > > >>> enum ScanMode scan; // top or bottom field first scanning > > > >>> int lowpass; // enable or disable low pass filtering > > > >>> AVFrame *cur, *next; // the two frames from which the new > one is > > > >> obtained > > > >>> + const AVPixFmtDescriptor *csp; > > > >>> void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const > > > >> uint8_t *srcp, > > > >>> - ptrdiff_t mref, ptrdiff_t pref); > > > >>> + ptrdiff_t mref, ptrdiff_t pref, int > > > clip_max); > > > >>> } InterlaceContext; > > > >>> > > > >>> void ff_interlace_init_x86(InterlaceContext *interlace); > > > >>> diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h > > > >>> index cc13a6c..b5c39aa 100644 > > > >>> --- a/libavfilter/tinterlace.h > > > >>> +++ b/libavfilter/tinterlace.h > > > >>> @@ -27,7 +27,9 @@ > > > >>> #ifndef AVFILTER_TINTERLACE_H > > > >>> #define AVFILTER_TINTERLACE_H > > > >>> > > > >>> +#include "libavutil/bswap.h" > > > >>> #include "libavutil/opt.h" > > > >>> +#include "libavutil/pixdesc.h" > > > >>> #include "drawutils.h" > > > >>> #include "avfilter.h" > > > >>> > > > >>> @@ -60,8 +62,9 @@ typedef struct TInterlaceContext { > > > >>> int black_linesize[4]; > > > >>> FFDrawContext draw; > > > >>> FFDrawColor color; > > > >>> + const AVPixFmtDescriptor *csp; > > > >>> void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const > uint8_t > > > >> *srcp, > > > >>> - ptrdiff_t mref, ptrdiff_t pref); > > > 
>>> + ptrdiff_t mref, ptrdiff_t pref, int > > > clip_max); > > > >>> } TInterlaceContext; > > > >>> > > > >>> void ff_tinterlace_init_x86(TInterlaceContext *interlace); > > > >>> diff --git a/libavfilter/vf_interlace.c > b/libavfilter/vf_interlace.c > > > >>> index 55bf782..bfba054 100644 > > > >>> --- a/libavfilter/vf_interlace.c > > > >>> +++ b/libavfilter/vf_interlace.c > > > >>> @@ -61,8 +61,8 @@ static const AVOption interlace_options[] = { > > > >>> AVFILTER_DEFINE_CLASS(interlace); > > > >>> > > > >>> static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize, > > > >>> - const uint8_t *srcp, > > > >>> - ptrdiff_t mref, ptrdiff_t pref) > > > >>> + const uint8_t *srcp, ptrdiff_t mref, > > > >>> + ptrdiff_t pref, int clip_max) > > > >>> { > > > >>> const uint8_t *srcp_above = srcp + mref; > > > >>> const uint8_t *srcp_below = srcp + pref; > > > >>> @@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, > ptrdiff_t > > > >> linesize, > > > >>> } > > > >>> } > > > >>> > > > >>> +static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize, > > > >>> + const uint8_t *src8, ptrdiff_t mref, > > > >>> + ptrdiff_t pref, int clip_max) > > > >>> +{ > > > >>> + uint16_t *dstp = (uint16_t *)dst8; > > > >>> + const uint16_t *srcp = (const uint16_t *)src8; > > > >>> + const uint16_t *srcp_above = srcp + mref / 2; > > > >>> + const uint16_t *srcp_below = srcp + pref / 2; > > > >>> + int i; > > > >>> + for (i = 0; i < linesize; i++) { > > > >>> + // this calculation is an integer representation of > > > >>> + // '0.5 * current + 0.25 * above + 0.25 * below' > > > >>> + // '1 +' is for rounding. > > > >>> + dstp[i] = av_le2ne16((1 + av_le2ne16(srcp[i]) + > > > av_le2ne16(srcp[i]) > > > > > > You might want to load srcp[i] into a local variable here as well. 
> > > > > > >>> + + av_le2ne16(srcp_above[i]) > > > >>> + + av_le2ne16(srcp_below[i])) >> > 2); > > > >> > > > >> This might work (And Michael will be able to confirm that if > > > >> filter-pixfmts-tinterlace_vlpf passes)... > > > >> > > > >>> + } > > > >>> +} > > > >>> + > > > >>> static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t > linesize, > > > >>> - const uint8_t *srcp, > > > >>> - ptrdiff_t mref, ptrdiff_t pref) > > > >>> + const uint8_t *srcp, ptrdiff_t > > > mref, > > > >>> + ptrdiff_t pref, int clip_max) > > > >>> { > > > >>> const uint8_t *srcp_above = srcp + mref; > > > >>> const uint8_t *srcp_below = srcp + pref; > > > >>> @@ -103,11 +122,46 @@ static void lowpass_line_complex_c(uint8_t > *dstp, > > > >> ptrdiff_t linesize, > > > >>> } > > > >>> } > > > >>> > > > >>> +static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t > > > >> linesize, > > > >>> + const uint8_t *src8, > ptrdiff_t > > > >> mref, > > > >>> + ptrdiff_t pref, int > clip_max) > > > >>> +{ > > > >>> + uint16_t *dstp = (uint16_t *)dst8; > > > >>> + const uint16_t *srcp = (const uint16_t *)src8; > > > >>> + const uint16_t *srcp_above = srcp + mref / 2; > > > >>> + const uint16_t *srcp_below = srcp + pref / 2; > > > >>> + const uint16_t *srcp_above2 = srcp + mref; > > > >>> + const uint16_t *srcp_below2 = srcp + pref; > > > >>> + int i, srcp_x, srcp_ab; > > > >>> + for (i = 0; i < linesize; i++) { > > > >>> + // this calculation is an integer representation of > > > >>> + // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * > > > >> above2 - 0.125 * below2' > > > >>> + // '4 +' is for rounding. 
> > > >>> + srcp_x = av_le2ne16(srcp[i]) << 1; > > > >>> + srcp_ab = av_le2ne16(srcp_above[i]) + > > > av_le2ne16(srcp_below[i]); > > > >>> + dstp[i] = av_le2ne16(av_clip((4 + ((av_le2ne16(srcp[i]) + > > > >> srcp_x + srcp_ab) << 1) > > > >>> + - av_le2ne16(srcp_above2[i]) > > > >>> + - > av_le2ne16(srcp_below2[i])) >> > > > >> 3, 0, clip_max)); > > > >>> + // Prevent over-sharpening: > > > >>> + // dst must not exceed src when the average of above and > below > > > >>> + // is less than src. And the other way around. > > > >>> + if (srcp_ab > srcp_x) { > > > >>> + if (av_le2ne16(dstp[i]) < av_le2ne16(srcp[i])) > > > >>> + dstp[i] = srcp[i]; > > > >>> + } else if (av_le2ne16(dstp[i]) > av_le2ne16(srcp[i])) > > > >>> + dstp[i] = srcp[i]; > > > >> > > > >> ...but chances are this over-sharpening prevention part will not. > You're > > > >> loading in native endianness here before storing. You only > byteswapped > > > >> for the comparison. > > > >> > > > >> Also, consider using local variables inside the for loop. You're > loading > > > >> scrp[i] and dstp[i] several times per iteration. > > > >> > > > > > > > > Okay, then I would do: > > > > int i, dstp_le, srcp_le, srcp_x, srcp_ab; > > > > > > These are not pointers, so maybe remove the "p" to avoid confusion. > > > > > > > for (i = 0; i < linesize; i++) { > > > > srcp_le = av_le2ne16(srcp[i]); > > > > srcp_x = srcp_le << 1; > > > > srcp_ab = av_le2ne16(srcp_above[i]) + > av_le2ne16(srcp_below[i]); > > > > dstp_le = av_clip((4 + (srcp_le + srcp_x + srcp_ab) << 1) > > > > - av_le2ne16(srcp_above2[i]) > > > > - av_le2ne16(srcp_below2[i])) >> 3, > 0, > > > clip_max); > > > > if (srcp_ab > srcp_x) { > > > > if (dstp_le < srcp_le) > > > > dstp[i] = srcp[i]; > > > > else > > > > dstp[i] = av_le2ne16(dstp_le); > > > > } else if (dstp_le > srcp_le) { > > > > dstp[i] = srcp[i]; > > > > } else > > > > dstp[i] = av_le2ne16(dstp_le); > > > > } > > > > Shall I do dstp[i] = av_le2ne16(srcp_le); instead of dstp[i] = > srcp[i]; ? 
> > > > > > Yes. No need to load srcp[i] again. > > > > > > > Okay, thanks! > > A patch with your suggestions is attached. > > I will send a separate patch that removes the "p" from srcp_x and src_ab > > in lowpass_line_complex_c function for consistency. > > > libavfilter/interlace.h | 5 + > > libavfilter/tinterlace.h | 5 + > > libavfilter/vf_interlace.c | 97 > +++++++++++++++++++++---- > > libavfilter/vf_tinterlace.c | 78 > ++++++++++++++++++-- > > libavfilter/x86/vf_interlace.asm | 80 > ++++++++++++++++++-- > > libavfilter/x86/vf_interlace_init.c | 51 +++++++++---- > > libavfilter/x86/vf_tinterlace_init.c | 51 +++++++++---- > > tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 ++ > > tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 ++ > > tests/ref/fate/filter-pixfmts-tinterlace_pad | 11 ++ > > tests/ref/fate/filter-pixfmts-tinterlace_vlpf | 11 ++ > > 11 files changed, 355 insertions(+), 56 deletions(-) > > d34a8ac3c7d1213a33f8fb5d144e0c9fd7f694e6 0003-avfilter-interlace-add- > support-for-10-and-12-bit.patch > > From 4f7172f258bcad46c401ac0acbb4ef1666466c18 Mon Sep 17 00:00:00 2001 > > From: Thomas Mundt <tmund...@gmail.com> > > Date: Tue, 19 Sep 2017 22:23:23 +0200 > > Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit > > > > Signed-off-by: Thomas Mundt <tmund...@gmail.com> > > --- > > libavfilter/interlace.h | 5 +- > > libavfilter/tinterlace.h | 5 +- > > libavfilter/vf_interlace.c | 97 > ++++++++++++++++++++++---- > > libavfilter/vf_tinterlace.c | 78 > +++++++++++++++++++-- > > libavfilter/x86/vf_interlace.asm | 80 > +++++++++++++++++++-- > > libavfilter/x86/vf_interlace_init.c | 51 ++++++++++---- > > libavfilter/x86/vf_tinterlace_init.c | 51 ++++++++++---- > > tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++ > > tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++ > > tests/ref/fate/filter-pixfmts-tinterlace_pad | 11 +++ > > tests/ref/fate/filter-pixfmts-tinterlace_vlpf | 11 +++ > > 11 files changed, 355 insertions(+), 
56 deletions(-) > > Tested on MIPS (QEMU) and x86-64; it works. > Thanks! _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel