On 9/23/2017 3:24 PM, Michael Niedermayer wrote: > On Tue, Sep 19, 2017 at 10:35:30PM +0200, Thomas Mundt wrote: >> 2017-09-19 17:53 GMT+02:00 James Almer <jamr...@gmail.com>: >> >>> On 9/19/2017 5:02 AM, Thomas Mundt wrote: >>>> 2017-09-19 4:09 GMT+02:00 James Almer <jamr...@gmail.com>: >>>> >>>>> On 9/18/2017 10:41 PM, Thomas Mundt wrote: >>>>>> I tried to set up MIPS compiler for two days on windows and linux >>> without >>>>>> success. >>>>>> Now I try it blind. This solution is based on the first suggestion >>> James >>>>>> gave me at IRC. >>>>>> There might be room for improvement and an alternative solution with >>>>>> AV_RL16() / AV_WL16(). >>>>>> I used av_le2ne16() because it will be ignored for little endian. >>>>>> >>>>>> Regards, >>>>>> Thomas >>>>> >>>>>> From a2be5859266b1a2f7048b81ced6770ab4b90a5a4 Mon Sep 17 00:00:00 2001 >>>>>> From: Thomas Mundt <tmund...@gmail.com> >>>>>> Date: Tue, 19 Sep 2017 00:25:25 +0200 >>>>>> Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 >>> bit >>>>>> >>>>>> Signed-off-by: Thomas Mundt <tmund...@gmail.com> >>>>>> --- >>>>>> libavfilter/interlace.h | 5 +- >>>>>> libavfilter/tinterlace.h | 5 +- >>>>>> libavfilter/vf_interlace.c | 92 >>>>> ++++++++++++++++++++++---- >>>>>> libavfilter/vf_tinterlace.c | 73 >>> ++++++++++++++++++-- >>>>>> libavfilter/x86/vf_interlace.asm | 80 >>>>> ++++++++++++++++++++-- >>>>>> libavfilter/x86/vf_interlace_init.c | 51 ++++++++++---- >>>>>> libavfilter/x86/vf_tinterlace_init.c | 51 ++++++++++---- >>>>>> tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++ >>>>>> tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++ >>>>>> tests/ref/fate/filter-pixfmts-tinterlace_pad | 11 +++ >>>>>> tests/ref/fate/filter-pixfmts-tinterlace_vlpf | 11 +++ >>>>>> 11 files changed, 345 insertions(+), 56 deletions(-) >>>>>> >>>>>> diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h >>>>>> index 2101b79..90a0198 100644 >>>>>> --- a/libavfilter/interlace.h >>>>>> +++ b/libavfilter/interlace.h >>>>>> @@ -25,9 +25,11 @@ >>>>>> #ifndef AVFILTER_INTERLACE_H >>>>>> #define AVFILTER_INTERLACE_H >>>>>> >>>>>> +#include "libavutil/bswap.h" >>>>>> #include "libavutil/common.h" >>>>>> #include "libavutil/imgutils.h" >>>>>> #include "libavutil/opt.h" >>>>>> +#include "libavutil/pixdesc.h" >>>>>> >>>>>> #include "avfilter.h" >>>>>> #include "formats.h" >>>>>> @@ -55,8 +57,9 @@ typedef struct InterlaceContext { >>>>>> enum ScanMode scan; // top or bottom field first scanning >>>>>> int lowpass; // enable or disable low pass filtering >>>>>> AVFrame *cur, *next; // the two frames from which the new one is >>>>> obtained >>>>>> + const AVPixFmtDescriptor *csp; >>>>>> void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const >>>>> uint8_t *srcp, >>>>>> - ptrdiff_t mref, ptrdiff_t pref); >>>>>> + ptrdiff_t mref, ptrdiff_t pref, int >>> clip_max); >>>>>> } InterlaceContext; >>>>>> >>>>>> void ff_interlace_init_x86(InterlaceContext *interlace); >>>>>> diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h >>>>>> index cc13a6c..b5c39aa 100644 >>>>>> --- a/libavfilter/tinterlace.h >>>>>> +++ b/libavfilter/tinterlace.h >>>>>> @@ -27,7 +27,9 @@ >>>>>> #ifndef AVFILTER_TINTERLACE_H >>>>>> #define AVFILTER_TINTERLACE_H >>>>>> >>>>>> +#include "libavutil/bswap.h" >>>>>> #include "libavutil/opt.h" >>>>>> +#include "libavutil/pixdesc.h" >>>>>> #include "drawutils.h" >>>>>> #include "avfilter.h" >>>>>> >>>>>> @@ -60,8 +62,9 @@ typedef struct TInterlaceContext { >>>>>> int black_linesize[4]; >>>>>> FFDrawContext draw; >>>>>> FFDrawColor color; >>>>>> + const AVPixFmtDescriptor *csp; >>>>>> void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t >>>>> *srcp, >>>>>> - ptrdiff_t mref, ptrdiff_t pref); >>>>>> + ptrdiff_t mref, ptrdiff_t pref, int >>> clip_max); >>>>>> } TInterlaceContext; >>>>>> >>>>>> void ff_tinterlace_init_x86(TInterlaceContext *interlace); >>>>>> diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c >>>>>> index 55bf782..bfba054 100644 >>>>>> --- a/libavfilter/vf_interlace.c >>>>>> +++ b/libavfilter/vf_interlace.c >>>>>> @@ -61,8 +61,8 @@ static const AVOption interlace_options[] = { >>>>>> AVFILTER_DEFINE_CLASS(interlace); >>>>>> >>>>>> static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize, >>>>>> - const uint8_t *srcp, >>>>>> - ptrdiff_t mref, ptrdiff_t pref) >>>>>> + const uint8_t *srcp, ptrdiff_t mref, >>>>>> + ptrdiff_t pref, int clip_max) >>>>>> { >>>>>> const uint8_t *srcp_above = srcp + mref; >>>>>> const uint8_t *srcp_below = srcp + pref; >>>>>> @@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t >>>>> linesize, >>>>>> } >>>>>> } >>>>>> >>>>>> +static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize, >>>>>> + const uint8_t *src8, ptrdiff_t mref, >>>>>> + ptrdiff_t pref, int clip_max) >>>>>> +{ >>>>>> + uint16_t *dstp = (uint16_t *)dst8; >>>>>> + const uint16_t *srcp = (const uint16_t *)src8; >>>>>> + const uint16_t *srcp_above = srcp + mref / 2; >>>>>> + const uint16_t *srcp_below = srcp + pref / 2; >>>>>> + int i; >>>>>> + for (i = 0; i < linesize; i++) { >>>>>> + // this calculation is an integer representation of >>>>>> + // '0.5 * current + 0.25 * above + 0.25 * below' >>>>>> + // '1 +' is for rounding. >>>>>> + dstp[i] = av_le2ne16((1 + av_le2ne16(srcp[i]) + >>> av_le2ne16(srcp[i]) >>> >>> You might want to load srcp[i] into a local variable here as well. >>> >>>>>> + + av_le2ne16(srcp_above[i]) >>>>>> + + av_le2ne16(srcp_below[i])) >> 2); >>>>> >>>>> This might work (And Michael will be able to confirm that if >>>>> filter-pixfmts-tinterlace_vlpf passes)... >>>>> >>>>>> + } >>>>>> +} >>>>>> + >>>>>> static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize, >>>>>> - const uint8_t *srcp, >>>>>> - ptrdiff_t mref, ptrdiff_t pref) >>>>>> + const uint8_t *srcp, ptrdiff_t >>> mref, >>>>>> + ptrdiff_t pref, int clip_max) >>>>>> { >>>>>> const uint8_t *srcp_above = srcp + mref; >>>>>> const uint8_t *srcp_below = srcp + pref; >>>>>> @@ -103,11 +122,46 @@ static void lowpass_line_complex_c(uint8_t *dstp, >>>>> ptrdiff_t linesize, >>>>>> } >>>>>> } >>>>>> >>>>>> +static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t >>>>> linesize, >>>>>> + const uint8_t *src8, ptrdiff_t >>>>> mref, >>>>>> + ptrdiff_t pref, int clip_max) >>>>>> +{ >>>>>> + uint16_t *dstp = (uint16_t *)dst8; >>>>>> + const uint16_t *srcp = (const uint16_t *)src8; >>>>>> + const uint16_t *srcp_above = srcp + mref / 2; >>>>>> + const uint16_t *srcp_below = srcp + pref / 2; >>>>>> + const uint16_t *srcp_above2 = srcp + mref; >>>>>> + const uint16_t *srcp_below2 = srcp + pref; >>>>>> + int i, srcp_x, srcp_ab; >>>>>> + for (i = 0; i < linesize; i++) { >>>>>> + // this calculation is an integer representation of >>>>>> + // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * >>>>> above2 - 0.125 * below2' >>>>>> + // '4 +' is for rounding. >>>>>> + srcp_x = av_le2ne16(srcp[i]) << 1; >>>>>> + srcp_ab = av_le2ne16(srcp_above[i]) + >>> av_le2ne16(srcp_below[i]); >>>>>> + dstp[i] = av_le2ne16(av_clip((4 + ((av_le2ne16(srcp[i]) + >>>>> srcp_x + srcp_ab) << 1) >>>>>> + - av_le2ne16(srcp_above2[i]) >>>>>> + - av_le2ne16(srcp_below2[i])) >> >>>>> 3, 0, clip_max)); >>>>>> + // Prevent over-sharpening: >>>>>> + // dst must not exceed src when the average of above and below >>>>>> + // is less than src. And the other way around. >>>>>> + if (srcp_ab > srcp_x) { >>>>>> + if (av_le2ne16(dstp[i]) < av_le2ne16(srcp[i])) >>>>>> + dstp[i] = srcp[i]; >>>>>> + } else if (av_le2ne16(dstp[i]) > av_le2ne16(srcp[i])) >>>>>> + dstp[i] = srcp[i]; >>>>> >>>>> ...but chances are this over-sharpening prevention part will not. You're >>>>> loading in native endianness here before storing. You only byteswapped >>>>> for the comparison. >>>>> >>>>> Also, consider using local variables inside the for loop. You're loading >>>>> scrp[i] and dstp[i] several times per iteration. >>>>> >>>> >>>> Okay, then I would do: >>>> int i, dstp_le, srcp_le, srcp_x, srcp_ab; >>> >>> These are not pointers, so maybe remove the "p" to avoid confusion. >>> >>>> for (i = 0; i < linesize; i++) { >>>> srcp_le = av_le2ne16(srcp[i]); >>>> srcp_x = srcp_le << 1; >>>> srcp_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]); >>>> dstp_le = av_clip((4 + (srcp_le + srcp_x + srcp_ab) << 1) >>>> - av_le2ne16(srcp_above2[i]) >>>> - av_le2ne16(srcp_below2[i])) >> 3, 0, >>> clip_max); >>>> if (srcp_ab > srcp_x) { >>>> if (dstp_le < srcp_le) >>>> dstp[i] = srcp[i]; >>>> else >>>> dstp[i] = av_le2ne16(dstp_le); >>>> } else if (dstp_le > srcp_le) { >>>> dstp[i] = srcp[i]; >>>> } else >>>> dstp[i] = av_le2ne16(dstp_le); >>>> } >>>> Shall I do dstp[i] = av_le2ne16(srcp_le); instead of dstp[i] = srcp[i]; ? >>> >>> Yes. No need to load srcp[i] again. >>> >> >> Okay, thanks! >> A patch with your suggestions is attached. >> I will send a separate patch that removes the "p" from srcp_x and src_ab >> in lowpass_line_complex_c function for consistency. > >> libavfilter/interlace.h | 5 + >> libavfilter/tinterlace.h | 5 + >> libavfilter/vf_interlace.c | 97 >> +++++++++++++++++++++---- >> libavfilter/vf_tinterlace.c | 78 ++++++++++++++++++-- >> libavfilter/x86/vf_interlace.asm | 80 ++++++++++++++++++-- >> libavfilter/x86/vf_interlace_init.c | 51 +++++++++---- >> libavfilter/x86/vf_tinterlace_init.c | 51 +++++++++---- >> tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 ++ >> tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 ++ >> tests/ref/fate/filter-pixfmts-tinterlace_pad | 11 ++ >> tests/ref/fate/filter-pixfmts-tinterlace_vlpf | 11 ++ >> 11 files changed, 355 insertions(+), 56 deletions(-) >> d34a8ac3c7d1213a33f8fb5d144e0c9fd7f694e6 >> 0003-avfilter-interlace-add-support-for-10-and-12-bit.patch >> From 4f7172f258bcad46c401ac0acbb4ef1666466c18 Mon Sep 17 00:00:00 2001 >> From: Thomas Mundt <tmund...@gmail.com> >> Date: Tue, 19 Sep 2017 22:23:23 +0200 >> Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit >> >> Signed-off-by: Thomas Mundt <tmund...@gmail.com> >> --- >> libavfilter/interlace.h | 5 +- >> libavfilter/tinterlace.h | 5 +- >> libavfilter/vf_interlace.c | 97 >> ++++++++++++++++++++++---- >> libavfilter/vf_tinterlace.c | 78 +++++++++++++++++++-- >> libavfilter/x86/vf_interlace.asm | 80 +++++++++++++++++++-- >> libavfilter/x86/vf_interlace_init.c | 51 ++++++++++---- >> libavfilter/x86/vf_tinterlace_init.c | 51 ++++++++++---- >> tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++ >> tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++ >> tests/ref/fate/filter-pixfmts-tinterlace_pad | 11 +++ >> tests/ref/fate/filter-pixfmts-tinterlace_vlpf | 11 +++ >> 11 files changed, 355 insertions(+), 56 deletions(-) > > Tested on mips qemu and x86-64, works
Applied and pushed then. Thanks. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel