vvc_mc R-V V sad

flow gg Mon, 25 Nov 2024 19:02:26 -0800

Updated them.

Rémi Denis-Courmont <r...@remlab.net> 于2024年11月18日周一 04:23写道：


> Le sunnuntaina 17. marraskuuta 2024, 15.16.23 EET u...@foxmail.com a
> écrit :
> > From: sunyuechi <sunyue...@iscas.ac.cn>
> >
> >                             k230               banana_f3
> > sad_8x16_c:                  385.9 ( 1.00x)    403.1 ( 1.00x)
> > sad_8x16_rvv_i32:            108.1 ( 3.57x)    100.8 ( 4.00x)
> > sad_16x8_c:                  376.6 ( 1.00x)    392.6 ( 1.00x)
> > sad_16x8_rvv_i32:             89.3 ( 4.21x)    69.5 ( 5.64x)
> > sad_16x16_c:                 746.6 ( 1.00x)    757.3 ( 1.00x)
> > sad_16x16_rvv_i32:           135.8 ( 5.50x)    121.5 ( 6.23x)
> > ---
> >  libavcodec/riscv/vvc/Makefile      |  3 +-
> >  libavcodec/riscv/vvc/vvc_sad_rvv.S | 58 ++++++++++++++++++++++++++++++
> >  libavcodec/riscv/vvc/vvcdsp_init.c |  7 ++++
> >  3 files changed, 67 insertions(+), 1 deletion(-)
> >  create mode 100644 libavcodec/riscv/vvc/vvc_sad_rvv.S
> >
> > diff --git a/libavcodec/riscv/vvc/Makefile
> b/libavcodec/riscv/vvc/Makefile
> > index 582b051579..6b9c618b33 100644
> > --- a/libavcodec/riscv/vvc/Makefile
> > +++ b/libavcodec/riscv/vvc/Makefile
> > @@ -1,2 +1,3 @@
> >  OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvcdsp_init.o
> > -RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o
> > +RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o \
> > +                                  riscv/vvc/vvc_sad_rvv.o
> > diff --git a/libavcodec/riscv/vvc/vvc_sad_rvv.S
> > b/libavcodec/riscv/vvc/vvc_sad_rvv.S new file mode 100644
> > index 0000000000..acdc78d20d
> > --- /dev/null
> > +++ b/libavcodec/riscv/vvc/vvc_sad_rvv.S
> > @@ -0,0 +1,58 @@
> > +/*
> > + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences
> > (ISCAS). + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301
> > USA + */
> > +
> > +#include "libavcodec/riscv/h26x/asm.S"
> > +
> > +.macro func_sad vlen
> > +func ff_vvc_sad_rvv_\vlen, zve32x, zbb, zba
> > +        lpad    0
> > +        slli              t2, a3, 7  // dy * 128
> > +        li                t1, 4*128+4
> > +        add               t0, t2, a2 // dy * 128 + dx
> > +        sub               t1, t1, t2
> > +        sub               t1, t1, a2
> > +        sh1add            a0, t0, a0
> > +        sh1add            a1, t1, a1
> > +        vsetvlstatic32    1, \vlen
> > +        li                t0, 16
> > +        vmv.s.x           v0, zero
> > +        beq               a4, t0, SAD\vlen\()16
> > +        .irp w,8,16
> > +SAD\vlen\w:
> > +        vsetvlstatic16    \w, \vlen
> > +        addi              a5, a5, -2
> > +        vle16.v           v8, (a0)
> > +        vle16.v           v16, (a1)
> > +        vwsub.vv          v24, v8, v16
>
> It might be faster to stick to 16-bit and only widen when accumulating.
> The distance between two 16-bit values is an unsigned 16-bit value after all.
>
> > +        vsetvlstatic32    \w, \vlen
> > +        vneg.v            v16, v24
> > +        addi              a0, a0, 2 * 128 * 2
> > +        vmax.vv           v24, v24, v16
> > +        vredsum.vs        v0, v24, v0
>
> Don't calculate a reduction in a loop.
>
> > +        addi              a1, a1, 2 * 128 * 2
> > +        bnez              a5, SAD\vlen\w
> > +        vmv.x.s           a0, v0
> > +        ret
> > +        .endr
> > +endfunc
> > +.endm
> > +
> > +func_sad 256
> > +func_sad 128
> > diff --git a/libavcodec/riscv/vvc/vvcdsp_init.c
> > b/libavcodec/riscv/vvc/vvcdsp_init.c index 2fe93029aa..1b228cc9f5 100644
> > --- a/libavcodec/riscv/vvc/vvcdsp_init.c
> > +++ b/libavcodec/riscv/vvc/vvcdsp_init.c
> > @@ -59,6 +59,9 @@ DMVR_PROTOTYPES(8, rvv_256)
> >      c->inter.dmvr[1][1]   = ff_vvc_dmvr_hv_##bd##_##opt;           \
> >  } while (0)
> >
> > +int ff_vvc_sad_rvv_128(const int16_t *src0, const int16_t *src1, int dx,
> > int dy, int block_w, int block_h); +int ff_vvc_sad_rvv_256(const int16_t
> > *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h); +
> >  #define PUT_PIXELS_PROTOTYPES2(bd, opt)
>
> >      \ void bf(ff_vvc_put_pixels, bd, opt)(int16_t *dst,
>
> >             \ const uint8_t *_src, const ptrdiff_t _src_stride,
>
> >                \ @@ -97,6 +100,8 @@ void
> > ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd) FUNCS(LUMA,
> > rvv_256);
> >                  FUNCS(CHROMA, rvv_256);
> >                  break;
> > +            case 10:
> > +                c->inter.sad      = ff_vvc_sad_rvv_256;
> >              default:
> >                  break;
> >          }
> > @@ -111,6 +116,8 @@ void ff_vvc_dsp_init_riscv(VVCDSPContext *const c,
> const
> > int bd) FUNCS(LUMA, rvv_128);
> >                  FUNCS(CHROMA, rvv_128);
> >                  break;
> > +            case 10:
> > +                c->inter.sad      = ff_vvc_sad_rvv_128;
> >              default:
> >                  break;
> >          }
>
>
> --
> 雷米‧德尼-库尔蒙
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 4/4] lavc/vvc_mc R-V V sad

Reply via email to