On 22/11/14 5:58 PM, Kieran Kunhya wrote: > diff --git a/libavcodec/v210enc.h b/libavcodec/v210enc.h > new file mode 100644 > index 0000000..b8b6143 > --- /dev/null > +++ b/libavcodec/v210enc.h > @@ -0,0 +1,31 @@ > +/* > + * This file is part of Libav.
It shouldn't take long to make a patch that can be applied to the ffmpeg tree in a conflict-free way... > + * > + * Libav is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * Libav is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with Libav; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifndef AVCOENC_V210ENC_H > +#define AVCOENC_V210ENC_H > + > +#include "libavutil/log.h" > +#include "libavutil/opt.h" > + > +typedef struct { > + void (*pack_line)(const uint16_t *y, const uint16_t *u, const uint16_t > *v, uint8_t *dst, ptrdiff_t width); > +} V210EncContext; > + > +void v210enc_x86_init(V210EncContext *s); This should be named ff_v210enc_init_x86. [...] > diff --git a/libavcodec/x86/v210enc.asm b/libavcodec/x86/v210enc.asm > new file mode 100644 > index 0000000..ca3edf4 > --- /dev/null > +++ b/libavcodec/x86/v210enc.asm > @@ -0,0 +1,76 @@ > +;****************************************************************************** > +;* V210 SIMD pack > +;* Copyright (c) 2014 Kieran Kunhya <kier...@obe.tv> > +;* > +;* This file is part of Libav. > +;* > +;* Libav is free software; you can redistribute it and/or > +;* modify it under the terms of the GNU Lesser General Public > +;* License as published by the Free Software Foundation; either > +;* version 2.1 of the License, or (at your option) any later version. 
> +;* > +;* Libav is distributed in the hope that it will be useful, > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +;* Lesser General Public License for more details. > +;* > +;* You should have received a copy of the GNU Lesser General Public > +;* License along with Libav; if not, write to the Free Software > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > +;****************************************************************************** > + > +%include "libavutil/x86/x86util.asm" > + > +SECTION_RODATA > + > +v210_enc_min: times 8 dw 0x4 > +v210_enc_max: times 8 dw 0x3fb > + > +v210_enc_luma_mult: dw 4,1,16,4,1,16,0,0 > +v210_enc_luma_shuf: db -1,0,1,-1,2,3,4,5,-1,6,7,-1,8,9,10,11 > + > +v210_enc_chroma_mult: dw 1,4,16,0,16,1,4,0 > +v210_enc_chroma_shuf: db 0,1,8,9,-1,2,3,-1,10,11,4,5,-1,12,13,-1 > + > +SECTION .text > + > +%macro v210_planar_pack 0 > + > +; v210_planar_pack(const uint16_t *y, const uint16_t *u, const uint16_t *v, > uint8_t *dst, ptrdiff_t width) > +cglobal v210_planar_pack, 5, 5, 4, y, u, v, dst, width > + lea r0, [r0+2*widthq] > + add uq, widthq > + add vq, widthq > + neg widthq > + > + movu m2, [v210_enc_min] > + movu m3, [v210_enc_max] Use mova for these two loads; the constants are aligned and declared in this same file. You may be able to use mova below as well, but I don't know whether AVFrame->data and AVPacket->data are aligned here. It's probably worth a try. 
> + > +.loop > + movu m0, [yq+2*widthq] > + CLIPW m0, m2, m3 > + > + movq m1, [uq+widthq] > + movhps m1, [vq+widthq] > + CLIPW m1, m2, m3 > + > + pmullw m0, [v210_enc_luma_mult] > + pshufb m0, [v210_enc_luma_shuf] > + > + pmullw m1, [v210_enc_chroma_mult] > + pshufb m1, [v210_enc_chroma_shuf] > + > + por m0, m1 > + > + movu [dstq], m0 > + > + add dstq, mmsize > + add widthq, 6 > + jl .loop > + > + REP_RET This is an SSSE3 function, so RET can be used instead (REP_RET is only really needed for SSE3 and below). > +%endmacro > + > +INIT_XMM ssse3 > +v210_planar_pack > + > diff --git a/libavcodec/x86/v210enc_init.c b/libavcodec/x86/v210enc_init.c > new file mode 100644 > index 0000000..524ec7a > --- /dev/null > +++ b/libavcodec/x86/v210enc_init.c > @@ -0,0 +1,31 @@ > +/* > + * This file is part of Libav. > + * > + * Libav is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * Libav is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with Libav; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/x86/asm.h" Not needed. It's for inline asm only. > +#include "libavutil/x86/cpu.h" > +#include "libavcodec/v210enc.h" > + > +extern void ff_v210_planar_pack_ssse3(const uint16_t *y, const uint16_t *u, > const uint16_t *v, uint8_t *dst, ptrdiff_t width); No need to use extern. 
> + > +av_cold void v210enc_x86_init(V210EncContext *s) > +{ > + int cpu_flags = av_get_cpu_flags(); > + > + if( EXTERNAL_SSSE3(cpu_flags) ) > + s->pack_line = ff_v210_planar_pack_ssse3; > +} > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel