[FFmpeg-devel] [PATCH] libswscale/x86/yuv2rgb: Fix Segmentation Fault when loading unaligned data

2020-02-24 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libswscale/x86/yuv_2_rgb.asm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm
index e05bbb89f5..575a84d921 100644
--- a/libswscale/x86/yuv_2_rgb.asm
+++ b/libswscale/x86/yuv_2_rgb.asm
@@ -139,7 +139,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
 VBROADCASTSD vr_coff,  [pointer_c_ditherq + 4  * 8]
 %endif
 %endif
-mova m_y, [py_2indexq + 2 * indexq]
+movu m_y, [py_2indexq + 2 * indexq]
 movh m_u, [pu_indexq  + indexq]
 movh m_v, [pv_indexq  + indexq]
 .loop0:
@@ -347,7 +347,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
 %endif ; PACK RGB15/16
 %endif ; PACK RGB15/16/32
 
-mova m_y, [py_2indexq + 2 * indexq + 8 * time_num]
+movu m_y, [py_2indexq + 2 * indexq + 8 * time_num]
 movh m_v, [pv_indexq  + indexq + 4 * time_num]
 movh m_u, [pu_indexq  + indexq + 4 * time_num]
 add imageq, 8 * depth * time_num
-- 
2.17.1

[FFmpeg-devel] [PATCH V2] libswscale/x86/yuv2rgb: Fix Segmentation Fault when loading unaligned data

2020-02-25 Thread Ting Fu
Fixes ticket #8532

Signed-off-by: Ting Fu 
---
V2:
Add ticket info in commit message

 libswscale/x86/yuv_2_rgb.asm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm
index e05bbb89f5..575a84d921 100644
--- a/libswscale/x86/yuv_2_rgb.asm
+++ b/libswscale/x86/yuv_2_rgb.asm
@@ -139,7 +139,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
 VBROADCASTSD vr_coff,  [pointer_c_ditherq + 4  * 8]
 %endif
 %endif
-mova m_y, [py_2indexq + 2 * indexq]
+movu m_y, [py_2indexq + 2 * indexq]
 movh m_u, [pu_indexq  + indexq]
 movh m_v, [pv_indexq  + indexq]
 .loop0:
@@ -347,7 +347,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
 %endif ; PACK RGB15/16
 %endif ; PACK RGB15/16/32
 
-mova m_y, [py_2indexq + 2 * indexq + 8 * time_num]
+movu m_y, [py_2indexq + 2 * indexq + 8 * time_num]
 movh m_v, [pv_indexq  + indexq + 4 * time_num]
 movh m_u, [pu_indexq  + indexq + 4 * time_num]
 add imageq, 8 * depth * time_num
-- 
2.17.1

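The one-line fix above replaces an aligned SIMD load (mova) with an unaligned one (movu): mova raises a general-protection fault when its memory operand is not 16-byte (xmm) or 32-byte (ymm) aligned, and py_2indexq + 2 * indexq points into a caller-supplied luma plane whose base address and stride carry no such guarantee. The same distinction expressed with C intrinsics, as a minimal sketch (load_luma16 is a hypothetical helper, not part of the patch):

    #include <stdint.h>
    #include <emmintrin.h>  /* SSE2 */

    /* Read 16 luma bytes from an arbitrary offset. The aligned form,
     * _mm_load_si128 (mova), faults when p is not 16-byte aligned; the
     * unaligned form, _mm_loadu_si128 (movu), accepts any address and
     * costs little or nothing on current x86 when p happens to be
     * aligned anyway. */
    static __m128i load_luma16(const uint8_t *p)
    {
        return _mm_loadu_si128((const __m128i *)p);
    }
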
[FFmpeg-devel] [PATCH 1/3] lavfi/dnn_backend_tensorflow.c: fix mem leak in load_tf_model

2021-03-24 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_tf.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 750a476726..e016571304 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -282,6 +282,9 @@ static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename
 TF_SetConfig(sess_opts, sess_config, sess_config_length,tf_model->status);
 av_freep(&sess_config);
 if (TF_GetCode(tf_model->status) != TF_OK) {
+TF_DeleteGraph(tf_model->graph);
+TF_DeleteStatus(tf_model->status);
+TF_DeleteSessionOptions(sess_opts);
 av_log(ctx, AV_LOG_ERROR, "Failed to set config for sess options with %s\n",
   tf_model->ctx.options.sess_config);
 return DNN_ERROR;
@@ -292,6 +295,8 @@ static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename
 TF_DeleteSessionOptions(sess_opts);
 if (TF_GetCode(tf_model->status) != TF_OK)
 {
+TF_DeleteGraph(tf_model->graph);
+TF_DeleteStatus(tf_model->status);
 av_log(ctx, AV_LOG_ERROR, "Failed to create new session with model graph\n");
 return DNN_ERROR;
 }
@@ -304,6 +309,9 @@ static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename
   &init_op, 1, NULL, tf_model->status);
 if (TF_GetCode(tf_model->status) != TF_OK)
 {
+TF_DeleteSession(tf_model->session, tf_model->status);
+TF_DeleteGraph(tf_model->graph);
+TF_DeleteStatus(tf_model->status);
 av_log(ctx, AV_LOG_ERROR, "Failed to run session when initializing\n");
 return DNN_ERROR;
 }
-- 
2.17.1

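The fix above enforces the rule that each failing path must release exactly the TensorFlow objects created so far (graph, status, session options, and finally the session). As the number of resources grows, that invariant is often easier to keep with a single cleanup label, which is the shape patch 2/3 below moves add_conv_layer() toward. A self-contained sketch of the idiom in plain C, independent of the TensorFlow API (load_file_head is a hypothetical function for illustration):

    #include <stdio.h>
    #include <stdlib.h>

    /* Acquire two resources and unwind whatever exists on any failure.
     * Initializing buf to NULL keeps the cleanup unconditional, since
     * free(NULL) is a no-op. */
    static int load_file_head(const char *path, char **out)
    {
        char *buf = NULL;
        FILE *f = fopen(path, "rb");
        if (!f)
            goto fail;
        buf = malloc(4096);
        if (!buf)
            goto fail;
        if (fread(buf, 1, 4096, f) != 4096)
            goto fail;
        fclose(f);
        *out = buf;
        return 0;
    fail:
        free(buf);
        if (f)
            fclose(f);
        return -1;
    }
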
[FFmpeg-devel] [PATCH 2/3] lavfi/dnn_backend_tensorflow.c: fix mem leak in load_native_model

2021-03-24 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_tf.c | 55 ++--
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index e016571304..c18cb4063f 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -330,7 +330,7 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
 TF_OperationDescription *op_desc;
 TF_Output input;
 int64_t strides[] = {1, 1, 1, 1};
-TF_Tensor *tensor;
+TF_Tensor *kernel_tensor = NULL, *biases_tensor = NULL;
 int64_t dims[4];
 int dims_len;
 char name_buffer[NAME_BUFFER_SIZE];
@@ -347,17 +347,15 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
 dims[2] = params->kernel_size;
 dims[3] = params->input_num;
 dims_len = 4;
-tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
-memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
-TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
+kernel_tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
+memcpy(TF_TensorData(kernel_tensor), params->kernel, size * sizeof(float));
+TF_SetAttrTensor(op_desc, "value", kernel_tensor, tf_model->status);
 if (TF_GetCode(tf_model->status) != TF_OK){
-av_log(ctx, AV_LOG_ERROR, "Failed to set value for kernel of conv layer %d\n", layer);
-return DNN_ERROR;
+goto err;
 }
 op = TF_FinishOperation(op_desc, tf_model->status);
 if (TF_GetCode(tf_model->status) != TF_OK){
-av_log(ctx, AV_LOG_ERROR, "Failed to add kernel to conv layer %d\n", layer);
-return DNN_ERROR;
+goto err;
 }
 
 snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
@@ -370,8 +368,7 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
 TF_SetAttrType(op_desc, "Tperm", TF_INT32);
 op = TF_FinishOperation(op_desc, tf_model->status);
 if (TF_GetCode(tf_model->status) != TF_OK){
-av_log(ctx, AV_LOG_ERROR, "Failed to add transpose to conv layer %d\n", layer);
-return DNN_ERROR;
+goto err;
 }
 
 snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
@@ -385,8 +382,7 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
 TF_SetAttrString(op_desc, "padding", "VALID", 5);
 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
 if (TF_GetCode(tf_model->status) != TF_OK){
-av_log(ctx, AV_LOG_ERROR, "Failed to add conv2d to conv layer %d\n", layer);
-return DNN_ERROR;
+goto err;
 }
 
 snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
@@ -394,17 +390,15 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
 TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
 dims[0] = params->output_num;
 dims_len = 1;
-tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
-memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
-TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
+biases_tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
+memcpy(TF_TensorData(biases_tensor), params->biases, params->output_num * sizeof(float));
+TF_SetAttrTensor(op_desc, "value", biases_tensor, tf_model->status);
 if (TF_GetCode(tf_model->status) != TF_OK){
-av_log(ctx, AV_LOG_ERROR, "Failed to set value for conv_biases of conv layer %d\n", layer);
-return DNN_ERROR;
+goto err;
 }
 op = TF_FinishOperation(op_desc, tf_model->status);
 if (TF_GetCode(tf_model->status) != TF_OK){
-av_log(ctx, AV_LOG_ERROR, "Failed to add conv_biases to conv layer %d\n", layer);
-return DNN_ERROR;
+goto err;
 }
 
 snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
@@ -416,8 +410,7 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
 TF_SetAttrType(op_desc, "T", TF_FLOAT);
 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
 if (TF_GetCode(tf_model->status) != TF_OK){
-av_log(ctx, AV_LOG_ERROR, "Failed to add bias_add to conv layer %d\n", layer);
-return DNN_ERROR;
+goto err;
 }
 
 snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
@@ -440,11 +433,15 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
 TF_SetAttrType(op_desc, &q

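The quoted message is truncated before the new err label itself appears. From the converted paths above, the shared exit has to release whichever of the two tensors has been allocated so far and report the failure once; a hedged reconstruction of its likely shape (the exact statements and log message in the applied patch may differ):

    err:
        TF_DeleteTensor(kernel_tensor);   /* both initialized to NULL above; this   */
        TF_DeleteTensor(biases_tensor);   /* assumes TF_DeleteTensor(NULL) is a no-op */
        av_log(ctx, AV_LOG_ERROR, "Failed to set up conv layer %d\n", layer);
        return DNN_ERROR;
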
[FFmpeg-devel] [PATCH 3/3] lavfi/dnn_backend_tensorflow.c: fix mem leak in execute_model_tf

2021-03-24 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_tf.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index c18cb4063f..c0aa510630 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -766,18 +766,21 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
 if (nb_output != 1) {
 // currently, the filter does not need multiple outputs,
// so we just postpone the support until we really need it.
+TF_DeleteTensor(input_tensor);
 avpriv_report_missing_feature(ctx, "multiple outputs");
 return DNN_ERROR;
 }
 
 tf_outputs = av_malloc_array(nb_output, sizeof(*tf_outputs));
 if (tf_outputs == NULL) {
+TF_DeleteTensor(input_tensor);
 av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *tf_outputs\n"); \
 return DNN_ERROR;
 }
 
 output_tensors = av_mallocz_array(nb_output, sizeof(*output_tensors));
 if (!output_tensors) {
+TF_DeleteTensor(input_tensor);
 av_freep(&tf_outputs);
 av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for output tensor\n"); \
 return DNN_ERROR;
@@ -786,6 +789,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
 for (int i = 0; i < nb_output; ++i) {
 tf_outputs[i].oper = TF_GraphOperationByName(tf_model->graph, output_names[i]);
 if (!tf_outputs[i].oper) {
+TF_DeleteTensor(input_tensor);
 av_freep(&tf_outputs);
 av_freep(&output_tensors);
 av_log(ctx, AV_LOG_ERROR, "Could not find output \"%s\" in model\n", output_names[i]); \
@@ -799,6 +803,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
   tf_outputs, output_tensors, nb_output,
   NULL, 0, NULL, tf_model->status);
 if (TF_GetCode(tf_model->status) != TF_OK) {
+TF_DeleteTensor(input_tensor);
 av_freep(&tf_outputs);
 av_freep(&output_tensors);
 av_log(ctx, AV_LOG_ERROR, "Failed to run session when executing model\n");
-- 
2.17.1

[FFmpeg-devel] [PATCH 1/2] dnn_backend_native_layer_mathunary: add abs support

2020-05-25 Thread Ting Fu
More math unary operations will be added here.

It can be tested with the model file generated with the python script below:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.subtract(x, 0.5)
x2 = tf.abs(x1)
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)

print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/Makefile  |  1 +
 libavfilter/dnn/dnn_backend_native.h  |  1 +
 .../dnn/dnn_backend_native_layer_mathunary.c  | 80 +++
 .../dnn/dnn_backend_native_layer_mathunary.h  | 45 +++
 libavfilter/dnn/dnn_backend_native_layers.c   |  2 +
 tools/python/convert_from_tensorflow.py   | 16 +++-
 tools/python/convert_header.py|  2 +-
 7 files changed, 145 insertions(+), 2 deletions(-)
 create mode 100644 libavfilter/dnn/dnn_backend_native_layer_mathunary.c
 create mode 100644 libavfilter/dnn/dnn_backend_native_layer_mathunary.h

diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index ce529587e1..bb37298b58 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -6,6 +6,7 @@ OBJS-$(CONFIG_DNN)   += dnn/dnn_backend_native_layer_con
 OBJS-$(CONFIG_DNN)   += dnn/dnn_backend_native_layer_depth2space.o
 OBJS-$(CONFIG_DNN)   += dnn/dnn_backend_native_layer_maximum.o
 OBJS-$(CONFIG_DNN)   += dnn/dnn_backend_native_layer_mathbinary.o
+OBJS-$(CONFIG_DNN)   += dnn/dnn_backend_native_layer_mathunary.o
 
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o
 
diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
index 5d76d87915..61f0cb202f 100644
--- a/libavfilter/dnn/dnn_backend_native.h
+++ b/libavfilter/dnn/dnn_backend_native.h
@@ -42,6 +42,7 @@ typedef enum {
 DLT_MIRROR_PAD = 3,
 DLT_MAXIMUM = 4,
 DLT_MATH_BINARY = 5,
+DLT_MATH_UNARY = 6,
 DLT_COUNT
 } DNNLayerType;
 
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
new file mode 100644
index 00..d65af151cd
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2020
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN native backend implementation.
+ */
+
+#include "dnn_backend_native.h"
+#include "libavutil/avassert.h"
+#include "dnn_backend_native_layer_mathunary.h"
+
+int dnn_load_layer_math_unary(Layer *layer, AVIOContext *model_file_context, int file_size)
+{
+DnnLayerMathUnaryParams *params;
+int dnn_size = 0;
+params = av_malloc(sizeof(*params));
+if(!params)
+return 0;
+
+params->un_op = (int32_t)avio_rl32(model_file_context);
+dnn_size += 4;
+layer->params = params;
+layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
+dnn_size += 8;
+
+return dnn_size;
+
+}
+
+int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_operand_indexes,
+int32_t output_operand_index, const void *parameters)
+{
+const DnnOperand *input = &operands[input_operand_indexes[0]];
+DnnOperand *output = &operands[output_operand_index];
+const DnnLayerMathUnaryParams *params = (const DnnLayerMathUnaryParams *)parameters;
+int dims_count;
+const float *src;
+flo

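The quoted message cuts off in mid-declaration. The context lines carried by the follow-up sin/cos/tan patches below quote how this function continues: it copies the input shape to the output operand, reallocates the output buffer, and applies the operator elementwise. A sketch of the missing tail reconstructed from that context (the calculate_operand_* helpers follow the naming used by the other native layers and are an assumption here):

    float *dst;

    for (int i = 0; i < 4; ++i)
        output->dims[i] = input->dims[i];

    output->data_type = input->data_type;
    output->length = calculate_operand_data_length(output);
    output->data = av_realloc(output->data, output->length);
    if (!output->data)
        return DNN_ERROR;

    dims_count = calculate_operand_dims_count(output);
    src = input->data;
    dst = output->data;

    switch (params->un_op) {
    case DMUO_ABS:
        for (int i = 0; i < dims_count; ++i)
            dst[i] = FFABS(src[i]);
        return 0;
    default:
        return -1;
    }
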
[FFmpeg-devel] [PATCH 2/2] dnn-layer-mathunary-test: add unit test for abs

2020-05-25 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/.gitignore |  1 +
 tests/dnn/Makefile   |  1 +
 tests/dnn/dnn-layer-mathunary-test.c | 81 
 tests/fate/dnn.mak   |  5 ++
 4 files changed, 88 insertions(+)
 create mode 100644 tests/dnn/dnn-layer-mathunary-test.c

diff --git a/tests/dnn/.gitignore b/tests/dnn/.gitignore
index d78c5c1aec..1fcd2410b4 100644
--- a/tests/dnn/.gitignore
+++ b/tests/dnn/.gitignore
@@ -3,3 +3,4 @@
 /dnn-layer-maximum-test
 /dnn-layer-pad-test
 /dnn-layer-mathbinary-test
+/dnn-layer-mathunary-test
diff --git a/tests/dnn/Makefile b/tests/dnn/Makefile
index 1f96710821..64591b7851 100644
--- a/tests/dnn/Makefile
+++ b/tests/dnn/Makefile
@@ -3,6 +3,7 @@ DNNTESTPROGS += dnn-layer-conv2d
 DNNTESTPROGS += dnn-layer-depth2space
 DNNTESTPROGS += dnn-layer-mathbinary
 DNNTESTPROGS += dnn-layer-maximum
+DNNTESTPROGS += dnn-layer-mathunary
 
 DNNTESTOBJS  := $(DNNTESTOBJS:%=$(DNNTESTSDIR)%) $(DNNTESTPROGS:%=$(DNNTESTSDIR)/%-test.o)
 DNNTESTPROGS := $(DNNTESTPROGS:%=$(DNNTESTSDIR)/%-test$(EXESUF))
diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
new file mode 100644
index 00..f032ca0684
--- /dev/null
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "libavfilter/dnn/dnn_backend_native_layer_mathunary.h"
+#include "libavutil/avassert.h"
+
+#define EPS 0.1
+
+static float get_expected(float f, DNNMathUnaryOperation op)
+{
+switch (op)
+{
+case DMUO_ABS:
+return (f >= 0) ? f : -f;
+default:
+av_assert0(!"not supported yet");
+return 0.f;
+}
+}
+
+static int test(DNNMathUnaryOperation op)
+{
+DnnLayerMathUnaryParams params;
+DnnOperand operands[2];
+int32_t input_indexes[1];
+float input[1*1*2*3] = {
+-3, 2.5, 2, -2.1, 7.8, 100};
+float *output;
+
+params.un_op = op;
+
+operands[0].data = input;
+operands[0].dims[0] = 1;
+operands[0].dims[1] = 1;
+operands[0].dims[2] = 2;
+operands[0].dims[3] = 3;
+operands[1].data = NULL;
+
+input_indexes[0] = 0;
+dnn_execute_layer_math_unary(operands, input_indexes, 1, &params);
+
+output = operands[1].data;
+for (int i = 0; i < sizeof(input) / sizeof(float); ++i) {
+float expected_output = get_expected(input[i], op);
+if(fabs(output[i] - expected_output) > EPS) {
+printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output);
+av_freep(&output);
+return 1;
+}
+}
+
+av_freep(&output);
+return 0;
+}
+
+int main(int argc, char **argv)
+{
+if (test(DMUO_ABS))
+return 1;
+return 0;
+}
diff --git a/tests/fate/dnn.mak b/tests/fate/dnn.mak
index 5a8e6296a6..4a50b16382 100644
--- a/tests/fate/dnn.mak
+++ b/tests/fate/dnn.mak
@@ -23,6 +23,11 @@ fate-dnn-layer-maximum: $(DNNTESTSDIR)/dnn-layer-maximum-test$(EXESUF)
 fate-dnn-layer-maximum: CMD = run $(DNNTESTSDIR)/dnn-layer-maximum-test$(EXESUF)
 fate-dnn-layer-maximum: CMP = null
 
+FATE_DNN += fate-dnn-layer-mathunary
+fate-dnn-layer-mathunary: $(DNNTESTSDIR)/dnn-layer-mathunary-test$(EXESUF)
+fate-dnn-layer-mathunary: CMD = run $(DNNTESTSDIR)/dnn-layer-mathunary-test$(EXESUF)
+fate-dnn-layer-mathunary: CMP = null
+
 FATE-yes += $(FATE_DNN)
 
 fate-dnn: $(FATE_DNN)
-- 
2.17.1

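With the fate.mak hook above, the new binary builds and runs like the other DNN layer tests; in a configured FFmpeg tree that is something like:

    make fate-dnn-layer-mathunary

CMP = null means FATE checks only the exit status: the test prints a diagnostic and returns 1 for the first element whose output differs from the expected value by more than EPS, and returns 0 otherwise.
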
[FFmpeg-devel] [PATCH 1/6] dnn_backend_native_layer_mathunary: add sin support

2020-06-06 Thread Ting Fu
It can be tested with the model file generated with the python script below:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.multiply(x, 3.14)
x2 = tf.sin(x1)
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)

print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 6 ++
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index d65af151cd..5324d15bc3 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -23,6 +23,8 @@
  * DNN native backend implementation.
  */
 
+#include <math.h>
+
 #include "dnn_backend_native.h"
 #include "libavutil/avassert.h"
 #include "dnn_backend_native_layer_mathunary.h"
@@ -74,6 +76,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = FFABS(src[i]);
 return 0;
+case DMUO_SIN:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = sin(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 4e44003b66..31a1ea8fb6 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -31,6 +31,7 @@
 
 typedef enum {
 DMUO_ABS = 0,
+DMUO_SIN = 1,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 8c0a9be7be..b17facdda8 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-self.mathun2code  = {'Abs':0}
+self.mathun2code  = {'Abs':0, 'Sin':1}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index ad4491729a..c79fef4be8 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 6
+minor = 7
-- 
2.17.1

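Combined with dnn_load_layer_math_unary() from the first mathunary patch, each converted operation becomes a fixed twelve-byte record in the native model file: the operator code written via mathun2code, then the input and output operand indexes, all little-endian int32 (the DLT_MATH_UNARY layer tag is read by the caller before the layer loader runs). A standalone sketch of a reader for one such record, assuming only that layout:

    #include <stdint.h>
    #include <stdio.h>

    /* One MathUnary record as dnn_load_layer_math_unary() consumes it:
     * rec[0] = un_op (0 = Abs, 1 = Sin, ...), rec[1] = input operand
     * index, rec[2] = output operand index; all little-endian. */
    static int read_mathunary_record(FILE *f, int32_t rec[3])
    {
        for (int i = 0; i < 3; i++) {
            uint8_t b[4];
            if (fread(b, 1, 4, f) != 4)
                return -1;
            rec[i] = (int32_t)(b[0] | b[1] << 8 | b[2] << 16 | (uint32_t)b[3] << 24);
        }
        return 0;
    }
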
[FFmpeg-devel] [PATCH 3/6] dnn_backend_native_layer_mathunary: add cos support

2020-06-06 Thread Ting Fu
It can be tested with the model generated with the python script below:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.multiply(x, 1.5)
x2 = tf.cos(x1)
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)

print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 5324d15bc3..fa8710a3ed 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -80,6 +80,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = sin(src[i]);
 return 0;
+case DMUO_COS:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = cos(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 31a1ea8fb6..f70aea846b 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -32,6 +32,7 @@
 typedef enum {
 DMUO_ABS = 0,
 DMUO_SIN = 1,
+DMUO_COS = 2,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index b17facdda8..9e99fccdab 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index c79fef4be8..ba6d18126e 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 7
+minor = 8
-- 
2.17.1

[FFmpeg-devel] [PATCH 2/6] dnn-layer-mathunary-test: add unit test for sin

2020-06-06 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index f032ca0684..ed42198195 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -32,6 +32,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 {
 case DMUO_ABS:
 return (f >= 0) ? f : -f;
+case DMUO_SIN:
+return sin(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -77,5 +79,7 @@ int main(int argc, char **argv)
 {
 if (test(DMUO_ABS))
 return 1;
+if (test(DMUO_SIN))
+return 1;
 return 0;
 }
-- 
2.17.1

[FFmpeg-devel] [PATCH 6/6] dnn-layer-mathunary-test: add unit test for tan

2020-06-06 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index 23e1766ad0..9a7e07c98c 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -36,6 +36,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return sin(f);
 case DMUO_COS:
 return cos(f);
+case DMUO_TAN:
+return tan(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -85,5 +87,7 @@ int main(int argc, char **argv)
 return 1;
 if (test(DMUO_COS))
 return 1;
+if (test(DMUO_TAN))
+return 1;
 return 0;
 }
-- 
2.17.1

[FFmpeg-devel] [PATCH 5/6] dnn_backend_native_layer_mathunary: add tan support

2020-06-06 Thread Ting Fu
It can be tested with the model generated with the python script below:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.multiply(x, 0.78)
x2 = tf.tan(x1)
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)

print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index fa8710a3ed..e6e45a6b9f 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -84,6 +84,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = cos(src[i]);
 return 0;
+case DMUO_TAN:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = tan(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index f70aea846b..0467717a8b 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -33,6 +33,7 @@ typedef enum {
 DMUO_ABS = 0,
 DMUO_SIN = 1,
 DMUO_COS = 2,
+DMUO_TAN = 3,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 9e99fccdab..9da6a43612 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index ba6d18126e..b7fb0f797a 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 8
+minor = 9
-- 
2.17.1

[FFmpeg-devel] [PATCH 4/6] dnn-layer-mathunary-test: add unit test for cos

2020-06-06 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index ed42198195..23e1766ad0 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -34,6 +34,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return (f >= 0) ? f : -f;
 case DMUO_SIN:
 return sin(f);
+case DMUO_COS:
+return cos(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -81,5 +83,7 @@ int main(int argc, char **argv)
 return 1;
 if (test(DMUO_SIN))
 return 1;
+if (test(DMUO_COS))
+return 1;
 return 0;
 }
-- 
2.17.1

[FFmpeg-devel] [PATCH 1/6] dnn_backend_native_layer_mathunary: add asin support

2020-06-18 Thread Ting Fu
It can be tested with the model generated with the python script below:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.asin(x)
x2 = tf.divide(x1, 3.1416/2) # pi/2
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)

print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 90fac6aa67..3a147c2b3c 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -92,6 +92,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = tan(src[i]);
 return 0;
+case DMUO_ASIN:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = asin(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 40a9bb5fb8..1c25db5a42 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -34,6 +34,7 @@ typedef enum {
 DMUO_SIN = 1,
 DMUO_COS = 2,
 DMUO_TAN = 3,
+DMUO_ASIN = 4,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 9da6a43612..5e526e31ce 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index b7fb0f797a..2b6afe8d13 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 9
+minor = 10
-- 
2.17.1

[FFmpeg-devel] [PATCH 4/6] dnn-layer-math-unary-test: add unit test for acos

2020-06-18 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index ac26f7445f..540ea4cef5 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -40,6 +40,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return tan(f);
 case DMUO_ASIN:
 return asin(f);
+case DMUO_ACOS:
+return acos(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -93,5 +95,7 @@ int main(int argc, char **argv)
 return 1;
 if (test(DMUO_ASIN))
 return 1;
+if (test(DMUO_ACOS))
+return 1;
 return 0;
 }
-- 
2.17.1

[FFmpeg-devel] [PATCH 2/6] dnn-layer-math-unary-test: add unit test for asin

2020-06-18 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index 9a7e07c98c..ac26f7445f 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -38,6 +38,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return cos(f);
 case DMUO_TAN:
 return tan(f);
+case DMUO_ASIN:
+return asin(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -89,5 +91,7 @@ int main(int argc, char **argv)
 return 1;
 if (test(DMUO_TAN))
 return 1;
+if (test(DMUO_ASIN))
+return 1;
 return 0;
 }
-- 
2.17.1

[FFmpeg-devel] [PATCH 6/6] dnn-layer-math-unary-test: add unit test for atan

2020-06-18 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index 540ea4cef5..bf77c44bbe 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -42,6 +42,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return asin(f);
 case DMUO_ACOS:
 return acos(f);
+case DMUO_ATAN:
+return atan(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -97,5 +99,7 @@ int main(int argc, char **argv)
 return 1;
 if (test(DMUO_ACOS))
 return 1;
+if (test(DMUO_ATAN))
+return 1;
 return 0;
 }
-- 
2.17.1

[FFmpeg-devel] [PATCH 3/6] dnn_backend_native_layer_mathunary: add acos support

2020-06-18 Thread Ting Fu
It can be tested with the model generated with the python script below:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.acos(x)
x2 = tf.divide(x1, 3.1416/2) # pi/2
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)

print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 3a147c2b3c..d130058546 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -96,6 +96,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = asin(src[i]);
 return 0;
+case DMUO_ACOS:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = acos(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 1c25db5a42..f146248567 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -35,6 +35,7 @@ typedef enum {
 DMUO_COS = 2,
 DMUO_TAN = 3,
 DMUO_ASIN = 4,
+DMUO_ACOS = 5,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 5e526e31ce..78297e48a9 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 2b6afe8d13..4a8e44b4aa 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 10
+minor = 11
-- 
2.17.1

[FFmpeg-devel] [PATCH 5/6] dnn_backend_native_layer_mathunary: add atan support

2020-06-18 Thread Ting Fu
It can be tested with the model generated with the python script below:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.atan(x)
x2 = tf.divide(x1, 3.1416/4) # pi/4
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)

print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index d130058546..42615c43d5 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -100,6 +100,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = acos(src[i]);
 return 0;
+case DMUO_ATAN:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = atan(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index f146248567..13fa33178a 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -36,6 +36,7 @@ typedef enum {
 DMUO_TAN = 3,
 DMUO_ASIN = 4,
 DMUO_ACOS = 5,
+DMUO_ATAN = 6,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 78297e48a9..b90c31c495 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 4a8e44b4aa..73cf23bf53 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 11
+minor = 12
-- 
2.17.1

[FFmpeg-devel] [PATCH 01/12] dnn_backend_native_layer_mathunary: add sinh support

2020-06-28 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 42615c43d5..2630fe07e2 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -104,6 +104,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = atan(src[i]);
 return 0;
+case DMUO_SINH:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = sinh(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 13fa33178a..760930c60e 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -37,6 +37,7 @@ typedef enum {
 DMUO_ASIN = 4,
 DMUO_ACOS = 5,
 DMUO_ATAN = 6,
+DMUO_SINH = 7,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index b90c31c495..6f34a71ab4 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 73cf23bf53..4747f41395 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 12
+minor = 13
-- 
2.17.1

[FFmpeg-devel] [PATCH 08/12] dnn-layer-math-unary-test: add unit test for asinh

2020-06-28 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index 6885b4d318..90fce71a0c 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -50,6 +50,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return cosh(f);
 case DMUO_TANH:
 return tanh(f);
+case DMUO_ASINH:
+return asinh(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -113,5 +115,7 @@ int main(int argc, char **argv)
 return 1;
 if (test(DMUO_TANH))
 return 1;
+if (test(DMUO_ASINH))
+return 1;
 return 0;
 }
-- 
2.17.1

[FFmpeg-devel] [PATCH 03/12] dnn_backend_native_layer_mathunary: add cosh support

2020-06-28 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 2630fe07e2..ddb70996e7 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -108,6 +108,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = sinh(src[i]);
 return 0;
+case DMUO_COSH:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = cosh(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 760930c60e..5a486b4f5f 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -38,6 +38,7 @@ typedef enum {
 DMUO_ACOS = 5,
 DMUO_ATAN = 6,
 DMUO_SINH = 7,
+DMUO_COSH = 8,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 6f34a71ab4..96da44c4a8 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 4747f41395..a73f51ba48 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 13
+minor = 14
-- 
2.17.1

[FFmpeg-devel] [PATCH 05/12] dnn_backend_native_layer_mathunary: add tanh support

2020-06-28 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index ddb70996e7..ccdbcc21e0 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -112,6 +112,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = cosh(src[i]);
 return 0;
+case DMUO_TANH:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = tanh(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 5a486b4f5f..ae0c1e1cdd 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -39,6 +39,7 @@ typedef enum {
 DMUO_ATAN = 6,
 DMUO_SINH = 7,
 DMUO_COSH = 8,
+DMUO_TANH = 9,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 96da44c4a8..f98a3cae3d 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index a73f51ba48..d2753f0af0 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 14
+minor = 15
-- 
2.17.1

[FFmpeg-devel] [PATCH 04/12] dnn-layer-math-unary-test: add unit test for cosh

2020-06-28 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index a1ff05e5fb..0280debc0b 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -46,6 +46,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return atan(f);
 case DMUO_SINH:
 return sinh(f);
+case DMUO_COSH:
+return cosh(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -105,5 +107,7 @@ int main(int argc, char **argv)
 return 1;
 if (test(DMUO_SINH))
 return 1;
+if (test(DMUO_COSH))
+return 1;
 return 0;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 02/12] dnn-layer-math-unary-test: add unit test for sinh

2020-06-28 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c 
b/tests/dnn/dnn-layer-mathunary-test.c
index bf77c44bbe..a1ff05e5fb 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -44,6 +44,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return acos(f);
 case DMUO_ATAN:
 return atan(f);
+case DMUO_SINH:
+return sinh(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -101,5 +103,7 @@ int main(int agrc, char **argv)
 return 1;
 if (test(DMUO_ATAN))
 return 1;
+if (test(DMUO_SINH))
+return 1;
 return 0;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 06/12] dnn-layer-math-unary-test: add unit test for tanh

2020-06-28 Thread Ting Fu
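Each op is validated by get_expected() against the libm reference and
compared element-wise within a small tolerance. A minimal standalone
sketch of that comparison (the harness's actual epsilon constant is not
visible in this hunk, so 1e-6 is an assumption; link with -lm):

#include <math.h>

#define EPS 1e-6f   /* assumed tolerance, not the harness's constant */

int main(void)
{
    /* same values the test feeds into the layer */
    float input[6] = {-3, 2.5, 2, -2.1, 7.8, 100};
    for (int i = 0; i < 6; i++) {
        float expected = tanh(input[i]); /* get_expected(input[i], DMUO_TANH) */
        float output   = tanh(input[i]); /* stands in for the layer's output */
        if (fabsf(output - expected) > EPS)
            return 1;                    /* mismatch */
    }
    return 0;
}
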
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c 
b/tests/dnn/dnn-layer-mathunary-test.c
index 0280debc0b..6885b4d318 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -48,6 +48,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return sinh(f);
 case DMUO_COSH:
 return cosh(f);
+case DMUO_TANH:
+return tanh(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -109,5 +111,7 @@ int main(int agrc, char **argv)
 return 1;
 if (test(DMUO_COSH))
 return 1;
+if (test(DMUO_TANH))
+return 1;
 return 0;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 10/12] dnn-layer-math-unary-test: add unit test for acosh

2020-06-28 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c 
b/tests/dnn/dnn-layer-mathunary-test.c
index 90fce71a0c..5587e47ad5 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -52,6 +52,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return tanh(f);
 case DMUO_ASINH:
 return asinh(f);
+case DMUO_ACOSH:
+return acosh(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -117,5 +119,7 @@ int main(int agrc, char **argv)
 return 1;
 if (test(DMUO_ASINH))
 return 1;
+if (test(DMUO_ACOSH))
+return 1;
 return 0;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 12/12] dnn-layer-math-unary-test: add unit test for atanh

2020-06-28 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c 
b/tests/dnn/dnn-layer-mathunary-test.c
index 5587e47ad5..1815f79f34 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -54,6 +54,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return asinh(f);
 case DMUO_ACOSH:
 return acosh(f);
+case DMUO_ATANH:
+return atanh(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -121,5 +123,7 @@ int main(int agrc, char **argv)
 return 1;
 if (test(DMUO_ACOSH))
 return 1;
+if (test(DMUO_ATANH))
+return 1;
 return 0;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 11/12] dnn_backend_native_layer_mathunary: add atanh support

2020-06-28 Thread Ting Fu
It can be tested with the model generated by the python script below:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')

# please uncomment the part you want to test

x_sinh_1 = tf.sinh(x)
x_out = tf.divide(x_sinh_1, 1.176) # sinh(1.0)

x_cosh_1 = tf.cosh(x)
x_out = tf.divide(x_cosh_1, 1.55) # cosh(1.0)

x_tanh_1 = tf.tanh(x)
x_out = tf.divide(x_tanh_1, 0.77) # tanh(1.0)

x_asinh_1 = tf.asinh(x)
x_out = tf.divide(x_asinh_1, 0.89) # asinh(1.0/1.1)

x_acosh_1 = tf.add(x, 1.1)
x_acosh_2 = tf.acosh(x_acosh_1) # accept (1, inf)
x_out = tf.divide(x_acosh_2, 1.4) # acosh(2.1)

x_atanh_1 = tf.divide(x, 1.1)
x_atanh_2 = tf.atanh(x_atanh_1) # accept (-1, 1)
x_out = tf.divide(x_atanh_2, 1.55) # atanh(1.0/1.1)

y = tf.identity(x_out, name='dnn_out') # please only preserve the x_out you want to test

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, 
['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)

print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))
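
For reference, the inverse hyperbolic functions exercised above are
real-valued only on restricted domains, which is why the script shifts
the [0, 1] input with tf.add before acosh and shrinks it with tf.divide
before atanh:

    \operatorname{asinh}(x) = \ln\left(x + \sqrt{x^2 + 1}\right), \quad x \in \mathbb{R}
    \operatorname{acosh}(x) = \ln\left(x + \sqrt{x^2 - 1}\right), \quad x \in [1, \infty)
    \operatorname{atanh}(x) = \tfrac{1}{2}\,\ln\frac{1 + x}{1 - x},  \quad x \in (-1, 1)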

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index b77b84a794..c83d50db64 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -124,6 +124,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, 
const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = acosh(src[i]);
 return 0;
+case DMUO_ATANH:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = atanh(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index eb30231549..8076356ba4 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -42,6 +42,7 @@ typedef enum {
 DMUO_TANH = 9,
 DMUO_ASINH = 10,
 DMUO_ACOSH = 11,
+DMUO_ATANH = 12,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py 
b/tools/python/convert_from_tensorflow.py
index 1e73e3aefe..85db7bf710 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 
'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 
'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11, 
'Atanh':12}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 8fc3438552..9851d84144 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 17
+minor = 18
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 07/12] dnn_backend_native_layer_mathunary: add asinh support

2020-06-28 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index ccdbcc21e0..83df98d0f8 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -116,6 +116,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, 
const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = tanh(src[i]);
 return 0;
+case DMUO_ASINH:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = asinh(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index ae0c1e1cdd..fbe9af5c7d 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -40,6 +40,7 @@ typedef enum {
 DMUO_SINH = 7,
 DMUO_COSH = 8,
 DMUO_TANH = 9,
+DMUO_ASINH = 10,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py 
b/tools/python/convert_from_tensorflow.py
index f98a3cae3d..0d756c8109 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 
'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 
'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index d2753f0af0..3211c13f6d 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 15
+minor = 16
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 09/12] dnn_backend_native_layer_mathunary: add acosh support

2020-06-28 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 83df98d0f8..b77b84a794 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -120,6 +120,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, 
const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = asinh(src[i]);
 return 0;
+case DMUO_ACOSH:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = acosh(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index fbe9af5c7d..eb30231549 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -41,6 +41,7 @@ typedef enum {
 DMUO_COSH = 8,
 DMUO_TANH = 9,
 DMUO_ASINH = 10,
+DMUO_ACOSH = 11,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py 
b/tools/python/convert_from_tensorflow.py
index 0d756c8109..1e73e3aefe 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 
'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 
'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 3211c13f6d..8fc3438552 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 16
+minor = 17
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 03/12] dnn_backend_native_layer_mathunary: add cosh support

2020-06-29 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 2630fe07e2..ddb70996e7 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -108,6 +108,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, 
const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = sinh(src[i]);
 return 0;
+case DMUO_COSH:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = cosh(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 760930c60e..5a486b4f5f 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -38,6 +38,7 @@ typedef enum {
 DMUO_ACOS = 5,
 DMUO_ATAN = 6,
 DMUO_SINH = 7,
+DMUO_COSH = 8,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py 
b/tools/python/convert_from_tensorflow.py
index 6f34a71ab4..96da44c4a8 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 
'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 
'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 4747f41395..a73f51ba48 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 13
+minor = 14
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 02/12] dnn-layer-math-unary-test: add unit test for sinh

2020-06-29 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c 
b/tests/dnn/dnn-layer-mathunary-test.c
index bf77c44bbe..a1ff05e5fb 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -44,6 +44,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return acos(f);
 case DMUO_ATAN:
 return atan(f);
+case DMUO_SINH:
+return sinh(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -101,5 +103,7 @@ int main(int agrc, char **argv)
 return 1;
 if (test(DMUO_ATAN))
 return 1;
+if (test(DMUO_SINH))
+return 1;
 return 0;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 01/12] dnn_backend_native_layer_mathunary: add sinh support

2020-06-29 Thread Ting Fu
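The sinh added here and the cosh added in patch 03/12 map directly to
libm; the standard definitions, for reference:

    \sinh(x) = \frac{e^{x} - e^{-x}}{2}, \qquad
    \cosh(x) = \frac{e^{x} + e^{-x}}{2}
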
Signed-off-by: Ting Fu 
---
V2:
Add more test numbers
Fix incorrect atanh unit test

 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 42615c43d5..2630fe07e2 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -104,6 +104,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, 
const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = atan(src[i]);
 return 0;
+case DMUO_SINH:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = sinh(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 13fa33178a..760930c60e 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -37,6 +37,7 @@ typedef enum {
 DMUO_ASIN = 4,
 DMUO_ACOS = 5,
 DMUO_ATAN = 6,
+DMUO_SINH = 7,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py 
b/tools/python/convert_from_tensorflow.py
index b90c31c495..6f34a71ab4 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 
'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 
'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 73cf23bf53..4747f41395 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 12
+minor = 13
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 04/12] dnn-layer-math-unary-test: add unit test for cosh

2020-06-29 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c 
b/tests/dnn/dnn-layer-mathunary-test.c
index a1ff05e5fb..0280debc0b 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -46,6 +46,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return atan(f);
 case DMUO_SINH:
 return sinh(f);
+case DMUO_COSH:
+return cosh(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -105,5 +107,7 @@ int main(int agrc, char **argv)
 return 1;
 if (test(DMUO_SINH))
 return 1;
+if (test(DMUO_COSH))
+return 1;
 return 0;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 05/12] dnn_backend_native_layer_mathunary: add tanh support

2020-06-29 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index ddb70996e7..ccdbcc21e0 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -112,6 +112,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, 
const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = cosh(src[i]);
 return 0;
+case DMUO_TANH:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = tanh(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 5a486b4f5f..ae0c1e1cdd 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -39,6 +39,7 @@ typedef enum {
 DMUO_ATAN = 6,
 DMUO_SINH = 7,
 DMUO_COSH = 8,
+DMUO_TANH = 9,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py 
b/tools/python/convert_from_tensorflow.py
index 96da44c4a8..f98a3cae3d 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 
'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 
'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index a73f51ba48..d2753f0af0 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 14
+minor = 15
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 10/12] dnn-layer-math-unary-test: add unit test for acosh

2020-06-29 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c 
b/tests/dnn/dnn-layer-mathunary-test.c
index 90fce71a0c..5587e47ad5 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -52,6 +52,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return tanh(f);
 case DMUO_ASINH:
 return asinh(f);
+case DMUO_ACOSH:
+return acosh(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -117,5 +119,7 @@ int main(int agrc, char **argv)
 return 1;
 if (test(DMUO_ASINH))
 return 1;
+if (test(DMUO_ACOSH))
+return 1;
 return 0;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 09/12] dnn_backend_native_layer_mathunary: add acosh support

2020-06-29 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 83df98d0f8..b77b84a794 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -120,6 +120,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, 
const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = asinh(src[i]);
 return 0;
+case DMUO_ACOSH:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = acosh(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index fbe9af5c7d..eb30231549 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -41,6 +41,7 @@ typedef enum {
 DMUO_COSH = 8,
 DMUO_TANH = 9,
 DMUO_ASINH = 10,
+DMUO_ACOSH = 11,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py 
b/tools/python/convert_from_tensorflow.py
index 0d756c8109..1e73e3aefe 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 
'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 
'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 3211c13f6d..8fc3438552 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 16
+minor = 17
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 06/12] dnn-layer-math-unary-test: add unit test for tanh

2020-06-29 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c 
b/tests/dnn/dnn-layer-mathunary-test.c
index 0280debc0b..6885b4d318 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -48,6 +48,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return sinh(f);
 case DMUO_COSH:
 return cosh(f);
+case DMUO_TANH:
+return tanh(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -109,5 +111,7 @@ int main(int agrc, char **argv)
 return 1;
 if (test(DMUO_COSH))
 return 1;
+if (test(DMUO_TANH))
+return 1;
 return 0;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 11/12] dnn_backend_native_layer_mathunary: add atanh support

2020-06-29 Thread Ting Fu
It can be tested with the model generated by the python script below:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')

# please uncomment the part you want to test

x_sinh_1 = tf.sinh(x)
x_out = tf.divide(x_sinh_1, 1.176) # sinh(1.0)

x_cosh_1 = tf.cosh(x)
x_out = tf.divide(x_cosh_1, 1.55) # cosh(1.0)

x_tanh_1 = tf.tanh(x)
x_out = tf.divide(x_tanh_1, 0.77) # tanh(1.0)

x_asinh_1 = tf.asinh(x)
x_out = tf.divide(x_asinh_1, 0.89) # asinh(1.0/1.1)

x_acosh_1 = tf.add(x, 1.1)
x_acosh_2 = tf.acosh(x_acosh_1) # accept (1, inf)
x_out = tf.divide(x_acosh_2, 1.4) # acosh(2.1)

x_atanh_1 = tf.divide(x, 1.1)
x_atanh_2 = tf.atanh(x_atanh_1) # accept (-1, 1)
x_out = tf.divide(x_atanh_2, 1.55) # atanh(1.0/1.1)

y = tf.identity(x_out, name='dnn_out') # please only preserve the x_out you want to test

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, 
['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)

print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))
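
As a quick standalone sanity check of the domain handling (not part of
the patch; link with -lm):

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* atanh is real-valued only on (-1, 1); the tf.divide by 1.1 above
     * keeps the normalized [0, 1] input strictly inside that interval. */
    printf("atanh(0.5) = %f\n", atanh(0.5)); /* ~0.549306 */
    printf("atanh(1.0) = %f\n", atanh(1.0)); /* inf (pole error)   */
    printf("atanh(1.5) = %f\n", atanh(1.5)); /* nan (domain error) */
    return 0;
}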

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index b77b84a794..c83d50db64 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -124,6 +124,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, 
const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = acosh(src[i]);
 return 0;
+case DMUO_ATANH:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = atanh(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index eb30231549..8076356ba4 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -42,6 +42,7 @@ typedef enum {
 DMUO_TANH = 9,
 DMUO_ASINH = 10,
 DMUO_ACOSH = 11,
+DMUO_ATANH = 12,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py 
b/tools/python/convert_from_tensorflow.py
index 1e73e3aefe..85db7bf710 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 
'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 
'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11, 
'Atanh':12}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 8fc3438552..9851d84144 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 17
+minor = 18
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 07/12] dnn_backend_native_layer_mathunary: add asinh support

2020-06-29 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py  | 2 +-
 tools/python/convert_header.py   | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index ccdbcc21e0..83df98d0f8 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -116,6 +116,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, 
const int32_t *input_oper
 for (int i = 0; i < dims_count; ++i)
 dst[i] = tanh(src[i]);
 return 0;
+case DMUO_ASINH:
+for (int i = 0; i < dims_count; ++i)
+dst[i] = asinh(src[i]);
+return 0;
 default:
 return -1;
 }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h 
b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index ae0c1e1cdd..fbe9af5c7d 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -40,6 +40,7 @@ typedef enum {
 DMUO_SINH = 7,
 DMUO_COSH = 8,
 DMUO_TANH = 9,
+DMUO_ASINH = 10,
 DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py 
b/tools/python/convert_from_tensorflow.py
index f98a3cae3d..0d756c8109 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
 self.conv2d_scopename_inputname_dict = {}
 self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 
'Maximum':4, 'MathBinary':5, 'MathUnary':6}
 self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 
'Minimum':4}
-self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9}
+self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10}
 self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index d2753f0af0..3211c13f6d 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 15
+minor = 16
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 08/12] dnn-layer-math-unary-test: add unit test for asinh

2020-06-29 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c 
b/tests/dnn/dnn-layer-mathunary-test.c
index 6885b4d318..90fce71a0c 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -50,6 +50,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return cosh(f);
 case DMUO_TANH:
 return tanh(f);
+case DMUO_ASINH:
+return asinh(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -113,5 +115,7 @@ int main(int agrc, char **argv)
 return 1;
 if (test(DMUO_TANH))
 return 1;
+if (test(DMUO_ASINH))
+return 1;
 return 0;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 12/12] dnn-layer-math-unary-test: add unit test for atanh

2020-06-29 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c 
b/tests/dnn/dnn-layer-mathunary-test.c
index 5587e47ad5..683e623d95 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -54,6 +54,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 return asinh(f);
 case DMUO_ACOSH:
 return acosh(f);
+case DMUO_ATANH:
+return atanh(f);
 default:
 av_assert0(!"not supported yet");
 return 0.f;
@@ -65,8 +67,8 @@ static int test(DNNMathUnaryOperation op)
 DnnLayerMathUnaryParams params;
 DnnOperand operands[2];
 int32_t input_indexes[1];
-float input[1*1*2*3] = {
--3, 2.5, 2, -2.1, 7.8, 100};
+float input[1*1*3*3] = {
+0.1, 0.5, 0.75, -3, 2.5, 2, -2.1, 7.8, 100};
 float *output;
 
 params.un_op = op;
@@ -74,7 +76,7 @@ static int test(DNNMathUnaryOperation op)
 operands[0].data = input;
 operands[0].dims[0] = 1;
 operands[0].dims[1] = 1;
-operands[0].dims[2] = 2;
+operands[0].dims[2] = 3;
 operands[0].dims[3] = 3;
 operands[1].data = NULL;
 
@@ -121,5 +123,7 @@ int main(int agrc, char **argv)
 return 1;
 if (test(DMUO_ACOSH))
 return 1;
+if (test(DMUO_ATANH))
+return 1;
 return 0;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 3/3] avfilter/x86/vf_eq: add SSE2 version

2019-09-17 Thread Ting Fu
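The SSE2 version widens the main loop from 8 to 16 pixels per iteration
(the shift count passed to the macro); the per-pixel math is unchanged.
A scalar C sketch of that transform, mirroring the asm's .loop1 tail
(a hypothetical standalone helper, not the filter's own C path):

#include <stdint.h>

static void process_one_line_c(const uint8_t *src, uint8_t *dst,
                               int contrast, int brightness, int w)
{
    for (int i = 0; i < w; i++) {
        int v = ((src[i] * contrast) >> 12) + brightness;
        dst[i] = v < 0 ? 0 : v > 255 ? 255 : v; /* packuswb-style clamp */
    }
}
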
Signed-off-by: Ting Fu 
---
 libavfilter/x86/vf_eq.asm| 19 +--
 libavfilter/x86/vf_eq_init.c | 20 
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm
index bf28691297..d6b51cf6df 100644
--- a/libavfilter/x86/vf_eq.asm
+++ b/libavfilter/x86/vf_eq.asm
@@ -24,14 +24,21 @@
 
 SECTION .text
 
-INIT_MMX mmx
+%macro PROCESS_ONE_LINE 1
 cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w
 movd m3, contrastd
 movd m4, brightnessd
 movsx r5d, contrastw
 movsx r6d, brightnessw
+%if mmsize == 8
 pshufw m3, m3, 0
 pshufw m4, m4, 0
+%elif mmsize == 16
+pshuflw m3, m3, 0
+movlhps m3, m3
+pshuflw m4, m4, 0
+movlhps m4, m4
+%endif
 
 DEFINE_ARGS src, dst, tmp, scalar, w
 xor tmpd, tmpd
@@ -39,7 +46,7 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, 
brightness, w
 pxor m1, m1
 mov scalard, wd
 and scalard, mmsize-1
-sar wd, 3
+sar wd, %1
 cmp wd, 1
 jl .loop1
 
@@ -80,3 +87,11 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, 
brightness, w
 
 .end:
 RET
+
+%endmacro
+
+INIT_MMX mmx
+PROCESS_ONE_LINE 3
+
+INIT_XMM sse2
+PROCESS_ONE_LINE 4
diff --git a/libavfilter/x86/vf_eq_init.c b/libavfilter/x86/vf_eq_init.c
index 63c69078fb..cdd5272220 100644
--- a/libavfilter/x86/vf_eq_init.c
+++ b/libavfilter/x86/vf_eq_init.c
@@ -28,6 +28,8 @@
 
 extern void ff_process_one_line_mmx(const uint8_t *src, uint8_t *dst, int 
contvec,
 int brvec, int w);
+extern void ff_process_one_line_sse2(const uint8_t *src, uint8_t *dst, int 
contvec,
+int brvec, int w);
 
 static void process_mmx(EQParameters *param, uint8_t *dst, int dst_stride,
 const uint8_t *src, int src_stride, int w, int h)
@@ -44,6 +46,21 @@ static void process_mmx(EQParameters *param, uint8_t *dst, 
int dst_stride,
 emms_c();
 }
 
+static void process_sse2(EQParameters *param, uint8_t *dst, int dst_stride,
+const uint8_t *src, int src_stride, int w, int h)
+{
+short contrast = (short) (param->contrast * 256 * 16);
+short brightness = ((short) (100.0 * param->brightness + 100.0) * 511)
+   / 200 - 128 - contrast / 32;
+
+while (h--) {
+ff_process_one_line_sse2(src, dst, contrast, brightness, w);
+src += src_stride;
+dst += dst_stride;
+}
+emms_c();
+}
+
 av_cold void ff_eq_init_x86(EQContext *eq)
 {
 int cpu_flags = av_get_cpu_flags();
@@ -51,5 +68,8 @@ av_cold void ff_eq_init_x86(EQContext *eq)
 if (cpu_flags & AV_CPU_FLAG_MMX) {
 eq->process = process_mmx;
 }
+if (cpu_flags & AV_CPU_FLAG_SSE2) {
+eq->process = process_sse2;
+}
 }
 
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/3] checkasm/vf_eq: add test for vf_eq

2019-09-17 Thread Ting Fu
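The test drives eq->process over randomized buffers and requires the C
and asm paths to produce identical output. A condensed sketch of the
usual checkasm pattern (buffer setup and parameter values are assumed
here; declare_func, check_func, call_ref, call_new, bench_new, fail and
report are checkasm's own helpers):

static void check_process(EQContext *eq)
{
    EQParameters param = { 0 };          /* assumed test values below */
    static uint8_t src[PIXELS], dst_ref[PIXELS], dst_new[PIXELS];

    declare_func(void, EQParameters *param, uint8_t *dst, int dst_stride,
                 const uint8_t *src, int src_stride, int w, int h);

    param.contrast   = 1.0;
    param.brightness = 0.5;
    for (int i = 0; i < PIXELS; i++)
        src[i] = rnd() & 0xff;

    if (check_func(eq->process, "process")) {
        call_ref(&param, dst_ref, WIDTH, src, SRC_STRIDE, WIDTH, HEIGHT);
        call_new(&param, dst_new, WIDTH, src, SRC_STRIDE, WIDTH, HEIGHT);
        if (memcmp(dst_ref, dst_new, PIXELS))
            fail();
        bench_new(&param, dst_new, WIDTH, src, SRC_STRIDE, WIDTH, HEIGHT);
    }
    report("process");
}
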
Signed-off-by: Ting Fu 
---
 libavfilter/vf_eq.c   | 13 ---
 libavfilter/vf_eq.h   |  1 +
 tests/checkasm/Makefile   |  1 +
 tests/checkasm/checkasm.c |  3 ++
 tests/checkasm/checkasm.h |  1 +
 tests/checkasm/vf_eq.c| 79 +++
 tests/fate/checkasm.mak   |  1 +
 7 files changed, 94 insertions(+), 5 deletions(-)
 create mode 100644 tests/checkasm/vf_eq.c

diff --git a/libavfilter/vf_eq.c b/libavfilter/vf_eq.c
index 2c4c7e4d54..0f9d129255 100644
--- a/libavfilter/vf_eq.c
+++ b/libavfilter/vf_eq.c
@@ -174,12 +174,18 @@ static int set_expr(AVExpr **pexpr, const char *expr, 
const char *option, void *
 return 0;
 }
 
+void ff_eq_init(EQContext *eq)
+{
+eq->process = process_c;
+if (ARCH_X86)
+ff_eq_init_x86(eq);
+}
+
 static int initialize(AVFilterContext *ctx)
 {
 EQContext *eq = ctx->priv;
 int ret;
-
-eq->process = process_c;
+ff_eq_init(eq);
 
 if ((ret = set_expr(&eq->contrast_pexpr, eq->contrast_expr, 
"contrast", ctx)) < 0 ||
 (ret = set_expr(&eq->brightness_pexpr,   eq->brightness_expr,   
"brightness",   ctx)) < 0 ||
@@ -191,9 +197,6 @@ static int initialize(AVFilterContext *ctx)
 (ret = set_expr(&eq->gamma_weight_pexpr, eq->gamma_weight_expr, 
"gamma_weight", ctx)) < 0 )
 return ret;
 
-if (ARCH_X86)
-ff_eq_init_x86(eq);
-
 if (eq->eval_mode == EVAL_MODE_INIT) {
 set_gamma(eq);
 set_contrast(eq);
diff --git a/libavfilter/vf_eq.h b/libavfilter/vf_eq.h
index fa49d46e5c..cd0cd75f08 100644
--- a/libavfilter/vf_eq.h
+++ b/libavfilter/vf_eq.h
@@ -100,6 +100,7 @@ typedef struct EQContext {
 enum EvalMode { EVAL_MODE_INIT, EVAL_MODE_FRAME, EVAL_MODE_NB } eval_mode;
 } EQContext;
 
+void ff_eq_init(EQContext *eq);
 void ff_eq_init_x86(EQContext *eq);
 
 #endif /* AVFILTER_EQ_H */
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 0112ff603e..de850c016e 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -36,6 +36,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC)  += $(AVCODECOBJS-yes)
 AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
 AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
 AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
+AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o
 AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)  += vf_gblur.o
 AVFILTEROBJS-$(CONFIG_HFLIP_FILTER)  += vf_hflip.o
 AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER)  += vf_threshold.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index d9a5c7f401..bcbe775510 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -165,6 +165,9 @@ static const struct {
 #if CONFIG_COLORSPACE_FILTER
 { "vf_colorspace", checkasm_check_colorspace },
 #endif
+#if CONFIG_EQ_FILTER
+{ "vf_eq", checkasm_check_vf_eq },
+#endif
 #if CONFIG_GBLUR_FILTER
 { "vf_gblur", checkasm_check_vf_gblur },
 #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index fdf9eeb75d..0a7f9f25c4 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -72,6 +72,7 @@ void checkasm_check_sw_rgb(void);
 void checkasm_check_utvideodsp(void);
 void checkasm_check_v210dec(void);
 void checkasm_check_v210enc(void);
+void checkasm_check_vf_eq(void);
 void checkasm_check_vf_gblur(void);
 void checkasm_check_vf_hflip(void);
 void checkasm_check_vf_threshold(void);
diff --git a/tests/checkasm/vf_eq.c b/tests/checkasm/vf_eq.c
new file mode 100644
index 00..b1a6a61e05
--- /dev/null
+++ b/tests/checkasm/vf_eq.c
@@ -0,0 +1,79 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include 
+#include "checkasm.h"
+#include "libavfilter/avfilter.h"
+#include "libavfilter/vf_eq.h"
+#include "libavutil/intreadwrite.h"
+
+#define WIDTH 256
+#define HEIGHT 256
+#define SRC_STRIDE 256
+#define PIXELS (WIDTH * HEIGHT)
+#define RANDOM_RANGE 8
+#define SCALE 1
+
+#define randomize_buffers(buf, size)  \
+do {  \
+int j;\
+uint8_t *tmp_buf = (uint

[FFmpeg-devel] [PATCH 2/3] avfilter/x86/vf_eq: Change inline assembly into nasm code

2019-09-17 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/x86/Makefile |  3 +-
 libavfilter/x86/vf_eq.asm| 82 ++
 libavfilter/x86/vf_eq.c  | 96 
 libavfilter/x86/vf_eq_init.c | 55 +
 4 files changed, 139 insertions(+), 97 deletions(-)
 create mode 100644 libavfilter/x86/vf_eq.asm
 delete mode 100644 libavfilter/x86/vf_eq.c
 create mode 100644 libavfilter/x86/vf_eq_init.c

diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index f12993e606..f2922c4597 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -6,7 +6,7 @@ OBJS-$(CONFIG_BLEND_FILTER)  += 
x86/vf_blend_init.o
 OBJS-$(CONFIG_BWDIF_FILTER)  += x86/vf_bwdif_init.o
 OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
 OBJS-$(CONFIG_CONVOLUTION_FILTER)+= x86/vf_convolution_init.o
-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
+OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o
 OBJS-$(CONFIG_FSPP_FILTER)   += x86/vf_fspp_init.o
 OBJS-$(CONFIG_GBLUR_FILTER)  += x86/vf_gblur_init.o
 OBJS-$(CONFIG_GRADFUN_FILTER)+= x86/vf_gradfun_init.o
@@ -43,6 +43,7 @@ X86ASM-OBJS-$(CONFIG_BLEND_FILTER)   += x86/vf_blend.o
 X86ASM-OBJS-$(CONFIG_BWDIF_FILTER)   += x86/vf_bwdif.o
 X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER)  += x86/colorspacedsp.o
 X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o
+X86ASM-OBJS-$(CONFIG_EQ_FILTER)  += x86/vf_eq.o
 X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER)   += x86/vf_framerate.o
 X86ASM-OBJS-$(CONFIG_FSPP_FILTER)+= x86/vf_fspp.o
 X86ASM-OBJS-$(CONFIG_GBLUR_FILTER)   += x86/vf_gblur.o
diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm
new file mode 100644
index 00..bf28691297
--- /dev/null
+++ b/libavfilter/x86/vf_eq.asm
@@ -0,0 +1,82 @@
+;*
+;* x86-optimized functions for eq filter
+;*
+;* Original MPlayer filters by Richard Felker.
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License along
+;* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+;*
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+INIT_MMX mmx
+cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w
+movd m3, contrastd
+movd m4, brightnessd
+movsx r5d, contrastw
+movsx r6d, brightnessw
+pshufw m3, m3, 0
+pshufw m4, m4, 0
+
+DEFINE_ARGS src, dst, tmp, scalar, w
+xor tmpd, tmpd
+pxor m0, m0
+pxor m1, m1
+mov scalard, wd
+and scalard, mmsize-1
+sar wd, 3
+cmp wd, 1
+jl .loop1
+
+.loop0:
+movu m1, [srcq]
+mova m2, m1
+punpcklbw m1, m0
+punpckhbw m2, m0
+psllw m1, 4
+psllw m2, 4
+pmulhw m1, m3
+pmulhw m2, m3
+paddw m1, m4
+paddw m2, m4
+packuswb m1, m2
+movu [dstq], m1
+add srcq, mmsize
+add dstq, mmsize
+sub wd, 1
+cmp wd, 0
+jne .loop0
+
+.loop1:
+cmp scalard, 0
+je .end
+movzx tmpd, byte [srcq]
+imul tmpd, r5d
+sar tmpd, 12
+add tmpd, r6d
+movd m1, tmpd
+packuswb m1, m0
+movd tmpd, m1
+mov [dstq], tmpb
+inc srcq
+inc dstq
+dec scalard
+jmp .loop1
+
+.end:
+RET
diff --git a/libavfilter/x86/vf_eq.c b/libavfilter/x86/vf_eq.c
deleted file mode 100644
index 16f399505f..00
--- a/libavfilter/x86/vf_eq.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- *
- * Original MPlayer filters by Richard Felker.
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for mo

[FFmpeg-devel] [PATCH V2 1/3] checkasm/vf_eq: add test for vf_eq

2019-09-18 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/vf_eq.c   | 13 ---
 libavfilter/vf_eq.h   |  1 +
 tests/checkasm/Makefile   |  1 +
 tests/checkasm/checkasm.c |  3 ++
 tests/checkasm/checkasm.h |  1 +
 tests/checkasm/vf_eq.c| 79 +++
 tests/fate/checkasm.mak   |  1 +
 7 files changed, 94 insertions(+), 5 deletions(-)
 create mode 100644 tests/checkasm/vf_eq.c

diff --git a/libavfilter/vf_eq.c b/libavfilter/vf_eq.c
index 2c4c7e4d54..0f9d129255 100644
--- a/libavfilter/vf_eq.c
+++ b/libavfilter/vf_eq.c
@@ -174,12 +174,18 @@ static int set_expr(AVExpr **pexpr, const char *expr, 
const char *option, void *
 return 0;
 }
 
+void ff_eq_init(EQContext *eq)
+{
+eq->process = process_c;
+if (ARCH_X86)
+ff_eq_init_x86(eq);
+}
+
 static int initialize(AVFilterContext *ctx)
 {
 EQContext *eq = ctx->priv;
 int ret;
-
-eq->process = process_c;
+ff_eq_init(eq);
 
 if ((ret = set_expr(&eq->contrast_pexpr, eq->contrast_expr, 
"contrast", ctx)) < 0 ||
 (ret = set_expr(&eq->brightness_pexpr,   eq->brightness_expr,   
"brightness",   ctx)) < 0 ||
@@ -191,9 +197,6 @@ static int initialize(AVFilterContext *ctx)
 (ret = set_expr(&eq->gamma_weight_pexpr, eq->gamma_weight_expr, 
"gamma_weight", ctx)) < 0 )
 return ret;
 
-if (ARCH_X86)
-ff_eq_init_x86(eq);
-
 if (eq->eval_mode == EVAL_MODE_INIT) {
 set_gamma(eq);
 set_contrast(eq);
diff --git a/libavfilter/vf_eq.h b/libavfilter/vf_eq.h
index fa49d46e5c..cd0cd75f08 100644
--- a/libavfilter/vf_eq.h
+++ b/libavfilter/vf_eq.h
@@ -100,6 +100,7 @@ typedef struct EQContext {
 enum EvalMode { EVAL_MODE_INIT, EVAL_MODE_FRAME, EVAL_MODE_NB } eval_mode;
 } EQContext;
 
+void ff_eq_init(EQContext *eq);
 void ff_eq_init_x86(EQContext *eq);
 
 #endif /* AVFILTER_EQ_H */
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 0112ff603e..de850c016e 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -36,6 +36,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC)  += $(AVCODECOBJS-yes)
 AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
 AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
 AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
+AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o
 AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)  += vf_gblur.o
 AVFILTEROBJS-$(CONFIG_HFLIP_FILTER)  += vf_hflip.o
 AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER)  += vf_threshold.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index d9a5c7f401..bcbe775510 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -165,6 +165,9 @@ static const struct {
 #if CONFIG_COLORSPACE_FILTER
 { "vf_colorspace", checkasm_check_colorspace },
 #endif
+#if CONFIG_EQ_FILTER
+{ "vf_eq", checkasm_check_vf_eq },
+#endif
 #if CONFIG_GBLUR_FILTER
 { "vf_gblur", checkasm_check_vf_gblur },
 #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index fdf9eeb75d..0a7f9f25c4 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -72,6 +72,7 @@ void checkasm_check_sw_rgb(void);
 void checkasm_check_utvideodsp(void);
 void checkasm_check_v210dec(void);
 void checkasm_check_v210enc(void);
+void checkasm_check_vf_eq(void);
 void checkasm_check_vf_gblur(void);
 void checkasm_check_vf_hflip(void);
 void checkasm_check_vf_threshold(void);
diff --git a/tests/checkasm/vf_eq.c b/tests/checkasm/vf_eq.c
new file mode 100644
index 00..684718f2cd
--- /dev/null
+++ b/tests/checkasm/vf_eq.c
@@ -0,0 +1,79 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include 
+#include "checkasm.h"
+#include "libavfilter/avfilter.h"
+#include "libavfilter/vf_eq.h"
+#include "libavutil/intreadwrite.h"
+
+#define WIDTH 256
+#define HEIGHT 256
+#define SRC_STRIDE 256
+#define PIXELS (WIDTH * HEIGHT)
+#define RANDOM_RANGE 8
+#define SCALE 1
+
+#define randomize_buffers(buf, size)  \
+do {  \
+int j;\
+uint8_t *tmp_buf = (uint

[FFmpeg-devel] [PATCH V2 3/3] avfilter/x86/vf_eq: add SSE2 version

2019-09-18 Thread Ting Fu
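The fixed-point scheme is the same at both widths, worked out from the
scaling in vf_eq_init.c below: the C wrapper pre-scales contrast by
256*16 = 4096, the asm shifts each widened pixel left by 4, and pmulhw
keeps the high 16 bits of the signed product, so per pixel

    \mathrm{out} = \mathrm{clip}\!\left(\frac{(16s)(4096C)}{2^{16}} + B,\ 0,\ 255\right)
                 = \mathrm{clip}(sC + B,\ 0,\ 255)

where s is the source byte, C the floating-point contrast parameter and
B the precomputed brightness offset.
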
Signed-off-by: Ting Fu 
---
 libavfilter/x86/vf_eq.asm| 12 ++--
 libavfilter/x86/vf_eq_init.c | 19 +++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm
index 8460342896..a30a287029 100644
--- a/libavfilter/x86/vf_eq.asm
+++ b/libavfilter/x86/vf_eq.asm
@@ -24,7 +24,7 @@
 
 SECTION .text
 
-INIT_MMX mmxext
+%macro PROCESS_ONE_LINE 1
 cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w
 movd m3, contrastd
 movd m4, brightnessd
@@ -39,7 +39,7 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, 
brightness, w
 pxor m1, m1
 mov scalard, wd
 and scalard, mmsize-1
-sar wd, 3
+sar wd, %1
 cmp wd, 1
 jl .loop1
 
@@ -80,3 +80,11 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, 
brightness, w
 
 .end:
 RET
+
+%endmacro
+
+INIT_MMX mmxext
+PROCESS_ONE_LINE 3
+
+INIT_XMM sse2
+PROCESS_ONE_LINE 4
diff --git a/libavfilter/x86/vf_eq_init.c b/libavfilter/x86/vf_eq_init.c
index c18db75545..8ad9f4bcaf 100644
--- a/libavfilter/x86/vf_eq_init.c
+++ b/libavfilter/x86/vf_eq_init.c
@@ -28,6 +28,8 @@
 
 extern void ff_process_one_line_mmxext(const uint8_t *src, uint8_t *dst, short 
contrast,
short brightness, int w);
+extern void ff_process_one_line_sse2(const uint8_t *src, uint8_t *dst, short 
contrast,
+ short brightness, int w);
 
 static void process_mmxext(EQParameters *param, uint8_t *dst, int dst_stride,
const uint8_t *src, int src_stride, int w, int h)
@@ -44,6 +46,20 @@ static void process_mmxext(EQParameters *param, uint8_t 
*dst, int dst_stride,
 emms_c();
 }
 
+static void process_sse2(EQParameters *param, uint8_t *dst, int dst_stride,
+                         const uint8_t *src, int src_stride, int w, int h)
+{
+short contrast = (short) (param->contrast * 256 * 16);
+short brightness = ((short) (100.0 * param->brightness + 100.0) * 511)
+   / 200 - 128 - contrast / 32;
+
+while (h--) {
+ff_process_one_line_sse2(src, dst, contrast, brightness, w);
+src += src_stride;
+dst += dst_stride;
+}
+}
+
 av_cold void ff_eq_init_x86(EQContext *eq)
 {
 int cpu_flags = av_get_cpu_flags();
@@ -51,5 +67,8 @@ av_cold void ff_eq_init_x86(EQContext *eq)
 if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
 eq->process = process_mmxext;
 }
+if (cpu_flags & AV_CPU_FLAG_SSE2) {
+eq->process = process_sse2;
+}
 }
 
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
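
As a quick sanity check of the parameter scaling in process_sse2() above, worked through for the eq filter's default settings (contrast = 1.0, brightness = 0.0); the numbers below just evaluate the posted expressions:

    short contrast   = (short)(1.0 * 256 * 16);   /* 4096, i.e. 1.0 in 4.12 fixed point */
    short brightness = ((short)(100.0 * 0.0 + 100.0) * 511) / 200 - 128
                       - contrast / 32;           /* 255 - 128 - 128 = -1 */
    /* Per pixel: (p * 4096 >> 12) + (-1) == p - 1, so the default settings
     * give a near-identity transform. */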

[FFmpeg-devel] [PATCH V2 2/3] avfilter/x86/vf_eq: Change inline assembly into nasm code

2019-09-18 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/x86/Makefile |  3 +-
 libavfilter/x86/vf_eq.asm| 82 ++
 libavfilter/x86/vf_eq.c  | 96 
 libavfilter/x86/vf_eq_init.c | 55 +
 4 files changed, 139 insertions(+), 97 deletions(-)
 create mode 100644 libavfilter/x86/vf_eq.asm
 delete mode 100644 libavfilter/x86/vf_eq.c
 create mode 100644 libavfilter/x86/vf_eq_init.c

diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index f12993e606..f2922c4597 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -6,7 +6,7 @@ OBJS-$(CONFIG_BLEND_FILTER)  += 
x86/vf_blend_init.o
 OBJS-$(CONFIG_BWDIF_FILTER)  += x86/vf_bwdif_init.o
 OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
 OBJS-$(CONFIG_CONVOLUTION_FILTER)+= x86/vf_convolution_init.o
-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
+OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o
 OBJS-$(CONFIG_FSPP_FILTER)   += x86/vf_fspp_init.o
 OBJS-$(CONFIG_GBLUR_FILTER)  += x86/vf_gblur_init.o
 OBJS-$(CONFIG_GRADFUN_FILTER)+= x86/vf_gradfun_init.o
@@ -43,6 +43,7 @@ X86ASM-OBJS-$(CONFIG_BLEND_FILTER)   += x86/vf_blend.o
 X86ASM-OBJS-$(CONFIG_BWDIF_FILTER)   += x86/vf_bwdif.o
 X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER)  += x86/colorspacedsp.o
 X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o
+X86ASM-OBJS-$(CONFIG_EQ_FILTER)  += x86/vf_eq.o
 X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER)   += x86/vf_framerate.o
 X86ASM-OBJS-$(CONFIG_FSPP_FILTER)+= x86/vf_fspp.o
 X86ASM-OBJS-$(CONFIG_GBLUR_FILTER)   += x86/vf_gblur.o
diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm
new file mode 100644
index 0000000000..8460342896
--- /dev/null
+++ b/libavfilter/x86/vf_eq.asm
@@ -0,0 +1,82 @@
+;*
+;* x86-optimized functions for eq filter
+;*
+;* Original MPlayer filters by Richard Felker.
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License along
+;* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+;*
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+INIT_MMX mmxext
+cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w
+movd m3, contrastd
+movd m4, brightnessd
+movsx r5d, contrastw
+movsx r6d, brightnessw
+SPLATW m3, m3, 0
+SPLATW m4, m4, 0
+
+DEFINE_ARGS src, dst, tmp, scalar, w
+xor tmpd, tmpd
+pxor m0, m0
+pxor m1, m1
+mov scalard, wd
+and scalard, mmsize-1
+sar wd, 3
+cmp wd, 1
+jl .loop1
+
+.loop0:
+movu m1, [srcq]
+mova m2, m1
+punpcklbw m1, m0
+punpckhbw m2, m0
+psllw m1, 4
+psllw m2, 4
+pmulhw m1, m3
+pmulhw m2, m3
+paddw m1, m4
+paddw m2, m4
+packuswb m1, m2
+movu [dstq], m1
+add srcq, mmsize
+add dstq, mmsize
+sub wd, 1
+cmp wd, 0
+jne .loop0
+
+.loop1:
+cmp scalard, 0
+je .end
+movzx tmpd, byte [srcq]
+imul tmpd, r5d
+sar tmpd, 12
+add tmpd, r6d
+movd m1, tmpd
+packuswb m1, m0
+movd tmpd, m1
+mov [dstq], tmpb
+inc srcq
+inc dstq
+dec scalard
+jmp .loop1
+
+.end:
+RET
diff --git a/libavfilter/x86/vf_eq.c b/libavfilter/x86/vf_eq.c
deleted file mode 100644
index 16f399505f..0000000000
--- a/libavfilter/x86/vf_eq.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- *
- * Original MPlayer filters by Richard Felker.
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.

[FFmpeg-devel] [PATCH 2/2] libswscale/x86/yuv2rgb: add ssse3 version

2019-11-27 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libswscale/x86/yuv2rgb.c  |   5 +
 libswscale/x86/yuv2rgb_template.c |  58 ++-
 libswscale/x86/yuv_2_rgb.asm  | 163 +++---
 3 files changed, 208 insertions(+), 18 deletions(-)

diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 70412a3914..d983934762 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -61,6 +61,11 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 #define COMPILE_TEMPLATE_MMXEXT 1
 #endif /* HAVE_MMXEXT */
 
+//SSSE3 versions
+#if HAVE_SSSE3
+#define COMPILE_TEMPLATE_SSSE3 1
+#endif
+
 #include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
diff --git a/libswscale/x86/yuv2rgb_template.c 
b/libswscale/x86/yuv2rgb_template.c
index efe6356f30..fe586047f0 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -40,6 +40,30 @@
 const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
 x86_reg index = -h_size / 2; \
 
+extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+                                   const uint8_t *pv_index, const uint8_t *pointer_c_dither,
+                                   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+                                   const uint8_t *pv_index, const uint8_t *pointer_c_dither,
+                                   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+                                   const uint8_t *pv_index, const uint8_t *pointer_c_dither,
+                                   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+                                   const uint8_t *pv_index, const uint8_t *pointer_c_dither,
+                                   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+                                   const uint8_t *pv_index, const uint8_t *pointer_c_dither,
+                                   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+                                   const uint8_t *pv_index, const uint8_t *pointer_c_dither,
+                                   const uint8_t *py_2index);
+extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+                                    const uint8_t *pv_index, const uint8_t *pointer_c_dither,
+                                    const uint8_t *py_2index, const uint8_t *pa_2index);
+extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
+                                    const uint8_t *pv_index, const uint8_t *pointer_c_dither,
+                                    const uint8_t *py_2index, const uint8_t *pa_2index);
 extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const uint8_t *pu_index,
                                     const uint8_t *pv_index, const uint8_t *pointer_c_dither,
                                     const uint8_t *py_2index);
@@ -84,7 +108,12 @@ static inline int yuv420_rgb15(SwsContext *c, const uint8_t *src[],
 c->greenDither = ff_dither8[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -102,7 +131,12 @@ static inline int yuv420_rgb16(SwsContext *c, const uint8_t *src[],
 c->greenDither = ff_dither4[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -115,7 +149,9 @@ static inline int yuv420_rgb24(SwsContext *c, const uint8_t *src[],
 int y, h_size, vshift;
 YUV2RGB_LOOP(3)
 
-#if COMPILE_TEMPLATE_MMXEXT
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
+#elif COMPILE_TEMPLATE_MMXEXT
 ff_yuv_420_rgb24_mmxext(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
 #else
 ff_yuv_420_rgb24_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
@@ -132,7 +168
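
(The archive truncates the remainder of this hunk.) For orientation, every kernel declared above implements the same core transform; a minimal floating-point BT.601 full-range model is sketched below. swscale itself uses precomputed integer coefficient tables plus dithering, so this is an approximation of the math, not the library's code path:

    static void yuv2rgb_ref(uint8_t y, uint8_t u, uint8_t v,
                            uint8_t *r, uint8_t *g, uint8_t *b)
    {
        double Y = y, U = u - 128.0, V = v - 128.0;
        double R = Y + 1.402    * V;
        double G = Y - 0.344136 * U - 0.714136 * V;
        double B = Y + 1.772    * U;
        *r = R < 0 ? 0 : R > 255 ? 255 : (uint8_t)(R + 0.5);
        *g = G < 0 ? 0 : G > 255 ? 255 : (uint8_t)(G + 0.5);
        *b = B < 0 ? 0 : B > 255 ? 255 : (uint8_t)(B + 0.5);
    }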

[FFmpeg-devel] [PATCH 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code

2019-11-27 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libswscale/x86/Makefile   |   1 +
 libswscale/x86/swscale.c  |  16 +-
 libswscale/x86/yuv2rgb.c  |  81 ++
 libswscale/x86/yuv2rgb_template.c | 441 ++
 libswscale/x86/yuv_2_rgb.asm  | 270 ++
 5 files changed, 394 insertions(+), 415 deletions(-)
 create mode 100644 libswscale/x86/yuv_2_rgb.asm

diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index f317d5dd9b..831d5359aa 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile
@@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \
x86/output.o \
x86/scale.o  \
x86/rgb_2_rgb.o  \
+   x86/yuv_2_rgb.o  \
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 0eed4f18d5..e9d474a1e8 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -29,6 +29,14 @@
 #include "libavutil/cpu.h"
 #include "libavutil/pixdesc.h"
 
+const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
+0x0103010301030103LL,
+0x0200020002000200LL,};
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
+0x0602060206020602LL,
+0x0004000400040004LL,};
+
 #if HAVE_INLINE_ASM
 
 #define DITHER1XBPP
@@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)=   0xFCFCFCFCFCFCFCFCLL;
 DECLARE_ASM_CONST(8, uint64_t, w10)=   0x0010001000100010LL;
 DECLARE_ASM_CONST(8, uint64_t, w02)=   0x0002000200020002LL;
 
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
-0x0103010301030103LL,
-0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
-0x0602060206020602LL,
-0x0004000400040004LL,};
-
 DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
 DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
 DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 5e2f77c20f..70412a3914 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -37,7 +37,7 @@
 #include "libavutil/x86/cpu.h"
 #include "libavutil/cpu.h"
 
-#if HAVE_INLINE_ASM
+#if HAVE_X86ASM
 
 #define DITHER1XBPP // only for MMX
 
@@ -50,70 +50,49 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
 DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 //MMX versions
-#if HAVE_MMX_INLINE && HAVE_6REGS
-#undef RENAME
+#if HAVE_MMX
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _mmx
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMX */
 
 // MMXEXT versions
-#if HAVE_MMXEXT_INLINE && HAVE_6REGS
-#undef RENAME
+#if HAVE_MMXEXT
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _mmxext
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMXEXT */
 
-#endif /* HAVE_INLINE_ASM */
+#include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
-#if HAVE_MMX_INLINE && HAVE_6REGS
 int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_MMXEXT_INLINE
-if (INLINE_MMXEXT(cpu_flags)) {
-switch (c->dstFormat) {
-case AV_PIX_FMT_RGB24:
-return yuv420_rgb24_mmxext;
-case AV_PIX_FMT_BGR24:
-return yuv420_bgr24_mmxext;
-}
-}
+switch (c->dstFormat) {
+case AV_PIX_FMT_RGB32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_rgb32;
 #endif
-
-if (INLINE_MMX(cpu_flags)) {
-switch (c->dstFormat) {
-case AV_PIX_FMT_RGB32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_rgb32_mmx;
-#endif
-break;
-} else
-return yuv420_rgb32_mmx;
-case AV_PIX_FMT_BGR32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_bgr32_mmx;
+break;
+} else
+return yuv420_rgb32;
+case AV_PIX_FMT_BGR32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_bgr32;
 #endif
-break;
-} else
-return yuv420_bgr32_mmx;
-case AV_PIX_FMT_RGB24:
-return yuv420_rgb24_mmx;
-case AV_PIX_FMT_BGR24:
-return yuv420_bgr24_mmx;
-case AV_PIX_FMT_RGB565:
-
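
(Truncated by the archive.) The INLINE_MMX to EXTERNAL_MMX switch seen throughout this patch is the heart of the conversion: in libavutil/x86/cpu.h, the INLINE_* checks only pass when the compiler supports inline assembly, while the EXTERNAL_* checks gate code built by a standalone assembler (nasm/yasm). A condensed model of the resulting dispatch, assuming those macro semantics (a sketch, not the full init function):

    int cpu_flags = av_get_cpu_flags();
    /* True only when the binary was built with a standalone assembler
     * (HAVE_X86ASM) and the CPU reports MMX at run time. */
    if (EXTERNAL_MMX(cpu_flags) && c->dstFormat == AV_PIX_FMT_RGB24)
        return yuv420_rgb24;   /* the nasm implementation */
    return NULL;               /* caller falls back to the C path */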

[FFmpeg-devel] [PATCH V2 2/2] libswscale/x86/yuv2rgb: add ssse3 version

2019-12-01 Thread Ting Fu
Tested using this command:
./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
-vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null

The fps increases from 389 to 640 on my local machine.

Signed-off-by: Ting Fu 
---
 libswscale/x86/yuv2rgb.c  |   8 +-
 libswscale/x86/yuv2rgb_template.c |  58 ++-
 libswscale/x86/yuv_2_rgb.asm  | 162 +++---
 3 files changed, 209 insertions(+), 19 deletions(-)

diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index ed9b613cab..b83dd7089a 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -61,13 +61,19 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 #define COMPILE_TEMPLATE_MMXEXT 1
 #endif /* HAVE_MMXEXT */
 
+//SSSE3 versions
+#if HAVE_SSSE3
+#define COMPILE_TEMPLATE_SSSE3 1
+#endif
+
 #include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags) ||
+EXTERNAL_SSSE3(cpu_flags)) {
 switch (c->dstFormat) {
 case AV_PIX_FMT_RGB32:
 if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
diff --git a/libswscale/x86/yuv2rgb_template.c 
b/libswscale/x86/yuv2rgb_template.c
index efe6356f30..fe586047f0 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -40,6 +40,30 @@
 const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
 x86_reg index = -h_size / 2; \
 
+extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+const uint8_t *py_2index, const uint8_t 
*pa_2index);
+extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+const uint8_t *py_2index, const uint8_t 
*pa_2index);
 extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
 const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
 const uint8_t *py_2index);
@@ -84,7 +108,12 @@ static inline int yuv420_rgb15(SwsContext *c, const uint8_t *src[],
 c->greenDither = ff_dither8[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -102,7 +131,12 @@ static inline int yuv420_rgb16(SwsContext *c, const uint8_t *src[],
 c->greenDither = ff_dither4[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -115,7 +149,9 @@ s

[FFmpeg-devel] [PATCH V2 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code

2019-12-01 Thread Ting Fu
Tested using this command:
./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
-vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null

The fps increases from 151 to 389 on my local machine.

Signed-off-by: Ting Fu 
---
 libswscale/x86/Makefile   |   1 +
 libswscale/x86/swscale.c  |  16 +-
 libswscale/x86/yuv2rgb.c  |  81 +++---
 libswscale/x86/yuv2rgb_template.c | 441 ++
 libswscale/x86/yuv_2_rgb.asm  | 270 ++
 5 files changed, 395 insertions(+), 414 deletions(-)
 create mode 100644 libswscale/x86/yuv_2_rgb.asm

diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index f317d5dd9b..831d5359aa 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile
@@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \
x86/output.o \
x86/scale.o  \
x86/rgb_2_rgb.o  \
+   x86/yuv_2_rgb.o  \
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 0eed4f18d5..e9d474a1e8 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -29,6 +29,14 @@
 #include "libavutil/cpu.h"
 #include "libavutil/pixdesc.h"
 
+const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
+0x0103010301030103LL,
+0x0200020002000200LL,};
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
+0x0602060206020602LL,
+0x0004000400040004LL,};
+
 #if HAVE_INLINE_ASM
 
 #define DITHER1XBPP
@@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)=   0xFCFCFCFCFCFCFCFCLL;
 DECLARE_ASM_CONST(8, uint64_t, w10)=   0x0010001000100010LL;
 DECLARE_ASM_CONST(8, uint64_t, w02)=   0x0002000200020002LL;
 
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
-0x0103010301030103LL,
-0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
-0x0602060206020602LL,
-0x0004000400040004LL,};
-
 DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
 DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
 DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 5e2f77c20f..ed9b613cab 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -37,7 +37,7 @@
 #include "libavutil/x86/cpu.h"
 #include "libavutil/cpu.h"
 
-#if HAVE_INLINE_ASM
+#if HAVE_X86ASM
 
 #define DITHER1XBPP // only for MMX
 
@@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
 DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 //MMX versions
-#if HAVE_MMX_INLINE && HAVE_6REGS
-#undef RENAME
+#if HAVE_MMX
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _mmx
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMX */
 
 // MMXEXT versions
-#if HAVE_MMXEXT_INLINE && HAVE_6REGS
-#undef RENAME
+#if HAVE_MMXEXT
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _mmxext
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMXEXT */
 
-#endif /* HAVE_INLINE_ASM */
+#include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
-#if HAVE_MMX_INLINE && HAVE_6REGS
 int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_MMXEXT_INLINE
-if (INLINE_MMXEXT(cpu_flags)) {
-switch (c->dstFormat) {
-case AV_PIX_FMT_RGB24:
-return yuv420_rgb24_mmxext;
-case AV_PIX_FMT_BGR24:
-return yuv420_bgr24_mmxext;
-}
-}
-#endif
-
-if (INLINE_MMX(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) {
 switch (c->dstFormat) {
-case AV_PIX_FMT_RGB32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_rgb32_mmx;
+case AV_PIX_FMT_RGB32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_rgb32;
 #endif
-break;
-} else
-return yuv420_rgb32_mmx;
-case AV_PIX_FMT_BGR32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_bgr32_mmx;
+break;
+} else
+return yuv420_rgb32;
+case AV_PIX_FMT_BGR32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_bgr32;
 #endif
-

[FFmpeg-devel] [PATCH V3 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code

2019-12-03 Thread Ting Fu
Tested using this command:
./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
-vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null

The fps increases from 151 to 389 on my local machine.

Signed-off-by: Ting Fu 
---
 libswscale/x86/Makefile   |   1 +
 libswscale/x86/swscale.c  |  16 +-
 libswscale/x86/yuv2rgb.c  |  81 +++---
 libswscale/x86/yuv2rgb_template.c | 441 ++
 libswscale/x86/yuv_2_rgb.asm  | 270 ++
 5 files changed, 395 insertions(+), 414 deletions(-)
 create mode 100644 libswscale/x86/yuv_2_rgb.asm

diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index f317d5dd9b..831d5359aa 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile
@@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \
x86/output.o \
x86/scale.o  \
x86/rgb_2_rgb.o  \
+   x86/yuv_2_rgb.o  \
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 0eed4f18d5..e9d474a1e8 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -29,6 +29,14 @@
 #include "libavutil/cpu.h"
 #include "libavutil/pixdesc.h"
 
+const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
+0x0103010301030103LL,
+0x0200020002000200LL,};
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
+0x0602060206020602LL,
+0x0004000400040004LL,};
+
 #if HAVE_INLINE_ASM
 
 #define DITHER1XBPP
@@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)=   0xFCFCFCFCFCFCFCFCLL;
 DECLARE_ASM_CONST(8, uint64_t, w10)=   0x0010001000100010LL;
 DECLARE_ASM_CONST(8, uint64_t, w02)=   0x0002000200020002LL;
 
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
-0x0103010301030103LL,
-0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
-0x0602060206020602LL,
-0x0004000400040004LL,};
-
 DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
 DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
 DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 5e2f77c20f..ed9b613cab 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -37,7 +37,7 @@
 #include "libavutil/x86/cpu.h"
 #include "libavutil/cpu.h"
 
-#if HAVE_INLINE_ASM
+#if HAVE_X86ASM
 
 #define DITHER1XBPP // only for MMX
 
@@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
 DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 //MMX versions
-#if HAVE_MMX_INLINE && HAVE_6REGS
-#undef RENAME
+#if HAVE_MMX
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _mmx
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMX */
 
 // MMXEXT versions
-#if HAVE_MMXEXT_INLINE && HAVE_6REGS
-#undef RENAME
+#if HAVE_MMXEXT
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _mmxext
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMXEXT */
 
-#endif /* HAVE_INLINE_ASM */
+#include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
-#if HAVE_MMX_INLINE && HAVE_6REGS
 int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_MMXEXT_INLINE
-if (INLINE_MMXEXT(cpu_flags)) {
-switch (c->dstFormat) {
-case AV_PIX_FMT_RGB24:
-return yuv420_rgb24_mmxext;
-case AV_PIX_FMT_BGR24:
-return yuv420_bgr24_mmxext;
-}
-}
-#endif
-
-if (INLINE_MMX(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) {
 switch (c->dstFormat) {
-case AV_PIX_FMT_RGB32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_rgb32_mmx;
+case AV_PIX_FMT_RGB32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_rgb32;
 #endif
-break;
-} else
-return yuv420_rgb32_mmx;
-case AV_PIX_FMT_BGR32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_bgr32_mmx;
+break;
+} else
+return yuv420_rgb32;
+case AV_PIX_FMT_BGR32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_bgr32;
 #endif
-

[FFmpeg-devel] [PATCH V3 2/2] libswscale/x86/yuv2rgb: add ssse3 version

2019-12-03 Thread Ting Fu
Tested using this command:
./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
-vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null

The fps increases from 389 to 640 on my local machine.

Signed-off-by: Ting Fu 
---
 libswscale/x86/yuv2rgb.c  |   8 +-
 libswscale/x86/yuv2rgb_template.c |  58 ++-
 libswscale/x86/yuv_2_rgb.asm  | 162 +++---
 3 files changed, 209 insertions(+), 19 deletions(-)

diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index ed9b613cab..b83dd7089a 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -61,13 +61,19 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 #define COMPILE_TEMPLATE_MMXEXT 1
 #endif /* HAVE_MMXEXT */
 
+//SSSE3 versions
+#if HAVE_SSSE3
+#define COMPILE_TEMPLATE_SSSE3 1
+#endif
+
 #include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags) ||
+EXTERNAL_SSSE3(cpu_flags)) {
 switch (c->dstFormat) {
 case AV_PIX_FMT_RGB32:
 if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
diff --git a/libswscale/x86/yuv2rgb_template.c 
b/libswscale/x86/yuv2rgb_template.c
index bcc8eb7602..97a3645b90 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -40,6 +40,30 @@
 const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
 x86_reg index = -h_size / 2; \
 
+extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+const uint8_t *py_2index, const uint8_t 
*pa_2index);
+extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+const uint8_t *py_2index, const uint8_t 
*pa_2index);
 extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
 const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
 const uint8_t *py_2index);
@@ -84,7 +108,12 @@ static inline int yuv420_rgb15(SwsContext *c, const uint8_t *src[],
 c->greenDither = ff_dither8[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -102,7 +131,12 @@ static inline int yuv420_rgb16(SwsContext *c, const uint8_t *src[],
 c->greenDither = ff_dither4[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -115

[FFmpeg-devel] [PATCH] libswscale/swscale_unscaled.c: remove redundant code

2019-12-04 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libswscale/swscale_unscaled.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index d9260c151a..0d109da2d7 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -2032,7 +2032,6 @@ void ff_get_unscaled_swscale(SwsContext *c)
 IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BAYER_GRBG16) 
||
 IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR444) ||
 IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR48)  ||
-IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGRA64) ||
 IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR555) ||
 IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR565) ||
 IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGRA64) ||
@@ -2053,7 +2052,6 @@ void ff_get_unscaled_swscale(SwsContext *c)
 IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRAP16) ||
 IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB444) ||
 IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB48)  ||
-IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGBA64) ||
 IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB555) ||
 IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB565) ||
 IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGBA64) ||
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
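
For context on the removal: IS_DIFFERENT_ENDIANESS matches a big-endian pixel format paired with its little-endian twin, so AV_PIX_FMT_BGRA64 and AV_PIX_FMT_RGBA64 each appeared twice in the same || chain and one copy of each was dead. The macro's shape is roughly the following (a paraphrase, not a verbatim copy of swscale_unscaled.c):

    #define IS_DIFFERENT_ENDIANESS(src, dst, pix_fmt)            \
        (((src) == pix_fmt ## BE && (dst) == pix_fmt ## LE) ||   \
         ((src) == pix_fmt ## LE && (dst) == pix_fmt ## BE))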

[FFmpeg-devel] [PATCH V4 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code

2019-12-18 Thread Ting Fu
Tested using this command:
./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
-vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null

The fps increases from 151 to 389 on my local machine.

Signed-off-by: Ting Fu 
---
 libswscale/x86/Makefile   |   1 +
 libswscale/x86/swscale.c  |  16 +-
 libswscale/x86/yuv2rgb.c  |  81 +++---
 libswscale/x86/yuv2rgb_template.c | 441 ++
 libswscale/x86/yuv_2_rgb.asm  | 270 ++
 5 files changed, 395 insertions(+), 414 deletions(-)
 create mode 100644 libswscale/x86/yuv_2_rgb.asm

diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index f317d5dd9b..831d5359aa 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile
@@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \
x86/output.o \
x86/scale.o  \
x86/rgb_2_rgb.o  \
+   x86/yuv_2_rgb.o  \
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 0eed4f18d5..e9d474a1e8 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -29,6 +29,14 @@
 #include "libavutil/cpu.h"
 #include "libavutil/pixdesc.h"
 
+const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
+0x0103010301030103LL,
+0x0200020002000200LL,};
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
+0x0602060206020602LL,
+0x0004000400040004LL,};
+
 #if HAVE_INLINE_ASM
 
 #define DITHER1XBPP
@@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)=   0xFCFCFCFCFCFCFCFCLL;
 DECLARE_ASM_CONST(8, uint64_t, w10)=   0x0010001000100010LL;
 DECLARE_ASM_CONST(8, uint64_t, w02)=   0x0002000200020002LL;
 
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
-0x0103010301030103LL,
-0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
-0x0602060206020602LL,
-0x0004000400040004LL,};
-
 DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
 DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
 DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 5e2f77c20f..ed9b613cab 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -37,7 +37,7 @@
 #include "libavutil/x86/cpu.h"
 #include "libavutil/cpu.h"
 
-#if HAVE_INLINE_ASM
+#if HAVE_X86ASM
 
 #define DITHER1XBPP // only for MMX
 
@@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
 DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 //MMX versions
-#if HAVE_MMX_INLINE && HAVE_6REGS
-#undef RENAME
+#if HAVE_MMX
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _mmx
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMX */
 
 // MMXEXT versions
-#if HAVE_MMXEXT_INLINE && HAVE_6REGS
-#undef RENAME
+#if HAVE_MMXEXT
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _mmxext
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMXEXT */
 
-#endif /* HAVE_INLINE_ASM */
+#include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
-#if HAVE_MMX_INLINE && HAVE_6REGS
 int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_MMXEXT_INLINE
-if (INLINE_MMXEXT(cpu_flags)) {
-switch (c->dstFormat) {
-case AV_PIX_FMT_RGB24:
-return yuv420_rgb24_mmxext;
-case AV_PIX_FMT_BGR24:
-return yuv420_bgr24_mmxext;
-}
-}
-#endif
-
-if (INLINE_MMX(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) {
 switch (c->dstFormat) {
-case AV_PIX_FMT_RGB32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_rgb32_mmx;
+case AV_PIX_FMT_RGB32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_rgb32;
 #endif
-break;
-} else
-return yuv420_rgb32_mmx;
-case AV_PIX_FMT_BGR32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_bgr32_mmx;
+break;
+} else
+return yuv420_rgb32;
+case AV_PIX_FMT_BGR32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_bgr32;
 #endif
-

[FFmpeg-devel] [PATCH V4 2/2] libswscale/x86/yuv2rgb: add ssse3 version

2019-12-18 Thread Ting Fu
Tested using this command:
./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
-vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null

The fps increases from 389 to 640 on my local machine.

Signed-off-by: Ting Fu 
---
 libswscale/x86/yuv2rgb.c  |   8 +-
 libswscale/x86/yuv2rgb_template.c |  58 +++-
 libswscale/x86/yuv_2_rgb.asm  | 145 ++
 3 files changed, 192 insertions(+), 19 deletions(-)

diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index ed9b613cab..b83dd7089a 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -61,13 +61,19 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 #define COMPILE_TEMPLATE_MMXEXT 1
 #endif /* HAVE_MMXEXT */
 
+//SSSE3 versions
+#if HAVE_SSSE3
+#define COMPILE_TEMPLATE_SSSE3 1
+#endif
+
 #include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags) ||
+EXTERNAL_SSSE3(cpu_flags)) {
 switch (c->dstFormat) {
 case AV_PIX_FMT_RGB32:
 if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
diff --git a/libswscale/x86/yuv2rgb_template.c 
b/libswscale/x86/yuv2rgb_template.c
index bcc8eb7602..97a3645b90 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -40,6 +40,30 @@
 const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
 x86_reg index = -h_size / 2; \
 
+extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+const uint8_t *py_2index, const uint8_t 
*pa_2index);
+extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+const uint8_t *py_2index, const uint8_t 
*pa_2index);
 extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
 const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
 const uint8_t *py_2index);
@@ -84,7 +108,12 @@ static inline int yuv420_rgb15(SwsContext *c, const uint8_t *src[],
 c->greenDither = ff_dither8[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -102,7 +131,12 @@ static inline int yuv420_rgb16(SwsContext *c, const uint8_t *src[],
 c->greenDither = ff_dither4[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -115

[FFmpeg-devel] [PATCH V5 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code

2020-01-05 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libswscale/x86/Makefile   |   1 +
 libswscale/x86/swscale.c  |  16 +-
 libswscale/x86/yuv2rgb.c  |  81 +++---
 libswscale/x86/yuv2rgb_template.c | 441 ++
 libswscale/x86/yuv_2_rgb.asm  | 270 ++
 5 files changed, 395 insertions(+), 414 deletions(-)
 create mode 100644 libswscale/x86/yuv_2_rgb.asm

diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index f317d5dd9b..831d5359aa 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile
@@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \
x86/output.o \
x86/scale.o  \
x86/rgb_2_rgb.o  \
+   x86/yuv_2_rgb.o  \
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 0eed4f18d5..e9d474a1e8 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -29,6 +29,14 @@
 #include "libavutil/cpu.h"
 #include "libavutil/pixdesc.h"
 
+const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
+0x0103010301030103LL,
+0x0200020002000200LL,};
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
+0x0602060206020602LL,
+0x0004000400040004LL,};
+
 #if HAVE_INLINE_ASM
 
 #define DITHER1XBPP
@@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)=   0xFCFCFCFCFCFCFCFCLL;
 DECLARE_ASM_CONST(8, uint64_t, w10)=   0x0010001000100010LL;
 DECLARE_ASM_CONST(8, uint64_t, w02)=   0x0002000200020002LL;
 
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
-0x0103010301030103LL,
-0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
-0x0602060206020602LL,
-0x0004000400040004LL,};
-
 DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
 DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
 DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 5e2f77c20f..f3d2bb526e 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -37,7 +37,7 @@
 #include "libavutil/x86/cpu.h"
 #include "libavutil/cpu.h"
 
-#if HAVE_INLINE_ASM
+#if HAVE_X86ASM
 
 #define DITHER1XBPP // only for MMX
 
@@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
 DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 //MMX versions
-#if HAVE_MMX_INLINE && HAVE_6REGS
-#undef RENAME
+#if HAVE_MMX
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _mmx
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMX */
 
 // MMXEXT versions
-#if HAVE_MMXEXT_INLINE && HAVE_6REGS
-#undef RENAME
+#if HAVE_MMXEXT
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _mmxext
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMXEXT */
 
-#endif /* HAVE_INLINE_ASM */
+#include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
-#if HAVE_MMX_INLINE && HAVE_6REGS
 int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_MMXEXT_INLINE
-if (INLINE_MMXEXT(cpu_flags)) {
-switch (c->dstFormat) {
-case AV_PIX_FMT_RGB24:
-return yuv420_rgb24_mmxext;
-case AV_PIX_FMT_BGR24:
-return yuv420_bgr24_mmxext;
-}
-}
-#endif
-
-if (INLINE_MMX(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags)) {
 switch (c->dstFormat) {
-case AV_PIX_FMT_RGB32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_rgb32_mmx;
+case AV_PIX_FMT_RGB32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_rgb32;
 #endif
-break;
-} else
-return yuv420_rgb32_mmx;
-case AV_PIX_FMT_BGR32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_bgr32_mmx;
+break;
+} else
+return yuv420_rgb32;
+case AV_PIX_FMT_BGR32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_bgr32;
 #endif
-break;
-} else
-return yuv420_bgr32_mmx;
-case AV_PIX_FMT_RGB24:
-return yuv420_rgb24_mmx;
-case AV_PIX_FMT_BGR24:
-return yuv420_bgr24_mmx;
-   

[FFmpeg-devel] [PATCH V5 2/2] libswscale/x86/yuv2rgb: add ssse3 version

2020-01-05 Thread Ting Fu
Tested using this command:
./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
-vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null

The fps increases from 389 to 640 on my local machine.

Signed-off-by: Ting Fu 
---
 libswscale/x86/yuv2rgb.c  |   7 +-
 libswscale/x86/yuv2rgb_template.c |  58 +++-
 libswscale/x86/yuv_2_rgb.asm  | 145 ++
 3 files changed, 191 insertions(+), 19 deletions(-)

diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index f3d2bb526e..7015266a7e 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -61,13 +61,18 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 #define COMPILE_TEMPLATE_MMXEXT 1
 #endif /* HAVE_MMXEXT */
 
+//SSSE3 versions
+#if HAVE_SSSE3
+#define COMPILE_TEMPLATE_SSSE3 1
+#endif
+
 #include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMX(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_SSSE3(cpu_flags)) {
 switch (c->dstFormat) {
 case AV_PIX_FMT_RGB32:
 if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
diff --git a/libswscale/x86/yuv2rgb_template.c 
b/libswscale/x86/yuv2rgb_template.c
index e67a85df33..ceaa6dea32 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -40,6 +40,30 @@
 const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
 x86_reg index = -h_size / 2; \
 
+extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+const uint8_t *py_2index, const uint8_t 
*pa_2index);
+extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+const uint8_t *py_2index, const uint8_t 
*pa_2index);
 extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
 const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
 const uint8_t *py_2index);
@@ -84,7 +108,12 @@ static int yuv420_rgb15(SwsContext *c, const uint8_t *src[],
 c->greenDither = ff_dither8[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -102,7 +131,12 @@ static int yuv420_rgb16(SwsContext *c, const uint8_t *src[],
 c->greenDither = ff_dither4[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -115,7 +149,9 @@ static int yuv420_rgb24(SwsContext *c, const uint8_t *src[],
 i

[FFmpeg-devel] [PATCH V6 2/2] libswscale/x86/yuv2rgb: add ssse3 version

2020-01-07 Thread Ting Fu
Tested using this command:
./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
-vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null

The fps increases from 389 to 640 on an Intel(R) Core(TM) i7-8700K CPU @ 3.70GHz.

Signed-off-by: Ting Fu 
---
 libswscale/x86/yuv2rgb.c  |   7 +-
 libswscale/x86/yuv2rgb_template.c |  58 +++-
 libswscale/x86/yuv_2_rgb.asm  | 145 ++
 3 files changed, 191 insertions(+), 19 deletions(-)

diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index f3d2bb526e..7015266a7e 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -61,13 +61,18 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 #define COMPILE_TEMPLATE_MMXEXT 1
 #endif /* HAVE_MMXEXT */
 
+//SSSE3 versions
+#if HAVE_SSSE3
+#define COMPILE_TEMPLATE_SSSE3 1
+#endif
+
 #include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMX(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_SSSE3(cpu_flags)) {
 switch (c->dstFormat) {
 case AV_PIX_FMT_RGB32:
 if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
diff --git a/libswscale/x86/yuv2rgb_template.c 
b/libswscale/x86/yuv2rgb_template.c
index e67a85df33..ceaa6dea32 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -40,6 +40,30 @@
 const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
 x86_reg index = -h_size / 2; \
 
+extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+const uint8_t *py_2index, const uint8_t 
*pa_2index);
+extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
+const uint8_t *py_2index, const uint8_t 
*pa_2index);
 extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
 const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
 const uint8_t *py_2index);
@@ -84,7 +108,12 @@ static int yuv420_rgb15(SwsContext *c, const uint8_t *src[],
 c->greenDither = ff_dither8[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -102,7 +131,12 @@ static int yuv420_rgb16(SwsContext *c, const uint8_t *src[],
 c->greenDither = ff_dither4[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -115,7 +149,9 @@ static int yuv420_rgb24(SwsContext *c,

[FFmpeg-devel] [PATCH V6 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code

2020-01-07 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libswscale/x86/Makefile   |   1 +
 libswscale/x86/swscale.c  |  16 +-
 libswscale/x86/yuv2rgb.c  |  81 +++---
 libswscale/x86/yuv2rgb_template.c | 441 ++
 libswscale/x86/yuv_2_rgb.asm  | 270 ++
 5 files changed, 395 insertions(+), 414 deletions(-)
 create mode 100644 libswscale/x86/yuv_2_rgb.asm

diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index f317d5dd9b..831d5359aa 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile
@@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \
x86/output.o \
x86/scale.o  \
x86/rgb_2_rgb.o  \
+   x86/yuv_2_rgb.o  \
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 0eed4f18d5..e9d474a1e8 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -29,6 +29,14 @@
 #include "libavutil/cpu.h"
 #include "libavutil/pixdesc.h"
 
+const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
+0x0103010301030103LL,
+0x0200020002000200LL,};
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
+0x0602060206020602LL,
+0x0004000400040004LL,};
+
 #if HAVE_INLINE_ASM
 
 #define DITHER1XBPP
@@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)=   0xFCFCFCFCFCFCFCFCLL;
 DECLARE_ASM_CONST(8, uint64_t, w10)=   0x0010001000100010LL;
 DECLARE_ASM_CONST(8, uint64_t, w02)=   0x0002000200020002LL;
 
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
-0x0103010301030103LL,
-0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
-0x0602060206020602LL,
-0x0004000400040004LL,};
-
 DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
 DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
 DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 5e2f77c20f..f3d2bb526e 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -37,7 +37,7 @@
 #include "libavutil/x86/cpu.h"
 #include "libavutil/cpu.h"
 
-#if HAVE_INLINE_ASM
+#if HAVE_X86ASM
 
 #define DITHER1XBPP // only for MMX
 
@@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
 DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 //MMX versions
-#if HAVE_MMX_INLINE && HAVE_6REGS
-#undef RENAME
+#if HAVE_MMX
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 0
-#define RENAME(a) a ## _mmx
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMX */
 
 // MMXEXT versions
-#if HAVE_MMXEXT_INLINE && HAVE_6REGS
-#undef RENAME
+#if HAVE_MMXEXT
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
-#define RENAME(a) a ## _mmxext
-#include "yuv2rgb_template.c"
-#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMXEXT */
 
-#endif /* HAVE_INLINE_ASM */
+#include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
-#if HAVE_MMX_INLINE && HAVE_6REGS
 int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_MMXEXT_INLINE
-if (INLINE_MMXEXT(cpu_flags)) {
-switch (c->dstFormat) {
-case AV_PIX_FMT_RGB24:
-return yuv420_rgb24_mmxext;
-case AV_PIX_FMT_BGR24:
-return yuv420_bgr24_mmxext;
-}
-}
-#endif
-
-if (INLINE_MMX(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags)) {
 switch (c->dstFormat) {
-case AV_PIX_FMT_RGB32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_rgb32_mmx;
+case AV_PIX_FMT_RGB32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_rgb32;
 #endif
-break;
-} else
-return yuv420_rgb32_mmx;
-case AV_PIX_FMT_BGR32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_bgr32_mmx;
+break;
+} else
+return yuv420_rgb32;
+case AV_PIX_FMT_BGR32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_bgr32;
 #endif
-break;
-} else
-return yuv420_bgr32_mmx;
-case AV_PIX_FMT_RGB24:
-return yuv420_rgb24_mmx;
-case AV_PIX_FMT_BGR24:
-return yuv420_bgr24_mmx;
-   

[FFmpeg-devel] [PATCH V7 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code

2020-01-09 Thread Ting Fu
Signed-off-by: Ting Fu 
---
V7:
Fix compile issue when the user configures with --disable-mmx.
Fix issue when running ./ffmpeg with --cpuflags mmx/ssse3.
Adjust the SIMD verification logic in libswscale/x86/yuv2rgb.c.

 libswscale/x86/Makefile   |   1 +
 libswscale/x86/swscale.c  |  16 +-
 libswscale/x86/yuv2rgb.c  |  66 ++---
 libswscale/x86/yuv2rgb_template.c | 467 ++
 libswscale/x86/yuv_2_rgb.asm  | 270 +
 5 files changed, 405 insertions(+), 415 deletions(-)
 create mode 100644 libswscale/x86/yuv_2_rgb.asm

diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index f317d5dd9b..831d5359aa 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile
@@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \
x86/output.o \
x86/scale.o  \
x86/rgb_2_rgb.o  \
+   x86/yuv_2_rgb.o  \
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 0eed4f18d5..e9d474a1e8 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -29,6 +29,14 @@
 #include "libavutil/cpu.h"
 #include "libavutil/pixdesc.h"
 
+const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
+0x0103010301030103LL,
+0x0200020002000200LL,};
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
+0x0602060206020602LL,
+0x0004000400040004LL,};
+
 #if HAVE_INLINE_ASM
 
 #define DITHER1XBPP
@@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)=   0xFCFCFCFCFCFCFCFCLL;
 DECLARE_ASM_CONST(8, uint64_t, w10)=   0x0010001000100010LL;
 DECLARE_ASM_CONST(8, uint64_t, w02)=   0x0002000200020002LL;
 
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
-0x0103010301030103LL,
-0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
-0x0602060206020602LL,
-0x0004000400040004LL,};
-
 DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
 DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
 DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 5e2f77c20f..dd813d4deb 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -37,7 +37,7 @@
 #include "libavutil/x86/cpu.h"
 #include "libavutil/cpu.h"
 
-#if HAVE_INLINE_ASM
+#if HAVE_X86ASM
 
 #define DITHER1XBPP // only for MMX
 
@@ -50,32 +50,31 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
 DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 //MMX versions
-#if HAVE_MMX_INLINE && HAVE_6REGS
+#if HAVE_MMX
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 0
 #define RENAME(a) a ## _mmx
 #include "yuv2rgb_template.c"
-#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMX */
 
 // MMXEXT versions
-#if HAVE_MMXEXT_INLINE && HAVE_6REGS
+#if HAVE_MMXEXT
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
 #define RENAME(a) a ## _mmxext
 #include "yuv2rgb_template.c"
-#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMXEXT */
 
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_X86ASM */
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
-#if HAVE_MMX_INLINE && HAVE_6REGS
+#if HAVE_X86ASM
 int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_MMXEXT_INLINE
-if (INLINE_MMXEXT(cpu_flags)) {
+if (EXTERNAL_MMXEXT(cpu_flags)) {
 switch (c->dstFormat) {
 case AV_PIX_FMT_RGB24:
 return yuv420_rgb24_mmxext;
@@ -83,37 +82,36 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 return yuv420_bgr24_mmxext;
 }
 }
-#endif
 
-if (INLINE_MMX(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags)) {
 switch (c->dstFormat) {
-case AV_PIX_FMT_RGB32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_rgb32_mmx;
+case AV_PIX_FMT_RGB32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_rgb32_mmx;
 #endif
-break;
-} else
-return yuv420_rgb32_mmx;
-case AV_PIX_FMT_BGR32:
-if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
-return yuva420_bgr32_mmx;
+break;
+} else
+return yuv420_rgb32_mmx;
+case AV_PIX_FMT_BGR32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_

[FFmpeg-devel] [PATCH V7 2/2] libswscale/x86/yuv2rgb: add ssse3 version

2020-01-09 Thread Ting Fu
Tested using this command:
./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
-vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null

The fps increased from 389 to 640 on an Intel(R) Core(TM) i7-8700K CPU @ 3.70GHz.

Signed-off-by: Ting Fu 
---
 libswscale/x86/yuv2rgb.c |  38 +
 libswscale/x86/yuv_2_rgb.asm | 145 +++
 2 files changed, 167 insertions(+), 16 deletions(-)

diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index dd813d4deb..ecbad95d1f 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -67,6 +67,15 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 
0x0707070707070707ULL;
 #include "yuv2rgb_template.c"
 #endif /* HAVE_MMXEXT */
 
+//SSSE3 versions
+#if HAVE_SSSE3
+#undef RENAME
+#undef COMPILE_TEMPLATE_MMXEXT
+#define COMPILE_TEMPLATE_MMXEXT 0
+#define RENAME(a) a ## _ssse3
+#include "yuv2rgb_template.c"
+#endif
+
 #endif /* HAVE_X86ASM */
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
@@ -74,6 +83,35 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 #if HAVE_X86ASM
 int cpu_flags = av_get_cpu_flags();
 
+if (EXTERNAL_SSSE3(cpu_flags)) {
+switch (c->dstFormat) {
+case AV_PIX_FMT_RGB32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_rgb32_ssse3;
+#endif
+break;
+} else
+return yuv420_rgb32_ssse3;
+case AV_PIX_FMT_BGR32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_bgr32_ssse3;
+#endif
+break;
+} else
+return yuv420_bgr32_ssse3;
+case AV_PIX_FMT_RGB24:
+return yuv420_rgb24_ssse3;
+case AV_PIX_FMT_BGR24:
+return yuv420_bgr24_ssse3;
+case AV_PIX_FMT_RGB565:
+return yuv420_rgb16_ssse3;
+case AV_PIX_FMT_RGB555:
+return yuv420_rgb15_ssse3;
+}
+}
+
 if (EXTERNAL_MMXEXT(cpu_flags)) {
 switch (c->dstFormat) {
 case AV_PIX_FMT_RGB24:
diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm
index a44ab1607b..e05bbb89f5 100644
--- a/libswscale/x86/yuv_2_rgb.asm
+++ b/libswscale/x86/yuv_2_rgb.asm
@@ -25,11 +25,18 @@
 
 SECTION_RODATA
 
-pw_00ff: times 4 dw 255
-pb_f8:   times 8 db 248
-pb_e0:   times 8 db 224
-pb_03:   times 8 db 3
-pb_07:   times 8 db 7
; below variables are named like mask_dwXY, which means to preserve dword No.X & No.Y
+mask_dw036 : db -1, -1,  0,  0,  0,  0, -1, -1,  0,  0,  0,  0, -1, -1,  0,  0
+mask_dw147 : db  0,  0, -1, -1,  0,  0,  0,  0, -1, -1,  0,  0,  0,  0, -1, -1
+mask_dw25  : db  0,  0,  0,  0, -1, -1,  0,  0,  0,  0, -1, -1,  0,  0,  0,  0
+rgb24_shuf1: db  0,  1,  6,  7, 12, 13,  2,  3,  8,  9, 14, 15,  4,  5, 10, 11
+rgb24_shuf2: db 10, 11,  0,  1,  6,  7, 12, 13,  2,  3,  8,  9, 14, 15,  4,  5
+rgb24_shuf3: db  4,  5, 10, 11,  0,  1,  6,  7, 12, 13,  2,  3,  8,  9, 14, 15
+pw_00ff: times 8 dw 255
+pb_f8:   times 16 db 248
+pb_e0:   times 16 db 224
+pb_03:   times 16 db 3
+pb_07:   times 16 db 7
 
 mask_1101: dw -1, -1,  0, -1
 mask_0010: dw  0,  0, -1,  0
@@ -49,7 +56,11 @@ SECTION .text
 ;-
 
 %macro MOV_H2L 1
-psrlq %1, 32
+%if mmsize == 8
+psrlq %1, 32
+%else ; mmsize == 16
+psrldq %1, 8
+%endif
 %endmacro
 
 %macro yuv2rgb_fn 3
@@ -77,6 +88,7 @@ psrlq %1, 32
 %define m_blue m1
 %endif
 
+%if mmsize == 8
 %define time_num 1
 %define reg_num 8
 %define y_offset [pointer_c_ditherq + 8  * 8]
@@ -87,11 +99,45 @@ psrlq %1, 32
 %define y_coff   [pointer_c_ditherq + 3  * 8]
 %define ub_coff  [pointer_c_ditherq + 5  * 8]
 %define vr_coff  [pointer_c_ditherq + 4  * 8]
+%elif mmsize == 16
+%define time_num 2
+%if ARCH_X86_32
+%define reg_num 8
+%define my_offset [pointer_c_ditherq + 8  * 8]
+%define mu_offset [pointer_c_ditherq + 9  * 8]
+%define mv_offset [pointer_c_ditherq + 10 * 8]
+%define mug_coff  [pointer_c_ditherq + 7  * 8]
+%define mvg_coff  [pointer_c_ditherq + 6  * 8]
+%define my_coff   [pointer_c_ditherq + 3  * 8]
+%define mub_coff  [pointer_c_ditherq + 5  * 8]
+%define mvr_coff  [pointer_c_ditherq + 4  * 8]
+%else ; ARCH_X86_64
+%define reg_num 16
+%define y_offset m8
+%define u_offset m9
+%define v_offset m10
+%define ug_coff  m11
+%define vg_coff  m12
+%define y_coff   m13
+%define ub_coff  m14
+%define vr_coff  m15
+%endif ; ARCH_X86_32/64
+%endif ; coeff define mmsize == 8/16
 
 cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
 
 %if ARCH_X86_64
 movsxd indexq, indexd
+%if mmsize == 16
+VBROADCASTSD y_offset, [pointer_c_ditherq + 8  * 8]
+VBROADCASTSD u_offset, [pointer_c_ditherq + 9  * 8]
+VBROADCASTSD v_offset, [pointer_c_ditherq + 10 * 8]
+VBROADCASTSD ug_coff,  [pointer_c_ditherq + 7  * 8]
+VBROADCASTSD vg_coff,  [pointer_c_dither

[FFmpeg-devel] [PATCH V8 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code

2020-01-18 Thread Ting Fu
The original inline assembly and the NASM code reach the same fps when invoked
from the command line, so the NASM conversion has almost no impact on performance.

Signed-off-by: Ting Fu 
---
V8:
Remove all re-indentation to make review easier.
Fix some improper indentation.
Keep the "inline" cleanup for the next patch.

 libswscale/x86/Makefile   |   1 +
 libswscale/x86/swscale.c  |  16 +-
 libswscale/x86/yuv2rgb.c  |  26 +-
 libswscale/x86/yuv2rgb_template.c | 392 +-
 libswscale/x86/yuv_2_rgb.asm  | 270 
 5 files changed, 351 insertions(+), 354 deletions(-)
 create mode 100644 libswscale/x86/yuv_2_rgb.asm

diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index f317d5dd9b..831d5359aa 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile
@@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \
x86/output.o \
x86/scale.o  \
x86/rgb_2_rgb.o  \
+   x86/yuv_2_rgb.o  \
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 0eed4f18d5..e9d474a1e8 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -29,6 +29,14 @@
 #include "libavutil/cpu.h"
 #include "libavutil/pixdesc.h"
 
+const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
+0x0103010301030103LL,
+0x0200020002000200LL,};
+
+const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
+0x0602060206020602LL,
+0x0004000400040004LL,};
+
 #if HAVE_INLINE_ASM
 
 #define DITHER1XBPP
@@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)=   0xFCFCFCFCFCFCFCFCLL;
 DECLARE_ASM_CONST(8, uint64_t, w10)=   0x0010001000100010LL;
 DECLARE_ASM_CONST(8, uint64_t, w02)=   0x0002000200020002LL;
 
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
-0x0103010301030103LL,
-0x0200020002000200LL,};
-
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
-0x0602060206020602LL,
-0x0004000400040004LL,};
-
 DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
 DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
 DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 5e2f77c20f..c7668f487c 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -37,7 +37,7 @@
 #include "libavutil/x86/cpu.h"
 #include "libavutil/cpu.h"
 
-#if HAVE_INLINE_ASM
+#if HAVE_X86ASM
 
 #define DITHER1XBPP // only for MMX
 
@@ -50,32 +50,31 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
 DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 //MMX versions
-#if HAVE_MMX_INLINE && HAVE_6REGS
+#if HAVE_MMX
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 0
 #define RENAME(a) a ## _mmx
 #include "yuv2rgb_template.c"
-#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMX */
 
 // MMXEXT versions
-#if HAVE_MMXEXT_INLINE && HAVE_6REGS
+#if HAVE_MMXEXT
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
 #define RENAME(a) a ## _mmxext
 #include "yuv2rgb_template.c"
-#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */
+#endif /* HAVE_MMXEXT */
 
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_X86ASM */
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
-#if HAVE_MMX_INLINE && HAVE_6REGS
+#if HAVE_X86ASM
 int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_MMXEXT_INLINE
-if (INLINE_MMXEXT(cpu_flags)) {
+if (EXTERNAL_MMXEXT(cpu_flags)) {
 switch (c->dstFormat) {
 case AV_PIX_FMT_RGB24:
 return yuv420_rgb24_mmxext;
@@ -83,13 +82,12 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 return yuv420_bgr24_mmxext;
 }
 }
-#endif
 
-if (INLINE_MMX(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags)) {
 switch (c->dstFormat) {
 case AV_PIX_FMT_RGB32:
 if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
+#if CONFIG_SWSCALE_ALPHA
 return yuva420_rgb32_mmx;
 #endif
 break;
@@ -97,7 +95,7 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 return yuv420_rgb32_mmx;
 case AV_PIX_FMT_BGR32:
 if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
+#if CONFIG_SWSCALE_ALPHA
 return yuva420_bgr32_mmx;
 #endif
 break;
@@ -113,7 +111,7 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 return yuv420_rgb15_mmx;
 }
 }
-#endif /* HAVE_MMX_INLINE  && HAVE_6

[FFmpeg-devel] [PATCH V8 2/2] libswscale/x86/yuv2rgb: add ssse3 version

2020-01-18 Thread Ting Fu
Tested using this command:
./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
-vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null

The fps increased from 389 to 640 on an Intel(R) Core(TM) i7-8700K CPU @ 3.70GHz.

Signed-off-by: Ting Fu 
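
For context on how these new entry points are selected at runtime: ff_yuv2rgb_init_x86() probes the CPU flags once and returns the widest available kernel, checking EXTERNAL_SSSE3() before EXTERNAL_MMXEXT() before EXTERNAL_MMX(). A minimal standalone sketch of that dispatch pattern (the flag values and function names below are illustrative stand-ins, not the real libavutil/libswscale symbols):

    /* Hypothetical stand-ins for the real kernels and CPU probe. */
    typedef void (*yuv2rgb_fn)(const unsigned char *src, unsigned char *dst, int w);
    enum { FLAG_MMXEXT = 1 << 0, FLAG_SSSE3 = 1 << 1 };

    static void convert_c(const unsigned char *s, unsigned char *d, int w)      { /* scalar fallback */ }
    static void convert_mmxext(const unsigned char *s, unsigned char *d, int w) { /* 8-byte mm regs */ }
    static void convert_ssse3(const unsigned char *s, unsigned char *d, int w)  { /* 16-byte xmm regs */ }

    static yuv2rgb_fn pick_kernel(int cpu_flags)
    {
        /* Probe from newest to oldest so the widest SIMD wins, mirroring
         * the EXTERNAL_SSSE3 -> EXTERNAL_MMXEXT order in this patch. */
        if (cpu_flags & FLAG_SSSE3)  return convert_ssse3;
        if (cpu_flags & FLAG_MMXEXT) return convert_mmxext;
        return convert_c;
    }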
---
 libswscale/x86/yuv2rgb.c |  38 +
 libswscale/x86/yuv_2_rgb.asm | 145 +++
 2 files changed, 167 insertions(+), 16 deletions(-)

diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index c7668f487c..c12e88cbb5 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -67,6 +67,15 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 #include "yuv2rgb_template.c"
 #endif /* HAVE_MMXEXT */
 
+//SSSE3 versions
+#if HAVE_SSSE3
+#undef RENAME
+#undef COMPILE_TEMPLATE_MMXEXT
+#define COMPILE_TEMPLATE_MMXEXT 0
+#define RENAME(a) a ## _ssse3
+#include "yuv2rgb_template.c"
+#endif
+
 #endif /* HAVE_X86ASM */
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
@@ -74,6 +83,35 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 #if HAVE_X86ASM
 int cpu_flags = av_get_cpu_flags();
 
+if (EXTERNAL_SSSE3(cpu_flags)) {
+switch (c->dstFormat) {
+case AV_PIX_FMT_RGB32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_rgb32_ssse3;
+#endif
+break;
+} else
+return yuv420_rgb32_ssse3;
+case AV_PIX_FMT_BGR32:
+if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
+#if CONFIG_SWSCALE_ALPHA
+return yuva420_bgr32_ssse3;
+#endif
+break;
+} else
+return yuv420_bgr32_ssse3;
+case AV_PIX_FMT_RGB24:
+return yuv420_rgb24_ssse3;
+case AV_PIX_FMT_BGR24:
+return yuv420_bgr24_ssse3;
+case AV_PIX_FMT_RGB565:
+return yuv420_rgb16_ssse3;
+case AV_PIX_FMT_RGB555:
+return yuv420_rgb15_ssse3;
+}
+}
+
 if (EXTERNAL_MMXEXT(cpu_flags)) {
 switch (c->dstFormat) {
 case AV_PIX_FMT_RGB24:
diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm
index a44ab1607b..e05bbb89f5 100644
--- a/libswscale/x86/yuv_2_rgb.asm
+++ b/libswscale/x86/yuv_2_rgb.asm
@@ -25,11 +25,18 @@
 
 SECTION_RODATA
 
-pw_00ff: times 4 dw 255
-pb_f8:   times 8 db 248
-pb_e0:   times 8 db 224
-pb_03:   times 8 db 3
-pb_07:   times 8 db 7
+; below variables are named like mask_dwXY, which means to preserve dword No.X & No.Y
+mask_dw036 : db -1, -1,  0,  0,  0,  0, -1, -1,  0,  0,  0,  0, -1, -1,  0,  0
+mask_dw147 : db  0,  0, -1, -1,  0,  0,  0,  0, -1, -1,  0,  0,  0,  0, -1, -1
+mask_dw25  : db  0,  0,  0,  0, -1, -1,  0,  0,  0,  0, -1, -1,  0,  0,  0,  0
+rgb24_shuf1: db  0,  1,  6,  7, 12, 13,  2,  3,  8,  9, 14, 15,  4,  5, 10, 11
+rgb24_shuf2: db 10, 11,  0,  1,  6,  7, 12, 13,  2,  3,  8,  9, 14, 15,  4,  5
+rgb24_shuf3: db  4,  5, 10, 11,  0,  1,  6,  7, 12, 13,  2,  3,  8,  9, 14, 15
+pw_00ff: times 8 dw 255
+pb_f8:   times 16 db 248
+pb_e0:   times 16 db 224
+pb_03:   times 16 db 3
+pb_07:   times 16 db 7
 
 mask_1101: dw -1, -1,  0, -1
 mask_0010: dw  0,  0, -1,  0
@@ -49,7 +56,11 @@ SECTION .text
 ;-
 
 %macro MOV_H2L 1
-psrlq %1, 32
+%if mmsize == 8
+psrlq %1, 32
+%else ; mmsize == 16
+psrldq %1, 8
+%endif
 %endmacro
 
 %macro yuv2rgb_fn 3
@@ -77,6 +88,7 @@ psrlq %1, 32
 %define m_blue m1
 %endif
 
+%if mmsize == 8
 %define time_num 1
 %define reg_num 8
 %define y_offset [pointer_c_ditherq + 8  * 8]
@@ -87,11 +99,45 @@ psrlq %1, 32
 %define y_coff   [pointer_c_ditherq + 3  * 8]
 %define ub_coff  [pointer_c_ditherq + 5  * 8]
 %define vr_coff  [pointer_c_ditherq + 4  * 8]
+%elif mmsize == 16
+%define time_num 2
+%if ARCH_X86_32
+%define reg_num 8
+%define my_offset [pointer_c_ditherq + 8  * 8]
+%define mu_offset [pointer_c_ditherq + 9  * 8]
+%define mv_offset [pointer_c_ditherq + 10 * 8]
+%define mug_coff  [pointer_c_ditherq + 7  * 8]
+%define mvg_coff  [pointer_c_ditherq + 6  * 8]
+%define my_coff   [pointer_c_ditherq + 3  * 8]
+%define mub_coff  [pointer_c_ditherq + 5  * 8]
+%define mvr_coff  [pointer_c_ditherq + 4  * 8]
+%else ; ARCH_X86_64
+%define reg_num 16
+%define y_offset m8
+%define u_offset m9
+%define v_offset m10
+%define ug_coff  m11
+%define vg_coff  m12
+%define y_coff   m13
+%define ub_coff  m14
+%define vr_coff  m15
+%endif ; ARCH_X86_32/64
+%endif ; coeff define mmsize == 8/16
 
 cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
 
 %if ARCH_X86_64
 movsxd indexq, indexd
+%if mmsize == 16
+VBROADCASTSD y_offset, [pointer_c_ditherq + 8  * 8]
+VBROADCASTSD u_offset, [pointer_c_ditherq + 9  * 8]
+VBROADCASTSD v_offset, [pointer_c_ditherq + 10 * 8]
+VBROADCASTSD ug_coff,  [pointer_c_ditherq + 7  * 8]
+VBROADCASTSD vg_coff,  [pointer_c_dither

[FFmpeg-devel] [PATCH 1/4] dnn: add DCO_RGB color order to enum DNNColorOrder

2021-04-29 Thread Ting Fu
Add DCO_RGB color order to DNNColorOrder, since TensorFlow models
need this color order as input.

Signed-off-by: Ting Fu 
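
As background for why the color order matters: the io-proc layer maps the pair (data type, color order) to an AVPixelFormat and then lets libswscale perform the byte reordering, so a DCO_RGB model simply makes the io layer target AV_PIX_FMT_RGB24 instead of AV_PIX_FMT_BGR24. A minimal sketch of that conversion step for packed 8-bit data (the helper name bgr24_to_rgb24 is hypothetical; sws_getContext()/sws_scale() are the real libswscale API):

    #include <stdint.h>
    #include <libswscale/swscale.h>

    /* Hypothetical helper: repack a BGR24 buffer into the RGB24 layout a
     * TensorFlow model expects, which is what the new mapping enables. */
    static int bgr24_to_rgb24(const uint8_t *bgr, int bgr_linesize,
                              uint8_t *rgb, int rgb_linesize, int w, int h)
    {
        struct SwsContext *sws = sws_getContext(w, h, AV_PIX_FMT_BGR24,
                                                w, h, AV_PIX_FMT_RGB24,
                                                SWS_BILINEAR, NULL, NULL, NULL);
        if (!sws)
            return -1;
        sws_scale(sws, &bgr, &bgr_linesize, 0, h, &rgb, &rgb_linesize);
        sws_freeContext(sws);
        return 0;
    }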
---
 libavfilter/dnn/dnn_backend_tf.c |  1 +
 libavfilter/dnn/dnn_io_proc.c| 14 +++---
 libavfilter/dnn_interface.h  |  1 +
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 076dd3d6a9..f70e3d4659 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -143,6 +143,7 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input
 
 tf_output.index = 0;
 input->dt = TF_OperationOutputType(tf_output);
+input->order = DCO_RGB;
 
 status = TF_NewStatus();
 TF_GraphGetTensorShape(tf_model->graph, tf_output, dims, 4, status);
diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c
index e104cc5064..5f6ce36b96 100644
--- a/libavfilter/dnn/dnn_io_proc.c
+++ b/libavfilter/dnn/dnn_io_proc.c
@@ -167,11 +167,19 @@ static DNNReturnType proc_from_frame_to_dnn_frameprocessing(AVFrame *frame, DNND
 
 static enum AVPixelFormat get_pixel_format(DNNData *data)
 {
-if (data->dt == DNN_UINT8 && data->order == DCO_BGR) {
-return AV_PIX_FMT_BGR24;
+if (data->dt == DNN_UINT8) {
+switch (data->order) {
+case DCO_BGR:
+return AV_PIX_FMT_BGR24;
+case DCO_RGB:
+return AV_PIX_FMT_RGB24;
+default:
+av_assert0(!"unsupported data pixel format.\n");
+return AV_PIX_FMT_BGR24;
+}
 }
 
-av_assert0(!"not supported yet.\n");
+av_assert0(!"unsupported data type.\n");
 return AV_PIX_FMT_BGR24;
 }
 
diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index ae5a488341..92c3b0fc6e 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -39,6 +39,7 @@ typedef enum {DNN_FLOAT = 1, DNN_UINT8 = 4} DNNDataType;
 typedef enum {
 DCO_NONE,
 DCO_BGR,
+DCO_RGB,
 } DNNColorOrder;
 
 typedef enum {
-- 
2.17.1



[FFmpeg-devel] [PATCH 2/4] lavfi/dnn_backend_tensorflow: add multiple outputs support

2021-04-29 Thread Ting Fu
Signed-off-by: Ting Fu 
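
The core new piece is splitting the filter's output option on '&' so several tensor names can be requested at once. A reduced standalone sketch of that tokenizing idea (split_names and MAX_OUTPUTS are illustrative names; av_get_token() from libavutil/avstring.h is the real helper, and it stops at the separator without consuming it):

    #include <libavutil/avstring.h>
    #include <libavutil/mem.h>

    #define MAX_OUTPUTS 4

    /* Sketch: split "a&b&c" into {"a", "b", "c"}; *nb receives the count. */
    static char **split_names(const char *expr, int *nb)
    {
        char **names = av_mallocz_array(MAX_OUTPUTS, sizeof(*names));
        *nb = 0;
        if (!names)
            return NULL;
        while (*nb < MAX_OUTPUTS) {
            names[(*nb)++] = av_get_token(&expr, "&"); /* token up to next '&' */
            if (!*expr)   /* av_get_token leaves expr on the separator */
                break;
            expr++;       /* skip the '&' itself */
        }
        return names;
    }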
---
 libavfilter/dnn/dnn_backend_tf.c | 49 
 libavfilter/dnn_filter_common.c  | 45 +
 libavfilter/dnn_filter_common.h  |  6 ++--
 libavfilter/vf_derain.c  |  2 +-
 libavfilter/vf_sr.c  |  2 +-
 5 files changed, 71 insertions(+), 33 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index f70e3d4659..5c85b562c4 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -155,7 +155,7 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input
 TF_DeleteStatus(status);
 
 // currently only NHWC is supported
-av_assert0(dims[0] == 1);
+av_assert0(dims[0] == 1 || dims[0] == -1);
 input->height = dims[1];
 input->width = dims[2];
 input->channels = dims[3];
@@ -707,7 +707,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
 TF_Output *tf_outputs;
 TFModel *tf_model = model->model;
 TFContext *ctx = &tf_model->ctx;
-DNNData input, output;
+DNNData input, *outputs;
 TF_Tensor **output_tensors;
 TF_Output tf_input;
 TF_Tensor *input_tensor;
@@ -738,14 +738,6 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
 }
 }
 
-if (nb_output != 1) {
-// currently, the filter does not need multiple outputs,
-// so we just pending the support until we really need it.
-TF_DeleteTensor(input_tensor);
-avpriv_report_missing_feature(ctx, "multiple outputs");
-return DNN_ERROR;
-}
-
 tf_outputs = av_malloc_array(nb_output, sizeof(*tf_outputs));
 if (tf_outputs == NULL) {
 TF_DeleteTensor(input_tensor);
@@ -785,23 +777,31 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
 return DNN_ERROR;
 }
 
+outputs = av_malloc_array(nb_output, sizeof(*outputs));
+if (!outputs) {
+TF_DeleteTensor(input_tensor);
+av_freep(&tf_outputs);
+av_freep(&output_tensors);
+av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *outputs\n"); 
\
+return DNN_ERROR;
+}
+
 for (uint32_t i = 0; i < nb_output; ++i) {
-output.height = TF_Dim(output_tensors[i], 1);
-output.width = TF_Dim(output_tensors[i], 2);
-output.channels = TF_Dim(output_tensors[i], 3);
-output.data = TF_TensorData(output_tensors[i]);
-output.dt = TF_TensorType(output_tensors[i]);
-
-if (do_ioproc) {
-if (tf_model->model->frame_post_proc != NULL) {
-tf_model->model->frame_post_proc(out_frame, &output, tf_model->model->filter_ctx);
-} else {
-ff_proc_from_dnn_to_frame(out_frame, &output, ctx);
-}
+outputs[i].height = TF_Dim(output_tensors[i], 1);
+outputs[i].width = TF_Dim(output_tensors[i], 2);
+outputs[i].channels = TF_Dim(output_tensors[i], 3);
+outputs[i].data = TF_TensorData(output_tensors[i]);
+outputs[i].dt = TF_TensorType(output_tensors[i]);
+}
+if (do_ioproc) {
+if (tf_model->model->frame_post_proc != NULL) {
+tf_model->model->frame_post_proc(out_frame, outputs, tf_model->model->filter_ctx);
 } else {
-out_frame->width = output.width;
-out_frame->height = output.height;
+ff_proc_from_dnn_to_frame(out_frame, outputs, ctx);
 }
+} else {
+out_frame->width = outputs[0].width;
+out_frame->height = outputs[0].height;
 }
 
 for (uint32_t i = 0; i < nb_output; ++i) {
@@ -812,6 +812,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
 TF_DeleteTensor(input_tensor);
 av_freep(&output_tensors);
 av_freep(&tf_outputs);
+av_freep(&outputs);
 return DNN_SUCCESS;
 }
 
diff --git a/libavfilter/dnn_filter_common.c b/libavfilter/dnn_filter_common.c
index 1b922455a3..4cbfdbf52a 100644
--- a/libavfilter/dnn_filter_common.c
+++ b/libavfilter/dnn_filter_common.c
@@ -17,6 +17,39 @@
  */
 
 #include "dnn_filter_common.h"
+#include "libavutil/avstring.h"
+
+#define MAX_SUPPORTED_OUTPUTS_NB 4
+
+static char **separate_output_names(const char *expr, const char *val_sep, int *separated_nb)
+{
+char *val, **parsed_vals = NULL;
+int val_num = 0;
+if (!expr || !val_sep || !separated_nb) {
+return NULL;
+}
+
+parsed_vals = av_mallocz_array(MAX_SUPPORTED_OUTPUTS_NB, sizeof(*parsed_vals));
+if (!parsed_vals) {
+return NULL;
+}
+
+do {
+val = av_get_token(&expr, val_sep);
+if(val) {
+parsed_vals[val_num] = val;
+val_num++;
+}
+if (*expr) {
+ 

[FFmpeg-devel] [PATCH 3/4] lavfi/dnn_backend_tensorflow: support detect model

2021-04-29 Thread Ting Fu
Signed-off-by: Ting Fu 
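
The interesting part of this patch is that execute_model_tf() now branches on the model's function type, so frame-processing models and detection models take different post-proc routes. A reduced sketch of that dispatch (the enum mirrors DFT_PROCESS_FRAME/DFT_ANALYTICS_DETECT from dnn_interface.h; everything else here is a stand-in):

    #include <stdio.h>

    typedef enum { DFT_PROCESS_FRAME, DFT_ANALYTICS_DETECT } FuncType;
    typedef int (*DetectPostProc)(void *frame, const void *outputs, int nb);

    /* Sketch of the per-func_type branch added in this patch. */
    static int run_post_proc(FuncType type, DetectPostProc detect_cb,
                             void *frame, const void *outputs, int nb_output)
    {
        switch (type) {
        case DFT_PROCESS_FRAME:
            /* frame in & frame out only ever uses one output */
            return nb_output == 1 ? 0 : -1;
        case DFT_ANALYTICS_DETECT:
            if (!detect_cb) {
                fprintf(stderr, "detect filter must provide a post proc\n");
                return -1;
            }
            return detect_cb(frame, outputs, nb_output);
        default:
            return -1; /* unsupported function type */
        }
    }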
---
 libavfilter/dnn/dnn_backend_tf.c | 39 ++--
 libavfilter/vf_dnn_detect.c  | 32 +-
 2 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 5c85b562c4..8fb2ae8583 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -793,15 +793,40 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
 outputs[i].data = TF_TensorData(output_tensors[i]);
 outputs[i].dt = TF_TensorType(output_tensors[i]);
 }
-if (do_ioproc) {
-if (tf_model->model->frame_post_proc != NULL) {
-tf_model->model->frame_post_proc(out_frame, outputs, tf_model->model->filter_ctx);
+switch (model->func_type) {
+case DFT_PROCESS_FRAME:
+//it only support 1 output if it's frame in & frame out
+if (do_ioproc) {
+if (tf_model->model->frame_post_proc != NULL) {
+tf_model->model->frame_post_proc(out_frame, outputs, tf_model->model->filter_ctx);
+} else {
+ff_proc_from_dnn_to_frame(out_frame, outputs, ctx);
+}
 } else {
-ff_proc_from_dnn_to_frame(out_frame, outputs, ctx);
+out_frame->width = outputs[0].width;
+out_frame->height = outputs[0].height;
+}
+break;
+case DFT_ANALYTICS_DETECT:
+if (!model->detect_post_proc) {
+av_log(ctx, AV_LOG_ERROR, "Detect filter needs provide post 
proc\n");
+return DNN_ERROR;
+}
+model->detect_post_proc(out_frame, outputs, nb_output, model->filter_ctx);
+break;
+default:
+for (uint32_t i = 0; i < nb_output; ++i) {
+if (output_tensors[i]) {
+TF_DeleteTensor(output_tensors[i]);
+}
 }
-} else {
-out_frame->width = outputs[0].width;
-out_frame->height = outputs[0].height;
+TF_DeleteTensor(input_tensor);
+av_freep(&output_tensors);
+av_freep(&tf_outputs);
+av_freep(&outputs);
+
+av_log(ctx, AV_LOG_ERROR, "Tensorflow backend does not support this 
kind of dnn filter now\n");
+return DNN_ERROR;
 }
 
 for (uint32_t i = 0; i < nb_output; ++i) {
diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 1dbe4f29a4..7d39acb653 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -203,10 +203,40 @@ static int read_detect_label_file(AVFilterContext *context)
 return 0;
 }
 
+static int check_output_nb(DnnDetectContext *ctx, DNNBackendType backend_type, int output_nb)
+{
+switch(backend_type) {
+case DNN_TF:
+if (output_nb != 4) {
+av_log(ctx, AV_LOG_ERROR, "Only support tensorflow detect model 
with 4 outputs, \
+   but get %d instead\n", output_nb);
+return AVERROR(EINVAL);
+}
+return 0;
+case DNN_OV:
+if (output_nb != 1) {
+av_log(ctx, AV_LOG_ERROR, "Dnn detect filter with openvino backend 
needs 1 output only, \
+   but get %d instead\n", output_nb);
+return AVERROR(EINVAL);
+}
+return 0;
+default:
+avpriv_report_missing_feature(ctx, "Dnn detect filter does not support 
current backend\n");
+return AVERROR(EINVAL);
+}
+return 0;
+}
+
 static av_cold int dnn_detect_init(AVFilterContext *context)
 {
 DnnDetectContext *ctx = context->priv;
-int ret = ff_dnn_init(&ctx->dnnctx, DFT_ANALYTICS_DETECT, context);
+DnnContext *dnn_ctx = &ctx->dnnctx;
+int ret;
+
+ret = ff_dnn_init(&ctx->dnnctx, DFT_ANALYTICS_DETECT, context);
+if (ret < 0)
+return ret;
+ret = check_output_nb(ctx, dnn_ctx->backend_type, dnn_ctx->nb_outputs);
 if (ret < 0)
 return ret;
 ff_dnn_set_detect_post_proc(&ctx->dnnctx, dnn_detect_post_proc);
-- 
2.17.1



[FFmpeg-devel] [PATCH 4/4] dnn/vf_dnn_detect: add tensorflow output parse support

2021-04-29 Thread Ting Fu
The testing model is the official TensorFlow model from the GitHub repo; please refer to
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md
to download the detection model you need.
For example, local testing was carried out with
'ssd_mobilenet_v2_coco_2018_03_29.tar.gz', using one image of a dog from
https://github.com/tensorflow/models/blob/master/research/object_detection/test_images/image1.jpg

Testing command is:
./ffmpeg -i image1.jpg -vf dnn_detect=dnn_backend=tensorflow:input=image_tensor:output=\
"num_detections&detection_scores&detection_classes&detection_boxes":model=ssd_mobilenet_v2_coco.pb,\
showinfo -f null -

We will see a result similar to the one below:
[Parsed_showinfo_1 @ 0x33e65f0]   side data - detection bounding boxes:
[Parsed_showinfo_1 @ 0x33e65f0] source: ssd_mobilenet_v2_coco.pb
[Parsed_showinfo_1 @ 0x33e65f0] index: 0,   region: (382, 60) -> (1005, 593), label: 18, confidence: 9834/1.
[Parsed_showinfo_1 @ 0x33e65f0] index: 1,   region: (12, 8) -> (328, 549), label: 18, confidence: 8555/1.
[Parsed_showinfo_1 @ 0x33e65f0] index: 2,   region: (293, 7) -> (682, 458), label: 1, confidence: 8033/1.
[Parsed_showinfo_1 @ 0x33e65f0] index: 3,   region: (342, 0) -> (690, 325), label: 1, confidence: 5878/1.

There are two boxes of dog with scores 94.05% & 93.45% and two boxes of person with scores 80.33% & 58.78%.

Signed-off-by: Ting Fu 
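
For readers unfamiliar with the SSD output layout: detection_boxes stores each box as normalized [y0, x0, y1, x1] in [0, 1] with y before x, so the post-proc has to swap the order and scale by the frame size. A small self-contained sketch of that mapping (the struct and function names are illustrative):

    typedef struct PixelBox { int x, y, w, h; } PixelBox;

    /* Map one normalized TF box [y0, x0, y1, x1] onto a frame_w x frame_h frame. */
    static PixelBox tf_box_to_pixels(const float *box, int frame_w, int frame_h)
    {
        float y0 = box[0], x0 = box[1], y1 = box[2], x1 = box[3];
        PixelBox r;
        r.x = (int)(x0 * frame_w);
        r.y = (int)(y0 * frame_h);
        r.w = (int)(x1 * frame_w) - r.x;
        r.h = (int)(y1 * frame_h) - r.y;
        return r;
    }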
---
 libavfilter/vf_dnn_detect.c | 95 -
 1 file changed, 94 insertions(+), 1 deletion(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 7d39acb653..818b53a052 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -48,6 +48,9 @@ typedef struct DnnDetectContext {
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
 static const AVOption dnn_detect_options[] = {
 { "dnn_backend", "DNN backend",OFFSET(backend_type), 
AV_OPT_TYPE_INT,   { .i64 = 2 },INT_MIN, INT_MAX, FLAGS, "backend" },
+#if (CONFIG_LIBTENSORFLOW == 1)
+{ "tensorflow",  "tensorflow backend flag",0,
AV_OPT_TYPE_CONST, { .i64 = 1 },0, 0, FLAGS, "backend" },
+#endif
 #if (CONFIG_LIBOPENVINO == 1)
 { "openvino","openvino backend flag",  0,
AV_OPT_TYPE_CONST, { .i64 = 2 },0, 0, FLAGS, "backend" },
 #endif
@@ -59,7 +62,7 @@ static const AVOption dnn_detect_options[] = {
 
 AVFILTER_DEFINE_CLASS(dnn_detect);
 
-static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx)
+static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
 {
 DnnDetectContext *ctx = filter_ctx->priv;
 float conf_threshold = ctx->confidence;
@@ -136,6 +139,96 @@ static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AV
 return 0;
 }
 
+static int dnn_detect_post_proc_tf(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
+{
+DnnDetectContext *ctx = filter_ctx->priv;
+int proposal_count;
+float conf_threshold = ctx->confidence;
+float *conf, *position, *label_id, x0, y0, x1, y1;
+int nb_bboxes = 0;
+AVFrameSideData *sd;
+AVDetectionBBox *bbox;
+AVDetectionBBoxHeader *header;
+
+proposal_count = *(float *)(output[0].data);
+conf   = output[1].data;
+position   = output[3].data;
+label_id   = output[2].data;
+
+sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
+if (sd) {
+av_log(filter_ctx, AV_LOG_ERROR, "already have dnn bounding boxes in 
side data.\n");
+return -1;
+}
+
+for (int i = 0; i < proposal_count; ++i) {
+if (conf[i] < conf_threshold)
+continue;
+nb_bboxes++;
+}
+
+if (nb_bboxes == 0) {
+av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this 
frame.\n");
+return 0;
+}
+
+header = av_detection_bbox_create_side_data(frame, nb_bboxes);
+if (!header) {
+av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d 
bounding boxes\n", nb_bboxes);
+return -1;
+}
+
+av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source));
+
+for (int i = 0; i < proposal_count; ++i) {
+y0 = position[i * 4];
+x0 = position[i * 4 + 1];
+y1 = position[i * 4 + 2];
+x1 = position[i * 4 + 3];
+
+bbox = av_get_detection_bbox(header, i);
+
+if (conf[i] < conf_threshold) {
+continue;
+}
+
+bbox->x = (int)(x0 * frame->width);
+bbox->w = (int)(x1 * frame->width) - bbox->x;
+bbox->y = 

[FFmpeg-devel] [PATCH V2 1/4] dnn: add DCO_RGB color order to enum DNNColorOrder

2021-05-06 Thread Ting Fu
Add DCO_RGB color order to DNNColorOrder, since TensorFlow models
need this color order as input.

Signed-off-by: Ting Fu 
---
V2:
Rebase patch to latest code

 libavfilter/dnn/dnn_backend_tf.c |  1 +
 libavfilter/dnn/dnn_io_proc.c| 14 +++---
 libavfilter/dnn_interface.h  |  1 +
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 03fe310b03..45da29ae70 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -143,6 +143,7 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input
 
 tf_output.index = 0;
 input->dt = TF_OperationOutputType(tf_output);
+input->order = DCO_RGB;
 
 status = TF_NewStatus();
 TF_GraphGetTensorShape(tf_model->graph, tf_output, dims, 4, status);
diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c
index 5f60d68078..1e2bef3f9a 100644
--- a/libavfilter/dnn/dnn_io_proc.c
+++ b/libavfilter/dnn/dnn_io_proc.c
@@ -168,11 +168,19 @@ static DNNReturnType proc_from_frame_to_dnn_frameprocessing(AVFrame *frame, DNND
 
 static enum AVPixelFormat get_pixel_format(DNNData *data)
 {
-if (data->dt == DNN_UINT8 && data->order == DCO_BGR) {
-return AV_PIX_FMT_BGR24;
+if (data->dt == DNN_UINT8) {
+switch (data->order) {
+case DCO_BGR:
+return AV_PIX_FMT_BGR24;
+case DCO_RGB:
+return AV_PIX_FMT_RGB24;
+default:
+av_assert0(!"unsupported data pixel format.\n");
+return AV_PIX_FMT_BGR24;
+}
 }
 
-av_assert0(!"not supported yet.\n");
+av_assert0(!"unsupported data type.\n");
 return AV_PIX_FMT_BGR24;
 }
 
diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index 799244ee14..5e9ffeb077 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -39,6 +39,7 @@ typedef enum {DNN_FLOAT = 1, DNN_UINT8 = 4} DNNDataType;
 typedef enum {
 DCO_NONE,
 DCO_BGR,
+DCO_RGB,
 } DNNColorOrder;
 
 typedef enum {
-- 
2.17.1



[FFmpeg-devel] [PATCH V2 2/4] lavfi/dnn_backend_tensorflow: add multiple outputs support

2021-05-06 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_tf.c | 49 ++---
 libavfilter/dnn_filter_common.c  | 53 ++--
 libavfilter/dnn_filter_common.h  |  6 ++--
 libavfilter/vf_derain.c  |  2 +-
 libavfilter/vf_sr.c  |  2 +-
 5 files changed, 75 insertions(+), 37 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 45da29ae70..b6b1812cd9 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -155,7 +155,7 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input
 TF_DeleteStatus(status);
 
 // currently only NHWC is supported
-av_assert0(dims[0] == 1);
+av_assert0(dims[0] == 1 || dims[0] == -1);
 input->height = dims[1];
 input->width = dims[2];
 input->channels = dims[3];
@@ -707,7 +707,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
 TF_Output *tf_outputs;
 TFModel *tf_model = model->model;
 TFContext *ctx = &tf_model->ctx;
-DNNData input, output;
+DNNData input, *outputs;
 TF_Tensor **output_tensors;
 TF_Output tf_input;
 TF_Tensor *input_tensor;
@@ -738,14 +738,6 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
 }
 }
 
-if (nb_output != 1) {
-// currently, the filter does not need multiple outputs,
-// so we just pending the support until we really need it.
-TF_DeleteTensor(input_tensor);
-avpriv_report_missing_feature(ctx, "multiple outputs");
-return DNN_ERROR;
-}
-
 tf_outputs = av_malloc_array(nb_output, sizeof(*tf_outputs));
 if (tf_outputs == NULL) {
 TF_DeleteTensor(input_tensor);
@@ -785,23 +777,31 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
 return DNN_ERROR;
 }
 
+outputs = av_malloc_array(nb_output, sizeof(*outputs));
+if (!outputs) {
+TF_DeleteTensor(input_tensor);
+av_freep(&tf_outputs);
+av_freep(&output_tensors);
+av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *outputs\n"); 
\
+return DNN_ERROR;
+}
+
 for (uint32_t i = 0; i < nb_output; ++i) {
-output.height = TF_Dim(output_tensors[i], 1);
-output.width = TF_Dim(output_tensors[i], 2);
-output.channels = TF_Dim(output_tensors[i], 3);
-output.data = TF_TensorData(output_tensors[i]);
-output.dt = TF_TensorType(output_tensors[i]);
-
-if (do_ioproc) {
-if (tf_model->model->frame_post_proc != NULL) {
-tf_model->model->frame_post_proc(out_frame, &output, tf_model->model->filter_ctx);
-} else {
-ff_proc_from_dnn_to_frame(out_frame, &output, ctx);
-}
+outputs[i].height = TF_Dim(output_tensors[i], 1);
+outputs[i].width = TF_Dim(output_tensors[i], 2);
+outputs[i].channels = TF_Dim(output_tensors[i], 3);
+outputs[i].data = TF_TensorData(output_tensors[i]);
+outputs[i].dt = TF_TensorType(output_tensors[i]);
+}
+if (do_ioproc) {
+if (tf_model->model->frame_post_proc != NULL) {
+tf_model->model->frame_post_proc(out_frame, outputs, tf_model->model->filter_ctx);
 } else {
-out_frame->width = output.width;
-out_frame->height = output.height;
+ff_proc_from_dnn_to_frame(out_frame, outputs, ctx);
 }
+} else {
+out_frame->width = outputs[0].width;
+out_frame->height = outputs[0].height;
 }
 
 for (uint32_t i = 0; i < nb_output; ++i) {
@@ -812,6 +812,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
 TF_DeleteTensor(input_tensor);
 av_freep(&output_tensors);
 av_freep(&tf_outputs);
+av_freep(&outputs);
 return DNN_SUCCESS;
 }
 
diff --git a/libavfilter/dnn_filter_common.c b/libavfilter/dnn_filter_common.c
index 52c7a5392a..0ed0ac2e30 100644
--- a/libavfilter/dnn_filter_common.c
+++ b/libavfilter/dnn_filter_common.c
@@ -17,6 +17,39 @@
  */
 
 #include "dnn_filter_common.h"
+#include "libavutil/avstring.h"
+
+#define MAX_SUPPORTED_OUTPUTS_NB 4
+
+static char **separate_output_names(const char *expr, const char *val_sep, int *separated_nb)
+{
+char *val, **parsed_vals = NULL;
+int val_num = 0;
+if (!expr || !val_sep || !separated_nb) {
+return NULL;
+}
+
+parsed_vals = av_mallocz_array(MAX_SUPPORTED_OUTPUTS_NB, sizeof(*parsed_vals));
+if (!parsed_vals) {
+return NULL;
+}
+
+do {
+val = av_get_token(&expr, val_sep);
+if(val) {
+parsed_vals[val_num] = val;
+val_num++;
+}
+if (*expr) {
+ 

[FFmpeg-devel] [PATCH V2 3/4] lavfi/dnn_backend_tensorflow: support detect model

2021-05-06 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_tf.c | 39 ++--
 libavfilter/vf_dnn_detect.c  | 32 +-
 2 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index b6b1812cd9..622b5a8464 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -793,15 +793,40 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
 outputs[i].data = TF_TensorData(output_tensors[i]);
 outputs[i].dt = TF_TensorType(output_tensors[i]);
 }
-if (do_ioproc) {
-if (tf_model->model->frame_post_proc != NULL) {
-tf_model->model->frame_post_proc(out_frame, outputs, tf_model->model->filter_ctx);
+switch (model->func_type) {
+case DFT_PROCESS_FRAME:
+//it only support 1 output if it's frame in & frame out
+if (do_ioproc) {
+if (tf_model->model->frame_post_proc != NULL) {
+tf_model->model->frame_post_proc(out_frame, outputs, tf_model->model->filter_ctx);
+} else {
+ff_proc_from_dnn_to_frame(out_frame, outputs, ctx);
+}
 } else {
-ff_proc_from_dnn_to_frame(out_frame, outputs, ctx);
+out_frame->width = outputs[0].width;
+out_frame->height = outputs[0].height;
+}
+break;
+case DFT_ANALYTICS_DETECT:
+if (!model->detect_post_proc) {
+av_log(ctx, AV_LOG_ERROR, "Detect filter needs provide post 
proc\n");
+return DNN_ERROR;
+}
+model->detect_post_proc(out_frame, outputs, nb_output, model->filter_ctx);
+break;
+default:
+for (uint32_t i = 0; i < nb_output; ++i) {
+if (output_tensors[i]) {
+TF_DeleteTensor(output_tensors[i]);
+}
 }
-} else {
-out_frame->width = outputs[0].width;
-out_frame->height = outputs[0].height;
+TF_DeleteTensor(input_tensor);
+av_freep(&output_tensors);
+av_freep(&tf_outputs);
+av_freep(&outputs);
+
+av_log(ctx, AV_LOG_ERROR, "Tensorflow backend does not support this 
kind of dnn filter now\n");
+return DNN_ERROR;
 }
 
 for (uint32_t i = 0; i < nb_output; ++i) {
diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 1dbe4f29a4..7d39acb653 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -203,10 +203,40 @@ static int read_detect_label_file(AVFilterContext *context)
 return 0;
 }
 
+static int check_output_nb(DnnDetectContext *ctx, DNNBackendType backend_type, int output_nb)
+{
+switch(backend_type) {
+case DNN_TF:
+if (output_nb != 4) {
+av_log(ctx, AV_LOG_ERROR, "Only support tensorflow detect model 
with 4 outputs, \
+   but get %d instead\n", output_nb);
+return AVERROR(EINVAL);
+}
+return 0;
+case DNN_OV:
+if (output_nb != 1) {
+av_log(ctx, AV_LOG_ERROR, "Dnn detect filter with openvino backend 
needs 1 output only, \
+   but get %d instead\n", output_nb);
+return AVERROR(EINVAL);
+}
+return 0;
+default:
+avpriv_report_missing_feature(ctx, "Dnn detect filter does not support 
current backend\n");
+return AVERROR(EINVAL);
+}
+return 0;
+}
+
 static av_cold int dnn_detect_init(AVFilterContext *context)
 {
 DnnDetectContext *ctx = context->priv;
-int ret = ff_dnn_init(&ctx->dnnctx, DFT_ANALYTICS_DETECT, context);
+DnnContext *dnn_ctx = &ctx->dnnctx;
+int ret;
+
+ret = ff_dnn_init(&ctx->dnnctx, DFT_ANALYTICS_DETECT, context);
+if (ret < 0)
+return ret;
+ret = check_output_nb(ctx, dnn_ctx->backend_type, dnn_ctx->nb_outputs);
 if (ret < 0)
 return ret;
 ff_dnn_set_detect_post_proc(&ctx->dnnctx, dnn_detect_post_proc);
-- 
2.17.1



[FFmpeg-devel] [PATCH V2 4/4] dnn/vf_dnn_detect: add tensorflow output parse support

2021-05-06 Thread Ting Fu
The testing model is the official TensorFlow model from the GitHub repo; please refer to
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md
to download the detection model you need.
For example, local testing was carried out with
'ssd_mobilenet_v2_coco_2018_03_29.tar.gz', using one image of a dog from
https://github.com/tensorflow/models/blob/master/research/object_detection/test_images/image1.jpg

Testing command is:
./ffmpeg -i image1.jpg -vf dnn_detect=dnn_backend=tensorflow:input=image_tensor:output=\
"num_detections&detection_scores&detection_classes&detection_boxes":model=ssd_mobilenet_v2_coco.pb,\
showinfo -f null -

We will see a result similar to the one below:
[Parsed_showinfo_1 @ 0x33e65f0]   side data - detection bounding boxes:
[Parsed_showinfo_1 @ 0x33e65f0] source: ssd_mobilenet_v2_coco.pb
[Parsed_showinfo_1 @ 0x33e65f0] index: 0,   region: (382, 60) -> (1005, 593), label: 18, confidence: 9834/1.
[Parsed_showinfo_1 @ 0x33e65f0] index: 1,   region: (12, 8) -> (328, 549), label: 18, confidence: 8555/1.
[Parsed_showinfo_1 @ 0x33e65f0] index: 2,   region: (293, 7) -> (682, 458), label: 1, confidence: 8033/1.
[Parsed_showinfo_1 @ 0x33e65f0] index: 3,   region: (342, 0) -> (690, 325), label: 1, confidence: 5878/1.

There are two boxes of dog with scores 94.05% & 93.45% and two boxes of person with scores 80.33% & 58.78%.

Signed-off-by: Ting Fu 
---
 libavfilter/vf_dnn_detect.c | 95 -
 1 file changed, 94 insertions(+), 1 deletion(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 7d39acb653..818b53a052 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -48,6 +48,9 @@ typedef struct DnnDetectContext {
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
 static const AVOption dnn_detect_options[] = {
 { "dnn_backend", "DNN backend",OFFSET(backend_type), 
AV_OPT_TYPE_INT,   { .i64 = 2 },INT_MIN, INT_MAX, FLAGS, "backend" },
+#if (CONFIG_LIBTENSORFLOW == 1)
+{ "tensorflow",  "tensorflow backend flag",0,
AV_OPT_TYPE_CONST, { .i64 = 1 },0, 0, FLAGS, "backend" },
+#endif
 #if (CONFIG_LIBOPENVINO == 1)
 { "openvino","openvino backend flag",  0,
AV_OPT_TYPE_CONST, { .i64 = 2 },0, 0, FLAGS, "backend" },
 #endif
@@ -59,7 +62,7 @@ static const AVOption dnn_detect_options[] = {
 
 AVFILTER_DEFINE_CLASS(dnn_detect);
 
-static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx)
+static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
 {
 DnnDetectContext *ctx = filter_ctx->priv;
 float conf_threshold = ctx->confidence;
@@ -136,6 +139,96 @@ static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AV
 return 0;
 }
 
+static int dnn_detect_post_proc_tf(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
+{
+DnnDetectContext *ctx = filter_ctx->priv;
+int proposal_count;
+float conf_threshold = ctx->confidence;
+float *conf, *position, *label_id, x0, y0, x1, y1;
+int nb_bboxes = 0;
+AVFrameSideData *sd;
+AVDetectionBBox *bbox;
+AVDetectionBBoxHeader *header;
+
+proposal_count = *(float *)(output[0].data);
+conf   = output[1].data;
+position   = output[3].data;
+label_id   = output[2].data;
+
+sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
+if (sd) {
+av_log(filter_ctx, AV_LOG_ERROR, "already have dnn bounding boxes in 
side data.\n");
+return -1;
+}
+
+for (int i = 0; i < proposal_count; ++i) {
+if (conf[i] < conf_threshold)
+continue;
+nb_bboxes++;
+}
+
+if (nb_bboxes == 0) {
+av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this 
frame.\n");
+return 0;
+}
+
+header = av_detection_bbox_create_side_data(frame, nb_bboxes);
+if (!header) {
+av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d 
bounding boxes\n", nb_bboxes);
+return -1;
+}
+
+av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source));
+
+for (int i = 0; i < proposal_count; ++i) {
+y0 = position[i * 4];
+x0 = position[i * 4 + 1];
+y1 = position[i * 4 + 2];
+x1 = position[i * 4 + 3];
+
+bbox = av_get_detection_bbox(header, i);
+
+if (conf[i] < conf_threshold) {
+continue;
+}
+
+bbox->x = (int)(x0 * frame->width);
+bbox->w = (int)(x1 * frame->width) - bbox->x;
+bbox->y = 

[FFmpeg-devel] [PATCH 1/3] lavfi/drawbox: refine code

2021-05-14 Thread Ting Fu
Extract the common code of filter_frame() and drawgrid_filter_frame() into draw_region().

Signed-off-by: Ting Fu 
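
The refactor keeps a single generic pixel loop and parameterizes only the membership test, so drawbox and drawgrid can share it. The shape of that function-pointer pattern, reduced to a standalone sketch (all names here are illustrative, not the filter's own):

    #include <stdio.h>

    typedef int (*PixelPred)(int x, int y, const void *opaque);

    /* Generic region walk: visit every pixel in [left,right) x [top,down)
     * and "draw" the ones the predicate claims, like draw_region() does. */
    static void walk_region(int left, int top, int right, int down,
                            PixelPred belongs, const void *opaque)
    {
        for (int y = top; y < down; y++)
            for (int x = left; x < right; x++)
                if (belongs(x, y, opaque))
                    printf("draw (%d,%d)\n", x, y);
    }

    /* One possible predicate: a box border of thickness 1. */
    static int on_box_border(int x, int y, const void *opaque)
    {
        const int *box = opaque; /* {left, top, right, down} */
        return x == box[0] || x == box[2] - 1 || y == box[1] || y == box[3] - 1;
    }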
---
 libavfilter/vf_drawbox.c | 160 ++-
 1 file changed, 58 insertions(+), 102 deletions(-)

diff --git a/libavfilter/vf_drawbox.c b/libavfilter/vf_drawbox.c
index 2794fc2520..95e26191bd 100644
--- a/libavfilter/vf_drawbox.c
+++ b/libavfilter/vf_drawbox.c
@@ -85,6 +85,61 @@ typedef struct DrawBoxContext {
 
 static const int NUM_EXPR_EVALS = 5;
 
+typedef int (*PixelBelongsToRegion)(DrawBoxContext *s, int x, int y);
+
+#define ASSIGN_THREE_CHANNELS\
+row[0] = frame->data[0] +  y   * frame->linesize[0]; \
+row[1] = frame->data[1] + (y >> ctx->vsub) * frame->linesize[1]; \
+row[2] = frame->data[2] + (y >> ctx->vsub) * frame->linesize[2];
+
+#define ASSIGN_FOUR_CHANNELS  \
+ASSIGN_THREE_CHANNELS \
+row[3] = frame->data[3] + y * frame->linesize[3];
+
+static void draw_region(AVFrame *frame, DrawBoxContext *ctx, int left, int top, int right, int down,
+PixelBelongsToRegion pixel_belongs_to_region)
+{
+unsigned char *row[4];
+int x, y;
+if (ctx->have_alpha && ctx->replace) {
+for (y = top; y < down; y++) {
+ASSIGN_FOUR_CHANNELS
+if (ctx->invert_color) {
+for (x = left; x < right; x++)
+if (pixel_belongs_to_region(ctx, x, y))
+row[0][x] = 0xff - row[0][x];
+} else {
+for (x = left; x < right; x++) {
+if (pixel_belongs_to_region(ctx, x, y)) {
+row[0][x ] = ctx->yuv_color[Y];
+row[1][x >> ctx->hsub] = ctx->yuv_color[U];
+row[2][x >> ctx->hsub] = ctx->yuv_color[V];
+row[3][x ] = ctx->yuv_color[A];
+}
+}
+}
+}
+} else {
+for (y = top; y < down; y++) {
+ASSIGN_THREE_CHANNELS
+if (ctx->invert_color) {
+if (pixel_belongs_to_region(ctx, x, y))
+row[0][x] = 0xff - row[0][x];
+} else {
+for (x = left; x < right; x++) {
+double alpha = (double)ctx->yuv_color[A] / 255;
+
+if (pixel_belongs_to_region(ctx, x, y)) {
+row[0][x ] = (1 - alpha) * row[0][x ] + alpha * ctx->yuv_color[Y];
+row[1][x >> ctx->hsub] = (1 - alpha) * row[1][x >> ctx->hsub] + alpha * ctx->yuv_color[U];
+row[2][x >> ctx->hsub] = (1 - alpha) * row[2][x >> ctx->hsub] + alpha * ctx->yuv_color[V];
+}
+}
+}
+}
+}
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
 DrawBoxContext *s = ctx->priv;
@@ -217,58 +272,9 @@ static av_pure av_always_inline int pixel_belongs_to_box(DrawBoxContext *s, int
 static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
 DrawBoxContext *s = inlink->dst->priv;
-int plane, x, y, xb = s->x, yb = s->y;
-unsigned char *row[4];
-
-if (s->have_alpha && s->replace) {
-for (y = FFMAX(yb, 0); y < frame->height && y < (yb + s->h); y++) {
-row[0] = frame->data[0] + y * frame->linesize[0];
-row[3] = frame->data[3] + y * frame->linesize[3];
-
-for (plane = 1; plane < 3; plane++)
-row[plane] = frame->data[plane] +
- frame->linesize[plane] * (y >> s->vsub);
-
-if (s->invert_color) {
-for (x = FFMAX(xb, 0); x < xb + s->w && x < frame->width; x++)
-if (pixel_belongs_to_box(s, x, y))
-row[0][x] = 0xff - row[0][x];
-} else {
-for (x = FFMAX(xb, 0); x < xb + s->w && x < frame->width; x++) {
-if (pixel_belongs_to_box(s, x, y)) {
-row[0][x   ] = s->yuv_color[Y];
-row[1][x >> s->hsub] = s->yuv_color[U];
-row[2][x >> s->hsub] = s->yuv_color[V];
-row[3][x   ] = s->yuv_color[A];
-}
-}
-}
-}
-} else {
-for (y = FFMAX(yb, 0); y < frame->height && y < (yb + s->h); y++) {
-row[0] = frame->data[0] + y * frame->linesize[0];
 
-for (plane = 1; plane < 3; plane++)
-row[plane

[FFmpeg-devel] [PATCH 2/3] libavfilter: vf_drawbox filter support draw box with detection bounding boxes in side_data

2021-05-14 Thread Ting Fu
This feature can be used with dnn detection by setting vf_drawbox's
option box_source=side_data_detection_bboxes, for example:
./ffmpeg -i face.jpeg -vf dnn_detect=dnn_backend=openvino:model=face-detection-adas-0001.xml:\
input=data:output=detection_out:labels=face-detection-adas-0001.label,\
drawbox=box_source=side_data_detection_bboxes -y face_detect.jpeg

Signed-off-by: Ting Fu 
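
Reading the boxes back out of the frame follows the usual AVFrameSideData pattern; av_frame_get_side_data() and av_get_detection_bbox() are the real libavutil API. A minimal sketch of the iteration the new filter_frame() performs (the printf stands in for the actual drawing):

    #include <stdio.h>
    #include <libavutil/frame.h>
    #include <libavutil/detection_bbox.h>

    /* Sketch: iterate every detection bbox attached to a frame. */
    static void dump_bboxes(const AVFrame *frame)
    {
        AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
        const AVDetectionBBoxHeader *header;

        if (!sd)
            return; /* no detector ran upstream of this filter */
        header = (const AVDetectionBBoxHeader *)sd->data;
        for (uint32_t i = 0; i < header->nb_bboxes; i++) {
            const AVDetectionBBox *bbox = av_get_detection_bbox(header, i);
            printf("box %u: (%d,%d) %dx%d\n", i, bbox->x, bbox->y, bbox->w, bbox->h);
        }
    }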
---
 doc/filters.texi |  8 +++
 libavfilter/vf_drawbox.c | 52 ++--
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index a218289ddd..f2ac8c4cc8 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -10356,6 +10356,14 @@ The x and y offset coordinates where the box is drawn.
 @item h
 The width and height of the drawn box.
 
+@item box_source
+Box source can be set as side_data_detection_bboxes if you want to use box data in
+detection bboxes of side data.
+
+If @var{box_source} is set, the @var{x}, @var{y}, @var{width} and @var{height} will be ignored and
+still use box data in detection bboxes of side data. So please do not use this parameter if you are
+not sure about the box source.
+
 @item t
 The thickness of the drawn box.
 
diff --git a/libavfilter/vf_drawbox.c b/libavfilter/vf_drawbox.c
index 95e26191bd..fff78862e9 100644
--- a/libavfilter/vf_drawbox.c
+++ b/libavfilter/vf_drawbox.c
@@ -31,6 +31,7 @@
 #include "libavutil/eval.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/detection_bbox.h"
 #include "avfilter.h"
 #include "formats.h"
 #include "internal.h"
@@ -79,8 +80,10 @@ typedef struct DrawBoxContext {
 char *x_expr, *y_expr; ///< expression for x and y
 char *w_expr, *h_expr; ///< expression for width and height
 char *t_expr;  ///< expression for thickness
+char *box_source_string; ///< string for box data source
 int have_alpha;
 int replace;
+enum AVFrameSideDataType box_source;
 } DrawBoxContext;
 
 static const int NUM_EXPR_EVALS = 5;
@@ -140,11 +143,30 @@ static void draw_region(AVFrame *frame, DrawBoxContext 
*ctx, int left, int top,
 }
 }
 
+static enum AVFrameSideDataType box_source_string_parse(const char 
*box_source_string)
+{
+av_assert0(box_source_string);
+if (!strcmp(box_source_string, "side_data_detection_bboxes")) {
+return AV_FRAME_DATA_DETECTION_BBOXES;
+} else {
+// will support side_data_regions_of_interest next
+return AVERROR(EINVAL);
+}
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
 DrawBoxContext *s = ctx->priv;
 uint8_t rgba_color[4];
 
+if (s->box_source_string) {
+s->box_source = box_source_string_parse(s->box_source_string);
+if ((int)s->box_source < 0) {
+av_log(ctx, AV_LOG_ERROR, "Error box source: 
%s\n",s->box_source_string);
+return AVERROR(EINVAL);
+}
+}
+
 if (!strcmp(s->color_str, "invert"))
 s->invert_color = 1;
 else if (av_parse_color(rgba_color, s->color_str, -1, ctx) < 0)
@@ -272,9 +294,34 @@ static av_pure av_always_inline int pixel_belongs_to_box(DrawBoxContext *s, int
 static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
 DrawBoxContext *s = inlink->dst->priv;
+const AVDetectionBBoxHeader *header = NULL;
+const AVDetectionBBox *bbox;
+AVFrameSideData *sd;
+int loop = 1;
+
+if (s->box_source == AV_FRAME_DATA_DETECTION_BBOXES) {
+sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
+if (sd) {
+header = (AVDetectionBBoxHeader *)sd->data;
+loop = header->nb_bboxes;
+} else {
+av_log(s, AV_LOG_WARNING, "No detection bboxes.\n");
+return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+}
 
-draw_region(frame, s, FFMAX(s->x, 0), FFMAX(s->y, 0), FFMIN(s->x + s->w, frame->width),
-FFMIN(s->y + s->h, frame->height), pixel_belongs_to_box);
+for (int i = 0; i < loop; i++) {
+if (header) {
+bbox = av_get_detection_bbox(header, i);
+s->y = bbox->y;
+s->x = bbox->x;
+s->h = bbox->h;
+s->w = bbox->w;
+}
+
+draw_region(frame, s, FFMAX(s->x, 0), FFMAX(s->y, 0), FFMIN(s->x + s->w, frame->width),
+FFMIN(s->y + s->h, frame->height), pixel_belongs_to_box);
+}
 
 return ff_filter_frame(inlink->dst->outputs[0], frame);
 }
@@ -329,6 +376,7 @@ static const AVOption drawbox_options[] = {
 { "thickness", "set the box thickness",
OFFSET(t_expr),AV_OPT_TYPE_STRING, { .str="3" },   0, 0,

[FFmpeg-devel] [PATCH 3/3] libavfilter: vf_drawtext filter support draw text with detection bounding boxes in side_data

2021-05-14 Thread Ting Fu
This feature can be used with dnn detection by setting vf_drawtext's option
text_source=side_data_detection_bboxes, for example:
./ffmpeg -i face.jpeg -vf dnn_detect=dnn_backend=openvino:model=face-detection-adas-0001.xml:\
input=data:output=detection_out:labels=face-detection-adas-0001.label,drawbox=box_source=\
side_data_detection_bboxes,drawtext=text_source=side_data_detection_bboxes:fontcolor=green:\
fontsize=40 -y face_detect.jpeg
Please note, the default fontsize of vf_drawtext is 12, which may be too
small to be seen clearly.

Signed-off-by: Ting Fu 
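
When text_source is side_data_detection_bboxes, the drawn string is built from each bbox's label rather than from the text option. A sketch of composing such a string with av_strlcpy()/av_strlcatf() (assuming the detect_label and detect_confidence fields of AVDetectionBBox; the exact formatting here is illustrative, not what the filter prints):

    #include <libavutil/avstring.h>
    #include <libavutil/detection_bbox.h>

    /* Illustrative: render "label (num/den)" for one bbox into buf. */
    static void format_bbox_text(char *buf, size_t buf_size, const AVDetectionBBox *bbox)
    {
        av_strlcpy(buf, bbox->detect_label, buf_size);
        av_strlcatf(buf, buf_size, " (%d/%d)",
                    bbox->detect_confidence.num, bbox->detect_confidence.den);
    }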
---
 doc/filters.texi  |  8 
 libavfilter/vf_drawtext.c | 77 ---
 2 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index f2ac8c4cc8..d10e6de03d 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -10788,6 +10788,14 @@ parameter @var{text}.
 
 If both @var{text} and @var{textfile} are specified, an error is thrown.
 
+@item text_source
+Text source should be set as side_data_detection_bboxes if you want to use text data in
+detection bboxes of side data.
+
+If text source is set, @var{text} and @var{textfile} will be ignored and still use
+text data in detection bboxes of side data. So please do not use this parameter
+if you are not sure about the text source.
+
 @item reload
 If set to 1, the @var{textfile} will be reloaded before each frame.
 Be sure to update it atomically, or it may be read partially, or even fail.
diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c
index 7ea057b812..382d589e26 100644
--- a/libavfilter/vf_drawtext.c
+++ b/libavfilter/vf_drawtext.c
@@ -55,6 +55,7 @@
 #include "libavutil/time_internal.h"
 #include "libavutil/tree.h"
 #include "libavutil/lfg.h"
+#include "libavutil/detection_bbox.h"
 #include "avfilter.h"
 #include "drawutils.h"
 #include "formats.h"
@@ -199,6 +200,8 @@ typedef struct DrawTextContext {
 int tc24hmax;   ///< 1 if timecode is wrapped to 24 hours, 0 otherwise
 int reload; ///< reload text file for each frame
 int start_number;   ///< starting frame number for n/frame_num var
+char *text_source_string;   ///< the string to specify text data source
+enum AVFrameSideDataType text_source;
 #if CONFIG_LIBFRIBIDI
 int text_shaping;   ///< 1 to shape the text before drawing it
 #endif
@@ -246,6 +249,7 @@ static const AVOption drawtext_options[]= {
 { "alpha",   "apply alpha while rendering", OFFSET(a_expr),  
AV_OPT_TYPE_STRING, { .str = "1" },  .flags = FLAGS },
 {"fix_bounds", "check and fix text coords to avoid clipping", 
OFFSET(fix_bounds), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
 {"start_number", "start frame number for n/frame_num variable", 
OFFSET(start_number), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
+{"text_source", "the source of text", OFFSET(text_source_string), 
AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS },
 
 #if CONFIG_LIBFRIBIDI
 {"text_shaping", "attempt to shape text before drawing", 
OFFSET(text_shaping), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS},
@@ -690,6 +694,16 @@ out:
 }
 #endif
 
+static enum AVFrameSideDataType text_source_string_parse(const char *text_source_string)
+{
+av_assert0(text_source_string);
+if (!strcmp(text_source_string, "side_data_detection_bboxes")) {
+return AV_FRAME_DATA_DETECTION_BBOXES;
+} else {
+return AVERROR(EINVAL);
+}
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
 int err;
@@ -731,9 +745,28 @@ static av_cold int init(AVFilterContext *ctx)
 s->text = av_strdup("");
 }
 
+if (s->text_source_string) {
+s->text_source = text_source_string_parse(s->text_source_string);
+if ((int)s->text_source < 0) {
+av_log(ctx, AV_LOG_ERROR, "Error text source: %s\n", 
s->text_source_string);
+return AVERROR(EINVAL);
+}
+}
+
+if (s->text_source == AV_FRAME_DATA_DETECTION_BBOXES) {
+if (s->text) {
+av_log(ctx, AV_LOG_WARNING, "Multiple texts provided, will use text_source only\n");
+av_free(s->text);
+}
+s->text = av_mallocz(AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE *
+ (AV_NUM_DETECTION_BBOX_CLASSIFY + 1));
+if (!s->text)
+return AVERROR(ENOMEM);
+}
+
 if (!s->text) {
 av_log(ctx, AV_LOG_ERROR,
-   "Either text, a valid file or a timecode must be provided\n");
+   "Either text, a valid file, a timecode or text source must be 
provided\n");
 return AVERROR(EINVAL);

[FFmpeg-devel] [PATCH 1/2] lavfi/vf_drawbox.c: fix CID 1485004

2021-06-03 Thread Ting Fu
CID 1485004: Uninitialized variables (UNINIT)
Using uninitialized value "x" when calling "*pixel_belongs_to_region".
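
For context, a minimal reduction of the defect; all names here are hypothetical stand-ins for the filter internals. Before this patch the invert branch had no inner loop, so "x" was read before any assignment:

#include <stdbool.h>

static bool pixel_belongs(int x, int y) { return x == y; }

/* Reduction of CID 1485004: in the invert branch, x is read uninitialized
 * because only the non-invert branch contains the loop that assigns it. */
static int count_before_fix(int left, int right, int top, int down, int invert)
{
    int x, y, n = 0;
    for (y = top; y < down; y++) {
        if (invert) {
            if (pixel_belongs(x, y))   /* BUG: x never assigned on this path */
                n++;
        } else {
            for (x = left; x < right; x++)
                if (pixel_belongs(x, y))
                    n++;
        }
    }
    return n;
}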

Signed-off-by: Ting Fu 
---
 libavfilter/vf_drawbox.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vf_drawbox.c b/libavfilter/vf_drawbox.c
index fff78862e9..1e9e028650 100644
--- a/libavfilter/vf_drawbox.c
+++ b/libavfilter/vf_drawbox.c
@@ -126,8 +126,9 @@ static void draw_region(AVFrame *frame, DrawBoxContext *ctx, int left, int top,
 for (y = top; y < down; y++) {
 ASSIGN_THREE_CHANNELS
 if (ctx->invert_color) {
-if (pixel_belongs_to_region(ctx, x, y))
-row[0][x] = 0xff - row[0][x];
+for (x = left; x < right; x++)
+if (pixel_belongs_to_region(ctx, x, y))
+row[0][x] = 0xff - row[0][x];
 } else {
 for (x = left; x < right; x++) {
 double alpha = (double)ctx->yuv_color[A] / 255;
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 2/2] lavfi/vf_drawtext.c: fix CID 1485003

2021-06-03 Thread Ting Fu
CID 1485003: Memory - illegal accesses (UNINIT)
Using uninitialized value "sd".

Signed-off-by: Ting Fu 
---
 libavfilter/vf_drawtext.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c
index 382d589e26..c4c09894e4 100644
--- a/libavfilter/vf_drawtext.c
+++ b/libavfilter/vf_drawtext.c
@@ -1554,7 +1554,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 AVFrameSideData *sd;
 int loop = 1;
 
-if (s->text_source == AV_FRAME_DATA_DETECTION_BBOXES && sd) {
+if (s->text_source == AV_FRAME_DATA_DETECTION_BBOXES) {
 sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
 if (sd) {
 header = (AVDetectionBBoxHeader *)sd->data;
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 2/2] dnn/openvino: add input/output name info

2020-09-08 Thread Ting Fu
Show all input/output names when the given input or output name is not correct.
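
As a sketch of the mechanism (assuming an already-loaded ie_network_t and the same OpenVINO C API calls this file uses; the helper name log_input_names is hypothetical):

#include "libavutil/log.h"
#include <c_api/ie_c_api.h>

/* Enumerate and log every model input name, the same loop the patch uses
 * to build the "all input(s)" diagnostic string. */
static void log_input_names(ie_network_t *network)
{
    size_t count = 0;
    if (ie_network_get_inputs_number(network, &count) != OK)
        return;
    for (size_t i = 0; i < count; i++) {
        char *name = NULL;
        if (ie_network_get_input_name(network, i, &name) != OK)
            continue;
        av_log(NULL, AV_LOG_INFO, "model input %zu: %s\n", i, name);
        ie_network_name_free(&name);
    }
}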

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_openvino.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 2f0998046a..e5842906d1 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -79,6 +79,7 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input
 OVModel *ov_model = (OVModel *)model;
 OVContext *ctx = &ov_model->ctx;
 char *model_input_name = NULL;
+char *all_input_names = NULL;
 IEStatusCode status;
 size_t model_input_count = 0;
 dimensions_t dims;
@@ -118,12 +119,15 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input
 input->width= dims.dims[3];
 input->dt   = precision_to_datatype(precision);
 return DNN_SUCCESS;
+} else {
+//incorrect input name
+APPEND_STRING(all_input_names, model_input_name)
 }
 
 ie_network_name_free(&model_input_name);
 }
 
-av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", model_input_name);
+av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model, all input(s) are: \"%s\"\n", input_name, all_input_names);
 return DNN_ERROR;
 }
 
@@ -246,12 +250,15 @@ err:
 
 DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output)
 {
+char *model_output_name = NULL;
+char *all_output_names = NULL;
 dimensions_t dims;
 precision_e precision;
 ie_blob_buffer_t blob_buffer;
 OVModel *ov_model = (OVModel *)model->model;
 OVContext *ctx = &ov_model->ctx;
 IEStatusCode status = ie_infer_request_infer(ov_model->infer_request);
+size_t model_output_count = 0;
 if (status != OK) {
 av_log(ctx, AV_LOG_ERROR, "Failed to start synchronous model 
inference\n");
 return DNN_ERROR;
@@ -262,7 +269,16 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, DNNData *outputs, c
 ie_blob_t *output_blob = NULL;
 status = ie_infer_request_get_blob(ov_model->infer_request, output_name, &output_blob);
 if (status != OK) {
+//incorrect output name
 av_log(ctx, AV_LOG_ERROR, "Failed to get model output data\n");
+status = ie_network_get_outputs_number(ov_model->network, &model_output_count);
+for (size_t i = 0; i < model_output_count; i++) {
+status = ie_network_get_output_name(ov_model->network, i, &model_output_name);
+APPEND_STRING(all_output_names, model_output_name)
+}
+av_log(ctx, AV_LOG_ERROR,
+   "output \"%s\" may not correct, all output(s) are: \"%s\"\n",
+   output_name, all_output_names);
 return DNN_ERROR;
 }
 
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/2] dnn/openvino: support run inference via GPU

2020-09-08 Thread Ting Fu
For enabling OpenVINO GPU please:
1. install the required OpenCL drivers, see: https://github.com/intel/compute-runtime/releases/tag/19.41.14441
2. build the OpenVINO C lib with GPU enabled: use cmake config with: -DENABLE_CLDNN=ON
3. then make, and include the OpenVINO C lib in the environment variables
For detailed steps please refer to: https://github.com/openvinotoolkit/openvino/blob/master/build-instruction.md

To run inference on GPU, please add: options=device=GPU
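
A minimal sketch of the option plumbing this patch introduces (structure and names here are illustrative, not the backend's real symbols): an AVClass-backed context whose fields are filled from a "key=value&key=value" string, with the same separators the backend passes to av_opt_set_from_string().

#include <stddef.h>
#include "libavutil/log.h"
#include "libavutil/opt.h"

typedef struct DemoOptions {
    const AVClass *class;   /* must be first for the AVOption machinery */
    char *device_type;
} DemoOptions;

#define DEMO_OFFSET(x) offsetof(DemoOptions, x)
static const AVOption demo_options[] = {
    { "device", "device to run model", DEMO_OFFSET(device_type),
      AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, 0 },
    { NULL }
};

static const AVClass demo_class = {
    .class_name = "demo",
    .item_name  = av_default_item_name,
    .option     = demo_options,
    .version    = LIBAVUTIL_VERSION_INT,
};

/* Parse e.g. "device=GPU" into ctx->device_type. */
static int parse_demo_options(DemoOptions *ctx, const char *options)
{
    ctx->class = &demo_class;
    av_opt_set_defaults(ctx);
    return av_opt_set_from_string(ctx, options, NULL, "=", "&");
}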

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_openvino.c | 52 ++
 1 file changed, 44 insertions(+), 8 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 5d6d3ed542..2f0998046a 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -26,10 +26,18 @@
 #include "dnn_backend_openvino.h"
 #include "libavformat/avio.h"
 #include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+#include "libavutil/avstring.h"
+#include "../internal.h"
 #include 
 
+typedef struct OVOptions{
+char *device_type;
+} OVOptions;
+
 typedef struct OVContext {
 const AVClass *class;
+OVOptions options;
 } OVContext;
 
 typedef struct OVModel{
@@ -41,14 +49,19 @@ typedef struct OVModel{
 ie_blob_t *input_blob;
 } OVModel;
 
-static const AVClass dnn_openvino_class = {
-.class_name = "dnn_openvino",
-.item_name  = av_default_item_name,
-.option = NULL,
-.version= LIBAVUTIL_VERSION_INT,
-.category   = AV_CLASS_CATEGORY_FILTER,
+#define APPEND_STRING(generated_string, iterate_string) \
+generated_string = generated_string ? av_asprintf("%s %s", generated_string, iterate_string) : \
+  av_asprintf("%s", iterate_string);
+
+#define OFFSET(x) offsetof(OVContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
+static const AVOption dnn_openvino_options[] = {
+{ "device", "device to run model", OFFSET(options.device_type), 
AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS },
+{ NULL }
 };
 
+AVFILTER_DEFINE_CLASS(dnn_openvino);
+
 static DNNDataType precision_to_datatype(precision_e precision)
 {
 switch (precision)
@@ -159,10 +172,13 @@ err:
 
 DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options)
 {
+char *all_dev_names = NULL;
 DNNModel *model = NULL;
 OVModel *ov_model = NULL;
+OVContext *ctx = NULL;
 IEStatusCode status;
 ie_config_t config = {NULL, NULL, NULL};
+ie_available_devices_t a_dev;
 
 model = av_malloc(sizeof(DNNModel));
 if (!model){
@@ -173,6 +189,14 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options)
 if (!ov_model)
 goto err;
 ov_model->ctx.class = &dnn_openvino_class;
+ctx = &ov_model->ctx;
+
+//parse options
+av_opt_set_defaults(ctx);
+if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
+av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
+goto err;
+}
 
 status = ie_core_create("", &ov_model->core);
 if (status != OK)
@@ -182,9 +206,21 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options)
 if (status != OK)
 goto err;
 
-status = ie_core_load_network(ov_model->core, ov_model->network, "CPU", &config, &ov_model->exe_network);
-if (status != OK)
+status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to init OpenVINO model\n");
+status = ie_core_get_available_devices(ov_model->core, &a_dev);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get available devices\n");
+goto err;
+}
+for (int i = 0; i < a_dev.num_devices; i++) {
+APPEND_STRING(all_dev_names, a_dev.devices[i])
+}
+av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all 
available devices are: \"%s\"\n",
+   ctx->options.device_type, all_dev_names);
 goto err;
+}
 
 model->model = (void *)ov_model;
 model->set_input = &set_input_ov;
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/2] lavfi/dnn: Modify error message for incorrect backend_type

2022-12-30 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_interface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c
index 554a36b0dc..fa484c0905 100644
--- a/libavfilter/dnn/dnn_interface.c
+++ b/libavfilter/dnn/dnn_interface.c
@@ -71,7 +71,7 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type)
 #endif
 break;
 default:
-av_log(NULL, AV_LOG_ERROR, "Module backend_type is not native or tensorflow\n");
+av_log(NULL, AV_LOG_ERROR, "Module backend_type is not supported or enabled.\n");
 av_freep(&dnn_module);
 return NULL;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH V2 1/2] lavfi/dnn: Modify error message for incorrect backend_type

2023-01-02 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_interface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c
index 554a36b0dc..fa484c0905 100644
--- a/libavfilter/dnn/dnn_interface.c
+++ b/libavfilter/dnn/dnn_interface.c
@@ -71,7 +71,7 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type)
 #endif
 break;
 default:
-av_log(NULL, AV_LOG_ERROR, "Module backend_type is not native or tensorflow\n");
+av_log(NULL, AV_LOG_ERROR, "Module backend_type is not supported or enabled.\n");
 av_freep(&dnn_module);
 return NULL;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH V3 1/3] lavfi/dnn: Mark native backend as deprecated

2023-01-06 Thread Ting Fu
Mark native as deprecated for the backend_type option. Modify the related
error message.

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_interface.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c
index 554a36b0dc..12d36f7fed 100644
--- a/libavfilter/dnn/dnn_interface.c
+++ b/libavfilter/dnn/dnn_interface.c
@@ -24,7 +24,6 @@
  */
 
 #include "../dnn_interface.h"
-#include "dnn_backend_native.h"
 #include "dnn_backend_tf.h"
 #include "dnn_backend_openvino.h"
 #include "libavutil/mem.h"
@@ -40,12 +39,9 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type)
 
 switch(backend_type){
 case DNN_NATIVE:
-dnn_module->load_model = &ff_dnn_load_model_native;
-dnn_module->execute_model = &ff_dnn_execute_model_native;
-dnn_module->get_result = &ff_dnn_get_result_native;
-dnn_module->flush = &ff_dnn_flush_native;
-dnn_module->free_model = &ff_dnn_free_model_native;
-break;
+av_log(NULL, AV_LOG_ERROR, "Native backend is deprecated, please use 
other supported DNN backends.\n");
+av_freep(&dnn_module);
+return NULL;
 case DNN_TF:
 #if (CONFIG_LIBTENSORFLOW == 1)
 dnn_module->load_model = &ff_dnn_load_model_tf;
@@ -71,7 +67,7 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type)
 #endif
 break;
 default:
-av_log(NULL, AV_LOG_ERROR, "Module backend_type is not native or tensorflow\n");
+av_log(NULL, AV_LOG_ERROR, "Module backend_type is not supported or enabled.\n");
 av_freep(&dnn_module);
 return NULL;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH V3 2/3] lavfi/dnn: Delete DNN native backend related tools and docs.

2023-01-06 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 doc/filters.texi|  43 +-
 tools/python/convert.py |  56 ---
 tools/python/convert_from_tensorflow.py | 607 
 tools/python/convert_header.py  |  26 -
 4 files changed, 4 insertions(+), 728 deletions(-)
 delete mode 100644 tools/python/convert.py
 delete mode 100644 tools/python/convert_from_tensorflow.py
 delete mode 100644 tools/python/convert_header.py

diff --git a/doc/filters.texi b/doc/filters.texi
index 9c32339141..797d1c9fe2 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -11222,9 +11222,6 @@ See @url{http://openaccess.thecvf.com/content_ECCV_2018/papers/Xia_Li_Recurrent_
 Training as well as model generation scripts are provided in
 the repository at @url{https://github.com/XueweiMeng/derain_filter.git}.
 
-Native model files (.model) can be generated from TensorFlow model
-files (.pb) by using tools/python/convert.py
-
 The filter accepts the following options:
 
 @table @option
@@ -11245,21 +11242,16 @@ Specify which DNN backend to use for model loading and execution. This option ac
 the following values:
 
 @table @samp
-@item native
-Native implementation of DNN loading and execution.
-
 @item tensorflow
 TensorFlow backend. To enable this backend you
 need to install the TensorFlow for C library (see
 @url{https://www.tensorflow.org/install/lang_c}) and configure FFmpeg with
 @code{--enable-libtensorflow}
 @end table
-Default value is @samp{native}.
 
 @item model
 Set path to model file specifying network architecture and its parameters.
-Note that different backends use different file formats. TensorFlow and native
-backend can load files for only its format.
+Note that different backends use different file formats. TensorFlow can load files for only its format.
 @end table
 
 To get full functionality (such as async execution), please use the @ref{dnn_processing} filter.
@@ -11583,9 +11575,6 @@ Specify which DNN backend to use for model loading and execution. This option ac
 the following values:
 
 @table @samp
-@item native
-Native implementation of DNN loading and execution.
-
 @item tensorflow
 TensorFlow backend. To enable this backend you
 need to install the TensorFlow for C library (see
@@ -11601,14 +11590,9 @@ be needed if the header files and libraries are not installed into system path)
 
 @end table
 
-Default value is @samp{native}.
-
 @item model
 Set path to model file specifying network architecture and its parameters.
-Note that different backends use different file formats. TensorFlow, OpenVINO and native
-backend can load files for only its format.
-
-Native model file (.model) can be generated from TensorFlow model file (.pb) by using tools/python/convert.py
+Note that different backends use different file formats. TensorFlow, OpenVINO backend can load files for only its format.
 
 @item input
 Set the input name of the dnn network.
@@ -11634,12 +11618,6 @@ Remove rain in rgb24 frame with can.pb (see @ref{derain} filter):
 ./ffmpeg -i rain.jpg -vf format=rgb24,dnn_processing=dnn_backend=tensorflow:model=can.pb:input=x:output=y derain.jpg
 @end example
 
-@item
-Halve the pixel value of the frame with format gray32f:
-@example
-ffmpeg -i input.jpg -vf format=grayf32,dnn_processing=model=halve_gray_float.model:input=dnn_in:output=dnn_out:dnn_backend=native -y out.native.png
-@end example
-
 @item
 Handle the Y channel with srcnn.pb (see @ref{sr} filter) for frame with yuv420p (planar YUV formats supported):
 @example
@@ -21648,13 +21626,6 @@ Efficient Sub-Pixel Convolutional Neural Network model (ESPCN).
 See @url{https://arxiv.org/abs/1609.05158}.
 @end itemize
 
-Training scripts as well as scripts for model file (.pb) saving can be found at
-@url{https://github.com/XueweiMeng/sr/tree/sr_dnn_native}. Original repository
-is at @url{https://github.com/HighVoltageRocknRoll/sr.git}.
-
-Native model files (.model) can be generated from TensorFlow model
-files (.pb) by using tools/python/convert.py
-
 The filter accepts the following options:
 
 @table @option
@@ -21663,9 +21634,6 @@ Specify which DNN backend to use for model loading and execution. This option ac
 the following values:
 
 @table @samp
-@item native
-Native implementation of DNN loading and execution.
-
 @item tensorflow
 TensorFlow backend. To enable this backend you
 need to install the TensorFlow for C library (see
@@ -21673,13 +21641,10 @@ need to install the TensorFlow for C library (see
 @code{--enable-libtensorflow}
 @end table
 
-Default value is @samp{native}.
-
 @item model
 Set path to model file specifying network architecture and its parameters.
-Note that different backends use different file formats. TensorFlow backend
-can load files for both formats, while native backend can load files for only
-its format.
+Note that different backends use different file formats. TensorFlow, OpenVINO backend
+can load files for only its format.
 
 @item scale_factor
 Set scale factor

[FFmpeg-devel] [PATCH V4 1/3] lavfi/dnn: Mark native backend as unsupported

2023-01-06 Thread Ting Fu
Native is a deprecated value for the backend_type option. Modify the related
error message.

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_interface.c | 10 +-
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c
index 554a36b0dc..5b1695a1dd 100644
--- a/libavfilter/dnn/dnn_interface.c
+++ b/libavfilter/dnn/dnn_interface.c
@@ -24,7 +24,6 @@
  */
 
 #include "../dnn_interface.h"
-#include "dnn_backend_native.h"
 #include "dnn_backend_tf.h"
 #include "dnn_backend_openvino.h"
 #include "libavutil/mem.h"
@@ -39,13 +38,6 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type)
 }
 
 switch(backend_type){
-case DNN_NATIVE:
-dnn_module->load_model = &ff_dnn_load_model_native;
-dnn_module->execute_model = &ff_dnn_execute_model_native;
-dnn_module->get_result = &ff_dnn_get_result_native;
-dnn_module->flush = &ff_dnn_flush_native;
-dnn_module->free_model = &ff_dnn_free_model_native;
-break;
 case DNN_TF:
 #if (CONFIG_LIBTENSORFLOW == 1)
 dnn_module->load_model = &ff_dnn_load_model_tf;
@@ -71,7 +63,7 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type)
 #endif
 break;
 default:
-av_log(NULL, AV_LOG_ERROR, "Module backend_type is not native or tensorflow\n");
+av_log(NULL, AV_LOG_ERROR, "Module backend_type is not supported or enabled.\n");
 av_freep(&dnn_module);
 return NULL;
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH V4 2/3] lavfi/dnn: Delete DNN native backend related tools and docs.

2023-01-06 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 doc/filters.texi|  43 +-
 tools/python/convert.py |  56 ---
 tools/python/convert_from_tensorflow.py | 607 
 tools/python/convert_header.py  |  26 -
 4 files changed, 4 insertions(+), 728 deletions(-)
 delete mode 100644 tools/python/convert.py
 delete mode 100644 tools/python/convert_from_tensorflow.py
 delete mode 100644 tools/python/convert_header.py

diff --git a/doc/filters.texi b/doc/filters.texi
index 9c32339141..797d1c9fe2 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -11222,9 +11222,6 @@ See @url{http://openaccess.thecvf.com/content_ECCV_2018/papers/Xia_Li_Recurrent_
 Training as well as model generation scripts are provided in
 the repository at @url{https://github.com/XueweiMeng/derain_filter.git}.
 
-Native model files (.model) can be generated from TensorFlow model
-files (.pb) by using tools/python/convert.py
-
 The filter accepts the following options:
 
 @table @option
@@ -11245,21 +11242,16 @@ Specify which DNN backend to use for model loading and execution. This option ac
 the following values:
 
 @table @samp
-@item native
-Native implementation of DNN loading and execution.
-
 @item tensorflow
 TensorFlow backend. To enable this backend you
 need to install the TensorFlow for C library (see
 @url{https://www.tensorflow.org/install/lang_c}) and configure FFmpeg with
 @code{--enable-libtensorflow}
 @end table
-Default value is @samp{native}.
 
 @item model
 Set path to model file specifying network architecture and its parameters.
-Note that different backends use different file formats. TensorFlow and native
-backend can load files for only its format.
+Note that different backends use different file formats. TensorFlow can load files for only its format.
 @end table
 
 To get full functionality (such as async execution), please use the @ref{dnn_processing} filter.
@@ -11583,9 +11575,6 @@ Specify which DNN backend to use for model loading and execution. This option ac
 the following values:
 
 @table @samp
-@item native
-Native implementation of DNN loading and execution.
-
 @item tensorflow
 TensorFlow backend. To enable this backend you
 need to install the TensorFlow for C library (see
@@ -11601,14 +11590,9 @@ be needed if the header files and libraries are not installed into system path)
 
 @end table
 
-Default value is @samp{native}.
-
 @item model
 Set path to model file specifying network architecture and its parameters.
-Note that different backends use different file formats. TensorFlow, OpenVINO and native
-backend can load files for only its format.
-
-Native model file (.model) can be generated from TensorFlow model file (.pb) by using tools/python/convert.py
+Note that different backends use different file formats. TensorFlow, OpenVINO backend can load files for only its format.
 
 @item input
 Set the input name of the dnn network.
@@ -11634,12 +11618,6 @@ Remove rain in rgb24 frame with can.pb (see @ref{derain} filter):
 ./ffmpeg -i rain.jpg -vf format=rgb24,dnn_processing=dnn_backend=tensorflow:model=can.pb:input=x:output=y derain.jpg
 @end example
 
-@item
-Halve the pixel value of the frame with format gray32f:
-@example
-ffmpeg -i input.jpg -vf format=grayf32,dnn_processing=model=halve_gray_float.model:input=dnn_in:output=dnn_out:dnn_backend=native -y out.native.png
-@end example
-
 @item
 Handle the Y channel with srcnn.pb (see @ref{sr} filter) for frame with yuv420p (planar YUV formats supported):
 @example
@@ -21648,13 +21626,6 @@ Efficient Sub-Pixel Convolutional Neural Network model (ESPCN).
 See @url{https://arxiv.org/abs/1609.05158}.
 @end itemize
 
-Training scripts as well as scripts for model file (.pb) saving can be found at
-@url{https://github.com/XueweiMeng/sr/tree/sr_dnn_native}. Original repository
-is at @url{https://github.com/HighVoltageRocknRoll/sr.git}.
-
-Native model files (.model) can be generated from TensorFlow model
-files (.pb) by using tools/python/convert.py
-
 The filter accepts the following options:
 
 @table @option
@@ -21663,9 +21634,6 @@ Specify which DNN backend to use for model loading and execution. This option ac
 the following values:
 
 @table @samp
-@item native
-Native implementation of DNN loading and execution.
-
 @item tensorflow
 TensorFlow backend. To enable this backend you
 need to install the TensorFlow for C library (see
@@ -21673,13 +21641,10 @@ need to install the TensorFlow for C library (see
 @code{--enable-libtensorflow}
 @end table
 
-Default value is @samp{native}.
-
 @item model
 Set path to model file specifying network architecture and its parameters.
-Note that different backends use different file formats. TensorFlow backend
-can load files for both formats, while native backend can load files for only
-its format.
+Note that different backends use different file formats. TensorFlow, OpenVINO backend
+can load files for only its format.
 
 @item scale_factor
 Set scale factor

[FFmpeg-devel] [PATCH] dnn: add NV12 pixel format support

2020-12-18 Thread Ting Fu
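
The interesting part of this patch is the chroma path: NV12 stores U and V interleaved in one half-resolution plane, which has the same memory layout as a two-component YA8 image, so a single swscale context can rescale both chroma channels at once. A minimal sketch of that idea (the helper name is hypothetical):

#include "libswscale/swscale.h"
#include "libavutil/pixfmt.h"

/* Build a scaler for NV12's interleaved UV plane by treating it as YA8
 * (two interleaved 8-bit components) at half luma resolution. */
static struct SwsContext *nv12_uv_scaler(int src_w, int src_h,
                                         int dst_w, int dst_h)
{
    return sws_getContext(src_w >> 1, src_h >> 1, AV_PIX_FMT_YA8,
                          dst_w >> 1, dst_h >> 1, AV_PIX_FMT_YA8,
                          SWS_BICUBIC, NULL, NULL, NULL);
}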
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_io_proc.c   |  2 ++
 libavfilter/vf_dnn_processing.c | 30 +-
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c
index c9b49be3bd..2744cb6502 100644
--- a/libavfilter/dnn/dnn_io_proc.c
+++ b/libavfilter/dnn/dnn_io_proc.c
@@ -64,6 +64,7 @@ DNNReturnType proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_
 case AV_PIX_FMT_YUV410P:
 case AV_PIX_FMT_YUV411P:
 case AV_PIX_FMT_GRAY8:
+case AV_PIX_FMT_NV12:
 sws_ctx = sws_getContext(frame->width,
  frame->height,
  AV_PIX_FMT_GRAYF32,
@@ -135,6 +136,7 @@ DNNReturnType proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_c
 case AV_PIX_FMT_YUV410P:
 case AV_PIX_FMT_YUV411P:
 case AV_PIX_FMT_GRAY8:
+case AV_PIX_FMT_NV12:
 sws_ctx = sws_getContext(frame->width,
  frame->height,
  AV_PIX_FMT_GRAY8,
diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c
index 334243bd2b..76fd2e88db 100644
--- a/libavfilter/vf_dnn_processing.c
+++ b/libavfilter/vf_dnn_processing.c
@@ -113,6 +113,7 @@ static int query_formats(AVFilterContext *context)
 AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32,
 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
+AV_PIX_FMT_NV12,
 AV_PIX_FMT_NONE
 };
 AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
@@ -161,6 +162,7 @@ static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLin
 case AV_PIX_FMT_YUV444P:
 case AV_PIX_FMT_YUV410P:
 case AV_PIX_FMT_YUV411P:
+case AV_PIX_FMT_NV12:
 if (model_input->channels != 1) {
 LOG_FORMAT_CHANNEL_MISMATCH();
 return AVERROR(EIO);
@@ -212,15 +214,22 @@ static int prepare_uv_scale(AVFilterLink *outlink)
 
 if (isPlanarYUV(fmt)) {
 if (inlink->w != outlink->w || inlink->h != outlink->h) {
-const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
-int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
-int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
-int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h);
-int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w);
-ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
-   sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
-   SWS_BICUBIC, NULL, NULL, NULL);
-ctx->sws_uv_height = sws_src_h;
+if (fmt == AV_PIX_FMT_NV12) {
+ctx->sws_uv_scale = sws_getContext(inlink->w >> 1, inlink->h >> 1, AV_PIX_FMT_YA8,
+   outlink->w >> 1, outlink->h >> 1, AV_PIX_FMT_YA8,
+   SWS_BICUBIC, NULL, NULL, NULL);
+ctx->sws_uv_height = inlink->h >> 1;
+} else {
+const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
+int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h);
+int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w);
+ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
+   sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
+   SWS_BICUBIC, NULL, NULL, NULL);
+ctx->sws_uv_height = sws_src_h;
+}
 }
 }
 
@@ -262,6 +271,9 @@ static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame
 in->data[i], in->linesize[i],
 bytewidth, uv_height);
 }
+} else if (in->format == AV_PIX_FMT_NV12) {
+sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1,
+  0, ctx->sws_uv_height, out->data + 1, out->linesize + 1);
 } else {
 sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1,
   0, ctx->sws_uv_height, out->data + 1, out->linesize + 1);
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/3] dnn/openvino: remove unnecessary code

2021-01-10 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_openvino.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index d27e451eea..050be97209 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -284,14 +284,6 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input
 return DNN_ERROR;
 }
 
-// The order of dims in the openvino is fixed and it is always NCHW for 4-D data.
-// while we pass NHWC data from FFmpeg to openvino
-status = ie_network_set_input_layout(ov_model->network, input_name, NHWC);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Input \"%s\" does not match layout NHWC\n", input_name);
-return DNN_ERROR;
-}
-
 input->channels = dims.dims[1];
 input->height   = dims.dims[2];
 input->width= dims.dims[3];
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 2/3] dnn/openvino: refine code for better model initialization

2021-01-10 Thread Ting Fu
Move openvino model/inference request creation and initialization steps
from ff_dnn_load_model_ov to new function init_model_ov, for later input
resize support.
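
In effect the executable network becomes lazily created: loading the model no longer builds it, and callers construct it on first use, once the final input shape is known. A minimal sketch of the pattern (ensure_network is a hypothetical name; the fields are those of OVModel in this file):

/* Create the executable network on first use, after any reshape. */
static DNNReturnType ensure_network(OVModel *ov_model)
{
    if (!ov_model->exe_network) {
        if (init_model_ov(ov_model) != DNN_SUCCESS) {
            av_log(&ov_model->ctx, AV_LOG_ERROR,
                   "Failed to init OpenVINO executable network\n");
            return DNN_ERROR;
        }
    }
    return DNN_SUCCESS;
}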

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_openvino.c | 153 +++--
 1 file changed, 93 insertions(+), 60 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 050be97209..d6e0593a0b 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -217,6 +217,78 @@ static void infer_completion_callback(void *args)
 task->done = 1;
 }
 
+static DNNReturnType init_model_ov(OVModel *ov_model)
+{
+OVContext *ctx = &ov_model->ctx;
+IEStatusCode status;
+ie_available_devices_t a_dev;
+ie_config_t config = {NULL, NULL, NULL};
+char *all_dev_names = NULL;
+
+status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n");
+status = ie_core_get_available_devices(ov_model->core, &a_dev);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get available devices\n");
+goto err;
+}
+for (int i = 0; i < a_dev.num_devices; i++) {
+APPEND_STRING(all_dev_names, a_dev.devices[i])
+}
+av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all 
available devices are: \"%s\"\n",
+   ctx->options.device_type, all_dev_names);
+goto err;
+}
+
+// create infer_request for sync execution
+status = ie_exec_network_create_infer_request(ov_model->exe_network, &ov_model->infer_request);
+if (status != OK)
+goto err;
+
+// create infer_requests for async execution
+if (ctx->options.nireq <= 0) {
+// the default value is a rough estimation
+ctx->options.nireq = av_cpu_count() / 2 + 1;
+}
+
+ov_model->request_queue = ff_safe_queue_create();
+if (!ov_model->request_queue) {
+goto err;
+}
+
+for (int i = 0; i < ctx->options.nireq; i++) {
+ie_infer_request_t *request;
+RequestItem *item = av_mallocz(sizeof(*item));
+if (!item) {
+goto err;
+}
+status = ie_exec_network_create_infer_request(ov_model->exe_network, &request);
+if (status != OK) {
+av_freep(&item);
+goto err;
+}
+item->infer_request = request;
+item->callback.completeCallBackFunc = infer_completion_callback;
+item->callback.args = item;
+if (ff_safe_queue_push_back(ov_model->request_queue, item) < 0) {
+av_freep(&item);
+goto err;
+}
+}
+
+ov_model->task_queue = ff_queue_create();
+if (!ov_model->task_queue) {
+goto err;
+}
+
+return DNN_SUCCESS;
+
+err:
+ff_dnn_free_model_ov(&ov_model->model);
+return DNN_ERROR;
+}
+
 static DNNReturnType execute_model_ov(TaskItem *task, RequestItem *request)
 {
 IEStatusCode status;
@@ -325,6 +397,13 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
 in_frame->width = input_width;
 in_frame->height = input_height;
 
+if (!ov_model->exe_network) {
+if (init_model_ov(ov_model) != DNN_SUCCESS) {
+av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network 
or inference request\n");
+return DNN_ERROR;
+};
+}
+
 task.done = 0;
 task.do_ioproc = 0;
 task.async = 0;
@@ -347,13 +426,10 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
 
 DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, AVFilterContext *filter_ctx)
 {
-char *all_dev_names = NULL;
 DNNModel *model = NULL;
 OVModel *ov_model = NULL;
 OVContext *ctx = NULL;
 IEStatusCode status;
-ie_config_t config = {NULL, NULL, NULL};
-ie_available_devices_t a_dev;
 
 model = av_mallocz(sizeof(DNNModel));
 if (!model){
@@ -385,63 +461,6 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options,
 if (status != OK)
 goto err;
 
-status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to init OpenVINO model\n");
-status = ie_core_get_available_devices(ov_model->core, &a_dev);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to get available devices\n");
-goto err;
-}
-for (int i = 0; i &

[FFmpeg-devel] [PATCH 3/3] dnn/openvino: support model input resize

2021-01-10 Thread Ting Fu
OpenVINO APIs require a specified input size to run the model, while some
OpenVINO models do accept different input sizes. To enable this feature,
add the input_resizable option here for easier use.
Set the bool variable input_resizable to specify whether the input is resizable:
input_resizable = 1 means input resize is supported, i.e. different input sizes are accepted.
input_resizable = 0 (default) means input resize is not supported.
Please make sure the inference model does accept different input sizes
before using this option, otherwise the inference engine may report error(s).
eg: ./ffmpeg -i video_name.mp4 -vf dnn_processing=dnn_backend=openvino:\
  model=model_name.xml:input=input_name:output=output_name:\
  options=device=CPU\&input_resizable=1 -y output_video_name.mp4
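
A sketch of the reshape step itself, using the same OpenVINO C API calls as the patch (the helper name is hypothetical; a single NCHW input is assumed, so dims[2]/dims[3] are height/width):

/* Reshape the network's only input to h x w before the network is loaded. */
static int reshape_input(ie_network_t *network, size_t h, size_t w)
{
    input_shapes_t shapes;
    IEStatusCode status = ie_network_get_input_shapes(network, &shapes);
    if (status != OK)
        return -1;
    shapes.shapes->shape.dims[2] = h;  /* height */
    shapes.shapes->shape.dims[3] = w;  /* width  */
    status = ie_network_reshape(network, shapes);
    ie_network_input_shapes_free(&shapes);
    return status == OK ? 0 : -1;
}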

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_openvino.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index d6e0593a0b..65d74702ff 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -37,6 +37,7 @@
 typedef struct OVOptions{
 char *device_type;
 int nireq;
+int input_resizable;
 } OVOptions;
 
 typedef struct OVContext {
@@ -83,6 +84,7 @@ typedef struct RequestItem {
 static const AVOption dnn_openvino_options[] = {
 { "device", "device to run model", OFFSET(options.device_type), 
AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS },
 { "nireq",  "number of request",   OFFSET(options.nireq),   
AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, FLAGS },
+{ "input_resizable", "can input be resizable or not", 
OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL,   { .i64 = 0 }, 0, 1, 
FLAGS },
 { NULL }
 };
 
@@ -334,6 +336,7 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input
 size_t model_input_count = 0;
 dimensions_t dims;
 precision_e precision;
+int input_resizable = ctx->options.input_resizable;
 
 status = ie_network_get_inputs_number(ov_model->network, &model_input_count);
 if (status != OK) {
@@ -357,8 +360,8 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input
 }
 
 input->channels = dims.dims[1];
-input->height   = dims.dims[2];
-input->width= dims.dims[3];
+input->height   = input_resizable ? -1 : dims.dims[2];
+input->width= input_resizable ? -1 : dims.dims[3];
 input->dt   = precision_to_datatype(precision);
 return DNN_SUCCESS;
 } else {
@@ -383,6 +386,8 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
 RequestItem request;
 AVFrame *in_frame = av_frame_alloc();
 AVFrame *out_frame = NULL;
+IEStatusCode status;
+input_shapes_t input_shapes;
 
 if (!in_frame) {
 av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input 
frame\n");
@@ -397,6 +402,18 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
 in_frame->width = input_width;
 in_frame->height = input_height;
 
+if (ctx->options.input_resizable) {
+status = ie_network_get_input_shapes(ov_model->network, &input_shapes);
+input_shapes.shapes->shape.dims[2] = input_height;
+input_shapes.shapes->shape.dims[3] = input_width;
+status |= ie_network_reshape(ov_model->network, input_shapes);
+ie_network_input_shapes_free(&input_shapes);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to reshape input size for %s\n", 
input_name);
+return DNN_ERROR;
+}
+}
+
 if (!ov_model->exe_network) {
 if (init_model_ov(ov_model) != DNN_SUCCESS) {
 av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network 
or inference request\n");
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 3/3] dnn/openvino: support model input resize

2021-01-15 Thread Ting Fu
OpenVINO APIs require a specified input size to run the model, while some
OpenVINO models do accept different input sizes. To enable this feature,
add the input_resizable option here for easier use.
Set the bool variable input_resizable to specify whether the input is resizable:
input_resizable = 1 means input resize is supported, i.e. different input sizes are accepted.
input_resizable = 0 (default) means input resize is not supported.
Please make sure the inference model does accept different input sizes
before using this option, otherwise the inference engine may report error(s).
eg: ./ffmpeg -i video_name.mp4 -vf dnn_processing=dnn_backend=openvino:\
  model=model_name.xml:input=input_name:output=output_name:\
  options=device=CPU\&input_resizable=1 -y output_video_name.mp4

Signed-off-by: Ting Fu 
---
V2:
rebase to latest code

 libavfilter/dnn/dnn_backend_openvino.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 0b125eef65..1664ff5268 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -38,6 +38,7 @@ typedef struct OVOptions{
 char *device_type;
 int nireq;
 int batch_size;
+int input_resizable;
 } OVOptions;
 
 typedef struct OVContext {
@@ -86,6 +87,7 @@ static const AVOption dnn_openvino_options[] = {
 { "device", "device to run model", OFFSET(options.device_type), 
AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS },
 { "nireq",  "number of request",   OFFSET(options.nireq),   
AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, FLAGS },
 { "batch_size",  "batch size per request", OFFSET(options.batch_size),  
AV_OPT_TYPE_INT,{ .i64 = 1 }, 1, 1000, FLAGS},
+{ "input_resizable", "can input be resizable or not", 
OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL,   { .i64 = 0 }, 0, 1, 
FLAGS },
 { NULL }
 };
 
@@ -393,6 +395,7 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input
 size_t model_input_count = 0;
 dimensions_t dims;
 precision_e precision;
+int input_resizable = ctx->options.input_resizable;
 
 status = ie_network_get_inputs_number(ov_model->network, &model_input_count);
 if (status != OK) {
@@ -416,8 +419,8 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input
 }
 
 input->channels = dims.dims[1];
-input->height   = dims.dims[2];
-input->width= dims.dims[3];
+input->height   = input_resizable ? -1 : dims.dims[2];
+input->width= input_resizable ? -1 : dims.dims[3];
 input->dt   = precision_to_datatype(precision);
 return DNN_SUCCESS;
 } else {
@@ -443,6 +446,8 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
 AVFrame *in_frame = av_frame_alloc();
 AVFrame *out_frame = NULL;
 TaskItem *ptask = &task;
+IEStatusCode status;
+input_shapes_t input_shapes;
 
 if (!in_frame) {
 av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input 
frame\n");
@@ -457,6 +462,18 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
 in_frame->width = input_width;
 in_frame->height = input_height;
 
+if (ctx->options.input_resizable) {
+status = ie_network_get_input_shapes(ov_model->network, &input_shapes);
+input_shapes.shapes->shape.dims[2] = input_height;
+input_shapes.shapes->shape.dims[3] = input_width;
+status |= ie_network_reshape(ov_model->network, input_shapes);
+ie_network_input_shapes_free(&input_shapes);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to reshape input size for %s\n", 
input_name);
+return DNN_ERROR;
+}
+}
+
 if (!ov_model->exe_network) {
 if (init_model_ov(ov_model) != DNN_SUCCESS) {
 av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network 
or inference request\n");
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 1/3] dnn/openvino: remove unnecessary code

2021-01-15 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_openvino.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 5271d1caa5..8476f4fb38 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -325,14 +325,6 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input
 return DNN_ERROR;
 }
 
-// The order of dims in the openvino is fixed and it is always NCHW for 4-D data.
-// while we pass NHWC data from FFmpeg to openvino
-status = ie_network_set_input_layout(ov_model->network, input_name, NHWC);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Input \"%s\" does not match layout NHWC\n", input_name);
-return DNN_ERROR;
-}
-
 input->channels = dims.dims[1];
 input->height   = dims.dims[2];
 input->width= dims.dims[3];
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 2/3] dnn/openvino: refine code for better model initialization

2021-01-15 Thread Ting Fu
Move openvino model/inference request creation and initialization steps
from ff_dnn_load_model_ov to new function init_model_ov, for later input
resize support.

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_openvino.c | 196 ++---
 1 file changed, 111 insertions(+), 85 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 8476f4fb38..0b125eef65 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -248,6 +248,96 @@ static void infer_completion_callback(void *args)
 }
 }
 
+static DNNReturnType init_model_ov(OVModel *ov_model)
+{
+OVContext *ctx = &ov_model->ctx;
+IEStatusCode status;
+ie_available_devices_t a_dev;
+ie_config_t config = {NULL, NULL, NULL};
+char *all_dev_names = NULL;
+
+// batch size
+if (ctx->options.batch_size <= 0) {
+ctx->options.batch_size = 1;
+}
+
+if (ctx->options.batch_size > 1) {
+input_shapes_t input_shapes;
+status = ie_network_get_input_shapes(ov_model->network, &input_shapes);
+if (status != OK)
+goto err;
+for (int i = 0; i < input_shapes.shape_num; i++)
+input_shapes.shapes[i].shape.dims[0] = ctx->options.batch_size;
+status = ie_network_reshape(ov_model->network, input_shapes);
+ie_network_input_shapes_free(&input_shapes);
+if (status != OK)
+goto err;
+}
+
+status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n");
+status = ie_core_get_available_devices(ov_model->core, &a_dev);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get available devices\n");
+goto err;
+}
+for (int i = 0; i < a_dev.num_devices; i++) {
+APPEND_STRING(all_dev_names, a_dev.devices[i])
+}
+av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all 
available devices are: \"%s\"\n",
+   ctx->options.device_type, all_dev_names);
+goto err;
+}
+
+// create infer_request for sync execution
+status = ie_exec_network_create_infer_request(ov_model->exe_network, &ov_model->infer_request);
+if (status != OK)
+goto err;
+
+// create infer_requests for async execution
+if (ctx->options.nireq <= 0) {
+// the default value is a rough estimation
+ctx->options.nireq = av_cpu_count() / 2 + 1;
+}
+
+ov_model->request_queue = ff_safe_queue_create();
+if (!ov_model->request_queue) {
+goto err;
+}
+
+for (int i = 0; i < ctx->options.nireq; i++) {
+ie_infer_request_t *request;
+RequestItem *item = av_mallocz(sizeof(*item));
+if (!item) {
+goto err;
+}
+status = ie_exec_network_create_infer_request(ov_model->exe_network, &request);
+if (status != OK) {
+av_freep(&item);
+goto err;
+}
+item->infer_request = request;
+item->callback.completeCallBackFunc = infer_completion_callback;
+item->callback.args = item;
+if (ff_safe_queue_push_back(ov_model->request_queue, item) < 0) {
+av_freep(&item);
+goto err;
+}
+}
+
+ov_model->task_queue = ff_queue_create();
+if (!ov_model->task_queue) {
+goto err;
+}
+
+return DNN_SUCCESS;
+
+err:
+ff_dnn_free_model_ov(&ov_model->model);
+return DNN_ERROR;
+}
+
 static DNNReturnType execute_model_ov(RequestItem *request)
 {
 IEStatusCode status;
@@ -367,6 +457,13 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
 in_frame->width = input_width;
 in_frame->height = input_height;
 
+if (!ov_model->exe_network) {
+if (init_model_ov(ov_model) != DNN_SUCCESS) {
+av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network 
or inference request\n");
+return DNN_ERROR;
+};
+}
+
 task.done = 0;
 task.do_ioproc = 0;
 task.async = 0;
@@ -391,13 +488,10 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
 
 DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, AVFilterContext *filter_ctx)
 {
-char *all_dev_names = NULL;
 DNNModel *model = NULL;
 OVModel *ov_model = NULL;
 OVContext *ctx = NULL;
 IEStatusCode status;
-ie_config_t config = {NULL, NULL, NULL};
-ie_available_devices_t a_dev;
 
 model = av_mallocz(sizeof(DNNModel));
 if (!model){
@@ -429,88

[FFmpeg-devel] [PATCH V3 1/3] dnn/openvino: remove unnecessary code

2021-01-17 Thread Ting Fu
Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_openvino.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 5271d1caa5..8476f4fb38 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -325,14 +325,6 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input
 return DNN_ERROR;
 }
 
-// The order of dims in the openvino is fixed and it is always NCHW for 4-D data.
-// while we pass NHWC data from FFmpeg to openvino
-status = ie_network_set_input_layout(ov_model->network, input_name, NHWC);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Input \"%s\" does not match layout NHWC\n", input_name);
-return DNN_ERROR;
-}
-
 input->channels = dims.dims[1];
 input->height   = dims.dims[2];
 input->width= dims.dims[3];
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V3 2/3] dnn/openvino: refine code for better model initialization

2021-01-17 Thread Ting Fu
Move openvino model/inference request creation and initialization steps
from ff_dnn_load_model_ov to new function init_model_ov, for later input
resize support.

Signed-off-by: Ting Fu 
---
 libavfilter/dnn/dnn_backend_openvino.c | 203 ++---
 1 file changed, 118 insertions(+), 85 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 8476f4fb38..ecfd2b3f36 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -248,6 +248,103 @@ static void infer_completion_callback(void *args)
 }
 }
 
+static DNNReturnType init_model_ov(OVModel *ov_model)
+{
+OVContext *ctx = &ov_model->ctx;
+IEStatusCode status;
+ie_available_devices_t a_dev;
+ie_config_t config = {NULL, NULL, NULL};
+char *all_dev_names = NULL;
+
+// batch size
+if (ctx->options.batch_size <= 0) {
+ctx->options.batch_size = 1;
+}
+
+if (ctx->options.batch_size > 1) {
+input_shapes_t input_shapes;
+status = ie_network_get_input_shapes(ov_model->network, &input_shapes);
+if (status != OK)
+goto err;
+for (int i = 0; i < input_shapes.shape_num; i++)
+input_shapes.shapes[i].shape.dims[0] = ctx->options.batch_size;
+status = ie_network_reshape(ov_model->network, input_shapes);
+ie_network_input_shapes_free(&input_shapes);
+if (status != OK)
+goto err;
+}
+
+status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n");
+status = ie_core_get_available_devices(ov_model->core, &a_dev);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get available devices\n");
+goto err;
+}
+for (int i = 0; i < a_dev.num_devices; i++) {
+APPEND_STRING(all_dev_names, a_dev.devices[i])
+}
+av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all 
available devices are: \"%s\"\n",
+   ctx->options.device_type, all_dev_names);
+goto err;
+}
+
+// create infer_request for sync execution
+status = ie_exec_network_create_infer_request(ov_model->exe_network, &ov_model->infer_request);
+if (status != OK)
+goto err;
+
+// create infer_requests for async execution
+if (ctx->options.nireq <= 0) {
+// the default value is a rough estimation
+ctx->options.nireq = av_cpu_count() / 2 + 1;
+}
+
+ov_model->request_queue = ff_safe_queue_create();
+if (!ov_model->request_queue) {
+goto err;
+}
+
+for (int i = 0; i < ctx->options.nireq; i++) {
+RequestItem *item = av_mallocz(sizeof(*item));
+if (!item) {
+goto err;
+}
+
+status = ie_exec_network_create_infer_request(ov_model->exe_network, &item->infer_request);
+if (status != OK) {
+av_freep(&item);
+goto err;
+}
+
+item->tasks = av_malloc_array(ctx->options.batch_size, sizeof(*item->tasks));
+if (!item->tasks) {
+av_freep(&item);
+goto err;
+}
+item->task_count = 0;
+
+item->callback.completeCallBackFunc = infer_completion_callback;
+item->callback.args = item;
+if (ff_safe_queue_push_back(ov_model->request_queue, item) < 0) {
+av_freep(&item);
+goto err;
+}
+}
+
+ov_model->task_queue = ff_queue_create();
+if (!ov_model->task_queue) {
+goto err;
+}
+
+return DNN_SUCCESS;
+
+err:
+ff_dnn_free_model_ov(&ov_model->model);
+return DNN_ERROR;
+}
+
 static DNNReturnType execute_model_ov(RequestItem *request)
 {
 IEStatusCode status;
@@ -367,6 +464,13 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
 in_frame->width = input_width;
 in_frame->height = input_height;
 
+if (!ov_model->exe_network) {
+if (init_model_ov(ov_model) != DNN_SUCCESS) {
+av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network 
or inference request\n");
+return DNN_ERROR;
+};
+}
+
 task.done = 0;
 task.do_ioproc = 0;
 task.async = 0;
@@ -391,13 +495,10 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
 
 DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, AVFilterContext *filter_ctx)
 {
-char *all_dev_names = NULL;
 DNNModel *model = NULL;
 OVModel *ov_model = NULL;
 OVContext *ctx = NULL;
 IEStatusCode status;

[FFmpeg-devel] [PATCH V3 3/3] dnn/openvino: support model input resize

2021-01-17 Thread Ting Fu
OpenVINO APIs require a specified input size to run the model, while some
OpenVINO models do accept different input sizes. To enable this feature,
add the input_resizable option here for easier use.
Set the bool variable input_resizable to specify whether the input is resizable:
input_resizable = 1 means input resize is supported, i.e. different input sizes are accepted.
input_resizable = 0 (default) means input resize is not supported.
Please make sure the inference model does accept different input sizes
before using this option, otherwise the inference engine may report error(s).
eg: ./ffmpeg -i video_name.mp4 -vf dnn_processing=dnn_backend=openvino:\
  model=model_name.xml:input=input_name:output=output_name:\
  options=device=CPU\&input_resizable=1 -y output_video_name.mp4

Signed-off-by: Ting Fu 
---
V3:
rebase to latest code and add missing code

 libavfilter/dnn/dnn_backend_openvino.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index ecfd2b3f36..8a7abb33f0 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -38,6 +38,7 @@ typedef struct OVOptions{
 char *device_type;
 int nireq;
 int batch_size;
+int input_resizable;
 } OVOptions;
 
 typedef struct OVContext {
@@ -86,6 +87,7 @@ static const AVOption dnn_openvino_options[] = {
 { "device", "device to run model", OFFSET(options.device_type), 
AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS },
 { "nireq",  "number of request",   OFFSET(options.nireq),   
AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, FLAGS },
 { "batch_size",  "batch size per request", OFFSET(options.batch_size),  
AV_OPT_TYPE_INT,{ .i64 = 1 }, 1, 1000, FLAGS},
+{ "input_resizable", "can input be resizable or not", 
OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL,   { .i64 = 0 }, 0, 1, 
FLAGS },
 { NULL }
 };
 
@@ -400,6 +402,7 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input
 size_t model_input_count = 0;
 dimensions_t dims;
 precision_e precision;
+int input_resizable = ctx->options.input_resizable;
 
status = ie_network_get_inputs_number(ov_model->network, &model_input_count);
 if (status != OK) {
@@ -423,8 +426,8 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input
 }
 
 input->channels = dims.dims[1];
-input->height   = dims.dims[2];
-input->width= dims.dims[3];
+input->height   = input_resizable ? -1 : dims.dims[2];
+input->width= input_resizable ? -1 : dims.dims[3];
 input->dt   = precision_to_datatype(precision);
 return DNN_SUCCESS;
 } else {
@@ -450,6 +453,8 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
 AVFrame *in_frame = av_frame_alloc();
 AVFrame *out_frame = NULL;
 TaskItem *ptask = &task;
+IEStatusCode status;
+input_shapes_t input_shapes;
 
 if (!in_frame) {
av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input frame\n");
@@ -464,6 +469,18 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
 in_frame->width = input_width;
 in_frame->height = input_height;
 
+if (ctx->options.input_resizable) {
+status = ie_network_get_input_shapes(ov_model->network, &input_shapes);
+input_shapes.shapes->shape.dims[2] = input_height;
+input_shapes.shapes->shape.dims[3] = input_width;
+status |= ie_network_reshape(ov_model->network, input_shapes);
+ie_network_input_shapes_free(&input_shapes);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to reshape input size for %s\n", input_name);
+return DNN_ERROR;
+}
+}
+
 if (!ov_model->exe_network) {
 if (init_model_ov(ov_model) != DNN_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Failed to init OpenVINO executable network or inference request\n");
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] tests/dnn/mathunary: fix the issue of NAN

2020-07-02 Thread Ting Fu
When either output[i] or expected_output is NaN, the unit test will always pass.
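
The reason the old check can never fire: under IEEE 754 every ordered
comparison involving NaN is false, so fabs(output[i] - expected_output) > EPS
is false whenever either side is NaN, and the loop falls through as if the
values matched. A minimal sketch demonstrating the failure mode and the
fixed check (EPS here is a stand-in for the epsilon the test defines):

    #include <math.h>
    #include <stdio.h>

    #define EPS 0.000001f  /* stand-in for the test's epsilon */

    int main(void)
    {
        float output = NAN, expected = 1.0f;
        int out_nan = isnan(output), exp_nan = isnan(expected);

        /* NaN > EPS is false, so the old check misses the mismatch: prints 0 */
        printf("old check flags error: %d\n", fabs(output - expected) > EPS);

        /* Checking the NaN-ness of both sides first catches it: prints 1 */
        printf("new check flags error: %d\n",
               (out_nan && !exp_nan) || (!out_nan && exp_nan) ||
               (!out_nan && !exp_nan && fabs(output - expected) > EPS));
        return 0;
    }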

Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index bf77c44bbe..f251447771 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -74,7 +74,8 @@ static int test(DNNMathUnaryOperation op)
 output = operands[1].data;
 for (int i = 0; i < sizeof(input) / sizeof(float); ++i) {
 float expected_output = get_expected(input[i], op);
-if(fabs(output[i] - expected_output) > EPS) {
+if ((isnan(output[i]) ^ isnan(expected_output)) ||
+fabs(output[i] - expected_output) > EPS) {
 printf("at index %d, output: %f, expected_output: %f\n", i, 
output[i], expected_output);
 av_freep(&output);
 return 1;
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2] tests/dnn/mathunary: fix the issue of NAN

2020-07-07 Thread Ting Fu
When either output[i] or expected_output is NaN, the unit test will always pass.

Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index 683e623d95..70c6a43f95 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -86,7 +86,8 @@ static int test(DNNMathUnaryOperation op)
 output = operands[1].data;
 for (int i = 0; i < sizeof(input) / sizeof(float); ++i) {
 float expected_output = get_expected(input[i], op);
-if(fabs(output[i] - expected_output) > EPS) {
+if ((!isnan(output[i]) && !isnan(expected_output) && fabs(output[i] - expected_output) > EPS) ||
+(isnan(output[i]) && !isnan(expected_output)) || (!isnan(output[i]) && isnan(expected_output))) {
printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output);
 av_freep(&output);
 return 1;
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V3] tests/dnn/mathunary: fix the issue of NAN

2020-07-07 Thread Ting Fu
When either output[i] or expected_output is NaN, the unit test will always pass.

Signed-off-by: Ting Fu 
---
 tests/dnn/dnn-layer-mathunary-test.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index 683e623d95..5afc5c157e 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -86,7 +86,10 @@ static int test(DNNMathUnaryOperation op)
 output = operands[1].data;
 for (int i = 0; i < sizeof(input) / sizeof(float); ++i) {
 float expected_output = get_expected(input[i], op);
-if(fabs(output[i] - expected_output) > EPS) {
+int output_nan = isnan(output[i]);
+int expected_nan = isnan(expected_output);
+if ((!output_nan && !expected_nan && fabs(output[i] - expected_output) > EPS) ||
+(output_nan && !expected_nan) || (!output_nan && expected_nan)) {
printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output);
 av_freep(&output);
 return 1;
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

  1   2   >